mbuf: make segment prefree function public
[dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit Mask to indicate what bits required for building TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 IXGBE_TX_IEEE1588_TMST)
97
98 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
99                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 #ifdef RTE_IXGBE_INC_VECTOR
115 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
116                                     uint16_t nb_pkts);
117 #endif
118
119 /*********************************************************************
120  *
121  *  TX functions
122  *
123  **********************************************************************/
124
125 /*
126  * Check for descriptors with their DD bit set and free mbufs.
127  * Return the total number of buffers freed.
128  */
129 static inline int __attribute__((always_inline))
130 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
131 {
132         struct ixgbe_tx_entry *txep;
133         uint32_t status;
134         int i, nb_free = 0;
135         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
136
137         /* check DD bit on threshold descriptor */
138         status = txq->tx_ring[txq->tx_next_dd].wb.status;
139         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
140                 return 0;
141
142         /*
143          * first buffer to free from S/W ring is at index
144          * tx_next_dd - (tx_rs_thresh-1)
145          */
146         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
147
148         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
149                 /* free buffers one at a time */
150                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
151                 txep->mbuf = NULL;
152
153                 if (unlikely(m == NULL))
154                         continue;
155
156                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
157                     (nb_free > 0 && m->pool != free[0]->pool)) {
158                         rte_mempool_put_bulk(free[0]->pool,
159                                              (void **)free, nb_free);
160                         nb_free = 0;
161                 }
162
163                 free[nb_free++] = m;
164         }
165
166         if (nb_free > 0)
167                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
168
169         /* buffers were freed, update counters */
170         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
171         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
172         if (txq->tx_next_dd >= txq->nb_tx_desc)
173                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
174
175         return txq->tx_rs_thresh;
176 }
177
178 /* Populate 4 descriptors with data from 4 mbufs */
179 static inline void
180 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
181 {
182         uint64_t buf_dma_addr;
183         uint32_t pkt_len;
184         int i;
185
186         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
187                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
188                 pkt_len = (*pkts)->data_len;
189
190                 /* write data to descriptor */
191                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
192
193                 txdp->read.cmd_type_len =
194                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
195
196                 txdp->read.olinfo_status =
197                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
198
199                 rte_prefetch0(&(*pkts)->pool);
200         }
201 }
202
203 /* Populate 1 descriptor with data from 1 mbuf */
204 static inline void
205 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
206 {
207         uint64_t buf_dma_addr;
208         uint32_t pkt_len;
209
210         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
211         pkt_len = (*pkts)->data_len;
212
213         /* write data to descriptor */
214         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
215         txdp->read.cmd_type_len =
216                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
217         txdp->read.olinfo_status =
218                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
219         rte_prefetch0(&(*pkts)->pool);
220 }
221
222 /*
223  * Fill H/W descriptor ring with mbuf data.
224  * Copy mbuf pointers to the S/W ring.
225  */
226 static inline void
227 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
228                       uint16_t nb_pkts)
229 {
230         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
231         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
232         const int N_PER_LOOP = 4;
233         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
234         int mainpart, leftover;
235         int i, j;
236
237         /*
238          * Process most of the packets in chunks of N pkts.  Any
239          * leftover packets will get processed one at a time.
240          */
241         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
242         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
243         for (i = 0; i < mainpart; i += N_PER_LOOP) {
244                 /* Copy N mbuf pointers to the S/W ring */
245                 for (j = 0; j < N_PER_LOOP; ++j) {
246                         (txep + i + j)->mbuf = *(pkts + i + j);
247                 }
248                 tx4(txdp + i, pkts + i);
249         }
250
251         if (unlikely(leftover > 0)) {
252                 for (i = 0; i < leftover; ++i) {
253                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
254                         tx1(txdp + mainpart + i, pkts + mainpart + i);
255                 }
256         }
257 }
258
259 static inline uint16_t
260 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
261              uint16_t nb_pkts)
262 {
263         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
264         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
265         uint16_t n = 0;
266
267         /*
268          * Begin scanning the H/W ring for done descriptors when the
269          * number of available descriptors drops below tx_free_thresh.  For
270          * each done descriptor, free the associated buffer.
271          */
272         if (txq->nb_tx_free < txq->tx_free_thresh)
273                 ixgbe_tx_free_bufs(txq);
274
275         /* Only use descriptors that are available */
276         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
277         if (unlikely(nb_pkts == 0))
278                 return 0;
279
280         /* Use exactly nb_pkts descriptors */
281         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
282
283         /*
284          * At this point, we know there are enough descriptors in the
285          * ring to transmit all the packets.  This assumes that each
286          * mbuf contains a single segment, and that no new offloads
287          * are expected, which would require a new context descriptor.
288          */
289
290         /*
291          * See if we're going to wrap-around. If so, handle the top
292          * of the descriptor ring first, then do the bottom.  If not,
293          * the processing looks just like the "bottom" part anyway...
294          */
295         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
296                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
297                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
298
299                 /*
300                  * We know that the last descriptor in the ring will need to
301                  * have its RS bit set because tx_rs_thresh has to be
302                  * a divisor of the ring size
303                  */
304                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
305                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
306                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
307
308                 txq->tx_tail = 0;
309         }
310
311         /* Fill H/W descriptor ring with mbuf data */
312         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
313         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
314
315         /*
316          * Determine if RS bit should be set
317          * This is what we actually want:
318          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
319          * but instead of subtracting 1 and doing >=, we can just do
320          * greater than without subtracting.
321          */
322         if (txq->tx_tail > txq->tx_next_rs) {
323                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
324                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
325                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
326                                                 txq->tx_rs_thresh);
327                 if (txq->tx_next_rs >= txq->nb_tx_desc)
328                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
329         }
330
331         /*
332          * Check for wrap-around. This would only happen if we used
333          * up to the last descriptor in the ring, no more, no less.
334          */
335         if (txq->tx_tail >= txq->nb_tx_desc)
336                 txq->tx_tail = 0;
337
338         /* update tail pointer */
339         rte_wmb();
340         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
341
342         return nb_pkts;
343 }
344
345 uint16_t
346 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
347                        uint16_t nb_pkts)
348 {
349         uint16_t nb_tx;
350
351         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
352         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
353                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
354
355         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
356         nb_tx = 0;
357         while (nb_pkts) {
358                 uint16_t ret, n;
359
360                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
361                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
362                 nb_tx = (uint16_t)(nb_tx + ret);
363                 nb_pkts = (uint16_t)(nb_pkts - ret);
364                 if (ret < n)
365                         break;
366         }
367
368         return nb_tx;
369 }
370
371 #ifdef RTE_IXGBE_INC_VECTOR
372 static uint16_t
373 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
374                     uint16_t nb_pkts)
375 {
376         uint16_t nb_tx = 0;
377         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
378
379         while (nb_pkts) {
380                 uint16_t ret, num;
381
382                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
383                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
384                                                  num);
385                 nb_tx += ret;
386                 nb_pkts -= ret;
387                 if (ret < num)
388                         break;
389         }
390
391         return nb_tx;
392 }
393 #endif
394
395 static inline void
396 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
397                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
398                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
399 {
400         uint32_t type_tucmd_mlhl;
401         uint32_t mss_l4len_idx = 0;
402         uint32_t ctx_idx;
403         uint32_t vlan_macip_lens;
404         union ixgbe_tx_offload tx_offload_mask;
405         uint32_t seqnum_seed = 0;
406
407         ctx_idx = txq->ctx_curr;
408         tx_offload_mask.data[0] = 0;
409         tx_offload_mask.data[1] = 0;
410         type_tucmd_mlhl = 0;
411
412         /* Specify which HW CTX to upload. */
413         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
414
415         if (ol_flags & PKT_TX_VLAN_PKT) {
416                 tx_offload_mask.vlan_tci |= ~0;
417         }
418
419         /* check if TCP segmentation required for this packet */
420         if (ol_flags & PKT_TX_TCP_SEG) {
421                 /* implies IP cksum in IPv4 */
422                 if (ol_flags & PKT_TX_IP_CKSUM)
423                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
424                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                 else
427                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
428                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
429                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
430
431                 tx_offload_mask.l2_len |= ~0;
432                 tx_offload_mask.l3_len |= ~0;
433                 tx_offload_mask.l4_len |= ~0;
434                 tx_offload_mask.tso_segsz |= ~0;
435                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
436                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
437         } else { /* no TSO, check if hardware checksum is needed */
438                 if (ol_flags & PKT_TX_IP_CKSUM) {
439                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
440                         tx_offload_mask.l2_len |= ~0;
441                         tx_offload_mask.l3_len |= ~0;
442                 }
443
444                 switch (ol_flags & PKT_TX_L4_MASK) {
445                 case PKT_TX_UDP_CKSUM:
446                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
447                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
448                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
449                         tx_offload_mask.l2_len |= ~0;
450                         tx_offload_mask.l3_len |= ~0;
451                         break;
452                 case PKT_TX_TCP_CKSUM:
453                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
454                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
455                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
456                         tx_offload_mask.l2_len |= ~0;
457                         tx_offload_mask.l3_len |= ~0;
458                         break;
459                 case PKT_TX_SCTP_CKSUM:
460                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
461                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
462                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
463                         tx_offload_mask.l2_len |= ~0;
464                         tx_offload_mask.l3_len |= ~0;
465                         break;
466                 default:
467                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
468                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
469                         break;
470                 }
471         }
472
473         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
474                 tx_offload_mask.outer_l2_len |= ~0;
475                 tx_offload_mask.outer_l3_len |= ~0;
476                 tx_offload_mask.l2_len |= ~0;
477                 seqnum_seed |= tx_offload.outer_l3_len
478                                << IXGBE_ADVTXD_OUTER_IPLEN;
479                 seqnum_seed |= tx_offload.l2_len
480                                << IXGBE_ADVTXD_TUNNEL_LEN;
481         }
482
483         txq->ctx_cache[ctx_idx].flags = ol_flags;
484         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
485                 tx_offload_mask.data[0] & tx_offload.data[0];
486         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
487                 tx_offload_mask.data[1] & tx_offload.data[1];
488         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
489
490         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
491         vlan_macip_lens = tx_offload.l3_len;
492         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
493                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
494                                     IXGBE_ADVTXD_MACLEN_SHIFT);
495         else
496                 vlan_macip_lens |= (tx_offload.l2_len <<
497                                     IXGBE_ADVTXD_MACLEN_SHIFT);
498         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
499         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
500         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
501         ctx_txd->seqnum_seed     = seqnum_seed;
502 }
503
504 /*
505  * Check which hardware context can be used. Use the existing match
506  * or create a new context descriptor.
507  */
508 static inline uint32_t
509 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
510                    union ixgbe_tx_offload tx_offload)
511 {
512         /* If match with the current used context */
513         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
514                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
515                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
516                      & tx_offload.data[0])) &&
517                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
518                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
519                      & tx_offload.data[1]))))
520                 return txq->ctx_curr;
521
522         /* What if match with the next context  */
523         txq->ctx_curr ^= 1;
524         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
525                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
526                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
527                      & tx_offload.data[0])) &&
528                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
529                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
530                      & tx_offload.data[1]))))
531                 return txq->ctx_curr;
532
533         /* Mismatch, use the previous context */
534         return IXGBE_CTX_NUM;
535 }
536
537 static inline uint32_t
538 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
539 {
540         uint32_t tmp = 0;
541
542         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
543                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
544         if (ol_flags & PKT_TX_IP_CKSUM)
545                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
546         if (ol_flags & PKT_TX_TCP_SEG)
547                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
548         return tmp;
549 }
550
551 static inline uint32_t
552 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
553 {
554         uint32_t cmdtype = 0;
555
556         if (ol_flags & PKT_TX_VLAN_PKT)
557                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
558         if (ol_flags & PKT_TX_TCP_SEG)
559                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
560         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
561                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
562         if (ol_flags & PKT_TX_MACSEC)
563                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
564         return cmdtype;
565 }
566
567 /* Default RS bit threshold values */
568 #ifndef DEFAULT_TX_RS_THRESH
569 #define DEFAULT_TX_RS_THRESH   32
570 #endif
571 #ifndef DEFAULT_TX_FREE_THRESH
572 #define DEFAULT_TX_FREE_THRESH 32
573 #endif
574
575 /* Reset transmit descriptors after they have been used */
576 static inline int
577 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
578 {
579         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
580         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
581         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
582         uint16_t nb_tx_desc = txq->nb_tx_desc;
583         uint16_t desc_to_clean_to;
584         uint16_t nb_tx_to_clean;
585         uint32_t status;
586
587         /* Determine the last descriptor needing to be cleaned */
588         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
589         if (desc_to_clean_to >= nb_tx_desc)
590                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
591
592         /* Check to make sure the last descriptor to clean is done */
593         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
594         status = txr[desc_to_clean_to].wb.status;
595         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
596                 PMD_TX_FREE_LOG(DEBUG,
597                                 "TX descriptor %4u is not done"
598                                 "(port=%d queue=%d)",
599                                 desc_to_clean_to,
600                                 txq->port_id, txq->queue_id);
601                 /* Failed to clean any descriptors, better luck next time */
602                 return -(1);
603         }
604
605         /* Figure out how many descriptors will be cleaned */
606         if (last_desc_cleaned > desc_to_clean_to)
607                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
608                                                         desc_to_clean_to);
609         else
610                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
611                                                 last_desc_cleaned);
612
613         PMD_TX_FREE_LOG(DEBUG,
614                         "Cleaning %4u TX descriptors: %4u to %4u "
615                         "(port=%d queue=%d)",
616                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
617                         txq->port_id, txq->queue_id);
618
619         /*
620          * The last descriptor to clean is done, so that means all the
621          * descriptors from the last descriptor that was cleaned
622          * up to the last descriptor with the RS bit set
623          * are done. Only reset the threshold descriptor.
624          */
625         txr[desc_to_clean_to].wb.status = 0;
626
627         /* Update the txq to reflect the last descriptor that was cleaned */
628         txq->last_desc_cleaned = desc_to_clean_to;
629         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
630
631         /* No Error */
632         return 0;
633 }
634
635 uint16_t
636 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
637                 uint16_t nb_pkts)
638 {
639         struct ixgbe_tx_queue *txq;
640         struct ixgbe_tx_entry *sw_ring;
641         struct ixgbe_tx_entry *txe, *txn;
642         volatile union ixgbe_adv_tx_desc *txr;
643         volatile union ixgbe_adv_tx_desc *txd, *txp;
644         struct rte_mbuf     *tx_pkt;
645         struct rte_mbuf     *m_seg;
646         uint64_t buf_dma_addr;
647         uint32_t olinfo_status;
648         uint32_t cmd_type_len;
649         uint32_t pkt_len;
650         uint16_t slen;
651         uint64_t ol_flags;
652         uint16_t tx_id;
653         uint16_t tx_last;
654         uint16_t nb_tx;
655         uint16_t nb_used;
656         uint64_t tx_ol_req;
657         uint32_t ctx = 0;
658         uint32_t new_ctx;
659         union ixgbe_tx_offload tx_offload;
660
661         tx_offload.data[0] = 0;
662         tx_offload.data[1] = 0;
663         txq = tx_queue;
664         sw_ring = txq->sw_ring;
665         txr     = txq->tx_ring;
666         tx_id   = txq->tx_tail;
667         txe = &sw_ring[tx_id];
668         txp = NULL;
669
670         /* Determine if the descriptor ring needs to be cleaned. */
671         if (txq->nb_tx_free < txq->tx_free_thresh)
672                 ixgbe_xmit_cleanup(txq);
673
674         rte_prefetch0(&txe->mbuf->pool);
675
676         /* TX loop */
677         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
678                 new_ctx = 0;
679                 tx_pkt = *tx_pkts++;
680                 pkt_len = tx_pkt->pkt_len;
681
682                 /*
683                  * Determine how many (if any) context descriptors
684                  * are needed for offload functionality.
685                  */
686                 ol_flags = tx_pkt->ol_flags;
687
688                 /* If hardware offload required */
689                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
690                 if (tx_ol_req) {
691                         tx_offload.l2_len = tx_pkt->l2_len;
692                         tx_offload.l3_len = tx_pkt->l3_len;
693                         tx_offload.l4_len = tx_pkt->l4_len;
694                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
695                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
696                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
697                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
698
699                         /* If new context need be built or reuse the exist ctx. */
700                         ctx = what_advctx_update(txq, tx_ol_req,
701                                 tx_offload);
702                         /* Only allocate context descriptor if required*/
703                         new_ctx = (ctx == IXGBE_CTX_NUM);
704                         ctx = txq->ctx_curr;
705                 }
706
707                 /*
708                  * Keep track of how many descriptors are used this loop
709                  * This will always be the number of segments + the number of
710                  * Context descriptors required to transmit the packet
711                  */
712                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
713
714                 if (txp != NULL &&
715                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
716                         /* set RS on the previous packet in the burst */
717                         txp->read.cmd_type_len |=
718                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
719
720                 /*
721                  * The number of descriptors that must be allocated for a
722                  * packet is the number of segments of that packet, plus 1
723                  * Context Descriptor for the hardware offload, if any.
724                  * Determine the last TX descriptor to allocate in the TX ring
725                  * for the packet, starting from the current position (tx_id)
726                  * in the ring.
727                  */
728                 tx_last = (uint16_t) (tx_id + nb_used - 1);
729
730                 /* Circular ring */
731                 if (tx_last >= txq->nb_tx_desc)
732                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
733
734                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
735                            " tx_first=%u tx_last=%u",
736                            (unsigned) txq->port_id,
737                            (unsigned) txq->queue_id,
738                            (unsigned) pkt_len,
739                            (unsigned) tx_id,
740                            (unsigned) tx_last);
741
742                 /*
743                  * Make sure there are enough TX descriptors available to
744                  * transmit the entire packet.
745                  * nb_used better be less than or equal to txq->tx_rs_thresh
746                  */
747                 if (nb_used > txq->nb_tx_free) {
748                         PMD_TX_FREE_LOG(DEBUG,
749                                         "Not enough free TX descriptors "
750                                         "nb_used=%4u nb_free=%4u "
751                                         "(port=%d queue=%d)",
752                                         nb_used, txq->nb_tx_free,
753                                         txq->port_id, txq->queue_id);
754
755                         if (ixgbe_xmit_cleanup(txq) != 0) {
756                                 /* Could not clean any descriptors */
757                                 if (nb_tx == 0)
758                                         return 0;
759                                 goto end_of_tx;
760                         }
761
762                         /* nb_used better be <= txq->tx_rs_thresh */
763                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
764                                 PMD_TX_FREE_LOG(DEBUG,
765                                         "The number of descriptors needed to "
766                                         "transmit the packet exceeds the "
767                                         "RS bit threshold. This will impact "
768                                         "performance."
769                                         "nb_used=%4u nb_free=%4u "
770                                         "tx_rs_thresh=%4u. "
771                                         "(port=%d queue=%d)",
772                                         nb_used, txq->nb_tx_free,
773                                         txq->tx_rs_thresh,
774                                         txq->port_id, txq->queue_id);
775                                 /*
776                                  * Loop here until there are enough TX
777                                  * descriptors or until the ring cannot be
778                                  * cleaned.
779                                  */
780                                 while (nb_used > txq->nb_tx_free) {
781                                         if (ixgbe_xmit_cleanup(txq) != 0) {
782                                                 /*
783                                                  * Could not clean any
784                                                  * descriptors
785                                                  */
786                                                 if (nb_tx == 0)
787                                                         return 0;
788                                                 goto end_of_tx;
789                                         }
790                                 }
791                         }
792                 }
793
794                 /*
795                  * By now there are enough free TX descriptors to transmit
796                  * the packet.
797                  */
798
799                 /*
800                  * Set common flags of all TX Data Descriptors.
801                  *
802                  * The following bits must be set in all Data Descriptors:
803                  *   - IXGBE_ADVTXD_DTYP_DATA
804                  *   - IXGBE_ADVTXD_DCMD_DEXT
805                  *
806                  * The following bits must be set in the first Data Descriptor
807                  * and are ignored in the other ones:
808                  *   - IXGBE_ADVTXD_DCMD_IFCS
809                  *   - IXGBE_ADVTXD_MAC_1588
810                  *   - IXGBE_ADVTXD_DCMD_VLE
811                  *
812                  * The following bits must only be set in the last Data
813                  * Descriptor:
814                  *   - IXGBE_TXD_CMD_EOP
815                  *
816                  * The following bits can be set in any Data Descriptor, but
817                  * are only set in the last Data Descriptor:
818                  *   - IXGBE_TXD_CMD_RS
819                  */
820                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
821                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
822
823 #ifdef RTE_LIBRTE_IEEE1588
824                 if (ol_flags & PKT_TX_IEEE1588_TMST)
825                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
826 #endif
827
828                 olinfo_status = 0;
829                 if (tx_ol_req) {
830
831                         if (ol_flags & PKT_TX_TCP_SEG) {
832                                 /* when TSO is on, paylen in descriptor is the
833                                  * not the packet len but the tcp payload len */
834                                 pkt_len -= (tx_offload.l2_len +
835                                         tx_offload.l3_len + tx_offload.l4_len);
836                         }
837
838                         /*
839                          * Setup the TX Advanced Context Descriptor if required
840                          */
841                         if (new_ctx) {
842                                 volatile struct ixgbe_adv_tx_context_desc *
843                                     ctx_txd;
844
845                                 ctx_txd = (volatile struct
846                                     ixgbe_adv_tx_context_desc *)
847                                     &txr[tx_id];
848
849                                 txn = &sw_ring[txe->next_id];
850                                 rte_prefetch0(&txn->mbuf->pool);
851
852                                 if (txe->mbuf != NULL) {
853                                         rte_pktmbuf_free_seg(txe->mbuf);
854                                         txe->mbuf = NULL;
855                                 }
856
857                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
858                                         tx_offload);
859
860                                 txe->last_id = tx_last;
861                                 tx_id = txe->next_id;
862                                 txe = txn;
863                         }
864
865                         /*
866                          * Setup the TX Advanced Data Descriptor,
867                          * This path will go through
868                          * whatever new/reuse the context descriptor
869                          */
870                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
871                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
872                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
873                 }
874
875                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
876
877                 m_seg = tx_pkt;
878                 do {
879                         txd = &txr[tx_id];
880                         txn = &sw_ring[txe->next_id];
881                         rte_prefetch0(&txn->mbuf->pool);
882
883                         if (txe->mbuf != NULL)
884                                 rte_pktmbuf_free_seg(txe->mbuf);
885                         txe->mbuf = m_seg;
886
887                         /*
888                          * Set up Transmit Data Descriptor.
889                          */
890                         slen = m_seg->data_len;
891                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
892                         txd->read.buffer_addr =
893                                 rte_cpu_to_le_64(buf_dma_addr);
894                         txd->read.cmd_type_len =
895                                 rte_cpu_to_le_32(cmd_type_len | slen);
896                         txd->read.olinfo_status =
897                                 rte_cpu_to_le_32(olinfo_status);
898                         txe->last_id = tx_last;
899                         tx_id = txe->next_id;
900                         txe = txn;
901                         m_seg = m_seg->next;
902                 } while (m_seg != NULL);
903
904                 /*
905                  * The last packet data descriptor needs End Of Packet (EOP)
906                  */
907                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
908                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
909                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
910
911                 /* Set RS bit only on threshold packets' last descriptor */
912                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
913                         PMD_TX_FREE_LOG(DEBUG,
914                                         "Setting RS bit on TXD id="
915                                         "%4u (port=%d queue=%d)",
916                                         tx_last, txq->port_id, txq->queue_id);
917
918                         cmd_type_len |= IXGBE_TXD_CMD_RS;
919
920                         /* Update txq RS bit counters */
921                         txq->nb_tx_used = 0;
922                         txp = NULL;
923                 } else
924                         txp = txd;
925
926                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
927         }
928
929 end_of_tx:
930         /* set RS on last packet in the burst */
931         if (txp != NULL)
932                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
933
934         rte_wmb();
935
936         /*
937          * Set the Transmit Descriptor Tail (TDT)
938          */
939         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
940                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
941                    (unsigned) tx_id, (unsigned) nb_tx);
942         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
943         txq->tx_tail = tx_id;
944
945         return nb_tx;
946 }
947
948 /*********************************************************************
949  *
950  *  TX prep functions
951  *
952  **********************************************************************/
953 uint16_t
954 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
955 {
956         int i, ret;
957         uint64_t ol_flags;
958         struct rte_mbuf *m;
959         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
960
961         for (i = 0; i < nb_pkts; i++) {
962                 m = tx_pkts[i];
963                 ol_flags = m->ol_flags;
964
965                 /**
966                  * Check if packet meets requirements for number of segments
967                  *
968                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
969                  *       non-TSO
970                  */
971
972                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
973                         rte_errno = -EINVAL;
974                         return i;
975                 }
976
977                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
978                         rte_errno = -ENOTSUP;
979                         return i;
980                 }
981
982 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
983                 ret = rte_validate_tx_offload(m);
984                 if (ret != 0) {
985                         rte_errno = ret;
986                         return i;
987                 }
988 #endif
989                 ret = rte_net_intel_cksum_prepare(m);
990                 if (ret != 0) {
991                         rte_errno = ret;
992                         return i;
993                 }
994         }
995
996         return i;
997 }
998
999 /*********************************************************************
1000  *
1001  *  RX functions
1002  *
1003  **********************************************************************/
1004
1005 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1006 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1007 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1008 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1009 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1010 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1011 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1012 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1013 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1014 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1015 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1016 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1017 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1018 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1019 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1020 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1021 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1022 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1023 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1024 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1025 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1030 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1031 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1032 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1033 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1034 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1035 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1036 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1037 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1038
1039 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1040 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1041 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1042 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1043 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1044 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1045 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1046 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1047 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1048 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1049 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1050 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1051 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1052 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1062
1063 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1064 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1065 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1066 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1067 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1068 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1069 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1070 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1071 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1072 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1073 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1074 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1075 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1076 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1086
1087 #define IXGBE_PACKET_TYPE_MAX               0X80
1088 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1089 #define IXGBE_PACKET_TYPE_SHIFT             0X04
1090
1091 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
1092 static inline uint32_t
1093 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1094 {
1095         /**
1096          * Use 2 different table for normal packet and tunnel packet
1097          * to save the space.
1098          */
1099         static const uint32_t
1100                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1101                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1102                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1103                         RTE_PTYPE_L3_IPV4,
1104                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1105                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1106                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1107                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1108                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1110                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1111                         RTE_PTYPE_L3_IPV4_EXT,
1112                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1113                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1114                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1116                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1117                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1118                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1119                         RTE_PTYPE_L3_IPV6,
1120                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1122                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1123                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1124                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1125                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1126                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV6_EXT,
1128                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1129                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1130                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1131                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1132                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1134                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1135                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1136                         RTE_PTYPE_INNER_L3_IPV6,
1137                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1138                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1139                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1140                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1141                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1142                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1143                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1144                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1145                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1146                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1148                         RTE_PTYPE_INNER_L3_IPV6,
1149                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1150                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1151                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1152                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1153                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1154                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1155                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1156                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1157                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1158                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1160                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1161                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1163                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1164                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1166                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1167                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1169                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1170                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1172                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1173                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1175                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1176                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1177                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1178                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1179                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1180                         RTE_PTYPE_L2_ETHER |
1181                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1182                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1183         };
1184
1185         static const uint32_t
1186                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1187                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1188                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1189                         RTE_PTYPE_INNER_L2_ETHER,
1190                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1191                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1192                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1193                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1194                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1195                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1196                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1197                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1198                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1199                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1200                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1201                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1202                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1203                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1205                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1206                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1207                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1208                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1209                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1211                         RTE_PTYPE_INNER_L4_TCP,
1212                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1213                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1214                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1215                         RTE_PTYPE_INNER_L4_TCP,
1216                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1217                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1219                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1220                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1221                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1222                         RTE_PTYPE_INNER_L4_TCP,
1223                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1224                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1225                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1226                         RTE_PTYPE_INNER_L3_IPV4,
1227                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1228                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1230                         RTE_PTYPE_INNER_L4_UDP,
1231                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1232                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1233                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1234                         RTE_PTYPE_INNER_L4_UDP,
1235                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1236                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1237                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1238                         RTE_PTYPE_INNER_L4_SCTP,
1239                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1240                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1241                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1242                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1243                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1244                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1245                         RTE_PTYPE_INNER_L4_UDP,
1246                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1247                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1248                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1249                         RTE_PTYPE_INNER_L4_SCTP,
1250                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1251                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1252                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1253                         RTE_PTYPE_INNER_L3_IPV4,
1254                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1255                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1256                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1257                         RTE_PTYPE_INNER_L4_SCTP,
1258                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1259                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1260                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1261                         RTE_PTYPE_INNER_L4_SCTP,
1262                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1263                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1264                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1265                         RTE_PTYPE_INNER_L4_TCP,
1266                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1267                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1268                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1269                         RTE_PTYPE_INNER_L4_UDP,
1270
1271                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1272                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1273                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1274                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1275                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1276                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1277                         RTE_PTYPE_INNER_L3_IPV4,
1278                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1279                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1280                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1281                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1282                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1283                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                         RTE_PTYPE_INNER_L3_IPV6,
1286                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1287                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                         RTE_PTYPE_INNER_L3_IPV4,
1290                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1291                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1294                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1295                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                         RTE_PTYPE_INNER_L3_IPV4,
1298                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1299                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1302                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1303                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1306                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1307                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                         RTE_PTYPE_INNER_L3_IPV4,
1310                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1311                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1314                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1315                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1316                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1317                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1318                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1319                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1322                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1323                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1324                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1325                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1326                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1327                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1330                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1331                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                         RTE_PTYPE_INNER_L3_IPV4,
1334                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1335                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1338                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1339                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1342                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1343                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1344                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1345                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1346                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1347                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1350                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1351                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1352                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1353                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1354                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1355                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1358                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1359                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1362         };
1363
1364         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1365                 return RTE_PTYPE_UNKNOWN;
1366
1367         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1368
1369         /* For tunnel packet */
1370         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1371                 /* Remove the tunnel bit to save the space. */
1372                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1373                 return ptype_table_tn[pkt_info];
1374         }
1375
1376         /**
1377          * For x550, if it's not tunnel,
1378          * tunnel type bit should be set to 0.
1379          * Reuse 82599's mask.
1380          */
1381         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1382
1383         return ptype_table[pkt_info];
1384 }
1385
1386 static inline uint64_t
1387 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1388 {
1389         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1390                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1391                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1392                 PKT_RX_RSS_HASH, 0, 0, 0,
1393                 0, 0, 0,  PKT_RX_FDIR,
1394         };
1395 #ifdef RTE_LIBRTE_IEEE1588
1396         static uint64_t ip_pkt_etqf_map[8] = {
1397                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1398                 0, 0, 0, 0,
1399         };
1400
1401         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1402                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1403                                 ip_rss_types_map[pkt_info & 0XF];
1404         else
1405                 return ip_rss_types_map[pkt_info & 0XF];
1406 #else
1407         return ip_rss_types_map[pkt_info & 0XF];
1408 #endif
1409 }
1410
1411 static inline uint64_t
1412 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1413 {
1414         uint64_t pkt_flags;
1415
1416         /*
1417          * Check if VLAN present only.
1418          * Do not check whether L3/L4 rx checksum done by NIC or not,
1419          * That can be found from rte_eth_rxmode.hw_ip_checksum flag
1420          */
1421         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1422
1423 #ifdef RTE_LIBRTE_IEEE1588
1424         if (rx_status & IXGBE_RXD_STAT_TMST)
1425                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1426 #endif
1427         return pkt_flags;
1428 }
1429
1430 static inline uint64_t
1431 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1432 {
1433         uint64_t pkt_flags;
1434
1435         /*
1436          * Bit 31: IPE, IPv4 checksum error
1437          * Bit 30: L4I, L4I integrity error
1438          */
1439         static uint64_t error_to_pkt_flags_map[4] = {
1440                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1441                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1442                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1443                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1444         };
1445         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1446                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1447
1448         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1449             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1450                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1451         }
1452
1453         return pkt_flags;
1454 }
1455
1456 /*
1457  * LOOK_AHEAD defines how many desc statuses to check beyond the
1458  * current descriptor.
1459  * It must be a pound define for optimal performance.
1460  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1461  * function only works with LOOK_AHEAD=8.
1462  */
1463 #define LOOK_AHEAD 8
1464 #if (LOOK_AHEAD != 8)
1465 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1466 #endif
1467 static inline int
1468 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1469 {
1470         volatile union ixgbe_adv_rx_desc *rxdp;
1471         struct ixgbe_rx_entry *rxep;
1472         struct rte_mbuf *mb;
1473         uint16_t pkt_len;
1474         uint64_t pkt_flags;
1475         int nb_dd;
1476         uint32_t s[LOOK_AHEAD];
1477         uint32_t pkt_info[LOOK_AHEAD];
1478         int i, j, nb_rx = 0;
1479         uint32_t status;
1480         uint64_t vlan_flags = rxq->vlan_flags;
1481
1482         /* get references to current descriptor and S/W ring entry */
1483         rxdp = &rxq->rx_ring[rxq->rx_tail];
1484         rxep = &rxq->sw_ring[rxq->rx_tail];
1485
1486         status = rxdp->wb.upper.status_error;
1487         /* check to make sure there is at least 1 packet to receive */
1488         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1489                 return 0;
1490
1491         /*
1492          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1493          * reference packets that are ready to be received.
1494          */
1495         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1496              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1497                 /* Read desc statuses backwards to avoid race condition */
1498                 for (j = 0; j < LOOK_AHEAD; j++)
1499                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1500
1501                 rte_smp_rmb();
1502
1503                 /* Compute how many status bits were set */
1504                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1505                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1506                         ;
1507
1508                 for (j = 0; j < nb_dd; j++)
1509                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1510                                                        lo_dword.data);
1511
1512                 nb_rx += nb_dd;
1513
1514                 /* Translate descriptor info to mbuf format */
1515                 for (j = 0; j < nb_dd; ++j) {
1516                         mb = rxep[j].mbuf;
1517                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1518                                   rxq->crc_len;
1519                         mb->data_len = pkt_len;
1520                         mb->pkt_len = pkt_len;
1521                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1522
1523                         /* convert descriptor fields to rte mbuf flags */
1524                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1525                                 vlan_flags);
1526                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1527                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1528                                         ((uint16_t)pkt_info[j]);
1529                         mb->ol_flags = pkt_flags;
1530                         mb->packet_type =
1531                                 ixgbe_rxd_pkt_info_to_pkt_type
1532                                         (pkt_info[j], rxq->pkt_type_mask);
1533
1534                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1535                                 mb->hash.rss = rte_le_to_cpu_32(
1536                                     rxdp[j].wb.lower.hi_dword.rss);
1537                         else if (pkt_flags & PKT_RX_FDIR) {
1538                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1539                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1540                                     IXGBE_ATR_HASH_MASK;
1541                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1542                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1543                         }
1544                 }
1545
1546                 /* Move mbuf pointers from the S/W ring to the stage */
1547                 for (j = 0; j < LOOK_AHEAD; ++j) {
1548                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1549                 }
1550
1551                 /* stop if all requested packets could not be received */
1552                 if (nb_dd != LOOK_AHEAD)
1553                         break;
1554         }
1555
1556         /* clear software ring entries so we can cleanup correctly */
1557         for (i = 0; i < nb_rx; ++i) {
1558                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1559         }
1560
1561
1562         return nb_rx;
1563 }
1564
1565 static inline int
1566 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1567 {
1568         volatile union ixgbe_adv_rx_desc *rxdp;
1569         struct ixgbe_rx_entry *rxep;
1570         struct rte_mbuf *mb;
1571         uint16_t alloc_idx;
1572         __le64 dma_addr;
1573         int diag, i;
1574
1575         /* allocate buffers in bulk directly into the S/W ring */
1576         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1577         rxep = &rxq->sw_ring[alloc_idx];
1578         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1579                                     rxq->rx_free_thresh);
1580         if (unlikely(diag != 0))
1581                 return -ENOMEM;
1582
1583         rxdp = &rxq->rx_ring[alloc_idx];
1584         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1585                 /* populate the static rte mbuf fields */
1586                 mb = rxep[i].mbuf;
1587                 if (reset_mbuf) {
1588                         mb->next = NULL;
1589                         mb->nb_segs = 1;
1590                         mb->port = rxq->port_id;
1591                 }
1592
1593                 rte_mbuf_refcnt_set(mb, 1);
1594                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1595
1596                 /* populate the descriptors */
1597                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1598                 rxdp[i].read.hdr_addr = 0;
1599                 rxdp[i].read.pkt_addr = dma_addr;
1600         }
1601
1602         /* update state of internal queue structure */
1603         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1604         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1605                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1606
1607         /* no errors */
1608         return 0;
1609 }
1610
1611 static inline uint16_t
1612 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1613                          uint16_t nb_pkts)
1614 {
1615         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1616         int i;
1617
1618         /* how many packets are ready to return? */
1619         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1620
1621         /* copy mbuf pointers to the application's packet list */
1622         for (i = 0; i < nb_pkts; ++i)
1623                 rx_pkts[i] = stage[i];
1624
1625         /* update internal queue state */
1626         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1627         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1628
1629         return nb_pkts;
1630 }
1631
1632 static inline uint16_t
1633 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1634              uint16_t nb_pkts)
1635 {
1636         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1637         uint16_t nb_rx = 0;
1638
1639         /* Any previously recv'd pkts will be returned from the Rx stage */
1640         if (rxq->rx_nb_avail)
1641                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1642
1643         /* Scan the H/W ring for packets to receive */
1644         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1645
1646         /* update internal queue state */
1647         rxq->rx_next_avail = 0;
1648         rxq->rx_nb_avail = nb_rx;
1649         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1650
1651         /* if required, allocate new buffers to replenish descriptors */
1652         if (rxq->rx_tail > rxq->rx_free_trigger) {
1653                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1654
1655                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1656                         int i, j;
1657
1658                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1659                                    "queue_id=%u", (unsigned) rxq->port_id,
1660                                    (unsigned) rxq->queue_id);
1661
1662                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1663                                 rxq->rx_free_thresh;
1664
1665                         /*
1666                          * Need to rewind any previous receives if we cannot
1667                          * allocate new buffers to replenish the old ones.
1668                          */
1669                         rxq->rx_nb_avail = 0;
1670                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1671                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1672                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1673
1674                         return 0;
1675                 }
1676
1677                 /* update tail pointer */
1678                 rte_wmb();
1679                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1680                                             cur_free_trigger);
1681         }
1682
1683         if (rxq->rx_tail >= rxq->nb_rx_desc)
1684                 rxq->rx_tail = 0;
1685
1686         /* received any packets this loop? */
1687         if (rxq->rx_nb_avail)
1688                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1689
1690         return 0;
1691 }
1692
1693 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1694 uint16_t
1695 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1696                            uint16_t nb_pkts)
1697 {
1698         uint16_t nb_rx;
1699
1700         if (unlikely(nb_pkts == 0))
1701                 return 0;
1702
1703         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1704                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1705
1706         /* request is relatively large, chunk it up */
1707         nb_rx = 0;
1708         while (nb_pkts) {
1709                 uint16_t ret, n;
1710
1711                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1712                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1713                 nb_rx = (uint16_t)(nb_rx + ret);
1714                 nb_pkts = (uint16_t)(nb_pkts - ret);
1715                 if (ret < n)
1716                         break;
1717         }
1718
1719         return nb_rx;
1720 }
1721
1722 uint16_t
1723 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1724                 uint16_t nb_pkts)
1725 {
1726         struct ixgbe_rx_queue *rxq;
1727         volatile union ixgbe_adv_rx_desc *rx_ring;
1728         volatile union ixgbe_adv_rx_desc *rxdp;
1729         struct ixgbe_rx_entry *sw_ring;
1730         struct ixgbe_rx_entry *rxe;
1731         struct rte_mbuf *rxm;
1732         struct rte_mbuf *nmb;
1733         union ixgbe_adv_rx_desc rxd;
1734         uint64_t dma_addr;
1735         uint32_t staterr;
1736         uint32_t pkt_info;
1737         uint16_t pkt_len;
1738         uint16_t rx_id;
1739         uint16_t nb_rx;
1740         uint16_t nb_hold;
1741         uint64_t pkt_flags;
1742         uint64_t vlan_flags;
1743
1744         nb_rx = 0;
1745         nb_hold = 0;
1746         rxq = rx_queue;
1747         rx_id = rxq->rx_tail;
1748         rx_ring = rxq->rx_ring;
1749         sw_ring = rxq->sw_ring;
1750         vlan_flags = rxq->vlan_flags;
1751         while (nb_rx < nb_pkts) {
1752                 /*
1753                  * The order of operations here is important as the DD status
1754                  * bit must not be read after any other descriptor fields.
1755                  * rx_ring and rxdp are pointing to volatile data so the order
1756                  * of accesses cannot be reordered by the compiler. If they were
1757                  * not volatile, they could be reordered which could lead to
1758                  * using invalid descriptor fields when read from rxd.
1759                  */
1760                 rxdp = &rx_ring[rx_id];
1761                 staterr = rxdp->wb.upper.status_error;
1762                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1763                         break;
1764                 rxd = *rxdp;
1765
1766                 /*
1767                  * End of packet.
1768                  *
1769                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1770                  * is likely to be invalid and to be dropped by the various
1771                  * validation checks performed by the network stack.
1772                  *
1773                  * Allocate a new mbuf to replenish the RX ring descriptor.
1774                  * If the allocation fails:
1775                  *    - arrange for that RX descriptor to be the first one
1776                  *      being parsed the next time the receive function is
1777                  *      invoked [on the same queue].
1778                  *
1779                  *    - Stop parsing the RX ring and return immediately.
1780                  *
1781                  * This policy do not drop the packet received in the RX
1782                  * descriptor for which the allocation of a new mbuf failed.
1783                  * Thus, it allows that packet to be later retrieved if
1784                  * mbuf have been freed in the mean time.
1785                  * As a side effect, holding RX descriptors instead of
1786                  * systematically giving them back to the NIC may lead to
1787                  * RX ring exhaustion situations.
1788                  * However, the NIC can gracefully prevent such situations
1789                  * to happen by sending specific "back-pressure" flow control
1790                  * frames to its peer(s).
1791                  */
1792                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1793                            "ext_err_stat=0x%08x pkt_len=%u",
1794                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1795                            (unsigned) rx_id, (unsigned) staterr,
1796                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1797
1798                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1799                 if (nmb == NULL) {
1800                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1801                                    "queue_id=%u", (unsigned) rxq->port_id,
1802                                    (unsigned) rxq->queue_id);
1803                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1804                         break;
1805                 }
1806
1807                 nb_hold++;
1808                 rxe = &sw_ring[rx_id];
1809                 rx_id++;
1810                 if (rx_id == rxq->nb_rx_desc)
1811                         rx_id = 0;
1812
1813                 /* Prefetch next mbuf while processing current one. */
1814                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1815
1816                 /*
1817                  * When next RX descriptor is on a cache-line boundary,
1818                  * prefetch the next 4 RX descriptors and the next 8 pointers
1819                  * to mbufs.
1820                  */
1821                 if ((rx_id & 0x3) == 0) {
1822                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1823                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1824                 }
1825
1826                 rxm = rxe->mbuf;
1827                 rxe->mbuf = nmb;
1828                 dma_addr =
1829                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1830                 rxdp->read.hdr_addr = 0;
1831                 rxdp->read.pkt_addr = dma_addr;
1832
1833                 /*
1834                  * Initialize the returned mbuf.
1835                  * 1) setup generic mbuf fields:
1836                  *    - number of segments,
1837                  *    - next segment,
1838                  *    - packet length,
1839                  *    - RX port identifier.
1840                  * 2) integrate hardware offload data, if any:
1841                  *    - RSS flag & hash,
1842                  *    - IP checksum flag,
1843                  *    - VLAN TCI, if any,
1844                  *    - error flags.
1845                  */
1846                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1847                                       rxq->crc_len);
1848                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1849                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1850                 rxm->nb_segs = 1;
1851                 rxm->next = NULL;
1852                 rxm->pkt_len = pkt_len;
1853                 rxm->data_len = pkt_len;
1854                 rxm->port = rxq->port_id;
1855
1856                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1857                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1858                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1859
1860                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1861                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1862                 pkt_flags = pkt_flags |
1863                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1864                 rxm->ol_flags = pkt_flags;
1865                 rxm->packet_type =
1866                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1867                                                        rxq->pkt_type_mask);
1868
1869                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1870                         rxm->hash.rss = rte_le_to_cpu_32(
1871                                                 rxd.wb.lower.hi_dword.rss);
1872                 else if (pkt_flags & PKT_RX_FDIR) {
1873                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1874                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1875                                         IXGBE_ATR_HASH_MASK;
1876                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1877                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1878                 }
1879                 /*
1880                  * Store the mbuf address into the next entry of the array
1881                  * of returned packets.
1882                  */
1883                 rx_pkts[nb_rx++] = rxm;
1884         }
1885         rxq->rx_tail = rx_id;
1886
1887         /*
1888          * If the number of free RX descriptors is greater than the RX free
1889          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1890          * register.
1891          * Update the RDT with the value of the last processed RX descriptor
1892          * minus 1, to guarantee that the RDT register is never equal to the
1893          * RDH register, which creates a "full" ring situtation from the
1894          * hardware point of view...
1895          */
1896         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1897         if (nb_hold > rxq->rx_free_thresh) {
1898                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1899                            "nb_hold=%u nb_rx=%u",
1900                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1901                            (unsigned) rx_id, (unsigned) nb_hold,
1902                            (unsigned) nb_rx);
1903                 rx_id = (uint16_t) ((rx_id == 0) ?
1904                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1905                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1906                 nb_hold = 0;
1907         }
1908         rxq->nb_rx_hold = nb_hold;
1909         return nb_rx;
1910 }
1911
1912 /**
1913  * Detect an RSC descriptor.
1914  */
1915 static inline uint32_t
1916 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1917 {
1918         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1919                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1920 }
1921
1922 /**
1923  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1924  *
1925  * Fill the following info in the HEAD buffer of the Rx cluster:
1926  *    - RX port identifier
1927  *    - hardware offload data, if any:
1928  *      - RSS flag & hash
1929  *      - IP checksum flag
1930  *      - VLAN TCI, if any
1931  *      - error flags
1932  * @head HEAD of the packet cluster
1933  * @desc HW descriptor to get data from
1934  * @rxq Pointer to the Rx queue
1935  */
1936 static inline void
1937 ixgbe_fill_cluster_head_buf(
1938         struct rte_mbuf *head,
1939         union ixgbe_adv_rx_desc *desc,
1940         struct ixgbe_rx_queue *rxq,
1941         uint32_t staterr)
1942 {
1943         uint32_t pkt_info;
1944         uint64_t pkt_flags;
1945
1946         head->port = rxq->port_id;
1947
1948         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1949          * set in the pkt_flags field.
1950          */
1951         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1952         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1953         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1954         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1955         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1956         head->ol_flags = pkt_flags;
1957         head->packet_type =
1958                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1959
1960         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1961                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1962         else if (pkt_flags & PKT_RX_FDIR) {
1963                 head->hash.fdir.hash =
1964                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1965                                                           & IXGBE_ATR_HASH_MASK;
1966                 head->hash.fdir.id =
1967                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1968         }
1969 }
1970
1971 /**
1972  * ixgbe_recv_pkts_lro - receive handler for and LRO case.
1973  *
1974  * @rx_queue Rx queue handle
1975  * @rx_pkts table of received packets
1976  * @nb_pkts size of rx_pkts table
1977  * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1978  *
1979  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1980  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1981  *
1982  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1983  * 1) When non-EOP RSC completion arrives:
1984  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1985  *       segment's data length.
1986  *    b) Set the "next" pointer of the current segment to point to the segment
1987  *       at the NEXTP index.
1988  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1989  *       in the sw_rsc_ring.
1990  * 2) When EOP arrives we just update the cluster's total length and offload
1991  *    flags and deliver the cluster up to the upper layers. In our case - put it
1992  *    in the rx_pkts table.
1993  *
1994  * Returns the number of received packets/clusters (according to the "bulk
1995  * receive" interface).
1996  */
1997 static inline uint16_t
1998 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1999                     bool bulk_alloc)
2000 {
2001         struct ixgbe_rx_queue *rxq = rx_queue;
2002         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2003         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2004         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2005         uint16_t rx_id = rxq->rx_tail;
2006         uint16_t nb_rx = 0;
2007         uint16_t nb_hold = rxq->nb_rx_hold;
2008         uint16_t prev_id = rxq->rx_tail;
2009
2010         while (nb_rx < nb_pkts) {
2011                 bool eop;
2012                 struct ixgbe_rx_entry *rxe;
2013                 struct ixgbe_scattered_rx_entry *sc_entry;
2014                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2015                 struct ixgbe_rx_entry *next_rxe = NULL;
2016                 struct rte_mbuf *first_seg;
2017                 struct rte_mbuf *rxm;
2018                 struct rte_mbuf *nmb;
2019                 union ixgbe_adv_rx_desc rxd;
2020                 uint16_t data_len;
2021                 uint16_t next_id;
2022                 volatile union ixgbe_adv_rx_desc *rxdp;
2023                 uint32_t staterr;
2024
2025 next_desc:
2026                 /*
2027                  * The code in this whole file uses the volatile pointer to
2028                  * ensure the read ordering of the status and the rest of the
2029                  * descriptor fields (on the compiler level only!!!). This is so
2030                  * UGLY - why not to just use the compiler barrier instead? DPDK
2031                  * even has the rte_compiler_barrier() for that.
2032                  *
2033                  * But most importantly this is just wrong because this doesn't
2034                  * ensure memory ordering in a general case at all. For
2035                  * instance, DPDK is supposed to work on Power CPUs where
2036                  * compiler barrier may just not be enough!
2037                  *
2038                  * I tried to write only this function properly to have a
2039                  * starting point (as a part of an LRO/RSC series) but the
2040                  * compiler cursed at me when I tried to cast away the
2041                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2042                  * keeping it the way it is for now.
2043                  *
2044                  * The code in this file is broken in so many other places and
2045                  * will just not work on a big endian CPU anyway therefore the
2046                  * lines below will have to be revisited together with the rest
2047                  * of the ixgbe PMD.
2048                  *
2049                  * TODO:
2050                  *    - Get rid of "volatile" crap and let the compiler do its
2051                  *      job.
2052                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2053                  *      memory ordering below.
2054                  */
2055                 rxdp = &rx_ring[rx_id];
2056                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2057
2058                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2059                         break;
2060
2061                 rxd = *rxdp;
2062
2063                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2064                                   "staterr=0x%x data_len=%u",
2065                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2066                            rte_le_to_cpu_16(rxd.wb.upper.length));
2067
2068                 if (!bulk_alloc) {
2069                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2070                         if (nmb == NULL) {
2071                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2072                                                   "port_id=%u queue_id=%u",
2073                                            rxq->port_id, rxq->queue_id);
2074
2075                                 rte_eth_devices[rxq->port_id].data->
2076                                                         rx_mbuf_alloc_failed++;
2077                                 break;
2078                         }
2079                 } else if (nb_hold > rxq->rx_free_thresh) {
2080                         uint16_t next_rdt = rxq->rx_free_trigger;
2081
2082                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2083                                 rte_wmb();
2084                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2085                                                             next_rdt);
2086                                 nb_hold -= rxq->rx_free_thresh;
2087                         } else {
2088                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2089                                                   "port_id=%u queue_id=%u",
2090                                            rxq->port_id, rxq->queue_id);
2091
2092                                 rte_eth_devices[rxq->port_id].data->
2093                                                         rx_mbuf_alloc_failed++;
2094                                 break;
2095                         }
2096                 }
2097
2098                 nb_hold++;
2099                 rxe = &sw_ring[rx_id];
2100                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2101
2102                 next_id = rx_id + 1;
2103                 if (next_id == rxq->nb_rx_desc)
2104                         next_id = 0;
2105
2106                 /* Prefetch next mbuf while processing current one. */
2107                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2108
2109                 /*
2110                  * When next RX descriptor is on a cache-line boundary,
2111                  * prefetch the next 4 RX descriptors and the next 4 pointers
2112                  * to mbufs.
2113                  */
2114                 if ((next_id & 0x3) == 0) {
2115                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2116                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2117                 }
2118
2119                 rxm = rxe->mbuf;
2120
2121                 if (!bulk_alloc) {
2122                         __le64 dma =
2123                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2124                         /*
2125                          * Update RX descriptor with the physical address of the
2126                          * new data buffer of the new allocated mbuf.
2127                          */
2128                         rxe->mbuf = nmb;
2129
2130                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2131                         rxdp->read.hdr_addr = 0;
2132                         rxdp->read.pkt_addr = dma;
2133                 } else
2134                         rxe->mbuf = NULL;
2135
2136                 /*
2137                  * Set data length & data buffer address of mbuf.
2138                  */
2139                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2140                 rxm->data_len = data_len;
2141
2142                 if (!eop) {
2143                         uint16_t nextp_id;
2144                         /*
2145                          * Get next descriptor index:
2146                          *  - For RSC it's in the NEXTP field.
2147                          *  - For a scattered packet - it's just a following
2148                          *    descriptor.
2149                          */
2150                         if (ixgbe_rsc_count(&rxd))
2151                                 nextp_id =
2152                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2153                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2154                         else
2155                                 nextp_id = next_id;
2156
2157                         next_sc_entry = &sw_sc_ring[nextp_id];
2158                         next_rxe = &sw_ring[nextp_id];
2159                         rte_ixgbe_prefetch(next_rxe);
2160                 }
2161
2162                 sc_entry = &sw_sc_ring[rx_id];
2163                 first_seg = sc_entry->fbuf;
2164                 sc_entry->fbuf = NULL;
2165
2166                 /*
2167                  * If this is the first buffer of the received packet,
2168                  * set the pointer to the first mbuf of the packet and
2169                  * initialize its context.
2170                  * Otherwise, update the total length and the number of segments
2171                  * of the current scattered packet, and update the pointer to
2172                  * the last mbuf of the current packet.
2173                  */
2174                 if (first_seg == NULL) {
2175                         first_seg = rxm;
2176                         first_seg->pkt_len = data_len;
2177                         first_seg->nb_segs = 1;
2178                 } else {
2179                         first_seg->pkt_len += data_len;
2180                         first_seg->nb_segs++;
2181                 }
2182
2183                 prev_id = rx_id;
2184                 rx_id = next_id;
2185
2186                 /*
2187                  * If this is not the last buffer of the received packet, update
2188                  * the pointer to the first mbuf at the NEXTP entry in the
2189                  * sw_sc_ring and continue to parse the RX ring.
2190                  */
2191                 if (!eop && next_rxe) {
2192                         rxm->next = next_rxe->mbuf;
2193                         next_sc_entry->fbuf = first_seg;
2194                         goto next_desc;
2195                 }
2196
2197                 /*
2198                  * This is the last buffer of the received packet - return
2199                  * the current cluster to the user.
2200                  */
2201                 rxm->next = NULL;
2202
2203                 /* Initialize the first mbuf of the returned packet */
2204                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2205
2206                 /*
2207                  * Deal with the case, when HW CRC srip is disabled.
2208                  * That can't happen when LRO is enabled, but still could
2209                  * happen for scattered RX mode.
2210                  */
2211                 first_seg->pkt_len -= rxq->crc_len;
2212                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2213                         struct rte_mbuf *lp;
2214
2215                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2216                                 ;
2217
2218                         first_seg->nb_segs--;
2219                         lp->data_len -= rxq->crc_len - rxm->data_len;
2220                         lp->next = NULL;
2221                         rte_pktmbuf_free_seg(rxm);
2222                 } else
2223                         rxm->data_len -= rxq->crc_len;
2224
2225                 /* Prefetch data of first segment, if configured to do so. */
2226                 rte_packet_prefetch((char *)first_seg->buf_addr +
2227                         first_seg->data_off);
2228
2229                 /*
2230                  * Store the mbuf address into the next entry of the array
2231                  * of returned packets.
2232                  */
2233                 rx_pkts[nb_rx++] = first_seg;
2234         }
2235
2236         /*
2237          * Record index of the next RX descriptor to probe.
2238          */
2239         rxq->rx_tail = rx_id;
2240
2241         /*
2242          * If the number of free RX descriptors is greater than the RX free
2243          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2244          * register.
2245          * Update the RDT with the value of the last processed RX descriptor
2246          * minus 1, to guarantee that the RDT register is never equal to the
2247          * RDH register, which creates a "full" ring situtation from the
2248          * hardware point of view...
2249          */
2250         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2251                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2252                            "nb_hold=%u nb_rx=%u",
2253                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2254
2255                 rte_wmb();
2256                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2257                 nb_hold = 0;
2258         }
2259
2260         rxq->nb_rx_hold = nb_hold;
2261         return nb_rx;
2262 }
2263
2264 uint16_t
2265 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2266                                  uint16_t nb_pkts)
2267 {
2268         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2269 }
2270
2271 uint16_t
2272 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2273                                uint16_t nb_pkts)
2274 {
2275         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2276 }
2277
2278 /*********************************************************************
2279  *
2280  *  Queue management functions
2281  *
2282  **********************************************************************/
2283
2284 static void __attribute__((cold))
2285 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2286 {
2287         unsigned i;
2288
2289         if (txq->sw_ring != NULL) {
2290                 for (i = 0; i < txq->nb_tx_desc; i++) {
2291                         if (txq->sw_ring[i].mbuf != NULL) {
2292                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2293                                 txq->sw_ring[i].mbuf = NULL;
2294                         }
2295                 }
2296         }
2297 }
2298
2299 static void __attribute__((cold))
2300 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2301 {
2302         if (txq != NULL &&
2303             txq->sw_ring != NULL)
2304                 rte_free(txq->sw_ring);
2305 }
2306
2307 static void __attribute__((cold))
2308 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2309 {
2310         if (txq != NULL && txq->ops != NULL) {
2311                 txq->ops->release_mbufs(txq);
2312                 txq->ops->free_swring(txq);
2313                 rte_free(txq);
2314         }
2315 }
2316
2317 void __attribute__((cold))
2318 ixgbe_dev_tx_queue_release(void *txq)
2319 {
2320         ixgbe_tx_queue_release(txq);
2321 }
2322
2323 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2324 static void __attribute__((cold))
2325 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2326 {
2327         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2328         struct ixgbe_tx_entry *txe = txq->sw_ring;
2329         uint16_t prev, i;
2330
2331         /* Zero out HW ring memory */
2332         for (i = 0; i < txq->nb_tx_desc; i++) {
2333                 txq->tx_ring[i] = zeroed_desc;
2334         }
2335
2336         /* Initialize SW ring entries */
2337         prev = (uint16_t) (txq->nb_tx_desc - 1);
2338         for (i = 0; i < txq->nb_tx_desc; i++) {
2339                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2340
2341                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2342                 txe[i].mbuf = NULL;
2343                 txe[i].last_id = i;
2344                 txe[prev].next_id = i;
2345                 prev = i;
2346         }
2347
2348         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2349         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2350
2351         txq->tx_tail = 0;
2352         txq->nb_tx_used = 0;
2353         /*
2354          * Always allow 1 descriptor to be un-allocated to avoid
2355          * a H/W race condition
2356          */
2357         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2358         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2359         txq->ctx_curr = 0;
2360         memset((void *)&txq->ctx_cache, 0,
2361                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2362 }
2363
2364 static const struct ixgbe_txq_ops def_txq_ops = {
2365         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2366         .free_swring = ixgbe_tx_free_swring,
2367         .reset = ixgbe_reset_tx_queue,
2368 };
2369
2370 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2371  * the queue parameters. Used in tx_queue_setup by primary process and then
2372  * in dev_init by secondary process when attaching to an existing ethdev.
2373  */
2374 void __attribute__((cold))
2375 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2376 {
2377         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2378         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2379                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2380                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2381                 dev->tx_pkt_prepare = NULL;
2382 #ifdef RTE_IXGBE_INC_VECTOR
2383                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2384                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2385                                         ixgbe_txq_vec_setup(txq) == 0)) {
2386                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2387                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2388                 } else
2389 #endif
2390                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2391         } else {
2392                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2393                 PMD_INIT_LOG(DEBUG,
2394                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2395                                 (unsigned long)txq->txq_flags,
2396                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2397                 PMD_INIT_LOG(DEBUG,
2398                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2399                                 (unsigned long)txq->tx_rs_thresh,
2400                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2401                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2402                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2403         }
2404 }
2405
2406 int __attribute__((cold))
2407 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2408                          uint16_t queue_idx,
2409                          uint16_t nb_desc,
2410                          unsigned int socket_id,
2411                          const struct rte_eth_txconf *tx_conf)
2412 {
2413         const struct rte_memzone *tz;
2414         struct ixgbe_tx_queue *txq;
2415         struct ixgbe_hw     *hw;
2416         uint16_t tx_rs_thresh, tx_free_thresh;
2417
2418         PMD_INIT_FUNC_TRACE();
2419         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2420
2421         /*
2422          * Validate number of transmit descriptors.
2423          * It must not exceed hardware maximum, and must be multiple
2424          * of IXGBE_ALIGN.
2425          */
2426         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2427                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2428                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2429                 return -EINVAL;
2430         }
2431
2432         /*
2433          * The following two parameters control the setting of the RS bit on
2434          * transmit descriptors.
2435          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2436          * descriptors have been used.
2437          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2438          * descriptors are used or if the number of descriptors required
2439          * to transmit a packet is greater than the number of free TX
2440          * descriptors.
2441          * The following constraints must be satisfied:
2442          *  tx_rs_thresh must be greater than 0.
2443          *  tx_rs_thresh must be less than the size of the ring minus 2.
2444          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2445          *  tx_rs_thresh must be a divisor of the ring size.
2446          *  tx_free_thresh must be greater than 0.
2447          *  tx_free_thresh must be less than the size of the ring minus 3.
2448          * One descriptor in the TX ring is used as a sentinel to avoid a
2449          * H/W race condition, hence the maximum threshold constraints.
2450          * When set to zero use default values.
2451          */
2452         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2453                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2454         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2455                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2456         if (tx_rs_thresh >= (nb_desc - 2)) {
2457                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2458                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2459                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2460                         (int)dev->data->port_id, (int)queue_idx);
2461                 return -(EINVAL);
2462         }
2463         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2464                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2465                         "(tx_rs_thresh=%u port=%d queue=%d)",
2466                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2467                         (int)dev->data->port_id, (int)queue_idx);
2468                 return -(EINVAL);
2469         }
2470         if (tx_free_thresh >= (nb_desc - 3)) {
2471                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2472                              "tx_free_thresh must be less than the number of "
2473                              "TX descriptors minus 3. (tx_free_thresh=%u "
2474                              "port=%d queue=%d)",
2475                              (unsigned int)tx_free_thresh,
2476                              (int)dev->data->port_id, (int)queue_idx);
2477                 return -(EINVAL);
2478         }
2479         if (tx_rs_thresh > tx_free_thresh) {
2480                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2481                              "tx_free_thresh. (tx_free_thresh=%u "
2482                              "tx_rs_thresh=%u port=%d queue=%d)",
2483                              (unsigned int)tx_free_thresh,
2484                              (unsigned int)tx_rs_thresh,
2485                              (int)dev->data->port_id,
2486                              (int)queue_idx);
2487                 return -(EINVAL);
2488         }
2489         if ((nb_desc % tx_rs_thresh) != 0) {
2490                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2491                              "number of TX descriptors. (tx_rs_thresh=%u "
2492                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2493                              (int)dev->data->port_id, (int)queue_idx);
2494                 return -(EINVAL);
2495         }
2496
2497         /*
2498          * If rs_bit_thresh is greater than 1, then TX WTHRESH should be
2499          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2500          * by the NIC and all descriptors are written back after the NIC
2501          * accumulates WTHRESH descriptors.
2502          */
2503         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2504                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2505                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2506                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2507                              (int)dev->data->port_id, (int)queue_idx);
2508                 return -(EINVAL);
2509         }
2510
2511         /* Free memory prior to re-allocation if needed... */
2512         if (dev->data->tx_queues[queue_idx] != NULL) {
2513                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2514                 dev->data->tx_queues[queue_idx] = NULL;
2515         }
2516
2517         /* First allocate the tx queue data structure */
2518         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2519                                  RTE_CACHE_LINE_SIZE, socket_id);
2520         if (txq == NULL)
2521                 return -ENOMEM;
2522
2523         /*
2524          * Allocate TX ring hardware descriptors. A memzone large enough to
2525          * handle the maximum ring size is allocated in order to allow for
2526          * resizing in later calls to the queue setup function.
2527          */
2528         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2529                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2530                         IXGBE_ALIGN, socket_id);
2531         if (tz == NULL) {
2532                 ixgbe_tx_queue_release(txq);
2533                 return -ENOMEM;
2534         }
2535
2536         txq->nb_tx_desc = nb_desc;
2537         txq->tx_rs_thresh = tx_rs_thresh;
2538         txq->tx_free_thresh = tx_free_thresh;
2539         txq->pthresh = tx_conf->tx_thresh.pthresh;
2540         txq->hthresh = tx_conf->tx_thresh.hthresh;
2541         txq->wthresh = tx_conf->tx_thresh.wthresh;
2542         txq->queue_id = queue_idx;
2543         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2544                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2545         txq->port_id = dev->data->port_id;
2546         txq->txq_flags = tx_conf->txq_flags;
2547         txq->ops = &def_txq_ops;
2548         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2549
2550         /*
2551          * Modification to set VFTDT for virtual function if vf is detected
2552          */
2553         if (hw->mac.type == ixgbe_mac_82599_vf ||
2554             hw->mac.type == ixgbe_mac_X540_vf ||
2555             hw->mac.type == ixgbe_mac_X550_vf ||
2556             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2557             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2558                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2559         else
2560                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2561
2562         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2563         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2564
2565         /* Allocate software ring */
2566         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2567                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2568                                 RTE_CACHE_LINE_SIZE, socket_id);
2569         if (txq->sw_ring == NULL) {
2570                 ixgbe_tx_queue_release(txq);
2571                 return -ENOMEM;
2572         }
2573         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2574                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2575
2576         /* set up vector or scalar TX function as appropriate */
2577         ixgbe_set_tx_function(dev, txq);
2578
2579         txq->ops->reset(txq);
2580
2581         dev->data->tx_queues[queue_idx] = txq;
2582
2583
2584         return 0;
2585 }
2586
2587 /**
2588  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2589  *
2590  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2591  * in the sw_rsc_ring is not set to NULL but rather points to the next
2592  * mbuf of this RSC aggregation (that has not been completed yet and still
2593  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2594  * will just free first "nb_segs" segments of the cluster explicitly by calling
2595  * an rte_pktmbuf_free_seg().
2596  *
2597  * @m scattered cluster head
2598  */
2599 static void __attribute__((cold))
2600 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2601 {
2602         uint8_t i, nb_segs = m->nb_segs;
2603         struct rte_mbuf *next_seg;
2604
2605         for (i = 0; i < nb_segs; i++) {
2606                 next_seg = m->next;
2607                 rte_pktmbuf_free_seg(m);
2608                 m = next_seg;
2609         }
2610 }
2611
2612 static void __attribute__((cold))
2613 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2614 {
2615         unsigned i;
2616
2617 #ifdef RTE_IXGBE_INC_VECTOR
2618         /* SSE Vector driver has a different way of releasing mbufs. */
2619         if (rxq->rx_using_sse) {
2620                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2621                 return;
2622         }
2623 #endif
2624
2625         if (rxq->sw_ring != NULL) {
2626                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2627                         if (rxq->sw_ring[i].mbuf != NULL) {
2628                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2629                                 rxq->sw_ring[i].mbuf = NULL;
2630                         }
2631                 }
2632                 if (rxq->rx_nb_avail) {
2633                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2634                                 struct rte_mbuf *mb;
2635
2636                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2637                                 rte_pktmbuf_free_seg(mb);
2638                         }
2639                         rxq->rx_nb_avail = 0;
2640                 }
2641         }
2642
2643         if (rxq->sw_sc_ring)
2644                 for (i = 0; i < rxq->nb_rx_desc; i++)
2645                         if (rxq->sw_sc_ring[i].fbuf) {
2646                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2647                                 rxq->sw_sc_ring[i].fbuf = NULL;
2648                         }
2649 }
2650
2651 static void __attribute__((cold))
2652 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2653 {
2654         if (rxq != NULL) {
2655                 ixgbe_rx_queue_release_mbufs(rxq);
2656                 rte_free(rxq->sw_ring);
2657                 rte_free(rxq->sw_sc_ring);
2658                 rte_free(rxq);
2659         }
2660 }
2661
2662 void __attribute__((cold))
2663 ixgbe_dev_rx_queue_release(void *rxq)
2664 {
2665         ixgbe_rx_queue_release(rxq);
2666 }
2667
2668 /*
2669  * Check if Rx Burst Bulk Alloc function can be used.
2670  * Return
2671  *        0: the preconditions are satisfied and the bulk allocation function
2672  *           can be used.
2673  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2674  *           function must be used.
2675  */
2676 static inline int __attribute__((cold))
2677 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2678 {
2679         int ret = 0;
2680
2681         /*
2682          * Make sure the following pre-conditions are satisfied:
2683          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2684          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2685          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2686          * Scattered packets are not supported.  This should be checked
2687          * outside of this function.
2688          */
2689         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2690                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2691                              "rxq->rx_free_thresh=%d, "
2692                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2693                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2694                 ret = -EINVAL;
2695         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2696                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2697                              "rxq->rx_free_thresh=%d, "
2698                              "rxq->nb_rx_desc=%d",
2699                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2700                 ret = -EINVAL;
2701         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2702                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2703                              "rxq->nb_rx_desc=%d, "
2704                              "rxq->rx_free_thresh=%d",
2705                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2706                 ret = -EINVAL;
2707         }
2708
2709         return ret;
2710 }
2711
2712 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2713 static void __attribute__((cold))
2714 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2715 {
2716         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2717         unsigned i;
2718         uint16_t len = rxq->nb_rx_desc;
2719
2720         /*
2721          * By default, the Rx queue setup function allocates enough memory for
2722          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2723          * extra memory at the end of the descriptor ring to be zero'd out.
2724          */
2725         if (adapter->rx_bulk_alloc_allowed)
2726                 /* zero out extra memory */
2727                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2728
2729         /*
2730          * Zero out HW ring memory. Zero out extra memory at the end of
2731          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2732          * reads extra memory as zeros.
2733          */
2734         for (i = 0; i < len; i++) {
2735                 rxq->rx_ring[i] = zeroed_desc;
2736         }
2737
2738         /*
2739          * initialize extra software ring entries. Space for these extra
2740          * entries is always allocated
2741          */
2742         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2743         for (i = rxq->nb_rx_desc; i < len; ++i) {
2744                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2745         }
2746
2747         rxq->rx_nb_avail = 0;
2748         rxq->rx_next_avail = 0;
2749         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2750         rxq->rx_tail = 0;
2751         rxq->nb_rx_hold = 0;
2752         rxq->pkt_first_seg = NULL;
2753         rxq->pkt_last_seg = NULL;
2754
2755 #ifdef RTE_IXGBE_INC_VECTOR
2756         rxq->rxrearm_start = 0;
2757         rxq->rxrearm_nb = 0;
2758 #endif
2759 }
2760
2761 int __attribute__((cold))
2762 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2763                          uint16_t queue_idx,
2764                          uint16_t nb_desc,
2765                          unsigned int socket_id,
2766                          const struct rte_eth_rxconf *rx_conf,
2767                          struct rte_mempool *mp)
2768 {
2769         const struct rte_memzone *rz;
2770         struct ixgbe_rx_queue *rxq;
2771         struct ixgbe_hw     *hw;
2772         uint16_t len;
2773         struct ixgbe_adapter *adapter =
2774                 (struct ixgbe_adapter *)dev->data->dev_private;
2775
2776         PMD_INIT_FUNC_TRACE();
2777         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2778
2779         /*
2780          * Validate number of receive descriptors.
2781          * It must not exceed hardware maximum, and must be multiple
2782          * of IXGBE_ALIGN.
2783          */
2784         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2785                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2786                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2787                 return -EINVAL;
2788         }
2789
2790         /* Free memory prior to re-allocation if needed... */
2791         if (dev->data->rx_queues[queue_idx] != NULL) {
2792                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2793                 dev->data->rx_queues[queue_idx] = NULL;
2794         }
2795
2796         /* First allocate the rx queue data structure */
2797         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2798                                  RTE_CACHE_LINE_SIZE, socket_id);
2799         if (rxq == NULL)
2800                 return -ENOMEM;
2801         rxq->mb_pool = mp;
2802         rxq->nb_rx_desc = nb_desc;
2803         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2804         rxq->queue_id = queue_idx;
2805         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2806                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2807         rxq->port_id = dev->data->port_id;
2808         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2809                                                         0 : ETHER_CRC_LEN);
2810         rxq->drop_en = rx_conf->rx_drop_en;
2811         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2812
2813         /*
2814          * The packet type in RX descriptor is different for different NICs.
2815          * Some bits are used for x550 but reserved for other NICS.
2816          * So set different masks for different NICs.
2817          */
2818         if (hw->mac.type == ixgbe_mac_X550 ||
2819             hw->mac.type == ixgbe_mac_X550EM_x ||
2820             hw->mac.type == ixgbe_mac_X550EM_a ||
2821             hw->mac.type == ixgbe_mac_X550_vf ||
2822             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2823             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2824                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2825         else
2826                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2827
2828         /*
2829          * Allocate RX ring hardware descriptors. A memzone large enough to
2830          * handle the maximum ring size is allocated in order to allow for
2831          * resizing in later calls to the queue setup function.
2832          */
2833         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2834                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2835         if (rz == NULL) {
2836                 ixgbe_rx_queue_release(rxq);
2837                 return -ENOMEM;
2838         }
2839
2840         /*
2841          * Zero init all the descriptors in the ring.
2842          */
2843         memset(rz->addr, 0, RX_RING_SZ);
2844
2845         /*
2846          * Modified to setup VFRDT for Virtual Function
2847          */
2848         if (hw->mac.type == ixgbe_mac_82599_vf ||
2849             hw->mac.type == ixgbe_mac_X540_vf ||
2850             hw->mac.type == ixgbe_mac_X550_vf ||
2851             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2852             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2853                 rxq->rdt_reg_addr =
2854                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2855                 rxq->rdh_reg_addr =
2856                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2857         } else {
2858                 rxq->rdt_reg_addr =
2859                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2860                 rxq->rdh_reg_addr =
2861                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2862         }
2863
2864         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2865         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2866
2867         /*
2868          * Certain constraints must be met in order to use the bulk buffer
2869          * allocation Rx burst function. If any of Rx queues doesn't meet them
2870          * the feature should be disabled for the whole port.
2871          */
2872         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2873                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2874                                     "preconditions - canceling the feature for "
2875                                     "the whole port[%d]",
2876                              rxq->queue_id, rxq->port_id);
2877                 adapter->rx_bulk_alloc_allowed = false;
2878         }
2879
2880         /*
2881          * Allocate software ring. Allow for space at the end of the
2882          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2883          * function does not access an invalid memory region.
2884          */
2885         len = nb_desc;
2886         if (adapter->rx_bulk_alloc_allowed)
2887                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2888
2889         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2890                                           sizeof(struct ixgbe_rx_entry) * len,
2891                                           RTE_CACHE_LINE_SIZE, socket_id);
2892         if (!rxq->sw_ring) {
2893                 ixgbe_rx_queue_release(rxq);
2894                 return -ENOMEM;
2895         }
2896
2897         /*
2898          * Always allocate even if it's not going to be needed in order to
2899          * simplify the code.
2900          *
2901          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2902          * be requested in ixgbe_dev_rx_init(), which is called later from
2903          * dev_start() flow.
2904          */
2905         rxq->sw_sc_ring =
2906                 rte_zmalloc_socket("rxq->sw_sc_ring",
2907                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2908                                    RTE_CACHE_LINE_SIZE, socket_id);
2909         if (!rxq->sw_sc_ring) {
2910                 ixgbe_rx_queue_release(rxq);
2911                 return -ENOMEM;
2912         }
2913
2914         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2915                             "dma_addr=0x%"PRIx64,
2916                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2917                      rxq->rx_ring_phys_addr);
2918
2919         if (!rte_is_power_of_2(nb_desc)) {
2920                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2921                                     "preconditions - canceling the feature for "
2922                                     "the whole port[%d]",
2923                              rxq->queue_id, rxq->port_id);
2924                 adapter->rx_vec_allowed = false;
2925         } else
2926                 ixgbe_rxq_vec_setup(rxq);
2927
2928         dev->data->rx_queues[queue_idx] = rxq;
2929
2930         ixgbe_reset_rx_queue(adapter, rxq);
2931
2932         return 0;
2933 }
2934
2935 uint32_t
2936 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2937 {
2938 #define IXGBE_RXQ_SCAN_INTERVAL 4
2939         volatile union ixgbe_adv_rx_desc *rxdp;
2940         struct ixgbe_rx_queue *rxq;
2941         uint32_t desc = 0;
2942
2943         rxq = dev->data->rx_queues[rx_queue_id];
2944         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2945
2946         while ((desc < rxq->nb_rx_desc) &&
2947                 (rxdp->wb.upper.status_error &
2948                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2949                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2950                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2951                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2952                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2953                                 desc - rxq->nb_rx_desc]);
2954         }
2955
2956         return desc;
2957 }
2958
2959 int
2960 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2961 {
2962         volatile union ixgbe_adv_rx_desc *rxdp;
2963         struct ixgbe_rx_queue *rxq = rx_queue;
2964         uint32_t desc;
2965
2966         if (unlikely(offset >= rxq->nb_rx_desc))
2967                 return 0;
2968         desc = rxq->rx_tail + offset;
2969         if (desc >= rxq->nb_rx_desc)
2970                 desc -= rxq->nb_rx_desc;
2971
2972         rxdp = &rxq->rx_ring[desc];
2973         return !!(rxdp->wb.upper.status_error &
2974                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2975 }
2976
2977 int
2978 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
2979 {
2980         struct ixgbe_rx_queue *rxq = rx_queue;
2981         volatile uint32_t *status;
2982         uint32_t nb_hold, desc;
2983
2984         if (unlikely(offset >= rxq->nb_rx_desc))
2985                 return -EINVAL;
2986
2987 #ifdef RTE_IXGBE_INC_VECTOR
2988         if (rxq->rx_using_sse)
2989                 nb_hold = rxq->rxrearm_nb;
2990         else
2991 #endif
2992                 nb_hold = rxq->nb_rx_hold;
2993         if (offset >= rxq->nb_rx_desc - nb_hold)
2994                 return RTE_ETH_RX_DESC_UNAVAIL;
2995
2996         desc = rxq->rx_tail + offset;
2997         if (desc >= rxq->nb_rx_desc)
2998                 desc -= rxq->nb_rx_desc;
2999
3000         status = &rxq->rx_ring[desc].wb.upper.status_error;
3001         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3002                 return RTE_ETH_RX_DESC_DONE;
3003
3004         return RTE_ETH_RX_DESC_AVAIL;
3005 }
3006
3007 int
3008 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3009 {
3010         struct ixgbe_tx_queue *txq = tx_queue;
3011         volatile uint32_t *status;
3012         uint32_t desc;
3013
3014         if (unlikely(offset >= txq->nb_tx_desc))
3015                 return -EINVAL;
3016
3017         desc = txq->tx_tail + offset;
3018         /* go to next desc that has the RS bit */
3019         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3020                 txq->tx_rs_thresh;
3021         if (desc >= txq->nb_tx_desc) {
3022                 desc -= txq->nb_tx_desc;
3023                 if (desc >= txq->nb_tx_desc)
3024                         desc -= txq->nb_tx_desc;
3025         }
3026
3027         status = &txq->tx_ring[desc].wb.status;
3028         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3029                 return RTE_ETH_TX_DESC_DONE;
3030
3031         return RTE_ETH_TX_DESC_FULL;
3032 }
3033
3034 void __attribute__((cold))
3035 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3036 {
3037         unsigned i;
3038         struct ixgbe_adapter *adapter =
3039                 (struct ixgbe_adapter *)dev->data->dev_private;
3040
3041         PMD_INIT_FUNC_TRACE();
3042
3043         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3044                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3045
3046                 if (txq != NULL) {
3047                         txq->ops->release_mbufs(txq);
3048                         txq->ops->reset(txq);
3049                 }
3050         }
3051
3052         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3053                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3054
3055                 if (rxq != NULL) {
3056                         ixgbe_rx_queue_release_mbufs(rxq);
3057                         ixgbe_reset_rx_queue(adapter, rxq);
3058                 }
3059         }
3060 }
3061
3062 void
3063 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3064 {
3065         unsigned i;
3066
3067         PMD_INIT_FUNC_TRACE();
3068
3069         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3070                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3071                 dev->data->rx_queues[i] = NULL;
3072         }
3073         dev->data->nb_rx_queues = 0;
3074
3075         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3076                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3077                 dev->data->tx_queues[i] = NULL;
3078         }
3079         dev->data->nb_tx_queues = 0;
3080 }
3081
3082 /*********************************************************************
3083  *
3084  *  Device RX/TX init functions
3085  *
3086  **********************************************************************/
3087
3088 /**
3089  * Receive Side Scaling (RSS)
3090  * See section 7.1.2.8 in the following document:
3091  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3092  *
3093  * Principles:
3094  * The source and destination IP addresses of the IP header and the source
3095  * and destination ports of TCP/UDP headers, if any, of received packets are
3096  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3097  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3098  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3099  * RSS output index which is used as the RX queue index where to store the
3100  * received packets.
3101  * The following output is supplied in the RX write-back descriptor:
3102  *     - 32-bit result of the Microsoft RSS hash function,
3103  *     - 4-bit RSS type field.
3104  */
3105
3106 /*
3107  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3108  * Used as the default key.
3109  */
3110 static uint8_t rss_intel_key[40] = {
3111         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3112         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3113         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3114         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3115         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3116 };
3117
3118 static void
3119 ixgbe_rss_disable(struct rte_eth_dev *dev)
3120 {
3121         struct ixgbe_hw *hw;
3122         uint32_t mrqc;
3123         uint32_t mrqc_reg;
3124
3125         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3126         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3127         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3128         mrqc &= ~IXGBE_MRQC_RSSEN;
3129         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3130 }
3131
3132 static void
3133 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3134 {
3135         uint8_t  *hash_key;
3136         uint32_t mrqc;
3137         uint32_t rss_key;
3138         uint64_t rss_hf;
3139         uint16_t i;
3140         uint32_t mrqc_reg;
3141         uint32_t rssrk_reg;
3142
3143         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3144         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3145
3146         hash_key = rss_conf->rss_key;
3147         if (hash_key != NULL) {
3148                 /* Fill in RSS hash key */
3149                 for (i = 0; i < 10; i++) {
3150                         rss_key  = hash_key[(i * 4)];
3151                         rss_key |= hash_key[(i * 4) + 1] << 8;
3152                         rss_key |= hash_key[(i * 4) + 2] << 16;
3153                         rss_key |= hash_key[(i * 4) + 3] << 24;
3154                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3155                 }
3156         }
3157
3158         /* Set configured hashing protocols in MRQC register */
3159         rss_hf = rss_conf->rss_hf;
3160         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3161         if (rss_hf & ETH_RSS_IPV4)
3162                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3163         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3164                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3165         if (rss_hf & ETH_RSS_IPV6)
3166                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3167         if (rss_hf & ETH_RSS_IPV6_EX)
3168                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3169         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3170                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3171         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3172                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3173         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3174                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3175         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3176                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3177         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3178                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3179         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3180 }
3181
3182 int
3183 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3184                           struct rte_eth_rss_conf *rss_conf)
3185 {
3186         struct ixgbe_hw *hw;
3187         uint32_t mrqc;
3188         uint64_t rss_hf;
3189         uint32_t mrqc_reg;
3190
3191         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3192
3193         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3194                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3195                         "NIC.");
3196                 return -ENOTSUP;
3197         }
3198         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3199
3200         /*
3201          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3202          *     "RSS enabling cannot be done dynamically while it must be
3203          *      preceded by a software reset"
3204          * Before changing anything, first check that the update RSS operation
3205          * does not attempt to disable RSS, if RSS was enabled at
3206          * initialization time, or does not attempt to enable RSS, if RSS was
3207          * disabled at initialization time.
3208          */
3209         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3210         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3211         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3212                 if (rss_hf != 0) /* Enable RSS */
3213                         return -(EINVAL);
3214                 return 0; /* Nothing to do */
3215         }
3216         /* RSS enabled */
3217         if (rss_hf == 0) /* Disable RSS */
3218                 return -(EINVAL);
3219         ixgbe_hw_rss_hash_set(hw, rss_conf);
3220         return 0;
3221 }
3222
3223 int
3224 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3225                             struct rte_eth_rss_conf *rss_conf)
3226 {
3227         struct ixgbe_hw *hw;
3228         uint8_t *hash_key;
3229         uint32_t mrqc;
3230         uint32_t rss_key;
3231         uint64_t rss_hf;
3232         uint16_t i;
3233         uint32_t mrqc_reg;
3234         uint32_t rssrk_reg;
3235
3236         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3237         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3238         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3239         hash_key = rss_conf->rss_key;
3240         if (hash_key != NULL) {
3241                 /* Return RSS hash key */
3242                 for (i = 0; i < 10; i++) {
3243                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3244                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3245                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3246                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3247                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3248                 }
3249         }
3250
3251         /* Get RSS functions configured in MRQC register */
3252         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3253         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3254                 rss_conf->rss_hf = 0;
3255                 return 0;
3256         }
3257         rss_hf = 0;
3258         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3259                 rss_hf |= ETH_RSS_IPV4;
3260         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3261                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3262         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3263                 rss_hf |= ETH_RSS_IPV6;
3264         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3265                 rss_hf |= ETH_RSS_IPV6_EX;
3266         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3267                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3268         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3269                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3270         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3271                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3272         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3273                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3274         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3275                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3276         rss_conf->rss_hf = rss_hf;
3277         return 0;
3278 }
3279
3280 static void
3281 ixgbe_rss_configure(struct rte_eth_dev *dev)
3282 {
3283         struct rte_eth_rss_conf rss_conf;
3284         struct ixgbe_hw *hw;
3285         uint32_t reta;
3286         uint16_t i;
3287         uint16_t j;
3288         uint16_t sp_reta_size;
3289         uint32_t reta_reg;
3290
3291         PMD_INIT_FUNC_TRACE();
3292         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3293
3294         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3295
3296         /*
3297          * Fill in redirection table
3298          * The byte-swap is needed because NIC registers are in
3299          * little-endian order.
3300          */
3301         reta = 0;
3302         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3303                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3304
3305                 if (j == dev->data->nb_rx_queues)
3306                         j = 0;
3307                 reta = (reta << 8) | j;
3308                 if ((i & 3) == 3)
3309                         IXGBE_WRITE_REG(hw, reta_reg,
3310                                         rte_bswap32(reta));
3311         }
3312
3313         /*
3314          * Configure the RSS key and the RSS protocols used to compute
3315          * the RSS hash of input packets.
3316          */
3317         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3318         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3319                 ixgbe_rss_disable(dev);
3320                 return;
3321         }
3322         if (rss_conf.rss_key == NULL)
3323                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3324         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3325 }
3326
3327 #define NUM_VFTA_REGISTERS 128
3328 #define NIC_RX_BUFFER_SIZE 0x200
3329 #define X550_RX_BUFFER_SIZE 0x180
3330
3331 static void
3332 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3333 {
3334         struct rte_eth_vmdq_dcb_conf *cfg;
3335         struct ixgbe_hw *hw;
3336         enum rte_eth_nb_pools num_pools;
3337         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3338         uint16_t pbsize;
3339         uint8_t nb_tcs; /* number of traffic classes */
3340         int i;
3341
3342         PMD_INIT_FUNC_TRACE();
3343         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3344         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3345         num_pools = cfg->nb_queue_pools;
3346         /* Check we have a valid number of pools */
3347         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3348                 ixgbe_rss_disable(dev);
3349                 return;
3350         }
3351         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3352         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3353
3354         /*
3355          * RXPBSIZE
3356          * split rx buffer up into sections, each for 1 traffic class
3357          */
3358         switch (hw->mac.type) {
3359         case ixgbe_mac_X550:
3360         case ixgbe_mac_X550EM_x:
3361         case ixgbe_mac_X550EM_a:
3362                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3363                 break;
3364         default:
3365                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3366                 break;
3367         }
3368         for (i = 0; i < nb_tcs; i++) {
3369                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3370
3371                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3372                 /* clear 10 bits. */
3373                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3374                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3375         }
3376         /* zero alloc all unused TCs */
3377         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3378                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3379
3380                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3381                 /* clear 10 bits. */
3382                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3383         }
3384
3385         /* MRQC: enable vmdq and dcb */
3386         mrqc = (num_pools == ETH_16_POOLS) ?
3387                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3388         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3389
3390         /* PFVTCTL: turn on virtualisation and set the default pool */
3391         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3392         if (cfg->enable_default_pool) {
3393                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3394         } else {
3395                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3396         }
3397
3398         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3399
3400         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3401         queue_mapping = 0;
3402         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3403                 /*
3404                  * mapping is done with 3 bits per priority,
3405                  * so shift by i*3 each time
3406                  */
3407                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3408
3409         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3410
3411         /* RTRPCS: DCB related */
3412         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3413
3414         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3415         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3416         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3417         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3418
3419         /* VFTA - enable all vlan filters */
3420         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3421                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3422         }
3423
3424         /* VFRE: pool enabling for receive - 16 or 32 */
3425         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3426                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3427
3428         /*
3429          * MPSAR - allow pools to read specific mac addresses
3430          * In this case, all pools should be able to read from mac addr 0
3431          */
3432         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3433         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3434
3435         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3436         for (i = 0; i < cfg->nb_pool_maps; i++) {
3437                 /* set vlan id in VF register and set the valid bit */
3438                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3439                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3440                 /*
3441                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3442                  * pools, we only need to use the first half of the register
3443                  * i.e. bits 0-31
3444                  */
3445                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3446         }
3447 }
3448
3449 /**
3450  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3451  * @dev: pointer to eth_dev structure
3452  * @dcb_config: pointer to ixgbe_dcb_config structure
3453  */
3454 static void
3455 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3456                        struct ixgbe_dcb_config *dcb_config)
3457 {
3458         uint32_t reg;
3459         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3460
3461         PMD_INIT_FUNC_TRACE();
3462         if (hw->mac.type != ixgbe_mac_82598EB) {
3463                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3464                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3465                 reg |= IXGBE_RTTDCS_ARBDIS;
3466                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3467
3468                 /* Enable DCB for Tx with 8 TCs */
3469                 if (dcb_config->num_tcs.pg_tcs == 8) {
3470                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3471                 } else {
3472                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3473                 }
3474                 if (dcb_config->vt_mode)
3475                         reg |= IXGBE_MTQC_VT_ENA;
3476                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3477
3478                 /* Enable the Tx desc arbiter */
3479                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3480                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3481                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3482
3483                 /* Enable Security TX Buffer IFG for DCB */
3484                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3485                 reg |= IXGBE_SECTX_DCB;
3486                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3487         }
3488 }
3489
3490 /**
3491  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3492  * @dev: pointer to rte_eth_dev structure
3493  * @dcb_config: pointer to ixgbe_dcb_config structure
3494  */
3495 static void
3496 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3497                         struct ixgbe_dcb_config *dcb_config)
3498 {
3499         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3500                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3501         struct ixgbe_hw *hw =
3502                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3503
3504         PMD_INIT_FUNC_TRACE();
3505         if (hw->mac.type != ixgbe_mac_82598EB)
3506                 /*PF VF Transmit Enable*/
3507                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3508                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3509
3510         /*Configure general DCB TX parameters*/
3511         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3512 }
3513
3514 static void
3515 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3516                         struct ixgbe_dcb_config *dcb_config)
3517 {
3518         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3519                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3520         struct ixgbe_dcb_tc_config *tc;
3521         uint8_t i, j;
3522
3523         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3524         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3525                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3526                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3527         } else {
3528                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3529                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3530         }
3531         /* User Priority to Traffic Class mapping */
3532         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3533                 j = vmdq_rx_conf->dcb_tc[i];
3534                 tc = &dcb_config->tc_config[j];
3535                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3536                                                 (uint8_t)(1 << j);
3537         }
3538 }
3539
3540 static void
3541 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3542                         struct ixgbe_dcb_config *dcb_config)
3543 {
3544         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3545                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3546         struct ixgbe_dcb_tc_config *tc;
3547         uint8_t i, j;
3548
3549         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3550         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3551                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3552                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3553         } else {
3554                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3555                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3556         }
3557
3558         /* User Priority to Traffic Class mapping */
3559         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3560                 j = vmdq_tx_conf->dcb_tc[i];
3561                 tc = &dcb_config->tc_config[j];
3562                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3563                                                 (uint8_t)(1 << j);
3564         }
3565 }
3566
3567 static void
3568 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3569                 struct ixgbe_dcb_config *dcb_config)
3570 {
3571         struct rte_eth_dcb_rx_conf *rx_conf =
3572                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3573         struct ixgbe_dcb_tc_config *tc;
3574         uint8_t i, j;
3575
3576         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3577         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3578
3579         /* User Priority to Traffic Class mapping */
3580         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3581                 j = rx_conf->dcb_tc[i];
3582                 tc = &dcb_config->tc_config[j];
3583                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3584                                                 (uint8_t)(1 << j);
3585         }
3586 }
3587
3588 static void
3589 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3590                 struct ixgbe_dcb_config *dcb_config)
3591 {
3592         struct rte_eth_dcb_tx_conf *tx_conf =
3593                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3594         struct ixgbe_dcb_tc_config *tc;
3595         uint8_t i, j;
3596
3597         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3598         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3599
3600         /* User Priority to Traffic Class mapping */
3601         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3602                 j = tx_conf->dcb_tc[i];
3603                 tc = &dcb_config->tc_config[j];
3604                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3605                                                 (uint8_t)(1 << j);
3606         }
3607 }
3608
3609 /**
3610  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3611  * @dev: pointer to eth_dev structure
3612  * @dcb_config: pointer to ixgbe_dcb_config structure
3613  */
3614 static void
3615 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3616                        struct ixgbe_dcb_config *dcb_config)
3617 {
3618         uint32_t reg;
3619         uint32_t vlanctrl;
3620         uint8_t i;
3621         uint32_t q;
3622         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3623
3624         PMD_INIT_FUNC_TRACE();
3625         /*
3626          * Disable the arbiter before changing parameters
3627          * (always enable recycle mode; WSP)
3628          */
3629         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3630         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3631
3632         if (hw->mac.type != ixgbe_mac_82598EB) {
3633                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3634                 if (dcb_config->num_tcs.pg_tcs == 4) {
3635                         if (dcb_config->vt_mode)
3636                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3637                                         IXGBE_MRQC_VMDQRT4TCEN;
3638                         else {
3639                                 /* no matter the mode is DCB or DCB_RSS, just
3640                                  * set the MRQE to RSSXTCEN. RSS is controlled
3641                                  * by RSS_FIELD
3642                                  */
3643                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3644                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3645                                         IXGBE_MRQC_RTRSS4TCEN;
3646                         }
3647                 }
3648                 if (dcb_config->num_tcs.pg_tcs == 8) {
3649                         if (dcb_config->vt_mode)
3650                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3651                                         IXGBE_MRQC_VMDQRT8TCEN;
3652                         else {
3653                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3654                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3655                                         IXGBE_MRQC_RTRSS8TCEN;
3656                         }
3657                 }
3658
3659                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3660
3661                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3662                         /* Disable drop for all queues in VMDQ mode*/
3663                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3664                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3665                                                 (IXGBE_QDE_WRITE |
3666                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3667                 } else {
3668                         /* Enable drop for all queues in SRIOV mode */
3669                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3670                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3671                                                 (IXGBE_QDE_WRITE |
3672                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3673                                                  IXGBE_QDE_ENABLE));
3674                 }
3675         }
3676
3677         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3678         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3679         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3680         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3681
3682         /* VFTA - enable all vlan filters */
3683         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3684                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3685         }
3686
3687         /*
3688          * Configure Rx packet plane (recycle mode; WSP) and
3689          * enable arbiter
3690          */
3691         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3692         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3693 }
3694
3695 static void
3696 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3697                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3698 {
3699         switch (hw->mac.type) {
3700         case ixgbe_mac_82598EB:
3701                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3702                 break;
3703         case ixgbe_mac_82599EB:
3704         case ixgbe_mac_X540:
3705         case ixgbe_mac_X550:
3706         case ixgbe_mac_X550EM_x:
3707         case ixgbe_mac_X550EM_a:
3708                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3709                                                   tsa, map);
3710                 break;
3711         default:
3712                 break;
3713         }
3714 }
3715
3716 static void
3717 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3718                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3719 {
3720         switch (hw->mac.type) {
3721         case ixgbe_mac_82598EB:
3722                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3723                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3724                 break;
3725         case ixgbe_mac_82599EB:
3726         case ixgbe_mac_X540:
3727         case ixgbe_mac_X550:
3728         case ixgbe_mac_X550EM_x:
3729         case ixgbe_mac_X550EM_a:
3730                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3731                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3732                 break;
3733         default:
3734                 break;
3735         }
3736 }
3737
3738 #define DCB_RX_CONFIG  1
3739 #define DCB_TX_CONFIG  1
3740 #define DCB_TX_PB      1024
3741 /**
3742  * ixgbe_dcb_hw_configure - Enable DCB and configure
3743  * general DCB in VT mode and non-VT mode parameters
3744  * @dev: pointer to rte_eth_dev structure
3745  * @dcb_config: pointer to ixgbe_dcb_config structure
3746  */
3747 static int
3748 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3749                         struct ixgbe_dcb_config *dcb_config)
3750 {
3751         int     ret = 0;
3752         uint8_t i, pfc_en, nb_tcs;
3753         uint16_t pbsize, rx_buffer_size;
3754         uint8_t config_dcb_rx = 0;
3755         uint8_t config_dcb_tx = 0;
3756         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3757         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3758         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3759         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3760         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3761         struct ixgbe_dcb_tc_config *tc;
3762         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3763         struct ixgbe_hw *hw =
3764                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3765         struct ixgbe_bw_conf *bw_conf =
3766                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3767
3768         switch (dev->data->dev_conf.rxmode.mq_mode) {
3769         case ETH_MQ_RX_VMDQ_DCB:
3770                 dcb_config->vt_mode = true;
3771                 if (hw->mac.type != ixgbe_mac_82598EB) {
3772                         config_dcb_rx = DCB_RX_CONFIG;
3773                         /*
3774                          *get dcb and VT rx configuration parameters
3775                          *from rte_eth_conf
3776                          */
3777                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3778                         /*Configure general VMDQ and DCB RX parameters*/
3779                         ixgbe_vmdq_dcb_configure(dev);
3780                 }
3781                 break;
3782         case ETH_MQ_RX_DCB:
3783         case ETH_MQ_RX_DCB_RSS:
3784                 dcb_config->vt_mode = false;
3785                 config_dcb_rx = DCB_RX_CONFIG;
3786                 /* Get dcb TX configuration parameters from rte_eth_conf */
3787                 ixgbe_dcb_rx_config(dev, dcb_config);
3788                 /*Configure general DCB RX parameters*/
3789                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3790                 break;
3791         default:
3792                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3793                 break;
3794         }
3795         switch (dev->data->dev_conf.txmode.mq_mode) {
3796         case ETH_MQ_TX_VMDQ_DCB:
3797                 dcb_config->vt_mode = true;
3798                 config_dcb_tx = DCB_TX_CONFIG;
3799                 /* get DCB and VT TX configuration parameters
3800                  * from rte_eth_conf
3801                  */
3802                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3803                 /*Configure general VMDQ and DCB TX parameters*/
3804                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3805                 break;
3806
3807         case ETH_MQ_TX_DCB:
3808                 dcb_config->vt_mode = false;
3809                 config_dcb_tx = DCB_TX_CONFIG;
3810                 /*get DCB TX configuration parameters from rte_eth_conf*/
3811                 ixgbe_dcb_tx_config(dev, dcb_config);
3812                 /*Configure general DCB TX parameters*/
3813                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3814                 break;
3815         default:
3816                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3817                 break;
3818         }
3819
3820         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3821         /* Unpack map */
3822         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3823         if (nb_tcs == ETH_4_TCS) {
3824                 /* Avoid un-configured priority mapping to TC0 */
3825                 uint8_t j = 4;
3826                 uint8_t mask = 0xFF;
3827
3828                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3829                         mask = (uint8_t)(mask & (~(1 << map[i])));
3830                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3831                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3832                                 map[j++] = i;
3833                         mask >>= 1;
3834                 }
3835                 /* Re-configure 4 TCs BW */
3836                 for (i = 0; i < nb_tcs; i++) {
3837                         tc = &dcb_config->tc_config[i];
3838                         if (bw_conf->tc_num != nb_tcs)
3839                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3840                                         (uint8_t)(100 / nb_tcs);
3841                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3842                                                 (uint8_t)(100 / nb_tcs);
3843                 }
3844                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3845                         tc = &dcb_config->tc_config[i];
3846                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3847                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3848                 }
3849         } else {
3850                 /* Re-configure 8 TCs BW */
3851                 for (i = 0; i < nb_tcs; i++) {
3852                         tc = &dcb_config->tc_config[i];
3853                         if (bw_conf->tc_num != nb_tcs)
3854                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3855                                         (uint8_t)(100 / nb_tcs + (i & 1));
3856                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3857                                 (uint8_t)(100 / nb_tcs + (i & 1));
3858                 }
3859         }
3860
3861         switch (hw->mac.type) {
3862         case ixgbe_mac_X550:
3863         case ixgbe_mac_X550EM_x:
3864         case ixgbe_mac_X550EM_a:
3865                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3866                 break;
3867         default:
3868                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3869                 break;
3870         }
3871
3872         if (config_dcb_rx) {
3873                 /* Set RX buffer size */
3874                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3875                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3876
3877                 for (i = 0; i < nb_tcs; i++) {
3878                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3879                 }
3880                 /* zero alloc all unused TCs */
3881                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3882                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3883                 }
3884         }
3885         if (config_dcb_tx) {
3886                 /* Only support an equally distributed
3887                  *  Tx packet buffer strategy.
3888                  */
3889                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3890                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3891
3892                 for (i = 0; i < nb_tcs; i++) {
3893                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3894                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3895                 }
3896                 /* Clear unused TCs, if any, to zero buffer size*/
3897                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3898                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3899                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3900                 }
3901         }
3902
3903         /*Calculates traffic class credits*/
3904         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3905                                 IXGBE_DCB_TX_CONFIG);
3906         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3907                                 IXGBE_DCB_RX_CONFIG);
3908
3909         if (config_dcb_rx) {
3910                 /* Unpack CEE standard containers */
3911                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3912                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3913                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3914                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3915                 /* Configure PG(ETS) RX */
3916                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3917         }
3918
3919         if (config_dcb_tx) {
3920                 /* Unpack CEE standard containers */
3921                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3922                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3923                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3924                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3925                 /* Configure PG(ETS) TX */
3926                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3927         }
3928
3929         /*Configure queue statistics registers*/
3930         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3931
3932         /* Check if the PFC is supported */
3933         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3934                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3935                 for (i = 0; i < nb_tcs; i++) {
3936                         /*
3937                         * If the TC count is 8,and the default high_water is 48,
3938                         * the low_water is 16 as default.
3939                         */
3940                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3941                         hw->fc.low_water[i] = pbsize / 4;
3942                         /* Enable pfc for this TC */
3943                         tc = &dcb_config->tc_config[i];
3944                         tc->pfc = ixgbe_dcb_pfc_enabled;
3945                 }
3946                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3947                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3948                         pfc_en &= 0x0F;
3949                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3950         }
3951
3952         return ret;
3953 }
3954
3955 /**
3956  * ixgbe_configure_dcb - Configure DCB  Hardware
3957  * @dev: pointer to rte_eth_dev
3958  */
3959 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3960 {
3961         struct ixgbe_dcb_config *dcb_cfg =
3962                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3963         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3964
3965         PMD_INIT_FUNC_TRACE();
3966
3967         /* check support mq_mode for DCB */
3968         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3969             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3970             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3971                 return;
3972
3973         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3974                 return;
3975
3976         /** Configure DCB hardware **/
3977         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3978 }
3979
3980 /*
3981  * VMDq only support for 10 GbE NIC.
3982  */
3983 static void
3984 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3985 {
3986         struct rte_eth_vmdq_rx_conf *cfg;
3987         struct ixgbe_hw *hw;
3988         enum rte_eth_nb_pools num_pools;
3989         uint32_t mrqc, vt_ctl, vlanctrl;
3990         uint32_t vmolr = 0;
3991         int i;
3992
3993         PMD_INIT_FUNC_TRACE();
3994         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3995         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3996         num_pools = cfg->nb_queue_pools;
3997
3998         ixgbe_rss_disable(dev);
3999
4000         /* MRQC: enable vmdq */
4001         mrqc = IXGBE_MRQC_VMDQEN;
4002         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4003
4004         /* PFVTCTL: turn on virtualisation and set the default pool */
4005         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4006         if (cfg->enable_default_pool)
4007                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4008         else
4009                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4010
4011         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4012
4013         for (i = 0; i < (int)num_pools; i++) {
4014                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4015                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4016         }
4017
4018         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4019         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4020         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4021         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4022
4023         /* VFTA - enable all vlan filters */
4024         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4025                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4026
4027         /* VFRE: pool enabling for receive - 64 */
4028         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4029         if (num_pools == ETH_64_POOLS)
4030                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4031
4032         /*
4033          * MPSAR - allow pools to read specific mac addresses
4034          * In this case, all pools should be able to read from mac addr 0
4035          */
4036         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4037         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4038
4039         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4040         for (i = 0; i < cfg->nb_pool_maps; i++) {
4041                 /* set vlan id in VF register and set the valid bit */
4042                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4043                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4044                 /*
4045                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4046                  * pools, we only need to use the first half of the register
4047                  * i.e. bits 0-31
4048                  */
4049                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4050                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4051                                         (cfg->pool_map[i].pools & UINT32_MAX));
4052                 else
4053                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4054                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4055
4056         }
4057
4058         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4059         if (cfg->enable_loop_back) {
4060                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4061                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4062                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4063         }
4064
4065         IXGBE_WRITE_FLUSH(hw);
4066 }
4067
4068 /*
4069  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
4070  * @hw: pointer to hardware structure
4071  */
4072 static void
4073 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4074 {
4075         uint32_t reg;
4076         uint32_t q;
4077
4078         PMD_INIT_FUNC_TRACE();
4079         /*PF VF Transmit Enable*/
4080         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4081         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4082
4083         /* Disable the Tx desc arbiter so that MTQC can be changed */
4084         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4085         reg |= IXGBE_RTTDCS_ARBDIS;
4086         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4087
4088         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4089         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4090
4091         /* Disable drop for all queues */
4092         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4093                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4094                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4095
4096         /* Enable the Tx desc arbiter */
4097         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4098         reg &= ~IXGBE_RTTDCS_ARBDIS;
4099         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4100
4101         IXGBE_WRITE_FLUSH(hw);
4102 }
4103
4104 static int __attribute__((cold))
4105 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4106 {
4107         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4108         uint64_t dma_addr;
4109         unsigned int i;
4110
4111         /* Initialize software ring entries */
4112         for (i = 0; i < rxq->nb_rx_desc; i++) {
4113                 volatile union ixgbe_adv_rx_desc *rxd;
4114                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4115
4116                 if (mbuf == NULL) {
4117                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4118                                      (unsigned) rxq->queue_id);
4119                         return -ENOMEM;
4120                 }
4121
4122                 rte_mbuf_refcnt_set(mbuf, 1);
4123                 mbuf->next = NULL;
4124                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4125                 mbuf->nb_segs = 1;
4126                 mbuf->port = rxq->port_id;
4127
4128                 dma_addr =
4129                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4130                 rxd = &rxq->rx_ring[i];
4131                 rxd->read.hdr_addr = 0;
4132                 rxd->read.pkt_addr = dma_addr;
4133                 rxe[i].mbuf = mbuf;
4134         }
4135
4136         return 0;
4137 }
4138
4139 static int
4140 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4141 {
4142         struct ixgbe_hw *hw;
4143         uint32_t mrqc;
4144
4145         ixgbe_rss_configure(dev);
4146
4147         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4148
4149         /* MRQC: enable VF RSS */
4150         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4151         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4152         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4153         case ETH_64_POOLS:
4154                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4155                 break;
4156
4157         case ETH_32_POOLS:
4158                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4159                 break;
4160
4161         default:
4162                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4163                 return -EINVAL;
4164         }
4165
4166         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4167
4168         return 0;
4169 }
4170
4171 static int
4172 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4173 {
4174         struct ixgbe_hw *hw =
4175                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4176
4177         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4178         case ETH_64_POOLS:
4179                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4180                         IXGBE_MRQC_VMDQEN);
4181                 break;
4182
4183         case ETH_32_POOLS:
4184                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4185                         IXGBE_MRQC_VMDQRT4TCEN);
4186                 break;
4187
4188         case ETH_16_POOLS:
4189                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4190                         IXGBE_MRQC_VMDQRT8TCEN);
4191                 break;
4192         default:
4193                 PMD_INIT_LOG(ERR,
4194                         "invalid pool number in IOV mode");
4195                 break;
4196         }
4197         return 0;
4198 }
4199
4200 static int
4201 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4202 {
4203         struct ixgbe_hw *hw =
4204                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4205
4206         if (hw->mac.type == ixgbe_mac_82598EB)
4207                 return 0;
4208
4209         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4210                 /*
4211                  * SRIOV inactive scheme
4212                  * any DCB/RSS w/o VMDq multi-queue setting
4213                  */
4214                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4215                 case ETH_MQ_RX_RSS:
4216                 case ETH_MQ_RX_DCB_RSS:
4217                 case ETH_MQ_RX_VMDQ_RSS:
4218                         ixgbe_rss_configure(dev);
4219                         break;
4220
4221                 case ETH_MQ_RX_VMDQ_DCB:
4222                         ixgbe_vmdq_dcb_configure(dev);
4223                         break;
4224
4225                 case ETH_MQ_RX_VMDQ_ONLY:
4226                         ixgbe_vmdq_rx_hw_configure(dev);
4227                         break;
4228
4229                 case ETH_MQ_RX_NONE:
4230                 default:
4231                         /* if mq_mode is none, disable rss mode.*/
4232                         ixgbe_rss_disable(dev);
4233                         break;
4234                 }
4235         } else {
4236                 /* SRIOV active scheme
4237                  * Support RSS together with SRIOV.
4238                  */
4239                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4240                 case ETH_MQ_RX_RSS:
4241                 case ETH_MQ_RX_VMDQ_RSS:
4242                         ixgbe_config_vf_rss(dev);
4243                         break;
4244                 case ETH_MQ_RX_VMDQ_DCB:
4245                 case ETH_MQ_RX_DCB:
4246                 /* In SRIOV, the configuration is the same as VMDq case */
4247                         ixgbe_vmdq_dcb_configure(dev);
4248                         break;
4249                 /* DCB/RSS together with SRIOV is not supported */
4250                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4251                 case ETH_MQ_RX_DCB_RSS:
4252                         PMD_INIT_LOG(ERR,
4253                                 "Could not support DCB/RSS with VMDq & SRIOV");
4254                         return -1;
4255                 default:
4256                         ixgbe_config_vf_default(dev);
4257                         break;
4258                 }
4259         }
4260
4261         return 0;
4262 }
4263
4264 static int
4265 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4266 {
4267         struct ixgbe_hw *hw =
4268                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4269         uint32_t mtqc;
4270         uint32_t rttdcs;
4271
4272         if (hw->mac.type == ixgbe_mac_82598EB)
4273                 return 0;
4274
4275         /* disable arbiter before setting MTQC */
4276         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4277         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4278         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4279
4280         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4281                 /*
4282                  * SRIOV inactive scheme
4283                  * any DCB w/o VMDq multi-queue setting
4284                  */
4285                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4286                         ixgbe_vmdq_tx_hw_configure(hw);
4287                 else {
4288                         mtqc = IXGBE_MTQC_64Q_1PB;
4289                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4290                 }
4291         } else {
4292                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4293
4294                 /*
4295                  * SRIOV active scheme
4296                  * FIXME if support DCB together with VMDq & SRIOV
4297                  */
4298                 case ETH_64_POOLS:
4299                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4300                         break;
4301                 case ETH_32_POOLS:
4302                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4303                         break;
4304                 case ETH_16_POOLS:
4305                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4306                                 IXGBE_MTQC_8TC_8TQ;
4307                         break;
4308                 default:
4309                         mtqc = IXGBE_MTQC_64Q_1PB;
4310                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4311                 }
4312                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4313         }
4314
4315         /* re-enable arbiter */
4316         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4317         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4318
4319         return 0;
4320 }
4321
4322 /**
4323  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4324  *
4325  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4326  * spec rev. 3.0 chapter 8.2.3.8.13.
4327  *
4328  * @pool Memory pool of the Rx queue
4329  */
4330 static inline uint32_t
4331 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4332 {
4333         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4334
4335         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4336         uint16_t maxdesc =
4337                 IPV4_MAX_PKT_LEN /
4338                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4339
4340         if (maxdesc >= 16)
4341                 return IXGBE_RSCCTL_MAXDESC_16;
4342         else if (maxdesc >= 8)
4343                 return IXGBE_RSCCTL_MAXDESC_8;
4344         else if (maxdesc >= 4)
4345                 return IXGBE_RSCCTL_MAXDESC_4;
4346         else
4347                 return IXGBE_RSCCTL_MAXDESC_1;
4348 }
4349
4350 /**
4351  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4352  * interrupt
4353  *
4354  * (Taken from FreeBSD tree)
4355  * (yes this is all very magic and confusing :)
4356  *
4357  * @dev port handle
4358  * @entry the register array entry
4359  * @vector the MSIX vector for this queue
4360  * @type RX/TX/MISC
4361  */
4362 static void
4363 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4364 {
4365         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4366         u32 ivar, index;
4367
4368         vector |= IXGBE_IVAR_ALLOC_VAL;
4369
4370         switch (hw->mac.type) {
4371
4372         case ixgbe_mac_82598EB:
4373                 if (type == -1)
4374                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4375                 else
4376                         entry += (type * 64);
4377                 index = (entry >> 2) & 0x1F;
4378                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4379                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4380                 ivar |= (vector << (8 * (entry & 0x3)));
4381                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4382                 break;
4383
4384         case ixgbe_mac_82599EB:
4385         case ixgbe_mac_X540:
4386                 if (type == -1) { /* MISC IVAR */
4387                         index = (entry & 1) * 8;
4388                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4389                         ivar &= ~(0xFF << index);
4390                         ivar |= (vector << index);
4391                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4392                 } else {        /* RX/TX IVARS */
4393                         index = (16 * (entry & 1)) + (8 * type);
4394                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4395                         ivar &= ~(0xFF << index);
4396                         ivar |= (vector << index);
4397                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4398                 }
4399
4400                 break;
4401
4402         default:
4403                 break;
4404         }
4405 }
4406
4407 void __attribute__((cold))
4408 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4409 {
4410         uint16_t i, rx_using_sse;
4411         struct ixgbe_adapter *adapter =
4412                 (struct ixgbe_adapter *)dev->data->dev_private;
4413
4414         /*
4415          * In order to allow Vector Rx there are a few configuration
4416          * conditions to be met and Rx Bulk Allocation should be allowed.
4417          */
4418         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4419             !adapter->rx_bulk_alloc_allowed) {
4420                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4421                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4422                                     "not enabled",
4423                              dev->data->port_id);
4424
4425                 adapter->rx_vec_allowed = false;
4426         }
4427
4428         /*
4429          * Initialize the appropriate LRO callback.
4430          *
4431          * If all queues satisfy the bulk allocation preconditions
4432          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4433          * Otherwise use a single allocation version.
4434          */
4435         if (dev->data->lro) {
4436                 if (adapter->rx_bulk_alloc_allowed) {
4437                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4438                                            "allocation version");
4439                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4440                 } else {
4441                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4442                                            "allocation version");
4443                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4444                 }
4445         } else if (dev->data->scattered_rx) {
4446                 /*
4447                  * Set the non-LRO scattered callback: there are Vector and
4448                  * single allocation versions.
4449                  */
4450                 if (adapter->rx_vec_allowed) {
4451                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4452                                             "callback (port=%d).",
4453                                      dev->data->port_id);
4454
4455                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4456                 } else if (adapter->rx_bulk_alloc_allowed) {
4457                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4458                                            "allocation callback (port=%d).",
4459                                      dev->data->port_id);
4460                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4461                 } else {
4462                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4463                                             "single allocation) "
4464                                             "Scattered Rx callback "
4465                                             "(port=%d).",
4466                                      dev->data->port_id);
4467
4468                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4469                 }
4470         /*
4471          * Below we set "simple" callbacks according to port/queues parameters.
4472          * If parameters allow we are going to choose between the following
4473          * callbacks:
4474          *    - Vector
4475          *    - Bulk Allocation
4476          *    - Single buffer allocation (the simplest one)
4477          */
4478         } else if (adapter->rx_vec_allowed) {
4479                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4480                                     "burst size no less than %d (port=%d).",
4481                              RTE_IXGBE_DESCS_PER_LOOP,
4482                              dev->data->port_id);
4483
4484                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4485         } else if (adapter->rx_bulk_alloc_allowed) {
4486                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4487                                     "satisfied. Rx Burst Bulk Alloc function "
4488                                     "will be used on port=%d.",
4489                              dev->data->port_id);
4490
4491                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4492         } else {
4493                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4494                                     "satisfied, or Scattered Rx is requested "
4495                                     "(port=%d).",
4496                              dev->data->port_id);
4497
4498                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4499         }
4500
4501         /* Propagate information about RX function choice through all queues. */
4502
4503         rx_using_sse =
4504                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4505                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4506
4507         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4508                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4509
4510                 rxq->rx_using_sse = rx_using_sse;
4511         }
4512 }
4513
4514 /**
4515  * ixgbe_set_rsc - configure RSC related port HW registers
4516  *
4517  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4518  * of 82599 Spec (x540 configuration is virtually the same).
4519  *
4520  * @dev port handle
4521  *
4522  * Returns 0 in case of success or a non-zero error code
4523  */
4524 static int
4525 ixgbe_set_rsc(struct rte_eth_dev *dev)
4526 {
4527         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4528         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4529         struct rte_eth_dev_info dev_info = { 0 };
4530         bool rsc_capable = false;
4531         uint16_t i;
4532         uint32_t rdrxctl;
4533
4534         /* Sanity check */
4535         dev->dev_ops->dev_infos_get(dev, &dev_info);
4536         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4537                 rsc_capable = true;
4538
4539         if (!rsc_capable && rx_conf->enable_lro) {
4540                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4541                                    "support it");
4542                 return -EINVAL;
4543         }
4544
4545         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4546
4547         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4548                 /*
4549                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4550                  * 3.0 RSC configuration requires HW CRC stripping being
4551                  * enabled. If user requested both HW CRC stripping off
4552                  * and RSC on - return an error.
4553                  */
4554                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4555                                     "is disabled");
4556                 return -EINVAL;
4557         }
4558
4559         /* RFCTL configuration  */
4560         if (rsc_capable) {
4561                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4562
4563                 if (rx_conf->enable_lro)
4564                         /*
4565                          * Since NFS packets coalescing is not supported - clear
4566                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4567                          * enabled.
4568                          */
4569                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4570                                    IXGBE_RFCTL_NFSR_DIS);
4571                 else
4572                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4573
4574                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4575         }
4576
4577         /* If LRO hasn't been requested - we are done here. */
4578         if (!rx_conf->enable_lro)
4579                 return 0;
4580
4581         /* Set RDRXCTL.RSCACKC bit */
4582         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4583         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4584         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4585
4586         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4587         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4588                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4589                 uint32_t srrctl =
4590                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4591                 uint32_t rscctl =
4592                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4593                 uint32_t psrtype =
4594                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4595                 uint32_t eitr =
4596                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4597
4598                 /*
4599                  * ixgbe PMD doesn't support header-split at the moment.
4600                  *
4601                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4602                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4603                  * should be configured even if header split is not
4604                  * enabled. We will configure it 128 bytes following the
4605                  * recommendation in the spec.
4606                  */
4607                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4608                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4609                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4610
4611                 /*
4612                  * TODO: Consider setting the Receive Descriptor Minimum
4613                  * Threshold Size for an RSC case. This is not an obviously
4614                  * beneficiary option but the one worth considering...
4615                  */
4616
4617                 rscctl |= IXGBE_RSCCTL_RSCEN;
4618                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4619                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4620
4621                 /*
4622                  * RSC: Set ITR interval corresponding to 2K ints/s.
4623                  *
4624                  * Full-sized RSC aggregations for a 10Gb/s link will
4625                  * arrive at about 20K aggregation/s rate.
4626                  *
4627                  * 2K inst/s rate will make only 10% of the
4628                  * aggregations to be closed due to the interrupt timer
4629                  * expiration for a streaming at wire-speed case.
4630                  *
4631                  * For a sparse streaming case this setting will yield
4632                  * at most 500us latency for a single RSC aggregation.
4633                  */
4634                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4635                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4636
4637                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4638                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4639                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4640                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4641
4642                 /*
4643                  * RSC requires the mapping of the queue to the
4644                  * interrupt vector.
4645                  */
4646                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4647         }
4648
4649         dev->data->lro = 1;
4650
4651         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4652
4653         return 0;
4654 }
4655
4656 /*
4657  * Initializes Receive Unit.
4658  */
4659 int __attribute__((cold))
4660 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4661 {
4662         struct ixgbe_hw     *hw;
4663         struct ixgbe_rx_queue *rxq;
4664         uint64_t bus_addr;
4665         uint32_t rxctrl;
4666         uint32_t fctrl;
4667         uint32_t hlreg0;
4668         uint32_t maxfrs;
4669         uint32_t srrctl;
4670         uint32_t rdrxctl;
4671         uint32_t rxcsum;
4672         uint16_t buf_size;
4673         uint16_t i;
4674         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4675         int rc;
4676
4677         PMD_INIT_FUNC_TRACE();
4678         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4679
4680         /*
4681          * Make sure receives are disabled while setting
4682          * up the RX context (registers, descriptor rings, etc.).
4683          */
4684         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4685         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4686
4687         /* Enable receipt of broadcasted frames */
4688         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4689         fctrl |= IXGBE_FCTRL_BAM;
4690         fctrl |= IXGBE_FCTRL_DPF;
4691         fctrl |= IXGBE_FCTRL_PMCF;
4692         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4693
4694         /*
4695          * Configure CRC stripping, if any.
4696          */
4697         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4698         if (rx_conf->hw_strip_crc)
4699                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4700         else
4701                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4702
4703         /*
4704          * Configure jumbo frame support, if any.
4705          */
4706         if (rx_conf->jumbo_frame == 1) {
4707                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4708                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4709                 maxfrs &= 0x0000FFFF;
4710                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4711                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4712         } else
4713                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4714
4715         /*
4716          * If loopback mode is configured for 82599, set LPBK bit.
4717          */
4718         if (hw->mac.type == ixgbe_mac_82599EB &&
4719                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4720                 hlreg0 |= IXGBE_HLREG0_LPBK;
4721         else
4722                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4723
4724         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4725
4726         /* Setup RX queues */
4727         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4728                 rxq = dev->data->rx_queues[i];
4729
4730                 /*
4731                  * Reset crc_len in case it was changed after queue setup by a
4732                  * call to configure.
4733                  */
4734                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4735
4736                 /* Setup the Base and Length of the Rx Descriptor Rings */
4737                 bus_addr = rxq->rx_ring_phys_addr;
4738                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4739                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4740                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4741                                 (uint32_t)(bus_addr >> 32));
4742                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4743                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4744                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4745                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4746
4747                 /* Configure the SRRCTL register */
4748 #ifdef RTE_HEADER_SPLIT_ENABLE
4749                 /*
4750                  * Configure Header Split
4751                  */
4752                 if (rx_conf->header_split) {
4753                         if (hw->mac.type == ixgbe_mac_82599EB) {
4754                                 /* Must setup the PSRTYPE register */
4755                                 uint32_t psrtype;
4756
4757                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4758                                         IXGBE_PSRTYPE_UDPHDR   |
4759                                         IXGBE_PSRTYPE_IPV4HDR  |
4760                                         IXGBE_PSRTYPE_IPV6HDR;
4761                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4762                         }
4763                         srrctl = ((rx_conf->split_hdr_size <<
4764                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4765                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4766                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4767                 } else
4768 #endif
4769                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4770
4771                 /* Set if packets are dropped when no descriptors available */
4772                 if (rxq->drop_en)
4773                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4774
4775                 /*
4776                  * Configure the RX buffer size in the BSIZEPACKET field of
4777                  * the SRRCTL register of the queue.
4778                  * The value is in 1 KB resolution. Valid values can be from
4779                  * 1 KB to 16 KB.
4780                  */
4781                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4782                         RTE_PKTMBUF_HEADROOM);
4783                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4784                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4785
4786                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4787
4788                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4789                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4790
4791                 /* It adds dual VLAN length for supporting dual VLAN */
4792                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4793                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4794                         dev->data->scattered_rx = 1;
4795         }
4796
4797         if (rx_conf->enable_scatter)
4798                 dev->data->scattered_rx = 1;
4799
4800         /*
4801          * Device configured with multiple RX queues.
4802          */
4803         ixgbe_dev_mq_rx_configure(dev);
4804
4805         /*
4806          * Setup the Checksum Register.
4807          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4808          * Enable IP/L4 checkum computation by hardware if requested to do so.
4809          */
4810         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4811         rxcsum |= IXGBE_RXCSUM_PCSD;
4812         if (rx_conf->hw_ip_checksum)
4813                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4814         else
4815                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4816
4817         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4818
4819         if (hw->mac.type == ixgbe_mac_82599EB ||
4820             hw->mac.type == ixgbe_mac_X540) {
4821                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4822                 if (rx_conf->hw_strip_crc)
4823                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4824                 else
4825                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4826                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4827                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4828         }
4829
4830         rc = ixgbe_set_rsc(dev);
4831         if (rc)
4832                 return rc;
4833
4834         ixgbe_set_rx_function(dev);
4835
4836         return 0;
4837 }
4838
4839 /*
4840  * Initializes Transmit Unit.
4841  */
4842 void __attribute__((cold))
4843 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4844 {
4845         struct ixgbe_hw     *hw;
4846         struct ixgbe_tx_queue *txq;
4847         uint64_t bus_addr;
4848         uint32_t hlreg0;
4849         uint32_t txctrl;
4850         uint16_t i;
4851
4852         PMD_INIT_FUNC_TRACE();
4853         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4854
4855         /* Enable TX CRC (checksum offload requirement) and hw padding
4856          * (TSO requirement)
4857          */
4858         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4859         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4860         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4861
4862         /* Setup the Base and Length of the Tx Descriptor Rings */
4863         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4864                 txq = dev->data->tx_queues[i];
4865
4866                 bus_addr = txq->tx_ring_phys_addr;
4867                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4868                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4869                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4870                                 (uint32_t)(bus_addr >> 32));
4871                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4872                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4873                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4874                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4875                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4876
4877                 /*
4878                  * Disable Tx Head Writeback RO bit, since this hoses
4879                  * bookkeeping if things aren't delivered in order.
4880                  */
4881                 switch (hw->mac.type) {
4882                 case ixgbe_mac_82598EB:
4883                         txctrl = IXGBE_READ_REG(hw,
4884                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4885                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4886                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4887                                         txctrl);
4888                         break;
4889
4890                 case ixgbe_mac_82599EB:
4891                 case ixgbe_mac_X540:
4892                 case ixgbe_mac_X550:
4893                 case ixgbe_mac_X550EM_x:
4894                 case ixgbe_mac_X550EM_a:
4895                 default:
4896                         txctrl = IXGBE_READ_REG(hw,
4897                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4898                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4899                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4900                                         txctrl);
4901                         break;
4902                 }
4903         }
4904
4905         /* Device configured with multiple TX queues. */
4906         ixgbe_dev_mq_tx_configure(dev);
4907 }
4908
4909 /*
4910  * Set up link for 82599 loopback mode Tx->Rx.
4911  */
4912 static inline void __attribute__((cold))
4913 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4914 {
4915         PMD_INIT_FUNC_TRACE();
4916
4917         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4918                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4919                                 IXGBE_SUCCESS) {
4920                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4921                         /* ignore error */
4922                         return;
4923                 }
4924         }
4925
4926         /* Restart link */
4927         IXGBE_WRITE_REG(hw,
4928                         IXGBE_AUTOC,
4929                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4930         ixgbe_reset_pipeline_82599(hw);
4931
4932         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4933         msec_delay(50);
4934 }
4935
4936
4937 /*
4938  * Start Transmit and Receive Units.
4939  */
4940 int __attribute__((cold))
4941 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4942 {
4943         struct ixgbe_hw     *hw;
4944         struct ixgbe_tx_queue *txq;
4945         struct ixgbe_rx_queue *rxq;
4946         uint32_t txdctl;
4947         uint32_t dmatxctl;
4948         uint32_t rxctrl;
4949         uint16_t i;
4950         int ret = 0;
4951
4952         PMD_INIT_FUNC_TRACE();
4953         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4954
4955         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4956                 txq = dev->data->tx_queues[i];
4957                 /* Setup Transmit Threshold Registers */
4958                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4959                 txdctl |= txq->pthresh & 0x7F;
4960                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4961                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4962                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4963         }
4964
4965         if (hw->mac.type != ixgbe_mac_82598EB) {
4966                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4967                 dmatxctl |= IXGBE_DMATXCTL_TE;
4968                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4969         }
4970
4971         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4972                 txq = dev->data->tx_queues[i];
4973                 if (!txq->tx_deferred_start) {
4974                         ret = ixgbe_dev_tx_queue_start(dev, i);
4975                         if (ret < 0)
4976                                 return ret;
4977                 }
4978         }
4979
4980         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4981                 rxq = dev->data->rx_queues[i];
4982                 if (!rxq->rx_deferred_start) {
4983                         ret = ixgbe_dev_rx_queue_start(dev, i);
4984                         if (ret < 0)
4985                                 return ret;
4986                 }
4987         }
4988
4989         /* Enable Receive engine */
4990         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4991         if (hw->mac.type == ixgbe_mac_82598EB)
4992                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4993         rxctrl |= IXGBE_RXCTRL_RXEN;
4994         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4995
4996         /* If loopback mode is enabled for 82599, set up the link accordingly */
4997         if (hw->mac.type == ixgbe_mac_82599EB &&
4998                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4999                 ixgbe_setup_loopback_link_82599(hw);
5000
5001         return 0;
5002 }
5003
5004 /*
5005  * Start Receive Units for specified queue.
5006  */
5007 int __attribute__((cold))
5008 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5009 {
5010         struct ixgbe_hw     *hw;
5011         struct ixgbe_rx_queue *rxq;
5012         uint32_t rxdctl;
5013         int poll_ms;
5014
5015         PMD_INIT_FUNC_TRACE();
5016         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5017
5018         if (rx_queue_id < dev->data->nb_rx_queues) {
5019                 rxq = dev->data->rx_queues[rx_queue_id];
5020
5021                 /* Allocate buffers for descriptor rings */
5022                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5023                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5024                                      rx_queue_id);
5025                         return -1;
5026                 }
5027                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5028                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5029                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5030
5031                 /* Wait until RX Enable ready */
5032                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5033                 do {
5034                         rte_delay_ms(1);
5035                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5036                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5037                 if (!poll_ms)
5038                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
5039                                      rx_queue_id);
5040                 rte_wmb();
5041                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5042                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5043                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5044         } else
5045                 return -1;
5046
5047         return 0;
5048 }
5049
5050 /*
5051  * Stop Receive Units for specified queue.
5052  */
5053 int __attribute__((cold))
5054 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5055 {
5056         struct ixgbe_hw     *hw;
5057         struct ixgbe_adapter *adapter =
5058                 (struct ixgbe_adapter *)dev->data->dev_private;
5059         struct ixgbe_rx_queue *rxq;
5060         uint32_t rxdctl;
5061         int poll_ms;
5062
5063         PMD_INIT_FUNC_TRACE();
5064         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5065
5066         if (rx_queue_id < dev->data->nb_rx_queues) {
5067                 rxq = dev->data->rx_queues[rx_queue_id];
5068
5069                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5070                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5071                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5072
5073                 /* Wait until RX Enable bit clear */
5074                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5075                 do {
5076                         rte_delay_ms(1);
5077                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5078                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5079                 if (!poll_ms)
5080                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5081                                      rx_queue_id);
5082
5083                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5084
5085                 ixgbe_rx_queue_release_mbufs(rxq);
5086                 ixgbe_reset_rx_queue(adapter, rxq);
5087                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5088         } else
5089                 return -1;
5090
5091         return 0;
5092 }
5093
5094
5095 /*
5096  * Start Transmit Units for specified queue.
5097  */
5098 int __attribute__((cold))
5099 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5100 {
5101         struct ixgbe_hw     *hw;
5102         struct ixgbe_tx_queue *txq;
5103         uint32_t txdctl;
5104         int poll_ms;
5105
5106         PMD_INIT_FUNC_TRACE();
5107         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5108
5109         if (tx_queue_id < dev->data->nb_tx_queues) {
5110                 txq = dev->data->tx_queues[tx_queue_id];
5111                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5112                 txdctl |= IXGBE_TXDCTL_ENABLE;
5113                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5114
5115                 /* Wait until TX Enable ready */
5116                 if (hw->mac.type == ixgbe_mac_82599EB) {
5117                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5118                         do {
5119                                 rte_delay_ms(1);
5120                                 txdctl = IXGBE_READ_REG(hw,
5121                                         IXGBE_TXDCTL(txq->reg_idx));
5122                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5123                         if (!poll_ms)
5124                                 PMD_INIT_LOG(ERR, "Could not enable "
5125                                              "Tx Queue %d", tx_queue_id);
5126                 }
5127                 rte_wmb();
5128                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5129                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5130                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5131         } else
5132                 return -1;
5133
5134         return 0;
5135 }
5136
5137 /*
5138  * Stop Transmit Units for specified queue.
5139  */
5140 int __attribute__((cold))
5141 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5142 {
5143         struct ixgbe_hw     *hw;
5144         struct ixgbe_tx_queue *txq;
5145         uint32_t txdctl;
5146         uint32_t txtdh, txtdt;
5147         int poll_ms;
5148
5149         PMD_INIT_FUNC_TRACE();
5150         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5151
5152         if (tx_queue_id >= dev->data->nb_tx_queues)
5153                 return -1;
5154
5155         txq = dev->data->tx_queues[tx_queue_id];
5156
5157         /* Wait until TX queue is empty */
5158         if (hw->mac.type == ixgbe_mac_82599EB) {
5159                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5160                 do {
5161                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5162                         txtdh = IXGBE_READ_REG(hw,
5163                                                IXGBE_TDH(txq->reg_idx));
5164                         txtdt = IXGBE_READ_REG(hw,
5165                                                IXGBE_TDT(txq->reg_idx));
5166                 } while (--poll_ms && (txtdh != txtdt));
5167                 if (!poll_ms)
5168                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5169                                      "when stopping.", tx_queue_id);
5170         }
5171
5172         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5173         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5174         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5175
5176         /* Wait until TX Enable bit clear */
5177         if (hw->mac.type == ixgbe_mac_82599EB) {
5178                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5179                 do {
5180                         rte_delay_ms(1);
5181                         txdctl = IXGBE_READ_REG(hw,
5182                                                 IXGBE_TXDCTL(txq->reg_idx));
5183                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5184                 if (!poll_ms)
5185                         PMD_INIT_LOG(ERR, "Could not disable "
5186                                      "Tx Queue %d", tx_queue_id);
5187         }
5188
5189         if (txq->ops != NULL) {
5190                 txq->ops->release_mbufs(txq);
5191                 txq->ops->reset(txq);
5192         }
5193         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5194
5195         return 0;
5196 }
5197
5198 void
5199 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5200         struct rte_eth_rxq_info *qinfo)
5201 {
5202         struct ixgbe_rx_queue *rxq;
5203
5204         rxq = dev->data->rx_queues[queue_id];
5205
5206         qinfo->mp = rxq->mb_pool;
5207         qinfo->scattered_rx = dev->data->scattered_rx;
5208         qinfo->nb_desc = rxq->nb_rx_desc;
5209
5210         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5211         qinfo->conf.rx_drop_en = rxq->drop_en;
5212         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5213 }
5214
5215 void
5216 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5217         struct rte_eth_txq_info *qinfo)
5218 {
5219         struct ixgbe_tx_queue *txq;
5220
5221         txq = dev->data->tx_queues[queue_id];
5222
5223         qinfo->nb_desc = txq->nb_tx_desc;
5224
5225         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5226         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5227         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5228
5229         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5230         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5231         qinfo->conf.txq_flags = txq->txq_flags;
5232         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5233 }
5234
5235 /*
5236  * [VF] Initializes Receive Unit.
5237  */
5238 int __attribute__((cold))
5239 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5240 {
5241         struct ixgbe_hw     *hw;
5242         struct ixgbe_rx_queue *rxq;
5243         uint64_t bus_addr;
5244         uint32_t srrctl, psrtype = 0;
5245         uint16_t buf_size;
5246         uint16_t i;
5247         int ret;
5248
5249         PMD_INIT_FUNC_TRACE();
5250         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5251
5252         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5253                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5254                         "it should be power of 2");
5255                 return -1;
5256         }
5257
5258         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5259                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5260                         "it should be equal to or less than %d",
5261                         hw->mac.max_rx_queues);
5262                 return -1;
5263         }
5264
5265         /*
5266          * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
5267          * disables the VF receipt of packets if the PF MTU is > 1500.
5268          * This is done to deal with 82599 limitations that imposes
5269          * the PF and all VFs to share the same MTU.
5270          * Then, the PF driver enables again the VF receipt of packet when
5271          * the VF driver issues a IXGBE_VF_SET_LPE request.
5272          * In the meantime, the VF device cannot be used, even if the VF driver
5273          * and the Guest VM network stack are ready to accept packets with a
5274          * size up to the PF MTU.
5275          * As a work-around to this PF behaviour, force the call to
5276          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5277          * VF packets received can work in all cases.
5278          */
5279         ixgbevf_rlpml_set_vf(hw,
5280                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5281
5282         /* Setup RX queues */
5283         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5284                 rxq = dev->data->rx_queues[i];
5285
5286                 /* Allocate buffers for descriptor rings */
5287                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5288                 if (ret)
5289                         return ret;
5290
5291                 /* Setup the Base and Length of the Rx Descriptor Rings */
5292                 bus_addr = rxq->rx_ring_phys_addr;
5293
5294                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5295                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5296                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5297                                 (uint32_t)(bus_addr >> 32));
5298                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5299                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5300                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5301                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5302
5303
5304                 /* Configure the SRRCTL register */
5305 #ifdef RTE_HEADER_SPLIT_ENABLE
5306                 /*
5307                  * Configure Header Split
5308                  */
5309                 if (dev->data->dev_conf.rxmode.header_split) {
5310                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5311                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5312                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5313                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5314                 } else
5315 #endif
5316                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5317
5318                 /* Set if packets are dropped when no descriptors available */
5319                 if (rxq->drop_en)
5320                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5321
5322                 /*
5323                  * Configure the RX buffer size in the BSIZEPACKET field of
5324                  * the SRRCTL register of the queue.
5325                  * The value is in 1 KB resolution. Valid values can be from
5326                  * 1 KB to 16 KB.
5327                  */
5328                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5329                         RTE_PKTMBUF_HEADROOM);
5330                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5331                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5332
5333                 /*
5334                  * VF modification to write virtual function SRRCTL register
5335                  */
5336                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5337
5338                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5339                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5340
5341                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5342                     /* It adds dual VLAN length for supporting dual VLAN */
5343                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5344                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5345                         if (!dev->data->scattered_rx)
5346                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5347                         dev->data->scattered_rx = 1;
5348                 }
5349         }
5350
5351 #ifdef RTE_HEADER_SPLIT_ENABLE
5352         if (dev->data->dev_conf.rxmode.header_split)
5353                 /* Must setup the PSRTYPE register */
5354                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5355                         IXGBE_PSRTYPE_UDPHDR   |
5356                         IXGBE_PSRTYPE_IPV4HDR  |
5357                         IXGBE_PSRTYPE_IPV6HDR;
5358 #endif
5359
5360         /* Set RQPL for VF RSS according to max Rx queue */
5361         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5362                 IXGBE_PSRTYPE_RQPL_SHIFT;
5363         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5364
5365         ixgbe_set_rx_function(dev);
5366
5367         return 0;
5368 }
5369
5370 /*
5371  * [VF] Initializes Transmit Unit.
5372  */
5373 void __attribute__((cold))
5374 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5375 {
5376         struct ixgbe_hw     *hw;
5377         struct ixgbe_tx_queue *txq;
5378         uint64_t bus_addr;
5379         uint32_t txctrl;
5380         uint16_t i;
5381
5382         PMD_INIT_FUNC_TRACE();
5383         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5384
5385         /* Setup the Base and Length of the Tx Descriptor Rings */
5386         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5387                 txq = dev->data->tx_queues[i];
5388                 bus_addr = txq->tx_ring_phys_addr;
5389                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5390                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5391                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5392                                 (uint32_t)(bus_addr >> 32));
5393                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5394                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5395                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5396                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5397                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5398
5399                 /*
5400                  * Disable Tx Head Writeback RO bit, since this hoses
5401                  * bookkeeping if things aren't delivered in order.
5402                  */
5403                 txctrl = IXGBE_READ_REG(hw,
5404                                 IXGBE_VFDCA_TXCTRL(i));
5405                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5406                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5407                                 txctrl);
5408         }
5409 }
5410
5411 /*
5412  * [VF] Start Transmit and Receive Units.
5413  */
5414 void __attribute__((cold))
5415 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5416 {
5417         struct ixgbe_hw     *hw;
5418         struct ixgbe_tx_queue *txq;
5419         struct ixgbe_rx_queue *rxq;
5420         uint32_t txdctl;
5421         uint32_t rxdctl;
5422         uint16_t i;
5423         int poll_ms;
5424
5425         PMD_INIT_FUNC_TRACE();
5426         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5427
5428         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5429                 txq = dev->data->tx_queues[i];
5430                 /* Setup Transmit Threshold Registers */
5431                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5432                 txdctl |= txq->pthresh & 0x7F;
5433                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5434                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5435                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5436         }
5437
5438         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5439
5440                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5441                 txdctl |= IXGBE_TXDCTL_ENABLE;
5442                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5443
5444                 poll_ms = 10;
5445                 /* Wait until TX Enable ready */
5446                 do {
5447                         rte_delay_ms(1);
5448                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5449                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5450                 if (!poll_ms)
5451                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5452         }
5453         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5454
5455                 rxq = dev->data->rx_queues[i];
5456
5457                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5458                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5459                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5460
5461                 /* Wait until RX Enable ready */
5462                 poll_ms = 10;
5463                 do {
5464                         rte_delay_ms(1);
5465                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5466                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5467                 if (!poll_ms)
5468                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5469                 rte_wmb();
5470                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5471
5472         }
5473 }
5474
5475 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5476 int __attribute__((weak))
5477 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5478 {
5479         return -1;
5480 }
5481
5482 uint16_t __attribute__((weak))
5483 ixgbe_recv_pkts_vec(
5484         void __rte_unused *rx_queue,
5485         struct rte_mbuf __rte_unused **rx_pkts,
5486         uint16_t __rte_unused nb_pkts)
5487 {
5488         return 0;
5489 }
5490
5491 uint16_t __attribute__((weak))
5492 ixgbe_recv_scattered_pkts_vec(
5493         void __rte_unused *rx_queue,
5494         struct rte_mbuf __rte_unused **rx_pkts,
5495         uint16_t __rte_unused nb_pkts)
5496 {
5497         return 0;
5498 }
5499
5500 int __attribute__((weak))
5501 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5502 {
5503         return -1;
5504 }