net/ixgbe: use I/O device memory read/write API
[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask to indicate which bits are required for building the TX context */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG |                 \
89                 PKT_TX_MACSEC |                  \
90                 PKT_TX_OUTER_IP_CKSUM)
91
92 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
93                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
94
95 #if 1
96 #define RTE_PMD_USE_PREFETCH
97 #endif
98
99 #ifdef RTE_PMD_USE_PREFETCH
100 /*
101  * Prefetch a cache line into all cache levels.
102  */
103 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
104 #else
105 #define rte_ixgbe_prefetch(p)   do {} while (0)
106 #endif
107
108 /*********************************************************************
109  *
110  *  TX functions
111  *
112  **********************************************************************/
113
114 /*
115  * Check for descriptors with their DD bit set and free mbufs.
116  * Return the total number of buffers freed.
117  */
118 static inline int __attribute__((always_inline))
119 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
120 {
121         struct ixgbe_tx_entry *txep;
122         uint32_t status;
123         int i, nb_free = 0;
124         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
125
126         /* check DD bit on threshold descriptor */
127         status = txq->tx_ring[txq->tx_next_dd].wb.status;
128         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
129                 return 0;
130
131         /*
132          * first buffer to free from S/W ring is at index
133          * tx_next_dd - (tx_rs_thresh-1)
134          */
135         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
136
137         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
138                 /* free buffers one at a time */
139                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
140                 txep->mbuf = NULL;
141
142                 if (unlikely(m == NULL))
143                         continue;
144
145                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
146                     (nb_free > 0 && m->pool != free[0]->pool)) {
147                         rte_mempool_put_bulk(free[0]->pool,
148                                              (void **)free, nb_free);
149                         nb_free = 0;
150                 }
151
152                 free[nb_free++] = m;
153         }
154
155         if (nb_free > 0)
156                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
157
158         /* buffers were freed, update counters */
159         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
160         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
161         if (txq->tx_next_dd >= txq->nb_tx_desc)
162                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
163
164         return txq->tx_rs_thresh;
165 }
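/*
 * Editor's sketch (not driver code): a worked example of the bookkeeping
 * above, assuming a 512-entry ring with tx_rs_thresh = 32.  tx_next_dd
 * starts at 31; once that descriptor's DD bit is set, the 32 mbufs at
 * sw_ring[0..31] are returned to their mempool, nb_tx_free grows by 32
 * and tx_next_dd advances to 63.  After the final batch (tx_next_dd = 511)
 * it wraps back to tx_rs_thresh - 1 = 31.
 */
#if 0
	/* the range of software-ring entries freed in one call */
	uint16_t first = (uint16_t)(txq->tx_next_dd - (txq->tx_rs_thresh - 1));
	uint16_t last  = txq->tx_next_dd;	/* inclusive */
#endif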
166
167 /* Populate 4 descriptors with data from 4 mbufs */
168 static inline void
169 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
170 {
171         uint64_t buf_dma_addr;
172         uint32_t pkt_len;
173         int i;
174
175         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
176                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
177                 pkt_len = (*pkts)->data_len;
178
179                 /* write data to descriptor */
180                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
181
182                 txdp->read.cmd_type_len =
183                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
184
185                 txdp->read.olinfo_status =
186                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
187
188                 rte_prefetch0(&(*pkts)->pool);
189         }
190 }
191
192 /* Populate 1 descriptor with data from 1 mbuf */
193 static inline void
194 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
195 {
196         uint64_t buf_dma_addr;
197         uint32_t pkt_len;
198
199         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
200         pkt_len = (*pkts)->data_len;
201
202         /* write data to descriptor */
203         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
204         txdp->read.cmd_type_len =
205                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
206         txdp->read.olinfo_status =
207                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
208         rte_prefetch0(&(*pkts)->pool);
209 }
210
211 /*
212  * Fill H/W descriptor ring with mbuf data.
213  * Copy mbuf pointers to the S/W ring.
214  */
215 static inline void
216 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
217                       uint16_t nb_pkts)
218 {
219         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
220         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
221         const int N_PER_LOOP = 4;
222         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
223         int mainpart, leftover;
224         int i, j;
225
226         /*
227          * Process most of the packets in chunks of N pkts.  Any
228          * leftover packets will get processed one at a time.
229          */
230         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
231         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
232         for (i = 0; i < mainpart; i += N_PER_LOOP) {
233                 /* Copy N mbuf pointers to the S/W ring */
234                 for (j = 0; j < N_PER_LOOP; ++j) {
235                         (txep + i + j)->mbuf = *(pkts + i + j);
236                 }
237                 tx4(txdp + i, pkts + i);
238         }
239
240         if (unlikely(leftover > 0)) {
241                 for (i = 0; i < leftover; ++i) {
242                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
243                         tx1(txdp + mainpart + i, pkts + mainpart + i);
244                 }
245         }
246 }
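/*
 * Editor's sketch (not driver code): the mainpart/leftover split above is
 * plain mask arithmetic.  With N_PER_LOOP = 4 the mask is 0x3, so for
 * nb_pkts = 27: mainpart = 27 & ~0x3 = 24 descriptors are written by tx4()
 * in six iterations, and leftover = 27 & 0x3 = 3 descriptors are written
 * one at a time by tx1().
 */
#if 0
	uint16_t mainpart = nb_pkts & ~(4 - 1);	/* 27 -> 24 */
	uint16_t leftover = nb_pkts &  (4 - 1);	/* 27 -> 3  */
#endif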
247
248 static inline uint16_t
249 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
250              uint16_t nb_pkts)
251 {
252         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
253         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
254         uint16_t n = 0;
255
256         /*
257          * Begin scanning the H/W ring for done descriptors when the
258          * number of available descriptors drops below tx_free_thresh.  For
259          * each done descriptor, free the associated buffer.
260          */
261         if (txq->nb_tx_free < txq->tx_free_thresh)
262                 ixgbe_tx_free_bufs(txq);
263
264         /* Only use descriptors that are available */
265         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
266         if (unlikely(nb_pkts == 0))
267                 return 0;
268
269         /* Use exactly nb_pkts descriptors */
270         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
271
272         /*
273          * At this point, we know there are enough descriptors in the
274          * ring to transmit all the packets.  This assumes that each
275          * mbuf contains a single segment, and that no new offloads
276          * are expected, which would require a new context descriptor.
277          */
278
279         /*
280          * See if we're going to wrap-around. If so, handle the top
281          * of the descriptor ring first, then do the bottom.  If not,
282          * the processing looks just like the "bottom" part anyway...
283          */
284         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
285                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
286                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
287
288                 /*
289                  * We know that the last descriptor in the ring will need to
290                  * have its RS bit set because tx_rs_thresh has to be
291                  * a divisor of the ring size
292                  */
293                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
294                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
295                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
296
297                 txq->tx_tail = 0;
298         }
299
300         /* Fill H/W descriptor ring with mbuf data */
301         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
302         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
303
304         /*
305          * Determine if RS bit should be set
306          * This is what we actually want:
307          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
308          * but instead of subtracting 1 and doing >=, we can just do
309          * greater than without subtracting.
310          */
311         if (txq->tx_tail > txq->tx_next_rs) {
312                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
313                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
314                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
315                                                 txq->tx_rs_thresh);
316                 if (txq->tx_next_rs >= txq->nb_tx_desc)
317                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
318         }
319
320         /*
321          * Check for wrap-around. This would only happen if we used
322          * up to the last descriptor in the ring, no more, no less.
323          */
324         if (txq->tx_tail >= txq->nb_tx_desc)
325                 txq->tx_tail = 0;
326
327         /* update tail pointer */
328         rte_wmb();
329         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
330
331         return nb_pkts;
332 }
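/*
 * Editor's sketch (not driver code): a worked example of the wrap-around
 * handling and the RS comparison trick above, assuming nb_tx_desc = 512.
 * With tx_tail = 500 and nb_pkts = 20, the first fill writes n = 12
 * descriptors (500..511), tx_tail resets to 0, and the second fill writes
 * the remaining 8 (0..7), leaving tx_tail = 8.  For the RS bit, the
 * intended test "(tx_tail - 1) >= tx_next_rs" is equivalent to
 * "tx_tail > tx_next_rs" for these unsigned values, e.g. with
 * tx_next_rs = 31 and tx_tail = 40, descriptor 31 gets its RS bit set and
 * tx_next_rs moves on to 63.
 */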
333
334 uint16_t
335 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
336                        uint16_t nb_pkts)
337 {
338         uint16_t nb_tx;
339
340         /* Transmit in chunks of at most TX_MAX_BURST pkts */
341         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
342                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
343
344         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
345         nb_tx = 0;
346         while (nb_pkts) {
347                 uint16_t ret, n;
348
349                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
350                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
351                 nb_tx = (uint16_t)(nb_tx + ret);
352                 nb_pkts = (uint16_t)(nb_pkts - ret);
353                 if (ret < n)
354                         break;
355         }
356
357         return nb_tx;
358 }
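/*
 * Editor's sketch (not driver code): how the chunking above plays out for
 * a caller, assuming the usual RTE_PMD_IXGBE_TX_MAX_BURST of 32.  A burst
 * of 100 packets arriving through rte_eth_tx_burst() is handed to
 * tx_xmit_pkts() as 32 + 32 + 32 + 4; if any chunk is cut short because
 * the ring ran out of free descriptors, the loop stops and the partial
 * count is returned to the application.
 */
#if 0
	/* hypothetical caller-side view; port_id, queue_id and pkts are not
	 * defined here */
	uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, 100);
#endif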
359
360 static inline void
361 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
362                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
363                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
364 {
365         uint32_t type_tucmd_mlhl;
366         uint32_t mss_l4len_idx = 0;
367         uint32_t ctx_idx;
368         uint32_t vlan_macip_lens;
369         union ixgbe_tx_offload tx_offload_mask;
370         uint32_t seqnum_seed = 0;
371
372         ctx_idx = txq->ctx_curr;
373         tx_offload_mask.data[0] = 0;
374         tx_offload_mask.data[1] = 0;
375         type_tucmd_mlhl = 0;
376
377         /* Specify which HW CTX to upload. */
378         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
379
380         if (ol_flags & PKT_TX_VLAN_PKT) {
381                 tx_offload_mask.vlan_tci |= ~0;
382         }
383
384         /* check if TCP segmentation is required for this packet */
385         if (ol_flags & PKT_TX_TCP_SEG) {
386                 /* implies IP cksum in IPv4 */
387                 if (ol_flags & PKT_TX_IP_CKSUM)
388                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
389                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
390                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
391                 else
392                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
393                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
394                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
395
396                 tx_offload_mask.l2_len |= ~0;
397                 tx_offload_mask.l3_len |= ~0;
398                 tx_offload_mask.l4_len |= ~0;
399                 tx_offload_mask.tso_segsz |= ~0;
400                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
401                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
402         } else { /* no TSO, check if hardware checksum is needed */
403                 if (ol_flags & PKT_TX_IP_CKSUM) {
404                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
405                         tx_offload_mask.l2_len |= ~0;
406                         tx_offload_mask.l3_len |= ~0;
407                 }
408
409                 switch (ol_flags & PKT_TX_L4_MASK) {
410                 case PKT_TX_UDP_CKSUM:
411                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
412                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
413                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
414                         tx_offload_mask.l2_len |= ~0;
415                         tx_offload_mask.l3_len |= ~0;
416                         break;
417                 case PKT_TX_TCP_CKSUM:
418                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
419                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
420                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
421                         tx_offload_mask.l2_len |= ~0;
422                         tx_offload_mask.l3_len |= ~0;
423                         break;
424                 case PKT_TX_SCTP_CKSUM:
425                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
426                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
427                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
428                         tx_offload_mask.l2_len |= ~0;
429                         tx_offload_mask.l3_len |= ~0;
430                         break;
431                 default:
432                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
433                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
434                         break;
435                 }
436         }
437
438         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
439                 tx_offload_mask.outer_l2_len |= ~0;
440                 tx_offload_mask.outer_l3_len |= ~0;
441                 tx_offload_mask.l2_len |= ~0;
442                 seqnum_seed |= tx_offload.outer_l3_len
443                                << IXGBE_ADVTXD_OUTER_IPLEN;
444                 seqnum_seed |= tx_offload.l2_len
445                                << IXGBE_ADVTXD_TUNNEL_LEN;
446         }
447
448         txq->ctx_cache[ctx_idx].flags = ol_flags;
449         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
450                 tx_offload_mask.data[0] & tx_offload.data[0];
451         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
452                 tx_offload_mask.data[1] & tx_offload.data[1];
453         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
454
455         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
456         vlan_macip_lens = tx_offload.l3_len;
457         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
458                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
459                                     IXGBE_ADVTXD_MACLEN_SHIFT);
460         else
461                 vlan_macip_lens |= (tx_offload.l2_len <<
462                                     IXGBE_ADVTXD_MACLEN_SHIFT);
463         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
464         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
465         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
466         ctx_txd->seqnum_seed     = seqnum_seed;
467 }
468
469 /*
470  * Check which hardware context can be used. Use the existing match
471  * or create a new context descriptor.
472  */
473 static inline uint32_t
474 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
475                    union ixgbe_tx_offload tx_offload)
476 {
477         /* If match with the current used context */
478         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
479                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
480                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
481                      & tx_offload.data[0])) &&
482                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
483                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
484                      & tx_offload.data[1]))))
485                 return txq->ctx_curr;
486
487         /* Otherwise, check whether the other cached context matches */
488         txq->ctx_curr ^= 1;
489         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
490                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
491                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
492                      & tx_offload.data[0])) &&
493                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
494                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
495                      & tx_offload.data[1]))))
496                 return txq->ctx_curr;
497
498         /* No match: a new context descriptor must be built */
499         return IXGBE_CTX_NUM;
500 }
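/*
 * Editor's sketch (not driver code): how the return value is consumed.
 * This mirrors the use in ixgbe_xmit_pkts() further down: IXGBE_CTX_NUM
 * means "no cached context matched", so one extra descriptor must be
 * spent on a fresh context.
 */
#if 0
	ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
	new_ctx = (ctx == IXGBE_CTX_NUM);	/* 1 -> build a context descriptor */
	ctx = txq->ctx_curr;			/* slot (0 or 1) referenced by data descriptors */
#endif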
501
502 static inline uint32_t
503 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
504 {
505         uint32_t tmp = 0;
506
507         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
508                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
509         if (ol_flags & PKT_TX_IP_CKSUM)
510                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
511         if (ol_flags & PKT_TX_TCP_SEG)
512                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
513         return tmp;
514 }
515
516 static inline uint32_t
517 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
518 {
519         uint32_t cmdtype = 0;
520
521         if (ol_flags & PKT_TX_VLAN_PKT)
522                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
523         if (ol_flags & PKT_TX_TCP_SEG)
524                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
525         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
526                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
527         if (ol_flags & PKT_TX_MACSEC)
528                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
529         return cmdtype;
530 }
531
532 /* Default RS bit threshold values */
533 #ifndef DEFAULT_TX_RS_THRESH
534 #define DEFAULT_TX_RS_THRESH   32
535 #endif
536 #ifndef DEFAULT_TX_FREE_THRESH
537 #define DEFAULT_TX_FREE_THRESH 32
538 #endif
539
540 /* Reset transmit descriptors after they have been used */
541 static inline int
542 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
543 {
544         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
545         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
546         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
547         uint16_t nb_tx_desc = txq->nb_tx_desc;
548         uint16_t desc_to_clean_to;
549         uint16_t nb_tx_to_clean;
550         uint32_t status;
551
552         /* Determine the last descriptor needing to be cleaned */
553         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
554         if (desc_to_clean_to >= nb_tx_desc)
555                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
556
557         /* Check to make sure the last descriptor to clean is done */
558         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
559         status = txr[desc_to_clean_to].wb.status;
560         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
561                 PMD_TX_FREE_LOG(DEBUG,
562                                 "TX descriptor %4u is not done "
563                                 "(port=%d queue=%d)",
564                                 desc_to_clean_to,
565                                 txq->port_id, txq->queue_id);
566                 /* Failed to clean any descriptors, better luck next time */
567                 return -(1);
568         }
569
570         /* Figure out how many descriptors will be cleaned */
571         if (last_desc_cleaned > desc_to_clean_to)
572                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
573                                                         desc_to_clean_to);
574         else
575                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
576                                                 last_desc_cleaned);
577
578         PMD_TX_FREE_LOG(DEBUG,
579                         "Cleaning %4u TX descriptors: %4u to %4u "
580                         "(port=%d queue=%d)",
581                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
582                         txq->port_id, txq->queue_id);
583
584         /*
585          * The last descriptor to clean is done, so that means all the
586          * descriptors from the last descriptor that was cleaned
587          * up to the last descriptor with the RS bit set
588          * are done. Only reset the threshold descriptor.
589          */
590         txr[desc_to_clean_to].wb.status = 0;
591
592         /* Update the txq to reflect the last descriptor that was cleaned */
593         txq->last_desc_cleaned = desc_to_clean_to;
594         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
595
596         /* No Error */
597         return 0;
598 }
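/*
 * Editor's sketch (not driver code): a worked example of the cleanup
 * arithmetic above, assuming nb_tx_desc = 512, tx_rs_thresh = 32 and
 * last_desc_cleaned = 500.  desc_to_clean_to = 500 + 32 = 532, which wraps
 * to 20.  Since 500 > 20, nb_tx_to_clean = (512 - 500) + 20 = 32, i.e.
 * exactly one RS-threshold's worth of descriptors is handed back to
 * nb_tx_free (ignoring any adjustment from sw_ring[].last_id for
 * multi-segment packets).
 */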
599
600 uint16_t
601 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
602                 uint16_t nb_pkts)
603 {
604         struct ixgbe_tx_queue *txq;
605         struct ixgbe_tx_entry *sw_ring;
606         struct ixgbe_tx_entry *txe, *txn;
607         volatile union ixgbe_adv_tx_desc *txr;
608         volatile union ixgbe_adv_tx_desc *txd, *txp;
609         struct rte_mbuf     *tx_pkt;
610         struct rte_mbuf     *m_seg;
611         uint64_t buf_dma_addr;
612         uint32_t olinfo_status;
613         uint32_t cmd_type_len;
614         uint32_t pkt_len;
615         uint16_t slen;
616         uint64_t ol_flags;
617         uint16_t tx_id;
618         uint16_t tx_last;
619         uint16_t nb_tx;
620         uint16_t nb_used;
621         uint64_t tx_ol_req;
622         uint32_t ctx = 0;
623         uint32_t new_ctx;
624         union ixgbe_tx_offload tx_offload;
625
626         tx_offload.data[0] = 0;
627         tx_offload.data[1] = 0;
628         txq = tx_queue;
629         sw_ring = txq->sw_ring;
630         txr     = txq->tx_ring;
631         tx_id   = txq->tx_tail;
632         txe = &sw_ring[tx_id];
633         txp = NULL;
634
635         /* Determine if the descriptor ring needs to be cleaned. */
636         if (txq->nb_tx_free < txq->tx_free_thresh)
637                 ixgbe_xmit_cleanup(txq);
638
639         rte_prefetch0(&txe->mbuf->pool);
640
641         /* TX loop */
642         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
643                 new_ctx = 0;
644                 tx_pkt = *tx_pkts++;
645                 pkt_len = tx_pkt->pkt_len;
646
647                 /*
648                  * Determine how many (if any) context descriptors
649                  * are needed for offload functionality.
650                  */
651                 ol_flags = tx_pkt->ol_flags;
652
653                 /* If hardware offload required */
654                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
655                 if (tx_ol_req) {
656                         tx_offload.l2_len = tx_pkt->l2_len;
657                         tx_offload.l3_len = tx_pkt->l3_len;
658                         tx_offload.l4_len = tx_pkt->l4_len;
659                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
660                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
661                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
662                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
663
664                         /* Build a new context descriptor or reuse an existing one. */
665                         ctx = what_advctx_update(txq, tx_ol_req,
666                                 tx_offload);
667                         /* Only allocate a context descriptor if required */
668                         new_ctx = (ctx == IXGBE_CTX_NUM);
669                         ctx = txq->ctx_curr;
670                 }
671
672                 /*
673                  * Keep track of how many descriptors are used for this packet.
674                  * This is always the number of segments plus the number of
675                  * context descriptors required to transmit the packet.
676                  */
677                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
678
679                 if (txp != NULL &&
680                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
681                         /* set RS on the previous packet in the burst */
682                         txp->read.cmd_type_len |=
683                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
684
685                 /*
686                  * The number of descriptors that must be allocated for a
687                  * packet is the number of segments of that packet, plus 1
688                  * Context Descriptor for the hardware offload, if any.
689                  * Determine the last TX descriptor to allocate in the TX ring
690                  * for the packet, starting from the current position (tx_id)
691                  * in the ring.
692                  */
693                 tx_last = (uint16_t) (tx_id + nb_used - 1);
694
695                 /* Circular ring */
696                 if (tx_last >= txq->nb_tx_desc)
697                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
698
699                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
700                            " tx_first=%u tx_last=%u",
701                            (unsigned) txq->port_id,
702                            (unsigned) txq->queue_id,
703                            (unsigned) pkt_len,
704                            (unsigned) tx_id,
705                            (unsigned) tx_last);
706
707                 /*
708                  * Make sure there are enough TX descriptors available to
709                  * transmit the entire packet.
710                  * nb_used better be less than or equal to txq->tx_rs_thresh
711                  */
712                 if (nb_used > txq->nb_tx_free) {
713                         PMD_TX_FREE_LOG(DEBUG,
714                                         "Not enough free TX descriptors "
715                                         "nb_used=%4u nb_free=%4u "
716                                         "(port=%d queue=%d)",
717                                         nb_used, txq->nb_tx_free,
718                                         txq->port_id, txq->queue_id);
719
720                         if (ixgbe_xmit_cleanup(txq) != 0) {
721                                 /* Could not clean any descriptors */
722                                 if (nb_tx == 0)
723                                         return 0;
724                                 goto end_of_tx;
725                         }
726
727                         /* nb_used better be <= txq->tx_rs_thresh */
728                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
729                                 PMD_TX_FREE_LOG(DEBUG,
730                                         "The number of descriptors needed to "
731                                         "transmit the packet exceeds the "
732                                         "RS bit threshold. This will impact "
733                                         "performance. "
734                                         "nb_used=%4u nb_free=%4u "
735                                         "tx_rs_thresh=%4u. "
736                                         "(port=%d queue=%d)",
737                                         nb_used, txq->nb_tx_free,
738                                         txq->tx_rs_thresh,
739                                         txq->port_id, txq->queue_id);
740                                 /*
741                                  * Loop here until there are enough TX
742                                  * descriptors or until the ring cannot be
743                                  * cleaned.
744                                  */
745                                 while (nb_used > txq->nb_tx_free) {
746                                         if (ixgbe_xmit_cleanup(txq) != 0) {
747                                                 /*
748                                                  * Could not clean any
749                                                  * descriptors
750                                                  */
751                                                 if (nb_tx == 0)
752                                                         return 0;
753                                                 goto end_of_tx;
754                                         }
755                                 }
756                         }
757                 }
758
759                 /*
760                  * By now there are enough free TX descriptors to transmit
761                  * the packet.
762                  */
763
764                 /*
765                  * Set common flags of all TX Data Descriptors.
766                  *
767                  * The following bits must be set in all Data Descriptors:
768                  *   - IXGBE_ADVTXD_DTYP_DATA
769                  *   - IXGBE_ADVTXD_DCMD_DEXT
770                  *
771                  * The following bits must be set in the first Data Descriptor
772                  * and are ignored in the other ones:
773                  *   - IXGBE_ADVTXD_DCMD_IFCS
774                  *   - IXGBE_ADVTXD_MAC_1588
775                  *   - IXGBE_ADVTXD_DCMD_VLE
776                  *
777                  * The following bits must only be set in the last Data
778                  * Descriptor:
779                  *   - IXGBE_TXD_CMD_EOP
780                  *
781                  * The following bits can be set in any Data Descriptor, but
782                  * are only set in the last Data Descriptor:
783                  *   - IXGBE_TXD_CMD_RS
784                  */
785                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
786                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
787
788 #ifdef RTE_LIBRTE_IEEE1588
789                 if (ol_flags & PKT_TX_IEEE1588_TMST)
790                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
791 #endif
792
793                 olinfo_status = 0;
794                 if (tx_ol_req) {
795
796                         if (ol_flags & PKT_TX_TCP_SEG) {
797                                 /* when TSO is on, the paylen in the descriptor
798                                  * is not the packet len but the TCP payload len */
799                                 pkt_len -= (tx_offload.l2_len +
800                                         tx_offload.l3_len + tx_offload.l4_len);
801                         }
802
803                         /*
804                          * Setup the TX Advanced Context Descriptor if required
805                          */
806                         if (new_ctx) {
807                                 volatile struct ixgbe_adv_tx_context_desc *
808                                     ctx_txd;
809
810                                 ctx_txd = (volatile struct
811                                     ixgbe_adv_tx_context_desc *)
812                                     &txr[tx_id];
813
814                                 txn = &sw_ring[txe->next_id];
815                                 rte_prefetch0(&txn->mbuf->pool);
816
817                                 if (txe->mbuf != NULL) {
818                                         rte_pktmbuf_free_seg(txe->mbuf);
819                                         txe->mbuf = NULL;
820                                 }
821
822                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
823                                         tx_offload);
824
825                                 txe->last_id = tx_last;
826                                 tx_id = txe->next_id;
827                                 txe = txn;
828                         }
829
830                         /*
831                          * Set up the TX Advanced Data Descriptor.
832                          * This path is taken whether a new context
833                          * descriptor was built or an existing one reused.
834                          */
835                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
836                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
837                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
838                 }
839
840                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
841
842                 m_seg = tx_pkt;
843                 do {
844                         txd = &txr[tx_id];
845                         txn = &sw_ring[txe->next_id];
846                         rte_prefetch0(&txn->mbuf->pool);
847
848                         if (txe->mbuf != NULL)
849                                 rte_pktmbuf_free_seg(txe->mbuf);
850                         txe->mbuf = m_seg;
851
852                         /*
853                          * Set up Transmit Data Descriptor.
854                          */
855                         slen = m_seg->data_len;
856                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
857                         txd->read.buffer_addr =
858                                 rte_cpu_to_le_64(buf_dma_addr);
859                         txd->read.cmd_type_len =
860                                 rte_cpu_to_le_32(cmd_type_len | slen);
861                         txd->read.olinfo_status =
862                                 rte_cpu_to_le_32(olinfo_status);
863                         txe->last_id = tx_last;
864                         tx_id = txe->next_id;
865                         txe = txn;
866                         m_seg = m_seg->next;
867                 } while (m_seg != NULL);
868
869                 /*
870                  * The last packet data descriptor needs End Of Packet (EOP)
871                  */
872                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
873                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
874                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
875
876                 /* Set RS bit only on threshold packets' last descriptor */
877                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
878                         PMD_TX_FREE_LOG(DEBUG,
879                                         "Setting RS bit on TXD id="
880                                         "%4u (port=%d queue=%d)",
881                                         tx_last, txq->port_id, txq->queue_id);
882
883                         cmd_type_len |= IXGBE_TXD_CMD_RS;
884
885                         /* Update txq RS bit counters */
886                         txq->nb_tx_used = 0;
887                         txp = NULL;
888                 } else
889                         txp = txd;
890
891                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
892         }
893
894 end_of_tx:
895         /* set RS on last packet in the burst */
896         if (txp != NULL)
897                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
898
899         rte_wmb();
900
901         /*
902          * Set the Transmit Descriptor Tail (TDT)
903          */
904         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
905                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
906                    (unsigned) tx_id, (unsigned) nb_tx);
907         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
908         txq->tx_tail = tx_id;
909
910         return nb_tx;
911 }
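/*
 * Editor's sketch (not driver code): descriptor accounting in the path
 * above for one hypothetical 3-segment TSO packet whose offload flags do
 * not match either cached context.  nb_used = nb_segs + new_ctx = 3 + 1,
 * so four ring entries are consumed: one context descriptor followed by
 * three data descriptors.  Only the last data descriptor carries EOP, and
 * RS is added to it only when nb_tx_used has reached tx_rs_thresh.
 */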
912
913 /*********************************************************************
914  *
915  *  TX prep functions
916  *
917  **********************************************************************/
918 uint16_t
919 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
920 {
921         int i, ret;
922         uint64_t ol_flags;
923         struct rte_mbuf *m;
924         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
925
926         for (i = 0; i < nb_pkts; i++) {
927                 m = tx_pkts[i];
928                 ol_flags = m->ol_flags;
929
930                 /**
931                  * Check if packet meets requirements for number of segments
932                  *
933                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
934                  *       non-TSO
935                  */
936
937                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
938                         rte_errno = EINVAL; /* rte_errno takes positive values */
939                         return i;
940                 }
941
942                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
943                         rte_errno = ENOTSUP;
944                         return i;
945                 }
946
947 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
948                 ret = rte_validate_tx_offload(m);
949                 if (ret != 0) {
950                         rte_errno = -ret; /* ret is a negative errno */
951                         return i;
952                 }
953 #endif
954                 ret = rte_net_intel_cksum_prepare(m);
955                 if (ret != 0) {
956                         rte_errno = -ret;
957                         return i;
958                 }
959         }
960
961         return i;
962 }
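/*
 * Editor's sketch (not driver code): this callback is what
 * rte_eth_tx_prepare() invokes for an ixgbe port, so an application that
 * wants checksum/TSO fields validated and fixed up before transmit would
 * typically pair the two calls as below.  port_id, queue_id, pkts and
 * handle_bad_pkt() are hypothetical caller-side names.
 */
#if 0
	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
	if (nb_prep < nb_pkts)
		handle_bad_pkt(pkts[nb_prep]);	/* rte_errno tells why it failed */
	uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
#endif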
963
964 /*********************************************************************
965  *
966  *  RX functions
967  *
968  **********************************************************************/
969
970 #define IXGBE_PACKET_TYPE_ETHER                         0X00
971 #define IXGBE_PACKET_TYPE_IPV4                          0X01
972 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
973 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
974 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
975 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
976 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
977 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
978 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
979 #define IXGBE_PACKET_TYPE_IPV6                          0X04
980 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
981 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
982 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
983 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
984 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
985 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
986 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
987 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
988 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
989 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
990 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
991 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
992 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
993 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
994 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
995 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
996 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
997 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
998 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
999 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1000 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1001 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1002 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1003
1004 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1005 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1006 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1007 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1008 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1009 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1010 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1011 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1012 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1013 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1014 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1015 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1016 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1017 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1018 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1019 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1020 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1021 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1022 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1023 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1024 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1025 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1026 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1027
1028 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1029 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1030 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1031 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1032 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1033 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1034 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1035 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1036 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1037 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1038 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1039 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1040 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1041 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1042 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1043 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1044 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1045 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1046 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1047 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1048 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1049 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1050 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1051
1052 #define IXGBE_PACKET_TYPE_MAX               0X80
1053 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1054 #define IXGBE_PACKET_TYPE_SHIFT             0X04
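/*
 * Editor's sketch (not driver code): these encoded values index the
 * lookup tables in ixgbe_rxd_pkt_info_to_pkt_type() below.  A hedged
 * example of the intended RX-path use, where mb, pkt_info and
 * rxq->pkt_type_mask are caller-side names:
 */
#if 0
	mb->packet_type =
		ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
#endif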
1055
1056 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1057 static inline uint32_t
1058 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1059 {
1060         /**
1061          * Use two different tables for normal packets and tunnel packets
1062          * to save space.
1063          */
1064         static const uint32_t
1065                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1066                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1067                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1068                         RTE_PTYPE_L3_IPV4,
1069                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1070                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1071                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1072                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1073                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1074                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1075                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4_EXT,
1077                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1079                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1080                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1081                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1082                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1083                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV6,
1085                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1086                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1087                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1089                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1090                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1091                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1092                         RTE_PTYPE_L3_IPV6_EXT,
1093                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1094                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1095                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1096                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1097                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1099                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1101                         RTE_PTYPE_INNER_L3_IPV6,
1102                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1103                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1104                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1105                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1106                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1107                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1108                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1110                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1111                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1112                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1113                         RTE_PTYPE_INNER_L3_IPV6,
1114                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1116                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1117                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1118                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1119                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1120                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1122                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1123                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1125                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1126                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1128                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1129                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1130                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1131                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1132                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1134                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1135                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1136                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1137                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1138                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1140                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1141                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1142                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1143                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1144                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1145                         RTE_PTYPE_L2_ETHER |
1146                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1147                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1148         };
1149
1150         static const uint32_t
1151                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1152                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1153                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1154                         RTE_PTYPE_INNER_L2_ETHER,
1155                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1156                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1157                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1158                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1160                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1164                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1166                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1167                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1169                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1170                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1172                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1176                         RTE_PTYPE_INNER_L4_TCP,
1177                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1178                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1179                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1180                         RTE_PTYPE_INNER_L4_TCP,
1181                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1182                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1183                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1184                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1185                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1186                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1187                         RTE_PTYPE_INNER_L4_TCP,
1188                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1189                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1190                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1191                         RTE_PTYPE_INNER_L3_IPV4,
1192                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1193                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1194                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1195                         RTE_PTYPE_INNER_L4_UDP,
1196                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1197                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1198                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1199                         RTE_PTYPE_INNER_L4_UDP,
1200                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1201                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1202                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1203                         RTE_PTYPE_INNER_L4_SCTP,
1204                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1205                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1207                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1208                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1210                         RTE_PTYPE_INNER_L4_UDP,
1211                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1212                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1214                         RTE_PTYPE_INNER_L4_SCTP,
1215                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1216                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1217                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1218                         RTE_PTYPE_INNER_L3_IPV4,
1219                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1220                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1221                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1222                         RTE_PTYPE_INNER_L4_SCTP,
1223                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1224                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1225                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1226                         RTE_PTYPE_INNER_L4_SCTP,
1227                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1228                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1230                         RTE_PTYPE_INNER_L4_TCP,
1231                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1232                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1233                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1234                         RTE_PTYPE_INNER_L4_UDP,
1235
1236                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1237                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1238                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1239                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1240                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1241                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1242                         RTE_PTYPE_INNER_L3_IPV4,
1243                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1244                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1245                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1246                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1247                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1248                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1249                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1250                         RTE_PTYPE_INNER_L3_IPV6,
1251                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1252                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1253                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1254                         RTE_PTYPE_INNER_L3_IPV4,
1255                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1256                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1257                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1258                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1259                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1260                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1261                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1262                         RTE_PTYPE_INNER_L3_IPV4,
1263                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1264                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1265                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1266                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1267                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1268                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1269                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1270                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1271                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1272                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1273                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1274                         RTE_PTYPE_INNER_L3_IPV4,
1275                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1276                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1277                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1278                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1279                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1280                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1281                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1282                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1283                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1284                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1285                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1286                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1287                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1288                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1289                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1290                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1291                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1292                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1293                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1294                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1295                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1296                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1297                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1298                         RTE_PTYPE_INNER_L3_IPV4,
1299                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1300                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1301                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1302                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1303                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1304                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1305                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1306                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1307                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1308                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1309                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1310                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1311                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1312                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1313                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1314                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1315                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1316                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1317                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1318                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1319                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1320                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1321                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1322                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1323                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1324                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1325                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1326                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1327         };
1328
1329         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1330                 return RTE_PTYPE_UNKNOWN;
1331
1332         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1333
1334         /* For tunnel packet */
1335         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1336                 /* Remove the tunnel bit to save space. */
1337                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1338                 return ptype_table_tn[pkt_info];
1339         }
1340
1341         /**
1342          * For x550, if the packet is not a tunnel packet,
1343          * the tunnel type bits should be 0.
1344          * Reuse 82599's mask.
1345          */
1346         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1347
1348         return ptype_table[pkt_info];
1349 }
1350
1351 static inline uint64_t
1352 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1353 {
1354         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1355                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1356                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1357                 PKT_RX_RSS_HASH, 0, 0, 0,
1358                 0, 0, 0,  PKT_RX_FDIR,
1359         };
1360 #ifdef RTE_LIBRTE_IEEE1588
1361         static uint64_t ip_pkt_etqf_map[8] = {
1362                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1363                 0, 0, 0, 0,
1364         };
1365
1366         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1367                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1368                                 ip_rss_types_map[pkt_info & 0XF];
1369         else
1370                 return ip_rss_types_map[pkt_info & 0XF];
1371 #else
1372         return ip_rss_types_map[pkt_info & 0XF];
1373 #endif
1374 }
1375
1376 static inline uint64_t
1377 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1378 {
1379         uint64_t pkt_flags;
1380
1381         /*
1382          * Check only whether a VLAN is present.
1383          * Do not check whether the L3/L4 RX checksum was done by the NIC;
1384          * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
1385          */
1386         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1387
1388 #ifdef RTE_LIBRTE_IEEE1588
1389         if (rx_status & IXGBE_RXD_STAT_TMST)
1390                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1391 #endif
1392         return pkt_flags;
1393 }
1394
1395 static inline uint64_t
1396 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1397 {
1398         uint64_t pkt_flags;
1399
1400         /*
1401          * Bit 31: IPE, IPv4 checksum error
1402          * Bit 30: L4I, L4 integrity error
1403          */
1404         static uint64_t error_to_pkt_flags_map[4] = {
1405                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1406                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1407                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1408                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1409         };
1410         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1411                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1412
1413         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1414             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1415                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1416         }
1417
1418         return pkt_flags;
1419 }
1420
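/*
 * Illustrative sketch only (not used by the driver; the function name is
 * hypothetical): how the three helpers above are combined into the final
 * mbuf ol_flags value, mirroring what the receive paths below do.
 */
static inline uint64_t
ixgbe_example_build_ol_flags(uint32_t staterr, uint32_t pkt_info,
                             uint64_t vlan_flags)
{
        uint64_t flags;

        /* VLAN (and, optionally, timestamp) bits from the status word */
        flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
        /* IP/L4 checksum verdicts from the checksum error bits */
        flags |= rx_desc_error_to_pkt_flags(staterr);
        /* RSS/FDIR indication from the lower dword packet info */
        flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);

        return flags;
}
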
1421 /*
1422  * LOOK_AHEAD defines how many desc statuses to check beyond the
1423  * current descriptor.
1424  * It must be a compile-time constant (a #define) for optimal performance.
1425  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1426  * function only works with LOOK_AHEAD=8.
1427  */
1428 #define LOOK_AHEAD 8
1429 #if (LOOK_AHEAD != 8)
1430 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1431 #endif
1432 static inline int
1433 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1434 {
1435         volatile union ixgbe_adv_rx_desc *rxdp;
1436         struct ixgbe_rx_entry *rxep;
1437         struct rte_mbuf *mb;
1438         uint16_t pkt_len;
1439         uint64_t pkt_flags;
1440         int nb_dd;
1441         uint32_t s[LOOK_AHEAD];
1442         uint32_t pkt_info[LOOK_AHEAD];
1443         int i, j, nb_rx = 0;
1444         uint32_t status;
1445         uint64_t vlan_flags = rxq->vlan_flags;
1446
1447         /* get references to current descriptor and S/W ring entry */
1448         rxdp = &rxq->rx_ring[rxq->rx_tail];
1449         rxep = &rxq->sw_ring[rxq->rx_tail];
1450
1451         status = rxdp->wb.upper.status_error;
1452         /* check to make sure there is at least 1 packet to receive */
1453         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1454                 return 0;
1455
1456         /*
1457          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1458          * reference packets that are ready to be received.
1459          */
1460         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1461              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1462                 /* Read desc statuses backwards to avoid race condition */
1463                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1464                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1465
1466                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1467                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1468                                                        lo_dword.data);
1469
1470                 /* Compute how many status bits were set */
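                /* each term is 0 or 1: IXGBE_RXDADV_STAT_DD is bit 0 (DD) */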
1471                 nb_dd = 0;
1472                 for (j = 0; j < LOOK_AHEAD; ++j)
1473                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1474
1475                 nb_rx += nb_dd;
1476
1477                 /* Translate descriptor info to mbuf format */
1478                 for (j = 0; j < nb_dd; ++j) {
1479                         mb = rxep[j].mbuf;
1480                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1481                                   rxq->crc_len;
1482                         mb->data_len = pkt_len;
1483                         mb->pkt_len = pkt_len;
1484                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1485
1486                         /* convert descriptor fields to rte mbuf flags */
1487                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1488                                 vlan_flags);
1489                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1490                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1491                                         ((uint16_t)pkt_info[j]);
1492                         mb->ol_flags = pkt_flags;
1493                         mb->packet_type =
1494                                 ixgbe_rxd_pkt_info_to_pkt_type
1495                                         (pkt_info[j], rxq->pkt_type_mask);
1496
1497                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1498                                 mb->hash.rss = rte_le_to_cpu_32(
1499                                     rxdp[j].wb.lower.hi_dword.rss);
1500                         else if (pkt_flags & PKT_RX_FDIR) {
1501                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1502                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1503                                     IXGBE_ATR_HASH_MASK;
1504                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1505                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1506                         }
1507                 }
1508
1509                 /* Move mbuf pointers from the S/W ring to the stage */
1510                 for (j = 0; j < LOOK_AHEAD; ++j) {
1511                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1512                 }
1513
1514                 /* stop scanning if not all descriptors in this group were done */
1515                 if (nb_dd != LOOK_AHEAD)
1516                         break;
1517         }
1518
1519         /* clear software ring entries so we can cleanup correctly */
1520         for (i = 0; i < nb_rx; ++i) {
1521                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1522         }
1523
1524
1525         return nb_rx;
1526 }
1527
1528 static inline int
1529 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1530 {
1531         volatile union ixgbe_adv_rx_desc *rxdp;
1532         struct ixgbe_rx_entry *rxep;
1533         struct rte_mbuf *mb;
1534         uint16_t alloc_idx;
1535         __le64 dma_addr;
1536         int diag, i;
1537
1538         /* allocate buffers in bulk directly into the S/W ring */
1539         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1540         rxep = &rxq->sw_ring[alloc_idx];
1541         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1542                                     rxq->rx_free_thresh);
1543         if (unlikely(diag != 0))
1544                 return -ENOMEM;
1545
1546         rxdp = &rxq->rx_ring[alloc_idx];
1547         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1548                 /* populate the static rte mbuf fields */
1549                 mb = rxep[i].mbuf;
1550                 if (reset_mbuf) {
1551                         mb->next = NULL;
1552                         mb->nb_segs = 1;
1553                         mb->port = rxq->port_id;
1554                 }
1555
1556                 rte_mbuf_refcnt_set(mb, 1);
1557                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1558
1559                 /* populate the descriptors */
1560                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1561                 rxdp[i].read.hdr_addr = 0;
1562                 rxdp[i].read.pkt_addr = dma_addr;
1563         }
1564
1565         /* update state of internal queue structure */
1566         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1567         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1568                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1569
1570         /* no errors */
1571         return 0;
1572 }
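
/*
 * Illustrative note: with, for example, nb_rx_desc = 128 and
 * rx_free_thresh = 32, rx_free_trigger advances 31 -> 63 -> 95 -> 127 and
 * then wraps back to 31, so the ring is refilled in aligned groups of 32
 * descriptors starting at index rx_free_trigger - (rx_free_thresh - 1).
 */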
1573
1574 static inline uint16_t
1575 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1576                          uint16_t nb_pkts)
1577 {
1578         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1579         int i;
1580
1581         /* how many packets are ready to return? */
1582         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1583
1584         /* copy mbuf pointers to the application's packet list */
1585         for (i = 0; i < nb_pkts; ++i)
1586                 rx_pkts[i] = stage[i];
1587
1588         /* update internal queue state */
1589         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1590         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1591
1592         return nb_pkts;
1593 }
1594
1595 static inline uint16_t
1596 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1597              uint16_t nb_pkts)
1598 {
1599         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1600         uint16_t nb_rx = 0;
1601
1602         /* Any previously recv'd pkts will be returned from the Rx stage */
1603         if (rxq->rx_nb_avail)
1604                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1605
1606         /* Scan the H/W ring for packets to receive */
1607         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1608
1609         /* update internal queue state */
1610         rxq->rx_next_avail = 0;
1611         rxq->rx_nb_avail = nb_rx;
1612         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1613
1614         /* if required, allocate new buffers to replenish descriptors */
1615         if (rxq->rx_tail > rxq->rx_free_trigger) {
1616                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1617
1618                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1619                         int i, j;
1620
1621                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1622                                    "queue_id=%u", (unsigned) rxq->port_id,
1623                                    (unsigned) rxq->queue_id);
1624
1625                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1626                                 rxq->rx_free_thresh;
1627
1628                         /*
1629                          * Need to rewind any previous receives if we cannot
1630                          * allocate new buffers to replenish the old ones.
1631                          */
1632                         rxq->rx_nb_avail = 0;
1633                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1634                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1635                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1636
1637                         return 0;
1638                 }
1639
1640                 /* update tail pointer */
1641                 rte_wmb();
1642                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1643                                             cur_free_trigger);
1644         }
1645
1646         if (rxq->rx_tail >= rxq->nb_rx_desc)
1647                 rxq->rx_tail = 0;
1648
1649         /* received any packets this loop? */
1650         if (rxq->rx_nb_avail)
1651                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1652
1653         return 0;
1654 }
1655
1656 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1657 uint16_t
1658 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1659                            uint16_t nb_pkts)
1660 {
1661         uint16_t nb_rx;
1662
1663         if (unlikely(nb_pkts == 0))
1664                 return 0;
1665
1666         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1667                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1668
1669         /* request is relatively large, chunk it up */
1670         nb_rx = 0;
1671         while (nb_pkts) {
1672                 uint16_t ret, n;
1673
1674                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1675                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1676                 nb_rx = (uint16_t)(nb_rx + ret);
1677                 nb_pkts = (uint16_t)(nb_pkts - ret);
1678                 if (ret < n)
1679                         break;
1680         }
1681
1682         return nb_rx;
1683 }
1684
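/*
 * Illustrative usage sketch only (not used by the driver; the function name
 * and burst size are hypothetical): an application receives through
 * rte_eth_rx_burst(), which dispatches to one of the handlers in this file.
 * When the bulk-alloc handler above is selected, a request larger than
 * RTE_PMD_IXGBE_RX_MAX_BURST is transparently chunked.
 */
static inline void
ixgbe_example_poll_rx_queue(uint8_t port_id, uint16_t queue_id)
{
        struct rte_mbuf *pkts[64];
        uint16_t i, nb;

        nb = rte_eth_rx_burst(port_id, queue_id, pkts, 64);
        for (i = 0; i < nb; i++)
                rte_pktmbuf_free(pkts[i]); /* application processing goes here */
}
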
1685 uint16_t
1686 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1687                 uint16_t nb_pkts)
1688 {
1689         struct ixgbe_rx_queue *rxq;
1690         volatile union ixgbe_adv_rx_desc *rx_ring;
1691         volatile union ixgbe_adv_rx_desc *rxdp;
1692         struct ixgbe_rx_entry *sw_ring;
1693         struct ixgbe_rx_entry *rxe;
1694         struct rte_mbuf *rxm;
1695         struct rte_mbuf *nmb;
1696         union ixgbe_adv_rx_desc rxd;
1697         uint64_t dma_addr;
1698         uint32_t staterr;
1699         uint32_t pkt_info;
1700         uint16_t pkt_len;
1701         uint16_t rx_id;
1702         uint16_t nb_rx;
1703         uint16_t nb_hold;
1704         uint64_t pkt_flags;
1705         uint64_t vlan_flags;
1706
1707         nb_rx = 0;
1708         nb_hold = 0;
1709         rxq = rx_queue;
1710         rx_id = rxq->rx_tail;
1711         rx_ring = rxq->rx_ring;
1712         sw_ring = rxq->sw_ring;
1713         vlan_flags = rxq->vlan_flags;
1714         while (nb_rx < nb_pkts) {
1715                 /*
1716                  * The order of operations here is important as the DD status
1717                  * bit must not be read after any other descriptor fields.
1718                  * rx_ring and rxdp point to volatile data, so the compiler
1719                  * cannot reorder the accesses. If they were not volatile,
1720                  * the accesses could be reordered, which could lead to
1721                  * using invalid descriptor fields when read from rxd.
1722                  */
1723                 rxdp = &rx_ring[rx_id];
1724                 staterr = rxdp->wb.upper.status_error;
1725                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1726                         break;
1727                 rxd = *rxdp;
1728
1729                 /*
1730                  * End of packet.
1731                  *
1732                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1733                  * is likely to be invalid and to be dropped by the various
1734                  * validation checks performed by the network stack.
1735                  *
1736                  * Allocate a new mbuf to replenish the RX ring descriptor.
1737                  * If the allocation fails:
1738                  *    - arrange for that RX descriptor to be the first one
1739                  *      being parsed the next time the receive function is
1740                  *      invoked [on the same queue].
1741                  *
1742                  *    - Stop parsing the RX ring and return immediately.
1743                  *
1744                  * This policy does not drop the packet received in the RX
1745                  * descriptor for which the allocation of a new mbuf failed.
1746                  * Thus, it allows that packet to be retrieved later if
1747                  * mbufs have been freed in the meantime.
1748                  * As a side effect, holding RX descriptors instead of
1749                  * systematically giving them back to the NIC may lead to
1750                  * RX ring exhaustion situations.
1751                  * However, the NIC can gracefully prevent such situations
1752                  * from happening by sending specific "back-pressure" flow
1753                  * control frames to its peer(s).
1754                  */
1755                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1756                            "ext_err_stat=0x%08x pkt_len=%u",
1757                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1758                            (unsigned) rx_id, (unsigned) staterr,
1759                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1760
1761                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1762                 if (nmb == NULL) {
1763                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1764                                    "queue_id=%u", (unsigned) rxq->port_id,
1765                                    (unsigned) rxq->queue_id);
1766                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1767                         break;
1768                 }
1769
1770                 nb_hold++;
1771                 rxe = &sw_ring[rx_id];
1772                 rx_id++;
1773                 if (rx_id == rxq->nb_rx_desc)
1774                         rx_id = 0;
1775
1776                 /* Prefetch next mbuf while processing current one. */
1777                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1778
1779                 /*
1780                  * When next RX descriptor is on a cache-line boundary,
1781                  * prefetch the next 4 RX descriptors and the next 8 pointers
1782                  * to mbufs.
1783                  */
1784                 if ((rx_id & 0x3) == 0) {
1785                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1786                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1787                 }
1788
1789                 rxm = rxe->mbuf;
1790                 rxe->mbuf = nmb;
1791                 dma_addr =
1792                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1793                 rxdp->read.hdr_addr = 0;
1794                 rxdp->read.pkt_addr = dma_addr;
1795
1796                 /*
1797                  * Initialize the returned mbuf.
1798                  * 1) setup generic mbuf fields:
1799                  *    - number of segments,
1800                  *    - next segment,
1801                  *    - packet length,
1802                  *    - RX port identifier.
1803                  * 2) integrate hardware offload data, if any:
1804                  *    - RSS flag & hash,
1805                  *    - IP checksum flag,
1806                  *    - VLAN TCI, if any,
1807                  *    - error flags.
1808                  */
1809                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1810                                       rxq->crc_len);
1811                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1812                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1813                 rxm->nb_segs = 1;
1814                 rxm->next = NULL;
1815                 rxm->pkt_len = pkt_len;
1816                 rxm->data_len = pkt_len;
1817                 rxm->port = rxq->port_id;
1818
1819                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1820                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1821                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1822
1823                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1824                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1825                 pkt_flags = pkt_flags |
1826                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1827                 rxm->ol_flags = pkt_flags;
1828                 rxm->packet_type =
1829                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1830                                                        rxq->pkt_type_mask);
1831
1832                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1833                         rxm->hash.rss = rte_le_to_cpu_32(
1834                                                 rxd.wb.lower.hi_dword.rss);
1835                 else if (pkt_flags & PKT_RX_FDIR) {
1836                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1837                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1838                                         IXGBE_ATR_HASH_MASK;
1839                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1840                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1841                 }
1842                 /*
1843                  * Store the mbuf address into the next entry of the array
1844                  * of returned packets.
1845                  */
1846                 rx_pkts[nb_rx++] = rxm;
1847         }
1848         rxq->rx_tail = rx_id;
1849
1850         /*
1851          * If the number of free RX descriptors is greater than the RX free
1852          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1853          * register.
1854          * Update the RDT with the value of the last processed RX descriptor
1855          * minus 1, to guarantee that the RDT register is never equal to the
1856          * RDH register, which creates a "full" ring situation from the
1857          * hardware point of view...
1858          */
1859         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1860         if (nb_hold > rxq->rx_free_thresh) {
1861                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1862                            "nb_hold=%u nb_rx=%u",
1863                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1864                            (unsigned) rx_id, (unsigned) nb_hold,
1865                            (unsigned) nb_rx);
1866                 rx_id = (uint16_t) ((rx_id == 0) ?
1867                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1868                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1869                 nb_hold = 0;
1870         }
1871         rxq->nb_rx_hold = nb_hold;
1872         return nb_rx;
1873 }
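
/*
 * Illustrative sketch only (not used by the driver; the function name is
 * hypothetical): the rx_mbuf_alloc_failed counter incremented on allocation
 * failure above is reported to applications as the rx_nombuf field of the
 * generic ethdev statistics.
 */
static inline uint64_t
ixgbe_example_get_rx_nombuf(uint8_t port_id)
{
        struct rte_eth_stats stats;

        memset(&stats, 0, sizeof(stats));
        rte_eth_stats_get(port_id, &stats);
        return stats.rx_nombuf;
}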
1874
1875 /**
1876  * Return the RSC count of a descriptor (non-zero for an RSC descriptor).
1877  */
1878 static inline uint32_t
1879 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1880 {
1881         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1882                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1883 }
1884
1885 /**
1886  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1887  *
1888  * Fill the following info in the HEAD buffer of the Rx cluster:
1889  *    - RX port identifier
1890  *    - hardware offload data, if any:
1891  *      - RSS flag & hash
1892  *      - IP checksum flag
1893  *      - VLAN TCI, if any
1894  *      - error flags
1895  * @head HEAD of the packet cluster
1896  * @desc HW descriptor to get data from
1897  * @rxq Pointer to the Rx queue
1898  */
1899 static inline void
1900 ixgbe_fill_cluster_head_buf(
1901         struct rte_mbuf *head,
1902         union ixgbe_adv_rx_desc *desc,
1903         struct ixgbe_rx_queue *rxq,
1904         uint32_t staterr)
1905 {
1906         uint32_t pkt_info;
1907         uint64_t pkt_flags;
1908
1909         head->port = rxq->port_id;
1910
1911         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1912          * set in the pkt_flags field.
1913          */
1914         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1915         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1916         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1917         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1918         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1919         head->ol_flags = pkt_flags;
1920         head->packet_type =
1921                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1922
1923         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1924                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1925         else if (pkt_flags & PKT_RX_FDIR) {
1926                 head->hash.fdir.hash =
1927                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1928                                                           & IXGBE_ATR_HASH_MASK;
1929                 head->hash.fdir.id =
1930                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1931         }
1932 }
1933
1934 /**
1935  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1936  *
1937  * @rx_queue Rx queue handle
1938  * @rx_pkts table of received packets
1939  * @nb_pkts size of rx_pkts table
1940  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1941  *
1942  * Handles the Rx HW ring completions when the RSC feature is configured. Uses
1943  * an additional ring of ixgbe_rsc_entry's that holds the relevant RSC info.
1944  *
1945  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1946  * 1) When non-EOP RSC completion arrives:
1947  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1948  *       segment's data length.
1949  *    b) Set the "next" pointer of the current segment to point to the segment
1950  *       at the NEXTP index.
1951  *    c) Pass the HEAD of the RSC aggregation cluster on to the next NEXTP
1952  *       entry in the sw_sc_ring.
1953  * 2) When EOP arrives we just update the cluster's total length and offload
1954  *    flags and deliver the cluster up to the upper layers. In our case - put it
1955  *    in the rx_pkts table.
1956  *
1957  * Returns the number of received packets/clusters (according to the "bulk
1958  * receive" interface).
1959  */
1960 static inline uint16_t
1961 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1962                     bool bulk_alloc)
1963 {
1964         struct ixgbe_rx_queue *rxq = rx_queue;
1965         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1966         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1967         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1968         uint16_t rx_id = rxq->rx_tail;
1969         uint16_t nb_rx = 0;
1970         uint16_t nb_hold = rxq->nb_rx_hold;
1971         uint16_t prev_id = rxq->rx_tail;
1972
1973         while (nb_rx < nb_pkts) {
1974                 bool eop;
1975                 struct ixgbe_rx_entry *rxe;
1976                 struct ixgbe_scattered_rx_entry *sc_entry;
1977                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1978                 struct ixgbe_rx_entry *next_rxe = NULL;
1979                 struct rte_mbuf *first_seg;
1980                 struct rte_mbuf *rxm;
1981                 struct rte_mbuf *nmb;
1982                 union ixgbe_adv_rx_desc rxd;
1983                 uint16_t data_len;
1984                 uint16_t next_id;
1985                 volatile union ixgbe_adv_rx_desc *rxdp;
1986                 uint32_t staterr;
1987
1988 next_desc:
1989                 /*
1990                  * The code in this whole file uses the volatile pointer to
1991                  * ensure the read ordering of the status and the rest of the
1992                  * descriptor fields (on the compiler level only!!!). This is so
1993          * UGLY - why not just use the compiler barrier instead? DPDK
1994                  * even has the rte_compiler_barrier() for that.
1995                  *
1996                  * But most importantly this is just wrong because this doesn't
1997                  * ensure memory ordering in a general case at all. For
1998                  * instance, DPDK is supposed to work on Power CPUs where
1999                  * compiler barrier may just not be enough!
2000                  *
2001                  * I tried to write only this function properly to have a
2002                  * starting point (as a part of an LRO/RSC series) but the
2003                  * compiler cursed at me when I tried to cast away the
2004                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2005                  * keeping it the way it is for now.
2006                  *
2007                  * The code in this file is broken in so many other places and
2008                  * will just not work on a big endian CPU anyway therefore the
2009                  * lines below will have to be revisited together with the rest
2010                  * of the ixgbe PMD.
2011                  *
2012                  * TODO:
2013                  *    - Get rid of "volatile" crap and let the compiler do its
2014                  *      job.
2015                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2016                  *      memory ordering below.
2017                  */
2018                 rxdp = &rx_ring[rx_id];
2019                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2020
2021                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2022                         break;
2023
2024                 rxd = *rxdp;
2025
2026                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2027                                   "staterr=0x%x data_len=%u",
2028                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2029                            rte_le_to_cpu_16(rxd.wb.upper.length));
2030
2031                 if (!bulk_alloc) {
2032                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2033                         if (nmb == NULL) {
2034                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2035                                                   "port_id=%u queue_id=%u",
2036                                            rxq->port_id, rxq->queue_id);
2037
2038                                 rte_eth_devices[rxq->port_id].data->
2039                                                         rx_mbuf_alloc_failed++;
2040                                 break;
2041                         }
2042                 } else if (nb_hold > rxq->rx_free_thresh) {
2043                         uint16_t next_rdt = rxq->rx_free_trigger;
2044
2045                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2046                                 rte_wmb();
2047                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2048                                                             next_rdt);
2049                                 nb_hold -= rxq->rx_free_thresh;
2050                         } else {
2051                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2052                                                   "port_id=%u queue_id=%u",
2053                                            rxq->port_id, rxq->queue_id);
2054
2055                                 rte_eth_devices[rxq->port_id].data->
2056                                                         rx_mbuf_alloc_failed++;
2057                                 break;
2058                         }
2059                 }
2060
2061                 nb_hold++;
2062                 rxe = &sw_ring[rx_id];
2063                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2064
2065                 next_id = rx_id + 1;
2066                 if (next_id == rxq->nb_rx_desc)
2067                         next_id = 0;
2068
2069                 /* Prefetch next mbuf while processing current one. */
2070                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2071
2072                 /*
2073                  * When next RX descriptor is on a cache-line boundary,
2074                  * prefetch the next 4 RX descriptors and the next 4 pointers
2075                  * to mbufs.
2076                  */
2077                 if ((next_id & 0x3) == 0) {
2078                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2079                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2080                 }
2081
2082                 rxm = rxe->mbuf;
2083
2084                 if (!bulk_alloc) {
2085                         __le64 dma =
2086                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2087                         /*
2088                          * Update RX descriptor with the physical address of the
2089                          * new data buffer of the new allocated mbuf.
2090                          * new data buffer of the newly allocated mbuf.
2091                         rxe->mbuf = nmb;
2092
2093                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2094                         rxdp->read.hdr_addr = 0;
2095                         rxdp->read.pkt_addr = dma;
2096                 } else
2097                         rxe->mbuf = NULL;
2098
2099                 /*
2100                  * Set data length & data buffer address of mbuf.
2101                  */
2102                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2103                 rxm->data_len = data_len;
2104
2105                 if (!eop) {
2106                         uint16_t nextp_id;
2107                         /*
2108                          * Get next descriptor index:
2109                          *  - For RSC it's in the NEXTP field.
2110                          *  - For a scattered packet - it's just a following
2111                          *    descriptor.
2112                          */
2113                         if (ixgbe_rsc_count(&rxd))
2114                                 nextp_id =
2115                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2116                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2117                         else
2118                                 nextp_id = next_id;
2119
2120                         next_sc_entry = &sw_sc_ring[nextp_id];
2121                         next_rxe = &sw_ring[nextp_id];
2122                         rte_ixgbe_prefetch(next_rxe);
2123                 }
2124
2125                 sc_entry = &sw_sc_ring[rx_id];
2126                 first_seg = sc_entry->fbuf;
2127                 sc_entry->fbuf = NULL;
2128
2129                 /*
2130                  * If this is the first buffer of the received packet,
2131                  * set the pointer to the first mbuf of the packet and
2132                  * initialize its context.
2133                  * Otherwise, update the total length and the number of segments
2134                  * of the current scattered packet, and update the pointer to
2135                  * the last mbuf of the current packet.
2136                  */
2137                 if (first_seg == NULL) {
2138                         first_seg = rxm;
2139                         first_seg->pkt_len = data_len;
2140                         first_seg->nb_segs = 1;
2141                 } else {
2142                         first_seg->pkt_len += data_len;
2143                         first_seg->nb_segs++;
2144                 }
2145
2146                 prev_id = rx_id;
2147                 rx_id = next_id;
2148
2149                 /*
2150                  * If this is not the last buffer of the received packet, update
2151                  * the pointer to the first mbuf at the NEXTP entry in the
2152                  * sw_sc_ring and continue to parse the RX ring.
2153                  */
2154                 if (!eop && next_rxe) {
2155                         rxm->next = next_rxe->mbuf;
2156                         next_sc_entry->fbuf = first_seg;
2157                         goto next_desc;
2158                 }
2159
2160                 /*
2161                  * This is the last buffer of the received packet - return
2162                  * the current cluster to the user.
2163                  */
2164                 rxm->next = NULL;
2165
2166                 /* Initialize the first mbuf of the returned packet */
2167                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2168
2169                 /*
2170                  * Deal with the case when HW CRC strip is disabled.
2171                  * That can't happen when LRO is enabled, but still could
2172                  * happen for scattered RX mode.
2173                  */
2174                 first_seg->pkt_len -= rxq->crc_len;
2175                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2176                         struct rte_mbuf *lp;
2177
2178                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2179                                 ;
2180
2181                         first_seg->nb_segs--;
2182                         lp->data_len -= rxq->crc_len - rxm->data_len;
2183                         lp->next = NULL;
2184                         rte_pktmbuf_free_seg(rxm);
2185                 } else
2186                         rxm->data_len -= rxq->crc_len;
2187
2188                 /* Prefetch data of first segment, if configured to do so. */
2189                 rte_packet_prefetch((char *)first_seg->buf_addr +
2190                         first_seg->data_off);
2191
2192                 /*
2193                  * Store the mbuf address into the next entry of the array
2194                  * of returned packets.
2195                  */
2196                 rx_pkts[nb_rx++] = first_seg;
2197         }
2198
2199         /*
2200          * Record index of the next RX descriptor to probe.
2201          */
2202         rxq->rx_tail = rx_id;
2203
2204         /*
2205          * If the number of free RX descriptors is greater than the RX free
2206          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2207          * register.
2208          * Update the RDT with the value of the last processed RX descriptor
2209          * minus 1, to guarantee that the RDT register is never equal to the
2210          * RDH register, which creates a "full" ring situation from the
2211          * hardware point of view...
2212          */
2213         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2214                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2215                            "nb_hold=%u nb_rx=%u",
2216                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2217
2218                 rte_wmb();
2219                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2220                 nb_hold = 0;
2221         }
2222
2223         rxq->nb_rx_hold = nb_hold;
2224         return nb_rx;
2225 }
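
/*
 * Illustrative sketch only (not used by the driver; the function name is
 * hypothetical): a cluster returned by the handler above is a chain of mbuf
 * segments linked through the "next" pointers set in the loop; the
 * per-segment data_len values add up to the pkt_len stored in the HEAD
 * segment.
 */
static inline uint32_t
ixgbe_example_cluster_len(const struct rte_mbuf *head)
{
        uint32_t len = 0;
        const struct rte_mbuf *seg;

        for (seg = head; seg != NULL; seg = seg->next)
                len += seg->data_len;
        return len; /* equals head->pkt_len for a complete cluster */
}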
2226
2227 uint16_t
2228 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2229                                  uint16_t nb_pkts)
2230 {
2231         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2232 }
2233
2234 uint16_t
2235 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2236                                uint16_t nb_pkts)
2237 {
2238         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2239 }
2240
2241 /*********************************************************************
2242  *
2243  *  Queue management functions
2244  *
2245  **********************************************************************/
2246
2247 static void __attribute__((cold))
2248 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2249 {
2250         unsigned i;
2251
2252         if (txq->sw_ring != NULL) {
2253                 for (i = 0; i < txq->nb_tx_desc; i++) {
2254                         if (txq->sw_ring[i].mbuf != NULL) {
2255                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2256                                 txq->sw_ring[i].mbuf = NULL;
2257                         }
2258                 }
2259         }
2260 }
2261
2262 static void __attribute__((cold))
2263 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2264 {
2265         if (txq != NULL &&
2266             txq->sw_ring != NULL)
2267                 rte_free(txq->sw_ring);
2268 }
2269
2270 static void __attribute__((cold))
2271 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2272 {
2273         if (txq != NULL && txq->ops != NULL) {
2274                 txq->ops->release_mbufs(txq);
2275                 txq->ops->free_swring(txq);
2276                 rte_free(txq);
2277         }
2278 }
2279
2280 void __attribute__((cold))
2281 ixgbe_dev_tx_queue_release(void *txq)
2282 {
2283         ixgbe_tx_queue_release(txq);
2284 }
2285
2286 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2287 static void __attribute__((cold))
2288 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2289 {
2290         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2291         struct ixgbe_tx_entry *txe = txq->sw_ring;
2292         uint16_t prev, i;
2293
2294         /* Zero out HW ring memory */
2295         for (i = 0; i < txq->nb_tx_desc; i++) {
2296                 txq->tx_ring[i] = zeroed_desc;
2297         }
2298
2299         /* Initialize SW ring entries */
2300         prev = (uint16_t) (txq->nb_tx_desc - 1);
2301         for (i = 0; i < txq->nb_tx_desc; i++) {
2302                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2303
2304                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2305                 txe[i].mbuf = NULL;
2306                 txe[i].last_id = i;
2307                 txe[prev].next_id = i;
2308                 prev = i;
2309         }
2310
2311         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2312         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2313
2314         txq->tx_tail = 0;
2315         txq->nb_tx_used = 0;
2316         /*
2317          * Always allow 1 descriptor to be un-allocated to avoid
2318          * a H/W race condition
2319          */
2320         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2321         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2322         txq->ctx_curr = 0;
2323         memset((void *)&txq->ctx_cache, 0,
2324                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2325 }
2326
2327 static const struct ixgbe_txq_ops def_txq_ops = {
2328         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2329         .free_swring = ixgbe_tx_free_swring,
2330         .reset = ixgbe_reset_tx_queue,
2331 };
2332
2333 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2334  * the queue parameters. Used in tx_queue_setup by the primary process and
2335  * then in dev_init by a secondary process when attaching to an existing ethdev.
2336  */
2337 void __attribute__((cold))
2338 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2339 {
2340         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2341         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2342                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2343                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2344                 dev->tx_pkt_prepare = NULL;
2345 #ifdef RTE_IXGBE_INC_VECTOR
2346                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2347                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2348                                         ixgbe_txq_vec_setup(txq) == 0)) {
2349                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2350                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2351                 } else
2352 #endif
2353                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2354         } else {
2355                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2356                 PMD_INIT_LOG(DEBUG,
2357                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2358                                 (unsigned long)txq->txq_flags,
2359                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2360                 PMD_INIT_LOG(DEBUG,
2361                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2362                                 (unsigned long)txq->tx_rs_thresh,
2363                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2364                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2365                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2366         }
2367 }
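
/*
 * Illustrative sketch only (not used by the driver; the function name, ring
 * size and flag choice are assumptions): a queue configured with no offloads
 * and no multi-segment support, and with tx_rs_thresh of at least
 * RTE_PMD_IXGBE_TX_MAX_BURST, makes ixgbe_set_tx_function() above select the
 * simple (or vector) transmit path.
 */
static inline int
ixgbe_example_setup_simple_txq(uint8_t port_id, uint16_t queue_id,
                               unsigned int socket_id)
{
        struct rte_eth_dev_info dev_info;
        struct rte_eth_txconf txconf;

        rte_eth_dev_info_get(port_id, &dev_info);
        txconf = dev_info.default_txconf;
        txconf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS;
        txconf.tx_rs_thresh = RTE_PMD_IXGBE_TX_MAX_BURST;

        return rte_eth_tx_queue_setup(port_id, queue_id, 512, socket_id,
                                      &txconf);
}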
2368
2369 int __attribute__((cold))
2370 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2371                          uint16_t queue_idx,
2372                          uint16_t nb_desc,
2373                          unsigned int socket_id,
2374                          const struct rte_eth_txconf *tx_conf)
2375 {
2376         const struct rte_memzone *tz;
2377         struct ixgbe_tx_queue *txq;
2378         struct ixgbe_hw     *hw;
2379         uint16_t tx_rs_thresh, tx_free_thresh;
2380
2381         PMD_INIT_FUNC_TRACE();
2382         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2383
2384         /*
2385          * Validate number of transmit descriptors.
2386          * It must not exceed the hardware maximum and must be a multiple
2387          * of IXGBE_TXD_ALIGN.
2388          */
2389         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2390                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2391                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2392                 return -EINVAL;
2393         }
2394
2395         /*
2396          * The following two parameters control the setting of the RS bit on
2397          * transmit descriptors.
2398          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2399          * descriptors have been used.
2400          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2401          * descriptors are used or if the number of descriptors required
2402          * to transmit a packet is greater than the number of free TX
2403          * descriptors.
2404          * The following constraints must be satisfied:
2405          *  tx_rs_thresh must be greater than 0.
2406          *  tx_rs_thresh must be less than the size of the ring minus 2.
2407          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2408          *  tx_rs_thresh must be a divisor of the ring size.
2409          *  tx_free_thresh must be greater than 0.
2410          *  tx_free_thresh must be less than the size of the ring minus 3.
2411          * One descriptor in the TX ring is used as a sentinel to avoid a
2412          * H/W race condition, hence the maximum threshold constraints.
2413          * When set to zero use default values.
2414          */
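	/*
	 * Worked example with hypothetical values: nb_desc = 512 together
	 * with the defaults tx_rs_thresh = 32 and tx_free_thresh = 32
	 * satisfies every constraint above: 32 > 0, 32 < 512 - 2,
	 * 32 <= 32, 512 % 32 == 0 and 32 < 512 - 3.
	 */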
2415         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2416                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2417         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2418                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2419         if (tx_rs_thresh >= (nb_desc - 2)) {
2420                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2421                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2422                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2423                         (int)dev->data->port_id, (int)queue_idx);
2424                 return -(EINVAL);
2425         }
2426         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2427                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2428                         "(tx_rs_thresh=%u port=%d queue=%d)",
2429                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2430                         (int)dev->data->port_id, (int)queue_idx);
2431                 return -(EINVAL);
2432         }
2433         if (tx_free_thresh >= (nb_desc - 3)) {
2434                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2435                              "number of TX descriptors minus 3. "
2436                              "(tx_free_thresh=%u "
2437                              "port=%d queue=%d)",
2438                              (unsigned int)tx_free_thresh,
2439                              (int)dev->data->port_id, (int)queue_idx);
2440                 return -(EINVAL);
2441         }
2442         if (tx_rs_thresh > tx_free_thresh) {
2443                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2444                              "tx_free_thresh. (tx_free_thresh=%u "
2445                              "tx_rs_thresh=%u port=%d queue=%d)",
2446                              (unsigned int)tx_free_thresh,
2447                              (unsigned int)tx_rs_thresh,
2448                              (int)dev->data->port_id,
2449                              (int)queue_idx);
2450                 return -(EINVAL);
2451         }
2452         if ((nb_desc % tx_rs_thresh) != 0) {
2453                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2454                              "number of TX descriptors. (tx_rs_thresh=%u "
2455                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2456                              (int)dev->data->port_id, (int)queue_idx);
2457                 return -(EINVAL);
2458         }
2459
2460         /*
2461          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2462          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2463          * by the NIC and all descriptors are written back after the NIC
2464          * accumulates WTHRESH descriptors.
2465          */
2466         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2467                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2468                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2469                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2470                              (int)dev->data->port_id, (int)queue_idx);
2471                 return -(EINVAL);
2472         }
2473
2474         /* Free memory prior to re-allocation if needed... */
2475         if (dev->data->tx_queues[queue_idx] != NULL) {
2476                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2477                 dev->data->tx_queues[queue_idx] = NULL;
2478         }
2479
2480         /* First allocate the tx queue data structure */
2481         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2482                                  RTE_CACHE_LINE_SIZE, socket_id);
2483         if (txq == NULL)
2484                 return -ENOMEM;
2485
2486         /*
2487          * Allocate TX ring hardware descriptors. A memzone large enough to
2488          * handle the maximum ring size is allocated in order to allow for
2489          * resizing in later calls to the queue setup function.
2490          */
2491         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2492                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2493                         IXGBE_ALIGN, socket_id);
2494         if (tz == NULL) {
2495                 ixgbe_tx_queue_release(txq);
2496                 return -ENOMEM;
2497         }
2498
2499         txq->nb_tx_desc = nb_desc;
2500         txq->tx_rs_thresh = tx_rs_thresh;
2501         txq->tx_free_thresh = tx_free_thresh;
2502         txq->pthresh = tx_conf->tx_thresh.pthresh;
2503         txq->hthresh = tx_conf->tx_thresh.hthresh;
2504         txq->wthresh = tx_conf->tx_thresh.wthresh;
2505         txq->queue_id = queue_idx;
2506         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2507                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2508         txq->port_id = dev->data->port_id;
2509         txq->txq_flags = tx_conf->txq_flags;
2510         txq->ops = &def_txq_ops;
2511         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2512
2513         /*
2514          * Set the tail register address: use VFTDT when a virtual function is detected
2515          */
2516         if (hw->mac.type == ixgbe_mac_82599_vf ||
2517             hw->mac.type == ixgbe_mac_X540_vf ||
2518             hw->mac.type == ixgbe_mac_X550_vf ||
2519             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2520             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2521                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2522         else
2523                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2524
2525         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2526         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2527
2528         /* Allocate software ring */
2529         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2530                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2531                                 RTE_CACHE_LINE_SIZE, socket_id);
2532         if (txq->sw_ring == NULL) {
2533                 ixgbe_tx_queue_release(txq);
2534                 return -ENOMEM;
2535         }
2536         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2537                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2538
2539         /* set up vector or scalar TX function as appropriate */
2540         ixgbe_set_tx_function(dev, txq);
2541
2542         txq->ops->reset(txq);
2543
2544         dev->data->tx_queues[queue_idx] = txq;
2545
2546
2547         return 0;
2548 }
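/*
 * Illustrative usage sketch (hypothetical application fragment): the setup
 * routine above is normally reached through the generic ethdev API rather
 * than called directly; port_id and ret are assumed to exist in the caller.
 */
#if 0
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);
	/* default_txconf carries the PMD's preferred thresholds and flags */
	ret = rte_eth_tx_queue_setup(port_id, 0 /* queue */, 512 /* nb_desc */,
				     rte_eth_dev_socket_id(port_id),
				     &dev_info.default_txconf);
#endif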
2549
2550 /**
2551  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2552  *
2553  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2554  * in the sw_rsc_ring is not set to NULL but rather points to the next
2555  * mbuf of this RSC aggregation (that has not been completed yet and still
2556  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2557  * just free the first "nb_segs" segments of the cluster explicitly by calling
2558  * rte_pktmbuf_free_seg() on each of them.
2559  *
2560  * @m scattered cluster head
2561  */
2562 static void __attribute__((cold))
2563 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2564 {
2565         uint8_t i, nb_segs = m->nb_segs;
2566         struct rte_mbuf *next_seg;
2567
2568         for (i = 0; i < nb_segs; i++) {
2569                 next_seg = m->next;
2570                 rte_pktmbuf_free_seg(m);
2571                 m = next_seg;
2572         }
2573 }
2574
2575 static void __attribute__((cold))
2576 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2577 {
2578         unsigned i;
2579
2580 #ifdef RTE_IXGBE_INC_VECTOR
2581         /* SSE Vector driver has a different way of releasing mbufs. */
2582         if (rxq->rx_using_sse) {
2583                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2584                 return;
2585         }
2586 #endif
2587
2588         if (rxq->sw_ring != NULL) {
2589                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2590                         if (rxq->sw_ring[i].mbuf != NULL) {
2591                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2592                                 rxq->sw_ring[i].mbuf = NULL;
2593                         }
2594                 }
2595                 if (rxq->rx_nb_avail) {
2596                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2597                                 struct rte_mbuf *mb;
2598
2599                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2600                                 rte_pktmbuf_free_seg(mb);
2601                         }
2602                         rxq->rx_nb_avail = 0;
2603                 }
2604         }
2605
2606         if (rxq->sw_sc_ring)
2607                 for (i = 0; i < rxq->nb_rx_desc; i++)
2608                         if (rxq->sw_sc_ring[i].fbuf) {
2609                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2610                                 rxq->sw_sc_ring[i].fbuf = NULL;
2611                         }
2612 }
2613
2614 static void __attribute__((cold))
2615 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2616 {
2617         if (rxq != NULL) {
2618                 ixgbe_rx_queue_release_mbufs(rxq);
2619                 rte_free(rxq->sw_ring);
2620                 rte_free(rxq->sw_sc_ring);
2621                 rte_free(rxq);
2622         }
2623 }
2624
2625 void __attribute__((cold))
2626 ixgbe_dev_rx_queue_release(void *rxq)
2627 {
2628         ixgbe_rx_queue_release(rxq);
2629 }
2630
2631 /*
2632  * Check if Rx Burst Bulk Alloc function can be used.
2633  * Return
2634  *        0: the preconditions are satisfied and the bulk allocation function
2635  *           can be used.
2636  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2637  *           function must be used.
2638  */
2639 static inline int __attribute__((cold))
2640 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2641 {
2642         int ret = 0;
2643
2644         /*
2645          * Make sure the following pre-conditions are satisfied:
2646          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2647          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2648          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2649          * Scattered packets are not supported.  This should be checked
2650          * outside of this function.
2651          */
2652         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2653                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2654                              "rxq->rx_free_thresh=%d, "
2655                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2656                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2657                 ret = -EINVAL;
2658         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2659                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2660                              "rxq->rx_free_thresh=%d, "
2661                              "rxq->nb_rx_desc=%d",
2662                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2663                 ret = -EINVAL;
2664         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2665                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2666                              "rxq->nb_rx_desc=%d, "
2667                              "rxq->rx_free_thresh=%d",
2668                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2669                 ret = -EINVAL;
2670         }
2671
2672         return ret;
2673 }
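/*
 * Example with hypothetical values, assuming RTE_PMD_IXGBE_RX_MAX_BURST
 * is 32: nb_rx_desc = 512 with rx_free_thresh = 32 satisfies all three
 * preconditions (32 >= 32, 32 < 512, 512 % 32 == 0), so the bulk
 * allocation Rx path remains available for that queue.
 */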
2674
2675 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2676 static void __attribute__((cold))
2677 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2678 {
2679         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2680         unsigned i;
2681         uint16_t len = rxq->nb_rx_desc;
2682
2683         /*
2684          * By default, the Rx queue setup function allocates enough memory for
2685          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2686          * extra memory at the end of the descriptor ring to be zero'd out.
2687          */
2688         if (adapter->rx_bulk_alloc_allowed)
2689                 /* zero out extra memory */
2690                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2691
2692         /*
2693          * Zero out HW ring memory. Zero out extra memory at the end of
2694          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2695          * reads extra memory as zeros.
2696          */
2697         for (i = 0; i < len; i++) {
2698                 rxq->rx_ring[i] = zeroed_desc;
2699         }
2700
2701         /*
2702          * initialize extra software ring entries. Space for these extra
2703          * entries is always allocated
2704          */
2705         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2706         for (i = rxq->nb_rx_desc; i < len; ++i) {
2707                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2708         }
2709
2710         rxq->rx_nb_avail = 0;
2711         rxq->rx_next_avail = 0;
2712         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2713         rxq->rx_tail = 0;
2714         rxq->nb_rx_hold = 0;
2715         rxq->pkt_first_seg = NULL;
2716         rxq->pkt_last_seg = NULL;
2717
2718 #ifdef RTE_IXGBE_INC_VECTOR
2719         rxq->rxrearm_start = 0;
2720         rxq->rxrearm_nb = 0;
2721 #endif
2722 }
2723
2724 int __attribute__((cold))
2725 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2726                          uint16_t queue_idx,
2727                          uint16_t nb_desc,
2728                          unsigned int socket_id,
2729                          const struct rte_eth_rxconf *rx_conf,
2730                          struct rte_mempool *mp)
2731 {
2732         const struct rte_memzone *rz;
2733         struct ixgbe_rx_queue *rxq;
2734         struct ixgbe_hw     *hw;
2735         uint16_t len;
2736         struct ixgbe_adapter *adapter =
2737                 (struct ixgbe_adapter *)dev->data->dev_private;
2738
2739         PMD_INIT_FUNC_TRACE();
2740         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2741
2742         /*
2743          * Validate number of receive descriptors.
2744          * It must not exceed the hardware maximum and must be a multiple
2745          * of IXGBE_RXD_ALIGN.
2746          */
2747         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2748                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2749                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2750                 return -EINVAL;
2751         }
2752
2753         /* Free memory prior to re-allocation if needed... */
2754         if (dev->data->rx_queues[queue_idx] != NULL) {
2755                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2756                 dev->data->rx_queues[queue_idx] = NULL;
2757         }
2758
2759         /* First allocate the rx queue data structure */
2760         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2761                                  RTE_CACHE_LINE_SIZE, socket_id);
2762         if (rxq == NULL)
2763                 return -ENOMEM;
2764         rxq->mb_pool = mp;
2765         rxq->nb_rx_desc = nb_desc;
2766         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2767         rxq->queue_id = queue_idx;
2768         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2769                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2770         rxq->port_id = dev->data->port_id;
2771         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2772                                                         0 : ETHER_CRC_LEN);
2773         rxq->drop_en = rx_conf->rx_drop_en;
2774         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2775
2776         /*
2777          * The packet type in the RX descriptor differs between NIC families:
2778          * some bits are used by x550 but reserved on other NICs,
2779          * so set a different mask for each family.
2780          */
2781         if (hw->mac.type == ixgbe_mac_X550 ||
2782             hw->mac.type == ixgbe_mac_X550EM_x ||
2783             hw->mac.type == ixgbe_mac_X550EM_a ||
2784             hw->mac.type == ixgbe_mac_X550_vf ||
2785             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2786             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2787                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2788         else
2789                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2790
2791         /*
2792          * Allocate RX ring hardware descriptors. A memzone large enough to
2793          * handle the maximum ring size is allocated in order to allow for
2794          * resizing in later calls to the queue setup function.
2795          */
2796         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2797                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2798         if (rz == NULL) {
2799                 ixgbe_rx_queue_release(rxq);
2800                 return -ENOMEM;
2801         }
2802
2803         /*
2804          * Zero init all the descriptors in the ring.
2805          */
2806         memset(rz->addr, 0, RX_RING_SZ);
2807
2808         /*
2809          * Set the tail/head register addresses: use VFRDT/VFRDH for virtual functions
2810          */
2811         if (hw->mac.type == ixgbe_mac_82599_vf ||
2812             hw->mac.type == ixgbe_mac_X540_vf ||
2813             hw->mac.type == ixgbe_mac_X550_vf ||
2814             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2815             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2816                 rxq->rdt_reg_addr =
2817                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2818                 rxq->rdh_reg_addr =
2819                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2820         } else {
2821                 rxq->rdt_reg_addr =
2822                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2823                 rxq->rdh_reg_addr =
2824                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2825         }
2826
2827         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2828         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2829
2830         /*
2831          * Certain constraints must be met in order to use the bulk buffer
2832          * allocation Rx burst function. If any Rx queue doesn't meet them,
2833          * the feature is disabled for the whole port.
2834          */
2835         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2836                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2837                                     "preconditions - canceling the feature for "
2838                                     "the whole port[%d]",
2839                              rxq->queue_id, rxq->port_id);
2840                 adapter->rx_bulk_alloc_allowed = false;
2841         }
2842
2843         /*
2844          * Allocate software ring. Allow for space at the end of the
2845          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2846          * function does not access an invalid memory region.
2847          */
2848         len = nb_desc;
2849         if (adapter->rx_bulk_alloc_allowed)
2850                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2851
2852         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2853                                           sizeof(struct ixgbe_rx_entry) * len,
2854                                           RTE_CACHE_LINE_SIZE, socket_id);
2855         if (!rxq->sw_ring) {
2856                 ixgbe_rx_queue_release(rxq);
2857                 return -ENOMEM;
2858         }
2859
2860         /*
2861          * Always allocate even if it's not going to be needed in order to
2862          * simplify the code.
2863          *
2864          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2865          * be requested in ixgbe_dev_rx_init(), which is called later from
2866          * dev_start() flow.
2867          */
2868         rxq->sw_sc_ring =
2869                 rte_zmalloc_socket("rxq->sw_sc_ring",
2870                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2871                                    RTE_CACHE_LINE_SIZE, socket_id);
2872         if (!rxq->sw_sc_ring) {
2873                 ixgbe_rx_queue_release(rxq);
2874                 return -ENOMEM;
2875         }
2876
2877         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2878                             "dma_addr=0x%"PRIx64,
2879                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2880                      rxq->rx_ring_phys_addr);
2881
2882         if (!rte_is_power_of_2(nb_desc)) {
2883                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2884                                     "preconditions - canceling the feature for "
2885                                     "the whole port[%d]",
2886                              rxq->queue_id, rxq->port_id);
2887                 adapter->rx_vec_allowed = false;
2888         } else
2889                 ixgbe_rxq_vec_setup(rxq);
2890
2891         dev->data->rx_queues[queue_idx] = rxq;
2892
2893         ixgbe_reset_rx_queue(adapter, rxq);
2894
2895         return 0;
2896 }
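/*
 * Illustrative usage sketch (hypothetical application fragment): the Rx
 * setup above is reached through the generic ethdev API with a pre-created
 * mbuf pool supplying receive buffers; port_id and ret are assumed to
 * exist in the caller.
 */
#if 0
	struct rte_mempool *mb_pool = rte_pktmbuf_pool_create("rx_pool",
			8192, 256, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			rte_socket_id());

	ret = rte_eth_rx_queue_setup(port_id, 0 /* queue */, 512 /* nb_desc */,
				     rte_eth_dev_socket_id(port_id),
				     NULL /* default rxconf */, mb_pool);
#endif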
2897
2898 uint32_t
2899 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2900 {
2901 #define IXGBE_RXQ_SCAN_INTERVAL 4
2902         volatile union ixgbe_adv_rx_desc *rxdp;
2903         struct ixgbe_rx_queue *rxq;
2904         uint32_t desc = 0;
2905
2906         if (rx_queue_id >= dev->data->nb_rx_queues) {
2907                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2908                 return 0;
2909         }
2910
2911         rxq = dev->data->rx_queues[rx_queue_id];
2912         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2913
2914         while ((desc < rxq->nb_rx_desc) &&
2915                 (rxdp->wb.upper.status_error &
2916                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2917                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2918                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2919                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2920                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2921                                 desc - rxq->nb_rx_desc]);
2922         }
2923
2924         return desc;
2925 }
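/*
 * Note that the count returned above is approximate: only every
 * IXGBE_RXQ_SCAN_INTERVAL-th descriptor is sampled, so with, for example,
 * 3 completed descriptors the function reports 4 (hypothetical figures,
 * for illustration only).
 */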
2926
2927 int
2928 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2929 {
2930         volatile union ixgbe_adv_rx_desc *rxdp;
2931         struct ixgbe_rx_queue *rxq = rx_queue;
2932         uint32_t desc;
2933
2934         if (unlikely(offset >= rxq->nb_rx_desc))
2935                 return 0;
2936         desc = rxq->rx_tail + offset;
2937         if (desc >= rxq->nb_rx_desc)
2938                 desc -= rxq->nb_rx_desc;
2939
2940         rxdp = &rxq->rx_ring[desc];
2941         return !!(rxdp->wb.upper.status_error &
2942                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2943 }
2944
2945 void __attribute__((cold))
2946 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2947 {
2948         unsigned i;
2949         struct ixgbe_adapter *adapter =
2950                 (struct ixgbe_adapter *)dev->data->dev_private;
2951
2952         PMD_INIT_FUNC_TRACE();
2953
2954         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2955                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2956
2957                 if (txq != NULL) {
2958                         txq->ops->release_mbufs(txq);
2959                         txq->ops->reset(txq);
2960                 }
2961         }
2962
2963         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2964                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2965
2966                 if (rxq != NULL) {
2967                         ixgbe_rx_queue_release_mbufs(rxq);
2968                         ixgbe_reset_rx_queue(adapter, rxq);
2969                 }
2970         }
2971 }
2972
2973 void
2974 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2975 {
2976         unsigned i;
2977
2978         PMD_INIT_FUNC_TRACE();
2979
2980         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2981                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2982                 dev->data->rx_queues[i] = NULL;
2983         }
2984         dev->data->nb_rx_queues = 0;
2985
2986         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2987                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2988                 dev->data->tx_queues[i] = NULL;
2989         }
2990         dev->data->nb_tx_queues = 0;
2991 }
2992
2993 /*********************************************************************
2994  *
2995  *  Device RX/TX init functions
2996  *
2997  **********************************************************************/
2998
2999 /**
3000  * Receive Side Scaling (RSS)
3001  * See section 7.1.2.8 in the following document:
3002  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3003  *
3004  * Principles:
3005  * The source and destination IP addresses of the IP header and the source
3006  * and destination ports of TCP/UDP headers, if any, of received packets are
3007  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3008  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3009  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3010  * RSS output index which is used as the RX queue index where to store the
3011  * received packets.
3012  * The following output is supplied in the RX write-back descriptor:
3013  *     - 32-bit result of the Microsoft RSS hash function,
3014  *     - 4-bit RSS type field.
3015  */
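/*
 * Minimal sketch of the RETA lookup described above, with a hypothetical
 * hash value and a hypothetical reta[] array standing in for the hardware
 * table; the real lookup is performed by the NIC.
 */
#if 0
	uint32_t rss_hash = 0x1234abcd;		/* reported in the Rx descriptor */
	uint8_t reta_idx = rss_hash & 0x7f;	/* seven LSBs -> 128-entry RETA */
	uint8_t rx_queue = reta[reta_idx];	/* RETA entry selects the Rx queue */
#endif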
3016
3017 /*
3018  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3019  * Used as the default key.
3020  */
3021 static uint8_t rss_intel_key[40] = {
3022         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3023         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3024         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3025         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3026         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3027 };
3028
3029 static void
3030 ixgbe_rss_disable(struct rte_eth_dev *dev)
3031 {
3032         struct ixgbe_hw *hw;
3033         uint32_t mrqc;
3034         uint32_t mrqc_reg;
3035
3036         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3037         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3038         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3039         mrqc &= ~IXGBE_MRQC_RSSEN;
3040         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3041 }
3042
3043 static void
3044 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3045 {
3046         uint8_t  *hash_key;
3047         uint32_t mrqc;
3048         uint32_t rss_key;
3049         uint64_t rss_hf;
3050         uint16_t i;
3051         uint32_t mrqc_reg;
3052         uint32_t rssrk_reg;
3053
3054         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3055         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3056
3057         hash_key = rss_conf->rss_key;
3058         if (hash_key != NULL) {
3059                 /* Fill in RSS hash key */
3060                 for (i = 0; i < 10; i++) {
3061                         rss_key  = hash_key[(i * 4)];
3062                         rss_key |= hash_key[(i * 4) + 1] << 8;
3063                         rss_key |= hash_key[(i * 4) + 2] << 16;
3064                         rss_key |= hash_key[(i * 4) + 3] << 24;
3065                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3066                 }
3067         }
3068
3069         /* Set configured hashing protocols in MRQC register */
3070         rss_hf = rss_conf->rss_hf;
3071         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3072         if (rss_hf & ETH_RSS_IPV4)
3073                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3074         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3075                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3076         if (rss_hf & ETH_RSS_IPV6)
3077                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3078         if (rss_hf & ETH_RSS_IPV6_EX)
3079                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3080         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3081                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3082         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3083                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3084         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3085                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3086         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3087                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3088         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3089                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3090         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3091 }
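/*
 * Worked example of the key packing above: with the default rss_intel_key,
 * hash_key[0..3] = 0x6D, 0x5A, 0x56, 0xDA are packed into the first RSSRK
 * register as 0xDA565A6D, i.e. byte 0 of the key ends up in the
 * least-significant byte of the register.
 */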
3092
3093 int
3094 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3095                           struct rte_eth_rss_conf *rss_conf)
3096 {
3097         struct ixgbe_hw *hw;
3098         uint32_t mrqc;
3099         uint64_t rss_hf;
3100         uint32_t mrqc_reg;
3101
3102         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3103
3104         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3105                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3106                         "NIC.");
3107                 return -ENOTSUP;
3108         }
3109         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3110
3111         /*
3112          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3113          *     "RSS enabling cannot be done dynamically while it must be
3114          *      preceded by a software reset"
3115          * Before changing anything, first check that the update RSS operation
3116          * does not attempt to disable RSS, if RSS was enabled at
3117          * initialization time, or does not attempt to enable RSS, if RSS was
3118          * disabled at initialization time.
3119          */
3120         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3121         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3122         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3123                 if (rss_hf != 0) /* Enable RSS */
3124                         return -(EINVAL);
3125                 return 0; /* Nothing to do */
3126         }
3127         /* RSS enabled */
3128         if (rss_hf == 0) /* Disable RSS */
3129                 return -(EINVAL);
3130         ixgbe_hw_rss_hash_set(hw, rss_conf);
3131         return 0;
3132 }
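/*
 * Illustrative usage sketch (hypothetical application fragment), assuming
 * RSS was enabled at initialization time; reached through the generic
 * ethdev API.
 */
#if 0
	struct rte_eth_rss_conf rss_conf = {
		.rss_key = NULL,	/* keep the currently programmed key */
		.rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
	};

	ret = rte_eth_dev_rss_hash_update(port_id, &rss_conf);
#endif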
3133
3134 int
3135 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3136                             struct rte_eth_rss_conf *rss_conf)
3137 {
3138         struct ixgbe_hw *hw;
3139         uint8_t *hash_key;
3140         uint32_t mrqc;
3141         uint32_t rss_key;
3142         uint64_t rss_hf;
3143         uint16_t i;
3144         uint32_t mrqc_reg;
3145         uint32_t rssrk_reg;
3146
3147         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3148         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3149         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3150         hash_key = rss_conf->rss_key;
3151         if (hash_key != NULL) {
3152                 /* Return RSS hash key */
3153                 for (i = 0; i < 10; i++) {
3154                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3155                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3156                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3157                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3158                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3159                 }
3160         }
3161
3162         /* Get RSS functions configured in MRQC register */
3163         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3164         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3165                 rss_conf->rss_hf = 0;
3166                 return 0;
3167         }
3168         rss_hf = 0;
3169         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3170                 rss_hf |= ETH_RSS_IPV4;
3171         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3172                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3173         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3174                 rss_hf |= ETH_RSS_IPV6;
3175         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3176                 rss_hf |= ETH_RSS_IPV6_EX;
3177         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3178                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3179         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3180                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3181         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3182                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3183         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3184                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3185         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3186                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3187         rss_conf->rss_hf = rss_hf;
3188         return 0;
3189 }
3190
3191 static void
3192 ixgbe_rss_configure(struct rte_eth_dev *dev)
3193 {
3194         struct rte_eth_rss_conf rss_conf;
3195         struct ixgbe_hw *hw;
3196         uint32_t reta;
3197         uint16_t i;
3198         uint16_t j;
3199         uint16_t sp_reta_size;
3200         uint32_t reta_reg;
3201
3202         PMD_INIT_FUNC_TRACE();
3203         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3204
3205         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3206
3207         /*
3208          * Fill in redirection table
3209          * The byte-swap is needed because NIC registers are in
3210          * little-endian order.
3211          */
3212         reta = 0;
3213         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3214                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3215
3216                 if (j == dev->data->nb_rx_queues)
3217                         j = 0;
3218                 reta = (reta << 8) | j;
3219                 if ((i & 3) == 3)
3220                         IXGBE_WRITE_REG(hw, reta_reg,
3221                                         rte_bswap32(reta));
3222         }
3223
3224         /*
3225          * Configure the RSS key and the RSS protocols used to compute
3226          * the RSS hash of input packets.
3227          */
3228         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3229         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3230                 ixgbe_rss_disable(dev);
3231                 return;
3232         }
3233         if (rss_conf.rss_key == NULL)
3234                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3235         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3236 }
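/*
 * Worked example of the RETA packing above for a hypothetical 4-queue
 * port: entries i = 0..3 receive queues 0, 1, 2, 3, so after the four
 * shifts reta == 0x00010203 and rte_bswap32() writes 0x03020100 into the
 * first RETA register, placing queue 0 in the least-significant byte as
 * the little-endian register layout expects.
 */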
3237
3238 #define NUM_VFTA_REGISTERS 128
3239 #define NIC_RX_BUFFER_SIZE 0x200
3240 #define X550_RX_BUFFER_SIZE 0x180
3241
3242 static void
3243 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3244 {
3245         struct rte_eth_vmdq_dcb_conf *cfg;
3246         struct ixgbe_hw *hw;
3247         enum rte_eth_nb_pools num_pools;
3248         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3249         uint16_t pbsize;
3250         uint8_t nb_tcs; /* number of traffic classes */
3251         int i;
3252
3253         PMD_INIT_FUNC_TRACE();
3254         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3255         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3256         num_pools = cfg->nb_queue_pools;
3257         /* Check we have a valid number of pools */
3258         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3259                 ixgbe_rss_disable(dev);
3260                 return;
3261         }
3262         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3263         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3264
3265         /*
3266          * RXPBSIZE
3267          * split rx buffer up into sections, each for 1 traffic class
3268          */
3269         switch (hw->mac.type) {
3270         case ixgbe_mac_X550:
3271         case ixgbe_mac_X550EM_x:
3272         case ixgbe_mac_X550EM_a:
3273                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3274                 break;
3275         default:
3276                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3277                 break;
3278         }
3279         for (i = 0; i < nb_tcs; i++) {
3280                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3281
3282                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3283                 /* clear 10 bits. */
3284                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3285                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3286         }
3287         /* zero alloc all unused TCs */
3288         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3289                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3290
3291                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3292                 /* clear 10 bits. */
3293                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3294         }
3295
3296         /* MRQC: enable vmdq and dcb */
3297         mrqc = (num_pools == ETH_16_POOLS) ?
3298                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3299         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3300
3301         /* PFVTCTL: turn on virtualisation and set the default pool */
3302         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3303         if (cfg->enable_default_pool) {
3304                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3305         } else {
3306                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3307         }
3308
3309         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3310
3311         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3312         queue_mapping = 0;
3313         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3314                 /*
3315                  * mapping is done with 3 bits per priority,
3316                  * so shift by i*3 each time
3317                  */
3318                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3319
3320         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3321
3322         /* RTRPCS: DCB related */
3323         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3324
3325         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3326         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3327         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3328         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3329
3330         /* VFTA - enable all vlan filters */
3331         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3332                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3333         }
3334
3335         /* VFRE: pool enabling for receive - 16 or 32 */
3336         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3337                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3338
3339         /*
3340          * MPSAR - allow pools to read specific mac addresses
3341          * In this case, all pools should be able to read from mac addr 0
3342          */
3343         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3344         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3345
3346         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3347         for (i = 0; i < cfg->nb_pool_maps; i++) {
3348                 /* set vlan id in VF register and set the valid bit */
3349                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3350                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3351                 /*
3352                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3353                  * pools, we only need to use the first half of the register
3354                  * i.e. bits 0-31
3355                  */
3356                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3357         }
3358 }
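/*
 * Worked example for the RXPBSIZE split above (hypothetical 82599 setup):
 * 16 pools give 8 traffic classes, so pbsize = NIC_RX_BUFFER_SIZE / 8 =
 * 0x200 / 8 = 0x40, i.e. roughly 64 KB of packet buffer per TC once
 * shifted into the RXPBSIZE field.
 */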
3359
3360 /**
3361  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3362  * @dev: pointer to eth_dev structure
3363  * @dcb_config: pointer to ixgbe_dcb_config structure
3364  */
3365 static void
3366 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3367                        struct ixgbe_dcb_config *dcb_config)
3368 {
3369         uint32_t reg;
3370         uint32_t q;
3371         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3372
3373         PMD_INIT_FUNC_TRACE();
3374         if (hw->mac.type != ixgbe_mac_82598EB) {
3375                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3376                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3377                 reg |= IXGBE_RTTDCS_ARBDIS;
3378                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3379
3380                 /* Enable DCB for Tx with 8 TCs */
3381                 if (dcb_config->num_tcs.pg_tcs == 8) {
3382                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3383                 } else {
3384                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3385                 }
3386                 if (dcb_config->vt_mode)
3387                         reg |= IXGBE_MTQC_VT_ENA;
3388                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3389
3390                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3391                         /* Disable drop for all queues in VMDQ mode*/
3392                         for (q = 0; q < 128; q++)
3393                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3394                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3395                 } else {
3396                         /* Enable drop for all queues in SRIOV mode */
3397                         for (q = 0; q < 128; q++)
3398                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3399                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE));
3400                 }
3401
3402                 /* Enable the Tx desc arbiter */
3403                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3404                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3405                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3406
3407                 /* Enable Security TX Buffer IFG for DCB */
3408                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3409                 reg |= IXGBE_SECTX_DCB;
3410                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3411         }
3412 }
3413
3414 /**
3415  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3416  * @dev: pointer to rte_eth_dev structure
3417  * @dcb_config: pointer to ixgbe_dcb_config structure
3418  */
3419 static void
3420 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3421                         struct ixgbe_dcb_config *dcb_config)
3422 {
3423         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3424                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3425         struct ixgbe_hw *hw =
3426                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3427
3428         PMD_INIT_FUNC_TRACE();
3429         if (hw->mac.type != ixgbe_mac_82598EB)
3430                 /*PF VF Transmit Enable*/
3431                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3432                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3433
3434         /*Configure general DCB TX parameters*/
3435         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3436 }
3437
3438 static void
3439 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3440                         struct ixgbe_dcb_config *dcb_config)
3441 {
3442         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3443                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3444         struct ixgbe_dcb_tc_config *tc;
3445         uint8_t i, j;
3446
3447         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3448         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3449                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3450                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3451         } else {
3452                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3453                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3454         }
3455         /* User Priority to Traffic Class mapping */
3456         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3457                 j = vmdq_rx_conf->dcb_tc[i];
3458                 tc = &dcb_config->tc_config[j];
3459                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3460                                                 (uint8_t)(1 << j);
3461         }
3462 }
3463
3464 static void
3465 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3466                         struct ixgbe_dcb_config *dcb_config)
3467 {
3468         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3469                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3470         struct ixgbe_dcb_tc_config *tc;
3471         uint8_t i, j;
3472
3473         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3474         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3475                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3476                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3477         } else {
3478                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3479                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3480         }
3481
3482         /* User Priority to Traffic Class mapping */
3483         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3484                 j = vmdq_tx_conf->dcb_tc[i];
3485                 tc = &dcb_config->tc_config[j];
3486                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3487                                                 (uint8_t)(1 << j);
3488         }
3489 }
3490
3491 static void
3492 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3493                 struct ixgbe_dcb_config *dcb_config)
3494 {
3495         struct rte_eth_dcb_rx_conf *rx_conf =
3496                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3497         struct ixgbe_dcb_tc_config *tc;
3498         uint8_t i, j;
3499
3500         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3501         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3502
3503         /* User Priority to Traffic Class mapping */
3504         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3505                 j = rx_conf->dcb_tc[i];
3506                 tc = &dcb_config->tc_config[j];
3507                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3508                                                 (uint8_t)(1 << j);
3509         }
3510 }
3511
3512 static void
3513 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3514                 struct ixgbe_dcb_config *dcb_config)
3515 {
3516         struct rte_eth_dcb_tx_conf *tx_conf =
3517                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3518         struct ixgbe_dcb_tc_config *tc;
3519         uint8_t i, j;
3520
3521         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3522         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3523
3524         /* User Priority to Traffic Class mapping */
3525         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3526                 j = tx_conf->dcb_tc[i];
3527                 tc = &dcb_config->tc_config[j];
3528                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3529                                                 (uint8_t)(1 << j);
3530         }
3531 }
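/*
 * Example of the user-priority to traffic-class mapping above with a
 * hypothetical configuration: dcb_tc = {0, 0, 0, 0, 1, 1, 2, 3} assigns
 * user priorities 0-3 to TC0, 4-5 to TC1, 6 to TC2 and 7 to TC3.
 */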
3532
3533 /**
3534  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3535  * @hw: pointer to hardware structure
3536  * @dcb_config: pointer to ixgbe_dcb_config structure
3537  */
3538 static void
3539 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3540                struct ixgbe_dcb_config *dcb_config)
3541 {
3542         uint32_t reg;
3543         uint32_t vlanctrl;
3544         uint8_t i;
3545
3546         PMD_INIT_FUNC_TRACE();
3547         /*
3548          * Disable the arbiter before changing parameters
3549          * (always enable recycle mode; WSP)
3550          */
3551         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3552         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3553
3554         if (hw->mac.type != ixgbe_mac_82598EB) {
3555                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3556                 if (dcb_config->num_tcs.pg_tcs == 4) {
3557                         if (dcb_config->vt_mode)
3558                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3559                                         IXGBE_MRQC_VMDQRT4TCEN;
3560                         else {
3561                                 /* whether the mode is DCB or DCB_RSS, just
3562                                  * set the MRQE to RSSxTCEN; RSS itself is
3563                                  * controlled by the RSS_FIELD bits
3564                                  */
3565                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3566                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3567                                         IXGBE_MRQC_RTRSS4TCEN;
3568                         }
3569                 }
3570                 if (dcb_config->num_tcs.pg_tcs == 8) {
3571                         if (dcb_config->vt_mode)
3572                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3573                                         IXGBE_MRQC_VMDQRT8TCEN;
3574                         else {
3575                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3576                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3577                                         IXGBE_MRQC_RTRSS8TCEN;
3578                         }
3579                 }
3580
3581                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3582         }
3583
3584         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3585         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3586         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3587         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3588
3589         /* VFTA - enable all vlan filters */
3590         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3591                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3592         }
3593
3594         /*
3595          * Configure Rx packet plane (recycle mode; WSP) and
3596          * enable arbiter
3597          */
3598         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3599         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3600 }
3601
3602 static void
3603 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3604                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3605 {
3606         switch (hw->mac.type) {
3607         case ixgbe_mac_82598EB:
3608                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3609                 break;
3610         case ixgbe_mac_82599EB:
3611         case ixgbe_mac_X540:
3612         case ixgbe_mac_X550:
3613         case ixgbe_mac_X550EM_x:
3614         case ixgbe_mac_X550EM_a:
3615                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3616                                                   tsa, map);
3617                 break;
3618         default:
3619                 break;
3620         }
3621 }
3622
3623 static void
3624 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3625                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3626 {
3627         switch (hw->mac.type) {
3628         case ixgbe_mac_82598EB:
3629                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3630                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3631                 break;
3632         case ixgbe_mac_82599EB:
3633         case ixgbe_mac_X540:
3634         case ixgbe_mac_X550:
3635         case ixgbe_mac_X550EM_x:
3636         case ixgbe_mac_X550EM_a:
3637                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3638                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3639                 break;
3640         default:
3641                 break;
3642         }
3643 }
3644
3645 #define DCB_RX_CONFIG  1
3646 #define DCB_TX_CONFIG  1
3647 #define DCB_TX_PB      1024
3648 /**
3649  * ixgbe_dcb_hw_configure - Enable DCB and configure
3650  * general DCB parameters in both VT mode and non-VT mode
3651  * @dev: pointer to rte_eth_dev structure
3652  * @dcb_config: pointer to ixgbe_dcb_config structure
3653  */
3654 static int
3655 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3656                         struct ixgbe_dcb_config *dcb_config)
3657 {
3658         int     ret = 0;
3659         uint8_t i, pfc_en, nb_tcs;
3660         uint16_t pbsize, rx_buffer_size;
3661         uint8_t config_dcb_rx = 0;
3662         uint8_t config_dcb_tx = 0;
3663         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3664         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3665         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3666         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3667         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3668         struct ixgbe_dcb_tc_config *tc;
3669         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3670         struct ixgbe_hw *hw =
3671                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3672
3673         switch (dev->data->dev_conf.rxmode.mq_mode) {
3674         case ETH_MQ_RX_VMDQ_DCB:
3675                 dcb_config->vt_mode = true;
3676                 if (hw->mac.type != ixgbe_mac_82598EB) {
3677                         config_dcb_rx = DCB_RX_CONFIG;
3678                         /*
3679                          *get dcb and VT rx configuration parameters
3680                          *from rte_eth_conf
3681                          */
3682                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3683                         /*Configure general VMDQ and DCB RX parameters*/
3684                         ixgbe_vmdq_dcb_configure(dev);
3685                 }
3686                 break;
3687         case ETH_MQ_RX_DCB:
3688         case ETH_MQ_RX_DCB_RSS:
3689                 dcb_config->vt_mode = false;
3690                 config_dcb_rx = DCB_RX_CONFIG;
3691                 /* Get DCB RX configuration parameters from rte_eth_conf */
3692                 ixgbe_dcb_rx_config(dev, dcb_config);
3693                 /*Configure general DCB RX parameters*/
3694                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3695                 break;
3696         default:
3697                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3698                 break;
3699         }
3700         switch (dev->data->dev_conf.txmode.mq_mode) {
3701         case ETH_MQ_TX_VMDQ_DCB:
3702                 dcb_config->vt_mode = true;
3703                 config_dcb_tx = DCB_TX_CONFIG;
3704                 /* get DCB and VT TX configuration parameters
3705                  * from rte_eth_conf
3706                  */
3707                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3708                 /* Configure general VMDQ and DCB TX parameters */
3709                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3710                 break;
3711
3712         case ETH_MQ_TX_DCB:
3713                 dcb_config->vt_mode = false;
3714                 config_dcb_tx = DCB_TX_CONFIG;
3715                 /* Get DCB TX configuration parameters from rte_eth_conf */
3716                 ixgbe_dcb_tx_config(dev, dcb_config);
3717                 /* Configure general DCB TX parameters */
3718                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3719                 break;
3720         default:
3721                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3722                 break;
3723         }
3724
3725         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3726         /* Unpack map */
3727         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3728         if (nb_tcs == ETH_4_TCS) {
3729                 /* Avoid un-configured priority mapping to TC0 */
3730                 uint8_t j = 4;
3731                 uint8_t mask = 0xFF;
3732
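                     /*
                      * Build a mask of the TCs not referenced by user priorities
                      * 0-3, then map the remaining priorities (4-7) onto those
                      * unused TCs so that none of them silently falls back to TC0.
                      */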
3733                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3734                         mask = (uint8_t)(mask & (~(1 << map[i])));
3735                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3736                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3737                                 map[j++] = i;
3738                         mask >>= 1;
3739                 }
3740                 /* Re-configure 4 TCs BW */
3741                 for (i = 0; i < nb_tcs; i++) {
3742                         tc = &dcb_config->tc_config[i];
3743                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3744                                                 (uint8_t)(100 / nb_tcs);
3745                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3746                                                 (uint8_t)(100 / nb_tcs);
3747                 }
3748                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3749                         tc = &dcb_config->tc_config[i];
3750                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3751                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3752                 }
3753         }
3754
3755         switch (hw->mac.type) {
3756         case ixgbe_mac_X550:
3757         case ixgbe_mac_X550EM_x:
3758         case ixgbe_mac_X550EM_a:
3759                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3760                 break;
3761         default:
3762                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3763                 break;
3764         }
3765
3766         if (config_dcb_rx) {
3767                 /* Set RX buffer size */
3768                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
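                     /*
                      * pbsize is expressed in KB; the shift positions it in the
                      * SIZE field of the RXPBSIZE register.
                      */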
3769                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3770
3771                 for (i = 0; i < nb_tcs; i++) {
3772                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3773                 }
3774                 /* zero alloc all unused TCs */
3775                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3776                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3777                 }
3778         }
3779         if (config_dcb_tx) {
3780                 /* Only an equally distributed Tx packet buffer
3781                  * strategy is supported.
3782                  */
3783                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
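                     /*
                      * TXPBTHRESH is programmed in KB: convert the per-TC packet
                      * buffer size to KB (DCB_TX_PB bytes per KB) and leave room
                      * for one maximum-sized packet (IXGBE_TXPKT_SIZE_MAX, in KB).
                      */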
3784                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3785
3786                 for (i = 0; i < nb_tcs; i++) {
3787                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3788                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3789                 }
3790                 /* Clear unused TCs, if any, to zero buffer size*/
3791                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3792                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3793                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3794                 }
3795         }
3796
3797         /* Calculate traffic class credits */
3798         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3799                                 IXGBE_DCB_TX_CONFIG);
3800         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3801                                 IXGBE_DCB_RX_CONFIG);
3802
3803         if (config_dcb_rx) {
3804                 /* Unpack CEE standard containers */
3805                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3806                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3807                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3808                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3809                 /* Configure PG(ETS) RX */
3810                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3811         }
3812
3813         if (config_dcb_tx) {
3814                 /* Unpack CEE standard containers */
3815                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3816                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3817                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3818                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3819                 /* Configure PG(ETS) TX */
3820                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3821         }
3822
3823         /* Configure queue statistics registers */
3824         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3825
3826         /* Check if the PFC is supported */
3827         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3828                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3829                 for (i = 0; i < nb_tcs; i++) {
3830                         /*
3831                          * high_water is 3/4 and low_water is 1/4 of the per-TC
3832                          * buffer, e.g. 48 and 16 with 8 TCs and a 512 KB buffer.
3833                          */
3834                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3835                         hw->fc.low_water[i] = pbsize / 4;
3836                         /* Enable pfc for this TC */
3837                         tc = &dcb_config->tc_config[i];
3838                         tc->pfc = ixgbe_dcb_pfc_enabled;
3839                 }
3840                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
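                     /* In 4-TC mode only the lower four PFC enable bits are valid */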
3841                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3842                         pfc_en &= 0x0F;
3843                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3844         }
3845
3846         return ret;
3847 }
3848
3849 /**
3850  * ixgbe_configure_dcb - Configure DCB  Hardware
3851  * @dev: pointer to rte_eth_dev
3852  */
3853 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3854 {
3855         struct ixgbe_dcb_config *dcb_cfg =
3856                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3857         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3858
3859         PMD_INIT_FUNC_TRACE();
3860
3861         /* Check whether the mq_mode is supported for DCB */
3862         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3863             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3864             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3865                 return;
3866
3867         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3868                 return;
3869
3870         /* Configure DCB hardware */
3871         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3872 }
3873
3874 /*
3875  * VMDq is only supported on 10 GbE NICs.
3876  */
3877 static void
3878 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3879 {
3880         struct rte_eth_vmdq_rx_conf *cfg;
3881         struct ixgbe_hw *hw;
3882         enum rte_eth_nb_pools num_pools;
3883         uint32_t mrqc, vt_ctl, vlanctrl;
3884         uint32_t vmolr = 0;
3885         int i;
3886
3887         PMD_INIT_FUNC_TRACE();
3888         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3889         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3890         num_pools = cfg->nb_queue_pools;
3891
3892         ixgbe_rss_disable(dev);
3893
3894         /* MRQC: enable vmdq */
3895         mrqc = IXGBE_MRQC_VMDQEN;
3896         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3897
3898         /* PFVTCTL: turn on virtualisation and set the default pool */
3899         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3900         if (cfg->enable_default_pool)
3901                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3902         else
3903                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3904
3905         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3906
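             /*
              * Translate the VMDq rx_mode flags into VMOLR bits and apply
              * them to every pool.
              */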
3907         for (i = 0; i < (int)num_pools; i++) {
3908                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3909                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3910         }
3911
3912         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3913         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3914         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3915         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3916
3917         /* VFTA - enable all vlan filters */
3918         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3919                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3920
3921         /* VFRE: enable receive for pools 0-31 (and 32-63 with 64 pools) */
3922         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3923         if (num_pools == ETH_64_POOLS)
3924                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3925
3926         /*
3927          * MPSAR - allow pools to read specific mac addresses
3928          * In this case, all pools should be able to read from mac addr 0
3929          */
3930         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3931         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3932
3933         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3934         for (i = 0; i < cfg->nb_pool_maps; i++) {
3935                 /* set vlan id in VF register and set the valid bit */
3936                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3937                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3938                 /*
3939                  * Put the allowed pools in the VLVFB registers. Each VLVF entry
3940                  * has two 32-bit VLVFB registers: the even one covers pools 0-31
3941                  * and the odd one pools 32-63; write the half holding the pools.
3942                  */
3943                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3944                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3945                                         (cfg->pool_map[i].pools & UINT32_MAX));
3946                 else
3947                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3948                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3949
3950         }
3951
3952         /* PFDMA Tx General Switch Control: enables VMDq loopback */
3953         if (cfg->enable_loop_back) {
3954                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3955                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3956                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3957         }
3958
3959         IXGBE_WRITE_FLUSH(hw);
3960 }
3961
3962 /*
3963  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3964  * @hw: pointer to hardware structure
3965  */
3966 static void
3967 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3968 {
3969         uint32_t reg;
3970         uint32_t q;
3971
3972         PMD_INIT_FUNC_TRACE();
3973         /* PF VF Transmit Enable */
3974         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3975         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3976
3977         /* Disable the Tx desc arbiter so that MTQC can be changed */
3978         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3979         reg |= IXGBE_RTTDCS_ARBDIS;
3980         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3981
3982         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3983         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3984
3985         /* Disable drop for all queues */
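             /*
              * QDE is written one queue at a time: IXGBE_QDE_WRITE selects a
              * write and the index field selects the queue; the drop-enable
              * bit itself is left clear.
              */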
3986         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3987                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3988                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3989
3990         /* Enable the Tx desc arbiter */
3991         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3992         reg &= ~IXGBE_RTTDCS_ARBDIS;
3993         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3994
3995         IXGBE_WRITE_FLUSH(hw);
3996 }
3997
3998 static int __attribute__((cold))
3999 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4000 {
4001         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4002         uint64_t dma_addr;
4003         unsigned int i;
4004
4005         /* Initialize software ring entries */
4006         for (i = 0; i < rxq->nb_rx_desc; i++) {
4007                 volatile union ixgbe_adv_rx_desc *rxd;
4008                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4009
4010                 if (mbuf == NULL) {
4011                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4012                                      (unsigned) rxq->queue_id);
4013                         return -ENOMEM;
4014                 }
4015
4016                 rte_mbuf_refcnt_set(mbuf, 1);
4017                 mbuf->next = NULL;
4018                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4019                 mbuf->nb_segs = 1;
4020                 mbuf->port = rxq->port_id;
4021
4022                 dma_addr =
4023                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4024                 rxd = &rxq->rx_ring[i];
4025                 rxd->read.hdr_addr = 0;
4026                 rxd->read.pkt_addr = dma_addr;
4027                 rxe[i].mbuf = mbuf;
4028         }
4029
4030         return 0;
4031 }
4032
4033 static int
4034 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4035 {
4036         struct ixgbe_hw *hw;
4037         uint32_t mrqc;
4038
4039         ixgbe_rss_configure(dev);
4040
4041         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4042
4043         /* MRQC: enable VF RSS */
4044         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4045         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4046         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4047         case ETH_64_POOLS:
4048                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4049                 break;
4050
4051         case ETH_32_POOLS:
4052                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4053                 break;
4054
4055         default:
4056                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4057                 return -EINVAL;
4058         }
4059
4060         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4061
4062         return 0;
4063 }
4064
4065 static int
4066 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4067 {
4068         struct ixgbe_hw *hw =
4069                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4070
4071         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4072         case ETH_64_POOLS:
4073                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4074                         IXGBE_MRQC_VMDQEN);
4075                 break;
4076
4077         case ETH_32_POOLS:
4078                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4079                         IXGBE_MRQC_VMDQRT4TCEN);
4080                 break;
4081
4082         case ETH_16_POOLS:
4083                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4084                         IXGBE_MRQC_VMDQRT8TCEN);
4085                 break;
4086         default:
4087                 PMD_INIT_LOG(ERR,
4088                         "invalid pool number in IOV mode");
4089                 break;
4090         }
4091         return 0;
4092 }
4093
4094 static int
4095 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4096 {
4097         struct ixgbe_hw *hw =
4098                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4099
4100         if (hw->mac.type == ixgbe_mac_82598EB)
4101                 return 0;
4102
4103         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4104                 /*
4105                  * SRIOV inactive scheme
4106                  * any DCB/RSS w/o VMDq multi-queue setting
4107                  */
4108                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4109                 case ETH_MQ_RX_RSS:
4110                 case ETH_MQ_RX_DCB_RSS:
4111                 case ETH_MQ_RX_VMDQ_RSS:
4112                         ixgbe_rss_configure(dev);
4113                         break;
4114
4115                 case ETH_MQ_RX_VMDQ_DCB:
4116                         ixgbe_vmdq_dcb_configure(dev);
4117                         break;
4118
4119                 case ETH_MQ_RX_VMDQ_ONLY:
4120                         ixgbe_vmdq_rx_hw_configure(dev);
4121                         break;
4122
4123                 case ETH_MQ_RX_NONE:
4124                 default:
4125                         /* If mq_mode is none, disable RSS. */
4126                         ixgbe_rss_disable(dev);
4127                         break;
4128                 }
4129         } else {
4130                 /*
4131                  * SRIOV active scheme
4132                  * Support RSS together with VMDq & SRIOV
4133                  */
4134                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4135                 case ETH_MQ_RX_RSS:
4136                 case ETH_MQ_RX_VMDQ_RSS:
4137                         ixgbe_config_vf_rss(dev);
4138                         break;
4139                 case ETH_MQ_RX_VMDQ_DCB:
4140                         ixgbe_vmdq_dcb_configure(dev);
4141                         break;
4142                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4143                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4144                         PMD_INIT_LOG(ERR,
4145                                 "Could not support DCB/RSS with VMDq & SRIOV");
4146                         return -1;
4147                 default:
4148                         ixgbe_config_vf_default(dev);
4149                         break;
4150                 }
4151         }
4152
4153         return 0;
4154 }
4155
4156 static int
4157 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4158 {
4159         struct ixgbe_hw *hw =
4160                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4161         uint32_t mtqc;
4162         uint32_t rttdcs;
4163
4164         if (hw->mac.type == ixgbe_mac_82598EB)
4165                 return 0;
4166
4167         /* disable arbiter before setting MTQC */
4168         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4169         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4170         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4171
4172         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4173                 /*
4174                  * SRIOV inactive scheme
4175                  * any DCB w/o VMDq multi-queue setting
4176                  */
4177                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4178                         ixgbe_vmdq_tx_hw_configure(hw);
4179                 else {
4180                         mtqc = IXGBE_MTQC_64Q_1PB;
4181                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4182                 }
4183         } else {
4184                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4185
4186                 /*
4187                  * SRIOV active scheme
4188                  * FIXME if support DCB together with VMDq & SRIOV
4189                  */
4190                 case ETH_64_POOLS:
4191                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4192                         break;
4193                 case ETH_32_POOLS:
4194                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4195                         break;
4196                 case ETH_16_POOLS:
4197                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4198                                 IXGBE_MTQC_8TC_8TQ;
4199                         break;
4200                 default:
4201                         mtqc = IXGBE_MTQC_64Q_1PB;
4202                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4203                 }
4204                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4205         }
4206
4207         /* re-enable arbiter */
4208         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4209         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4210
4211         return 0;
4212 }
4213
4214 /**
4215  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4216  *
4217  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4218  * spec rev. 3.0 chapter 8.2.3.8.13.
4219  *
4220  * @pool Memory pool of the Rx queue
4221  */
4222 static inline uint32_t
4223 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4224 {
4225         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4226
4227         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4228         uint16_t maxdesc =
4229                 IPV4_MAX_PKT_LEN /
4230                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4231
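             /* Pick the largest MAXDESC encoding that does not exceed maxdesc */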
4232         if (maxdesc >= 16)
4233                 return IXGBE_RSCCTL_MAXDESC_16;
4234         else if (maxdesc >= 8)
4235                 return IXGBE_RSCCTL_MAXDESC_8;
4236         else if (maxdesc >= 4)
4237                 return IXGBE_RSCCTL_MAXDESC_4;
4238         else
4239                 return IXGBE_RSCCTL_MAXDESC_1;
4240 }
4241
4242 /**
4243  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4244  * interrupt
4245  *
4246  * (Taken from FreeBSD tree)
4247  * (yes this is all very magic and confusing :)
4248  *
4249  * @dev port handle
4250  * @entry the register array entry
4251  * @vector the MSIX vector for this queue
4252  * @type RX/TX/MISC
4253  */
4254 static void
4255 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4256 {
4257         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4258         u32 ivar, index;
4259
4260         vector |= IXGBE_IVAR_ALLOC_VAL;
4261
4262         switch (hw->mac.type) {
4263
4264         case ixgbe_mac_82598EB:
4265                 if (type == -1)
4266                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4267                 else
4268                         entry += (type * 64);
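                             /*
                              * Each 32-bit IVAR register holds four 8-bit entries:
                              * entry >> 2 selects the register, entry & 0x3 the byte.
                              */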
4269                 index = (entry >> 2) & 0x1F;
4270                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4271                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4272                 ivar |= (vector << (8 * (entry & 0x3)));
4273                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4274                 break;
4275
4276         case ixgbe_mac_82599EB:
4277         case ixgbe_mac_X540:
4278                 if (type == -1) { /* MISC IVAR */
4279                         index = (entry & 1) * 8;
4280                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4281                         ivar &= ~(0xFF << index);
4282                         ivar |= (vector << index);
4283                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4284                 } else {        /* RX/TX IVARS */
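                             /*
                              * Each IVAR register covers a queue pair: bit 0 of
                              * the entry selects the half and the type (Rx/Tx)
                              * selects the byte within it.
                              */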
4285                         index = (16 * (entry & 1)) + (8 * type);
4286                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4287                         ivar &= ~(0xFF << index);
4288                         ivar |= (vector << index);
4289                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4290                 }
4291
4292                 break;
4293
4294         default:
4295                 break;
4296         }
4297 }
4298
4299 void __attribute__((cold))
4300 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4301 {
4302         uint16_t i, rx_using_sse;
4303         struct ixgbe_adapter *adapter =
4304                 (struct ixgbe_adapter *)dev->data->dev_private;
4305
4306         /*
4307          * Vector Rx can only be used when a number of configuration
4308          * conditions are met and Rx Bulk Allocation is allowed.
4309          */
4310         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4311             !adapter->rx_bulk_alloc_allowed) {
4312                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4313                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4314                                     "not enabled",
4315                              dev->data->port_id);
4316
4317                 adapter->rx_vec_allowed = false;
4318         }
4319
4320         /*
4321          * Initialize the appropriate LRO callback.
4322          *
4323          * If all queues satisfy the bulk allocation preconditions
4324          * (adapter->rx_bulk_alloc_allowed is TRUE) then bulk allocation may
4325          * be used. Otherwise use the single allocation version.
4326          */
4327         if (dev->data->lro) {
4328                 if (adapter->rx_bulk_alloc_allowed) {
4329                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4330                                            "allocation version");
4331                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4332                 } else {
4333                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4334                                            "allocation version");
4335                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4336                 }
4337         } else if (dev->data->scattered_rx) {
4338                 /*
4339                  * Set the non-LRO scattered callback: there are Vector and
4340                  * single allocation versions.
4341                  */
4342                 if (adapter->rx_vec_allowed) {
4343                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4344                                             "callback (port=%d).",
4345                                      dev->data->port_id);
4346
4347                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4348                 } else if (adapter->rx_bulk_alloc_allowed) {
4349                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4350                                            "allocation callback (port=%d).",
4351                                      dev->data->port_id);
4352                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4353                 } else {
4354                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4355                                             "single allocation) "
4356                                             "Scattered Rx callback "
4357                                             "(port=%d).",
4358                                      dev->data->port_id);
4359
4360                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4361                 }
4362         /*
4363          * Below we set "simple" callbacks according to port/queues parameters.
4364          * If parameters allow we are going to choose between the following
4365          * callbacks:
4366          *    - Vector
4367          *    - Bulk Allocation
4368          *    - Single buffer allocation (the simplest one)
4369          */
4370         } else if (adapter->rx_vec_allowed) {
4371                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4372                                     "burst size no less than %d (port=%d).",
4373                              RTE_IXGBE_DESCS_PER_LOOP,
4374                              dev->data->port_id);
4375
4376                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4377         } else if (adapter->rx_bulk_alloc_allowed) {
4378                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4379                                     "satisfied. Rx Burst Bulk Alloc function "
4380                                     "will be used on port=%d.",
4381                              dev->data->port_id);
4382
4383                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4384         } else {
4385                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4386                                     "satisfied, or Scattered Rx is requested "
4387                                     "(port=%d).",
4388                              dev->data->port_id);
4389
4390                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4391         }
4392
4393         /* Propagate information about RX function choice through all queues. */
4394
4395         rx_using_sse =
4396                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4397                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4398
4399         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4400                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4401
4402                 rxq->rx_using_sse = rx_using_sse;
4403         }
4404 }
4405
4406 /**
4407  * ixgbe_set_rsc - configure RSC related port HW registers
4408  *
4409  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4410  * of 82599 Spec (x540 configuration is virtually the same).
4411  *
4412  * @dev port handle
4413  *
4414  * Returns 0 in case of success or a non-zero error code
4415  */
4416 static int
4417 ixgbe_set_rsc(struct rte_eth_dev *dev)
4418 {
4419         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4420         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4421         struct rte_eth_dev_info dev_info = { 0 };
4422         bool rsc_capable = false;
4423         uint16_t i;
4424         uint32_t rdrxctl;
4425
4426         /* Sanity check */
4427         dev->dev_ops->dev_infos_get(dev, &dev_info);
4428         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4429                 rsc_capable = true;
4430
4431         if (!rsc_capable && rx_conf->enable_lro) {
4432                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4433                                    "support it");
4434                 return -EINVAL;
4435         }
4436
4437         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4438
4439         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4440                 /*
4441                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4442                  * 3.0, RSC configuration requires HW CRC stripping to be
4443                  * enabled. If user requested both HW CRC stripping off
4444                  * and RSC on - return an error.
4445                  */
4446                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4447                                     "is disabled");
4448                 return -EINVAL;
4449         }
4450
4451         /* RFCTL configuration  */
4452         if (rsc_capable) {
4453                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4454
4455                 if (rx_conf->enable_lro)
4456                         /*
4457                          * Since NFS packet coalescing is not supported, clear
4458                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4459                          * enabled.
4460                          */
4461                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4462                                    IXGBE_RFCTL_NFSR_DIS);
4463                 else
4464                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4465
4466                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4467         }
4468
4469         /* If LRO hasn't been requested - we are done here. */
4470         if (!rx_conf->enable_lro)
4471                 return 0;
4472
4473         /* Set RDRXCTL.RSCACKC bit */
4474         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4475         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4476         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4477
4478         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4479         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4480                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4481                 uint32_t srrctl =
4482                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4483                 uint32_t rscctl =
4484                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4485                 uint32_t psrtype =
4486                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4487                 uint32_t eitr =
4488                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4489
4490                 /*
4491                  * ixgbe PMD doesn't support header-split at the moment.
4492                  *
4493                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4494                  * SRRCTL[n].BSIZEHEADER should be configured when RSC
4495                  * is enabled, even if header split is not enabled.
4496                  * We configure it to 128 bytes following the
4497                  * recommendation in the spec.
4498                  */
4499                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4500                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4501                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4502
4503                 /*
4504                  * TODO: Consider setting the Receive Descriptor Minimum
4505                  * Threshold Size for the RSC case. This is not an obviously
4506                  * beneficial option, but it is worth considering...
4507                  */
4508
4509                 rscctl |= IXGBE_RSCCTL_RSCEN;
4510                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4511                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4512
4513                 /*
4514                  * RSC: Set ITR interval corresponding to 2K ints/s.
4515                  *
4516                  * Full-sized RSC aggregations on a 10 Gb/s link arrive
4517                  * at a rate of about 20K aggregations/s.
4518                  *
4519                  * A 2K ints/s rate means that only about 10% of the
4520                  * aggregations are closed due to interrupt timer
4521                  * expiration when streaming at wire speed.
4522                  *
4523                  * For a sparse streaming case this setting will yield
4524                  * at most 500us latency for a single RSC aggregation.
4525                  */
4526                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4527                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4528
4529                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4530                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4531                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4532                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4533
4534                 /*
4535                  * RSC requires the mapping of the queue to the
4536                  * interrupt vector.
4537                  */
4538                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4539         }
4540
4541         dev->data->lro = 1;
4542
4543         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4544
4545         return 0;
4546 }
4547
4548 /*
4549  * Initializes Receive Unit.
4550  */
4551 int __attribute__((cold))
4552 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4553 {
4554         struct ixgbe_hw     *hw;
4555         struct ixgbe_rx_queue *rxq;
4556         uint64_t bus_addr;
4557         uint32_t rxctrl;
4558         uint32_t fctrl;
4559         uint32_t hlreg0;
4560         uint32_t maxfrs;
4561         uint32_t srrctl;
4562         uint32_t rdrxctl;
4563         uint32_t rxcsum;
4564         uint16_t buf_size;
4565         uint16_t i;
4566         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4567         int rc;
4568
4569         PMD_INIT_FUNC_TRACE();
4570         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4571
4572         /*
4573          * Make sure receives are disabled while setting
4574          * up the RX context (registers, descriptor rings, etc.).
4575          */
4576         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4577         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4578
4579         /* Enable receipt of broadcast frames */
4580         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4581         fctrl |= IXGBE_FCTRL_BAM;
4582         fctrl |= IXGBE_FCTRL_DPF;
4583         fctrl |= IXGBE_FCTRL_PMCF;
4584         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4585
4586         /*
4587          * Configure CRC stripping, if any.
4588          */
4589         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4590         if (rx_conf->hw_strip_crc)
4591                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4592         else
4593                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4594
4595         /*
4596          * Configure jumbo frame support, if any.
4597          */
4598         if (rx_conf->jumbo_frame == 1) {
4599                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
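                     /* MAXFRS keeps the maximum frame size in its upper 16 bits */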
4600                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4601                 maxfrs &= 0x0000FFFF;
4602                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4603                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4604         } else
4605                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4606
4607         /*
4608          * If loopback mode is configured for 82599, set LPBK bit.
4609          */
4610         if (hw->mac.type == ixgbe_mac_82599EB &&
4611                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4612                 hlreg0 |= IXGBE_HLREG0_LPBK;
4613         else
4614                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4615
4616         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4617
4618         /* Setup RX queues */
4619         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4620                 rxq = dev->data->rx_queues[i];
4621
4622                 /*
4623                  * Reset crc_len in case it was changed after queue setup by a
4624                  * call to configure.
4625                  */
4626                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4627
4628                 /* Setup the Base and Length of the Rx Descriptor Rings */
4629                 bus_addr = rxq->rx_ring_phys_addr;
4630                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4631                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4632                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4633                                 (uint32_t)(bus_addr >> 32));
4634                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4635                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4636                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4637                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4638
4639                 /* Configure the SRRCTL register */
4640 #ifdef RTE_HEADER_SPLIT_ENABLE
4641                 /*
4642                  * Configure Header Split
4643                  */
4644                 if (rx_conf->header_split) {
4645                         if (hw->mac.type == ixgbe_mac_82599EB) {
4646                                 /* Must setup the PSRTYPE register */
4647                                 uint32_t psrtype;
4648
4649                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4650                                         IXGBE_PSRTYPE_UDPHDR   |
4651                                         IXGBE_PSRTYPE_IPV4HDR  |
4652                                         IXGBE_PSRTYPE_IPV6HDR;
4653                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4654                         }
4655                         srrctl = ((rx_conf->split_hdr_size <<
4656                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4657                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4658                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4659                 } else
4660 #endif
4661                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4662
4663                 /* Set if packets are dropped when no descriptors available */
4664                 if (rxq->drop_en)
4665                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4666
4667                 /*
4668                  * Configure the RX buffer size in the BSIZEPACKET field of
4669                  * the SRRCTL register of the queue.
4670                  * The value is in 1 KB resolution. Valid values can be from
4671                  * 1 KB to 16 KB.
4672                  */
4673                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4674                         RTE_PKTMBUF_HEADROOM);
4675                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4676                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4677
4678                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4679
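                     /*
                      * Recompute the buffer size actually programmed: BSIZEPKT
                      * has 1 KB granularity, so this is the size rounded down
                      * to a whole number of KB.
                      */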
4680                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4681                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4682
4683                 /* Account for dual VLAN tags when checking buffer size */
4684                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4685                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4686                         dev->data->scattered_rx = 1;
4687         }
4688
4689         if (rx_conf->enable_scatter)
4690                 dev->data->scattered_rx = 1;
4691
4692         /*
4693          * Device configured with multiple RX queues.
4694          */
4695         ixgbe_dev_mq_rx_configure(dev);
4696
4697         /*
4698          * Setup the Checksum Register.
4699          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4700          * Enable IP/L4 checksum computation by hardware if requested to do so.
4701          */
4702         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4703         rxcsum |= IXGBE_RXCSUM_PCSD;
4704         if (rx_conf->hw_ip_checksum)
4705                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4706         else
4707                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4708
4709         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4710
4711         if (hw->mac.type == ixgbe_mac_82599EB ||
4712             hw->mac.type == ixgbe_mac_X540) {
4713                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4714                 if (rx_conf->hw_strip_crc)
4715                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4716                 else
4717                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4718                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4719                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4720         }
4721
4722         rc = ixgbe_set_rsc(dev);
4723         if (rc)
4724                 return rc;
4725
4726         ixgbe_set_rx_function(dev);
4727
4728         return 0;
4729 }
4730
4731 /*
4732  * Initializes Transmit Unit.
4733  */
4734 void __attribute__((cold))
4735 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4736 {
4737         struct ixgbe_hw     *hw;
4738         struct ixgbe_tx_queue *txq;
4739         uint64_t bus_addr;
4740         uint32_t hlreg0;
4741         uint32_t txctrl;
4742         uint16_t i;
4743
4744         PMD_INIT_FUNC_TRACE();
4745         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4746
4747         /* Enable TX CRC (checksum offload requirement) and hw padding
4748          * (TSO requirement)
4749          */
4750         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4751         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4752         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4753
4754         /* Setup the Base and Length of the Tx Descriptor Rings */
4755         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4756                 txq = dev->data->tx_queues[i];
4757
4758                 bus_addr = txq->tx_ring_phys_addr;
4759                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4760                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4761                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4762                                 (uint32_t)(bus_addr >> 32));
4763                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4764                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4765                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4766                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4767                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4768
4769                 /*
4770                  * Disable the Tx Head Writeback RO bit, since it corrupts
4771                  * bookkeeping if descriptors are not written back in order.
4772                  */
4773                 switch (hw->mac.type) {
4774                 case ixgbe_mac_82598EB:
4775                         txctrl = IXGBE_READ_REG(hw,
4776                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4777                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4778                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4779                                         txctrl);
4780                         break;
4781
4782                 case ixgbe_mac_82599EB:
4783                 case ixgbe_mac_X540:
4784                 case ixgbe_mac_X550:
4785                 case ixgbe_mac_X550EM_x:
4786                 case ixgbe_mac_X550EM_a:
4787                 default:
4788                         txctrl = IXGBE_READ_REG(hw,
4789                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4790                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4791                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4792                                         txctrl);
4793                         break;
4794                 }
4795         }
4796
4797         /* Device configured with multiple TX queues. */
4798         ixgbe_dev_mq_tx_configure(dev);
4799 }
4800
4801 /*
4802  * Set up link for 82599 loopback mode Tx->Rx.
4803  */
4804 static inline void __attribute__((cold))
4805 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4806 {
4807         PMD_INIT_FUNC_TRACE();
4808
4809         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4810                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4811                                 IXGBE_SUCCESS) {
4812                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4813                         /* ignore error */
4814                         return;
4815                 }
4816         }
4817
4818         /* Restart link */
4819         IXGBE_WRITE_REG(hw,
4820                         IXGBE_AUTOC,
4821                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4822         ixgbe_reset_pipeline_82599(hw);
4823
4824         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4825         msec_delay(50);
4826 }
4827
4828
4829 /*
4830  * Start Transmit and Receive Units.
4831  */
4832 int __attribute__((cold))
4833 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4834 {
4835         struct ixgbe_hw     *hw;
4836         struct ixgbe_tx_queue *txq;
4837         struct ixgbe_rx_queue *rxq;
4838         uint32_t txdctl;
4839         uint32_t dmatxctl;
4840         uint32_t rxctrl;
4841         uint16_t i;
4842         int ret = 0;
4843
4844         PMD_INIT_FUNC_TRACE();
4845         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4846
4847         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4848                 txq = dev->data->tx_queues[i];
4849                 /* Setup Transmit Threshold Registers */
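                     /*
                      * PTHRESH, HTHRESH and WTHRESH occupy bits 6:0, 14:8 and
                      * 22:16 of TXDCTL, respectively.
                      */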
4850                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4851                 txdctl |= txq->pthresh & 0x7F;
4852                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4853                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4854                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4855         }
4856
4857         if (hw->mac.type != ixgbe_mac_82598EB) {
4858                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4859                 dmatxctl |= IXGBE_DMATXCTL_TE;
4860                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4861         }
4862
4863         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4864                 txq = dev->data->tx_queues[i];
4865                 if (!txq->tx_deferred_start) {
4866                         ret = ixgbe_dev_tx_queue_start(dev, i);
4867                         if (ret < 0)
4868                                 return ret;
4869                 }
4870         }
4871
4872         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4873                 rxq = dev->data->rx_queues[i];
4874                 if (!rxq->rx_deferred_start) {
4875                         ret = ixgbe_dev_rx_queue_start(dev, i);
4876                         if (ret < 0)
4877                                 return ret;
4878                 }
4879         }
4880
4881         /* Enable Receive engine */
4882         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4883         if (hw->mac.type == ixgbe_mac_82598EB)
4884                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4885         rxctrl |= IXGBE_RXCTRL_RXEN;
4886         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4887
4888         /* If loopback mode is enabled for 82599, set up the link accordingly */
4889         if (hw->mac.type == ixgbe_mac_82599EB &&
4890                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4891                 ixgbe_setup_loopback_link_82599(hw);
4892
4893         return 0;
4894 }
4895
4896 /*
4897  * Start Receive Units for specified queue.
4898  */
4899 int __attribute__((cold))
4900 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4901 {
4902         struct ixgbe_hw     *hw;
4903         struct ixgbe_rx_queue *rxq;
4904         uint32_t rxdctl;
4905         int poll_ms;
4906
4907         PMD_INIT_FUNC_TRACE();
4908         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4909
4910         if (rx_queue_id < dev->data->nb_rx_queues) {
4911                 rxq = dev->data->rx_queues[rx_queue_id];
4912
4913                 /* Allocate buffers for descriptor rings */
4914                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4915                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4916                                      rx_queue_id);
4917                         return -1;
4918                 }
4919                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4920                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4921                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4922
4923                 /* Wait until RX Enable ready */
4924                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4925                 do {
4926                         rte_delay_ms(1);
4927                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4928                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4929                 if (!poll_ms)
4930                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4931                                      rx_queue_id);
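                     /*
                      * Ensure the freshly initialized descriptors are visible to
                      * the device before the tail pointer is advanced to hand
                      * them over.
                      */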
4932                 rte_wmb();
4933                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4934                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4935                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4936         } else
4937                 return -1;
4938
4939         return 0;
4940 }
4941
4942 /*
4943  * Stop Receive Units for specified queue.
4944  */
4945 int __attribute__((cold))
4946 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4947 {
4948         struct ixgbe_hw     *hw;
4949         struct ixgbe_adapter *adapter =
4950                 (struct ixgbe_adapter *)dev->data->dev_private;
4951         struct ixgbe_rx_queue *rxq;
4952         uint32_t rxdctl;
4953         int poll_ms;
4954
4955         PMD_INIT_FUNC_TRACE();
4956         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4957
4958         if (rx_queue_id < dev->data->nb_rx_queues) {
4959                 rxq = dev->data->rx_queues[rx_queue_id];
4960
4961                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4962                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4963                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4964
4965                 /* Wait until RX Enable bit clear */
4966                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4967                 do {
4968                         rte_delay_ms(1);
4969                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4970                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4971                 if (!poll_ms)
4972                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4973                                      rx_queue_id);
4974
4975                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4976
4977                 ixgbe_rx_queue_release_mbufs(rxq);
4978                 ixgbe_reset_rx_queue(adapter, rxq);
4979                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4980         } else
4981                 return -1;
4982
4983         return 0;
4984 }
4985
4986
4987 /*
4988  * Start Transmit Units for specified queue.
4989  */
4990 int __attribute__((cold))
4991 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4992 {
4993         struct ixgbe_hw     *hw;
4994         struct ixgbe_tx_queue *txq;
4995         uint32_t txdctl;
4996         int poll_ms;
4997
4998         PMD_INIT_FUNC_TRACE();
4999         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5000
5001         if (tx_queue_id < dev->data->nb_tx_queues) {
5002                 txq = dev->data->tx_queues[tx_queue_id];
5003                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5004                 txdctl |= IXGBE_TXDCTL_ENABLE;
5005                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5006
5007                 /* Wait until TX Enable ready */
5008                 if (hw->mac.type == ixgbe_mac_82599EB) {
5009                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5010                         do {
5011                                 rte_delay_ms(1);
5012                                 txdctl = IXGBE_READ_REG(hw,
5013                                         IXGBE_TXDCTL(txq->reg_idx));
5014                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5015                         if (!poll_ms)
5016                                 PMD_INIT_LOG(ERR, "Could not enable "
5017                                              "Tx Queue %d", tx_queue_id);
5018                 }
5019                 rte_wmb();
5020                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5021                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5022                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5023         } else
5024                 return -1;
5025
5026         return 0;
5027 }
5028
5029 /*
5030  * Stop Transmit Units for specified queue.
5031  */
5032 int __attribute__((cold))
5033 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5034 {
5035         struct ixgbe_hw     *hw;
5036         struct ixgbe_tx_queue *txq;
5037         uint32_t txdctl;
5038         uint32_t txtdh, txtdt;
5039         int poll_ms;
5040
5041         PMD_INIT_FUNC_TRACE();
5042         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5043
5044         if (tx_queue_id >= dev->data->nb_tx_queues)
5045                 return -1;
5046
5047         txq = dev->data->tx_queues[tx_queue_id];
5048
5049         /* Wait until TX queue is empty */
5050         if (hw->mac.type == ixgbe_mac_82599EB) {
5051                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5052                 do {
5053                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5054                         txtdh = IXGBE_READ_REG(hw,
5055                                                IXGBE_TDH(txq->reg_idx));
5056                         txtdt = IXGBE_READ_REG(hw,
5057                                                IXGBE_TDT(txq->reg_idx));
5058                 } while (--poll_ms && (txtdh != txtdt));
5059                 if (!poll_ms)
5060                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5061                                      "when stopping.", tx_queue_id);
5062         }
5063
5064         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5065         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5066         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5067
5068                 /* Wait until the TX Enable bit is cleared */
5069         if (hw->mac.type == ixgbe_mac_82599EB) {
5070                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5071                 do {
5072                         rte_delay_ms(1);
5073                         txdctl = IXGBE_READ_REG(hw,
5074                                                 IXGBE_TXDCTL(txq->reg_idx));
5075                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5076                 if (!poll_ms)
5077                         PMD_INIT_LOG(ERR, "Could not disable "
5078                                      "Tx Queue %d", tx_queue_id);
5079         }
5080
5081         if (txq->ops != NULL) {
5082                 txq->ops->release_mbufs(txq);
5083                 txq->ops->reset(txq);
5084         }
5085         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5086
5087         return 0;
5088 }
5089
5090 void
5091 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5092         struct rte_eth_rxq_info *qinfo)
5093 {
5094         struct ixgbe_rx_queue *rxq;
5095
5096         rxq = dev->data->rx_queues[queue_id];
5097
5098         qinfo->mp = rxq->mb_pool;
5099         qinfo->scattered_rx = dev->data->scattered_rx;
5100         qinfo->nb_desc = rxq->nb_rx_desc;
5101
5102         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5103         qinfo->conf.rx_drop_en = rxq->drop_en;
5104         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5105 }
5106
5107 void
5108 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5109         struct rte_eth_txq_info *qinfo)
5110 {
5111         struct ixgbe_tx_queue *txq;
5112
5113         txq = dev->data->tx_queues[queue_id];
5114
5115         qinfo->nb_desc = txq->nb_tx_desc;
5116
5117         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5118         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5119         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5120
5121         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5122         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5123         qinfo->conf.txq_flags = txq->txq_flags;
5124         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5125 }
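
/*
 * Usage sketch (assumption): the two info callbacks above back the generic
 * rte_eth_rx_queue_info_get()/rte_eth_tx_queue_info_get() APIs, so an
 * application query could look like:
 *
 *      struct rte_eth_rxq_info rx_qinfo;
 *
 *      if (rte_eth_rx_queue_info_get(port_id, queue_id, &rx_qinfo) == 0)
 *              printf("rxq %u: %u descriptors, rx_free_thresh %u\n",
 *                     queue_id, rx_qinfo.nb_desc,
 *                     rx_qinfo.conf.rx_free_thresh);
 */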
5126
5127 /*
5128  * [VF] Initializes Receive Unit.
5129  */
5130 int __attribute__((cold))
5131 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5132 {
5133         struct ixgbe_hw     *hw;
5134         struct ixgbe_rx_queue *rxq;
5135         uint64_t bus_addr;
5136         uint32_t srrctl, psrtype = 0;
5137         uint16_t buf_size;
5138         uint16_t i;
5139         int ret;
5140
5141         PMD_INIT_FUNC_TRACE();
5142         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5143
5144         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5145                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5146                         "it must be a power of 2");
5147                 return -1;
5148         }
5149
5150         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5151                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5152                         "it must be less than or equal to %d",
5153                         hw->mac.max_rx_queues);
5154                 return -1;
5155         }
5156
5157         /*
5158          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5159          * disables VF packet receipt if the PF MTU is > 1500.
5160          * This is done to deal with an 82599 limitation that forces
5161          * the PF and all VFs to share the same MTU.
5162          * The PF driver only re-enables VF packet receipt when the VF
5163          * driver issues an IXGBE_VF_SET_LPE request.
5164          * In the meantime, the VF device cannot be used, even if the VF driver
5165          * and the Guest VM network stack are ready to accept packets with a
5166          * size up to the PF MTU.
5167          * As a workaround for this PF behaviour, force the call to
5168          * ixgbevf_rlpml_set_vf even when jumbo frames are not used, so that
5169          * VF packet reception works in all cases.
5170          */
5171         ixgbevf_rlpml_set_vf(hw,
5172                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
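
        /*
         * Message-sequence sketch of the workaround above (an assumption
         * derived from the comment, not from the mailbox code itself):
         *   VF -> PF : IXGBE_VF_RESET          PF may stop VF Rx (PF MTU > 1500)
         *   VF -> PF : IXGBE_VF_SET_LPE(len)   sent by ixgbevf_rlpml_set_vf,
         *                                      PF re-enables VF Rx
         * so the unconditional call above keeps VF reception working even
         * with the default, non-jumbo max_rx_pkt_len.
         */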
5173
5174         /* Setup RX queues */
5175         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5176                 rxq = dev->data->rx_queues[i];
5177
5178                 /* Allocate buffers for descriptor rings */
5179                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5180                 if (ret)
5181                         return ret;
5182
5183                 /* Setup the Base and Length of the Rx Descriptor Rings */
5184                 bus_addr = rxq->rx_ring_phys_addr;
5185
5186                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5187                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5188                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5189                                 (uint32_t)(bus_addr >> 32));
5190                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5191                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5192                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5193                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5194
5195
5196                 /* Configure the SRRCTL register */
5197 #ifdef RTE_HEADER_SPLIT_ENABLE
5198                 /*
5199                  * Configure Header Split
5200                  */
5201                 if (dev->data->dev_conf.rxmode.header_split) {
5202                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5203                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5204                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5205                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5206                 } else
5207 #endif
5208                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5209
5210                 /* Set whether packets are dropped when no descriptors are available */
5211                 if (rxq->drop_en)
5212                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5213
5214                 /*
5215                  * Configure the RX buffer size in the BSIZEPACKET field of
5216                  * the SRRCTL register of the queue.
5217                  * The value is in 1 KB resolution. Valid values range from
5218                  * 1 KB to 16 KB.
5219                  */
5220                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5221                         RTE_PKTMBUF_HEADROOM);
5222                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5223                            IXGBE_SRRCTL_BSIZEPKT_MASK);
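
                /*
                 * Worked example (assuming the common default data room of
                 * RTE_MBUF_DEFAULT_BUF_SIZE, i.e. 2048 B plus 128 B of
                 * headroom): buf_size = 2176 - RTE_PKTMBUF_HEADROOM = 2048,
                 * and 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT (10) yields
                 * BSIZEPACKET = 2, i.e. a 2 KB hardware buffer per descriptor.
                 */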
5224
5225                 /*
5226                  * VF modification: write the virtual function SRRCTL register
5227                  */
5228                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5229
5230                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5231                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5232
5233                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5234                     /* Account for two VLAN tags to support dual VLAN (QinQ) */
5235                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5236                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5237                         if (!dev->data->scattered_rx)
5238                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5239                         dev->data->scattered_rx = 1;
5240                 }
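
                /*
                 * Example with assumed numbers: max_rx_pkt_len = 1518
                 * (ETHER_MAX_LEN) and IXGBE_VLAN_TAG_SIZE = 4 give
                 * 1518 + 2 * 4 = 1526 <= 2048, so a 2 KB buffer keeps
                 * scattered Rx off, while a 9000-byte jumbo setting would
                 * exceed it and force scatter mode as above.
                 */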
5241         }
5242
5243 #ifdef RTE_HEADER_SPLIT_ENABLE
5244         if (dev->data->dev_conf.rxmode.header_split)
5245                 /* Must setup the PSRTYPE register */
5246                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5247                         IXGBE_PSRTYPE_UDPHDR   |
5248                         IXGBE_PSRTYPE_IPV4HDR  |
5249                         IXGBE_PSRTYPE_IPV6HDR;
5250 #endif
5251
5252         /* Set RQPL for VF RSS according to the max number of Rx queues */
5253         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5254                 IXGBE_PSRTYPE_RQPL_SHIFT;
5255         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5256
5257         ixgbe_set_rx_function(dev);
5258
5259         return 0;
5260 }
5261
5262 /*
5263  * [VF] Initializes Transmit Unit.
5264  */
5265 void __attribute__((cold))
5266 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5267 {
5268         struct ixgbe_hw     *hw;
5269         struct ixgbe_tx_queue *txq;
5270         uint64_t bus_addr;
5271         uint32_t txctrl;
5272         uint16_t i;
5273
5274         PMD_INIT_FUNC_TRACE();
5275         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5276
5277         /* Setup the Base and Length of the Tx Descriptor Rings */
5278         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5279                 txq = dev->data->tx_queues[i];
5280                 bus_addr = txq->tx_ring_phys_addr;
5281                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5282                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5283                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5284                                 (uint32_t)(bus_addr >> 32));
5285                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5286                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5287                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5288                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5289                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5290
5291                 /*
5292                  * Disable the Tx head write-back relaxed ordering (RO) bit:
5293                  * out-of-order write-backs would break completion bookkeeping.
5294                  */
5295                 txctrl = IXGBE_READ_REG(hw,
5296                                 IXGBE_VFDCA_TXCTRL(i));
5297                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5298                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5299                                 txctrl);
5300         }
5301 }
5302
5303 /*
5304  * [VF] Start Transmit and Receive Units.
5305  */
5306 void __attribute__((cold))
5307 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5308 {
5309         struct ixgbe_hw     *hw;
5310         struct ixgbe_tx_queue *txq;
5311         struct ixgbe_rx_queue *rxq;
5312         uint32_t txdctl;
5313         uint32_t rxdctl;
5314         uint16_t i;
5315         int poll_ms;
5316
5317         PMD_INIT_FUNC_TRACE();
5318         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5319
5320         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5321                 txq = dev->data->tx_queues[i];
5322                 /* Setup Transmit Threshold Registers */
5323                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5324                 txdctl |= txq->pthresh & 0x7F;
5325                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5326                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5327                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5328         }
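
        /*
         * Register-layout sketch for the threshold programming above
         * (assumed from the 82599 datasheet): in (VF)TXDCTL, PTHRESH
         * occupies bits 6:0, HTHRESH bits 14:8 and WTHRESH bits 22:16,
         * hence the 0x7F masks and the shifts by 0, 8 and 16.
         */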
5329
5330         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5331
5332                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5333                 txdctl |= IXGBE_TXDCTL_ENABLE;
5334                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5335
5336                 poll_ms = 10;
5337                 /* Wait until TX Enable ready */
5338                 do {
5339                         rte_delay_ms(1);
5340                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5341                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5342                 if (!poll_ms)
5343                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5344         }
5345         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5346
5347                 rxq = dev->data->rx_queues[i];
5348
5349                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5350                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5351                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5352
5353                 /* Wait until RX Enable ready */
5354                 poll_ms = 10;
5355                 do {
5356                         rte_delay_ms(1);
5357                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5358                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5359                 if (!poll_ms)
5360                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5361                 rte_wmb();
5362                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5363
5364         }
5365 }
5366
5367 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5368 int __attribute__((weak))
5369 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5370 {
5371         return -1;
5372 }
5373
5374 uint16_t __attribute__((weak))
5375 ixgbe_recv_pkts_vec(
5376         void __rte_unused *rx_queue,
5377         struct rte_mbuf __rte_unused **rx_pkts,
5378         uint16_t __rte_unused nb_pkts)
5379 {
5380         return 0;
5381 }
5382
5383 uint16_t __attribute__((weak))
5384 ixgbe_recv_scattered_pkts_vec(
5385         void __rte_unused *rx_queue,
5386         struct rte_mbuf __rte_unused **rx_pkts,
5387         uint16_t __rte_unused nb_pkts)
5388 {
5389         return 0;
5390 }
5391
5392 int __attribute__((weak))
5393 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5394 {
5395         return -1;
5396 }
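
/*
 * Override sketch (illustrative only): when CONFIG_RTE_IXGBE_INC_VECTOR is
 * enabled, the vector Rx/Tx files provide strong definitions of the symbols
 * above and the linker discards these weak stubs.  An assumed minimal strong
 * override would look like:
 *
 *      uint16_t
 *      ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 *                          uint16_t nb_pkts)
 *      {
 *              return vector_recv_burst(rx_queue, rx_pkts, nb_pkts);
 *      }
 *
 * where vector_recv_burst stands in for the real SSE/NEON receive loop and
 * is a hypothetical name, not a symbol in this driver.
 */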