net/ixgbe: allow bulk alloc for the max size desc ring
[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask to indicate which bits are required for building the TX context */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG |                 \
89                 PKT_TX_MACSEC |                  \
90                 PKT_TX_OUTER_IP_CKSUM)
91
92 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
93                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
94
95 #if 1
96 #define RTE_PMD_USE_PREFETCH
97 #endif
98
99 #ifdef RTE_PMD_USE_PREFETCH
100 /*
101  * Prefetch a cache line into all cache levels.
102  */
103 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
104 #else
105 #define rte_ixgbe_prefetch(p)   do {} while (0)
106 #endif
107
108 /*********************************************************************
109  *
110  *  TX functions
111  *
112  **********************************************************************/
113
114 /*
115  * Check for descriptors with their DD bit set and free mbufs.
116  * Return the total number of buffers freed.
117  */
118 static inline int __attribute__((always_inline))
119 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
120 {
121         struct ixgbe_tx_entry *txep;
122         uint32_t status;
123         int i, nb_free = 0;
124         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
125
126         /* check DD bit on threshold descriptor */
127         status = txq->tx_ring[txq->tx_next_dd].wb.status;
128         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
129                 return 0;
130
131         /*
132          * first buffer to free from S/W ring is at index
133          * tx_next_dd - (tx_rs_thresh-1)
134          */
135         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
136
137         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
138                 /* free buffers one at a time */
139                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
140                 txep->mbuf = NULL;
141
142                 if (unlikely(m == NULL))
143                         continue;
144
145                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
146                     (nb_free > 0 && m->pool != free[0]->pool)) {
147                         rte_mempool_put_bulk(free[0]->pool,
148                                              (void **)free, nb_free);
149                         nb_free = 0;
150                 }
151
152                 free[nb_free++] = m;
153         }
154
155         if (nb_free > 0)
156                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
157
158         /* buffers were freed, update counters */
159         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
160         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
161         if (txq->tx_next_dd >= txq->nb_tx_desc)
162                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
163
164         return txq->tx_rs_thresh;
165 }
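
/*
 * Illustrative arithmetic for the free path above (example values, not part
 * of the driver): with nb_tx_desc = 128 and tx_rs_thresh = 32, the queue
 * starts with tx_next_dd = 31, so the first pass frees sw_ring[0]..sw_ring[31],
 * adds 32 to nb_tx_free and advances tx_next_dd to 63.  After the pass that
 * frees up to index 127, tx_next_dd would become 159 >= 128 and is wrapped
 * back to tx_rs_thresh - 1 = 31.
 */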
166
167 /* Populate 4 descriptors with data from 4 mbufs */
168 static inline void
169 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
170 {
171         uint64_t buf_dma_addr;
172         uint32_t pkt_len;
173         int i;
174
175         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
176                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
177                 pkt_len = (*pkts)->data_len;
178
179                 /* write data to descriptor */
180                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
181
182                 txdp->read.cmd_type_len =
183                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
184
185                 txdp->read.olinfo_status =
186                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
187
188                 rte_prefetch0(&(*pkts)->pool);
189         }
190 }
191
192 /* Populate 1 descriptor with data from 1 mbuf */
193 static inline void
194 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
195 {
196         uint64_t buf_dma_addr;
197         uint32_t pkt_len;
198
199         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
200         pkt_len = (*pkts)->data_len;
201
202         /* write data to descriptor */
203         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
204         txdp->read.cmd_type_len =
205                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
206         txdp->read.olinfo_status =
207                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
208         rte_prefetch0(&(*pkts)->pool);
209 }
210
211 /*
212  * Fill H/W descriptor ring with mbuf data.
213  * Copy mbuf pointers to the S/W ring.
214  */
215 static inline void
216 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
217                       uint16_t nb_pkts)
218 {
219         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
220         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
221         const int N_PER_LOOP = 4;
222         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
223         int mainpart, leftover;
224         int i, j;
225
226         /*
227          * Process most of the packets in chunks of N pkts.  Any
228          * leftover packets will get processed one at a time.
229          */
230         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
231         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
232         for (i = 0; i < mainpart; i += N_PER_LOOP) {
233                 /* Copy N mbuf pointers to the S/W ring */
234                 for (j = 0; j < N_PER_LOOP; ++j) {
235                         (txep + i + j)->mbuf = *(pkts + i + j);
236                 }
237                 tx4(txdp + i, pkts + i);
238         }
239
240         if (unlikely(leftover > 0)) {
241                 for (i = 0; i < leftover; ++i) {
242                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
243                         tx1(txdp + mainpart + i, pkts + mainpart + i);
244                 }
245         }
246 }
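
/*
 * Worked example for the split above (illustrative only): with nb_pkts = 7
 * and N_PER_LOOP = 4, mainpart = 7 & ~3 = 4 and leftover = 7 & 3 = 3, so a
 * single tx4() call fills four descriptors and three tx1() calls handle the
 * remaining packets.
 */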
247
248 static inline uint16_t
249 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
250              uint16_t nb_pkts)
251 {
252         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
253         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
254         uint16_t n = 0;
255
256         /*
257          * Begin scanning the H/W ring for done descriptors when the
258          * number of available descriptors drops below tx_free_thresh.  For
259          * each done descriptor, free the associated buffer.
260          */
261         if (txq->nb_tx_free < txq->tx_free_thresh)
262                 ixgbe_tx_free_bufs(txq);
263
264         /* Only use descriptors that are available */
265         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
266         if (unlikely(nb_pkts == 0))
267                 return 0;
268
269         /* Use exactly nb_pkts descriptors */
270         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
271
272         /*
273          * At this point, we know there are enough descriptors in the
274          * ring to transmit all the packets.  This assumes that each
275          * mbuf contains a single segment, and that no new offloads
276          * are expected, which would require a new context descriptor.
277          */
278
279         /*
280          * See if we're going to wrap around. If so, handle the top
281          * of the descriptor ring first, then do the bottom.  If not,
282          * the processing looks just like the "bottom" part anyway...
283          */
284         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
285                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
286                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
287
288                 /*
289                  * We know that the last descriptor in the ring will need to
290                  * have its RS bit set because tx_rs_thresh has to be
291                  * a divisor of the ring size
292                  */
293                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
294                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
295                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
296
297                 txq->tx_tail = 0;
298         }
299
300         /* Fill H/W descriptor ring with mbuf data */
301         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
302         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
303
304         /*
305          * Determine if RS bit should be set
306          * This is what we actually want:
307          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
308          * but instead of subtracting 1 and doing >=, we can just do
309          * greater than without subtracting.
310          */
311         if (txq->tx_tail > txq->tx_next_rs) {
312                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
313                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
314                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
315                                                 txq->tx_rs_thresh);
316                 if (txq->tx_next_rs >= txq->nb_tx_desc)
317                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
318         }
319
320         /*
321          * Check for wrap-around. This would only happen if we used
322          * up to the last descriptor in the ring, no more, no less.
323          */
324         if (txq->tx_tail >= txq->nb_tx_desc)
325                 txq->tx_tail = 0;
326
327         /* update tail pointer */
328         rte_wmb();
329         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
330
331         return nb_pkts;
332 }
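
/*
 * Worked example of the wrap-around and RS handling above (illustrative
 * values only): with nb_tx_desc = 128, tx_rs_thresh = 32, tx_tail = 120,
 * tx_next_rs = 127 and nb_pkts = 16, the burst wraps: n = 8 descriptors are
 * filled at the top of the ring, descriptor 127 gets its RS bit, tx_next_rs
 * is reset to 31 and tx_tail restarts at 0.  The remaining 8 packets land in
 * descriptors 0..7; since the new tx_tail (8) is not greater than tx_next_rs
 * (31), no further RS bit is set in this call.
 */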
333
334 uint16_t
335 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
336                        uint16_t nb_pkts)
337 {
338         uint16_t nb_tx;
339
340         /* If the burst fits within TX_MAX_BURST pkts, transmit it in one call */
341         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
342                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
343
344         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
345         nb_tx = 0;
346         while (nb_pkts) {
347                 uint16_t ret, n;
348
349                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
350                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
351                 nb_tx = (uint16_t)(nb_tx + ret);
352                 nb_pkts = (uint16_t)(nb_pkts - ret);
353                 if (ret < n)
354                         break;
355         }
356
357         return nb_tx;
358 }
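
/*
 * Example of the chunking above (illustrative, assuming
 * RTE_PMD_IXGBE_TX_MAX_BURST is 32): a burst of 100 packets results in
 * tx_xmit_pkts() being called with 32, 32, 32 and finally 4 packets; if any
 * call accepts fewer packets than requested, the loop stops and the caller
 * sees the shortfall in the return value.
 */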
359
360 static inline void
361 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
362                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
363                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
364 {
365         uint32_t type_tucmd_mlhl;
366         uint32_t mss_l4len_idx = 0;
367         uint32_t ctx_idx;
368         uint32_t vlan_macip_lens;
369         union ixgbe_tx_offload tx_offload_mask;
370         uint32_t seqnum_seed = 0;
371
372         ctx_idx = txq->ctx_curr;
373         tx_offload_mask.data[0] = 0;
374         tx_offload_mask.data[1] = 0;
375         type_tucmd_mlhl = 0;
376
377         /* Specify which HW CTX to upload. */
378         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
379
380         if (ol_flags & PKT_TX_VLAN_PKT) {
381                 tx_offload_mask.vlan_tci |= ~0;
382         }
383
384         /* check if TCP segmentation is required for this packet */
385         if (ol_flags & PKT_TX_TCP_SEG) {
386                 /* implies IP cksum in IPv4 */
387                 if (ol_flags & PKT_TX_IP_CKSUM)
388                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
389                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
390                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
391                 else
392                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
393                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
394                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
395
396                 tx_offload_mask.l2_len |= ~0;
397                 tx_offload_mask.l3_len |= ~0;
398                 tx_offload_mask.l4_len |= ~0;
399                 tx_offload_mask.tso_segsz |= ~0;
400                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
401                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
402         } else { /* no TSO, check if hardware checksum is needed */
403                 if (ol_flags & PKT_TX_IP_CKSUM) {
404                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
405                         tx_offload_mask.l2_len |= ~0;
406                         tx_offload_mask.l3_len |= ~0;
407                 }
408
409                 switch (ol_flags & PKT_TX_L4_MASK) {
410                 case PKT_TX_UDP_CKSUM:
411                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
412                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
413                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
414                         tx_offload_mask.l2_len |= ~0;
415                         tx_offload_mask.l3_len |= ~0;
416                         break;
417                 case PKT_TX_TCP_CKSUM:
418                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
419                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
420                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
421                         tx_offload_mask.l2_len |= ~0;
422                         tx_offload_mask.l3_len |= ~0;
423                         break;
424                 case PKT_TX_SCTP_CKSUM:
425                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
426                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
427                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
428                         tx_offload_mask.l2_len |= ~0;
429                         tx_offload_mask.l3_len |= ~0;
430                         break;
431                 default:
432                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
433                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
434                         break;
435                 }
436         }
437
438         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
439                 tx_offload_mask.outer_l2_len |= ~0;
440                 tx_offload_mask.outer_l3_len |= ~0;
441                 tx_offload_mask.l2_len |= ~0;
442                 seqnum_seed |= tx_offload.outer_l3_len
443                                << IXGBE_ADVTXD_OUTER_IPLEN;
444                 seqnum_seed |= tx_offload.l2_len
445                                << IXGBE_ADVTXD_TUNNEL_LEN;
446         }
447
448         txq->ctx_cache[ctx_idx].flags = ol_flags;
449         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
450                 tx_offload_mask.data[0] & tx_offload.data[0];
451         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
452                 tx_offload_mask.data[1] & tx_offload.data[1];
453         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
454
455         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
456         vlan_macip_lens = tx_offload.l3_len;
457         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
458                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
459                                     IXGBE_ADVTXD_MACLEN_SHIFT);
460         else
461                 vlan_macip_lens |= (tx_offload.l2_len <<
462                                     IXGBE_ADVTXD_MACLEN_SHIFT);
463         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
464         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
465         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
466         ctx_txd->seqnum_seed     = seqnum_seed;
467 }
468
469 /*
470  * Check which hardware context can be used. Use the existing match
471  * or create a new context descriptor.
472  */
473 static inline uint32_t
474 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
475                    union ixgbe_tx_offload tx_offload)
476 {
477         /* Check for a match with the currently used context */
478         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
479                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
480                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
481                      & tx_offload.data[0])) &&
482                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
483                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
484                      & tx_offload.data[1]))))
485                 return txq->ctx_curr;
486
487         /* Check for a match with the other cached context */
488         txq->ctx_curr ^= 1;
489         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
490                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
491                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
492                      & tx_offload.data[0])) &&
493                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
494                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
495                      & tx_offload.data[1]))))
496                 return txq->ctx_curr;
497
498         /* Mismatch: a new context descriptor must be built */
499         return IXGBE_CTX_NUM;
500 }
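
/*
 * Sketch of the context-cache behaviour above (illustrative, not driver
 * code): the queue keeps IXGBE_CTX_NUM cached offload contexts and ctx_curr
 * toggles between them.  A burst that alternates between two offload
 * layouts, e.g. VLAN + TCP checksum packets and plain IP checksum packets,
 * keeps hitting the two cached slots and never rebuilds a context
 * descriptor, while a third distinct layout makes this function return
 * IXGBE_CTX_NUM so that ixgbe_xmit_pkts() builds a new context descriptor.
 */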
501
502 static inline uint32_t
503 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
504 {
505         uint32_t tmp = 0;
506
507         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
508                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
509         if (ol_flags & PKT_TX_IP_CKSUM)
510                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
511         if (ol_flags & PKT_TX_TCP_SEG)
512                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
513         return tmp;
514 }
515
516 static inline uint32_t
517 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
518 {
519         uint32_t cmdtype = 0;
520
521         if (ol_flags & PKT_TX_VLAN_PKT)
522                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
523         if (ol_flags & PKT_TX_TCP_SEG)
524                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
525         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
526                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
527         if (ol_flags & PKT_TX_MACSEC)
528                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
529         return cmdtype;
530 }
531
532 /* Default RS bit threshold values */
533 #ifndef DEFAULT_TX_RS_THRESH
534 #define DEFAULT_TX_RS_THRESH   32
535 #endif
536 #ifndef DEFAULT_TX_FREE_THRESH
537 #define DEFAULT_TX_FREE_THRESH 32
538 #endif
539
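/*
 * The thresholds above are only defaults; an application can override them
 * when the TX queue is configured.  A minimal, hypothetical sketch of such a
 * setup call (example values, not recommendations; error handling shortened):
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 32,
 *     };
 *     uint8_t port_id = 0;
 *
 *     if (rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                &txconf) != 0)
 *             rte_exit(EXIT_FAILURE, "TX queue setup failed\n");
 *
 * As noted in tx_xmit_pkts() above, tx_rs_thresh must be a divisor of the
 * ring size, which the driver validates at queue setup time.
 */
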
540 /* Reset transmit descriptors after they have been used */
541 static inline int
542 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
543 {
544         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
545         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
546         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
547         uint16_t nb_tx_desc = txq->nb_tx_desc;
548         uint16_t desc_to_clean_to;
549         uint16_t nb_tx_to_clean;
550         uint32_t status;
551
552         /* Determine the last descriptor needing to be cleaned */
553         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
554         if (desc_to_clean_to >= nb_tx_desc)
555                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
556
557         /* Check to make sure the last descriptor to clean is done */
558         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
559         status = txr[desc_to_clean_to].wb.status;
560         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
561                 PMD_TX_FREE_LOG(DEBUG,
562                                 "TX descriptor %4u is not done "
563                                 "(port=%d queue=%d)",
564                                 desc_to_clean_to,
565                                 txq->port_id, txq->queue_id);
566                 /* Failed to clean any descriptors, better luck next time */
567                 return -(1);
568         }
569
570         /* Figure out how many descriptors will be cleaned */
571         if (last_desc_cleaned > desc_to_clean_to)
572                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
573                                                         desc_to_clean_to);
574         else
575                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
576                                                 last_desc_cleaned);
577
578         PMD_TX_FREE_LOG(DEBUG,
579                         "Cleaning %4u TX descriptors: %4u to %4u "
580                         "(port=%d queue=%d)",
581                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
582                         txq->port_id, txq->queue_id);
583
584         /*
585          * The last descriptor to clean is done, so that means all the
586          * descriptors from the last descriptor that was cleaned
587          * up to the last descriptor with the RS bit set
588          * are done. Only reset the threshold descriptor.
589          */
590         txr[desc_to_clean_to].wb.status = 0;
591
592         /* Update the txq to reflect the last descriptor that was cleaned */
593         txq->last_desc_cleaned = desc_to_clean_to;
594         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
595
596         /* No Error */
597         return 0;
598 }
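
/*
 * Illustrative arithmetic for the cleanup above (example values, assuming
 * single-segment packets so that sw_ring[i].last_id == i): with
 * nb_tx_desc = 128, tx_rs_thresh = 32 and last_desc_cleaned = 127,
 * desc_to_clean_to = (127 + 32) - 128 = 31.  Because last_desc_cleaned is
 * greater than desc_to_clean_to, the wrap branch computes
 * nb_tx_to_clean = (128 - 127) + 31 = 32, and 32 descriptors are returned
 * to nb_tx_free once descriptor 31 reports DD.
 */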
599
600 uint16_t
601 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
602                 uint16_t nb_pkts)
603 {
604         struct ixgbe_tx_queue *txq;
605         struct ixgbe_tx_entry *sw_ring;
606         struct ixgbe_tx_entry *txe, *txn;
607         volatile union ixgbe_adv_tx_desc *txr;
608         volatile union ixgbe_adv_tx_desc *txd, *txp;
609         struct rte_mbuf     *tx_pkt;
610         struct rte_mbuf     *m_seg;
611         uint64_t buf_dma_addr;
612         uint32_t olinfo_status;
613         uint32_t cmd_type_len;
614         uint32_t pkt_len;
615         uint16_t slen;
616         uint64_t ol_flags;
617         uint16_t tx_id;
618         uint16_t tx_last;
619         uint16_t nb_tx;
620         uint16_t nb_used;
621         uint64_t tx_ol_req;
622         uint32_t ctx = 0;
623         uint32_t new_ctx;
624         union ixgbe_tx_offload tx_offload;
625
626         tx_offload.data[0] = 0;
627         tx_offload.data[1] = 0;
628         txq = tx_queue;
629         sw_ring = txq->sw_ring;
630         txr     = txq->tx_ring;
631         tx_id   = txq->tx_tail;
632         txe = &sw_ring[tx_id];
633         txp = NULL;
634
635         /* Determine if the descriptor ring needs to be cleaned. */
636         if (txq->nb_tx_free < txq->tx_free_thresh)
637                 ixgbe_xmit_cleanup(txq);
638
639         rte_prefetch0(&txe->mbuf->pool);
640
641         /* TX loop */
642         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
643                 new_ctx = 0;
644                 tx_pkt = *tx_pkts++;
645                 pkt_len = tx_pkt->pkt_len;
646
647                 /*
648                  * Determine how many (if any) context descriptors
649                  * are needed for offload functionality.
650                  */
651                 ol_flags = tx_pkt->ol_flags;
652
653                 /* If hardware offload required */
654                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
655                 if (tx_ol_req) {
656                         tx_offload.l2_len = tx_pkt->l2_len;
657                         tx_offload.l3_len = tx_pkt->l3_len;
658                         tx_offload.l4_len = tx_pkt->l4_len;
659                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
660                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
661                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
662                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
663
664                         /* Check whether a new context must be built or an existing one reused. */
665                         ctx = what_advctx_update(txq, tx_ol_req,
666                                 tx_offload);
667                         /* Only allocate a context descriptor if required */
668                         new_ctx = (ctx == IXGBE_CTX_NUM);
669                         ctx = txq->ctx_curr;
670                 }
671
672                 /*
673                  * Keep track of how many descriptors are used this loop.
674                  * This will always be the number of segments + the number
675                  * of context descriptors required to transmit the packet.
676                  */
677                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
678
679                 if (txp != NULL &&
680                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
681                         /* set RS on the previous packet in the burst */
682                         txp->read.cmd_type_len |=
683                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
684
685                 /*
686                  * The number of descriptors that must be allocated for a
687                  * packet is the number of segments of that packet, plus 1
688                  * Context Descriptor for the hardware offload, if any.
689                  * Determine the last TX descriptor to allocate in the TX ring
690                  * for the packet, starting from the current position (tx_id)
691                  * in the ring.
692                  */
693                 tx_last = (uint16_t) (tx_id + nb_used - 1);
694
695                 /* Circular ring */
696                 if (tx_last >= txq->nb_tx_desc)
697                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
698
699                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
700                            " tx_first=%u tx_last=%u",
701                            (unsigned) txq->port_id,
702                            (unsigned) txq->queue_id,
703                            (unsigned) pkt_len,
704                            (unsigned) tx_id,
705                            (unsigned) tx_last);
706
707                 /*
708                  * Make sure there are enough TX descriptors available to
709                  * transmit the entire packet.
710                  * nb_used better be less than or equal to txq->tx_rs_thresh
711                  */
712                 if (nb_used > txq->nb_tx_free) {
713                         PMD_TX_FREE_LOG(DEBUG,
714                                         "Not enough free TX descriptors "
715                                         "nb_used=%4u nb_free=%4u "
716                                         "(port=%d queue=%d)",
717                                         nb_used, txq->nb_tx_free,
718                                         txq->port_id, txq->queue_id);
719
720                         if (ixgbe_xmit_cleanup(txq) != 0) {
721                                 /* Could not clean any descriptors */
722                                 if (nb_tx == 0)
723                                         return 0;
724                                 goto end_of_tx;
725                         }
726
727                         /* nb_used better be <= txq->tx_rs_thresh */
728                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
729                                 PMD_TX_FREE_LOG(DEBUG,
730                                         "The number of descriptors needed to "
731                                         "transmit the packet exceeds the "
732                                         "RS bit threshold. This will impact "
733                                         "performance. "
734                                         "nb_used=%4u nb_free=%4u "
735                                         "tx_rs_thresh=%4u. "
736                                         "(port=%d queue=%d)",
737                                         nb_used, txq->nb_tx_free,
738                                         txq->tx_rs_thresh,
739                                         txq->port_id, txq->queue_id);
740                                 /*
741                                  * Loop here until there are enough TX
742                                  * descriptors or until the ring cannot be
743                                  * cleaned.
744                                  */
745                                 while (nb_used > txq->nb_tx_free) {
746                                         if (ixgbe_xmit_cleanup(txq) != 0) {
747                                                 /*
748                                                  * Could not clean any
749                                                  * descriptors
750                                                  */
751                                                 if (nb_tx == 0)
752                                                         return 0;
753                                                 goto end_of_tx;
754                                         }
755                                 }
756                         }
757                 }
758
759                 /*
760                  * By now there are enough free TX descriptors to transmit
761                  * the packet.
762                  */
763
764                 /*
765                  * Set common flags of all TX Data Descriptors.
766                  *
767                  * The following bits must be set in all Data Descriptors:
768                  *   - IXGBE_ADVTXD_DTYP_DATA
769                  *   - IXGBE_ADVTXD_DCMD_DEXT
770                  *
771                  * The following bits must be set in the first Data Descriptor
772                  * and are ignored in the other ones:
773                  *   - IXGBE_ADVTXD_DCMD_IFCS
774                  *   - IXGBE_ADVTXD_MAC_1588
775                  *   - IXGBE_ADVTXD_DCMD_VLE
776                  *
777                  * The following bits must only be set in the last Data
778                  * Descriptor:
779                  *   - IXGBE_TXD_CMD_EOP
780                  *
781                  * The following bits can be set in any Data Descriptor, but
782                  * are only set in the last Data Descriptor:
783                  *   - IXGBE_TXD_CMD_RS
784                  */
785                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
786                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
787
788 #ifdef RTE_LIBRTE_IEEE1588
789                 if (ol_flags & PKT_TX_IEEE1588_TMST)
790                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
791 #endif
792
793                 olinfo_status = 0;
794                 if (tx_ol_req) {
795
796                         if (ol_flags & PKT_TX_TCP_SEG) {
797                                 /* when TSO is on, the paylen in the descriptor
798                                  * is not the packet len but the TCP payload len */
799                                 pkt_len -= (tx_offload.l2_len +
800                                         tx_offload.l3_len + tx_offload.l4_len);
801                         }
802
803                         /*
804                          * Setup the TX Advanced Context Descriptor if required
805                          */
806                         if (new_ctx) {
807                                 volatile struct ixgbe_adv_tx_context_desc *
808                                     ctx_txd;
809
810                                 ctx_txd = (volatile struct
811                                     ixgbe_adv_tx_context_desc *)
812                                     &txr[tx_id];
813
814                                 txn = &sw_ring[txe->next_id];
815                                 rte_prefetch0(&txn->mbuf->pool);
816
817                                 if (txe->mbuf != NULL) {
818                                         rte_pktmbuf_free_seg(txe->mbuf);
819                                         txe->mbuf = NULL;
820                                 }
821
822                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
823                                         tx_offload);
824
825                                 txe->last_id = tx_last;
826                                 tx_id = txe->next_id;
827                                 txe = txn;
828                         }
829
830                         /*
831                          * Set up the TX Advanced Data Descriptor.
832                          * This path is taken whether a new context
833                          * descriptor was built or an existing one is reused.
834                          */
835                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
836                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
837                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
838                 }
839
840                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
841
842                 m_seg = tx_pkt;
843                 do {
844                         txd = &txr[tx_id];
845                         txn = &sw_ring[txe->next_id];
846                         rte_prefetch0(&txn->mbuf->pool);
847
848                         if (txe->mbuf != NULL)
849                                 rte_pktmbuf_free_seg(txe->mbuf);
850                         txe->mbuf = m_seg;
851
852                         /*
853                          * Set up Transmit Data Descriptor.
854                          */
855                         slen = m_seg->data_len;
856                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
857                         txd->read.buffer_addr =
858                                 rte_cpu_to_le_64(buf_dma_addr);
859                         txd->read.cmd_type_len =
860                                 rte_cpu_to_le_32(cmd_type_len | slen);
861                         txd->read.olinfo_status =
862                                 rte_cpu_to_le_32(olinfo_status);
863                         txe->last_id = tx_last;
864                         tx_id = txe->next_id;
865                         txe = txn;
866                         m_seg = m_seg->next;
867                 } while (m_seg != NULL);
868
869                 /*
870                  * The last packet data descriptor needs End Of Packet (EOP)
871                  */
872                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
873                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
874                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
875
876                 /* Set RS bit only on threshold packets' last descriptor */
877                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
878                         PMD_TX_FREE_LOG(DEBUG,
879                                         "Setting RS bit on TXD id="
880                                         "%4u (port=%d queue=%d)",
881                                         tx_last, txq->port_id, txq->queue_id);
882
883                         cmd_type_len |= IXGBE_TXD_CMD_RS;
884
885                         /* Update txq RS bit counters */
886                         txq->nb_tx_used = 0;
887                         txp = NULL;
888                 } else
889                         txp = txd;
890
891                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
892         }
893
894 end_of_tx:
895         /* set RS on last packet in the burst */
896         if (txp != NULL)
897                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
898
899         rte_wmb();
900
901         /*
902          * Set the Transmit Descriptor Tail (TDT)
903          */
904         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
905                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
906                    (unsigned) tx_id, (unsigned) nb_tx);
907         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
908         txq->tx_tail = tx_id;
909
910         return nb_tx;
911 }
912
913 /*********************************************************************
914  *
915  *  TX prep functions
916  *
917  **********************************************************************/
918 uint16_t
919 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
920 {
921         int i, ret;
922         uint64_t ol_flags;
923         struct rte_mbuf *m;
924         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
925
926         for (i = 0; i < nb_pkts; i++) {
927                 m = tx_pkts[i];
928                 ol_flags = m->ol_flags;
929
930                 /**
931                  * Check if the packet meets the segment count requirement
932                  *
933                  * NOTE: for ixgbe the limit is always (40 - WTHRESH) for
934                  *       both TSO and non-TSO packets
935                  */
936
937                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
938                         rte_errno = -EINVAL;
939                         return i;
940                 }
941
942                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
943                         rte_errno = -ENOTSUP;
944                         return i;
945                 }
946
947 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
948                 ret = rte_validate_tx_offload(m);
949                 if (ret != 0) {
950                         rte_errno = ret;
951                         return i;
952                 }
953 #endif
954                 ret = rte_net_intel_cksum_prepare(m);
955                 if (ret != 0) {
956                         rte_errno = ret;
957                         return i;
958                 }
959         }
960
961         return i;
962 }
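
/*
 * ixgbe_prep_pkts() is installed as the device's tx_pkt_prepare callback and
 * is normally reached through the generic ethdev API.  A hedged, illustrative
 * application-side sketch (names and error handling are examples only):
 *
 *     uint16_t nb_prep, nb_sent;
 *
 *     nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *     if (nb_prep < nb_pkts)
 *             printf("pkt %u rejected: %s\n", nb_prep,
 *                    strerror(rte_errno < 0 ? -rte_errno : rte_errno));
 *     nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */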
963
964 /*********************************************************************
965  *
966  *  RX functions
967  *
968  **********************************************************************/
969
970 #define IXGBE_PACKET_TYPE_ETHER                         0X00
971 #define IXGBE_PACKET_TYPE_IPV4                          0X01
972 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
973 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
974 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
975 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
976 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
977 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
978 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
979 #define IXGBE_PACKET_TYPE_IPV6                          0X04
980 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
981 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
982 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
983 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
984 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
985 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
986 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
987 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
988 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
989 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
990 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
991 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
992 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
993 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
994 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
995 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
996 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
997 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
998 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
999 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1000 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1001 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1002 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1003
1004 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1005 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1006 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1007 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1008 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1009 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1010 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1011 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1012 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1013 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1014 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1015 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1016 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1017 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1018 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1019 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1020 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1021 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1022 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1023 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1024 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1025 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1026 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1027
1028 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1029 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1030 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1031 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1032 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1033 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1034 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1035 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1036 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1037 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1038 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1039 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1040 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1041 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1042 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1043 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1044 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1045 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1046 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1047 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1048 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1049 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1050 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1051
1052 #define IXGBE_PACKET_TYPE_MAX               0X80
1053 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1054 #define IXGBE_PACKET_TYPE_SHIFT             0X04
1055
1056 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1057 static inline uint32_t
1058 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1059 {
1060         /**
1061          * Use 2 different tables for normal packets and tunnel packets
1062          * to save space.
1063          */
1064         static const uint32_t
1065                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1066                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1067                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1068                         RTE_PTYPE_L3_IPV4,
1069                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1070                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1071                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1072                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1073                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1074                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1075                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4_EXT,
1077                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1079                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1080                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1081                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1082                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1083                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV6,
1085                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1086                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1087                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1089                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1090                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1091                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1092                         RTE_PTYPE_L3_IPV6_EXT,
1093                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1094                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1095                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1096                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1097                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1099                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1101                         RTE_PTYPE_INNER_L3_IPV6,
1102                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1103                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1104                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1105                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1106                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1107                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1108                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1110                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1111                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1112                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1113                         RTE_PTYPE_INNER_L3_IPV6,
1114                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1116                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1117                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1118                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1119                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1120                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1122                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1123                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1125                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1126                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1128                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1129                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1130                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1131                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1132                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1134                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1135                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1136                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1137                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1138                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1140                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1141                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1142                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1143                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1144                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1145                         RTE_PTYPE_L2_ETHER |
1146                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1147                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1148         };
1149
1150         static const uint32_t
1151                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1152                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1153                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1154                         RTE_PTYPE_INNER_L2_ETHER,
1155                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1156                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1157                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1158                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1160                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1164                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1166                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1167                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1169                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1170                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1172                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1176                         RTE_PTYPE_INNER_L4_TCP,
1177                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1178                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1179                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1180                         RTE_PTYPE_INNER_L4_TCP,
1181                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1182                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1183                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1184                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1185                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1186                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1187                         RTE_PTYPE_INNER_L4_TCP,
1188                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1189                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1190                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1191                         RTE_PTYPE_INNER_L3_IPV4,
1192                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1193                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1194                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1195                         RTE_PTYPE_INNER_L4_UDP,
1196                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1197                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1198                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1199                         RTE_PTYPE_INNER_L4_UDP,
1200                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1201                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1202                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1203                         RTE_PTYPE_INNER_L4_SCTP,
1204                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1205                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1207                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1208                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1210                         RTE_PTYPE_INNER_L4_UDP,
1211                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1212                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1214                         RTE_PTYPE_INNER_L4_SCTP,
1215                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1216                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1217                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1218                         RTE_PTYPE_INNER_L3_IPV4,
1219                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1220                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1221                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1222                         RTE_PTYPE_INNER_L4_SCTP,
1223                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1224                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1225                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1226                         RTE_PTYPE_INNER_L4_SCTP,
1227                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1228                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1230                         RTE_PTYPE_INNER_L4_TCP,
1231                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1232                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1233                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1234                         RTE_PTYPE_INNER_L4_UDP,
1235
1236                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1237                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1238                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1239                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1240                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1241                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1242                         RTE_PTYPE_INNER_L3_IPV4,
1243                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1244                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1245                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1246                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1247                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1248                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1249                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1250                         RTE_PTYPE_INNER_L3_IPV6,
1251                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1252                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1253                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1254                         RTE_PTYPE_INNER_L3_IPV4,
1255                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1256                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1257                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1258                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1259                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1260                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1261                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1262                         RTE_PTYPE_INNER_L3_IPV4,
1263                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1264                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1265                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1266                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1267                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1268                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1269                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1270                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1271                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1272                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1273                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1274                         RTE_PTYPE_INNER_L3_IPV4,
1275                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1276                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1277                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1278                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1279                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1280                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1281                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1282                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1283                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1284                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1285                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1286                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1287                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1288                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1289                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1290                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1291                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1292                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1293                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1294                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1295                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1296                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1297                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1298                         RTE_PTYPE_INNER_L3_IPV4,
1299                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1300                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1301                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1302                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1303                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1304                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1305                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1306                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1307                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1308                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1309                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1310                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1311                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1312                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1313                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1314                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1315                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1316                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1317                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1318                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1319                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1320                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1321                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1322                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1323                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1324                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1325                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1326                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1327         };
1328
1329         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1330                 return RTE_PTYPE_UNKNOWN;
1331
1332         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1333
1334         /* For tunnel packet */
1335         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1336                 /* Remove the tunnel bit to save the space. */
1337                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1338                 return ptype_table_tn[pkt_info];
1339         }
1340
1341         /**
1342          * For x550, if it's not a tunnel packet,
1343          * the tunnel type bit should be set to 0.
1344          * Reuse 82599's mask.
1345          */
1346         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1347
1348         return ptype_table[pkt_info];
1349 }
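/*
 * Note: the static tables above translate the hardware "packet type" field of
 * the Rx descriptor directly into the RTE_PTYPE_* encoding; the tunnel bit in
 * pkt_info selects between ptype_table_tn (NVGRE/VXLAN entries) and the plain
 * ptype_table reused from the 82599 layout.
 */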
1350
1351 static inline uint64_t
1352 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1353 {
1354         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1355                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1356                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1357                 PKT_RX_RSS_HASH, 0, 0, 0,
1358                 0, 0, 0,  PKT_RX_FDIR,
1359         };
1360 #ifdef RTE_LIBRTE_IEEE1588
1361         static uint64_t ip_pkt_etqf_map[8] = {
1362                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1363                 0, 0, 0, 0,
1364         };
1365
1366         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1367                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1368                                 ip_rss_types_map[pkt_info & 0XF];
1369         else
1370                 return ip_rss_types_map[pkt_info & 0XF];
1371 #else
1372         return ip_rss_types_map[pkt_info & 0XF];
1373 #endif
1374 }
1375
1376 static inline uint64_t
1377 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1378 {
1379         uint64_t pkt_flags;
1380
1381         /*
1382          * Check only whether a VLAN tag is present.
1383          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1384          * that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
1385          */
1386         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1387
1388 #ifdef RTE_LIBRTE_IEEE1588
1389         if (rx_status & IXGBE_RXD_STAT_TMST)
1390                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1391 #endif
1392         return pkt_flags;
1393 }
1394
1395 static inline uint64_t
1396 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1397 {
1398         uint64_t pkt_flags;
1399
1400         /*
1401          * Bit 31: IPE, IPv4 checksum error
1402          * Bit 30: L4I, L4 integrity error
1403          */
1404         static uint64_t error_to_pkt_flags_map[4] = {
1405                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1406                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1407                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1408                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1409         };
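        /*
         * As laid out in the table above, bit 1 of the 2-bit index corresponds
         * to the IPv4 checksum error (IPE) and bit 0 to the L4 error. For
         * example, an index of 2 (IPE set, L4E clear) yields
         * PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD.
         */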
1410         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1411                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1412
1413         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1414             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1415                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1416         }
1417
1418         return pkt_flags;
1419 }
1420
1421 /*
1422  * LOOK_AHEAD defines how many desc statuses to check beyond the
1423  * current descriptor.
1424  * It must be a compile-time constant (#define) for optimal performance.
1425  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1426  * function only works with LOOK_AHEAD=8.
1427  */
1428 #define LOOK_AHEAD 8
1429 #if (LOOK_AHEAD != 8)
1430 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1431 #endif
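/*
 * The scan loop below walks the ring in groups of LOOK_AHEAD descriptors, up
 * to RTE_PMD_IXGBE_RX_MAX_BURST per call, so the code assumes the burst size
 * is a multiple of LOOK_AHEAD for the loop bounds to line up.
 */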
1432 static inline int
1433 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1434 {
1435         volatile union ixgbe_adv_rx_desc *rxdp;
1436         struct ixgbe_rx_entry *rxep;
1437         struct rte_mbuf *mb;
1438         uint16_t pkt_len;
1439         uint64_t pkt_flags;
1440         int nb_dd;
1441         uint32_t s[LOOK_AHEAD];
1442         uint32_t pkt_info[LOOK_AHEAD];
1443         int i, j, nb_rx = 0;
1444         uint32_t status;
1445         uint64_t vlan_flags = rxq->vlan_flags;
1446
1447         /* get references to current descriptor and S/W ring entry */
1448         rxdp = &rxq->rx_ring[rxq->rx_tail];
1449         rxep = &rxq->sw_ring[rxq->rx_tail];
1450
1451         status = rxdp->wb.upper.status_error;
1452         /* check to make sure there is at least 1 packet to receive */
1453         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1454                 return 0;
1455
1456         /*
1457          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1458          * reference packets that are ready to be received.
1459          */
1460         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1461              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1462                 /* Read desc statuses backwards to avoid race condition */
1463                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1464                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1465
1466                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1467                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1468                                                        lo_dword.data);
1469
1470                 /* Compute how many status bits were set */
1471                 nb_dd = 0;
1472                 for (j = 0; j < LOOK_AHEAD; ++j)
1473                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1474
1475                 nb_rx += nb_dd;
1476
1477                 /* Translate descriptor info to mbuf format */
1478                 for (j = 0; j < nb_dd; ++j) {
1479                         mb = rxep[j].mbuf;
1480                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1481                                   rxq->crc_len;
1482                         mb->data_len = pkt_len;
1483                         mb->pkt_len = pkt_len;
1484                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1485
1486                         /* convert descriptor fields to rte mbuf flags */
1487                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1488                                 vlan_flags);
1489                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1490                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1491                                         ((uint16_t)pkt_info[j]);
1492                         mb->ol_flags = pkt_flags;
1493                         mb->packet_type =
1494                                 ixgbe_rxd_pkt_info_to_pkt_type
1495                                         (pkt_info[j], rxq->pkt_type_mask);
1496
1497                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1498                                 mb->hash.rss = rte_le_to_cpu_32(
1499                                     rxdp[j].wb.lower.hi_dword.rss);
1500                         else if (pkt_flags & PKT_RX_FDIR) {
1501                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1502                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1503                                     IXGBE_ATR_HASH_MASK;
1504                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1505                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1506                         }
1507                 }
1508
1509                 /* Move mbuf pointers from the S/W ring to the stage */
1510                 for (j = 0; j < LOOK_AHEAD; ++j) {
1511                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1512                 }
1513
1514                 /* stop if not all LOOK_AHEAD descriptors in this group were done */
1515                 if (nb_dd != LOOK_AHEAD)
1516                         break;
1517         }
1518
1519         /* clear software ring entries so we can cleanup correctly */
1520         for (i = 0; i < nb_rx; ++i) {
1521                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1522         }
1523
1525         return nb_rx;
1526 }
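/*
 * Note: ixgbe_rx_scan_hw_ring() only stages mbuf pointers in rxq->rx_stage;
 * rxq->rx_tail is advanced and the staged packets are handed to the
 * application later, in rx_recv_pkts()/ixgbe_rx_fill_from_stage().
 */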
1527
1528 static inline int
1529 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1530 {
1531         volatile union ixgbe_adv_rx_desc *rxdp;
1532         struct ixgbe_rx_entry *rxep;
1533         struct rte_mbuf *mb;
1534         uint16_t alloc_idx;
1535         __le64 dma_addr;
1536         int diag, i;
1537
1538         /* allocate buffers in bulk directly into the S/W ring */
1539         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1540         rxep = &rxq->sw_ring[alloc_idx];
1541         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1542                                     rxq->rx_free_thresh);
1543         if (unlikely(diag != 0))
1544                 return -ENOMEM;
1545
1546         rxdp = &rxq->rx_ring[alloc_idx];
1547         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1548                 /* populate the static rte mbuf fields */
1549                 mb = rxep[i].mbuf;
1550                 if (reset_mbuf) {
1551                         mb->next = NULL;
1552                         mb->nb_segs = 1;
1553                         mb->port = rxq->port_id;
1554                 }
1555
1556                 rte_mbuf_refcnt_set(mb, 1);
1557                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1558
1559                 /* populate the descriptors */
1560                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1561                 rxdp[i].read.hdr_addr = 0;
1562                 rxdp[i].read.pkt_addr = dma_addr;
1563         }
1564
1565         /* update state of internal queue structure */
1566         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1567         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1568                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
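        /*
         * Example: with rx_free_thresh = 32 and nb_rx_desc = 512 the trigger
         * advances 31 -> 63 -> ... -> 511 and then wraps back to 31, always
         * pointing at the last descriptor of the next block to be refilled.
         */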
1569
1570         /* no errors */
1571         return 0;
1572 }
1573
1574 static inline uint16_t
1575 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1576                          uint16_t nb_pkts)
1577 {
1578         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1579         int i;
1580
1581         /* how many packets are ready to return? */
1582         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1583
1584         /* copy mbuf pointers to the application's packet list */
1585         for (i = 0; i < nb_pkts; ++i)
1586                 rx_pkts[i] = stage[i];
1587
1588         /* update internal queue state */
1589         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1590         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1591
1592         return nb_pkts;
1593 }
1594
1595 static inline uint16_t
1596 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1597              uint16_t nb_pkts)
1598 {
1599         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1600         uint16_t nb_rx = 0;
1601
1602         /* Any previously recv'd pkts will be returned from the Rx stage */
1603         if (rxq->rx_nb_avail)
1604                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1605
1606         /* Scan the H/W ring for packets to receive */
1607         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1608
1609         /* update internal queue state */
1610         rxq->rx_next_avail = 0;
1611         rxq->rx_nb_avail = nb_rx;
1612         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1613
1614         /* if required, allocate new buffers to replenish descriptors */
1615         if (rxq->rx_tail > rxq->rx_free_trigger) {
1616                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
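                /*
                 * Capture the trigger before ixgbe_rx_alloc_bufs() advances
                 * it: it indexes the last descriptor just refilled and is the
                 * value written to RDT below.
                 */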
1617
1618                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1619                         int i, j;
1620
1621                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1622                                    "queue_id=%u", (unsigned) rxq->port_id,
1623                                    (unsigned) rxq->queue_id);
1624
1625                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1626                                 rxq->rx_free_thresh;
1627
1628                         /*
1629                          * Need to rewind any previous receives if we cannot
1630                          * allocate new buffers to replenish the old ones.
1631                          */
1632                         rxq->rx_nb_avail = 0;
1633                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1634                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1635                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1636
1637                         return 0;
1638                 }
1639
1640                 /* update tail pointer */
1641                 rte_wmb();
1642                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1643         }
1644
1645         if (rxq->rx_tail >= rxq->nb_rx_desc)
1646                 rxq->rx_tail = 0;
1647
1648         /* received any packets this loop? */
1649         if (rxq->rx_nb_avail)
1650                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1651
1652         return 0;
1653 }
1654
1655 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1656 uint16_t
1657 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1658                            uint16_t nb_pkts)
1659 {
1660         uint16_t nb_rx;
1661
1662         if (unlikely(nb_pkts == 0))
1663                 return 0;
1664
1665         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1666                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1667
1668         /* request is relatively large, chunk it up */
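        /*
         * E.g. a request for 100 packets is served as bursts of 32, 32, 32
         * and 4 (assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32, as defined in
         * ixgbe_rxtx.h), stopping early if a burst comes back short.
         */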
1669         nb_rx = 0;
1670         while (nb_pkts) {
1671                 uint16_t ret, n;
1672
1673                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1674                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1675                 nb_rx = (uint16_t)(nb_rx + ret);
1676                 nb_pkts = (uint16_t)(nb_pkts - ret);
1677                 if (ret < n)
1678                         break;
1679         }
1680
1681         return nb_rx;
1682 }
1683
1684 uint16_t
1685 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1686                 uint16_t nb_pkts)
1687 {
1688         struct ixgbe_rx_queue *rxq;
1689         volatile union ixgbe_adv_rx_desc *rx_ring;
1690         volatile union ixgbe_adv_rx_desc *rxdp;
1691         struct ixgbe_rx_entry *sw_ring;
1692         struct ixgbe_rx_entry *rxe;
1693         struct rte_mbuf *rxm;
1694         struct rte_mbuf *nmb;
1695         union ixgbe_adv_rx_desc rxd;
1696         uint64_t dma_addr;
1697         uint32_t staterr;
1698         uint32_t pkt_info;
1699         uint16_t pkt_len;
1700         uint16_t rx_id;
1701         uint16_t nb_rx;
1702         uint16_t nb_hold;
1703         uint64_t pkt_flags;
1704         uint64_t vlan_flags;
1705
1706         nb_rx = 0;
1707         nb_hold = 0;
1708         rxq = rx_queue;
1709         rx_id = rxq->rx_tail;
1710         rx_ring = rxq->rx_ring;
1711         sw_ring = rxq->sw_ring;
1712         vlan_flags = rxq->vlan_flags;
1713         while (nb_rx < nb_pkts) {
1714                 /*
1715                  * The order of operations here is important as the DD status
1716                  * bit must not be read after any other descriptor fields.
1717                  * rx_ring and rxdp are pointing to volatile data so the order
1718                  * of accesses cannot be reordered by the compiler. If they were
1719                  * not volatile, they could be reordered which could lead to
1720                  * using invalid descriptor fields when read from rxd.
1721                  */
1722                 rxdp = &rx_ring[rx_id];
1723                 staterr = rxdp->wb.upper.status_error;
1724                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1725                         break;
1726                 rxd = *rxdp;
1727
1728                 /*
1729                  * End of packet.
1730                  *
1731                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1732                  * is likely to be invalid and to be dropped by the various
1733                  * validation checks performed by the network stack.
1734                  *
1735                  * Allocate a new mbuf to replenish the RX ring descriptor.
1736                  * If the allocation fails:
1737                  *    - arrange for that RX descriptor to be the first one
1738                  *      being parsed the next time the receive function is
1739                  *      invoked [on the same queue].
1740                  *
1741                  *    - Stop parsing the RX ring and return immediately.
1742                  *
1743                  * This policy does not drop the packet received in the RX
1744                  * descriptor for which the allocation of a new mbuf failed.
1745                  * Thus, it allows that packet to be retrieved later, once
1746                  * mbufs have been freed in the meantime.
1747                  * As a side effect, holding RX descriptors instead of
1748                  * systematically giving them back to the NIC may lead to
1749                  * RX ring exhaustion situations.
1750                  * However, the NIC can gracefully prevent such situations
1751                  * from happening by sending specific "back-pressure" flow
1752                  * control frames to its peer(s).
1753                  */
1754                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1755                            "ext_err_stat=0x%08x pkt_len=%u",
1756                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1757                            (unsigned) rx_id, (unsigned) staterr,
1758                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1759
1760                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1761                 if (nmb == NULL) {
1762                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1763                                    "queue_id=%u", (unsigned) rxq->port_id,
1764                                    (unsigned) rxq->queue_id);
1765                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1766                         break;
1767                 }
1768
1769                 nb_hold++;
1770                 rxe = &sw_ring[rx_id];
1771                 rx_id++;
1772                 if (rx_id == rxq->nb_rx_desc)
1773                         rx_id = 0;
1774
1775                 /* Prefetch next mbuf while processing current one. */
1776                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1777
1778                 /*
1779                  * When next RX descriptor is on a cache-line boundary,
1780                  * prefetch the next 4 RX descriptors and the next 8 pointers
1781                  * to mbufs.
1782                  */
1783                 if ((rx_id & 0x3) == 0) {
1784                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1785                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1786                 }
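                /*
                 * Four 16-byte advanced Rx descriptors share one 64-byte cache
                 * line, hence the (rx_id & 0x3) == 0 check above (assuming a
                 * 64-byte cache line).
                 */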
1787
1788                 rxm = rxe->mbuf;
1789                 rxe->mbuf = nmb;
1790                 dma_addr =
1791                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1792                 rxdp->read.hdr_addr = 0;
1793                 rxdp->read.pkt_addr = dma_addr;
1794
1795                 /*
1796                  * Initialize the returned mbuf.
1797                  * 1) setup generic mbuf fields:
1798                  *    - number of segments,
1799                  *    - next segment,
1800                  *    - packet length,
1801                  *    - RX port identifier.
1802                  * 2) integrate hardware offload data, if any:
1803                  *    - RSS flag & hash,
1804                  *    - IP checksum flag,
1805                  *    - VLAN TCI, if any,
1806                  *    - error flags.
1807                  */
1808                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1809                                       rxq->crc_len);
1810                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1811                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1812                 rxm->nb_segs = 1;
1813                 rxm->next = NULL;
1814                 rxm->pkt_len = pkt_len;
1815                 rxm->data_len = pkt_len;
1816                 rxm->port = rxq->port_id;
1817
1818                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1819                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1820                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1821
1822                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1823                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1824                 pkt_flags = pkt_flags |
1825                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1826                 rxm->ol_flags = pkt_flags;
1827                 rxm->packet_type =
1828                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1829                                                        rxq->pkt_type_mask);
1830
1831                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1832                         rxm->hash.rss = rte_le_to_cpu_32(
1833                                                 rxd.wb.lower.hi_dword.rss);
1834                 else if (pkt_flags & PKT_RX_FDIR) {
1835                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1836                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1837                                         IXGBE_ATR_HASH_MASK;
1838                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1839                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1840                 }
1841                 /*
1842                  * Store the mbuf address into the next entry of the array
1843                  * of returned packets.
1844                  */
1845                 rx_pkts[nb_rx++] = rxm;
1846         }
1847         rxq->rx_tail = rx_id;
1848
1849         /*
1850          * If the number of free RX descriptors is greater than the RX free
1851          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1852          * register.
1853          * Update the RDT with the value of the last processed RX descriptor
1854          * minus 1, to guarantee that the RDT register is never equal to the
1855          * RDH register, which creates a "full" ring situation from the
1856          * hardware point of view...
1857          */
1858         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1859         if (nb_hold > rxq->rx_free_thresh) {
1860                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1861                            "nb_hold=%u nb_rx=%u",
1862                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1863                            (unsigned) rx_id, (unsigned) nb_hold,
1864                            (unsigned) nb_rx);
1865                 rx_id = (uint16_t) ((rx_id == 0) ?
1866                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1867                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1868                 nb_hold = 0;
1869         }
1870         rxq->nb_rx_hold = nb_hold;
1871         return nb_rx;
1872 }
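/*
 * For reference, a minimal polling-loop sketch (illustrative only; port_id and
 * queue_id are placeholders) that ends up dispatching to one of the receive
 * handlers in this file through the ethdev layer:
 *
 *      struct rte_mbuf *pkts[32];
 *      uint16_t i, nb;
 *
 *      for (;;) {
 *              nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *              for (i = 0; i < nb; i++) {
 *                      // process pkts[i], then release it
 *                      rte_pktmbuf_free(pkts[i]);
 *              }
 *      }
 */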
1873
1874 /**
1875  * Detect an RSC descriptor.
1876  */
1877 static inline uint32_t
1878 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1879 {
1880         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1881                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1882 }
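/*
 * A non-zero RSC count tells ixgbe_recv_pkts_lro() below that hardware has
 * coalesced this descriptor into an RSC cluster, in which case the index of
 * the next segment comes from the NEXTP field of the status word rather than
 * from plain ring order.
 */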
1883
1884 /**
1885  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1886  *
1887  * Fill the following info in the HEAD buffer of the Rx cluster:
1888  *    - RX port identifier
1889  *    - hardware offload data, if any:
1890  *      - RSS flag & hash
1891  *      - IP checksum flag
1892  *      - VLAN TCI, if any
1893  *      - error flags
1894  * @head HEAD of the packet cluster
1895  * @desc HW descriptor to get data from
1896  * @rxq Pointer to the Rx queue
 * @staterr Status/error word read from the HW descriptor
1897  */
1898 static inline void
1899 ixgbe_fill_cluster_head_buf(
1900         struct rte_mbuf *head,
1901         union ixgbe_adv_rx_desc *desc,
1902         struct ixgbe_rx_queue *rxq,
1903         uint32_t staterr)
1904 {
1905         uint32_t pkt_info;
1906         uint64_t pkt_flags;
1907
1908         head->port = rxq->port_id;
1909
1910         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1911          * set in the pkt_flags field.
1912          */
1913         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1914         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1915         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1916         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1917         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1918         head->ol_flags = pkt_flags;
1919         head->packet_type =
1920                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1921
1922         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1923                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1924         else if (pkt_flags & PKT_RX_FDIR) {
1925                 head->hash.fdir.hash =
1926                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1927                                                           & IXGBE_ATR_HASH_MASK;
1928                 head->hash.fdir.id =
1929                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1930         }
1931 }
1932
1933 /**
1934  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1935  *
1936  * @rx_queue Rx queue handle
1937  * @rx_pkts table of received packets
1938  * @nb_pkts size of rx_pkts table
1939  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1940  *
1941  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1942  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1943  *
1944  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1945  * 1) When non-EOP RSC completion arrives:
1946  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1947  *       segment's data length.
1948  *    b) Set the "next" pointer of the current segment to point to the segment
1949  *       at the NEXTP index.
1950  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1951  *       in the sw_rsc_ring.
1952  * 2) When EOP arrives we just update the cluster's total length and offload
1953  *    flags and deliver the cluster up to the upper layers. In our case - put it
1954  *    in the rx_pkts table.
1955  *
1956  * Returns the number of received packets/clusters (according to the "bulk
1957  * receive" interface).
1958  */
1959 static inline uint16_t
1960 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1961                     bool bulk_alloc)
1962 {
1963         struct ixgbe_rx_queue *rxq = rx_queue;
1964         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1965         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1966         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1967         uint16_t rx_id = rxq->rx_tail;
1968         uint16_t nb_rx = 0;
1969         uint16_t nb_hold = rxq->nb_rx_hold;
1970         uint16_t prev_id = rxq->rx_tail;
1971
1972         while (nb_rx < nb_pkts) {
1973                 bool eop;
1974                 struct ixgbe_rx_entry *rxe;
1975                 struct ixgbe_scattered_rx_entry *sc_entry;
1976                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1977                 struct ixgbe_rx_entry *next_rxe = NULL;
1978                 struct rte_mbuf *first_seg;
1979                 struct rte_mbuf *rxm;
1980                 struct rte_mbuf *nmb;
1981                 union ixgbe_adv_rx_desc rxd;
1982                 uint16_t data_len;
1983                 uint16_t next_id;
1984                 volatile union ixgbe_adv_rx_desc *rxdp;
1985                 uint32_t staterr;
1986
1987 next_desc:
1988                 /*
1989                  * The code in this whole file uses the volatile pointer to
1990                  * ensure the read ordering of the status and the rest of the
1991                  * descriptor fields (on the compiler level only!!!). This is so
1992                  * UGLY - why not to just use the compiler barrier instead? DPDK
1993                  * even has the rte_compiler_barrier() for that.
1994                  *
1995                  * But most importantly this is just wrong because this doesn't
1996                  * ensure memory ordering in a general case at all. For
1997                  * instance, DPDK is supposed to work on Power CPUs where
1998                  * compiler barrier may just not be enough!
1999                  *
2000                  * I tried to write only this function properly to have a
2001                  * starting point (as a part of an LRO/RSC series) but the
2002                  * compiler cursed at me when I tried to cast away the
2003                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2004                  * keeping it the way it is for now.
2005                  *
2006                  * The code in this file is broken in so many other places and
2007                  * will just not work on a big endian CPU anyway therefore the
2008                  * lines below will have to be revisited together with the rest
2009                  * of the ixgbe PMD.
2010                  *
2011                  * TODO:
2012                  *    - Get rid of "volatile" crap and let the compiler do its
2013                  *      job.
2014                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2015                  *      memory ordering below.
2016                  */
2017                 rxdp = &rx_ring[rx_id];
2018                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2019
2020                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2021                         break;
2022
2023                 rxd = *rxdp;
2024
2025                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2026                                   "staterr=0x%x data_len=%u",
2027                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2028                            rte_le_to_cpu_16(rxd.wb.upper.length));
2029
2030                 if (!bulk_alloc) {
2031                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2032                         if (nmb == NULL) {
2033                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2034                                                   "port_id=%u queue_id=%u",
2035                                            rxq->port_id, rxq->queue_id);
2036
2037                                 rte_eth_devices[rxq->port_id].data->
2038                                                         rx_mbuf_alloc_failed++;
2039                                 break;
2040                         }
2041                 } else if (nb_hold > rxq->rx_free_thresh) {
2042                         uint16_t next_rdt = rxq->rx_free_trigger;
2043
2044                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2045                                 rte_wmb();
2046                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
2047                                                     next_rdt);
2048                                 nb_hold -= rxq->rx_free_thresh;
2049                         } else {
2050                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2051                                                   "port_id=%u queue_id=%u",
2052                                            rxq->port_id, rxq->queue_id);
2053
2054                                 rte_eth_devices[rxq->port_id].data->
2055                                                         rx_mbuf_alloc_failed++;
2056                                 break;
2057                         }
2058                 }
2059
2060                 nb_hold++;
2061                 rxe = &sw_ring[rx_id];
2062                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2063
2064                 next_id = rx_id + 1;
2065                 if (next_id == rxq->nb_rx_desc)
2066                         next_id = 0;
2067
2068                 /* Prefetch next mbuf while processing current one. */
2069                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2070
2071                 /*
2072                  * When next RX descriptor is on a cache-line boundary,
2073                  * prefetch the next 4 RX descriptors and the next 4 pointers
2074                  * to mbufs.
2075                  */
2076                 if ((next_id & 0x3) == 0) {
2077                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2078                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2079                 }
2080
2081                 rxm = rxe->mbuf;
2082
2083                 if (!bulk_alloc) {
2084                         __le64 dma =
2085                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2086                         /*
2087                          * Update RX descriptor with the physical address of the
2088                          * new data buffer of the newly allocated mbuf.
2089                          */
2090                         rxe->mbuf = nmb;
2091
2092                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2093                         rxdp->read.hdr_addr = 0;
2094                         rxdp->read.pkt_addr = dma;
2095                 } else
2096                         rxe->mbuf = NULL;
2097
2098                 /*
2099                  * Set data length & data buffer address of mbuf.
2100                  */
2101                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2102                 rxm->data_len = data_len;
2103
2104                 if (!eop) {
2105                         uint16_t nextp_id;
2106                         /*
2107                          * Get next descriptor index:
2108                          *  - For RSC it's in the NEXTP field.
2109                          *  - For a scattered packet - it's just a following
2110                          *    descriptor.
2111                          */
2112                         if (ixgbe_rsc_count(&rxd))
2113                                 nextp_id =
2114                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2115                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2116                         else
2117                                 nextp_id = next_id;
2118
2119                         next_sc_entry = &sw_sc_ring[nextp_id];
2120                         next_rxe = &sw_ring[nextp_id];
2121                         rte_ixgbe_prefetch(next_rxe);
2122                 }
2123
2124                 sc_entry = &sw_sc_ring[rx_id];
2125                 first_seg = sc_entry->fbuf;
2126                 sc_entry->fbuf = NULL;
2127
2128                 /*
2129                  * If this is the first buffer of the received packet,
2130                  * set the pointer to the first mbuf of the packet and
2131                  * initialize its context.
2132                  * Otherwise, update the total length and the number of segments
2133                  * of the current scattered packet, and update the pointer to
2134                  * the last mbuf of the current packet.
2135                  */
2136                 if (first_seg == NULL) {
2137                         first_seg = rxm;
2138                         first_seg->pkt_len = data_len;
2139                         first_seg->nb_segs = 1;
2140                 } else {
2141                         first_seg->pkt_len += data_len;
2142                         first_seg->nb_segs++;
2143                 }
2144
2145                 prev_id = rx_id;
2146                 rx_id = next_id;
2147
2148                 /*
2149                  * If this is not the last buffer of the received packet, update
2150                  * the pointer to the first mbuf at the NEXTP entry in the
2151                  * sw_sc_ring and continue to parse the RX ring.
2152                  */
2153                 if (!eop && next_rxe) {
2154                         rxm->next = next_rxe->mbuf;
2155                         next_sc_entry->fbuf = first_seg;
2156                         goto next_desc;
2157                 }
2158
2159                 /*
2160                  * This is the last buffer of the received packet - return
2161                  * the current cluster to the user.
2162                  */
2163                 rxm->next = NULL;
2164
2165                 /* Initialize the first mbuf of the returned packet */
2166                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2167
2168                 /*
2169                  * Deal with the case when HW CRC strip is disabled.
2170                  * That can't happen when LRO is enabled, but could still
2171                  * happen in scattered RX mode.
2172                  */
2173                 first_seg->pkt_len -= rxq->crc_len;
2174                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2175                         struct rte_mbuf *lp;
2176
2177                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2178                                 ;
2179
2180                         first_seg->nb_segs--;
2181                         lp->data_len -= rxq->crc_len - rxm->data_len;
2182                         lp->next = NULL;
2183                         rte_pktmbuf_free_seg(rxm);
2184                 } else
2185                         rxm->data_len -= rxq->crc_len;
2186
2187                 /* Prefetch data of first segment, if configured to do so. */
2188                 rte_packet_prefetch((char *)first_seg->buf_addr +
2189                         first_seg->data_off);
2190
2191                 /*
2192                  * Store the mbuf address into the next entry of the array
2193                  * of returned packets.
2194                  */
2195                 rx_pkts[nb_rx++] = first_seg;
2196         }
2197
2198         /*
2199          * Record index of the next RX descriptor to probe.
2200          */
2201         rxq->rx_tail = rx_id;
2202
2203         /*
2204          * If the number of free RX descriptors is greater than the RX free
2205          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2206          * register.
2207          * Update the RDT with the value of the last processed RX descriptor
2208          * minus 1, to guarantee that the RDT register is never equal to the
2209          * RDH register, which creates a "full" ring situation from the
2210          * hardware point of view...
2211          */
2212         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2213                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2214                            "nb_hold=%u nb_rx=%u",
2215                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2216
2217                 rte_wmb();
2218                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2219                 nb_hold = 0;
2220         }
2221
2222         rxq->nb_rx_hold = nb_hold;
2223         return nb_rx;
2224 }
2225
2226 uint16_t
2227 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2228                                  uint16_t nb_pkts)
2229 {
2230         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2231 }
2232
2233 uint16_t
2234 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2235                                uint16_t nb_pkts)
2236 {
2237         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2238 }
2239
2240 /*********************************************************************
2241  *
2242  *  Queue management functions
2243  *
2244  **********************************************************************/
2245
2246 static void __attribute__((cold))
2247 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2248 {
2249         unsigned i;
2250
2251         if (txq->sw_ring != NULL) {
2252                 for (i = 0; i < txq->nb_tx_desc; i++) {
2253                         if (txq->sw_ring[i].mbuf != NULL) {
2254                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2255                                 txq->sw_ring[i].mbuf = NULL;
2256                         }
2257                 }
2258         }
2259 }
2260
2261 static void __attribute__((cold))
2262 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2263 {
2264         if (txq != NULL &&
2265             txq->sw_ring != NULL)
2266                 rte_free(txq->sw_ring);
2267 }
2268
2269 static void __attribute__((cold))
2270 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2271 {
2272         if (txq != NULL && txq->ops != NULL) {
2273                 txq->ops->release_mbufs(txq);
2274                 txq->ops->free_swring(txq);
2275                 rte_free(txq);
2276         }
2277 }
2278
2279 void __attribute__((cold))
2280 ixgbe_dev_tx_queue_release(void *txq)
2281 {
2282         ixgbe_tx_queue_release(txq);
2283 }
2284
2285 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2286 static void __attribute__((cold))
2287 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2288 {
2289         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2290         struct ixgbe_tx_entry *txe = txq->sw_ring;
2291         uint16_t prev, i;
2292
2293         /* Zero out HW ring memory */
2294         for (i = 0; i < txq->nb_tx_desc; i++) {
2295                 txq->tx_ring[i] = zeroed_desc;
2296         }
2297
2298         /* Initialize SW ring entries */
2299         prev = (uint16_t) (txq->nb_tx_desc - 1);
2300         for (i = 0; i < txq->nb_tx_desc; i++) {
2301                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2302
2303                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2304                 txe[i].mbuf = NULL;
2305                 txe[i].last_id = i;
2306                 txe[prev].next_id = i;
2307                 prev = i;
2308         }
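        /*
         * Every descriptor is created with its DD bit already set so that the
         * transmit-side cleanup logic treats the whole ring as completed and
         * can hand out entries without waiting for a hardware write-back.
         */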
2309
2310         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2311         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2312
2313         txq->tx_tail = 0;
2314         txq->nb_tx_used = 0;
2315         /*
2316          * Always allow 1 descriptor to be un-allocated to avoid
2317          * a H/W race condition
2318          */
2319         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2320         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2321         txq->ctx_curr = 0;
2322         memset((void *)&txq->ctx_cache, 0,
2323                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2324 }
2325
2326 static const struct ixgbe_txq_ops def_txq_ops = {
2327         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2328         .free_swring = ixgbe_tx_free_swring,
2329         .reset = ixgbe_reset_tx_queue,
2330 };
2331
2332 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2333  * the queue parameters. Used in tx_queue_setup by primary process and then
2334  * in dev_init by secondary process when attaching to an existing ethdev.
2335  */
2336 void __attribute__((cold))
2337 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2338 {
2339         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2340         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2341                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2342                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2343                 dev->tx_pkt_prepare = NULL;
2344 #ifdef RTE_IXGBE_INC_VECTOR
2345                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2346                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2347                                         ixgbe_txq_vec_setup(txq) == 0)) {
2348                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2349                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2350                 } else
2351 #endif
2352                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2353         } else {
2354                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2355                 PMD_INIT_LOG(DEBUG,
2356                                 " - txq_flags = %lx [IXGBE_SIMPLE_FLAGS=%lx]",
2357                                 (unsigned long)txq->txq_flags,
2358                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2359                 PMD_INIT_LOG(DEBUG,
2360                                 " - tx_rs_thresh = %lu [RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2361                                 (unsigned long)txq->tx_rs_thresh,
2362                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2363                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2364                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2365         }
2366 }
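/*
 * In short: queues created with IXGBE_SIMPLE_FLAGS and a sufficiently large
 * tx_rs_thresh get the simple (or, when available, vector) transmit path with
 * no tx_prepare step, while every other configuration falls back to the
 * full-featured ixgbe_xmit_pkts()/ixgbe_prep_pkts() pair.
 */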
2367
2368 int __attribute__((cold))
2369 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2370                          uint16_t queue_idx,
2371                          uint16_t nb_desc,
2372                          unsigned int socket_id,
2373                          const struct rte_eth_txconf *tx_conf)
2374 {
2375         const struct rte_memzone *tz;
2376         struct ixgbe_tx_queue *txq;
2377         struct ixgbe_hw     *hw;
2378         uint16_t tx_rs_thresh, tx_free_thresh;
2379
2380         PMD_INIT_FUNC_TRACE();
2381         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2382
2383         /*
2384          * Validate number of transmit descriptors.
2385          * It must not exceed the hardware maximum and must be a multiple
2386          * of IXGBE_TXD_ALIGN.
2387          */
2388         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2389                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2390                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2391                 return -EINVAL;
2392         }
2393
2394         /*
2395          * The following two parameters control the setting of the RS bit on
2396          * transmit descriptors.
2397          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2398          * descriptors have been used.
2399          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2400          * descriptors are used or if the number of descriptors required
2401          * to transmit a packet is greater than the number of free TX
2402          * descriptors.
2403          * The following constraints must be satisfied:
2404          *  tx_rs_thresh must be greater than 0.
2405          *  tx_rs_thresh must be less than the size of the ring minus 2.
2406          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2407          *  tx_rs_thresh must be a divisor of the ring size.
2408          *  tx_free_thresh must be greater than 0.
2409          *  tx_free_thresh must be less than the size of the ring minus 3.
2410          * One descriptor in the TX ring is used as a sentinel to avoid a
2411          * H/W race condition, hence the maximum threshold constraints.
2412          * When set to zero use default values.
2413          */
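        /*
         * Editor's worked example of the constraints above (hypothetical
         * values): with nb_desc = 512, tx_rs_thresh = 32 and
         * tx_free_thresh = 64 we have 32 > 0, 32 < 510, 32 <= 64,
         * 512 % 32 == 0, 64 > 0 and 64 < 509, so every check below passes;
         * tx_conf->tx_thresh.wthresh must additionally be 0 because
         * tx_rs_thresh is greater than 1.
         */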
2414         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2415                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2416         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2417                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2418         if (tx_rs_thresh >= (nb_desc - 2)) {
2419                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2420                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2421                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2422                         (int)dev->data->port_id, (int)queue_idx);
2423                 return -(EINVAL);
2424         }
2425         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2426                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2427                         "(tx_rs_thresh=%u port=%d queue=%d)",
2428                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2429                         (int)dev->data->port_id, (int)queue_idx);
2430                 return -(EINVAL);
2431         }
2432         if (tx_free_thresh >= (nb_desc - 3)) {
2433                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2434                              "number of TX descriptors minus 3. "
2435                              "(tx_free_thresh=%u "
2436                              "port=%d queue=%d)",
2437                              (unsigned int)tx_free_thresh,
2438                              (int)dev->data->port_id, (int)queue_idx);
2439                 return -(EINVAL);
2440         }
2441         if (tx_rs_thresh > tx_free_thresh) {
2442                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2443                              "tx_free_thresh. (tx_free_thresh=%u "
2444                              "tx_rs_thresh=%u port=%d queue=%d)",
2445                              (unsigned int)tx_free_thresh,
2446                              (unsigned int)tx_rs_thresh,
2447                              (int)dev->data->port_id,
2448                              (int)queue_idx);
2449                 return -(EINVAL);
2450         }
2451         if ((nb_desc % tx_rs_thresh) != 0) {
2452                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2453                              "number of TX descriptors. (tx_rs_thresh=%u "
2454                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2455                              (int)dev->data->port_id, (int)queue_idx);
2456                 return -(EINVAL);
2457         }
2458
2459         /*
2460          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2461          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2462          * by the NIC and all descriptors are written back after the NIC
2463          * accumulates WTHRESH descriptors.
2464          */
2465         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2466                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2467                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2468                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2469                              (int)dev->data->port_id, (int)queue_idx);
2470                 return -(EINVAL);
2471         }
2472
2473         /* Free memory prior to re-allocation if needed... */
2474         if (dev->data->tx_queues[queue_idx] != NULL) {
2475                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2476                 dev->data->tx_queues[queue_idx] = NULL;
2477         }
2478
2479         /* First allocate the tx queue data structure */
2480         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2481                                  RTE_CACHE_LINE_SIZE, socket_id);
2482         if (txq == NULL)
2483                 return -ENOMEM;
2484
2485         /*
2486          * Allocate TX ring hardware descriptors. A memzone large enough to
2487          * handle the maximum ring size is allocated in order to allow for
2488          * resizing in later calls to the queue setup function.
2489          */
2490         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2491                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2492                         IXGBE_ALIGN, socket_id);
2493         if (tz == NULL) {
2494                 ixgbe_tx_queue_release(txq);
2495                 return -ENOMEM;
2496         }
2497
2498         txq->nb_tx_desc = nb_desc;
2499         txq->tx_rs_thresh = tx_rs_thresh;
2500         txq->tx_free_thresh = tx_free_thresh;
2501         txq->pthresh = tx_conf->tx_thresh.pthresh;
2502         txq->hthresh = tx_conf->tx_thresh.hthresh;
2503         txq->wthresh = tx_conf->tx_thresh.wthresh;
2504         txq->queue_id = queue_idx;
2505         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2506                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2507         txq->port_id = dev->data->port_id;
2508         txq->txq_flags = tx_conf->txq_flags;
2509         txq->ops = &def_txq_ops;
2510         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2511
2512         /*
2513          * Modification to set VFTDT for virtual function if vf is detected
2514          */
2515         if (hw->mac.type == ixgbe_mac_82599_vf ||
2516             hw->mac.type == ixgbe_mac_X540_vf ||
2517             hw->mac.type == ixgbe_mac_X550_vf ||
2518             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2519             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2520                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2521         else
2522                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2523
2524         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2525         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2526
2527         /* Allocate software ring */
2528         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2529                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2530                                 RTE_CACHE_LINE_SIZE, socket_id);
2531         if (txq->sw_ring == NULL) {
2532                 ixgbe_tx_queue_release(txq);
2533                 return -ENOMEM;
2534         }
2535         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2536                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2537
2538         /* set up vector or scalar TX function as appropriate */
2539         ixgbe_set_tx_function(dev, txq);
2540
2541         txq->ops->reset(txq);
2542
2543         dev->data->tx_queues[queue_idx] = txq;
2544
2545
2546         return 0;
2547 }
2548
2549 /**
2550  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2551  *
2552  * The "next" pointer of the last segment of a (not-yet-completed) RSC cluster
2553  * in the sw_sc_ring is not set to NULL but rather points to the next
2554  * mbuf of this RSC aggregation (that has not been completed yet and still
2555  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2556  * just free the first "nb_segs" segments of the cluster explicitly by calling
2557  * rte_pktmbuf_free_seg() on each of them.
2558  *
2559  * @m scattered cluster head
2560  */
2561 static void __attribute__((cold))
2562 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2563 {
2564         uint8_t i, nb_segs = m->nb_segs;
2565         struct rte_mbuf *next_seg;
2566
2567         for (i = 0; i < nb_segs; i++) {
2568                 next_seg = m->next;
2569                 rte_pktmbuf_free_seg(m);
2570                 m = next_seg;
2571         }
2572 }
2573
2574 static void __attribute__((cold))
2575 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2576 {
2577         unsigned i;
2578
2579 #ifdef RTE_IXGBE_INC_VECTOR
2580         /* SSE Vector driver has a different way of releasing mbufs. */
2581         if (rxq->rx_using_sse) {
2582                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2583                 return;
2584         }
2585 #endif
2586
2587         if (rxq->sw_ring != NULL) {
2588                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2589                         if (rxq->sw_ring[i].mbuf != NULL) {
2590                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2591                                 rxq->sw_ring[i].mbuf = NULL;
2592                         }
2593                 }
2594                 if (rxq->rx_nb_avail) {
2595                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2596                                 struct rte_mbuf *mb;
2597
2598                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2599                                 rte_pktmbuf_free_seg(mb);
2600                         }
2601                         rxq->rx_nb_avail = 0;
2602                 }
2603         }
2604
2605         if (rxq->sw_sc_ring)
2606                 for (i = 0; i < rxq->nb_rx_desc; i++)
2607                         if (rxq->sw_sc_ring[i].fbuf) {
2608                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2609                                 rxq->sw_sc_ring[i].fbuf = NULL;
2610                         }
2611 }
2612
2613 static void __attribute__((cold))
2614 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2615 {
2616         if (rxq != NULL) {
2617                 ixgbe_rx_queue_release_mbufs(rxq);
2618                 rte_free(rxq->sw_ring);
2619                 rte_free(rxq->sw_sc_ring);
2620                 rte_free(rxq);
2621         }
2622 }
2623
2624 void __attribute__((cold))
2625 ixgbe_dev_rx_queue_release(void *rxq)
2626 {
2627         ixgbe_rx_queue_release(rxq);
2628 }
2629
2630 /*
2631  * Check if Rx Burst Bulk Alloc function can be used.
2632  * Return
2633  *        0: the preconditions are satisfied and the bulk allocation function
2634  *           can be used.
2635  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2636  *           function must be used.
2637  */
2638 static inline int __attribute__((cold))
2639 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2640 {
2641         int ret = 0;
2642
2643         /*
2644          * Make sure the following pre-conditions are satisfied:
2645          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2646          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2647          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2648          * Scattered packets are not supported.  This should be checked
2649          * outside of this function.
2650          */
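        /*
         * Editor's worked example: assuming IXGBE_MAX_RING_DESC is 4096 and
         * RTE_PMD_IXGBE_RX_MAX_BURST is 32 (as defined in this driver), a
         * maximum-size ring with rx_free_thresh = 32 passes all three checks
         * (32 >= 32, 32 < 4096, 4096 % 32 == 0), so even the largest ring
         * can use the bulk allocation path.
         */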
2651         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2652                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2653                              "rxq->rx_free_thresh=%d, "
2654                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2655                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2656                 ret = -EINVAL;
2657         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2658                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2659                              "rxq->rx_free_thresh=%d, "
2660                              "rxq->nb_rx_desc=%d",
2661                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2662                 ret = -EINVAL;
2663         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2664                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2665                              "rxq->nb_rx_desc=%d, "
2666                              "rxq->rx_free_thresh=%d",
2667                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2668                 ret = -EINVAL;
2669         }
2670
2671         return ret;
2672 }
2673
2674 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2675 static void __attribute__((cold))
2676 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2677 {
2678         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2679         unsigned i;
2680         uint16_t len = rxq->nb_rx_desc;
2681
2682         /*
2683          * By default, the Rx queue setup function allocates enough memory for
2684          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2685          * extra memory at the end of the descriptor ring to be zero'd out.
2686          */
2687         if (adapter->rx_bulk_alloc_allowed)
2688                 /* zero out extra memory */
2689                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
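        /*
         * Editor's note: for a maximum-size ring this zeroes
         * 4096 + 32 = 4128 descriptors (with IXGBE_MAX_RING_DESC = 4096 and
         * RTE_PMD_IXGBE_RX_MAX_BURST = 32); the "rx_ring" memzone reserved
         * in ixgbe_dev_rx_queue_setup() is expected to be sized (RX_RING_SZ)
         * to cover this extra tail.
         */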
2690
2691         /*
2692          * Zero out the HW ring memory, including the extra memory at the
2693          * end of the ring, so that the look-ahead logic in the Rx Burst
2694          * bulk alloc function reads the extra descriptors as zeros.
2695          */
2696         for (i = 0; i < len; i++) {
2697                 rxq->rx_ring[i] = zeroed_desc;
2698         }
2699
2700         /*
2701          * Initialize the extra software ring entries. Space for these
2702          * extra entries is always allocated.
2703          */
2704         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2705         for (i = rxq->nb_rx_desc; i < len; ++i) {
2706                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2707         }
2708
2709         rxq->rx_nb_avail = 0;
2710         rxq->rx_next_avail = 0;
2711         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2712         rxq->rx_tail = 0;
2713         rxq->nb_rx_hold = 0;
2714         rxq->pkt_first_seg = NULL;
2715         rxq->pkt_last_seg = NULL;
2716
2717 #ifdef RTE_IXGBE_INC_VECTOR
2718         rxq->rxrearm_start = 0;
2719         rxq->rxrearm_nb = 0;
2720 #endif
2721 }
2722
2723 int __attribute__((cold))
2724 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2725                          uint16_t queue_idx,
2726                          uint16_t nb_desc,
2727                          unsigned int socket_id,
2728                          const struct rte_eth_rxconf *rx_conf,
2729                          struct rte_mempool *mp)
2730 {
2731         const struct rte_memzone *rz;
2732         struct ixgbe_rx_queue *rxq;
2733         struct ixgbe_hw     *hw;
2734         uint16_t len;
2735         struct ixgbe_adapter *adapter =
2736                 (struct ixgbe_adapter *)dev->data->dev_private;
2737
2738         PMD_INIT_FUNC_TRACE();
2739         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2740
2741         /*
2742          * Validate number of receive descriptors.
2743          * It must not exceed the hardware maximum and must be a multiple
2744          * of IXGBE_RXD_ALIGN.
2745          */
2746         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2747                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2748                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2749                 return -EINVAL;
2750         }
2751
2752         /* Free memory prior to re-allocation if needed... */
2753         if (dev->data->rx_queues[queue_idx] != NULL) {
2754                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2755                 dev->data->rx_queues[queue_idx] = NULL;
2756         }
2757
2758         /* First allocate the rx queue data structure */
2759         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2760                                  RTE_CACHE_LINE_SIZE, socket_id);
2761         if (rxq == NULL)
2762                 return -ENOMEM;
2763         rxq->mb_pool = mp;
2764         rxq->nb_rx_desc = nb_desc;
2765         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2766         rxq->queue_id = queue_idx;
2767         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2768                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2769         rxq->port_id = dev->data->port_id;
2770         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2771                                                         0 : ETHER_CRC_LEN);
2772         rxq->drop_en = rx_conf->rx_drop_en;
2773         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2774
2775         /*
2776          * The packet type in RX descriptor is different for different NICs.
2777          * Some bits are used for x550 but reserved for other NICs.
2778          * So set different masks for different NICs.
2779          */
2780         if (hw->mac.type == ixgbe_mac_X550 ||
2781             hw->mac.type == ixgbe_mac_X550EM_x ||
2782             hw->mac.type == ixgbe_mac_X550EM_a ||
2783             hw->mac.type == ixgbe_mac_X550_vf ||
2784             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2785             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2786                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2787         else
2788                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2789
2790         /*
2791          * Allocate RX ring hardware descriptors. A memzone large enough to
2792          * handle the maximum ring size is allocated in order to allow for
2793          * resizing in later calls to the queue setup function.
2794          */
2795         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2796                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2797         if (rz == NULL) {
2798                 ixgbe_rx_queue_release(rxq);
2799                 return -ENOMEM;
2800         }
2801
2802         /*
2803          * Zero init all the descriptors in the ring.
2804          */
2805         memset(rz->addr, 0, RX_RING_SZ);
2806
2807         /*
2808          * Modified to setup VFRDT for Virtual Function
2809          */
2810         if (hw->mac.type == ixgbe_mac_82599_vf ||
2811             hw->mac.type == ixgbe_mac_X540_vf ||
2812             hw->mac.type == ixgbe_mac_X550_vf ||
2813             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2814             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2815                 rxq->rdt_reg_addr =
2816                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2817                 rxq->rdh_reg_addr =
2818                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2819         } else {
2820                 rxq->rdt_reg_addr =
2821                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2822                 rxq->rdh_reg_addr =
2823                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2824         }
2825
2826         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2827         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2828
2829         /*
2830          * Certain constraints must be met in order to use the bulk buffer
2831          * allocation Rx burst function. If any of the Rx queues doesn't meet
2832          * them, the feature should be disabled for the whole port.
2833          */
2834         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2835                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2836                                     "preconditions - canceling the feature for "
2837                                     "the whole port[%d]",
2838                              rxq->queue_id, rxq->port_id);
2839                 adapter->rx_bulk_alloc_allowed = false;
2840         }
2841
2842         /*
2843          * Allocate software ring. Allow for space at the end of the
2844          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2845          * function does not access an invalid memory region.
2846          */
2847         len = nb_desc;
2848         if (adapter->rx_bulk_alloc_allowed)
2849                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2850
2851         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2852                                           sizeof(struct ixgbe_rx_entry) * len,
2853                                           RTE_CACHE_LINE_SIZE, socket_id);
2854         if (!rxq->sw_ring) {
2855                 ixgbe_rx_queue_release(rxq);
2856                 return -ENOMEM;
2857         }
2858
2859         /*
2860          * Always allocate even if it's not going to be needed in order to
2861          * simplify the code.
2862          *
2863          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2864          * be requested in ixgbe_dev_rx_init(), which is called later from
2865          * dev_start() flow.
2866          */
2867         rxq->sw_sc_ring =
2868                 rte_zmalloc_socket("rxq->sw_sc_ring",
2869                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2870                                    RTE_CACHE_LINE_SIZE, socket_id);
2871         if (!rxq->sw_sc_ring) {
2872                 ixgbe_rx_queue_release(rxq);
2873                 return -ENOMEM;
2874         }
2875
2876         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2877                             "dma_addr=0x%"PRIx64,
2878                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2879                      rxq->rx_ring_phys_addr);
2880
2881         if (!rte_is_power_of_2(nb_desc)) {
2882                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2883                                     "preconditions - canceling the feature for "
2884                                     "the whole port[%d]",
2885                              rxq->queue_id, rxq->port_id);
2886                 adapter->rx_vec_allowed = false;
2887         } else
2888                 ixgbe_rxq_vec_setup(rxq);
2889
2890         dev->data->rx_queues[queue_idx] = rxq;
2891
2892         ixgbe_reset_rx_queue(adapter, rxq);
2893
2894         return 0;
2895 }
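
/*
 * Editor's note -- illustrative usage sketch, not part of the driver;
 * port_id, queue_id, socket_id and mb_pool are hypothetical. Setting up the
 * largest supported ring (4096 descriptors, i.e. IXGBE_MAX_RING_DESC) while
 * keeping the bulk alloc Rx path available:
 *
 *     struct rte_eth_rxconf rxconf = {
 *             .rx_free_thresh = 32,
 *     };
 *     rte_eth_rx_queue_setup(port_id, queue_id, 4096, socket_id,
 *                            &rxconf, mb_pool);
 */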
2896
2897 uint32_t
2898 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2899 {
2900 #define IXGBE_RXQ_SCAN_INTERVAL 4
2901         volatile union ixgbe_adv_rx_desc *rxdp;
2902         struct ixgbe_rx_queue *rxq;
2903         uint32_t desc = 0;
2904
2905         if (rx_queue_id >= dev->data->nb_rx_queues) {
2906                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2907                 return 0;
2908         }
2909
2910         rxq = dev->data->rx_queues[rx_queue_id];
2911         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2912
2913         while ((desc < rxq->nb_rx_desc) &&
2914                 (rxdp->wb.upper.status_error &
2915                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2916                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2917                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2918                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2919                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2920                                 desc - rxq->nb_rx_desc]);
2921         }
2922
2923         return desc;
2924 }
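
/*
 * Editor's note: because the loop above samples every
 * IXGBE_RXQ_SCAN_INTERVAL (4) descriptors and credits 4 per DD hit, the
 * returned count is an estimate with a granularity of 4 descriptors rather
 * than an exact number of completed descriptors.
 */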
2925
2926 int
2927 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2928 {
2929         volatile union ixgbe_adv_rx_desc *rxdp;
2930         struct ixgbe_rx_queue *rxq = rx_queue;
2931         uint32_t desc;
2932
2933         if (unlikely(offset >= rxq->nb_rx_desc))
2934                 return 0;
2935         desc = rxq->rx_tail + offset;
2936         if (desc >= rxq->nb_rx_desc)
2937                 desc -= rxq->nb_rx_desc;
2938
2939         rxdp = &rxq->rx_ring[desc];
2940         return !!(rxdp->wb.upper.status_error &
2941                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2942 }
2943
2944 void __attribute__((cold))
2945 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2946 {
2947         unsigned i;
2948         struct ixgbe_adapter *adapter =
2949                 (struct ixgbe_adapter *)dev->data->dev_private;
2950
2951         PMD_INIT_FUNC_TRACE();
2952
2953         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2954                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2955
2956                 if (txq != NULL) {
2957                         txq->ops->release_mbufs(txq);
2958                         txq->ops->reset(txq);
2959                 }
2960         }
2961
2962         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2963                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2964
2965                 if (rxq != NULL) {
2966                         ixgbe_rx_queue_release_mbufs(rxq);
2967                         ixgbe_reset_rx_queue(adapter, rxq);
2968                 }
2969         }
2970 }
2971
2972 void
2973 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2974 {
2975         unsigned i;
2976
2977         PMD_INIT_FUNC_TRACE();
2978
2979         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2980                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2981                 dev->data->rx_queues[i] = NULL;
2982         }
2983         dev->data->nb_rx_queues = 0;
2984
2985         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2986                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2987                 dev->data->tx_queues[i] = NULL;
2988         }
2989         dev->data->nb_tx_queues = 0;
2990 }
2991
2992 /*********************************************************************
2993  *
2994  *  Device RX/TX init functions
2995  *
2996  **********************************************************************/
2997
2998 /**
2999  * Receive Side Scaling (RSS)
3000  * See section 7.1.2.8 in the following document:
3001  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3002  *
3003  * Principles:
3004  * The source and destination IP addresses of the IP header and the source
3005  * and destination ports of TCP/UDP headers, if any, of received packets are
3006  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3007  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3008  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3009  * RSS output index which is used as the RX queue index where to store the
3010  * received packets.
3011  * The following output is supplied in the RX write-back descriptor:
3012  *     - 32-bit result of the Microsoft RSS hash function,
3013  *     - 4-bit RSS type field.
3014  */
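
/*
 * Editor's sketch of the selection logic described above (conceptual only;
 * the computation is performed by the NIC, not by software):
 *
 *     hash  = rss_hash(rss_key, src_ip, dst_ip, src_port, dst_port);
 *     queue = RETA[hash & 0x7F];     7 LSBs index the 128-entry table
 *
 * The 32-bit hash and the RSS type are then reported in the Rx write-back
 * descriptor, as noted above.
 */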
3015
3016 /*
3017  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3018  * Used as the default key.
3019  */
3020 static uint8_t rss_intel_key[40] = {
3021         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3022         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3023         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3024         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3025         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3026 };
3027
3028 static void
3029 ixgbe_rss_disable(struct rte_eth_dev *dev)
3030 {
3031         struct ixgbe_hw *hw;
3032         uint32_t mrqc;
3033         uint32_t mrqc_reg;
3034
3035         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3036         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3037         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3038         mrqc &= ~IXGBE_MRQC_RSSEN;
3039         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3040 }
3041
3042 static void
3043 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3044 {
3045         uint8_t  *hash_key;
3046         uint32_t mrqc;
3047         uint32_t rss_key;
3048         uint64_t rss_hf;
3049         uint16_t i;
3050         uint32_t mrqc_reg;
3051         uint32_t rssrk_reg;
3052
3053         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3054         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3055
3056         hash_key = rss_conf->rss_key;
3057         if (hash_key != NULL) {
3058                 /* Fill in RSS hash key */
3059                 for (i = 0; i < 10; i++) {
3060                         rss_key  = hash_key[(i * 4)];
3061                         rss_key |= hash_key[(i * 4) + 1] << 8;
3062                         rss_key |= hash_key[(i * 4) + 2] << 16;
3063                         rss_key |= hash_key[(i * 4) + 3] << 24;
3064                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3065                 }
3066         }
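        /*
         * Editor's worked example: with the default rss_intel_key defined
         * above, the first word written to RSSRK is
         *     0x6D | (0x5A << 8) | (0x56 << 16) | (0xDA << 24) = 0xDA565A6D,
         * i.e. the key bytes are packed little-endian, four per register.
         */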
3067
3068         /* Set configured hashing protocols in MRQC register */
3069         rss_hf = rss_conf->rss_hf;
3070         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3071         if (rss_hf & ETH_RSS_IPV4)
3072                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3073         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3074                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3075         if (rss_hf & ETH_RSS_IPV6)
3076                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3077         if (rss_hf & ETH_RSS_IPV6_EX)
3078                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3079         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3080                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3081         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3082                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3083         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3084                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3085         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3086                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3087         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3088                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3089         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3090 }
3091
3092 int
3093 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3094                           struct rte_eth_rss_conf *rss_conf)
3095 {
3096         struct ixgbe_hw *hw;
3097         uint32_t mrqc;
3098         uint64_t rss_hf;
3099         uint32_t mrqc_reg;
3100
3101         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3102
3103         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3104                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3105                         "NIC.");
3106                 return -ENOTSUP;
3107         }
3108         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3109
3110         /*
3111          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3112          *     "RSS enabling cannot be done dynamically while it must be
3113          *      preceded by a software reset"
3114          * Before changing anything, first check that the update RSS operation
3115          * does not attempt to disable RSS, if RSS was enabled at
3116          * initialization time, or does not attempt to enable RSS, if RSS was
3117          * disabled at initialization time.
3118          */
3119         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3120         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3121         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3122                 if (rss_hf != 0) /* Enable RSS */
3123                         return -(EINVAL);
3124                 return 0; /* Nothing to do */
3125         }
3126         /* RSS enabled */
3127         if (rss_hf == 0) /* Disable RSS */
3128                 return -(EINVAL);
3129         ixgbe_hw_rss_hash_set(hw, rss_conf);
3130         return 0;
3131 }
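
/*
 * Editor's note -- illustrative usage sketch, not part of the driver;
 * port_id is hypothetical. Because of the restriction above, a caller may
 * only change the hash functions and/or key while RSS stays enabled, e.g.:
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,    keep the current key
 *             .rss_hf  = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     rte_eth_dev_rss_hash_update(port_id, &conf);
 */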
3132
3133 int
3134 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3135                             struct rte_eth_rss_conf *rss_conf)
3136 {
3137         struct ixgbe_hw *hw;
3138         uint8_t *hash_key;
3139         uint32_t mrqc;
3140         uint32_t rss_key;
3141         uint64_t rss_hf;
3142         uint16_t i;
3143         uint32_t mrqc_reg;
3144         uint32_t rssrk_reg;
3145
3146         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3147         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3148         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3149         hash_key = rss_conf->rss_key;
3150         if (hash_key != NULL) {
3151                 /* Return RSS hash key */
3152                 for (i = 0; i < 10; i++) {
3153                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3154                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3155                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3156                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3157                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3158                 }
3159         }
3160
3161         /* Get RSS functions configured in MRQC register */
3162         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3163         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3164                 rss_conf->rss_hf = 0;
3165                 return 0;
3166         }
3167         rss_hf = 0;
3168         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3169                 rss_hf |= ETH_RSS_IPV4;
3170         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3171                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3172         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3173                 rss_hf |= ETH_RSS_IPV6;
3174         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3175                 rss_hf |= ETH_RSS_IPV6_EX;
3176         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3177                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3178         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3179                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3180         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3181                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3182         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3183                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3184         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3185                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3186         rss_conf->rss_hf = rss_hf;
3187         return 0;
3188 }
3189
3190 static void
3191 ixgbe_rss_configure(struct rte_eth_dev *dev)
3192 {
3193         struct rte_eth_rss_conf rss_conf;
3194         struct ixgbe_hw *hw;
3195         uint32_t reta;
3196         uint16_t i;
3197         uint16_t j;
3198         uint16_t sp_reta_size;
3199         uint32_t reta_reg;
3200
3201         PMD_INIT_FUNC_TRACE();
3202         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3203
3204         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3205
3206         /*
3207          * Fill in redirection table
3208          * The byte-swap is needed because NIC registers are in
3209          * little-endian order.
3210          */
3211         reta = 0;
3212         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3213                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3214
3215                 if (j == dev->data->nb_rx_queues)
3216                         j = 0;
3217                 reta = (reta << 8) | j;
3218                 if ((i & 3) == 3)
3219                         IXGBE_WRITE_REG(hw, reta_reg,
3220                                         rte_bswap32(reta));
3221         }
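        /*
         * Editor's worked example (hypothetical setup): with 4 Rx queues the
         * loop above packs the queue indices 0, 1, 2, 3 of each group into
         * reta = 0x00010203; rte_bswap32() turns this into 0x03020100 before
         * the write, so the first entry lands in the least significant byte
         * of the RETA register.
         */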
3222
3223         /*
3224          * Configure the RSS key and the RSS protocols used to compute
3225          * the RSS hash of input packets.
3226          */
3227         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3228         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3229                 ixgbe_rss_disable(dev);
3230                 return;
3231         }
3232         if (rss_conf.rss_key == NULL)
3233                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3234         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3235 }
3236
3237 #define NUM_VFTA_REGISTERS 128
3238 #define NIC_RX_BUFFER_SIZE 0x200
3239 #define X550_RX_BUFFER_SIZE 0x180
3240
3241 static void
3242 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3243 {
3244         struct rte_eth_vmdq_dcb_conf *cfg;
3245         struct ixgbe_hw *hw;
3246         enum rte_eth_nb_pools num_pools;
3247         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3248         uint16_t pbsize;
3249         uint8_t nb_tcs; /* number of traffic classes */
3250         int i;
3251
3252         PMD_INIT_FUNC_TRACE();
3253         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3254         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3255         num_pools = cfg->nb_queue_pools;
3256         /* Check we have a valid number of pools */
3257         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3258                 ixgbe_rss_disable(dev);
3259                 return;
3260         }
3261         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3262         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3263
3264         /*
3265          * RXPBSIZE
3266          * split rx buffer up into sections, each for 1 traffic class
3267          */
3268         switch (hw->mac.type) {
3269         case ixgbe_mac_X550:
3270         case ixgbe_mac_X550EM_x:
3271         case ixgbe_mac_X550EM_a:
3272                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3273                 break;
3274         default:
3275                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3276                 break;
3277         }
3278         for (i = 0; i < nb_tcs; i++) {
3279                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3280
3281                 /* clear the 10-bit buffer size field, then set the new value */
3282                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3283                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT);
3284                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3285         }
3286         /* zero alloc all unused TCs */
3287         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3288                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3289
3290                 /* clear the 10-bit buffer size field (unused TCs get no buffer) */
3291                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3292                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3293         }
3294
3295         /* MRQC: enable vmdq and dcb */
3296         mrqc = (num_pools == ETH_16_POOLS) ?
3297                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3298         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3299
3300         /* PFVTCTL: turn on virtualisation and set the default pool */
3301         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3302         if (cfg->enable_default_pool) {
3303                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3304         } else {
3305                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3306         }
3307
3308         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3309
3310         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3311         queue_mapping = 0;
3312         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3313                 /*
3314                  * mapping is done with 3 bits per priority,
3315                  * so shift by i*3 each time
3316                  */
3317                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3318
3319         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
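        /*
         * Editor's worked example (hypothetical mapping): with
         * dcb_tc = {0, 0, 0, 0, 1, 1, 1, 1} the loop above yields
         * queue_mapping = (1 << 12) | (1 << 15) | (1 << 18) | (1 << 21)
         *               = 0x00249000,
         * i.e. user priorities 4-7 are mapped to traffic class 1.
         */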
3320
3321         /* RTRPCS: DCB related */
3322         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3323
3324         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3325         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3326         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3327         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3328
3329         /* VFTA - enable all vlan filters */
3330         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3331                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3332         }
3333
3334         /* VFRE: pool enabling for receive - 16 or 32 */
3335         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3336                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3337
3338         /*
3339          * MPSAR - allow pools to read specific mac addresses
3340          * In this case, all pools should be able to read from mac addr 0
3341          */
3342         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3343         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3344
3345         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3346         for (i = 0; i < cfg->nb_pool_maps; i++) {
3347                 /* set vlan id in VF register and set the valid bit */
3348                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3349                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3350                 /*
3351                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3352                  * pools, we only need to use the first half of the register
3353                  * i.e. bits 0-31
3354                  */
3355                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3356         }
3357 }
3358
3359 /**
3360  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3361  * @dev: pointer to eth_dev structure
3362  * @dcb_config: pointer to ixgbe_dcb_config structure
3363  */
3364 static void
3365 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3366                        struct ixgbe_dcb_config *dcb_config)
3367 {
3368         uint32_t reg;
3369         uint32_t q;
3370         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3371
3372         PMD_INIT_FUNC_TRACE();
3373         if (hw->mac.type != ixgbe_mac_82598EB) {
3374                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3375                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3376                 reg |= IXGBE_RTTDCS_ARBDIS;
3377                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3378
3379                 /* Enable DCB for Tx with 8 TCs */
3380                 if (dcb_config->num_tcs.pg_tcs == 8) {
3381                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3382                 } else {
3383                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3384                 }
3385                 if (dcb_config->vt_mode)
3386                         reg |= IXGBE_MTQC_VT_ENA;
3387                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3388
3389                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3390                         /* Disable drop for all queues in VMDQ mode*/
3391                         for (q = 0; q < 128; q++)
3392                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3393                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3394                 } else {
3395                         /* Enable drop for all queues in SRIOV mode */
3396                         for (q = 0; q < 128; q++)
3397                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3398                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE));
3399                 }
3400
3401                 /* Enable the Tx desc arbiter */
3402                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3403                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3404                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3405
3406                 /* Enable Security TX Buffer IFG for DCB */
3407                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3408                 reg |= IXGBE_SECTX_DCB;
3409                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3410         }
3411 }
3412
3413 /**
3414  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3415  * @dev: pointer to rte_eth_dev structure
3416  * @dcb_config: pointer to ixgbe_dcb_config structure
3417  */
3418 static void
3419 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3420                         struct ixgbe_dcb_config *dcb_config)
3421 {
3422         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3423                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3424         struct ixgbe_hw *hw =
3425                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3426
3427         PMD_INIT_FUNC_TRACE();
3428         if (hw->mac.type != ixgbe_mac_82598EB)
3429                 /*PF VF Transmit Enable*/
3430                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3431                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3432
3433         /*Configure general DCB TX parameters*/
3434         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3435 }
3436
3437 static void
3438 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3439                         struct ixgbe_dcb_config *dcb_config)
3440 {
3441         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3442                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3443         struct ixgbe_dcb_tc_config *tc;
3444         uint8_t i, j;
3445
3446         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3447         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3448                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3449                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3450         } else {
3451                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3452                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3453         }
3454         /* User Priority to Traffic Class mapping */
3455         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3456                 j = vmdq_rx_conf->dcb_tc[i];
3457                 tc = &dcb_config->tc_config[j];
3458                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3459                                                 (uint8_t)(1 << j);
3460         }
3461 }
3462
3463 static void
3464 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3465                         struct ixgbe_dcb_config *dcb_config)
3466 {
3467         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3468                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3469         struct ixgbe_dcb_tc_config *tc;
3470         uint8_t i, j;
3471
3472         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3473         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3474                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3475                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3476         } else {
3477                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3478                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3479         }
3480
3481         /* User Priority to Traffic Class mapping */
3482         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3483                 j = vmdq_tx_conf->dcb_tc[i];
3484                 tc = &dcb_config->tc_config[j];
3485                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3486                                                 (uint8_t)(1 << j);
3487         }
3488 }
3489
3490 static void
3491 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3492                 struct ixgbe_dcb_config *dcb_config)
3493 {
3494         struct rte_eth_dcb_rx_conf *rx_conf =
3495                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3496         struct ixgbe_dcb_tc_config *tc;
3497         uint8_t i, j;
3498
3499         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3500         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3501
3502         /* User Priority to Traffic Class mapping */
3503         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3504                 j = rx_conf->dcb_tc[i];
3505                 tc = &dcb_config->tc_config[j];
3506                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3507                                                 (uint8_t)(1 << j);
3508         }
3509 }
3510
3511 static void
3512 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3513                 struct ixgbe_dcb_config *dcb_config)
3514 {
3515         struct rte_eth_dcb_tx_conf *tx_conf =
3516                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3517         struct ixgbe_dcb_tc_config *tc;
3518         uint8_t i, j;
3519
3520         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3521         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3522
3523         /* User Priority to Traffic Class mapping */
3524         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3525                 j = tx_conf->dcb_tc[i];
3526                 tc = &dcb_config->tc_config[j];
3527                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3528                                                 (uint8_t)(1 << j);
3529         }
3530 }
3531
3532 /**
3533  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3534  * @hw: pointer to hardware structure
3535  * @dcb_config: pointer to ixgbe_dcb_config structure
3536  */
3537 static void
3538 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3539                struct ixgbe_dcb_config *dcb_config)
3540 {
3541         uint32_t reg;
3542         uint32_t vlanctrl;
3543         uint8_t i;
3544
3545         PMD_INIT_FUNC_TRACE();
3546         /*
3547          * Disable the arbiter before changing parameters
3548          * (always enable recycle mode; WSP)
3549          */
3550         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3551         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3552
3553         if (hw->mac.type != ixgbe_mac_82598EB) {
3554                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3555                 if (dcb_config->num_tcs.pg_tcs == 4) {
3556                         if (dcb_config->vt_mode)
3557                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3558                                         IXGBE_MRQC_VMDQRT4TCEN;
3559                         else {
3560                                 /* no matter the mode is DCB or DCB_RSS, just
3561                                  * set the MRQE to RSSXTCEN. RSS is controlled
3562                                  * by RSS_FIELD
3563                                  */
3564                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3565                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3566                                         IXGBE_MRQC_RTRSS4TCEN;
3567                         }
3568                 }
3569                 if (dcb_config->num_tcs.pg_tcs == 8) {
3570                         if (dcb_config->vt_mode)
3571                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3572                                         IXGBE_MRQC_VMDQRT8TCEN;
3573                         else {
3574                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3575                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3576                                         IXGBE_MRQC_RTRSS8TCEN;
3577                         }
3578                 }
3579
3580                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3581         }
3582
3583         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3584         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3585         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3586         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3587
3588         /* VFTA - enable all vlan filters */
3589         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3590                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3591         }
3592
3593         /*
3594          * Configure Rx packet plane (recycle mode; WSP) and
3595          * enable arbiter
3596          */
3597         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3598         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3599 }
3600
3601 static void
3602 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3603                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3604 {
3605         switch (hw->mac.type) {
3606         case ixgbe_mac_82598EB:
3607                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3608                 break;
3609         case ixgbe_mac_82599EB:
3610         case ixgbe_mac_X540:
3611         case ixgbe_mac_X550:
3612         case ixgbe_mac_X550EM_x:
3613         case ixgbe_mac_X550EM_a:
3614                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3615                                                   tsa, map);
3616                 break;
3617         default:
3618                 break;
3619         }
3620 }
3621
3622 static void
3623 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3624                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3625 {
3626         switch (hw->mac.type) {
3627         case ixgbe_mac_82598EB:
3628                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3629                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3630                 break;
3631         case ixgbe_mac_82599EB:
3632         case ixgbe_mac_X540:
3633         case ixgbe_mac_X550:
3634         case ixgbe_mac_X550EM_x:
3635         case ixgbe_mac_X550EM_a:
3636                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3637                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3638                 break;
3639         default:
3640                 break;
3641         }
3642 }
3643
3644 #define DCB_RX_CONFIG  1
3645 #define DCB_TX_CONFIG  1
3646 #define DCB_TX_PB      1024
3647 /**
3648  * ixgbe_dcb_hw_configure - Enable DCB and configure
3649  * general DCB in VT mode and non-VT mode parameters
3650  * @dev: pointer to rte_eth_dev structure
3651  * @dcb_config: pointer to ixgbe_dcb_config structure
3652  */
3653 static int
3654 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3655                         struct ixgbe_dcb_config *dcb_config)
3656 {
3657         int     ret = 0;
3658         uint8_t i, pfc_en, nb_tcs;
3659         uint16_t pbsize, rx_buffer_size;
3660         uint8_t config_dcb_rx = 0;
3661         uint8_t config_dcb_tx = 0;
3662         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3663         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3664         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3665         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3666         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3667         struct ixgbe_dcb_tc_config *tc;
3668         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3669         struct ixgbe_hw *hw =
3670                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3671
3672         switch (dev->data->dev_conf.rxmode.mq_mode) {
3673         case ETH_MQ_RX_VMDQ_DCB:
3674                 dcb_config->vt_mode = true;
3675                 if (hw->mac.type != ixgbe_mac_82598EB) {
3676                         config_dcb_rx = DCB_RX_CONFIG;
3677                         /*
3678                          *get dcb and VT rx configuration parameters
3679                          *from rte_eth_conf
3680                          */
3681                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3682                         /*Configure general VMDQ and DCB RX parameters*/
3683                         ixgbe_vmdq_dcb_configure(dev);
3684                 }
3685                 break;
3686         case ETH_MQ_RX_DCB:
3687         case ETH_MQ_RX_DCB_RSS:
3688                 dcb_config->vt_mode = false;
3689                 config_dcb_rx = DCB_RX_CONFIG;
3690                 /* Get dcb RX configuration parameters from rte_eth_conf */
3691                 ixgbe_dcb_rx_config(dev, dcb_config);
3692                 /*Configure general DCB RX parameters*/
3693                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3694                 break;
3695         default:
3696                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3697                 break;
3698         }
3699         switch (dev->data->dev_conf.txmode.mq_mode) {
3700         case ETH_MQ_TX_VMDQ_DCB:
3701                 dcb_config->vt_mode = true;
3702                 config_dcb_tx = DCB_TX_CONFIG;
3703                 /* Get DCB and VT TX configuration parameters
3704                  * from rte_eth_conf
3705                  */
3706                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3707                 /* Configure general VMDQ and DCB TX parameters */
3708                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3709                 break;
3710
3711         case ETH_MQ_TX_DCB:
3712                 dcb_config->vt_mode = false;
3713                 config_dcb_tx = DCB_TX_CONFIG;
3714                 /* Get DCB TX configuration parameters from rte_eth_conf */
3715                 ixgbe_dcb_tx_config(dev, dcb_config);
3716                 /* Configure general DCB TX parameters */
3717                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3718                 break;
3719         default:
3720                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3721                 break;
3722         }
3723
3724         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3725         /* Unpack map */
3726         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3727         if (nb_tcs == ETH_4_TCS) {
3728                 /* Avoid un-configured priority mapping to TC0 */
3729                 uint8_t j = 4;
3730                 uint8_t mask = 0xFF;
3731
3732                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3733                         mask = (uint8_t)(mask & (~(1 << map[i])));
3734                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3735                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3736                                 map[j++] = i;
3737                         mask >>= 1;
3738                 }
3739                 /* Re-configure 4 TCs BW */
3740                 for (i = 0; i < nb_tcs; i++) {
3741                         tc = &dcb_config->tc_config[i];
3742                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3743                                                 (uint8_t)(100 / nb_tcs);
3744                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3745                                                 (uint8_t)(100 / nb_tcs);
3746                 }
3747                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3748                         tc = &dcb_config->tc_config[i];
3749                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3750                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3751                 }
3752         }
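        /*
         * For example, if the unpacked map assigns priorities 0-3 to
         * TCs 0-3 and leaves priorities 4-7 at TC0, the first loop
         * clears mask bits 0-3 (mask = 0xF0) and the second loop
         * rewrites map[4..7] to the otherwise unused TCs 4..7.
         */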
3753
3754         switch (hw->mac.type) {
3755         case ixgbe_mac_X550:
3756         case ixgbe_mac_X550EM_x:
3757         case ixgbe_mac_X550EM_a:
3758                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3759                 break;
3760         default:
3761                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3762                 break;
3763         }
3764
3765         if (config_dcb_rx) {
3766                 /* Set RX buffer size */
3767                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3768                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3769
3770                 for (i = 0; i < nb_tcs; i++) {
3771                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3772                 }
3773                 /* zero alloc all unused TCs */
3774                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3775                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3776                 }
3777         }
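        /*
         * For example, assuming the default 512 KB Rx packet buffer
         * (NIC_RX_BUFFER_SIZE) and 4 TCs, pbsize is 128 and each enabled
         * TC is given a 128 KB packet buffer via its RXPBSIZE register.
         */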
3778         if (config_dcb_tx) {
3779                 /* Only an equally distributed Tx packet buffer
3780                  * strategy is supported.
3781                  */
3782                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3783                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3784
3785                 for (i = 0; i < nb_tcs; i++) {
3786                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3787                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3788                 }
3789                 /* Clear unused TCs, if any, to zero buffer size */
3790                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3791                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3792                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3793                 }
3794         }
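        /*
         * For example, assuming a 160 KB aggregate Tx packet buffer
         * (IXGBE_TXPBSIZE_MAX) and 4 TCs, each TC gets a 40 KB TXPBSIZE
         * and a TXPBTHRESH of (40 - IXGBE_TXPKT_SIZE_MAX) KB.
         */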
3795
3796         /* Calculates traffic class credits */
3797         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3798                                 IXGBE_DCB_TX_CONFIG);
3799         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3800                                 IXGBE_DCB_RX_CONFIG);
3801
3802         if (config_dcb_rx) {
3803                 /* Unpack CEE standard containers */
3804                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3805                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3806                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3807                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3808                 /* Configure PG(ETS) RX */
3809                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3810         }
3811
3812         if (config_dcb_tx) {
3813                 /* Unpack CEE standard containers */
3814                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3815                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3816                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3817                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3818                 /* Configure PG(ETS) TX */
3819                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3820         }
3821
3822         /* Configure queue statistics registers */
3823         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3824
3825         /* Check if the PFC is supported */
3826         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3827                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3828                 for (i = 0; i < nb_tcs; i++) {
3829                         /*
3830                          * If the TC count is 8, the default high_water is 48
3831                          * and the default low_water is 16.
3832                          */
3833                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3834                         hw->fc.low_water[i] = pbsize / 4;
3835                         /* Enable pfc for this TC */
3836                         tc = &dcb_config->tc_config[i];
3837                         tc->pfc = ixgbe_dcb_pfc_enabled;
3838                 }
3839                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3840                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3841                         pfc_en &= 0x0F;
3842                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3843         }
3844
3845         return ret;
3846 }
3847
3848 /**
3849  * ixgbe_configure_dcb - Configure DCB  Hardware
3850  * @dev: pointer to rte_eth_dev
3851  */
3852 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3853 {
3854         struct ixgbe_dcb_config *dcb_cfg =
3855                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3856         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3857
3858         PMD_INIT_FUNC_TRACE();
3859
3860         /* Check that the mq_mode supports DCB */
3861         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3862             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3863             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3864                 return;
3865
3866         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3867                 return;
3868
3869         /* Configure DCB hardware */
3870         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3871 }
3872
3873 /*
3874  * VMDq is only supported on 10 GbE NICs.
3875  */
3876 static void
3877 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3878 {
3879         struct rte_eth_vmdq_rx_conf *cfg;
3880         struct ixgbe_hw *hw;
3881         enum rte_eth_nb_pools num_pools;
3882         uint32_t mrqc, vt_ctl, vlanctrl;
3883         uint32_t vmolr = 0;
3884         int i;
3885
3886         PMD_INIT_FUNC_TRACE();
3887         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3888         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3889         num_pools = cfg->nb_queue_pools;
3890
3891         ixgbe_rss_disable(dev);
3892
3893         /* MRQC: enable vmdq */
3894         mrqc = IXGBE_MRQC_VMDQEN;
3895         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3896
3897         /* PFVTCTL: turn on virtualisation and set the default pool */
3898         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3899         if (cfg->enable_default_pool)
3900                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3901         else
3902                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3903
3904         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3905
3906         for (i = 0; i < (int)num_pools; i++) {
3907                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3908                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3909         }
3910
3911         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3912         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3913         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3914         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3915
3916         /* VFTA - enable all vlan filters */
3917         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3918                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3919
3920         /* VFRE: pool enabling for receive - 64 */
3921         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3922         if (num_pools == ETH_64_POOLS)
3923                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3924
3925         /*
3926          * MPSAR - allow pools to read specific mac addresses
3927          * In this case, all pools should be able to read from mac addr 0
3928          */
3929         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3930         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3931
3932         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3933         for (i = 0; i < cfg->nb_pool_maps; i++) {
3934                 /* Set the vlan id in the VLVF register and set the valid bit */
3935                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3936                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3937                 /*
3938                  * Put the allowed pools in the VLVFB register pair. Each map
3939                  * entry is a 64-bit pool bitmap: VLVFB(2*i) covers pools 0-31
3940                  * and VLVFB(2*i + 1) covers pools 32-63.
3941                  */
3942                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3943                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3944                                         (cfg->pool_map[i].pools & UINT32_MAX));
3945                 else
3946                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3947                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3948
3949         }
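        /*
         * For example, a pool_map entry whose 64-bit pool bitmap is 0x5
         * (pools 0 and 2) has no upper bits set, so 0x5 is written to
         * VLVFB(2 * i); a bitmap that only enables pools 32-63 is written
         * to VLVFB(2 * i + 1) instead.
         */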
3950
3951         /* PFDMA Tx General Switch Control: enable VMDQ loopback */
3952         if (cfg->enable_loop_back) {
3953                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3954                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3955                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3956         }
3957
3958         IXGBE_WRITE_FLUSH(hw);
3959 }
3960
3961 /*
3962  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3963  * @hw: pointer to hardware structure
3964  */
3965 static void
3966 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3967 {
3968         uint32_t reg;
3969         uint32_t q;
3970
3971         PMD_INIT_FUNC_TRACE();
3972         /* PF VF Transmit Enable */
3973         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3974         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3975
3976         /* Disable the Tx desc arbiter so that MTQC can be changed */
3977         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3978         reg |= IXGBE_RTTDCS_ARBDIS;
3979         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3980
3981         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3982         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3983
3984         /* Disable drop for all queues */
3985         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3986                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3987                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3988
3989         /* Enable the Tx desc arbiter */
3990         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3991         reg &= ~IXGBE_RTTDCS_ARBDIS;
3992         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3993
3994         IXGBE_WRITE_FLUSH(hw);
3995 }
3996
3997 static int __attribute__((cold))
3998 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3999 {
4000         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4001         uint64_t dma_addr;
4002         unsigned int i;
4003
4004         /* Initialize software ring entries */
4005         for (i = 0; i < rxq->nb_rx_desc; i++) {
4006                 volatile union ixgbe_adv_rx_desc *rxd;
4007                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4008
4009                 if (mbuf == NULL) {
4010                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4011                                      (unsigned) rxq->queue_id);
4012                         return -ENOMEM;
4013                 }
4014
4015                 rte_mbuf_refcnt_set(mbuf, 1);
4016                 mbuf->next = NULL;
4017                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4018                 mbuf->nb_segs = 1;
4019                 mbuf->port = rxq->port_id;
4020
4021                 dma_addr =
4022                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4023                 rxd = &rxq->rx_ring[i];
4024                 rxd->read.hdr_addr = 0;
4025                 rxd->read.pkt_addr = dma_addr;
4026                 rxe[i].mbuf = mbuf;
4027         }
4028
4029         return 0;
4030 }
4031
4032 static int
4033 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4034 {
4035         struct ixgbe_hw *hw;
4036         uint32_t mrqc;
4037
4038         ixgbe_rss_configure(dev);
4039
4040         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4041
4042         /* MRQC: enable VF RSS */
4043         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4044         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4045         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4046         case ETH_64_POOLS:
4047                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4048                 break;
4049
4050         case ETH_32_POOLS:
4051                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4052                 break;
4053
4054         default:
4055                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4056                 return -EINVAL;
4057         }
4058
4059         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4060
4061         return 0;
4062 }
4063
4064 static int
4065 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4066 {
4067         struct ixgbe_hw *hw =
4068                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4069
4070         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4071         case ETH_64_POOLS:
4072                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4073                         IXGBE_MRQC_VMDQEN);
4074                 break;
4075
4076         case ETH_32_POOLS:
4077                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4078                         IXGBE_MRQC_VMDQRT4TCEN);
4079                 break;
4080
4081         case ETH_16_POOLS:
4082                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4083                         IXGBE_MRQC_VMDQRT8TCEN);
4084                 break;
4085         default:
4086                 PMD_INIT_LOG(ERR,
4087                         "invalid pool number in IOV mode");
4088                 break;
4089         }
4090         return 0;
4091 }
4092
4093 static int
4094 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4095 {
4096         struct ixgbe_hw *hw =
4097                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4098
4099         if (hw->mac.type == ixgbe_mac_82598EB)
4100                 return 0;
4101
4102         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4103                 /*
4104                  * SRIOV inactive scheme
4105                  * any DCB/RSS w/o VMDq multi-queue setting
4106                  */
4107                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4108                 case ETH_MQ_RX_RSS:
4109                 case ETH_MQ_RX_DCB_RSS:
4110                 case ETH_MQ_RX_VMDQ_RSS:
4111                         ixgbe_rss_configure(dev);
4112                         break;
4113
4114                 case ETH_MQ_RX_VMDQ_DCB:
4115                         ixgbe_vmdq_dcb_configure(dev);
4116                         break;
4117
4118                 case ETH_MQ_RX_VMDQ_ONLY:
4119                         ixgbe_vmdq_rx_hw_configure(dev);
4120                         break;
4121
4122                 case ETH_MQ_RX_NONE:
4123                 default:
4124                         /* If mq_mode is none, disable RSS. */
4125                         ixgbe_rss_disable(dev);
4126                         break;
4127                 }
4128         } else {
4129                 /*
4130                  * SRIOV active scheme
4131                  * Support RSS together with VMDq & SRIOV
4132                  */
4133                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4134                 case ETH_MQ_RX_RSS:
4135                 case ETH_MQ_RX_VMDQ_RSS:
4136                         ixgbe_config_vf_rss(dev);
4137                         break;
4138                 case ETH_MQ_RX_VMDQ_DCB:
4139                         ixgbe_vmdq_dcb_configure(dev);
4140                         break;
4141                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4142                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4143                         PMD_INIT_LOG(ERR,
4144                                 "DCB/RSS with VMDq & SRIOV is not supported");
4145                         return -1;
4146                 default:
4147                         ixgbe_config_vf_default(dev);
4148                         break;
4149                 }
4150         }
4151
4152         return 0;
4153 }
4154
4155 static int
4156 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4157 {
4158         struct ixgbe_hw *hw =
4159                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4160         uint32_t mtqc;
4161         uint32_t rttdcs;
4162
4163         if (hw->mac.type == ixgbe_mac_82598EB)
4164                 return 0;
4165
4166         /* disable arbiter before setting MTQC */
4167         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4168         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4169         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4170
4171         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4172                 /*
4173                  * SRIOV inactive scheme
4174                  * any DCB w/o VMDq multi-queue setting
4175                  */
4176                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4177                         ixgbe_vmdq_tx_hw_configure(hw);
4178                 else {
4179                         mtqc = IXGBE_MTQC_64Q_1PB;
4180                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4181                 }
4182         } else {
4183                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4184
4185                 /*
4186                  * SRIOV active scheme
4187                  * FIXME if support DCB together with VMDq & SRIOV
4188                  */
4189                 case ETH_64_POOLS:
4190                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4191                         break;
4192                 case ETH_32_POOLS:
4193                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4194                         break;
4195                 case ETH_16_POOLS:
4196                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4197                                 IXGBE_MTQC_8TC_8TQ;
4198                         break;
4199                 default:
4200                         mtqc = IXGBE_MTQC_64Q_1PB;
4201                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4202                 }
4203                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4204         }
4205
4206         /* re-enable arbiter */
4207         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4208         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4209
4210         return 0;
4211 }
4212
4213 /**
4214  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4215  *
4216  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4217  * spec rev. 3.0 chapter 8.2.3.8.13.
4218  *
4219  * @pool Memory pool of the Rx queue
4220  */
4221 static inline uint32_t
4222 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4223 {
4224         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4225
4226         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4227         uint16_t maxdesc =
4228                 IPV4_MAX_PKT_LEN /
4229                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4230
4231         if (maxdesc >= 16)
4232                 return IXGBE_RSCCTL_MAXDESC_16;
4233         else if (maxdesc >= 8)
4234                 return IXGBE_RSCCTL_MAXDESC_8;
4235         else if (maxdesc >= 4)
4236                 return IXGBE_RSCCTL_MAXDESC_4;
4237         else
4238                 return IXGBE_RSCCTL_MAXDESC_1;
4239 }
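/*
 * For example, assuming IPV4_MAX_PKT_LEN is 65535 and the queue's pool was
 * created with the common 2 KB data area after RTE_PKTMBUF_HEADROOM, the
 * division yields maxdesc = 65535 / 2048 = 31, so IXGBE_RSCCTL_MAXDESC_16
 * is returned.
 */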
4240
4241 /**
4242  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4243  * interrupt
4244  *
4245  * (Taken from FreeBSD tree)
4246  * (yes this is all very magic and confusing :)
4247  *
4248  * @dev port handle
4249  * @entry the register array entry
4250  * @vector the MSIX vector for this queue
4251  * @type RX/TX/MISC
4252  */
4253 static void
4254 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4255 {
4256         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4257         u32 ivar, index;
4258
4259         vector |= IXGBE_IVAR_ALLOC_VAL;
4260
4261         switch (hw->mac.type) {
4262
4263         case ixgbe_mac_82598EB:
4264                 if (type == -1)
4265                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4266                 else
4267                         entry += (type * 64);
4268                 index = (entry >> 2) & 0x1F;
4269                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4270                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4271                 ivar |= (vector << (8 * (entry & 0x3)));
4272                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4273                 break;
4274
4275         case ixgbe_mac_82599EB:
4276         case ixgbe_mac_X540:
4277                 if (type == -1) { /* MISC IVAR */
4278                         index = (entry & 1) * 8;
4279                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4280                         ivar &= ~(0xFF << index);
4281                         ivar |= (vector << index);
4282                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4283                 } else {        /* RX/TX IVARS */
4284                         index = (16 * (entry & 1)) + (8 * type);
4285                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4286                         ivar &= ~(0xFF << index);
4287                         ivar |= (vector << index);
4288                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4289                 }
4290
4291                 break;
4292
4293         default:
4294                 break;
4295         }
4296 }
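/*
 * For example, on 82599 an Rx queue with entry = 5 and type = 0 selects
 * IVAR(5 >> 1) = IVAR(2) and index = 16 * (5 & 1) + 8 * 0 = 16, so the
 * vector (with IXGBE_IVAR_ALLOC_VAL set) is written into bits 23:16 of
 * that register.
 */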
4297
4298 void __attribute__((cold))
4299 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4300 {
4301         uint16_t i, rx_using_sse;
4302         struct ixgbe_adapter *adapter =
4303                 (struct ixgbe_adapter *)dev->data->dev_private;
4304
4305         /*
4306          * In order to allow Vector Rx there are a few configuration
4307          * conditions to be met and Rx Bulk Allocation should be allowed.
4308          */
4309         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4310             !adapter->rx_bulk_alloc_allowed) {
4311                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4312                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4313                                     "not enabled",
4314                              dev->data->port_id);
4315
4316                 adapter->rx_vec_allowed = false;
4317         }
4318
4319         /*
4320          * Initialize the appropriate LRO callback.
4321          *
4322          * If all queues satisfy the bulk allocation preconditions
4323  * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4324          * Otherwise use a single allocation version.
4325          */
4326         if (dev->data->lro) {
4327                 if (adapter->rx_bulk_alloc_allowed) {
4328                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4329                                            "allocation version");
4330                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4331                 } else {
4332                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4333                                            "allocation version");
4334                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4335                 }
4336         } else if (dev->data->scattered_rx) {
4337                 /*
4338                  * Set the non-LRO scattered callback: there are Vector and
4339                  * single allocation versions.
4340                  */
4341                 if (adapter->rx_vec_allowed) {
4342                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4343                                             "callback (port=%d).",
4344                                      dev->data->port_id);
4345
4346                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4347                 } else if (adapter->rx_bulk_alloc_allowed) {
4348                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback "
4349                                            "with bulk allocation (port=%d).",
4350                                      dev->data->port_id);
4351                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4352                 } else {
4353                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4354                                             "single allocation) "
4355                                             "Scattered Rx callback "
4356                                             "(port=%d).",
4357                                      dev->data->port_id);
4358
4359                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4360                 }
4361         /*
4362          * Below we set "simple" callbacks according to port/queues parameters.
4363          * If parameters allow we are going to choose between the following
4364          * callbacks:
4365          *    - Vector
4366          *    - Bulk Allocation
4367          *    - Single buffer allocation (the simplest one)
4368          */
4369         } else if (adapter->rx_vec_allowed) {
4370                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure the RX "
4371                                     "burst size is no less than %d (port=%d).",
4372                              RTE_IXGBE_DESCS_PER_LOOP,
4373                              dev->data->port_id);
4374
4375                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4376         } else if (adapter->rx_bulk_alloc_allowed) {
4377                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4378                                     "satisfied. Rx Burst Bulk Alloc function "
4379                                     "will be used on port=%d.",
4380                              dev->data->port_id);
4381
4382                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4383         } else {
4384                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4385                                     "satisfied, or Scattered Rx is requested "
4386                                     "(port=%d).",
4387                              dev->data->port_id);
4388
4389                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4390         }
4391
4392         /* Propagate information about RX function choice through all queues. */
4393
4394         rx_using_sse =
4395                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4396                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4397
4398         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4399                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4400
4401                 rxq->rx_using_sse = rx_using_sse;
4402         }
4403 }
4404
4405 /**
4406  * ixgbe_set_rsc - configure RSC related port HW registers
4407  *
4408  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4409  * of 82599 Spec (x540 configuration is virtually the same).
4410  *
4411  * @dev port handle
4412  *
4413  * Returns 0 in case of success or a non-zero error code
4414  */
4415 static int
4416 ixgbe_set_rsc(struct rte_eth_dev *dev)
4417 {
4418         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4419         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4420         struct rte_eth_dev_info dev_info = { 0 };
4421         bool rsc_capable = false;
4422         uint16_t i;
4423         uint32_t rdrxctl;
4424
4425         /* Sanity check */
4426         dev->dev_ops->dev_infos_get(dev, &dev_info);
4427         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4428                 rsc_capable = true;
4429
4430         if (!rsc_capable && rx_conf->enable_lro) {
4431                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4432                                    "support it");
4433                 return -EINVAL;
4434         }
4435
4436         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4437
4438         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4439                 /*
4440                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4441                  * 3.0 RSC configuration requires HW CRC stripping being
4442                  * enabled. If user requested both HW CRC stripping off
4443                  * and RSC on - return an error.
4444                  */
4445                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4446                                     "is disabled");
4447                 return -EINVAL;
4448         }
4449
4450         /* RFCTL configuration  */
4451         if (rsc_capable) {
4452                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4453
4454                 if (rx_conf->enable_lro)
4455                         /*
4456                          * Since NFS packet coalescing is not supported, clear
4457                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4458                          * enabled.
4459                          */
4460                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4461                                    IXGBE_RFCTL_NFSR_DIS);
4462                 else
4463                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4464
4465                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4466         }
4467
4468         /* If LRO hasn't been requested - we are done here. */
4469         if (!rx_conf->enable_lro)
4470                 return 0;
4471
4472         /* Set RDRXCTL.RSCACKC bit */
4473         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4474         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4475         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4476
4477         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4478         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4479                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4480                 uint32_t srrctl =
4481                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4482                 uint32_t rscctl =
4483                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4484                 uint32_t psrtype =
4485                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4486                 uint32_t eitr =
4487                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4488
4489                 /*
4490                  * ixgbe PMD doesn't support header-split at the moment.
4491                  *
4492                  * Following chapter 4.6.7.2.1 of the 82599/x540
4493                  * Spec, when RSC is enabled the SRRCTL[n].BSIZEHEADER
4494                  * field should be configured even if header split is not
4495                  * enabled. We configure it to 128 bytes, following the
4496                  * recommendation in the spec.
4497                  */
4498                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4499                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4500                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4501
4502                 /*
4503                  * TODO: Consider setting the Receive Descriptor Minimum
4504                  * Threshold Size for the RSC case. This is not an obviously
4505                  * beneficial option, but it is worth considering...
4506                  */
4507
4508                 rscctl |= IXGBE_RSCCTL_RSCEN;
4509                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4510                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4511
4512                 /*
4513                  * RSC: Set ITR interval corresponding to 2K ints/s.
4514                  *
4515                  * Full-sized RSC aggregations for a 10Gb/s link will
4516                  * arrive at about 20K aggregation/s rate.
4517                  *
4518                  * A 2K ints/s rate will cause only 10% of the
4519                  * aggregations to be closed due to interrupt timer
4520                  * expiration when streaming at wire speed.
4521                  *
4522                  * For a sparse streaming case this setting will yield
4523                  * at most 500us latency for a single RSC aggregation.
4524                  */
4525                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4526                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
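                /* 500 us per interval corresponds to roughly 2000 ints/s,
                 * i.e. the 2K ints/s target described above.
                 */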
4527
4528                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4529                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4530                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4531                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4532
4533                 /*
4534                  * RSC requires the mapping of the queue to the
4535                  * interrupt vector.
4536                  */
4537                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4538         }
4539
4540         dev->data->lro = 1;
4541
4542         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4543
4544         return 0;
4545 }
4546
4547 /*
4548  * Initializes Receive Unit.
4549  */
4550 int __attribute__((cold))
4551 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4552 {
4553         struct ixgbe_hw     *hw;
4554         struct ixgbe_rx_queue *rxq;
4555         uint64_t bus_addr;
4556         uint32_t rxctrl;
4557         uint32_t fctrl;
4558         uint32_t hlreg0;
4559         uint32_t maxfrs;
4560         uint32_t srrctl;
4561         uint32_t rdrxctl;
4562         uint32_t rxcsum;
4563         uint16_t buf_size;
4564         uint16_t i;
4565         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4566         int rc;
4567
4568         PMD_INIT_FUNC_TRACE();
4569         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4570
4571         /*
4572          * Make sure receives are disabled while setting
4573          * up the RX context (registers, descriptor rings, etc.).
4574          */
4575         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4576         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4577
4578         /* Enable receipt of broadcast frames */
4579         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4580         fctrl |= IXGBE_FCTRL_BAM;
4581         fctrl |= IXGBE_FCTRL_DPF;
4582         fctrl |= IXGBE_FCTRL_PMCF;
4583         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4584
4585         /*
4586          * Configure CRC stripping, if any.
4587          */
4588         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4589         if (rx_conf->hw_strip_crc)
4590                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4591         else
4592                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4593
4594         /*
4595          * Configure jumbo frame support, if any.
4596          */
4597         if (rx_conf->jumbo_frame == 1) {
4598                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4599                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4600                 maxfrs &= 0x0000FFFF;
4601                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4602                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4603         } else
4604                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4605
4606         /*
4607          * If loopback mode is configured for 82599, set LPBK bit.
4608          */
4609         if (hw->mac.type == ixgbe_mac_82599EB &&
4610                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4611                 hlreg0 |= IXGBE_HLREG0_LPBK;
4612         else
4613                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4614
4615         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4616
4617         /* Setup RX queues */
4618         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4619                 rxq = dev->data->rx_queues[i];
4620
4621                 /*
4622                  * Reset crc_len in case it was changed after queue setup by a
4623                  * call to configure.
4624                  */
4625                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4626
4627                 /* Setup the Base and Length of the Rx Descriptor Rings */
4628                 bus_addr = rxq->rx_ring_phys_addr;
4629                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4630                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4631                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4632                                 (uint32_t)(bus_addr >> 32));
4633                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4634                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4635                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4636                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4637
4638                 /* Configure the SRRCTL register */
4639 #ifdef RTE_HEADER_SPLIT_ENABLE
4640                 /*
4641                  * Configure Header Split
4642                  */
4643                 if (rx_conf->header_split) {
4644                         if (hw->mac.type == ixgbe_mac_82599EB) {
4645                                 /* Must setup the PSRTYPE register */
4646                                 uint32_t psrtype;
4647
4648                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4649                                         IXGBE_PSRTYPE_UDPHDR   |
4650                                         IXGBE_PSRTYPE_IPV4HDR  |
4651                                         IXGBE_PSRTYPE_IPV6HDR;
4652                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4653                         }
4654                         srrctl = ((rx_conf->split_hdr_size <<
4655                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4656                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4657                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4658                 } else
4659 #endif
4660                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4661
4662                 /* Set DROP_EN to drop packets when no descriptors are available */
4663                 if (rxq->drop_en)
4664                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4665
4666                 /*
4667                  * Configure the RX buffer size in the BSIZEPACKET field of
4668                  * the SRRCTL register of the queue.
4669                  * The value is in 1 KB resolution. Valid values can be from
4670                  * 1 KB to 16 KB.
4671                  */
4672                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4673                         RTE_PKTMBUF_HEADROOM);
4674                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4675                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4676
4677                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4678
4679                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4680                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4681
4682                 /* Add the length of two VLAN tags to support dual VLAN */
4683                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4684                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4685                         dev->data->scattered_rx = 1;
4686         }
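        /*
         * For example, with a 2 KB receive buffer (buf_size = 2048) the
         * BSIZEPKT field above is written as 2 (1 KB units), the read-back
         * reconstructs 2048, and scattered Rx is enabled whenever
         * max_rx_pkt_len plus two VLAN tags no longer fits in that buffer.
         */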
4687
4688         if (rx_conf->enable_scatter)
4689                 dev->data->scattered_rx = 1;
4690
4691         /*
4692          * Device configured with multiple RX queues.
4693          */
4694         ixgbe_dev_mq_rx_configure(dev);
4695
4696         /*
4697          * Setup the Checksum Register.
4698          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4699          * Enable IP/L4 checksum computation by hardware if requested to do so.
4700          */
4701         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4702         rxcsum |= IXGBE_RXCSUM_PCSD;
4703         if (rx_conf->hw_ip_checksum)
4704                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4705         else
4706                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4707
4708         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4709
4710         if (hw->mac.type == ixgbe_mac_82599EB ||
4711             hw->mac.type == ixgbe_mac_X540) {
4712                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4713                 if (rx_conf->hw_strip_crc)
4714                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4715                 else
4716                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4717                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4718                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4719         }
4720
4721         rc = ixgbe_set_rsc(dev);
4722         if (rc)
4723                 return rc;
4724
4725         ixgbe_set_rx_function(dev);
4726
4727         return 0;
4728 }
4729
4730 /*
4731  * Initializes Transmit Unit.
4732  */
4733 void __attribute__((cold))
4734 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4735 {
4736         struct ixgbe_hw     *hw;
4737         struct ixgbe_tx_queue *txq;
4738         uint64_t bus_addr;
4739         uint32_t hlreg0;
4740         uint32_t txctrl;
4741         uint16_t i;
4742
4743         PMD_INIT_FUNC_TRACE();
4744         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4745
4746         /* Enable TX CRC (checksum offload requirement) and hw padding
4747          * (TSO requirement)
4748          */
4749         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4750         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4751         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4752
4753         /* Setup the Base and Length of the Tx Descriptor Rings */
4754         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4755                 txq = dev->data->tx_queues[i];
4756
4757                 bus_addr = txq->tx_ring_phys_addr;
4758                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4759                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4760                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4761                                 (uint32_t)(bus_addr >> 32));
4762                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4763                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4764                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4765                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4766                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4767
4768                 /*
4769                  * Disable Tx Head Writeback RO bit, since this hoses
4770                  * bookkeeping if things aren't delivered in order.
4771                  */
4772                 switch (hw->mac.type) {
4773                 case ixgbe_mac_82598EB:
4774                         txctrl = IXGBE_READ_REG(hw,
4775                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4776                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4777                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4778                                         txctrl);
4779                         break;
4780
4781                 case ixgbe_mac_82599EB:
4782                 case ixgbe_mac_X540:
4783                 case ixgbe_mac_X550:
4784                 case ixgbe_mac_X550EM_x:
4785                 case ixgbe_mac_X550EM_a:
4786                 default:
4787                         txctrl = IXGBE_READ_REG(hw,
4788                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4789                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4790                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4791                                         txctrl);
4792                         break;
4793                 }
4794         }
4795
4796         /* Device configured with multiple TX queues. */
4797         ixgbe_dev_mq_tx_configure(dev);
4798 }
4799
4800 /*
4801  * Set up link for 82599 loopback mode Tx->Rx.
4802  */
4803 static inline void __attribute__((cold))
4804 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4805 {
4806         PMD_INIT_FUNC_TRACE();
4807
4808         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4809                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4810                                 IXGBE_SUCCESS) {
4811                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4812                         /* ignore error */
4813                         return;
4814                 }
4815         }
4816
4817         /* Restart link */
4818         IXGBE_WRITE_REG(hw,
4819                         IXGBE_AUTOC,
4820                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4821         ixgbe_reset_pipeline_82599(hw);
4822
4823         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4824         msec_delay(50);
4825 }
4826
4827
4828 /*
4829  * Start Transmit and Receive Units.
4830  */
4831 int __attribute__((cold))
4832 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4833 {
4834         struct ixgbe_hw     *hw;
4835         struct ixgbe_tx_queue *txq;
4836         struct ixgbe_rx_queue *rxq;
4837         uint32_t txdctl;
4838         uint32_t dmatxctl;
4839         uint32_t rxctrl;
4840         uint16_t i;
4841         int ret = 0;
4842
4843         PMD_INIT_FUNC_TRACE();
4844         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4845
4846         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4847                 txq = dev->data->tx_queues[i];
4848                 /* Setup Transmit Threshold Registers */
4849                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4850                 txdctl |= txq->pthresh & 0x7F;
4851                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4852                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4853                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4854         }
4855
4856         if (hw->mac.type != ixgbe_mac_82598EB) {
4857                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4858                 dmatxctl |= IXGBE_DMATXCTL_TE;
4859                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4860         }
4861
4862         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4863                 txq = dev->data->tx_queues[i];
4864                 if (!txq->tx_deferred_start) {
4865                         ret = ixgbe_dev_tx_queue_start(dev, i);
4866                         if (ret < 0)
4867                                 return ret;
4868                 }
4869         }
4870
4871         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4872                 rxq = dev->data->rx_queues[i];
4873                 if (!rxq->rx_deferred_start) {
4874                         ret = ixgbe_dev_rx_queue_start(dev, i);
4875                         if (ret < 0)
4876                                 return ret;
4877                 }
4878         }
4879
4880         /* Enable Receive engine */
4881         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4882         if (hw->mac.type == ixgbe_mac_82598EB)
4883                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4884         rxctrl |= IXGBE_RXCTRL_RXEN;
4885         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4886
4887         /* If loopback mode is enabled for 82599, set up the link accordingly */
4888         if (hw->mac.type == ixgbe_mac_82599EB &&
4889                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4890                 ixgbe_setup_loopback_link_82599(hw);
4891
4892         return 0;
4893 }
4894
4895 /*
4896  * Start Receive Units for specified queue.
4897  */
4898 int __attribute__((cold))
4899 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4900 {
4901         struct ixgbe_hw     *hw;
4902         struct ixgbe_rx_queue *rxq;
4903         uint32_t rxdctl;
4904         int poll_ms;
4905
4906         PMD_INIT_FUNC_TRACE();
4907         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4908
4909         if (rx_queue_id < dev->data->nb_rx_queues) {
4910                 rxq = dev->data->rx_queues[rx_queue_id];
4911
4912                 /* Allocate buffers for descriptor rings */
4913                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4914                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4915                                      rx_queue_id);
4916                         return -1;
4917                 }
4918                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4919                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4920                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4921
4922                 /* Wait until RX Enable ready */
4923                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4924                 do {
4925                         rte_delay_ms(1);
4926                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4927                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4928                 if (!poll_ms)
4929                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4930                                      rx_queue_id);
4931                 rte_wmb();
4932                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4933                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4934                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4935         } else
4936                 return -1;
4937
4938         return 0;
4939 }
4940
4941 /*
4942  * Stop Receive Units for specified queue.
4943  */
4944 int __attribute__((cold))
4945 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4946 {
4947         struct ixgbe_hw     *hw;
4948         struct ixgbe_adapter *adapter =
4949                 (struct ixgbe_adapter *)dev->data->dev_private;
4950         struct ixgbe_rx_queue *rxq;
4951         uint32_t rxdctl;
4952         int poll_ms;
4953
4954         PMD_INIT_FUNC_TRACE();
4955         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4956
4957         if (rx_queue_id < dev->data->nb_rx_queues) {
4958                 rxq = dev->data->rx_queues[rx_queue_id];
4959
4960                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4961                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4962                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4963
4964                 /* Wait until RX Enable bit clear */
4965                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4966                 do {
4967                         rte_delay_ms(1);
4968                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4969                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4970                 if (!poll_ms)
4971                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4972                                      rx_queue_id);
4973
4974                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4975
4976                 ixgbe_rx_queue_release_mbufs(rxq);
4977                 ixgbe_reset_rx_queue(adapter, rxq);
4978                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4979         } else
4980                 return -1;
4981
4982         return 0;
4983 }
4984
4985
4986 /*
4987  * Start Transmit Units for specified queue.
4988  */
4989 int __attribute__((cold))
4990 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4991 {
4992         struct ixgbe_hw     *hw;
4993         struct ixgbe_tx_queue *txq;
4994         uint32_t txdctl;
4995         int poll_ms;
4996
4997         PMD_INIT_FUNC_TRACE();
4998         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4999
5000         if (tx_queue_id < dev->data->nb_tx_queues) {
5001                 txq = dev->data->tx_queues[tx_queue_id];
5002                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5003                 txdctl |= IXGBE_TXDCTL_ENABLE;
5004                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5005
5006                 /* Wait until TX Enable ready */
5007                 if (hw->mac.type == ixgbe_mac_82599EB) {
5008                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5009                         do {
5010                                 rte_delay_ms(1);
5011                                 txdctl = IXGBE_READ_REG(hw,
5012                                         IXGBE_TXDCTL(txq->reg_idx));
5013                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5014                         if (!poll_ms)
5015                                 PMD_INIT_LOG(ERR, "Could not enable "
5016                                              "Tx Queue %d", tx_queue_id);
5017                 }
5018                 rte_wmb();
5019                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5020                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5021                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5022         } else
5023                 return -1;
5024
5025         return 0;
5026 }
5027
5028 /*
5029  * Stop Transmit Units for specified queue.
5030  */
5031 int __attribute__((cold))
5032 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5033 {
5034         struct ixgbe_hw     *hw;
5035         struct ixgbe_tx_queue *txq;
5036         uint32_t txdctl;
5037         uint32_t txtdh, txtdt;
5038         int poll_ms;
5039
5040         PMD_INIT_FUNC_TRACE();
5041         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5042
5043         if (tx_queue_id >= dev->data->nb_tx_queues)
5044                 return -1;
5045
5046         txq = dev->data->tx_queues[tx_queue_id];
5047
5048         /* Wait until TX queue is empty */
5049         if (hw->mac.type == ixgbe_mac_82599EB) {
5050                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5051                 do {
5052                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5053                         txtdh = IXGBE_READ_REG(hw,
5054                                                IXGBE_TDH(txq->reg_idx));
5055                         txtdt = IXGBE_READ_REG(hw,
5056                                                IXGBE_TDT(txq->reg_idx));
5057                 } while (--poll_ms && (txtdh != txtdt));
5058                 if (!poll_ms)
5059                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty when stopping.",
5060                                      tx_queue_id);
5061         }
5062
5063         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5064         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5065         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5066
5067         /* Wait until the TX Enable bit is cleared */
5068         if (hw->mac.type == ixgbe_mac_82599EB) {
5069                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5070                 do {
5071                         rte_delay_ms(1);
5072                         txdctl = IXGBE_READ_REG(hw,
5073                                                 IXGBE_TXDCTL(txq->reg_idx));
5074                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5075                 if (!poll_ms)
5076                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5077                                      tx_queue_id);
5078         }
5079
5080         if (txq->ops != NULL) {
5081                 txq->ops->release_mbufs(txq);
5082                 txq->ops->reset(txq);
5083         }
5084         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5085
5086         return 0;
5087 }
5088
5089 void
5090 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5091         struct rte_eth_rxq_info *qinfo)
5092 {
5093         struct ixgbe_rx_queue *rxq;
5094
5095         rxq = dev->data->rx_queues[queue_id];
5096
5097         qinfo->mp = rxq->mb_pool;
5098         qinfo->scattered_rx = dev->data->scattered_rx;
5099         qinfo->nb_desc = rxq->nb_rx_desc;
5100
5101         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5102         qinfo->conf.rx_drop_en = rxq->drop_en;
5103         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5104 }
5105
5106 void
5107 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5108         struct rte_eth_txq_info *qinfo)
5109 {
5110         struct ixgbe_tx_queue *txq;
5111
5112         txq = dev->data->tx_queues[queue_id];
5113
5114         qinfo->nb_desc = txq->nb_tx_desc;
5115
5116         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5117         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5118         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5119
5120         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5121         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5122         qinfo->conf.txq_flags = txq->txq_flags;
5123         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5124 }
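
/*
 * Illustrative sketch (assumption, not part of the driver): the two info
 * helpers above are reached through the generic ethdev calls, e.g.:
 *
 *	struct rte_eth_rxq_info rx_qinfo;
 *	struct rte_eth_txq_info tx_qinfo;
 *
 *	rte_eth_rx_queue_info_get(port_id, queue_id, &rx_qinfo);
 *	rte_eth_tx_queue_info_get(port_id, queue_id, &tx_qinfo);
 *
 * port_id and queue_id are placeholders; both calls return 0 on success.
 */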
5125
5126 /*
5127  * [VF] Initializes Receive Unit.
5128  */
5129 int __attribute__((cold))
5130 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5131 {
5132         struct ixgbe_hw     *hw;
5133         struct ixgbe_rx_queue *rxq;
5134         uint64_t bus_addr;
5135         uint32_t srrctl, psrtype = 0;
5136         uint16_t buf_size;
5137         uint16_t i;
5138         int ret;
5139
5140         PMD_INIT_FUNC_TRACE();
5141         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5142
5143         if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
5144                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5145                         "it must be a power of 2");
5146                 return -1;
5147         }
5148
5149         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5150                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5151                         "it must be less than or equal to %d",
5152                         hw->mac.max_rx_queues);
5153                 return -1;
5154         }
5155
5156         /*
5157          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5158          * disables VF packet reception if the PF MTU is > 1500.
5159          * This is done to deal with an 82599 limitation that requires
5160          * the PF and all VFs to share the same MTU.
5161          * The PF driver then re-enables VF packet reception when the VF
5162          * driver issues an IXGBE_VF_SET_LPE request.
5163          * In the meantime, the VF device cannot be used, even if the VF driver
5164          * and the guest VM network stack are ready to accept packets with a
5165          * size up to the PF MTU.
5166          * As a workaround for this PF behaviour, force the call to
5167          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5168          * VF packet reception works in all cases.
5169          */
5170         ixgbevf_rlpml_set_vf(hw,
5171                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5172
5173         /* Setup RX queues */
5174         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5175                 rxq = dev->data->rx_queues[i];
5176
5177                 /* Allocate buffers for descriptor rings */
5178                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5179                 if (ret)
5180                         return ret;
5181
5182                 /* Setup the Base and Length of the Rx Descriptor Rings */
5183                 bus_addr = rxq->rx_ring_phys_addr;
5184
5185                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5186                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5187                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5188                                 (uint32_t)(bus_addr >> 32));
5189                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5190                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5191                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5192                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5193
5194
5195                 /* Configure the SRRCTL register */
5196 #ifdef RTE_HEADER_SPLIT_ENABLE
5197                 /*
5198                  * Configure Header Split
5199                  */
5200                 if (dev->data->dev_conf.rxmode.header_split) {
5201                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5202                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5203                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5204                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5205                 } else
5206 #endif
5207                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5208
5209                 /* Set if packets are to be dropped when no descriptors are available */
5210                 if (rxq->drop_en)
5211                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5212
5213                 /*
5214                  * Configure the RX buffer size in the BSIZEPACKET field of
5215                  * the SRRCTL register of the queue.
5216                  * The value is in 1 KB resolution. Valid values can be from
5217                  * 1 KB to 16 KB.
5218                  */
5219                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5220                         RTE_PKTMBUF_HEADROOM);
5221                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5222                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5223
5224                 /*
5225                  * VF modification to write virtual function SRRCTL register
5226                  */
5227                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5228
5229                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5230                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5231
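                /*
                 * Worked example (illustrative, assuming typical defaults):
                 * with mbufs of RTE_MBUF_DEFAULT_BUF_SIZE the data room is
                 * 2048 + RTE_PKTMBUF_HEADROOM (128) bytes, so
                 *
                 *	buf_size            = 2176 - 128 = 2048
                 *	BSIZEPACKET         = 2048 >> 10 = 2   (2 KB Rx buffers)
                 *	buf_size (readback) = 2 << 10    = 2048
                 *
                 * and the check below compares the maximum frame size
                 * (plus two VLAN tags) against this 2048-byte buffer.
                 */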
5232                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5233                     /* It adds dual VLAN length for supporting dual VLAN */
5234                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5235                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5236                         if (!dev->data->scattered_rx)
5237                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5238                         dev->data->scattered_rx = 1;
5239                 }
5240         }
5241
5242 #ifdef RTE_HEADER_SPLIT_ENABLE
5243         if (dev->data->dev_conf.rxmode.header_split)
5244                 /* Must setup the PSRTYPE register */
5245                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5246                         IXGBE_PSRTYPE_UDPHDR   |
5247                         IXGBE_PSRTYPE_IPV4HDR  |
5248                         IXGBE_PSRTYPE_IPV6HDR;
5249 #endif
5250
5251         /* Set RQPL for VF RSS according to the number of Rx queues */
5252         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5253                 IXGBE_PSRTYPE_RQPL_SHIFT;
5254         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
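        /*
         * Worked example (illustrative assumption): with 4 VF Rx queues the
         * RQPL field written above holds 4 >> 1 = 2, with 2 queues it holds
         * 2 >> 1 = 1, and a single queue leaves the field at 0.
         */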
5255
5256         ixgbe_set_rx_function(dev);
5257
5258         return 0;
5259 }
5260
5261 /*
5262  * [VF] Initializes Transmit Unit.
5263  */
5264 void __attribute__((cold))
5265 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5266 {
5267         struct ixgbe_hw     *hw;
5268         struct ixgbe_tx_queue *txq;
5269         uint64_t bus_addr;
5270         uint32_t txctrl;
5271         uint16_t i;
5272
5273         PMD_INIT_FUNC_TRACE();
5274         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5275
5276         /* Setup the Base and Length of the Tx Descriptor Rings */
5277         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5278                 txq = dev->data->tx_queues[i];
5279                 bus_addr = txq->tx_ring_phys_addr;
5280                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5281                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5282                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5283                                 (uint32_t)(bus_addr >> 32));
5284                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5285                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5286                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5287                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5288                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5289
5290                 /*
5291                  * Disable the Tx Head Writeback RO (relaxed ordering) bit, since
5292                  * this breaks bookkeeping if write-backs are not delivered in order.
5293                  */
5294                 txctrl = IXGBE_READ_REG(hw,
5295                                 IXGBE_VFDCA_TXCTRL(i));
5296                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5297                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5298                                 txctrl);
5299         }
5300 }
5301
5302 /*
5303  * [VF] Start Transmit and Receive Units.
5304  */
5305 void __attribute__((cold))
5306 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5307 {
5308         struct ixgbe_hw     *hw;
5309         struct ixgbe_tx_queue *txq;
5310         struct ixgbe_rx_queue *rxq;
5311         uint32_t txdctl;
5312         uint32_t rxdctl;
5313         uint16_t i;
5314         int poll_ms;
5315
5316         PMD_INIT_FUNC_TRACE();
5317         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5318
5319         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5320                 txq = dev->data->tx_queues[i];
5321                 /* Setup Transmit Threshold Registers */
5322                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5323                 txdctl |= txq->pthresh & 0x7F;
5324                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5325                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5326                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5327         }
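
        /*
         * Worked example (illustrative assumption): with the driver's usual
         * defaults of pthresh = 32, hthresh = 0 and wthresh = 0, the loop
         * above ORs 32 | (0 << 8) | (0 << 16) into VFTXDCTL(i), i.e. only
         * the 7-bit prefetch threshold in bits [6:0] is programmed.
         */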
5328
5329         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5330
5331                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5332                 txdctl |= IXGBE_TXDCTL_ENABLE;
5333                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5334
5335                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5336                 /* Wait until TX Enable ready */
5337                 do {
5338                         rte_delay_ms(1);
5339                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5340                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5341                 if (!poll_ms)
5342                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5343         }
5344         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5345
5346                 rxq = dev->data->rx_queues[i];
5347
5348                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5349                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5350                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5351
5352                 /* Wait until RX Enable ready */
5353                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5354                 do {
5355                         rte_delay_ms(1);
5356                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5357                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5358                 if (!poll_ms)
5359                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5360                 rte_wmb();
5361                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5362
5363         }
5364 }
5365
5366 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5367 int __attribute__((weak))
5368 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5369 {
5370         return -1;
5371 }
5372
5373 uint16_t __attribute__((weak))
5374 ixgbe_recv_pkts_vec(
5375         void __rte_unused *rx_queue,
5376         struct rte_mbuf __rte_unused **rx_pkts,
5377         uint16_t __rte_unused nb_pkts)
5378 {
5379         return 0;
5380 }
5381
5382 uint16_t __attribute__((weak))
5383 ixgbe_recv_scattered_pkts_vec(
5384         void __rte_unused *rx_queue,
5385         struct rte_mbuf __rte_unused **rx_pkts,
5386         uint16_t __rte_unused nb_pkts)
5387 {
5388         return 0;
5389 }
5390
5391 int __attribute__((weak))
5392 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5393 {
5394         return -1;
5395 }
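
/*
 * Illustrative sketch (assumption, not part of the driver): the stubs above
 * rely on the standard GCC weak-symbol rule, where a strong definition in the
 * vector Rx/Tx object file replaces the weak fallback at link time:
 *
 *	int __attribute__((weak)) do_work(void) { return -1; }   in this file
 *	int do_work(void) { return 0; }              in the vector object file
 *
 * do_work() is a made-up name used only to show the linkage rule; when the
 * vector code is not compiled in, the weak stubs remain and their error
 * returns keep the driver on the scalar receive paths.
 */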