net/ixgbe: fix bitmask of supported Tx flags
[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit mask to indicate which bits are required for building the TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 IXGBE_TX_IEEE1588_TMST)
97
98 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
99                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
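
/*
 * Illustration only (a sketch, not part of the driver logic): any requested
 * mbuf Tx offload flag that is valid in PKT_TX_OFFLOAD_MASK but absent from
 * IXGBE_TX_OFFLOAD_MASK above is unsupported by this PMD and is caught in
 * ixgbe_prep_pkts() roughly like this:
 *
 *     if (mbuf->ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK)
 *         reject the packet and set rte_errno;
 *
 * e.g. PKT_TX_TCP_CKSUM passes (it is covered by PKT_TX_L4_MASK), while a
 * tunnel-type flag not listed above would be flagged as unsupported.
 */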
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 /*********************************************************************
115  *
116  *  TX functions
117  *
118  **********************************************************************/
119
120 /*
121  * Check for descriptors with their DD bit set and free mbufs.
122  * Return the total number of buffers freed.
123  */
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
126 {
127         struct ixgbe_tx_entry *txep;
128         uint32_t status;
129         int i, nb_free = 0;
130         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
131
132         /* check DD bit on threshold descriptor */
133         status = txq->tx_ring[txq->tx_next_dd].wb.status;
134         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
135                 return 0;
136
137         /*
138          * first buffer to free from S/W ring is at index
139          * tx_next_dd - (tx_rs_thresh-1)
140          */
141         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
142
143         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
144                 /* free buffers one at a time */
145                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
146                 txep->mbuf = NULL;
147
148                 if (unlikely(m == NULL))
149                         continue;
150
151                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
152                     (nb_free > 0 && m->pool != free[0]->pool)) {
153                         rte_mempool_put_bulk(free[0]->pool,
154                                              (void **)free, nb_free);
155                         nb_free = 0;
156                 }
157
158                 free[nb_free++] = m;
159         }
160
161         if (nb_free > 0)
162                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
163
164         /* buffers were freed, update counters */
165         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
166         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
167         if (txq->tx_next_dd >= txq->nb_tx_desc)
168                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
169
170         return txq->tx_rs_thresh;
171 }
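
/*
 * Example walk-through (a sketch under the default configuration, not
 * driver code): with tx_rs_thresh = 32, ixgbe_tx_free_bufs() checks only
 * the DD bit of the single "threshold" descriptor at tx_next_dd. If it is
 * set, all 32 descriptors of that group are known to be done, so the 32
 * mbufs are collected into free[] and returned to their mempool with one
 * rte_mempool_put_bulk() call per mempool encountered, instead of 32
 * individual frees.
 */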
172
173 /* Populate 4 descriptors with data from 4 mbufs */
174 static inline void
175 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
176 {
177         uint64_t buf_dma_addr;
178         uint32_t pkt_len;
179         int i;
180
181         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
182                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
183                 pkt_len = (*pkts)->data_len;
184
185                 /* write data to descriptor */
186                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
187
188                 txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190
191                 txdp->read.olinfo_status =
192                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
193
194                 rte_prefetch0(&(*pkts)->pool);
195         }
196 }
197
198 /* Populate 1 descriptor with data from 1 mbuf */
199 static inline void
200 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
201 {
202         uint64_t buf_dma_addr;
203         uint32_t pkt_len;
204
205         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
206         pkt_len = (*pkts)->data_len;
207
208         /* write data to descriptor */
209         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
210         txdp->read.cmd_type_len =
211                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
212         txdp->read.olinfo_status =
213                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
214         rte_prefetch0(&(*pkts)->pool);
215 }
216
217 /*
218  * Fill H/W descriptor ring with mbuf data.
219  * Copy mbuf pointers to the S/W ring.
220  */
221 static inline void
222 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
223                       uint16_t nb_pkts)
224 {
225         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
226         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
227         const int N_PER_LOOP = 4;
228         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
229         int mainpart, leftover;
230         int i, j;
231
232         /*
233          * Process most of the packets in chunks of N pkts.  Any
234          * leftover packets will get processed one at a time.
235          */
236         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
237         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
238         for (i = 0; i < mainpart; i += N_PER_LOOP) {
239                 /* Copy N mbuf pointers to the S/W ring */
240                 for (j = 0; j < N_PER_LOOP; ++j) {
241                         (txep + i + j)->mbuf = *(pkts + i + j);
242                 }
243                 tx4(txdp + i, pkts + i);
244         }
245
246         if (unlikely(leftover > 0)) {
247                 for (i = 0; i < leftover; ++i) {
248                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
249                         tx1(txdp + mainpart + i, pkts + mainpart + i);
250                 }
251         }
252 }
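
/*
 * Worked example for the split above (illustration only): with nb_pkts = 11
 * and N_PER_LOOP = 4, mainpart = 11 & ~3 = 8 and leftover = 11 & 3 = 3, so
 * the loop issues two tx4() calls for descriptors 0-7 and three tx1() calls
 * for descriptors 8-10.
 */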
253
254 static inline uint16_t
255 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
256              uint16_t nb_pkts)
257 {
258         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
259         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
260         uint16_t n = 0;
261
262         /*
263          * Begin scanning the H/W ring for done descriptors when the
264          * number of available descriptors drops below tx_free_thresh.  For
265          * each done descriptor, free the associated buffer.
266          */
267         if (txq->nb_tx_free < txq->tx_free_thresh)
268                 ixgbe_tx_free_bufs(txq);
269
270         /* Only use descriptors that are available */
271         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
272         if (unlikely(nb_pkts == 0))
273                 return 0;
274
275         /* Use exactly nb_pkts descriptors */
276         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
277
278         /*
279          * At this point, we know there are enough descriptors in the
280          * ring to transmit all the packets.  This assumes that each
281          * mbuf contains a single segment, and that no new offloads
282          * are expected, which would require a new context descriptor.
283          */
284
285         /*
286          * See if we're going to wrap around. If so, handle the top
287          * of the descriptor ring first, then do the bottom.  If not,
288          * the processing looks just like the "bottom" part anyway...
289          */
290         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
291                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
292                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
293
294                 /*
295                  * We know that the last descriptor in the ring will need to
296                  * have its RS bit set because tx_rs_thresh has to be
297                  * a divisor of the ring size
298                  */
299                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
300                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
301                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302
303                 txq->tx_tail = 0;
304         }
305
306         /* Fill H/W descriptor ring with mbuf data */
307         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
308         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
309
310         /*
311          * Determine if RS bit should be set
312          * This is what we actually want:
313          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
314          * but instead of subtracting 1 and doing >=, we can just do
315          * greater than without subtracting.
316          */
317         if (txq->tx_tail > txq->tx_next_rs) {
318                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
319                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
320                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
321                                                 txq->tx_rs_thresh);
322                 if (txq->tx_next_rs >= txq->nb_tx_desc)
323                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
324         }
325
326         /*
327          * Check for wrap-around. This would only happen if we used
328          * up to the last descriptor in the ring, no more, no less.
329          */
330         if (txq->tx_tail >= txq->nb_tx_desc)
331                 txq->tx_tail = 0;
332
333         /* update tail pointer */
334         rte_wmb();
335         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
336
337         return nb_pkts;
338 }
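
/*
 * Numeric example for the wrap-around handling in tx_xmit_pkts() (a sketch,
 * assuming a 512-descriptor ring): with tx_tail = 510 and nb_pkts = 5,
 * n = 512 - 510 = 2 descriptors are filled at the top of the ring, the RS
 * bit is set on the ring's last descriptor (tx_rs_thresh divides the ring
 * size, so tx_next_rs points there), tx_tail wraps to 0, and the remaining
 * 3 packets are filled from the bottom, leaving tx_tail = 3.
 */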
339
340 uint16_t
341 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
342                        uint16_t nb_pkts)
343 {
344         uint16_t nb_tx;
345
346         /* Transmit the burst directly if it is no larger than TX_MAX_BURST pkts */
347         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
348                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
349
350         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
351         nb_tx = 0;
352         while (nb_pkts) {
353                 uint16_t ret, n;
354
355                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
356                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
357                 nb_tx = (uint16_t)(nb_tx + ret);
358                 nb_pkts = (uint16_t)(nb_pkts - ret);
359                 if (ret < n)
360                         break;
361         }
362
363         return nb_tx;
364 }
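
/*
 * Hedged usage sketch (application side, not driver code): the simple Tx
 * path above is reached through the normal burst API once the driver has
 * selected ixgbe_xmit_pkts_simple as the queue's Tx function, e.g.:
 *
 *     struct rte_mbuf *pkts[64];
 *     uint16_t nb = build_burst(pkts, 64);   (hypothetical helper)
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb);
 *
 * Each mbuf is expected to be a single segment with no offload flags on
 * this path; otherwise the full-featured ixgbe_xmit_pkts() is used instead.
 */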
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
370 {
371         uint32_t type_tucmd_mlhl;
372         uint32_t mss_l4len_idx = 0;
373         uint32_t ctx_idx;
374         uint32_t vlan_macip_lens;
375         union ixgbe_tx_offload tx_offload_mask;
376         uint32_t seqnum_seed = 0;
377
378         ctx_idx = txq->ctx_curr;
379         tx_offload_mask.data[0] = 0;
380         tx_offload_mask.data[1] = 0;
381         type_tucmd_mlhl = 0;
382
383         /* Specify which HW CTX to upload. */
384         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
385
386         if (ol_flags & PKT_TX_VLAN_PKT) {
387                 tx_offload_mask.vlan_tci |= ~0;
388         }
389
390         /* check if TCP segmentation is required for this packet */
391         if (ol_flags & PKT_TX_TCP_SEG) {
392                 /* implies IP cksum in IPv4 */
393                 if (ol_flags & PKT_TX_IP_CKSUM)
394                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
395                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
396                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
397                 else
398                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
399                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
400                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
401
402                 tx_offload_mask.l2_len |= ~0;
403                 tx_offload_mask.l3_len |= ~0;
404                 tx_offload_mask.l4_len |= ~0;
405                 tx_offload_mask.tso_segsz |= ~0;
406                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
407                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
408         } else { /* no TSO, check if hardware checksum is needed */
409                 if (ol_flags & PKT_TX_IP_CKSUM) {
410                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
411                         tx_offload_mask.l2_len |= ~0;
412                         tx_offload_mask.l3_len |= ~0;
413                 }
414
415                 switch (ol_flags & PKT_TX_L4_MASK) {
416                 case PKT_TX_UDP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
427                         tx_offload_mask.l2_len |= ~0;
428                         tx_offload_mask.l3_len |= ~0;
429                         break;
430                 case PKT_TX_SCTP_CKSUM:
431                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
432                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
434                         tx_offload_mask.l2_len |= ~0;
435                         tx_offload_mask.l3_len |= ~0;
436                         break;
437                 default:
438                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
439                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
440                         break;
441                 }
442         }
443
444         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
445                 tx_offload_mask.outer_l2_len |= ~0;
446                 tx_offload_mask.outer_l3_len |= ~0;
447                 tx_offload_mask.l2_len |= ~0;
448                 seqnum_seed |= tx_offload.outer_l3_len
449                                << IXGBE_ADVTXD_OUTER_IPLEN;
450                 seqnum_seed |= tx_offload.l2_len
451                                << IXGBE_ADVTXD_TUNNEL_LEN;
452         }
453
454         txq->ctx_cache[ctx_idx].flags = ol_flags;
455         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
456                 tx_offload_mask.data[0] & tx_offload.data[0];
457         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
458                 tx_offload_mask.data[1] & tx_offload.data[1];
459         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
460
461         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
462         vlan_macip_lens = tx_offload.l3_len;
463         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
464                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
465                                     IXGBE_ADVTXD_MACLEN_SHIFT);
466         else
467                 vlan_macip_lens |= (tx_offload.l2_len <<
468                                     IXGBE_ADVTXD_MACLEN_SHIFT);
469         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
470         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
471         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
472         ctx_txd->seqnum_seed     = seqnum_seed;
473 }
474
475 /*
476  * Check which hardware context can be used. Use the existing match
477  * or create a new context descriptor.
478  */
479 static inline uint32_t
480 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
481                    union ixgbe_tx_offload tx_offload)
482 {
483         /* If it matches the currently used context */
484         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
485                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
486                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
487                      & tx_offload.data[0])) &&
488                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
489                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
490                      & tx_offload.data[1]))))
491                 return txq->ctx_curr;
492
493         /* Otherwise, check whether it matches the other cached context */
494         txq->ctx_curr ^= 1;
495         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
496                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
497                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
498                      & tx_offload.data[0])) &&
499                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
500                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
501                      & tx_offload.data[1]))))
502                 return txq->ctx_curr;
503
504         /* Mismatch: a new context descriptor has to be built */
505         return IXGBE_CTX_NUM;
506 }
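
/*
 * Example of the 2-entry context cache (illustration only): a burst that
 * alternates between packets needing "VLAN insertion" and packets needing
 * "VLAN + TCP checksum" keeps both configurations resident, so
 * what_advctx_update() keeps returning a valid slot and no new context
 * descriptor is written. A third distinct offload combination returns
 * IXGBE_CTX_NUM, which makes the caller build a fresh context descriptor
 * in the slot selected by ctx_curr.
 */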
507
508 static inline uint32_t
509 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
510 {
511         uint32_t tmp = 0;
512
513         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
514                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
515         if (ol_flags & PKT_TX_IP_CKSUM)
516                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
517         if (ol_flags & PKT_TX_TCP_SEG)
518                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
519         return tmp;
520 }
521
522 static inline uint32_t
523 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
524 {
525         uint32_t cmdtype = 0;
526
527         if (ol_flags & PKT_TX_VLAN_PKT)
528                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
529         if (ol_flags & PKT_TX_TCP_SEG)
530                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
531         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
532                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
533         if (ol_flags & PKT_TX_MACSEC)
534                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
535         return cmdtype;
536 }
537
538 /* Default RS bit threshold values */
539 #ifndef DEFAULT_TX_RS_THRESH
540 #define DEFAULT_TX_RS_THRESH   32
541 #endif
542 #ifndef DEFAULT_TX_FREE_THRESH
543 #define DEFAULT_TX_FREE_THRESH 32
544 #endif
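
/*
 * Configuration sketch (assumed typical values, not a requirement imposed
 * here): with a 512-descriptor ring and the defaults above, the RS bit is
 * requested every 32 descriptors and cleanup starts once fewer than 32
 * descriptors are free. As noted in tx_xmit_pkts(), tx_rs_thresh is
 * expected to divide the ring size so that the last ring entry always
 * carries an RS bit before wrap-around.
 */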
545
546 /* Reset transmit descriptors after they have been used */
547 static inline int
548 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
549 {
550         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
551         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
552         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
553         uint16_t nb_tx_desc = txq->nb_tx_desc;
554         uint16_t desc_to_clean_to;
555         uint16_t nb_tx_to_clean;
556         uint32_t status;
557
558         /* Determine the last descriptor needing to be cleaned */
559         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
560         if (desc_to_clean_to >= nb_tx_desc)
561                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
562
563         /* Check to make sure the last descriptor to clean is done */
564         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
565         status = txr[desc_to_clean_to].wb.status;
566         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
567                 PMD_TX_FREE_LOG(DEBUG,
568                                 "TX descriptor %4u is not done "
569                                 "(port=%d queue=%d)",
570                                 desc_to_clean_to,
571                                 txq->port_id, txq->queue_id);
572                 /* Failed to clean any descriptors, better luck next time */
573                 return -(1);
574         }
575
576         /* Figure out how many descriptors will be cleaned */
577         if (last_desc_cleaned > desc_to_clean_to)
578                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
579                                                         desc_to_clean_to);
580         else
581                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
582                                                 last_desc_cleaned);
583
584         PMD_TX_FREE_LOG(DEBUG,
585                         "Cleaning %4u TX descriptors: %4u to %4u "
586                         "(port=%d queue=%d)",
587                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
588                         txq->port_id, txq->queue_id);
589
590         /*
591          * The last descriptor to clean is done, so that means all the
592          * descriptors from the last descriptor that was cleaned
593          * up to the last descriptor with the RS bit set
594          * are done. Only reset the threshold descriptor.
595          */
596         txr[desc_to_clean_to].wb.status = 0;
597
598         /* Update the txq to reflect the last descriptor that was cleaned */
599         txq->last_desc_cleaned = desc_to_clean_to;
600         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
601
602         /* No Error */
603         return 0;
604 }
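
/*
 * Worked example (a sketch, assuming nb_tx_desc = 512 and
 * tx_rs_thresh = 32): with last_desc_cleaned = 31, ixgbe_xmit_cleanup()
 * targets desc_to_clean_to = 63. If that descriptor's DD bit is set, all
 * 32 descriptors 32..63 are considered done, nb_tx_free grows by 32 and
 * last_desc_cleaned becomes 63; if DD is not set, the function returns -1
 * and no descriptors are reclaimed this time.
 */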
605
606 uint16_t
607 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
608                 uint16_t nb_pkts)
609 {
610         struct ixgbe_tx_queue *txq;
611         struct ixgbe_tx_entry *sw_ring;
612         struct ixgbe_tx_entry *txe, *txn;
613         volatile union ixgbe_adv_tx_desc *txr;
614         volatile union ixgbe_adv_tx_desc *txd, *txp;
615         struct rte_mbuf     *tx_pkt;
616         struct rte_mbuf     *m_seg;
617         uint64_t buf_dma_addr;
618         uint32_t olinfo_status;
619         uint32_t cmd_type_len;
620         uint32_t pkt_len;
621         uint16_t slen;
622         uint64_t ol_flags;
623         uint16_t tx_id;
624         uint16_t tx_last;
625         uint16_t nb_tx;
626         uint16_t nb_used;
627         uint64_t tx_ol_req;
628         uint32_t ctx = 0;
629         uint32_t new_ctx;
630         union ixgbe_tx_offload tx_offload;
631
632         tx_offload.data[0] = 0;
633         tx_offload.data[1] = 0;
634         txq = tx_queue;
635         sw_ring = txq->sw_ring;
636         txr     = txq->tx_ring;
637         tx_id   = txq->tx_tail;
638         txe = &sw_ring[tx_id];
639         txp = NULL;
640
641         /* Determine if the descriptor ring needs to be cleaned. */
642         if (txq->nb_tx_free < txq->tx_free_thresh)
643                 ixgbe_xmit_cleanup(txq);
644
645         rte_prefetch0(&txe->mbuf->pool);
646
647         /* TX loop */
648         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
649                 new_ctx = 0;
650                 tx_pkt = *tx_pkts++;
651                 pkt_len = tx_pkt->pkt_len;
652
653                 /*
654                  * Determine how many (if any) context descriptors
655                  * are needed for offload functionality.
656                  */
657                 ol_flags = tx_pkt->ol_flags;
658
659                 /* If hardware offload required */
660                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
661                 if (tx_ol_req) {
662                         tx_offload.l2_len = tx_pkt->l2_len;
663                         tx_offload.l3_len = tx_pkt->l3_len;
664                         tx_offload.l4_len = tx_pkt->l4_len;
665                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
666                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
667                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
668                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
669
670                         /* Check if a new context needs to be built or an existing one reused */
671                         ctx = what_advctx_update(txq, tx_ol_req,
672                                 tx_offload);
673                         /* Only allocate a context descriptor if required */
674                         new_ctx = (ctx == IXGBE_CTX_NUM);
675                         ctx = txq->ctx_curr;
676                 }
677
678                 /*
679                  * Keep track of how many descriptors are used in this loop.
680                  * This will always be the number of segments plus the number
681                  * of context descriptors required to transmit the packet.
682                  */
683                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
684
685                 if (txp != NULL &&
686                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
687                         /* set RS on the previous packet in the burst */
688                         txp->read.cmd_type_len |=
689                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
690
691                 /*
692                  * The number of descriptors that must be allocated for a
693                  * packet is the number of segments of that packet, plus 1
694                  * Context Descriptor for the hardware offload, if any.
695                  * Determine the last TX descriptor to allocate in the TX ring
696                  * for the packet, starting from the current position (tx_id)
697                  * in the ring.
698                  */
699                 tx_last = (uint16_t) (tx_id + nb_used - 1);
700
701                 /* Circular ring */
702                 if (tx_last >= txq->nb_tx_desc)
703                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
704
705                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
706                            " tx_first=%u tx_last=%u",
707                            (unsigned) txq->port_id,
708                            (unsigned) txq->queue_id,
709                            (unsigned) pkt_len,
710                            (unsigned) tx_id,
711                            (unsigned) tx_last);
712
713                 /*
714                  * Make sure there are enough TX descriptors available to
715                  * transmit the entire packet.
716                  * nb_used better be less than or equal to txq->tx_rs_thresh
717                  */
718                 if (nb_used > txq->nb_tx_free) {
719                         PMD_TX_FREE_LOG(DEBUG,
720                                         "Not enough free TX descriptors "
721                                         "nb_used=%4u nb_free=%4u "
722                                         "(port=%d queue=%d)",
723                                         nb_used, txq->nb_tx_free,
724                                         txq->port_id, txq->queue_id);
725
726                         if (ixgbe_xmit_cleanup(txq) != 0) {
727                                 /* Could not clean any descriptors */
728                                 if (nb_tx == 0)
729                                         return 0;
730                                 goto end_of_tx;
731                         }
732
733                         /* nb_used better be <= txq->tx_rs_thresh */
734                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
735                                 PMD_TX_FREE_LOG(DEBUG,
736                                         "The number of descriptors needed to "
737                                         "transmit the packet exceeds the "
738                                         "RS bit threshold. This will impact "
739                                         "performance."
740                                         "performance. "
741                                         "tx_rs_thresh=%4u. "
742                                         "(port=%d queue=%d)",
743                                         nb_used, txq->nb_tx_free,
744                                         txq->tx_rs_thresh,
745                                         txq->port_id, txq->queue_id);
746                                 /*
747                                  * Loop here until there are enough TX
748                                  * descriptors or until the ring cannot be
749                                  * cleaned.
750                                  */
751                                 while (nb_used > txq->nb_tx_free) {
752                                         if (ixgbe_xmit_cleanup(txq) != 0) {
753                                                 /*
754                                                  * Could not clean any
755                                                  * descriptors
756                                                  */
757                                                 if (nb_tx == 0)
758                                                         return 0;
759                                                 goto end_of_tx;
760                                         }
761                                 }
762                         }
763                 }
764
765                 /*
766                  * By now there are enough free TX descriptors to transmit
767                  * the packet.
768                  */
769
770                 /*
771                  * Set common flags of all TX Data Descriptors.
772                  *
773                  * The following bits must be set in all Data Descriptors:
774                  *   - IXGBE_ADVTXD_DTYP_DATA
775                  *   - IXGBE_ADVTXD_DCMD_DEXT
776                  *
777                  * The following bits must be set in the first Data Descriptor
778                  * and are ignored in the other ones:
779                  *   - IXGBE_ADVTXD_DCMD_IFCS
780                  *   - IXGBE_ADVTXD_MAC_1588
781                  *   - IXGBE_ADVTXD_DCMD_VLE
782                  *
783                  * The following bits must only be set in the last Data
784                  * Descriptor:
785                  *   - IXGBE_TXD_CMD_EOP
786                  *
787                  * The following bits can be set in any Data Descriptor, but
788                  * are only set in the last Data Descriptor:
789                  *   - IXGBE_TXD_CMD_RS
790                  */
791                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
792                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
793
794 #ifdef RTE_LIBRTE_IEEE1588
795                 if (ol_flags & PKT_TX_IEEE1588_TMST)
796                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
797 #endif
798
799                 olinfo_status = 0;
800                 if (tx_ol_req) {
801
802                         if (ol_flags & PKT_TX_TCP_SEG) {
803                                 /* when TSO is on, paylen in the descriptor is
804                                  * not the packet len but the TCP payload len */
805                                 pkt_len -= (tx_offload.l2_len +
806                                         tx_offload.l3_len + tx_offload.l4_len);
807                         }
808
809                         /*
810                          * Setup the TX Advanced Context Descriptor if required
811                          */
812                         if (new_ctx) {
813                                 volatile struct ixgbe_adv_tx_context_desc *
814                                     ctx_txd;
815
816                                 ctx_txd = (volatile struct
817                                     ixgbe_adv_tx_context_desc *)
818                                     &txr[tx_id];
819
820                                 txn = &sw_ring[txe->next_id];
821                                 rte_prefetch0(&txn->mbuf->pool);
822
823                                 if (txe->mbuf != NULL) {
824                                         rte_pktmbuf_free_seg(txe->mbuf);
825                                         txe->mbuf = NULL;
826                                 }
827
828                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
829                                         tx_offload);
830
831                                 txe->last_id = tx_last;
832                                 tx_id = txe->next_id;
833                                 txe = txn;
834                         }
835
836                         /*
837                          * Set up the TX Advanced Data Descriptor.
838                          * This path is taken whether a new context descriptor
839                          * was built or an existing one is being reused.
840                          */
841                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
842                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
843                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
844                 }
845
846                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
847
848                 m_seg = tx_pkt;
849                 do {
850                         txd = &txr[tx_id];
851                         txn = &sw_ring[txe->next_id];
852                         rte_prefetch0(&txn->mbuf->pool);
853
854                         if (txe->mbuf != NULL)
855                                 rte_pktmbuf_free_seg(txe->mbuf);
856                         txe->mbuf = m_seg;
857
858                         /*
859                          * Set up Transmit Data Descriptor.
860                          */
861                         slen = m_seg->data_len;
862                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
863                         txd->read.buffer_addr =
864                                 rte_cpu_to_le_64(buf_dma_addr);
865                         txd->read.cmd_type_len =
866                                 rte_cpu_to_le_32(cmd_type_len | slen);
867                         txd->read.olinfo_status =
868                                 rte_cpu_to_le_32(olinfo_status);
869                         txe->last_id = tx_last;
870                         tx_id = txe->next_id;
871                         txe = txn;
872                         m_seg = m_seg->next;
873                 } while (m_seg != NULL);
874
875                 /*
876                  * The last packet data descriptor needs End Of Packet (EOP)
877                  */
878                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
879                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
880                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
881
882                 /* Set RS bit only on threshold packets' last descriptor */
883                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
884                         PMD_TX_FREE_LOG(DEBUG,
885                                         "Setting RS bit on TXD id="
886                                         "%4u (port=%d queue=%d)",
887                                         tx_last, txq->port_id, txq->queue_id);
888
889                         cmd_type_len |= IXGBE_TXD_CMD_RS;
890
891                         /* Update txq RS bit counters */
892                         txq->nb_tx_used = 0;
893                         txp = NULL;
894                 } else
895                         txp = txd;
896
897                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
898         }
899
900 end_of_tx:
901         /* set RS on last packet in the burst */
902         if (txp != NULL)
903                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
904
905         rte_wmb();
906
907         /*
908          * Set the Transmit Descriptor Tail (TDT)
909          */
910         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
911                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
912                    (unsigned) tx_id, (unsigned) nb_tx);
913         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
914         txq->tx_tail = tx_id;
915
916         return nb_tx;
917 }
918
919 /*********************************************************************
920  *
921  *  TX prep functions
922  *
923  **********************************************************************/
924 uint16_t
925 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
926 {
927         int i, ret;
928         uint64_t ol_flags;
929         struct rte_mbuf *m;
930         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
931
932         for (i = 0; i < nb_pkts; i++) {
933                 m = tx_pkts[i];
934                 ol_flags = m->ol_flags;
935
936                 /**
937                  * Check if packet meets requirements for number of segments
938                  *
939                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
940                  *       non-TSO
941                  */
942
943                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
944                         rte_errno = EINVAL;
945                         return i;
946                 }
947
948                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
949                         rte_errno = ENOTSUP;
950                         return i;
951                 }
952
953 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
954                 ret = rte_validate_tx_offload(m);
955                 if (ret != 0) {
956                         rte_errno = -ret;
957                         return i;
958                 }
959 #endif
960                 ret = rte_net_intel_cksum_prepare(m);
961                 if (ret != 0) {
962                         rte_errno = -ret;
963                         return i;
964                 }
965         }
966
967         return i;
968 }
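
/*
 * Hedged usage sketch (application side): ixgbe_prep_pkts() is exposed to
 * applications through rte_eth_tx_prepare(), typically right before the
 * transmit burst:
 *
 *     uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb);
 *     if (nb_prep < nb)
 *         handle pkts[nb_prep], which failed a check (see rte_errno);
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 *
 * The check against IXGBE_TX_OFFLOAD_NOTSUP_MASK rejects offload flags the
 * hardware cannot honour before they reach ixgbe_xmit_pkts().
 */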
969
970 /*********************************************************************
971  *
972  *  RX functions
973  *
974  **********************************************************************/
975
976 #define IXGBE_PACKET_TYPE_ETHER                         0X00
977 #define IXGBE_PACKET_TYPE_IPV4                          0X01
978 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
979 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
980 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
981 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
982 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
983 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
984 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
985 #define IXGBE_PACKET_TYPE_IPV6                          0X04
986 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
987 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
988 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
989 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
990 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
991 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
992 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
993 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
994 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
995 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
996 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
997 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
998 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
999 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1000 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1001 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1002 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1003 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1004 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1005 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1006 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1007 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1008 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1009
1010 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1011 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1012 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1013 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1014 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1015 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1016 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1017 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1018 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1019 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1020 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1021 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1022 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1023 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1024 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1025 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1026 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1027 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1028 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1029 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1030 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1031 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1032 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1033
1034 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1035 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1036 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1037 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1038 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1039 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1040 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1041 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1042 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1043 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1044 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1045 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1046 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1047 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1048 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1049 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1050 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1051 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1052 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1053 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1054 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1055 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1056 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1057
1058 #define IXGBE_PACKET_TYPE_MAX               0X80
1059 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1060 #define IXGBE_PACKET_TYPE_SHIFT             0X04
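
/*
 * Decoding example (illustration based on the values above): the pkt_info
 * bits compose, e.g. IXGBE_PACKET_TYPE_IPV4_UDP (0x21) is the UDP bit
 * (0x20) combined with the IPv4 bit (0x01) and maps in the table below to
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP. The tunnel
 * variants use a separate table indexed the same way, with 0x80
 * distinguishing VXLAN entries from NVGRE ones.
 */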
1061
1062 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change is made here. */
1063 static inline uint32_t
1064 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1065 {
1066         /**
1067          * Use two different tables for normal packets and tunnel packets
1068          * to save space.
1069          */
1070         static const uint32_t
1071                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1072                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1073                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1074                         RTE_PTYPE_L3_IPV4,
1075                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1077                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1079                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1080                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1081                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1082                         RTE_PTYPE_L3_IPV4_EXT,
1083                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1085                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1086                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1087                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1089                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1090                         RTE_PTYPE_L3_IPV6,
1091                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1092                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1093                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1094                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1095                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1096                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1097                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV6_EXT,
1099                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1101                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1102                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1103                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1104                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1105                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1106                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1107                         RTE_PTYPE_INNER_L3_IPV6,
1108                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1110                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1111                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1112                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1113                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1114                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1116                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1117                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1118                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1119                         RTE_PTYPE_INNER_L3_IPV6,
1120                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1122                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1125                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1126                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1128                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1129                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1130                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1131                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1132                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1134                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1135                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1136                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1137                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1138                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1140                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1141                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1142                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1143                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1144                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1145                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1146                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1147                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1148                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1149                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1150                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1151                         RTE_PTYPE_L2_ETHER |
1152                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1154         };
1155
1156         static const uint32_t
1157                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1158                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1160                         RTE_PTYPE_INNER_L2_ETHER,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1164                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1166                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1167                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1169                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1170                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1172                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1176                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1178                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1179                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1180                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1181                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1182                         RTE_PTYPE_INNER_L4_TCP,
1183                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1184                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1185                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1186                         RTE_PTYPE_INNER_L4_TCP,
1187                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1188                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1189                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1190                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1191                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1192                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1193                         RTE_PTYPE_INNER_L4_TCP,
1194                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1195                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1196                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1197                         RTE_PTYPE_INNER_L3_IPV4,
1198                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1199                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1201                         RTE_PTYPE_INNER_L4_UDP,
1202                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1203                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1205                         RTE_PTYPE_INNER_L4_UDP,
1206                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1207                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1208                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1209                         RTE_PTYPE_INNER_L4_SCTP,
1210                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1211                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1213                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1216                         RTE_PTYPE_INNER_L4_UDP,
1217                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1220                         RTE_PTYPE_INNER_L4_SCTP,
1221                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1224                         RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1227                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1228                         RTE_PTYPE_INNER_L4_SCTP,
1229                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1231                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1232                         RTE_PTYPE_INNER_L4_SCTP,
1233                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1235                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1236                         RTE_PTYPE_INNER_L4_TCP,
1237                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1240                         RTE_PTYPE_INNER_L4_UDP,
1241
1242                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1243                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1244                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV4,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1250                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1251                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1252                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV6,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4,
1269                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1270                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1271                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1272                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1273                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1274                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1275                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1276                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1277                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1278                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1280                         RTE_PTYPE_INNER_L3_IPV4,
1281                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1282                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1283                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1284                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1285                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1286                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1287                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1288                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1289                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1290                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1292                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1293                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1294                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1295                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1296                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1297                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1298                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1299                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1300                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1301                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1302                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1303                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1304                         RTE_PTYPE_INNER_L3_IPV4,
1305                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1306                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1307                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1308                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1309                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1310                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1311                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1312                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1313                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1314                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1315                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1316                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1317                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1318                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1319                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1320                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1321                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1322                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1323                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1324                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1325                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1326                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1327                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1328                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1329                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1330                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1331                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1332                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1333         };
1334
1335         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1336                 return RTE_PTYPE_UNKNOWN;
1337
1338         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1339
1340         /* For tunnel packet */
1341         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1342                 /* Remove the tunnel bit to save space. */
1343                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1344                 return ptype_table_tn[pkt_info];
1345         }
1346
1347         /**
1348          * For x550, if the packet is not a tunnel packet,
1349          * the tunnel type bit should be 0.
1350          * Reuse the 82599 mask.
1351          */
1352         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1353
1354         return ptype_table[pkt_info];
1355 }
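/*
 * Illustrative example of the lookup above: a descriptor whose type field
 * resolves to IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP (tunnel bit set) is looked up
 * in ptype_table_tn and reported as RTE_PTYPE_L2_ETHER |
 * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
 * RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP.
 */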
1356
1357 static inline uint64_t
1358 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1359 {
1360         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1361                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1362                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1363                 PKT_RX_RSS_HASH, 0, 0, 0,
1364                 0, 0, 0,  PKT_RX_FDIR,
1365         };
1366 #ifdef RTE_LIBRTE_IEEE1588
1367         static uint64_t ip_pkt_etqf_map[8] = {
1368                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1369                 0, 0, 0, 0,
1370         };
1371
1372         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1373                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1374                                 ip_rss_types_map[pkt_info & 0XF];
1375         else
1376                 return ip_rss_types_map[pkt_info & 0XF];
1377 #else
1378         return ip_rss_types_map[pkt_info & 0XF];
1379 #endif
1380 }
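/*
 * Illustrative reading of ixgbe_rxd_pkt_info_to_pkt_flags() above: the low
 * nibble of pkt_info indexes ip_rss_types_map, so for example an index of 1
 * yields PKT_RX_RSS_HASH, an index of 15 yields PKT_RX_FDIR (Flow Director),
 * and an index of 0 sets no flag at all.
 */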
1381
1382 static inline uint64_t
1383 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1384 {
1385         uint64_t pkt_flags;
1386
1387         /*
1388          * Check only whether a VLAN is present.
1389          * Do not check whether the L3/L4 Rx checksum was computed by the NIC;
1390          * that can be determined from the rte_eth_rxmode.hw_ip_checksum flag.
1391          */
1392         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1393
1394 #ifdef RTE_LIBRTE_IEEE1588
1395         if (rx_status & IXGBE_RXD_STAT_TMST)
1396                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1397 #endif
1398         return pkt_flags;
1399 }
1400
1401 static inline uint64_t
1402 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1403 {
1404         uint64_t pkt_flags;
1405
1406         /*
1407          * Bit 31: IPE, IPv4 checksum error
1408          * Bit 30: L4I, L4 integrity error
1409          */
1410         static uint64_t error_to_pkt_flags_map[4] = {
1411                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1412                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1413                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1414                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1415         };
1416         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1417                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1418
1419         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1420             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1421                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1422         }
1423
1424         return pkt_flags;
1425 }
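/*
 * Worked example for rx_desc_error_to_pkt_flags() above: the two error bits
 * (IPE and L4 error) form a 2-bit index into error_to_pkt_flags_map, so an
 * index of 0 yields PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD and an index
 * of 3 yields PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD.
 */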
1426
1427 /*
1428  * LOOK_AHEAD defines how many desc statuses to check beyond the
1429  * current descriptor.
1430  * It must be a compile-time #define for optimal performance.
1431  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1432  * function only works with LOOK_AHEAD=8.
1433  */
1434 #define LOOK_AHEAD 8
1435 #if (LOOK_AHEAD != 8)
1436 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1437 #endif
1438 static inline int
1439 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1440 {
1441         volatile union ixgbe_adv_rx_desc *rxdp;
1442         struct ixgbe_rx_entry *rxep;
1443         struct rte_mbuf *mb;
1444         uint16_t pkt_len;
1445         uint64_t pkt_flags;
1446         int nb_dd;
1447         uint32_t s[LOOK_AHEAD];
1448         uint32_t pkt_info[LOOK_AHEAD];
1449         int i, j, nb_rx = 0;
1450         uint32_t status;
1451         uint64_t vlan_flags = rxq->vlan_flags;
1452
1453         /* get references to current descriptor and S/W ring entry */
1454         rxdp = &rxq->rx_ring[rxq->rx_tail];
1455         rxep = &rxq->sw_ring[rxq->rx_tail];
1456
1457         status = rxdp->wb.upper.status_error;
1458         /* check to make sure there is at least 1 packet to receive */
1459         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1460                 return 0;
1461
1462         /*
1463          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1464          * reference packets that are ready to be received.
1465          */
1466         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1467              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1468                 /* Read desc statuses backwards to avoid race condition */
1469                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1470                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1471
1472                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1473                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1474                                                        lo_dword.data);
1475
1476                 /* Compute how many status bits were set */
1477                 nb_dd = 0;
1478                 for (j = 0; j < LOOK_AHEAD; ++j)
1479                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1480
1481                 nb_rx += nb_dd;
1482
1483                 /* Translate descriptor info to mbuf format */
1484                 for (j = 0; j < nb_dd; ++j) {
1485                         mb = rxep[j].mbuf;
1486                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1487                                   rxq->crc_len;
1488                         mb->data_len = pkt_len;
1489                         mb->pkt_len = pkt_len;
1490                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1491
1492                         /* convert descriptor fields to rte mbuf flags */
1493                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1494                                 vlan_flags);
1495                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1496                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1497                                         ((uint16_t)pkt_info[j]);
1498                         mb->ol_flags = pkt_flags;
1499                         mb->packet_type =
1500                                 ixgbe_rxd_pkt_info_to_pkt_type
1501                                         (pkt_info[j], rxq->pkt_type_mask);
1502
1503                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1504                                 mb->hash.rss = rte_le_to_cpu_32(
1505                                     rxdp[j].wb.lower.hi_dword.rss);
1506                         else if (pkt_flags & PKT_RX_FDIR) {
1507                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1508                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1509                                     IXGBE_ATR_HASH_MASK;
1510                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1511                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1512                         }
1513                 }
1514
1515                 /* Move mbuf pointers from the S/W ring to the stage */
1516                 for (j = 0; j < LOOK_AHEAD; ++j) {
1517                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1518                 }
1519
1520                 /* stop scanning if this group of descriptors was not fully completed */
1521                 if (nb_dd != LOOK_AHEAD)
1522                         break;
1523         }
1524
1525         /* clear software ring entries so we can cleanup correctly */
1526         for (i = 0; i < nb_rx; ++i) {
1527                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1528         }
1529
1530
1531         return nb_rx;
1532 }
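/*
 * A quick sketch of the scan above: descriptors are inspected in groups of
 * LOOK_AHEAD (8). If, say, only the first 5 descriptors of a group have their
 * DD bit set, nb_dd is 5: five packets are accounted for, their fields are
 * translated into the corresponding mbufs, and the scan stops after this
 * group; a full group of 8 lets the loop continue with the next group.
 */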
1533
1534 static inline int
1535 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1536 {
1537         volatile union ixgbe_adv_rx_desc *rxdp;
1538         struct ixgbe_rx_entry *rxep;
1539         struct rte_mbuf *mb;
1540         uint16_t alloc_idx;
1541         __le64 dma_addr;
1542         int diag, i;
1543
1544         /* allocate buffers in bulk directly into the S/W ring */
1545         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1546         rxep = &rxq->sw_ring[alloc_idx];
1547         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1548                                     rxq->rx_free_thresh);
1549         if (unlikely(diag != 0))
1550                 return -ENOMEM;
1551
1552         rxdp = &rxq->rx_ring[alloc_idx];
1553         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1554                 /* populate the static rte mbuf fields */
1555                 mb = rxep[i].mbuf;
1556                 if (reset_mbuf) {
1557                         mb->next = NULL;
1558                         mb->nb_segs = 1;
1559                         mb->port = rxq->port_id;
1560                 }
1561
1562                 rte_mbuf_refcnt_set(mb, 1);
1563                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1564
1565                 /* populate the descriptors */
1566                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1567                 rxdp[i].read.hdr_addr = 0;
1568                 rxdp[i].read.pkt_addr = dma_addr;
1569         }
1570
1571         /* update state of internal queue structure */
1572         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1573         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1574                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1575
1576         /* no errors */
1577         return 0;
1578 }
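/*
 * Refill arithmetic sketch for ixgbe_rx_alloc_bufs() above, assuming
 * rx_free_trigger starts at rx_free_thresh - 1: with a 128-descriptor ring
 * and rx_free_thresh = 32, the trigger moves 31 -> 63 -> 95 -> 127 and each
 * bulk allocation starts at alloc_idx = trigger - 31 (0, 32, 64, 96); after
 * the refill at 127 the trigger wraps back to 31.
 */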
1579
1580 static inline uint16_t
1581 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1582                          uint16_t nb_pkts)
1583 {
1584         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1585         int i;
1586
1587         /* how many packets are ready to return? */
1588         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1589
1590         /* copy mbuf pointers to the application's packet list */
1591         for (i = 0; i < nb_pkts; ++i)
1592                 rx_pkts[i] = stage[i];
1593
1594         /* update internal queue state */
1595         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1596         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1597
1598         return nb_pkts;
1599 }
1600
1601 static inline uint16_t
1602 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1603              uint16_t nb_pkts)
1604 {
1605         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1606         uint16_t nb_rx = 0;
1607
1608         /* Any previously recv'd pkts will be returned from the Rx stage */
1609         if (rxq->rx_nb_avail)
1610                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1611
1612         /* Scan the H/W ring for packets to receive */
1613         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1614
1615         /* update internal queue state */
1616         rxq->rx_next_avail = 0;
1617         rxq->rx_nb_avail = nb_rx;
1618         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1619
1620         /* if required, allocate new buffers to replenish descriptors */
1621         if (rxq->rx_tail > rxq->rx_free_trigger) {
1622                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1623
1624                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1625                         int i, j;
1626
1627                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1628                                    "queue_id=%u", (unsigned) rxq->port_id,
1629                                    (unsigned) rxq->queue_id);
1630
1631                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1632                                 rxq->rx_free_thresh;
1633
1634                         /*
1635                          * Need to rewind any previous receives if we cannot
1636                          * allocate new buffers to replenish the old ones.
1637                          */
1638                         rxq->rx_nb_avail = 0;
1639                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1640                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1641                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1642
1643                         return 0;
1644                 }
1645
1646                 /* update tail pointer */
1647                 rte_wmb();
1648                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1649                                             cur_free_trigger);
1650         }
1651
1652         if (rxq->rx_tail >= rxq->nb_rx_desc)
1653                 rxq->rx_tail = 0;
1654
1655         /* received any packets this loop? */
1656         if (rxq->rx_nb_avail)
1657                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1658
1659         return 0;
1660 }
1661
1662 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1663 uint16_t
1664 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1665                            uint16_t nb_pkts)
1666 {
1667         uint16_t nb_rx;
1668
1669         if (unlikely(nb_pkts == 0))
1670                 return 0;
1671
1672         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1673                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1674
1675         /* request is relatively large, chunk it up */
1676         nb_rx = 0;
1677         while (nb_pkts) {
1678                 uint16_t ret, n;
1679
1680                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1681                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1682                 nb_rx = (uint16_t)(nb_rx + ret);
1683                 nb_pkts = (uint16_t)(nb_pkts - ret);
1684                 if (ret < n)
1685                         break;
1686         }
1687
1688         return nb_rx;
1689 }
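/*
 * For example, assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32, a request for 100
 * packets is served as bursts of 32, 32, 32 and 4; the loop above stops early
 * as soon as a burst returns fewer packets than it asked for.
 */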
1690
1691 uint16_t
1692 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1693                 uint16_t nb_pkts)
1694 {
1695         struct ixgbe_rx_queue *rxq;
1696         volatile union ixgbe_adv_rx_desc *rx_ring;
1697         volatile union ixgbe_adv_rx_desc *rxdp;
1698         struct ixgbe_rx_entry *sw_ring;
1699         struct ixgbe_rx_entry *rxe;
1700         struct rte_mbuf *rxm;
1701         struct rte_mbuf *nmb;
1702         union ixgbe_adv_rx_desc rxd;
1703         uint64_t dma_addr;
1704         uint32_t staterr;
1705         uint32_t pkt_info;
1706         uint16_t pkt_len;
1707         uint16_t rx_id;
1708         uint16_t nb_rx;
1709         uint16_t nb_hold;
1710         uint64_t pkt_flags;
1711         uint64_t vlan_flags;
1712
1713         nb_rx = 0;
1714         nb_hold = 0;
1715         rxq = rx_queue;
1716         rx_id = rxq->rx_tail;
1717         rx_ring = rxq->rx_ring;
1718         sw_ring = rxq->sw_ring;
1719         vlan_flags = rxq->vlan_flags;
1720         while (nb_rx < nb_pkts) {
1721                 /*
1722                  * The order of operations here is important as the DD status
1723                  * bit must not be read after any other descriptor fields.
1724                  * rx_ring and rxdp are pointing to volatile data so the order
1725                  * of accesses cannot be reordered by the compiler. If they were
1726                  * not volatile, they could be reordered which could lead to
1727                  * using invalid descriptor fields when read from rxd.
1728                  */
1729                 rxdp = &rx_ring[rx_id];
1730                 staterr = rxdp->wb.upper.status_error;
1731                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1732                         break;
1733                 rxd = *rxdp;
1734
1735                 /*
1736                  * End of packet.
1737                  *
1738                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1739                  * is likely to be invalid and to be dropped by the various
1740                  * validation checks performed by the network stack.
1741                  *
1742                  * Allocate a new mbuf to replenish the RX ring descriptor.
1743                  * If the allocation fails:
1744                  *    - arrange for that RX descriptor to be the first one
1745                  *      being parsed the next time the receive function is
1746                  *      invoked [on the same queue].
1747                  *
1748                  *    - Stop parsing the RX ring and return immediately.
1749                  *
1750                  * This policy does not drop the packet received in the RX
1751                  * descriptor for which the allocation of a new mbuf failed.
1752                  * Thus, it allows that packet to be retrieved later once
1753                  * mbufs have been freed in the meantime.
1754                  * As a side effect, holding RX descriptors instead of
1755                  * systematically giving them back to the NIC may lead to
1756                  * RX ring exhaustion situations.
1757                  * However, the NIC can gracefully prevent such situations
1758                  * from happening by sending specific "back-pressure" flow
1759                  * control frames to its peer(s).
1760                  */
1761                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1762                            "ext_err_stat=0x%08x pkt_len=%u",
1763                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1764                            (unsigned) rx_id, (unsigned) staterr,
1765                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1766
1767                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1768                 if (nmb == NULL) {
1769                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1770                                    "queue_id=%u", (unsigned) rxq->port_id,
1771                                    (unsigned) rxq->queue_id);
1772                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1773                         break;
1774                 }
1775
1776                 nb_hold++;
1777                 rxe = &sw_ring[rx_id];
1778                 rx_id++;
1779                 if (rx_id == rxq->nb_rx_desc)
1780                         rx_id = 0;
1781
1782                 /* Prefetch next mbuf while processing current one. */
1783                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1784
1785                 /*
1786                  * When next RX descriptor is on a cache-line boundary,
1787                  * prefetch the next 4 RX descriptors and the next 8 pointers
1788                  * to mbufs.
1789                  */
1790                 if ((rx_id & 0x3) == 0) {
1791                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1792                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1793                 }
1794
1795                 rxm = rxe->mbuf;
1796                 rxe->mbuf = nmb;
1797                 dma_addr =
1798                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1799                 rxdp->read.hdr_addr = 0;
1800                 rxdp->read.pkt_addr = dma_addr;
1801
1802                 /*
1803                  * Initialize the returned mbuf.
1804                  * 1) setup generic mbuf fields:
1805                  *    - number of segments,
1806                  *    - next segment,
1807                  *    - packet length,
1808                  *    - RX port identifier.
1809                  * 2) integrate hardware offload data, if any:
1810                  *    - RSS flag & hash,
1811                  *    - IP checksum flag,
1812                  *    - VLAN TCI, if any,
1813                  *    - error flags.
1814                  */
1815                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1816                                       rxq->crc_len);
1817                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1818                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1819                 rxm->nb_segs = 1;
1820                 rxm->next = NULL;
1821                 rxm->pkt_len = pkt_len;
1822                 rxm->data_len = pkt_len;
1823                 rxm->port = rxq->port_id;
1824
1825                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1826                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1827                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1828
1829                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1830                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1831                 pkt_flags = pkt_flags |
1832                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1833                 rxm->ol_flags = pkt_flags;
1834                 rxm->packet_type =
1835                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1836                                                        rxq->pkt_type_mask);
1837
1838                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1839                         rxm->hash.rss = rte_le_to_cpu_32(
1840                                                 rxd.wb.lower.hi_dword.rss);
1841                 else if (pkt_flags & PKT_RX_FDIR) {
1842                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1843                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1844                                         IXGBE_ATR_HASH_MASK;
1845                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1846                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1847                 }
1848                 /*
1849                  * Store the mbuf address into the next entry of the array
1850                  * of returned packets.
1851                  */
1852                 rx_pkts[nb_rx++] = rxm;
1853         }
1854         rxq->rx_tail = rx_id;
1855
1856         /*
1857          * If the number of free RX descriptors is greater than the RX free
1858          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1859          * register.
1860          * Update the RDT with the value of the last processed RX descriptor
1861          * minus 1, to guarantee that the RDT register is never equal to the
1862          * RDH register, which creates a "full" ring situation from the
1863          * hardware point of view...
1864          */
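        /*
         * Example: if rx_id has wrapped to 0, the RDT below is written with
         * nb_rx_desc - 1; otherwise it is written with rx_id - 1, so the RDT
         * can never catch up with the RDH.
         */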
1865         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1866         if (nb_hold > rxq->rx_free_thresh) {
1867                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1868                            "nb_hold=%u nb_rx=%u",
1869                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1870                            (unsigned) rx_id, (unsigned) nb_hold,
1871                            (unsigned) nb_rx);
1872                 rx_id = (uint16_t) ((rx_id == 0) ?
1873                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1874                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1875                 nb_hold = 0;
1876         }
1877         rxq->nb_rx_hold = nb_hold;
1878         return nb_rx;
1879 }
1880
1881 /**
1882  * Detect an RSC descriptor.
1883  */
1884 static inline uint32_t
1885 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1886 {
1887         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1888                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1889 }
1890
1891 /**
1892  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1893  *
1894  * Fill the following info in the HEAD buffer of the Rx cluster:
1895  *    - RX port identifier
1896  *    - hardware offload data, if any:
1897  *      - RSS flag & hash
1898  *      - IP checksum flag
1899  *      - VLAN TCI, if any
1900  *      - error flags
1901  * @head HEAD of the packet cluster
1902  * @desc HW descriptor to get data from
1903  * @rxq Pointer to the Rx queue
1904  */
1905 static inline void
1906 ixgbe_fill_cluster_head_buf(
1907         struct rte_mbuf *head,
1908         union ixgbe_adv_rx_desc *desc,
1909         struct ixgbe_rx_queue *rxq,
1910         uint32_t staterr)
1911 {
1912         uint32_t pkt_info;
1913         uint64_t pkt_flags;
1914
1915         head->port = rxq->port_id;
1916
1917         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1918          * set in the pkt_flags field.
1919          */
1920         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1921         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1922         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1923         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1924         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1925         head->ol_flags = pkt_flags;
1926         head->packet_type =
1927                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1928
1929         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1930                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1931         else if (pkt_flags & PKT_RX_FDIR) {
1932                 head->hash.fdir.hash =
1933                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1934                                                           & IXGBE_ATR_HASH_MASK;
1935                 head->hash.fdir.id =
1936                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1937         }
1938 }
1939
1940 /**
1941  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1942  *
1943  * @rx_queue Rx queue handle
1944  * @rx_pkts table of received packets
1945  * @nb_pkts size of rx_pkts table
1946  * @bulk_alloc if TRUE, bulk allocation is used to refill the HW ring
1947  *
1948  * Handles the Rx HW ring completions when the RSC feature is configured. Uses an
1949  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1950  *
1951  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1952  * 1) When non-EOP RSC completion arrives:
1953  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1954  *       segment's data length.
1955  *    b) Set the "next" pointer of the current segment to point to the segment
1956  *       at the NEXTP index.
1957  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1958  *       in the sw_rsc_ring.
1959  * 2) When EOP arrives we just update the cluster's total length and offload
1960  *    flags and deliver the cluster up to the upper layers. In our case - put it
1961  *    in the rx_pkts table.
1962  *
1963  * Returns the number of received packets/clusters (according to the "bulk
1964  * receive" interface).
1965  */
1966 static inline uint16_t
1967 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1968                     bool bulk_alloc)
1969 {
1970         struct ixgbe_rx_queue *rxq = rx_queue;
1971         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1972         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1973         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1974         uint16_t rx_id = rxq->rx_tail;
1975         uint16_t nb_rx = 0;
1976         uint16_t nb_hold = rxq->nb_rx_hold;
1977         uint16_t prev_id = rxq->rx_tail;
1978
1979         while (nb_rx < nb_pkts) {
1980                 bool eop;
1981                 struct ixgbe_rx_entry *rxe;
1982                 struct ixgbe_scattered_rx_entry *sc_entry;
1983                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1984                 struct ixgbe_rx_entry *next_rxe = NULL;
1985                 struct rte_mbuf *first_seg;
1986                 struct rte_mbuf *rxm;
1987                 struct rte_mbuf *nmb;
1988                 union ixgbe_adv_rx_desc rxd;
1989                 uint16_t data_len;
1990                 uint16_t next_id;
1991                 volatile union ixgbe_adv_rx_desc *rxdp;
1992                 uint32_t staterr;
1993
1994 next_desc:
1995                 /*
1996                  * The code in this whole file uses the volatile pointer to
1997                  * ensure the read ordering of the status and the rest of the
1998                  * descriptor fields (on the compiler level only!!!). This is so
1999                  * UGLY - why not just use the compiler barrier instead? DPDK
2000                  * even has the rte_compiler_barrier() for that.
2001                  *
2002                  * But most importantly this is just wrong because this doesn't
2003                  * ensure memory ordering in a general case at all. For
2004                  * instance, DPDK is supposed to work on Power CPUs where
2005                  * compiler barrier may just not be enough!
2006                  *
2007                  * I tried to write only this function properly to have a
2008                  * starting point (as a part of an LRO/RSC series) but the
2009                  * compiler cursed at me when I tried to cast away the
2010                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2011                  * keeping it the way it is for now.
2012                  *
2013                  * The code in this file is broken in so many other places and
2014                  * will just not work on a big endian CPU anyway; therefore the
2015                  * lines below will have to be revisited together with the rest
2016                  * of the ixgbe PMD.
2017                  *
2018                  * TODO:
2019                  *    - Get rid of "volatile" crap and let the compiler do its
2020                  *      job.
2021                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2022                  *      memory ordering below.
2023                  */
2024                 rxdp = &rx_ring[rx_id];
2025                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2026
2027                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2028                         break;
2029
2030                 rxd = *rxdp;
2031
2032                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2033                                   "staterr=0x%x data_len=%u",
2034                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2035                            rte_le_to_cpu_16(rxd.wb.upper.length));
2036
2037                 if (!bulk_alloc) {
2038                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2039                         if (nmb == NULL) {
2040                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2041                                                   "port_id=%u queue_id=%u",
2042                                            rxq->port_id, rxq->queue_id);
2043
2044                                 rte_eth_devices[rxq->port_id].data->
2045                                                         rx_mbuf_alloc_failed++;
2046                                 break;
2047                         }
2048                 } else if (nb_hold > rxq->rx_free_thresh) {
2049                         uint16_t next_rdt = rxq->rx_free_trigger;
2050
2051                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2052                                 rte_wmb();
2053                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2054                                                             next_rdt);
2055                                 nb_hold -= rxq->rx_free_thresh;
2056                         } else {
2057                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2058                                                   "port_id=%u queue_id=%u",
2059                                            rxq->port_id, rxq->queue_id);
2060
2061                                 rte_eth_devices[rxq->port_id].data->
2062                                                         rx_mbuf_alloc_failed++;
2063                                 break;
2064                         }
2065                 }
2066
2067                 nb_hold++;
2068                 rxe = &sw_ring[rx_id];
2069                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2070
2071                 next_id = rx_id + 1;
2072                 if (next_id == rxq->nb_rx_desc)
2073                         next_id = 0;
2074
2075                 /* Prefetch next mbuf while processing current one. */
2076                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2077
2078                 /*
2079                  * When next RX descriptor is on a cache-line boundary,
2080                  * prefetch the next 4 RX descriptors and the next 4 pointers
2081                  * to mbufs.
2082                  */
2083                 if ((next_id & 0x3) == 0) {
2084                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2085                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2086                 }
2087
2088                 rxm = rxe->mbuf;
2089
2090                 if (!bulk_alloc) {
2091                         __le64 dma =
2092                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2093                         /*
2094                          * Update RX descriptor with the physical address of the
2095                          * new data buffer of the newly allocated mbuf.
2096                          */
2097                         rxe->mbuf = nmb;
2098
2099                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2100                         rxdp->read.hdr_addr = 0;
2101                         rxdp->read.pkt_addr = dma;
2102                 } else
2103                         rxe->mbuf = NULL;
2104
2105                 /*
2106                  * Set data length & data buffer address of mbuf.
2107                  */
2108                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2109                 rxm->data_len = data_len;
2110
2111                 if (!eop) {
2112                         uint16_t nextp_id;
2113                         /*
2114                          * Get next descriptor index:
2115                          *  - For RSC it's in the NEXTP field.
2116                          *  - For a scattered packet - it's just a following
2117                          *    descriptor.
2118                          */
2119                         if (ixgbe_rsc_count(&rxd))
2120                                 nextp_id =
2121                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2122                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2123                         else
2124                                 nextp_id = next_id;
2125
2126                         next_sc_entry = &sw_sc_ring[nextp_id];
2127                         next_rxe = &sw_ring[nextp_id];
2128                         rte_ixgbe_prefetch(next_rxe);
2129                 }
2130
2131                 sc_entry = &sw_sc_ring[rx_id];
2132                 first_seg = sc_entry->fbuf;
2133                 sc_entry->fbuf = NULL;
2134
2135                 /*
2136                  * If this is the first buffer of the received packet,
2137                  * set the pointer to the first mbuf of the packet and
2138                  * initialize its context.
2139                  * Otherwise, update the total length and the number of segments
2140                  * of the current scattered packet, and update the pointer to
2141                  * the last mbuf of the current packet.
2142                  */
2143                 if (first_seg == NULL) {
2144                         first_seg = rxm;
2145                         first_seg->pkt_len = data_len;
2146                         first_seg->nb_segs = 1;
2147                 } else {
2148                         first_seg->pkt_len += data_len;
2149                         first_seg->nb_segs++;
2150                 }
2151
2152                 prev_id = rx_id;
2153                 rx_id = next_id;
2154
2155                 /*
2156                  * If this is not the last buffer of the received packet, update
2157                  * the pointer to the first mbuf at the NEXTP entry in the
2158                  * sw_sc_ring and continue to parse the RX ring.
2159                  */
2160                 if (!eop && next_rxe) {
2161                         rxm->next = next_rxe->mbuf;
2162                         next_sc_entry->fbuf = first_seg;
2163                         goto next_desc;
2164                 }
2165
2166                 /*
2167                  * This is the last buffer of the received packet - return
2168                  * the current cluster to the user.
2169                  */
2170                 rxm->next = NULL;
2171
2172                 /* Initialize the first mbuf of the returned packet */
2173                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2174
2175                 /*
2176                  * Deal with the case when HW CRC stripping is disabled.
2177                  * That can't happen when LRO is enabled, but could still
2178                  * happen in scattered RX mode.
2179                  */
2180                 first_seg->pkt_len -= rxq->crc_len;
2181                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2182                         struct rte_mbuf *lp;
2183
2184                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2185                                 ;
2186
2187                         first_seg->nb_segs--;
2188                         lp->data_len -= rxq->crc_len - rxm->data_len;
2189                         lp->next = NULL;
2190                         rte_pktmbuf_free_seg(rxm);
2191                 } else
2192                         rxm->data_len -= rxq->crc_len;
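                /*
                 * Example of the CRC fixup above, assuming a 4-byte CRC: if
                 * the last segment holds only 2 bytes, that mbuf is freed,
                 * nb_segs is decremented and the previous segment loses the
                 * remaining 2 CRC bytes; otherwise the last segment simply
                 * shrinks by 4 bytes.
                 */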
2193
2194                 /* Prefetch data of first segment, if configured to do so. */
2195                 rte_packet_prefetch((char *)first_seg->buf_addr +
2196                         first_seg->data_off);
2197
2198                 /*
2199                  * Store the mbuf address into the next entry of the array
2200                  * of returned packets.
2201                  */
2202                 rx_pkts[nb_rx++] = first_seg;
2203         }
2204
2205         /*
2206          * Record index of the next RX descriptor to probe.
2207          */
2208         rxq->rx_tail = rx_id;
2209
2210         /*
2211          * If the number of free RX descriptors is greater than the RX free
2212          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2213          * register.
2214          * Update the RDT with the value of the last processed RX descriptor
2215          * minus 1, to guarantee that the RDT register is never equal to the
2216          * RDH register, which creates a "full" ring situation from the
2217          * hardware point of view...
2218          */
2219         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2220                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2221                            "nb_hold=%u nb_rx=%u",
2222                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2223
2224                 rte_wmb();
2225                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2226                 nb_hold = 0;
2227         }
2228
2229         rxq->nb_rx_hold = nb_hold;
2230         return nb_rx;
2231 }
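/*
 * RSC linkage sketch for the handler above: suppose a cluster arrives in
 * descriptors 5, 9 and 14. Descriptor 5 (non-EOP) carries NEXTP = 9, so the
 * head mbuf is parked in sw_sc_ring[9].fbuf and segment 5 is chained to the
 * mbuf at index 9; descriptor 9 points to 14 the same way; when descriptor 14
 * completes with EOP set, the head's pkt_len and nb_segs already cover all
 * three segments and the whole cluster is returned to the caller.
 */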
2232
2233 uint16_t
2234 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2235                                  uint16_t nb_pkts)
2236 {
2237         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2238 }
2239
2240 uint16_t
2241 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2242                                uint16_t nb_pkts)
2243 {
2244         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2245 }
2246
2247 /*********************************************************************
2248  *
2249  *  Queue management functions
2250  *
2251  **********************************************************************/
2252
2253 static void __attribute__((cold))
2254 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2255 {
2256         unsigned i;
2257
2258         if (txq->sw_ring != NULL) {
2259                 for (i = 0; i < txq->nb_tx_desc; i++) {
2260                         if (txq->sw_ring[i].mbuf != NULL) {
2261                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2262                                 txq->sw_ring[i].mbuf = NULL;
2263                         }
2264                 }
2265         }
2266 }
2267
2268 static void __attribute__((cold))
2269 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2270 {
2271         if (txq != NULL &&
2272             txq->sw_ring != NULL)
2273                 rte_free(txq->sw_ring);
2274 }
2275
2276 static void __attribute__((cold))
2277 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2278 {
2279         if (txq != NULL && txq->ops != NULL) {
2280                 txq->ops->release_mbufs(txq);
2281                 txq->ops->free_swring(txq);
2282                 rte_free(txq);
2283         }
2284 }
2285
2286 void __attribute__((cold))
2287 ixgbe_dev_tx_queue_release(void *txq)
2288 {
2289         ixgbe_tx_queue_release(txq);
2290 }
2291
2292 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2293 static void __attribute__((cold))
2294 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2295 {
2296         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2297         struct ixgbe_tx_entry *txe = txq->sw_ring;
2298         uint16_t prev, i;
2299
2300         /* Zero out HW ring memory */
2301         for (i = 0; i < txq->nb_tx_desc; i++) {
2302                 txq->tx_ring[i] = zeroed_desc;
2303         }
2304
2305         /* Initialize SW ring entries */
2306         prev = (uint16_t) (txq->nb_tx_desc - 1);
2307         for (i = 0; i < txq->nb_tx_desc; i++) {
2308                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2309
2310                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2311                 txe[i].mbuf = NULL;
2312                 txe[i].last_id = i;
2313                 txe[prev].next_id = i;
2314                 prev = i;
2315         }
2316
2317         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2318         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2319
2320         txq->tx_tail = 0;
2321         txq->nb_tx_used = 0;
2322         /*
2323          * Always allow 1 descriptor to be un-allocated to avoid
2324          * a H/W race condition
2325          */
2326         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2327         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2328         txq->ctx_curr = 0;
2329         memset((void *)&txq->ctx_cache, 0,
2330                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2331 }
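/*
 * Small example of the software ring init above: for a hypothetical
 * 4-descriptor ring the loop produces next_id links 3->0, 0->1, 1->2 and 2->3
 * with last_id[i] == i, i.e. a circular chain over the whole software ring,
 * and every descriptor starts out reported as done (IXGBE_TXD_STAT_DD).
 */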
2332
2333 static const struct ixgbe_txq_ops def_txq_ops = {
2334         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2335         .free_swring = ixgbe_tx_free_swring,
2336         .reset = ixgbe_reset_tx_queue,
2337 };
2338
2339 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2340  * the queue parameters. Used in tx_queue_setup by the primary process and
2341  * then in dev_init by a secondary process when attaching to an existing ethdev.
2342  */
2343 void __attribute__((cold))
2344 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2345 {
2346         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2347         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2348                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2349                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2350                 dev->tx_pkt_prepare = NULL;
2351 #ifdef RTE_IXGBE_INC_VECTOR
2352                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2353                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2354                                         ixgbe_txq_vec_setup(txq) == 0)) {
2355                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2356                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2357                 } else
2358 #endif
2359                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2360         } else {
2361                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2362                 PMD_INIT_LOG(DEBUG,
2363                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2364                                 (unsigned long)txq->txq_flags,
2365                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2366                 PMD_INIT_LOG(DEBUG,
2367                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2368                                 (unsigned long)txq->tx_rs_thresh,
2369                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2370                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2371                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2372         }
2373 }
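/*
 * In short: a queue whose txq_flags include all of IXGBE_SIMPLE_FLAGS and
 * whose tx_rs_thresh is at least RTE_PMD_IXGBE_TX_MAX_BURST gets the simple
 * (and possibly vector) transmit path with no tx_pkt_prepare callback;
 * anything else falls back to the full-featured ixgbe_xmit_pkts with
 * ixgbe_prep_pkts.
 */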
2374
2375 int __attribute__((cold))
2376 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2377                          uint16_t queue_idx,
2378                          uint16_t nb_desc,
2379                          unsigned int socket_id,
2380                          const struct rte_eth_txconf *tx_conf)
2381 {
2382         const struct rte_memzone *tz;
2383         struct ixgbe_tx_queue *txq;
2384         struct ixgbe_hw     *hw;
2385         uint16_t tx_rs_thresh, tx_free_thresh;
2386
2387         PMD_INIT_FUNC_TRACE();
2388         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2389
2390         /*
2391          * Validate number of transmit descriptors.
2392          * It must not exceed hardware maximum, and must be multiple
2393          * of IXGBE_ALIGN.
2394          */
2395         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2396                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2397                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2398                 return -EINVAL;
2399         }
2400
2401         /*
2402          * The following two parameters control the setting of the RS bit on
2403          * transmit descriptors.
2404          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2405          * descriptors have been used.
2406          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2407          * descriptors are used or if the number of descriptors required
2408          * to transmit a packet is greater than the number of free TX
2409          * descriptors.
2410          * The following constraints must be satisfied:
2411          *  tx_rs_thresh must be greater than 0.
2412          *  tx_rs_thresh must be less than the size of the ring minus 2.
2413          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2414          *  tx_rs_thresh must be a divisor of the ring size.
2415          *  tx_free_thresh must be greater than 0.
2416          *  tx_free_thresh must be less than the size of the ring minus 3.
2417          * One descriptor in the TX ring is used as a sentinel to avoid a
2418          * H/W race condition, hence the maximum threshold constraints.
2419          * When set to zero use default values.
2420          */
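        /*
         * Worked example (illustrative, not normative): with nb_desc = 512,
         * tx_rs_thresh = 32 and tx_free_thresh = 32, every constraint above
         * holds: 32 > 0, 32 < 512 - 2, 32 <= 32, 512 % 32 == 0 and
         * 32 < 512 - 3.  A request with tx_rs_thresh = 48 on the same ring
         * would be rejected below because 512 % 48 != 0.
         */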
2421         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2422                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2423         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2424                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2425         if (tx_rs_thresh >= (nb_desc - 2)) {
2426                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2427                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2428                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2429                         (int)dev->data->port_id, (int)queue_idx);
2430                 return -(EINVAL);
2431         }
2432         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2433                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2434                         "(tx_rs_thresh=%u port=%d queue=%d)",
2435                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2436                         (int)dev->data->port_id, (int)queue_idx);
2437                 return -(EINVAL);
2438         }
2439         if (tx_free_thresh >= (nb_desc - 3)) {
2440                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2441                              "number of TX descriptors minus 3. "
2442                              "(tx_free_thresh=%u "
2443                              "port=%d queue=%d)",
2444                              (unsigned int)tx_free_thresh,
2445                              (int)dev->data->port_id, (int)queue_idx);
2446                 return -(EINVAL);
2447         }
2448         if (tx_rs_thresh > tx_free_thresh) {
2449                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2450                              "tx_free_thresh. (tx_free_thresh=%u "
2451                              "tx_rs_thresh=%u port=%d queue=%d)",
2452                              (unsigned int)tx_free_thresh,
2453                              (unsigned int)tx_rs_thresh,
2454                              (int)dev->data->port_id,
2455                              (int)queue_idx);
2456                 return -(EINVAL);
2457         }
2458         if ((nb_desc % tx_rs_thresh) != 0) {
2459                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2460                              "number of TX descriptors. (tx_rs_thresh=%u "
2461                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2462                              (int)dev->data->port_id, (int)queue_idx);
2463                 return -(EINVAL);
2464         }
2465
2466         /*
2467          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2468          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2469          * by the NIC and all descriptors are written back after the NIC
2470          * accumulates WTHRESH descriptors.
2471          */
2472         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2473                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2474                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2475                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2476                              (int)dev->data->port_id, (int)queue_idx);
2477                 return -(EINVAL);
2478         }
2479
2480         /* Free memory prior to re-allocation if needed... */
2481         if (dev->data->tx_queues[queue_idx] != NULL) {
2482                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2483                 dev->data->tx_queues[queue_idx] = NULL;
2484         }
2485
2486         /* First allocate the tx queue data structure */
2487         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2488                                  RTE_CACHE_LINE_SIZE, socket_id);
2489         if (txq == NULL)
2490                 return -ENOMEM;
2491
2492         /*
2493          * Allocate TX ring hardware descriptors. A memzone large enough to
2494          * handle the maximum ring size is allocated in order to allow for
2495          * resizing in later calls to the queue setup function.
2496          */
2497         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2498                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2499                         IXGBE_ALIGN, socket_id);
2500         if (tz == NULL) {
2501                 ixgbe_tx_queue_release(txq);
2502                 return -ENOMEM;
2503         }
2504
2505         txq->nb_tx_desc = nb_desc;
2506         txq->tx_rs_thresh = tx_rs_thresh;
2507         txq->tx_free_thresh = tx_free_thresh;
2508         txq->pthresh = tx_conf->tx_thresh.pthresh;
2509         txq->hthresh = tx_conf->tx_thresh.hthresh;
2510         txq->wthresh = tx_conf->tx_thresh.wthresh;
2511         txq->queue_id = queue_idx;
2512         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2513                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2514         txq->port_id = dev->data->port_id;
2515         txq->txq_flags = tx_conf->txq_flags;
2516         txq->ops = &def_txq_ops;
2517         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2518
2519         /*
2520          * Use VFTDT as the Tx tail register when running as a virtual function
2521          */
2522         if (hw->mac.type == ixgbe_mac_82599_vf ||
2523             hw->mac.type == ixgbe_mac_X540_vf ||
2524             hw->mac.type == ixgbe_mac_X550_vf ||
2525             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2526             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2527                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2528         else
2529                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2530
2531         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2532         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2533
2534         /* Allocate software ring */
2535         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2536                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2537                                 RTE_CACHE_LINE_SIZE, socket_id);
2538         if (txq->sw_ring == NULL) {
2539                 ixgbe_tx_queue_release(txq);
2540                 return -ENOMEM;
2541         }
2542         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2543                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2544
2545         /* set up vector or scalar TX function as appropriate */
2546         ixgbe_set_tx_function(dev, txq);
2547
2548         txq->ops->reset(txq);
2549
2550         dev->data->tx_queues[queue_idx] = txq;
2551
2552
2553         return 0;
2554 }
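
/*
 * Illustrative application-side sketch, not part of the driver: a Tx queue
 * is normally set up through the ethdev API, which ends up in
 * ixgbe_dev_tx_queue_setup() above.  "portid" and the descriptor count are
 * placeholders; passing a NULL tx_conf makes ethdev fall back to the
 * default_txconf reported by the PMD in rte_eth_dev_info_get().
 *
 *	int ret = rte_eth_tx_queue_setup(portid, 0, 512,
 *					 rte_eth_dev_socket_id(portid),
 *					 NULL);
 *	if (ret < 0)
 *		rte_exit(EXIT_FAILURE, "tx_queue_setup failed: %d\n", ret);
 */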
2555
2556 /**
2557  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2558  *
2559  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2560  * in the sw_rsc_ring is not set to NULL but rather points to the next
2561  * mbuf of this RSC aggregation (that has not been completed yet and still
2562  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2563  * just free the first "nb_segs" segments of the cluster explicitly by calling
2564  * rte_pktmbuf_free_seg() on each of them.
2565  *
2566  * @m scattered cluster head
2567  */
2568 static void __attribute__((cold))
2569 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2570 {
2571         uint8_t i, nb_segs = m->nb_segs;
2572         struct rte_mbuf *next_seg;
2573
2574         for (i = 0; i < nb_segs; i++) {
2575                 next_seg = m->next;
2576                 rte_pktmbuf_free_seg(m);
2577                 m = next_seg;
2578         }
2579 }
2580
2581 static void __attribute__((cold))
2582 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2583 {
2584         unsigned i;
2585
2586 #ifdef RTE_IXGBE_INC_VECTOR
2587         /* SSE Vector driver has a different way of releasing mbufs. */
2588         if (rxq->rx_using_sse) {
2589                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2590                 return;
2591         }
2592 #endif
2593
2594         if (rxq->sw_ring != NULL) {
2595                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2596                         if (rxq->sw_ring[i].mbuf != NULL) {
2597                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2598                                 rxq->sw_ring[i].mbuf = NULL;
2599                         }
2600                 }
2601                 if (rxq->rx_nb_avail) {
2602                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2603                                 struct rte_mbuf *mb;
2604
2605                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2606                                 rte_pktmbuf_free_seg(mb);
2607                         }
2608                         rxq->rx_nb_avail = 0;
2609                 }
2610         }
2611
2612         if (rxq->sw_sc_ring)
2613                 for (i = 0; i < rxq->nb_rx_desc; i++)
2614                         if (rxq->sw_sc_ring[i].fbuf) {
2615                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2616                                 rxq->sw_sc_ring[i].fbuf = NULL;
2617                         }
2618 }
2619
2620 static void __attribute__((cold))
2621 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2622 {
2623         if (rxq != NULL) {
2624                 ixgbe_rx_queue_release_mbufs(rxq);
2625                 rte_free(rxq->sw_ring);
2626                 rte_free(rxq->sw_sc_ring);
2627                 rte_free(rxq);
2628         }
2629 }
2630
2631 void __attribute__((cold))
2632 ixgbe_dev_rx_queue_release(void *rxq)
2633 {
2634         ixgbe_rx_queue_release(rxq);
2635 }
2636
2637 /*
2638  * Check if Rx Burst Bulk Alloc function can be used.
2639  * Return
2640  *        0: the preconditions are satisfied and the bulk allocation function
2641  *           can be used.
2642  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2643  *           function must be used.
2644  */
2645 static inline int __attribute__((cold))
2646 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2647 {
2648         int ret = 0;
2649
2650         /*
2651          * Make sure the following pre-conditions are satisfied:
2652          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2653          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2654          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2655          * Scattered packets are not supported.  This should be checked
2656          * outside of this function.
2657          */
2658         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2659                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2660                              "rxq->rx_free_thresh=%d, "
2661                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2662                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2663                 ret = -EINVAL;
2664         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2665                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2666                              "rxq->rx_free_thresh=%d, "
2667                              "rxq->nb_rx_desc=%d",
2668                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2669                 ret = -EINVAL;
2670         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2671                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2672                              "rxq->nb_rx_desc=%d, "
2673                              "rxq->rx_free_thresh=%d",
2674                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2675                 ret = -EINVAL;
2676         }
2677
2678         return ret;
2679 }
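
/*
 * Worked example for the checks above (illustrative): assuming
 * RTE_PMD_IXGBE_RX_MAX_BURST is 32 (see ixgbe_rxtx.h), a queue with
 * nb_rx_desc = 512 and rx_free_thresh = 64 passes all three preconditions
 * (64 >= 32, 64 < 512, 512 % 64 == 0) and may use the bulk allocation Rx
 * path, whereas rx_free_thresh = 24 fails the first check and forces the
 * default Rx burst function for the whole port.
 */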
2680
2681 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2682 static void __attribute__((cold))
2683 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2684 {
2685         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2686         unsigned i;
2687         uint16_t len = rxq->nb_rx_desc;
2688
2689         /*
2690          * By default, the Rx queue setup function allocates enough memory for
2691          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2692          * extra memory at the end of the descriptor ring to be zero'd out.
2693          */
2694         if (adapter->rx_bulk_alloc_allowed)
2695                 /* zero out extra memory */
2696                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2697
2698         /*
2699          * Zero out HW ring memory. Zero out extra memory at the end of
2700          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2701          * reads extra memory as zeros.
2702          */
2703         for (i = 0; i < len; i++) {
2704                 rxq->rx_ring[i] = zeroed_desc;
2705         }
2706
2707         /*
2708          * initialize extra software ring entries. Space for these extra
2709          * entries is always allocated
2710          */
2711         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2712         for (i = rxq->nb_rx_desc; i < len; ++i) {
2713                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2714         }
2715
2716         rxq->rx_nb_avail = 0;
2717         rxq->rx_next_avail = 0;
2718         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2719         rxq->rx_tail = 0;
2720         rxq->nb_rx_hold = 0;
2721         rxq->pkt_first_seg = NULL;
2722         rxq->pkt_last_seg = NULL;
2723
2724 #ifdef RTE_IXGBE_INC_VECTOR
2725         rxq->rxrearm_start = 0;
2726         rxq->rxrearm_nb = 0;
2727 #endif
2728 }
2729
2730 int __attribute__((cold))
2731 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2732                          uint16_t queue_idx,
2733                          uint16_t nb_desc,
2734                          unsigned int socket_id,
2735                          const struct rte_eth_rxconf *rx_conf,
2736                          struct rte_mempool *mp)
2737 {
2738         const struct rte_memzone *rz;
2739         struct ixgbe_rx_queue *rxq;
2740         struct ixgbe_hw     *hw;
2741         uint16_t len;
2742         struct ixgbe_adapter *adapter =
2743                 (struct ixgbe_adapter *)dev->data->dev_private;
2744
2745         PMD_INIT_FUNC_TRACE();
2746         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2747
2748         /*
2749          * Validate number of receive descriptors.
2750          * It must not exceed hardware maximum, and must be multiple
2751          * of IXGBE_ALIGN.
2752          */
2753         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2754                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2755                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2756                 return -EINVAL;
2757         }
2758
2759         /* Free memory prior to re-allocation if needed... */
2760         if (dev->data->rx_queues[queue_idx] != NULL) {
2761                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2762                 dev->data->rx_queues[queue_idx] = NULL;
2763         }
2764
2765         /* First allocate the rx queue data structure */
2766         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2767                                  RTE_CACHE_LINE_SIZE, socket_id);
2768         if (rxq == NULL)
2769                 return -ENOMEM;
2770         rxq->mb_pool = mp;
2771         rxq->nb_rx_desc = nb_desc;
2772         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2773         rxq->queue_id = queue_idx;
2774         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2775                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2776         rxq->port_id = dev->data->port_id;
2777         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2778                                                         0 : ETHER_CRC_LEN);
2779         rxq->drop_en = rx_conf->rx_drop_en;
2780         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2781
2782         /*
2783          * The packet type in the RX descriptor differs between NICs.
2784          * Some bits are used by x550 but reserved on other NICs,
2785          * so set a different mask for each NIC type.
2786          */
2787         if (hw->mac.type == ixgbe_mac_X550 ||
2788             hw->mac.type == ixgbe_mac_X550EM_x ||
2789             hw->mac.type == ixgbe_mac_X550EM_a ||
2790             hw->mac.type == ixgbe_mac_X550_vf ||
2791             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2792             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2793                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2794         else
2795                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2796
2797         /*
2798          * Allocate RX ring hardware descriptors. A memzone large enough to
2799          * handle the maximum ring size is allocated in order to allow for
2800          * resizing in later calls to the queue setup function.
2801          */
2802         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2803                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2804         if (rz == NULL) {
2805                 ixgbe_rx_queue_release(rxq);
2806                 return -ENOMEM;
2807         }
2808
2809         /*
2810          * Zero init all the descriptors in the ring.
2811          */
2812         memset(rz->addr, 0, RX_RING_SZ);
2813
2814         /*
2815          * Use VFRDT and VFRDH when running as a Virtual Function
2816          */
2817         if (hw->mac.type == ixgbe_mac_82599_vf ||
2818             hw->mac.type == ixgbe_mac_X540_vf ||
2819             hw->mac.type == ixgbe_mac_X550_vf ||
2820             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2821             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2822                 rxq->rdt_reg_addr =
2823                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2824                 rxq->rdh_reg_addr =
2825                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2826         } else {
2827                 rxq->rdt_reg_addr =
2828                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2829                 rxq->rdh_reg_addr =
2830                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2831         }
2832
2833         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2834         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2835
2836         /*
2837          * Certain constraints must be met in order to use the bulk buffer
2838          * allocation Rx burst function. If any of the Rx queues does not meet
2839          * them, the feature should be disabled for the whole port.
2840          */
2841         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2842                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2843                                     "preconditions - canceling the feature for "
2844                                     "the whole port[%d]",
2845                              rxq->queue_id, rxq->port_id);
2846                 adapter->rx_bulk_alloc_allowed = false;
2847         }
2848
2849         /*
2850          * Allocate software ring. Allow for space at the end of the
2851          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2852          * function does not access an invalid memory region.
2853          */
2854         len = nb_desc;
2855         if (adapter->rx_bulk_alloc_allowed)
2856                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2857
2858         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2859                                           sizeof(struct ixgbe_rx_entry) * len,
2860                                           RTE_CACHE_LINE_SIZE, socket_id);
2861         if (!rxq->sw_ring) {
2862                 ixgbe_rx_queue_release(rxq);
2863                 return -ENOMEM;
2864         }
2865
2866         /*
2867          * Always allocate even if it's not going to be needed in order to
2868          * simplify the code.
2869          *
2870          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2871          * be requested in ixgbe_dev_rx_init(), which is called later from
2872          * dev_start() flow.
2873          */
2874         rxq->sw_sc_ring =
2875                 rte_zmalloc_socket("rxq->sw_sc_ring",
2876                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2877                                    RTE_CACHE_LINE_SIZE, socket_id);
2878         if (!rxq->sw_sc_ring) {
2879                 ixgbe_rx_queue_release(rxq);
2880                 return -ENOMEM;
2881         }
2882
2883         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2884                             "dma_addr=0x%"PRIx64,
2885                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2886                      rxq->rx_ring_phys_addr);
2887
2888         if (!rte_is_power_of_2(nb_desc)) {
2889                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2890                                     "preconditions - canceling the feature for "
2891                                     "the whole port[%d]",
2892                              rxq->queue_id, rxq->port_id);
2893                 adapter->rx_vec_allowed = false;
2894         } else
2895                 ixgbe_rxq_vec_setup(rxq);
2896
2897         dev->data->rx_queues[queue_idx] = rxq;
2898
2899         ixgbe_reset_rx_queue(adapter, rxq);
2900
2901         return 0;
2902 }
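
/*
 * Illustrative application-side sketch, not part of the driver: the usual
 * route into ixgbe_dev_rx_queue_setup() above.  The pool sizing numbers and
 * "portid" are placeholders; with a NULL rx_conf the defaults reported by
 * the PMD are used.
 *
 *	struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 8192, 256,
 *			0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *	if (mp == NULL)
 *		rte_exit(EXIT_FAILURE, "cannot create mbuf pool\n");
 *	if (rte_eth_rx_queue_setup(portid, 0, 512,
 *			rte_eth_dev_socket_id(portid), NULL, mp) < 0)
 *		rte_exit(EXIT_FAILURE, "rx_queue_setup failed\n");
 *
 * Keeping the descriptor count a power of two additionally leaves the
 * vector Rx path available, as checked near the end of the function above.
 */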
2903
2904 uint32_t
2905 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2906 {
2907 #define IXGBE_RXQ_SCAN_INTERVAL 4
2908         volatile union ixgbe_adv_rx_desc *rxdp;
2909         struct ixgbe_rx_queue *rxq;
2910         uint32_t desc = 0;
2911
2912         if (rx_queue_id >= dev->data->nb_rx_queues) {
2913                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2914                 return 0;
2915         }
2916
2917         rxq = dev->data->rx_queues[rx_queue_id];
2918         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2919
2920         while ((desc < rxq->nb_rx_desc) &&
2921                 (rxdp->wb.upper.status_error &
2922                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2923                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2924                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2925                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2926                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2927                                 desc - rxq->nb_rx_desc]);
2928         }
2929
2930         return desc;
2931 }
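
/*
 * Illustrative sketch, not part of the driver: an application can query the
 * Rx ring fill level through the ethdev wrapper, which lands in the function
 * above.  Because the ring is scanned in steps of IXGBE_RXQ_SCAN_INTERVAL
 * (4) descriptors, the result is a coarse estimate rather than an exact
 * count.  "portid" is a placeholder.
 *
 *	int used = rte_eth_rx_queue_count(portid, 0);
 *	if (used > 0)
 *		printf("about %d Rx descriptors are holding packets\n", used);
 */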
2932
2933 int
2934 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2935 {
2936         volatile union ixgbe_adv_rx_desc *rxdp;
2937         struct ixgbe_rx_queue *rxq = rx_queue;
2938         uint32_t desc;
2939
2940         if (unlikely(offset >= rxq->nb_rx_desc))
2941                 return 0;
2942         desc = rxq->rx_tail + offset;
2943         if (desc >= rxq->nb_rx_desc)
2944                 desc -= rxq->nb_rx_desc;
2945
2946         rxdp = &rxq->rx_ring[desc];
2947         return !!(rxdp->wb.upper.status_error &
2948                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2949 }
2950
2951 void __attribute__((cold))
2952 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2953 {
2954         unsigned i;
2955         struct ixgbe_adapter *adapter =
2956                 (struct ixgbe_adapter *)dev->data->dev_private;
2957
2958         PMD_INIT_FUNC_TRACE();
2959
2960         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2961                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2962
2963                 if (txq != NULL) {
2964                         txq->ops->release_mbufs(txq);
2965                         txq->ops->reset(txq);
2966                 }
2967         }
2968
2969         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2970                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2971
2972                 if (rxq != NULL) {
2973                         ixgbe_rx_queue_release_mbufs(rxq);
2974                         ixgbe_reset_rx_queue(adapter, rxq);
2975                 }
2976         }
2977 }
2978
2979 void
2980 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2981 {
2982         unsigned i;
2983
2984         PMD_INIT_FUNC_TRACE();
2985
2986         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2987                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2988                 dev->data->rx_queues[i] = NULL;
2989         }
2990         dev->data->nb_rx_queues = 0;
2991
2992         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2993                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2994                 dev->data->tx_queues[i] = NULL;
2995         }
2996         dev->data->nb_tx_queues = 0;
2997 }
2998
2999 /*********************************************************************
3000  *
3001  *  Device RX/TX init functions
3002  *
3003  **********************************************************************/
3004
3005 /**
3006  * Receive Side Scaling (RSS)
3007  * See section 7.1.2.8 in the following document:
3008  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3009  *
3010  * Principles:
3011  * The source and destination IP addresses of the IP header and the source
3012  * and destination ports of TCP/UDP headers, if any, of received packets are
3013  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3014  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3015  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3016  * RSS output index, which is used as the index of the RX queue where the
3017  * received packets are stored.
3018  * The following output is supplied in the RX write-back descriptor:
3019  *     - 32-bit result of the Microsoft RSS hash function,
3020  *     - 4-bit RSS type field.
3021  */
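
/*
 * Worked example (illustrative): for a packet whose 32-bit RSS hash is
 * 0x12345678, the RETA index is taken from the 7 LSBs,
 * 0x12345678 & 0x7F = 0x78 = 120, and the packet is steered to whichever Rx
 * queue entry 120 of the redirection table currently selects.  The full
 * 32-bit hash is also made available to the application in mbuf->hash.rss.
 */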
3022
3023 /*
3024  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3025  * Used as the default key.
3026  */
3027 static uint8_t rss_intel_key[40] = {
3028         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3029         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3030         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3031         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3032         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3033 };
3034
3035 static void
3036 ixgbe_rss_disable(struct rte_eth_dev *dev)
3037 {
3038         struct ixgbe_hw *hw;
3039         uint32_t mrqc;
3040         uint32_t mrqc_reg;
3041
3042         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3043         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3044         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3045         mrqc &= ~IXGBE_MRQC_RSSEN;
3046         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3047 }
3048
3049 static void
3050 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3051 {
3052         uint8_t  *hash_key;
3053         uint32_t mrqc;
3054         uint32_t rss_key;
3055         uint64_t rss_hf;
3056         uint16_t i;
3057         uint32_t mrqc_reg;
3058         uint32_t rssrk_reg;
3059
3060         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3061         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3062
3063         hash_key = rss_conf->rss_key;
3064         if (hash_key != NULL) {
3065                 /* Fill in RSS hash key */
3066                 for (i = 0; i < 10; i++) {
3067                         rss_key  = hash_key[(i * 4)];
3068                         rss_key |= hash_key[(i * 4) + 1] << 8;
3069                         rss_key |= hash_key[(i * 4) + 2] << 16;
3070                         rss_key |= hash_key[(i * 4) + 3] << 24;
3071                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3072                 }
3073         }
3074
3075         /* Set configured hashing protocols in MRQC register */
3076         rss_hf = rss_conf->rss_hf;
3077         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3078         if (rss_hf & ETH_RSS_IPV4)
3079                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3080         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3081                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3082         if (rss_hf & ETH_RSS_IPV6)
3083                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3084         if (rss_hf & ETH_RSS_IPV6_EX)
3085                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3086         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3087                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3088         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3089                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3090         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3091                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3092         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3093                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3094         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3095                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3096         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3097 }
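
/*
 * Illustrative application-side sketch, not part of the driver: runtime RSS
 * reconfiguration goes through rte_eth_dev_rss_hash_update(), which reaches
 * the function above via ixgbe_dev_rss_hash_update() below.  The 40-byte key
 * length matches the ten 32-bit RSSRK register writes above; "portid" is a
 * placeholder and the key must be filled with application-chosen bytes.
 *
 *	uint8_t key[40];
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = key,
 *		.rss_key_len = sizeof(key),
 *		.rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP |
 *			  ETH_RSS_NONFRAG_IPV4_UDP,
 *	};
 *	if (rte_eth_dev_rss_hash_update(portid, &conf) != 0)
 *		printf("RSS hash update was rejected\n");
 */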
3098
3099 int
3100 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3101                           struct rte_eth_rss_conf *rss_conf)
3102 {
3103         struct ixgbe_hw *hw;
3104         uint32_t mrqc;
3105         uint64_t rss_hf;
3106         uint32_t mrqc_reg;
3107
3108         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3109
3110         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3111                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3112                         "NIC.");
3113                 return -ENOTSUP;
3114         }
3115         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3116
3117         /*
3118          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3119          *     "RSS enabling cannot be done dynamically while it must be
3120          *      preceded by a software reset"
3121          * Before changing anything, first check that the update RSS operation
3122          * does not attempt to disable RSS, if RSS was enabled at
3123          * initialization time, or does not attempt to enable RSS, if RSS was
3124          * disabled at initialization time.
3125          */
3126         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3127         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3128         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3129                 if (rss_hf != 0) /* Enable RSS */
3130                         return -(EINVAL);
3131                 return 0; /* Nothing to do */
3132         }
3133         /* RSS enabled */
3134         if (rss_hf == 0) /* Disable RSS */
3135                 return -(EINVAL);
3136         ixgbe_hw_rss_hash_set(hw, rss_conf);
3137         return 0;
3138 }
3139
3140 int
3141 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3142                             struct rte_eth_rss_conf *rss_conf)
3143 {
3144         struct ixgbe_hw *hw;
3145         uint8_t *hash_key;
3146         uint32_t mrqc;
3147         uint32_t rss_key;
3148         uint64_t rss_hf;
3149         uint16_t i;
3150         uint32_t mrqc_reg;
3151         uint32_t rssrk_reg;
3152
3153         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3154         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3155         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3156         hash_key = rss_conf->rss_key;
3157         if (hash_key != NULL) {
3158                 /* Return RSS hash key */
3159                 for (i = 0; i < 10; i++) {
3160                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3161                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3162                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3163                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3164                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3165                 }
3166         }
3167
3168         /* Get RSS functions configured in MRQC register */
3169         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3170         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3171                 rss_conf->rss_hf = 0;
3172                 return 0;
3173         }
3174         rss_hf = 0;
3175         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3176                 rss_hf |= ETH_RSS_IPV4;
3177         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3178                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3179         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3180                 rss_hf |= ETH_RSS_IPV6;
3181         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3182                 rss_hf |= ETH_RSS_IPV6_EX;
3183         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3184                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3185         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3186                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3187         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3188                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3189         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3190                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3191         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3192                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3193         rss_conf->rss_hf = rss_hf;
3194         return 0;
3195 }
3196
3197 static void
3198 ixgbe_rss_configure(struct rte_eth_dev *dev)
3199 {
3200         struct rte_eth_rss_conf rss_conf;
3201         struct ixgbe_hw *hw;
3202         uint32_t reta;
3203         uint16_t i;
3204         uint16_t j;
3205         uint16_t sp_reta_size;
3206         uint32_t reta_reg;
3207
3208         PMD_INIT_FUNC_TRACE();
3209         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3210
3211         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3212
3213         /*
3214          * Fill in redirection table
3215          * The byte-swap is needed because NIC registers are in
3216          * little-endian order.
3217          */
3218         reta = 0;
3219         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3220                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3221
3222                 if (j == dev->data->nb_rx_queues)
3223                         j = 0;
3224                 reta = (reta << 8) | j;
3225                 if ((i & 3) == 3)
3226                         IXGBE_WRITE_REG(hw, reta_reg,
3227                                         rte_bswap32(reta));
3228         }
3229
3230         /*
3231          * Configure the RSS key and the RSS protocols used to compute
3232          * the RSS hash of input packets.
3233          */
3234         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3235         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3236                 ixgbe_rss_disable(dev);
3237                 return;
3238         }
3239         if (rss_conf.rss_key == NULL)
3240                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3241         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3242 }
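
/*
 * Worked example for the RETA fill loop above (illustrative): with four Rx
 * queues the first four iterations yield j = 0, 1, 2, 3 and build
 * reta = 0x00010203; rte_bswap32() turns this into 0x03020100 before the
 * register write, so RETA entry 0 occupies the least significant byte of the
 * little-endian register and maps to queue 0, entry 1 to queue 1, and so on.
 * The same 0..3 pattern then repeats for the rest of the table.
 */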
3243
3244 #define NUM_VFTA_REGISTERS 128
3245 #define NIC_RX_BUFFER_SIZE 0x200
3246 #define X550_RX_BUFFER_SIZE 0x180
3247
3248 static void
3249 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3250 {
3251         struct rte_eth_vmdq_dcb_conf *cfg;
3252         struct ixgbe_hw *hw;
3253         enum rte_eth_nb_pools num_pools;
3254         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3255         uint16_t pbsize;
3256         uint8_t nb_tcs; /* number of traffic classes */
3257         int i;
3258
3259         PMD_INIT_FUNC_TRACE();
3260         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3261         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3262         num_pools = cfg->nb_queue_pools;
3263         /* Check we have a valid number of pools */
3264         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3265                 ixgbe_rss_disable(dev);
3266                 return;
3267         }
3268         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3269         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3270
3271         /*
3272          * RXPBSIZE
3273          * split rx buffer up into sections, each for 1 traffic class
3274          */
3275         switch (hw->mac.type) {
3276         case ixgbe_mac_X550:
3277         case ixgbe_mac_X550EM_x:
3278         case ixgbe_mac_X550EM_a:
3279                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3280                 break;
3281         default:
3282                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3283                 break;
3284         }
3285         for (i = 0; i < nb_tcs; i++) {
3286                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3287
3288                 /* clear 10 bits */
3289                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3290                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3291                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3292         }
3293         /* zero alloc all unused TCs */
3294         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3295                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3296
3297                 /* clear 10 bits */
3298                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3299                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3300         }
3301
3302         /* MRQC: enable vmdq and dcb */
3303         mrqc = (num_pools == ETH_16_POOLS) ?
3304                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3305         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3306
3307         /* PFVTCTL: turn on virtualisation and set the default pool */
3308         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3309         if (cfg->enable_default_pool) {
3310                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3311         } else {
3312                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3313         }
3314
3315         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3316
3317         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3318         queue_mapping = 0;
3319         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3320                 /*
3321                  * mapping is done with 3 bits per priority,
3322                  * so shift by i*3 each time
3323                  */
3324                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3325
3326         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3327
3328         /* RTRPCS: DCB related */
3329         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3330
3331         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3332         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3333         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3334         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3335
3336         /* VFTA - enable all vlan filters */
3337         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3338                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3339         }
3340
3341         /* VFRE: pool enabling for receive - 16 or 32 */
3342         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3343                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3344
3345         /*
3346          * MPSAR - allow pools to read specific mac addresses
3347          * In this case, all pools should be able to read from mac addr 0
3348          */
3349         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3350         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3351
3352         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3353         for (i = 0; i < cfg->nb_pool_maps; i++) {
3354                 /* set vlan id in VF register and set the valid bit */
3355                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3356                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3357                 /*
3358                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3359                  * pools, we only need to use the first half of the register
3360                  * i.e. bits 0-31
3361                  */
3362                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3363         }
3364 }
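
/*
 * Worked example for the RTRUP2TC packing above (illustrative): with a user
 * priority to TC map of dcb_tc[] = {0, 0, 1, 1, 2, 2, 3, 3}, each 3-bit
 * field lands at bit position i * 3, so
 *   queue_mapping = (1 << 6) | (1 << 9) | (2 << 12) | (2 << 15) |
 *                   (3 << 18) | (3 << 21) = 0x006D2240,
 * which is the value written to the RTRUP2TC register.
 */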
3365
3366 /**
3367  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3368  * @dev: pointer to eth_dev structure
3369  * @dcb_config: pointer to ixgbe_dcb_config structure
3370  */
3371 static void
3372 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3373                        struct ixgbe_dcb_config *dcb_config)
3374 {
3375         uint32_t reg;
3376         uint32_t q;
3377         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3378
3379         PMD_INIT_FUNC_TRACE();
3380         if (hw->mac.type != ixgbe_mac_82598EB) {
3381                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3382                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3383                 reg |= IXGBE_RTTDCS_ARBDIS;
3384                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3385
3386                 /* Enable DCB for Tx with 8 TCs */
3387                 if (dcb_config->num_tcs.pg_tcs == 8) {
3388                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3389                 } else {
3390                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3391                 }
3392                 if (dcb_config->vt_mode)
3393                         reg |= IXGBE_MTQC_VT_ENA;
3394                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3395
3396                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3397                         /* Disable drop for all queues in VMDQ mode*/
3398                         for (q = 0; q < 128; q++)
3399                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3400                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3401                 } else {
3402                         /* Enable drop for all queues in SRIOV mode */
3403                         for (q = 0; q < 128; q++)
3404                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3405                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE));
3406                 }
3407
3408                 /* Enable the Tx desc arbiter */
3409                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3410                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3411                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3412
3413                 /* Enable Security TX Buffer IFG for DCB */
3414                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3415                 reg |= IXGBE_SECTX_DCB;
3416                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3417         }
3418 }
3419
3420 /**
3421  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3422  * @dev: pointer to rte_eth_dev structure
3423  * @dcb_config: pointer to ixgbe_dcb_config structure
3424  */
3425 static void
3426 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3427                         struct ixgbe_dcb_config *dcb_config)
3428 {
3429         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3430                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3431         struct ixgbe_hw *hw =
3432                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3433
3434         PMD_INIT_FUNC_TRACE();
3435         if (hw->mac.type != ixgbe_mac_82598EB)
3436                 /* PF VF Transmit Enable */
3437                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3438                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3439
3440         /* Configure general DCB TX parameters */
3441         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3442 }
3443
3444 static void
3445 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3446                         struct ixgbe_dcb_config *dcb_config)
3447 {
3448         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3449                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3450         struct ixgbe_dcb_tc_config *tc;
3451         uint8_t i, j;
3452
3453         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3454         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3455                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3456                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3457         } else {
3458                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3459                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3460         }
3461         /* User Priority to Traffic Class mapping */
3462         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3463                 j = vmdq_rx_conf->dcb_tc[i];
3464                 tc = &dcb_config->tc_config[j];
3465                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3466                                                 (uint8_t)(1 << i);
3467         }
3468 }
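
/*
 * Worked example for the mapping loop above (illustrative): with
 * nb_queue_pools = ETH_16_POOLS (8 traffic classes) and a priority map of
 * dcb_tc[] = {0, 0, 0, 0, 1, 1, 1, 1}, user priorities 0-3 set bits 0-3 of
 * TC0's up_to_tc_bitmap (0x0F) and priorities 4-7 set bits 4-7 of TC1's
 * bitmap (0xF0), while the other traffic classes receive no bits here.
 */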
3469
3470 static void
3471 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3472                         struct ixgbe_dcb_config *dcb_config)
3473 {
3474         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3475                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3476         struct ixgbe_dcb_tc_config *tc;
3477         uint8_t i, j;
3478
3479         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3480         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3481                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3482                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3483         } else {
3484                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3485                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3486         }
3487
3488         /* User Priority to Traffic Class mapping */
3489         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3490                 j = vmdq_tx_conf->dcb_tc[i];
3491                 tc = &dcb_config->tc_config[j];
3492                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3493                                                 (uint8_t)(1 << i);
3494         }
3495 }
3496
3497 static void
3498 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3499                 struct ixgbe_dcb_config *dcb_config)
3500 {
3501         struct rte_eth_dcb_rx_conf *rx_conf =
3502                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3503         struct ixgbe_dcb_tc_config *tc;
3504         uint8_t i, j;
3505
3506         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3507         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3508
3509         /* User Priority to Traffic Class mapping */
3510         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3511                 j = rx_conf->dcb_tc[i];
3512                 tc = &dcb_config->tc_config[j];
3513                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3514                                                 (uint8_t)(1 << i);
3515         }
3516 }
3517
3518 static void
3519 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3520                 struct ixgbe_dcb_config *dcb_config)
3521 {
3522         struct rte_eth_dcb_tx_conf *tx_conf =
3523                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3524         struct ixgbe_dcb_tc_config *tc;
3525         uint8_t i, j;
3526
3527         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3528         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3529
3530         /* User Priority to Traffic Class mapping */
3531         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3532                 j = tx_conf->dcb_tc[i];
3533                 tc = &dcb_config->tc_config[j];
3534                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3535                                                 (uint8_t)(1 << i);
3536         }
3537 }
3538
3539 /**
3540  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3541  * @hw: pointer to hardware structure
3542  * @dcb_config: pointer to ixgbe_dcb_config structure
3543  */
3544 static void
3545 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3546                struct ixgbe_dcb_config *dcb_config)
3547 {
3548         uint32_t reg;
3549         uint32_t vlanctrl;
3550         uint8_t i;
3551
3552         PMD_INIT_FUNC_TRACE();
3553         /*
3554          * Disable the arbiter before changing parameters
3555          * (always enable recycle mode; WSP)
3556          */
3557         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3558         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3559
3560         if (hw->mac.type != ixgbe_mac_82598EB) {
3561                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3562                 if (dcb_config->num_tcs.pg_tcs == 4) {
3563                         if (dcb_config->vt_mode)
3564                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3565                                         IXGBE_MRQC_VMDQRT4TCEN;
3566                         else {
3567                                 /* Whether the mode is DCB or DCB_RSS, just
3568                                  * set MRQE to RSSxTCEN; RSS itself is
3569                                  * controlled by the RSS_FIELD bits.
3570                                  */
3571                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3572                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3573                                         IXGBE_MRQC_RTRSS4TCEN;
3574                         }
3575                 }
3576                 if (dcb_config->num_tcs.pg_tcs == 8) {
3577                         if (dcb_config->vt_mode)
3578                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3579                                         IXGBE_MRQC_VMDQRT8TCEN;
3580                         else {
3581                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3582                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3583                                         IXGBE_MRQC_RTRSS8TCEN;
3584                         }
3585                 }
3586
3587                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3588         }
3589
3590         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3591         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3592         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3593         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3594
3595         /* VFTA - enable all vlan filters */
3596         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3597                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3598         }
3599
3600         /*
3601          * Configure Rx packet plane (recycle mode; WSP) and
3602          * enable arbiter
3603          */
3604         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3605         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3606 }
3607
3608 static void
3609 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3610                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3611 {
3612         switch (hw->mac.type) {
3613         case ixgbe_mac_82598EB:
3614                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3615                 break;
3616         case ixgbe_mac_82599EB:
3617         case ixgbe_mac_X540:
3618         case ixgbe_mac_X550:
3619         case ixgbe_mac_X550EM_x:
3620         case ixgbe_mac_X550EM_a:
3621                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3622                                                   tsa, map);
3623                 break;
3624         default:
3625                 break;
3626         }
3627 }
3628
3629 static void
3630 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3631                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3632 {
3633         switch (hw->mac.type) {
3634         case ixgbe_mac_82598EB:
3635                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3636                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3637                 break;
3638         case ixgbe_mac_82599EB:
3639         case ixgbe_mac_X540:
3640         case ixgbe_mac_X550:
3641         case ixgbe_mac_X550EM_x:
3642         case ixgbe_mac_X550EM_a:
3643                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3644                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3645                 break;
3646         default:
3647                 break;
3648         }
3649 }
3650
3651 #define DCB_RX_CONFIG  1
3652 #define DCB_TX_CONFIG  1
3653 #define DCB_TX_PB      1024
3654 /**
3655  * ixgbe_dcb_hw_configure - Enable DCB and configure
3656  * general DCB in VT mode and non-VT mode parameters
3657  * @dev: pointer to rte_eth_dev structure
3658  * @dcb_config: pointer to ixgbe_dcb_config structure
3659  */
3660 static int
3661 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3662                         struct ixgbe_dcb_config *dcb_config)
3663 {
3664         int     ret = 0;
3665         uint8_t i, pfc_en, nb_tcs;
3666         uint16_t pbsize, rx_buffer_size;
3667         uint8_t config_dcb_rx = 0;
3668         uint8_t config_dcb_tx = 0;
3669         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3670         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3671         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3672         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3673         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3674         struct ixgbe_dcb_tc_config *tc;
3675         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3676         struct ixgbe_hw *hw =
3677                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3678
3679         switch (dev->data->dev_conf.rxmode.mq_mode) {
3680         case ETH_MQ_RX_VMDQ_DCB:
3681                 dcb_config->vt_mode = true;
3682                 if (hw->mac.type != ixgbe_mac_82598EB) {
3683                         config_dcb_rx = DCB_RX_CONFIG;
3684                         /*
3685                          * get DCB and VT RX configuration parameters
3686                          * from rte_eth_conf
3687                          */
3688                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3689                         /* Configure general VMDQ and DCB RX parameters */
3690                         ixgbe_vmdq_dcb_configure(dev);
3691                 }
3692                 break;
3693         case ETH_MQ_RX_DCB:
3694         case ETH_MQ_RX_DCB_RSS:
3695                 dcb_config->vt_mode = false;
3696                 config_dcb_rx = DCB_RX_CONFIG;
3697                 /* Get DCB RX configuration parameters from rte_eth_conf */
3698                 ixgbe_dcb_rx_config(dev, dcb_config);
3699                 /* Configure general DCB RX parameters */
3700                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3701                 break;
3702         default:
3703                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3704                 break;
3705         }
3706         switch (dev->data->dev_conf.txmode.mq_mode) {
3707         case ETH_MQ_TX_VMDQ_DCB:
3708                 dcb_config->vt_mode = true;
3709                 config_dcb_tx = DCB_TX_CONFIG;
3710                 /* get DCB and VT TX configuration parameters
3711                  * from rte_eth_conf
3712                  */
3713                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3714                 /* Configure general VMDQ and DCB TX parameters */
3715                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3716                 break;
3717
3718         case ETH_MQ_TX_DCB:
3719                 dcb_config->vt_mode = false;
3720                 config_dcb_tx = DCB_TX_CONFIG;
3721                 /* Get DCB TX configuration parameters from rte_eth_conf */
3722                 ixgbe_dcb_tx_config(dev, dcb_config);
3723                 /* Configure general DCB TX parameters */
3724                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3725                 break;
3726         default:
3727                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3728                 break;
3729         }
3730
3731         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3732         /* Unpack map */
3733         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3734         if (nb_tcs == ETH_4_TCS) {
3735                 /* Avoid un-configured priority mapping to TC0 */
3736                 uint8_t j = 4;
3737                 uint8_t mask = 0xFF;
3738
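                     /*
                      * map[] translates user priority to TC: priorities 0-3
                      * come from rte_eth_conf, and the two loops below spread
                      * priorities 4-7 over any TCs left unused by priorities
                      * 0-3 instead of letting them all fall back to TC0.
                      */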
3739                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3740                         mask = (uint8_t)(mask & (~(1 << map[i])));
3741                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3742                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3743                                 map[j++] = i;
3744                         mask >>= 1;
3745                 }
3746                 /* Re-configure 4 TCs BW */
3747                 for (i = 0; i < nb_tcs; i++) {
3748                         tc = &dcb_config->tc_config[i];
3749                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3750                                                 (uint8_t)(100 / nb_tcs);
3751                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3752                                                 (uint8_t)(100 / nb_tcs);
3753                 }
3754                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3755                         tc = &dcb_config->tc_config[i];
3756                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3757                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3758                 }
3759         }
3760
3761         switch (hw->mac.type) {
3762         case ixgbe_mac_X550:
3763         case ixgbe_mac_X550EM_x:
3764         case ixgbe_mac_X550EM_a:
3765                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3766                 break;
3767         default:
3768                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3769                 break;
3770         }
3771
3772         if (config_dcb_rx) {
3773                 /* Set RX buffer size */
3774                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3775                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3776
3777                 for (i = 0; i < nb_tcs; i++) {
3778                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3779                 }
3780                 /* Zero the RX packet buffer size of all unused TCs */
3781                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3782                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3783                 }
3784         }
3785         if (config_dcb_tx) {
3786                 /* Only an equally distributed Tx packet buffer
3787                  * strategy is supported.
3788                  */
3789                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3790                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
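                     /*
                      * txpktsize is in bytes while TXPBTHRESH takes KB, hence
                      * the division by DCB_TX_PB (1024); the threshold ends up
                      * IXGBE_TXPKT_SIZE_MAX KB below the top of each TC's
                      * buffer (roughly a 40 KB buffer with a 30 KB threshold
                      * for 4 TCs, assuming the usual 160 KB Tx packet buffer).
                      */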
3791
3792                 for (i = 0; i < nb_tcs; i++) {
3793                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3794                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3795                 }
3796                 /* Clear unused TCs, if any, to zero buffer size */
3797                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3798                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3799                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3800                 }
3801         }
3802
3803         /* Calculate traffic class credits */
3804         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3805                                 IXGBE_DCB_TX_CONFIG);
3806         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3807                                 IXGBE_DCB_RX_CONFIG);
3808
3809         if (config_dcb_rx) {
3810                 /* Unpack CEE standard containers */
3811                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3812                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3813                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3814                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3815                 /* Configure PG(ETS) RX */
3816                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3817         }
3818
3819         if (config_dcb_tx) {
3820                 /* Unpack CEE standard containers */
3821                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3822                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3823                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3824                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3825                 /* Configure PG(ETS) TX */
3826                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3827         }
3828
3829         /* Configure queue statistics registers */
3830         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3831
3832         /* Check if the PFC is supported */
3833         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3834                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3835                 for (i = 0; i < nb_tcs; i++) {
3836                         /*
3837                          * For example, if the TC count is 8, the default
3838                          * high_water is 48 and the low_water is 16.
3839                          */
3840                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3841                         hw->fc.low_water[i] = pbsize / 4;
3842                         /* Enable pfc for this TC */
3843                         tc = &dcb_config->tc_config[i];
3844                         tc->pfc = ixgbe_dcb_pfc_enabled;
3845                 }
3846                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3847                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3848                         pfc_en &= 0x0F;
3849                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3850         }
3851
3852         return ret;
3853 }
3854
3855 /**
3856  * ixgbe_configure_dcb - Configure DCB  Hardware
3857  * @dev: pointer to rte_eth_dev
3858  */
3859 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3860 {
3861         struct ixgbe_dcb_config *dcb_cfg =
3862                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3863         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3864
3865         PMD_INIT_FUNC_TRACE();
3866
3867         /* Check whether the configured mq_mode supports DCB */
3868         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3869             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3870             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3871                 return;
3872
3873         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3874                 return;
3875
3876         /* Configure DCB hardware */
3877         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3878 }
3879
3880 /*
3881  * VMDq is only supported on 10 GbE NICs.
3882  */
3883 static void
3884 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3885 {
3886         struct rte_eth_vmdq_rx_conf *cfg;
3887         struct ixgbe_hw *hw;
3888         enum rte_eth_nb_pools num_pools;
3889         uint32_t mrqc, vt_ctl, vlanctrl;
3890         uint32_t vmolr = 0;
3891         int i;
3892
3893         PMD_INIT_FUNC_TRACE();
3894         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3895         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3896         num_pools = cfg->nb_queue_pools;
3897
3898         ixgbe_rss_disable(dev);
3899
3900         /* MRQC: enable vmdq */
3901         mrqc = IXGBE_MRQC_VMDQEN;
3902         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3903
3904         /* PFVTCTL: turn on virtualisation and set the default pool */
3905         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3906         if (cfg->enable_default_pool)
3907                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3908         else
3909                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3910
3911         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3912
3913         for (i = 0; i < (int)num_pools; i++) {
3914                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3915                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3916         }
3917
3918         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3919         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3920         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3921         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3922
3923         /* VFTA - enable all vlan filters */
3924         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3925                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3926
3927         /* VFRE: pool enabling for receive - 64 */
3928         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3929         if (num_pools == ETH_64_POOLS)
3930                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3931
3932         /*
3933          * MPSAR - allow pools to read specific mac addresses
3934          * In this case, all pools should be able to read from mac addr 0
3935          */
3936         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3937         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3938
3939         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3940         for (i = 0; i < cfg->nb_pool_maps; i++) {
3941                 /* set vlan id in VF register and set the valid bit */
3942                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3943                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3944                 /*
3945                  * Put the allowed pools into the VLVFB register pair:
3946                  * pools 0-31 map to the even register, pools 32-63 to
3947                  * the odd one.
3948                  */
3949                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3950                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3951                                         (cfg->pool_map[i].pools & UINT32_MAX));
3952                 else
3953                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3954                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3955
3956         }
3957
3958         /* PFDMA Tx General Switch Control: enable VMDQ loopback if requested */
3959         if (cfg->enable_loop_back) {
3960                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3961                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3962                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3963         }
3964
3965         IXGBE_WRITE_FLUSH(hw);
3966 }
3967
3968 /*
3969  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3970  * @hw: pointer to hardware structure
3971  */
3972 static void
3973 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3974 {
3975         uint32_t reg;
3976         uint32_t q;
3977
3978         PMD_INIT_FUNC_TRACE();
3979         /* PF/VF Transmit Enable */
3980         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3981         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3982
3983         /* Disable the Tx desc arbiter so that MTQC can be changed */
3984         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3985         reg |= IXGBE_RTTDCS_ARBDIS;
3986         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3987
3988         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3989         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3990
3991         /* Disable drop for all queues */
3992         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3993                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3994                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3995
3996         /* Enable the Tx desc arbiter */
3997         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3998         reg &= ~IXGBE_RTTDCS_ARBDIS;
3999         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4000
4001         IXGBE_WRITE_FLUSH(hw);
4002 }
4003
4004 static int __attribute__((cold))
4005 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4006 {
4007         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4008         uint64_t dma_addr;
4009         unsigned int i;
4010
4011         /* Initialize software ring entries */
4012         for (i = 0; i < rxq->nb_rx_desc; i++) {
4013                 volatile union ixgbe_adv_rx_desc *rxd;
4014                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4015
4016                 if (mbuf == NULL) {
4017                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4018                                      (unsigned) rxq->queue_id);
4019                         return -ENOMEM;
4020                 }
4021
4022                 rte_mbuf_refcnt_set(mbuf, 1);
4023                 mbuf->next = NULL;
4024                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4025                 mbuf->nb_segs = 1;
4026                 mbuf->port = rxq->port_id;
4027
4028                 dma_addr =
4029                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4030                 rxd = &rxq->rx_ring[i];
4031                 rxd->read.hdr_addr = 0;
4032                 rxd->read.pkt_addr = dma_addr;
4033                 rxe[i].mbuf = mbuf;
4034         }
4035
4036         return 0;
4037 }
4038
4039 static int
4040 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4041 {
4042         struct ixgbe_hw *hw;
4043         uint32_t mrqc;
4044
4045         ixgbe_rss_configure(dev);
4046
4047         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4048
4049         /* MRQC: enable VF RSS */
4050         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4051         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4052         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4053         case ETH_64_POOLS:
4054                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4055                 break;
4056
4057         case ETH_32_POOLS:
4058                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4059                 break;
4060
4061         default:
4062                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4063                 return -EINVAL;
4064         }
4065
4066         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4067
4068         return 0;
4069 }
4070
4071 static int
4072 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4073 {
4074         struct ixgbe_hw *hw =
4075                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4076
4077         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4078         case ETH_64_POOLS:
4079                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4080                         IXGBE_MRQC_VMDQEN);
4081                 break;
4082
4083         case ETH_32_POOLS:
4084                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4085                         IXGBE_MRQC_VMDQRT4TCEN);
4086                 break;
4087
4088         case ETH_16_POOLS:
4089                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4090                         IXGBE_MRQC_VMDQRT8TCEN);
4091                 break;
4092         default:
4093                 PMD_INIT_LOG(ERR,
4094                         "invalid pool number in IOV mode");
4095                 break;
4096         }
4097         return 0;
4098 }
4099
4100 static int
4101 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4102 {
4103         struct ixgbe_hw *hw =
4104                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4105
4106         if (hw->mac.type == ixgbe_mac_82598EB)
4107                 return 0;
4108
4109         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4110                 /*
4111                  * SRIOV inactive scheme
4112                  * any DCB/RSS w/o VMDq multi-queue setting
4113                  */
4114                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4115                 case ETH_MQ_RX_RSS:
4116                 case ETH_MQ_RX_DCB_RSS:
4117                 case ETH_MQ_RX_VMDQ_RSS:
4118                         ixgbe_rss_configure(dev);
4119                         break;
4120
4121                 case ETH_MQ_RX_VMDQ_DCB:
4122                         ixgbe_vmdq_dcb_configure(dev);
4123                         break;
4124
4125                 case ETH_MQ_RX_VMDQ_ONLY:
4126                         ixgbe_vmdq_rx_hw_configure(dev);
4127                         break;
4128
4129                 case ETH_MQ_RX_NONE:
4130                 default:
4131                         /* If mq_mode is none, disable RSS. */
4132                         ixgbe_rss_disable(dev);
4133                         break;
4134                 }
4135         } else {
4136                 /*
4137                  * SRIOV active scheme
4138                  * Support RSS together with VMDq & SRIOV
4139                  */
4140                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4141                 case ETH_MQ_RX_RSS:
4142                 case ETH_MQ_RX_VMDQ_RSS:
4143                         ixgbe_config_vf_rss(dev);
4144                         break;
4145                 case ETH_MQ_RX_VMDQ_DCB:
4146                         ixgbe_vmdq_dcb_configure(dev);
4147                         break;
4148                 /* FIXME: DCB/RSS together with VMDq & SRIOV is not yet supported */
4149                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4150                         PMD_INIT_LOG(ERR,
4151                                 "Could not support DCB/RSS with VMDq & SRIOV");
4152                         return -1;
4153                 default:
4154                         ixgbe_config_vf_default(dev);
4155                         break;
4156                 }
4157         }
4158
4159         return 0;
4160 }
4161
4162 static int
4163 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4164 {
4165         struct ixgbe_hw *hw =
4166                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4167         uint32_t mtqc;
4168         uint32_t rttdcs;
4169
4170         if (hw->mac.type == ixgbe_mac_82598EB)
4171                 return 0;
4172
4173         /* disable arbiter before setting MTQC */
4174         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4175         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4176         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4177
4178         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4179                 /*
4180                  * SRIOV inactive scheme
4181                  * any DCB w/o VMDq multi-queue setting
4182                  */
4183                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4184                         ixgbe_vmdq_tx_hw_configure(hw);
4185                 else {
4186                         mtqc = IXGBE_MTQC_64Q_1PB;
4187                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4188                 }
4189         } else {
4190                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4191
4192                 /*
4193                  * SRIOV active scheme
4194                  * FIXME: DCB together with VMDq & SRIOV is not yet supported
4195                  */
4196                 case ETH_64_POOLS:
4197                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4198                         break;
4199                 case ETH_32_POOLS:
4200                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4201                         break;
4202                 case ETH_16_POOLS:
4203                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4204                                 IXGBE_MTQC_8TC_8TQ;
4205                         break;
4206                 default:
4207                         mtqc = IXGBE_MTQC_64Q_1PB;
4208                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4209                 }
4210                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4211         }
4212
4213         /* re-enable arbiter */
4214         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4215         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4216
4217         return 0;
4218 }
4219
4220 /**
4221  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4222  *
4223  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4224  * spec rev. 3.0 chapter 8.2.3.8.13.
4225  *
4226  * @pool Memory pool of the Rx queue
4227  */
4228 static inline uint32_t
4229 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4230 {
4231         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4232
4233         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4234         uint16_t maxdesc =
4235                 IPV4_MAX_PKT_LEN /
4236                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
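             /*
              * Example, assuming a pool built with RTE_MBUF_DEFAULT_BUF_SIZE
              * buffers: the usable buffer is 2048 bytes, 65535 / 2048 = 31,
              * which the checks below clamp to the hardware maximum of 16
              * descriptors per RSC aggregation.
              */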
4237
4238         if (maxdesc >= 16)
4239                 return IXGBE_RSCCTL_MAXDESC_16;
4240         else if (maxdesc >= 8)
4241                 return IXGBE_RSCCTL_MAXDESC_8;
4242         else if (maxdesc >= 4)
4243                 return IXGBE_RSCCTL_MAXDESC_4;
4244         else
4245                 return IXGBE_RSCCTL_MAXDESC_1;
4246 }
4247
4248 /**
4249  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4250  * interrupt
4251  *
4252  * (Taken from FreeBSD tree)
4253  * (yes this is all very magic and confusing :)
4254  *
4255  * @dev port handle
4256  * @entry the register array entry
4257  * @vector the MSIX vector for this queue
4258  * @type RX/TX/MISC
4259  */
4260 static void
4261 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4262 {
4263         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4264         u32 ivar, index;
4265
4266         vector |= IXGBE_IVAR_ALLOC_VAL;
4267
4268         switch (hw->mac.type) {
4269
4270         case ixgbe_mac_82598EB:
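                     /*
                      * 82598: RX entries use indexes 0-63 and TX entries
                      * 64-127; each IVAR register holds four one-byte entries.
                      */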
4271                 if (type == -1)
4272                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4273                 else
4274                         entry += (type * 64);
4275                 index = (entry >> 2) & 0x1F;
4276                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4277                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4278                 ivar |= (vector << (8 * (entry & 0x3)));
4279                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4280                 break;
4281
4282         case ixgbe_mac_82599EB:
4283         case ixgbe_mac_X540:
4284                 if (type == -1) { /* MISC IVAR */
4285                         index = (entry & 1) * 8;
4286                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4287                         ivar &= ~(0xFF << index);
4288                         ivar |= (vector << index);
4289                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4290                 } else {        /* RX/TX IVARS */
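                             /*
                              * On 82599/X540 each 32-bit IVAR covers two
                              * queues: bytes 0/1 hold the RX/TX vectors of the
                              * even queue, bytes 2/3 those of the odd queue,
                              * hence the 16 * (entry & 1) + 8 * type offset.
                              */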
4291                         index = (16 * (entry & 1)) + (8 * type);
4292                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4293                         ivar &= ~(0xFF << index);
4294                         ivar |= (vector << index);
4295                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4296                 }
4297
4298                 break;
4299
4300         default:
4301                 break;
4302         }
4303 }
4304
4305 void __attribute__((cold))
4306 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4307 {
4308         uint16_t i, rx_using_sse;
4309         struct ixgbe_adapter *adapter =
4310                 (struct ixgbe_adapter *)dev->data->dev_private;
4311
4312         /*
4313          * In order to allow Vector Rx there are a few configuration
4314          * conditions to be met and Rx Bulk Allocation should be allowed.
4315          */
4316         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4317             !adapter->rx_bulk_alloc_allowed) {
4318                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4319                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4320                                     "not enabled",
4321                              dev->data->port_id);
4322
4323                 adapter->rx_vec_allowed = false;
4324         }
4325
4326         /*
4327          * Initialize the appropriate LRO callback.
4328          *
4329          * If all queues satisfy the bulk allocation preconditions
4330          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4331          * Otherwise use a single allocation version.
4332          */
4333         if (dev->data->lro) {
4334                 if (adapter->rx_bulk_alloc_allowed) {
4335                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4336                                            "allocation version");
4337                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4338                 } else {
4339                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4340                                            "allocation version");
4341                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4342                 }
4343         } else if (dev->data->scattered_rx) {
4344                 /*
4345                  * Set the non-LRO scattered callback: there are Vector and
4346                  * single allocation versions.
4347                  */
4348                 if (adapter->rx_vec_allowed) {
4349                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4350                                             "callback (port=%d).",
4351                                      dev->data->port_id);
4352
4353                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4354                 } else if (adapter->rx_bulk_alloc_allowed) {
4355                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4356                                            "allocation callback (port=%d).",
4357                                      dev->data->port_id);
4358                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4359                 } else {
4360                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4361                                             "single allocation) "
4362                                             "Scattered Rx callback "
4363                                             "(port=%d).",
4364                                      dev->data->port_id);
4365
4366                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4367                 }
4368         /*
4369          * Below we set "simple" callbacks according to port/queues parameters.
4370          * If parameters allow we are going to choose between the following
4371          * callbacks:
4372          *    - Vector
4373          *    - Bulk Allocation
4374          *    - Single buffer allocation (the simplest one)
4375          */
4376         } else if (adapter->rx_vec_allowed) {
4377                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4378                                     "burst size no less than %d (port=%d).",
4379                              RTE_IXGBE_DESCS_PER_LOOP,
4380                              dev->data->port_id);
4381
4382                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4383         } else if (adapter->rx_bulk_alloc_allowed) {
4384                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4385                                     "satisfied. Rx Burst Bulk Alloc function "
4386                                     "will be used on port=%d.",
4387                              dev->data->port_id);
4388
4389                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4390         } else {
4391                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4392                                     "satisfied, or Scattered Rx is requested "
4393                                     "(port=%d).",
4394                              dev->data->port_id);
4395
4396                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4397         }
4398
4399         /* Propagate information about RX function choice through all queues. */
4400
4401         rx_using_sse =
4402                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4403                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4404
4405         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4406                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4407
4408                 rxq->rx_using_sse = rx_using_sse;
4409         }
4410 }
4411
4412 /**
4413  * ixgbe_set_rsc - configure RSC related port HW registers
4414  *
4415  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4416  * of 82599 Spec (x540 configuration is virtually the same).
4417  *
4418  * @dev port handle
4419  *
4420  * Returns 0 in case of success or a non-zero error code
4421  */
4422 static int
4423 ixgbe_set_rsc(struct rte_eth_dev *dev)
4424 {
4425         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4426         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4427         struct rte_eth_dev_info dev_info = { 0 };
4428         bool rsc_capable = false;
4429         uint16_t i;
4430         uint32_t rdrxctl;
4431
4432         /* Sanity check */
4433         dev->dev_ops->dev_infos_get(dev, &dev_info);
4434         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4435                 rsc_capable = true;
4436
4437         if (!rsc_capable && rx_conf->enable_lro) {
4438                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4439                                    "support it");
4440                 return -EINVAL;
4441         }
4442
4443         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4444
4445         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4446                 /*
4447                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4448                  * RSC configuration requires HW CRC stripping to be
4449                  * enabled. If the user requested both HW CRC stripping off
4450                  * and RSC on, return an error.
4451                  */
4452                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4453                                     "is disabled");
4454                 return -EINVAL;
4455         }
4456
4457         /* RFCTL configuration  */
4458         if (rsc_capable) {
4459                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4460
4461                 if (rx_conf->enable_lro)
4462                         /*
4463                          * Since NFS packet coalescing is not supported, clear
4464                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4465                          * enabled.
4466                          */
4467                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4468                                    IXGBE_RFCTL_NFSR_DIS);
4469                 else
4470                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4471
4472                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4473         }
4474
4475         /* If LRO hasn't been requested - we are done here. */
4476         if (!rx_conf->enable_lro)
4477                 return 0;
4478
4479         /* Set RDRXCTL.RSCACKC bit */
4480         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4481         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4482         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4483
4484         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4485         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4486                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4487                 uint32_t srrctl =
4488                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4489                 uint32_t rscctl =
4490                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4491                 uint32_t psrtype =
4492                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4493                 uint32_t eitr =
4494                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4495
4496                 /*
4497                  * ixgbe PMD doesn't support header-split at the moment.
4498                  *
4499                  * Following chapter 4.6.7.2.1 of the 82599/x540
4500                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4501                  * should be configured even if header split is not
4502                  * enabled. We configure it to 128 bytes, following the
4503                  * recommendation in the spec.
4504                  */
4505                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4506                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4507                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4508
4509                 /*
4510                  * TODO: Consider setting the Receive Descriptor Minimum
4511                  * Threshold Size for an RSC case. This is not an obviously
4512                  * beneficial option, but one worth considering...
4513                  */
4514
4515                 rscctl |= IXGBE_RSCCTL_RSCEN;
4516                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4517                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4518
4519                 /*
4520                  * RSC: Set ITR interval corresponding to 2K ints/s.
4521                  *
4522                  * Full-sized RSC aggregations for a 10Gb/s link will
4523                  * arrive at a rate of about 20K aggregations/s.
4524                  *
4525                  * A 2K ints/s rate will cause only 10% of the
4526                  * aggregations to be closed due to interrupt timer
4527                  * expiration when streaming at wire speed.
4528                  *
4529                  * For a sparse streaming case this setting will yield
4530                  * at most 500us latency for a single RSC aggregation.
4531                  */
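                     /* A 500 us interval matches the ~2K ints/s figure above. */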
4532                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4533                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4534
4535                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4536                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4537                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4538                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4539
4540                 /*
4541                  * RSC requires the mapping of the queue to the
4542                  * interrupt vector.
4543                  */
4544                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4545         }
4546
4547         dev->data->lro = 1;
4548
4549         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4550
4551         return 0;
4552 }
4553
4554 /*
4555  * Initializes Receive Unit.
4556  */
4557 int __attribute__((cold))
4558 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4559 {
4560         struct ixgbe_hw     *hw;
4561         struct ixgbe_rx_queue *rxq;
4562         uint64_t bus_addr;
4563         uint32_t rxctrl;
4564         uint32_t fctrl;
4565         uint32_t hlreg0;
4566         uint32_t maxfrs;
4567         uint32_t srrctl;
4568         uint32_t rdrxctl;
4569         uint32_t rxcsum;
4570         uint16_t buf_size;
4571         uint16_t i;
4572         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4573         int rc;
4574
4575         PMD_INIT_FUNC_TRACE();
4576         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4577
4578         /*
4579          * Make sure receives are disabled while setting
4580          * up the RX context (registers, descriptor rings, etc.).
4581          */
4582         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4583         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4584
4585         /* Enable receipt of broadcast frames */
4586         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4587         fctrl |= IXGBE_FCTRL_BAM;
4588         fctrl |= IXGBE_FCTRL_DPF;
4589         fctrl |= IXGBE_FCTRL_PMCF;
4590         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4591
4592         /*
4593          * Configure CRC stripping, if any.
4594          */
4595         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4596         if (rx_conf->hw_strip_crc)
4597                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4598         else
4599                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4600
4601         /*
4602          * Configure jumbo frame support, if any.
4603          */
4604         if (rx_conf->jumbo_frame == 1) {
4605                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4606                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4607                 maxfrs &= 0x0000FFFF;
4608                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4609                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4610         } else
4611                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4612
4613         /*
4614          * If loopback mode is configured for 82599, set LPBK bit.
4615          */
4616         if (hw->mac.type == ixgbe_mac_82599EB &&
4617                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4618                 hlreg0 |= IXGBE_HLREG0_LPBK;
4619         else
4620                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4621
4622         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4623
4624         /* Setup RX queues */
4625         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4626                 rxq = dev->data->rx_queues[i];
4627
4628                 /*
4629                  * Reset crc_len in case it was changed after queue setup by a
4630                  * call to configure.
4631                  */
4632                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4633
4634                 /* Setup the Base and Length of the Rx Descriptor Rings */
4635                 bus_addr = rxq->rx_ring_phys_addr;
4636                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4637                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4638                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4639                                 (uint32_t)(bus_addr >> 32));
4640                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4641                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4642                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4643                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4644
4645                 /* Configure the SRRCTL register */
4646 #ifdef RTE_HEADER_SPLIT_ENABLE
4647                 /*
4648                  * Configure Header Split
4649                  */
4650                 if (rx_conf->header_split) {
4651                         if (hw->mac.type == ixgbe_mac_82599EB) {
4652                                 /* Must setup the PSRTYPE register */
4653                                 uint32_t psrtype;
4654
4655                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4656                                         IXGBE_PSRTYPE_UDPHDR   |
4657                                         IXGBE_PSRTYPE_IPV4HDR  |
4658                                         IXGBE_PSRTYPE_IPV6HDR;
4659                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4660                         }
4661                         srrctl = ((rx_conf->split_hdr_size <<
4662                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4663                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4664                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4665                 } else
4666 #endif
4667                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4668
4669                 /* Set if packets are dropped when no descriptors available */
4670                 if (rxq->drop_en)
4671                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4672
4673                 /*
4674                  * Configure the RX buffer size in the BSIZEPACKET field of
4675                  * the SRRCTL register of the queue.
4676                  * The value is in 1 KB resolution. Valid values can be from
4677                  * 1 KB to 16 KB.
4678                  */
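                     /*
                      * For example, with a pool created with
                      * RTE_MBUF_DEFAULT_BUF_SIZE buffers, buf_size works out
                      * to 2048 bytes and BSIZEPACKET is programmed with 2,
                      * i.e. a 2 KB RX buffer.
                      */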
4679                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4680                         RTE_PKTMBUF_HEADROOM);
4681                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4682                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4683
4684                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4685
4686                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4687                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4688
4689                 /* Add dual VLAN tag length when deciding whether scattered RX is needed */
4690                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4691                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4692                         dev->data->scattered_rx = 1;
4693         }
4694
4695         if (rx_conf->enable_scatter)
4696                 dev->data->scattered_rx = 1;
4697
4698         /*
4699          * Device configured with multiple RX queues.
4700          */
4701         ixgbe_dev_mq_rx_configure(dev);
4702
4703         /*
4704          * Setup the Checksum Register.
4705          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4706          * Enable IP/L4 checksum computation by hardware if requested to do so.
4707          */
4708         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4709         rxcsum |= IXGBE_RXCSUM_PCSD;
4710         if (rx_conf->hw_ip_checksum)
4711                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4712         else
4713                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4714
4715         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4716
4717         if (hw->mac.type == ixgbe_mac_82599EB ||
4718             hw->mac.type == ixgbe_mac_X540) {
4719                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4720                 if (rx_conf->hw_strip_crc)
4721                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4722                 else
4723                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4724                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4725                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4726         }
4727
4728         rc = ixgbe_set_rsc(dev);
4729         if (rc)
4730                 return rc;
4731
4732         ixgbe_set_rx_function(dev);
4733
4734         return 0;
4735 }
4736
4737 /*
4738  * Initializes Transmit Unit.
4739  */
4740 void __attribute__((cold))
4741 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4742 {
4743         struct ixgbe_hw     *hw;
4744         struct ixgbe_tx_queue *txq;
4745         uint64_t bus_addr;
4746         uint32_t hlreg0;
4747         uint32_t txctrl;
4748         uint16_t i;
4749
4750         PMD_INIT_FUNC_TRACE();
4751         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4752
4753         /* Enable TX CRC (checksum offload requirement) and hw padding
4754          * (TSO requirement)
4755          */
4756         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4757         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4758         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4759
4760         /* Setup the Base and Length of the Tx Descriptor Rings */
4761         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4762                 txq = dev->data->tx_queues[i];
4763
4764                 bus_addr = txq->tx_ring_phys_addr;
4765                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4766                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4767                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4768                                 (uint32_t)(bus_addr >> 32));
4769                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4770                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4771                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4772                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4773                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4774
4775                 /*
4776                  * Disable Tx Head Writeback RO bit, since this hoses
4777                  * bookkeeping if things aren't delivered in order.
4778                  */
4779                 switch (hw->mac.type) {
4780                 case ixgbe_mac_82598EB:
4781                         txctrl = IXGBE_READ_REG(hw,
4782                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4783                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4784                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4785                                         txctrl);
4786                         break;
4787
4788                 case ixgbe_mac_82599EB:
4789                 case ixgbe_mac_X540:
4790                 case ixgbe_mac_X550:
4791                 case ixgbe_mac_X550EM_x:
4792                 case ixgbe_mac_X550EM_a:
4793                 default:
4794                         txctrl = IXGBE_READ_REG(hw,
4795                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4796                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4797                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4798                                         txctrl);
4799                         break;
4800                 }
4801         }
4802
4803         /* Device configured with multiple TX queues. */
4804         ixgbe_dev_mq_tx_configure(dev);
4805 }
4806
4807 /*
4808  * Set up link for 82599 loopback mode Tx->Rx.
4809  */
4810 static inline void __attribute__((cold))
4811 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4812 {
4813         PMD_INIT_FUNC_TRACE();
4814
4815         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4816                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4817                                 IXGBE_SUCCESS) {
4818                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4819                         /* ignore error */
4820                         return;
4821                 }
4822         }
4823
4824         /* Restart link */
4825         IXGBE_WRITE_REG(hw,
4826                         IXGBE_AUTOC,
4827                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4828         ixgbe_reset_pipeline_82599(hw);
4829
4830         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4831         msec_delay(50);
4832 }
4833
4834
4835 /*
4836  * Start Transmit and Receive Units.
4837  */
4838 int __attribute__((cold))
4839 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4840 {
4841         struct ixgbe_hw     *hw;
4842         struct ixgbe_tx_queue *txq;
4843         struct ixgbe_rx_queue *rxq;
4844         uint32_t txdctl;
4845         uint32_t dmatxctl;
4846         uint32_t rxctrl;
4847         uint16_t i;
4848         int ret = 0;
4849
4850         PMD_INIT_FUNC_TRACE();
4851         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4852
4853         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4854                 txq = dev->data->tx_queues[i];
4855                 /* Setup Transmit Threshold Registers */
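                     /* PTHRESH sits in TXDCTL bits 6:0, HTHRESH in 14:8, WTHRESH in 22:16. */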
4856                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4857                 txdctl |= txq->pthresh & 0x7F;
4858                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4859                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4860                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4861         }
4862
4863         if (hw->mac.type != ixgbe_mac_82598EB) {
4864                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4865                 dmatxctl |= IXGBE_DMATXCTL_TE;
4866                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4867         }
4868
4869         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4870                 txq = dev->data->tx_queues[i];
4871                 if (!txq->tx_deferred_start) {
4872                         ret = ixgbe_dev_tx_queue_start(dev, i);
4873                         if (ret < 0)
4874                                 return ret;
4875                 }
4876         }
4877
4878         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4879                 rxq = dev->data->rx_queues[i];
4880                 if (!rxq->rx_deferred_start) {
4881                         ret = ixgbe_dev_rx_queue_start(dev, i);
4882                         if (ret < 0)
4883                                 return ret;
4884                 }
4885         }
4886
4887         /* Enable Receive engine */
4888         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4889         if (hw->mac.type == ixgbe_mac_82598EB)
4890                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4891         rxctrl |= IXGBE_RXCTRL_RXEN;
4892         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4893
4894         /* If loopback mode is enabled for 82599, set up the link accordingly */
4895         if (hw->mac.type == ixgbe_mac_82599EB &&
4896                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4897                 ixgbe_setup_loopback_link_82599(hw);
4898
4899         return 0;
4900 }
4901
4902 /*
4903  * Start Receive Units for specified queue.
4904  */
4905 int __attribute__((cold))
4906 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4907 {
4908         struct ixgbe_hw     *hw;
4909         struct ixgbe_rx_queue *rxq;
4910         uint32_t rxdctl;
4911         int poll_ms;
4912
4913         PMD_INIT_FUNC_TRACE();
4914         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4915
4916         if (rx_queue_id < dev->data->nb_rx_queues) {
4917                 rxq = dev->data->rx_queues[rx_queue_id];
4918
4919                 /* Allocate buffers for descriptor rings */
4920                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4921                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4922                                      rx_queue_id);
4923                         return -1;
4924                 }
4925                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4926                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4927                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4928
4929                 /* Wait until RX Enable ready */
4930                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4931                 do {
4932                         rte_delay_ms(1);
4933                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4934                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4935                 if (!poll_ms)
4936                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4937                                      rx_queue_id);
4938                 rte_wmb();
4939                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4940                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4941                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4942         } else
4943                 return -1;
4944
4945         return 0;
4946 }
4947
4948 /*
4949  * Stop Receive Units for specified queue.
4950  */
4951 int __attribute__((cold))
4952 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4953 {
4954         struct ixgbe_hw     *hw;
4955         struct ixgbe_adapter *adapter =
4956                 (struct ixgbe_adapter *)dev->data->dev_private;
4957         struct ixgbe_rx_queue *rxq;
4958         uint32_t rxdctl;
4959         int poll_ms;
4960
4961         PMD_INIT_FUNC_TRACE();
4962         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4963
4964         if (rx_queue_id < dev->data->nb_rx_queues) {
4965                 rxq = dev->data->rx_queues[rx_queue_id];
4966
4967                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4968                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4969                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4970
4971                 /* Wait until RX Enable bit clear */
4972                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4973                 do {
4974                         rte_delay_ms(1);
4975                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4976                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4977                 if (!poll_ms)
4978                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4979                                      rx_queue_id);
4980
4981                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4982
4983                 ixgbe_rx_queue_release_mbufs(rxq);
4984                 ixgbe_reset_rx_queue(adapter, rxq);
4985                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4986         } else
4987                 return -1;
4988
4989         return 0;
4990 }
4991
4992
4993 /*
4994  * Start Transmit Units for specified queue.
4995  */
4996 int __attribute__((cold))
4997 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4998 {
4999         struct ixgbe_hw     *hw;
5000         struct ixgbe_tx_queue *txq;
5001         uint32_t txdctl;
5002         int poll_ms;
5003
5004         PMD_INIT_FUNC_TRACE();
5005         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5006
5007         if (tx_queue_id < dev->data->nb_tx_queues) {
5008                 txq = dev->data->tx_queues[tx_queue_id];
5009                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5010                 txdctl |= IXGBE_TXDCTL_ENABLE;
5011                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5012
5013                 /* Wait until TX Enable ready */
5014                 if (hw->mac.type == ixgbe_mac_82599EB) {
5015                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5016                         do {
5017                                 rte_delay_ms(1);
5018                                 txdctl = IXGBE_READ_REG(hw,
5019                                         IXGBE_TXDCTL(txq->reg_idx));
5020                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5021                         if (!poll_ms)
5022                                 PMD_INIT_LOG(ERR, "Could not enable "
5023                                              "Tx Queue %d", tx_queue_id);
5024                 }
5025                 rte_wmb();
5026                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5027                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5028                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5029         } else
5030                 return -1;
5031
5032         return 0;
5033 }
5034
5035 /*
5036  * Stop Transmit Units for the specified queue.
5037  */
5038 int __attribute__((cold))
5039 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5040 {
5041         struct ixgbe_hw     *hw;
5042         struct ixgbe_tx_queue *txq;
5043         uint32_t txdctl;
5044         uint32_t txtdh, txtdt;
5045         int poll_ms;
5046
5047         PMD_INIT_FUNC_TRACE();
5048         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5049
5050         if (tx_queue_id >= dev->data->nb_tx_queues)
5051                 return -1;
5052
5053         txq = dev->data->tx_queues[tx_queue_id];
5054
5055         /* Wait until TX queue is empty */
5056         if (hw->mac.type == ixgbe_mac_82599EB) {
5057                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5058                 do {
5059                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5060                         txtdh = IXGBE_READ_REG(hw,
5061                                                IXGBE_TDH(txq->reg_idx));
5062                         txtdt = IXGBE_READ_REG(hw,
5063                                                IXGBE_TDT(txq->reg_idx));
5064                 } while (--poll_ms && (txtdh != txtdt));
5065                 if (!poll_ms)
5066                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty when stopping.",
5067                                      tx_queue_id);
5068         }
5069
5070         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5071         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5072         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5073
5074         /* Wait until the TX Enable bit is cleared */
5075         if (hw->mac.type == ixgbe_mac_82599EB) {
5076                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5077                 do {
5078                         rte_delay_ms(1);
5079                         txdctl = IXGBE_READ_REG(hw,
5080                                                 IXGBE_TXDCTL(txq->reg_idx));
5081                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5082                 if (!poll_ms)
5083                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5084                                      tx_queue_id);
5085         }
5086
5087         if (txq->ops != NULL) {
5088                 txq->ops->release_mbufs(txq);
5089                 txq->ops->reset(txq);
5090         }
5091         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5092
5093         return 0;
5094 }
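
/*
 * Illustrative (not part of this driver): the queue start/stop handlers above
 * are reached through the generic ethdev API, typically for queues configured
 * with deferred start. Assuming port_id, mb_pool and dev_info (obtained via
 * rte_eth_dev_info_get()) are already set up, an application might do:
 *
 *     struct rte_eth_rxconf rxconf = dev_info.default_rxconf;
 *
 *     rxconf.rx_deferred_start = 1;
 *     rte_eth_rx_queue_setup(port_id, 0, 128, rte_socket_id(), &rxconf, mb_pool);
 *     rte_eth_dev_start(port_id);
 *     rte_eth_dev_rx_queue_start(port_id, 0);   -- dispatches to ixgbe_dev_rx_queue_start()
 *     ...
 *     rte_eth_dev_rx_queue_stop(port_id, 0);    -- dispatches to ixgbe_dev_rx_queue_stop()
 */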
5095
5096 void
5097 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5098         struct rte_eth_rxq_info *qinfo)
5099 {
5100         struct ixgbe_rx_queue *rxq;
5101
5102         rxq = dev->data->rx_queues[queue_id];
5103
5104         qinfo->mp = rxq->mb_pool;
5105         qinfo->scattered_rx = dev->data->scattered_rx;
5106         qinfo->nb_desc = rxq->nb_rx_desc;
5107
5108         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5109         qinfo->conf.rx_drop_en = rxq->drop_en;
5110         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5111 }
5112
5113 void
5114 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5115         struct rte_eth_txq_info *qinfo)
5116 {
5117         struct ixgbe_tx_queue *txq;
5118
5119         txq = dev->data->tx_queues[queue_id];
5120
5121         qinfo->nb_desc = txq->nb_tx_desc;
5122
5123         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5124         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5125         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5126
5127         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5128         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5129         qinfo->conf.txq_flags = txq->txq_flags;
5130         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5131 }
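
/*
 * These two callbacks back the generic rte_eth_rx_queue_info_get() and
 * rte_eth_tx_queue_info_get() API. Illustrative application-side use,
 * assuming a configured port_id:
 *
 *     struct rte_eth_rxq_info rx_qinfo;
 *
 *     if (rte_eth_rx_queue_info_get(port_id, 0, &rx_qinfo) == 0)
 *             printf("rxq 0: %u descriptors\n", rx_qinfo.nb_desc);
 */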
5132
5133 /*
5134  * [VF] Initializes Receive Unit.
5135  */
5136 int __attribute__((cold))
5137 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5138 {
5139         struct ixgbe_hw     *hw;
5140         struct ixgbe_rx_queue *rxq;
5141         uint64_t bus_addr;
5142         uint32_t srrctl, psrtype = 0;
5143         uint16_t buf_size;
5144         uint16_t i;
5145         int ret;
5146
5147         PMD_INIT_FUNC_TRACE();
5148         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5149
5150         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5151                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5152                         "it must be a power of 2");
5153                 return -1;
5154         }
5155
5156         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5157                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5158                         "it must be less than or equal to %d",
5159                         hw->mac.max_rx_queues);
5160                 return -1;
5161         }
5162
5163         /*
5164          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5165          * disables VF receipt of packets if the PF MTU is > 1500.
5166          * This is done to cope with an 82599 limitation that forces
5167          * the PF and all VFs to share the same MTU.
5168          * The PF driver then re-enables VF receipt of packets once the
5169          * VF driver issues an IXGBE_VF_SET_LPE request.
5170          * In the meantime, the VF device cannot be used, even if the VF driver
5171          * and the Guest VM network stack are ready to accept packets with a
5172          * size up to the PF MTU.
5173          * As a workaround to this PF behaviour, force the call to
5174          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5175          * VF packet reception works in all cases.
5176          */
5177         ixgbevf_rlpml_set_vf(hw,
5178                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5179
5180         /* Setup RX queues */
5181         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5182                 rxq = dev->data->rx_queues[i];
5183
5184                 /* Allocate buffers for descriptor rings */
5185                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5186                 if (ret)
5187                         return ret;
5188
5189                 /* Setup the Base and Length of the Rx Descriptor Rings */
5190                 bus_addr = rxq->rx_ring_phys_addr;
5191
5192                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5193                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5194                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5195                                 (uint32_t)(bus_addr >> 32));
5196                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5197                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5198                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5199                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5200
5202                 /* Configure the SRRCTL register */
5203 #ifdef RTE_HEADER_SPLIT_ENABLE
5204                 /*
5205                  * Configure Header Split
5206                  */
5207                 if (dev->data->dev_conf.rxmode.header_split) {
5208                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5209                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5210                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5211                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5212                 } else
5213 #endif
5214                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5215
5216                 /* Set whether packets are dropped when no descriptors are available */
5217                 if (rxq->drop_en)
5218                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5219
5220                 /*
5221                  * Configure the RX buffer size in the BSIZEPACKET field of
5222                  * the SRRCTL register of the queue.
5223                  * The value is in 1 KB resolution. Valid values can be from
5224                  * 1 KB to 16 KB.
5225                  */
5226                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5227                         RTE_PKTMBUF_HEADROOM);
5228                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5229                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5230
5231                 /*
5232                  * VF modification to write virtual function SRRCTL register
5233                  */
5234                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5235
5236                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5237                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5238
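                /*
                 * Illustrative sizing, assuming the common 2176-byte mbuf data
                 * room and 128-byte headroom: buf_size = 2048, BSIZEPKT = 2,
                 * i.e. 2 KB hardware Rx buffers. Scattered Rx is then forced
                 * only when it is requested explicitly or when max_rx_pkt_len
                 * plus two VLAN tags (8 bytes) does not fit into those 2 KB.
                 */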
5239                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5240                     /* Account for two VLAN tags to support dual VLAN (QinQ) */
5241                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5242                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5243                         if (!dev->data->scattered_rx)
5244                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5245                         dev->data->scattered_rx = 1;
5246                 }
5247         }
5248
5249 #ifdef RTE_HEADER_SPLIT_ENABLE
5250         if (dev->data->dev_conf.rxmode.header_split)
5251                 /* Must setup the PSRTYPE register */
5252                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5253                         IXGBE_PSRTYPE_UDPHDR   |
5254                         IXGBE_PSRTYPE_IPV4HDR  |
5255                         IXGBE_PSRTYPE_IPV6HDR;
5256 #endif
5257
5258         /* Set RQPL for VF RSS according to the number of Rx queues */
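        /*
         * The field value is nb_rx_queues / 2, e.g. 1 queue -> 0, 2 -> 1,
         * 4 -> 2 (nb_rx_queues was checked above to be a power of 2).
         */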
5259         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5260                 IXGBE_PSRTYPE_RQPL_SHIFT;
5261         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5262
5263         ixgbe_set_rx_function(dev);
5264
5265         return 0;
5266 }
5267
5268 /*
5269  * [VF] Initializes Transmit Unit.
5270  */
5271 void __attribute__((cold))
5272 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5273 {
5274         struct ixgbe_hw     *hw;
5275         struct ixgbe_tx_queue *txq;
5276         uint64_t bus_addr;
5277         uint32_t txctrl;
5278         uint16_t i;
5279
5280         PMD_INIT_FUNC_TRACE();
5281         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5282
5283         /* Setup the Base and Length of the Tx Descriptor Rings */
5284         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5285                 txq = dev->data->tx_queues[i];
5286                 bus_addr = txq->tx_ring_phys_addr;
5287                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5288                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5289                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5290                                 (uint32_t)(bus_addr >> 32));
5291                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5292                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5293                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5294                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5295                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5296
5297                 /*
5298                  * Disable the Tx head write-back relaxed ordering (RO) bit,
5299                  * since out-of-order write-backs would corrupt the bookkeeping.
5300                  */
5301                 txctrl = IXGBE_READ_REG(hw,
5302                                 IXGBE_VFDCA_TXCTRL(i));
5303                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5304                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5305                                 txctrl);
5306         }
5307 }
5308
5309 /*
5310  * [VF] Starts Transmit and Receive Units.
5311  */
5312 void __attribute__((cold))
5313 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5314 {
5315         struct ixgbe_hw     *hw;
5316         struct ixgbe_tx_queue *txq;
5317         struct ixgbe_rx_queue *rxq;
5318         uint32_t txdctl;
5319         uint32_t rxdctl;
5320         uint16_t i;
5321         int poll_ms;
5322
5323         PMD_INIT_FUNC_TRACE();
5324         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5325
5326         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5327                 txq = dev->data->tx_queues[i];
5328                 /* Setup Transmit Threshold Registers */
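                /*
                 * PTHRESH, HTHRESH and WTHRESH are packed into TXDCTL
                 * bits 6:0, 14:8 and 22:16 respectively.
                 */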
5329                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5330                 txdctl |= txq->pthresh & 0x7F;
5331                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5332                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5333                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5334         }
5335
5336         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5338                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5339                 txdctl |= IXGBE_TXDCTL_ENABLE;
5340                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5341
5342                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5343                 /* Wait until the TX Enable bit is set */
5344                 do {
5345                         rte_delay_ms(1);
5346                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5347                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5348                 if (!poll_ms)
5349                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5350         }
5351         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5353                 rxq = dev->data->rx_queues[i];
5354
5355                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5356                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5357                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5358
5359                 /* Wait until the RX Enable bit is set */
5360                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5361                 do {
5362                         rte_delay_ms(1);
5363                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5364                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5365                 if (!poll_ms)
5366                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
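                /*
                 * Barrier so the descriptor ring writes are visible before the
                 * tail update hands the ring to the hardware; the tail is set
                 * to nb_rx_desc - 1, leaving one descriptor unused, since
                 * head == tail is the ring-empty condition.
                 */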
5367                 rte_wmb();
5368                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5370         }
5371 }
5372
5373 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
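/*
 * When CONFIG_RTE_IXGBE_INC_VECTOR is enabled, the architecture-specific
 * vector implementations provide strong definitions that override these
 * weak fallbacks; the fallbacks simply report the vector path as unavailable.
 */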
5374 int __attribute__((weak))
5375 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5376 {
5377         return -1;
5378 }
5379
5380 uint16_t __attribute__((weak))
5381 ixgbe_recv_pkts_vec(
5382         void __rte_unused *rx_queue,
5383         struct rte_mbuf __rte_unused **rx_pkts,
5384         uint16_t __rte_unused nb_pkts)
5385 {
5386         return 0;
5387 }
5388
5389 uint16_t __attribute__((weak))
5390 ixgbe_recv_scattered_pkts_vec(
5391         void __rte_unused *rx_queue,
5392         struct rte_mbuf __rte_unused **rx_pkts,
5393         uint16_t __rte_unused nb_pkts)
5394 {
5395         return 0;
5396 }
5397
5398 int __attribute__((weak))
5399 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5400 {
5401         return -1;
5402 }