remove unused ring includes
[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73
74 #include "ixgbe_logs.h"
75 #include "base/ixgbe_api.h"
76 #include "base/ixgbe_vf.h"
77 #include "ixgbe_ethdev.h"
78 #include "base/ixgbe_dcb.h"
79 #include "base/ixgbe_common.h"
80 #include "ixgbe_rxtx.h"
81
82 /* Bit Mask to indicate what bits required for building TX context */
83 #define IXGBE_TX_OFFLOAD_MASK (                  \
84                 PKT_TX_VLAN_PKT |                \
85                 PKT_TX_IP_CKSUM |                \
86                 PKT_TX_L4_MASK |                 \
87                 PKT_TX_TCP_SEG |                 \
88                 PKT_TX_OUTER_IP_CKSUM)
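/*
 * ixgbe_xmit_pkts() ANDs each packet's ol_flags with this mask; a
 * non-zero result means a TX context descriptor may have to be set up
 * (or an already cached one reused) before the data descriptors.
 */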
89
90 #if 1
91 #define RTE_PMD_USE_PREFETCH
92 #endif
93
94 #ifdef RTE_PMD_USE_PREFETCH
95 /*
96  * Prefetch a cache line into all cache levels.
97  */
98 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
99 #else
100 #define rte_ixgbe_prefetch(p)   do {} while (0)
101 #endif
102
103 /*********************************************************************
104  *
105  *  TX functions
106  *
107  **********************************************************************/
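/*
 * Two transmit paths follow: a "simple" path (tx_xmit_pkts() /
 * ixgbe_xmit_pkts_simple()) that assumes single-segment mbufs with no
 * offloads, and the full-featured ixgbe_xmit_pkts() that supports
 * multi-segment packets and per-packet offloads via context descriptors.
 */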
108
109 /*
110  * Check for descriptors with their DD bit set and free mbufs.
111  * Return the total number of buffers freed.
112  */
113 static inline int __attribute__((always_inline))
114 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
115 {
116         struct ixgbe_tx_entry *txep;
117         uint32_t status;
118         int i, nb_free = 0;
119         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
120
121         /* check DD bit on threshold descriptor */
122         status = txq->tx_ring[txq->tx_next_dd].wb.status;
123         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
124                 return 0;
125
126         /*
127          * first buffer to free from S/W ring is at index
128          * tx_next_dd - (tx_rs_thresh-1)
129          */
130         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
131
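        /*
         * Free the tx_rs_thresh mbufs covered by the threshold descriptor.
         * Mbufs from the same mempool are batched so they can be returned
         * with a single rte_mempool_put_bulk() call.
         */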
132         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
133                 /* free buffers one at a time */
134                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
135                 txep->mbuf = NULL;
136
137                 if (unlikely(m == NULL))
138                         continue;
139
140                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
141                     (nb_free > 0 && m->pool != free[0]->pool)) {
142                         rte_mempool_put_bulk(free[0]->pool,
143                                              (void **)free, nb_free);
144                         nb_free = 0;
145                 }
146
147                 free[nb_free++] = m;
148         }
149
150         if (nb_free > 0)
151                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
152
153         /* buffers were freed, update counters */
154         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
155         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
156         if (txq->tx_next_dd >= txq->nb_tx_desc)
157                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
158
159         return txq->tx_rs_thresh;
160 }
161
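/*
 * tx4()/tx1() only fill the buffer address, length and payload length
 * fields: the simple path assumes single-segment packets with no offload
 * flags, so no context or option bits are needed here.
 */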
162 /* Populate 4 descriptors with data from 4 mbufs */
163 static inline void
164 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
165 {
166         uint64_t buf_dma_addr;
167         uint32_t pkt_len;
168         int i;
169
170         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
171                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
172                 pkt_len = (*pkts)->data_len;
173
174                 /* write data to descriptor */
175                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
176
177                 txdp->read.cmd_type_len =
178                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
179
180                 txdp->read.olinfo_status =
181                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
182
183                 rte_prefetch0(&(*pkts)->pool);
184         }
185 }
186
187 /* Populate 1 descriptor with data from 1 mbuf */
188 static inline void
189 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
190 {
191         uint64_t buf_dma_addr;
192         uint32_t pkt_len;
193
194         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
195         pkt_len = (*pkts)->data_len;
196
197         /* write data to descriptor */
198         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
199         txdp->read.cmd_type_len =
200                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
201         txdp->read.olinfo_status =
202                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
203         rte_prefetch0(&(*pkts)->pool);
204 }
205
206 /*
207  * Fill H/W descriptor ring with mbuf data.
208  * Copy mbuf pointers to the S/W ring.
209  */
210 static inline void
211 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
212                       uint16_t nb_pkts)
213 {
214         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
215         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
216         const int N_PER_LOOP = 4;
217         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
218         int mainpart, leftover;
219         int i, j;
220
221         /*
222          * Process most of the packets in chunks of N pkts.  Any
223          * leftover packets will get processed one at a time.
224          */
225         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
226         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
227         for (i = 0; i < mainpart; i += N_PER_LOOP) {
228                 /* Copy N mbuf pointers to the S/W ring */
229                 for (j = 0; j < N_PER_LOOP; ++j) {
230                         (txep + i + j)->mbuf = *(pkts + i + j);
231                 }
232                 tx4(txdp + i, pkts + i);
233         }
234
235         if (unlikely(leftover > 0)) {
236                 for (i = 0; i < leftover; ++i) {
237                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
238                         tx1(txdp + mainpart + i, pkts + mainpart + i);
239                 }
240         }
241 }
242
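/*
 * Core of the simple transmit path: reclaim completed descriptors once
 * the free count drops below tx_free_thresh, fill the ring (handling a
 * possible wrap-around), set the RS bit every tx_rs_thresh descriptors
 * and finally bump the tail register.
 */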
243 static inline uint16_t
244 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
245              uint16_t nb_pkts)
246 {
247         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
248         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
249         uint16_t n = 0;
250
251         /*
252          * Begin scanning the H/W ring for done descriptors when the
253          * number of available descriptors drops below tx_free_thresh.  For
254          * each done descriptor, free the associated buffer.
255          */
256         if (txq->nb_tx_free < txq->tx_free_thresh)
257                 ixgbe_tx_free_bufs(txq);
258
259         /* Only use descriptors that are available */
260         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
261         if (unlikely(nb_pkts == 0))
262                 return 0;
263
264         /* Use exactly nb_pkts descriptors */
265         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
266
267         /*
268          * At this point, we know there are enough descriptors in the
269          * ring to transmit all the packets.  This assumes that each
270          * mbuf contains a single segment, and that no new offloads
271          * are expected, which would require a new context descriptor.
272          */
273
274         /*
275          * See if we're going to wrap-around. If so, handle the top
276          * of the descriptor ring first, then do the bottom.  If not,
277          * the processing looks just like the "bottom" part anyway...
278          */
279         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
280                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
281                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
282
283                 /*
284                  * We know that the last descriptor in the ring will need to
285                  * have its RS bit set because tx_rs_thresh has to be
286                  * a divisor of the ring size
287                  */
288                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
289                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
290                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
291
292                 txq->tx_tail = 0;
293         }
294
295         /* Fill H/W descriptor ring with mbuf data */
296         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
297         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
298
299         /*
300          * Determine if RS bit should be set
301          * This is what we actually want:
302          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
303          * but instead of subtracting 1 and doing >=, we can just do
304          * greater than without subtracting.
305          */
306         if (txq->tx_tail > txq->tx_next_rs) {
307                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
308                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
309                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
310                                                 txq->tx_rs_thresh);
311                 if (txq->tx_next_rs >= txq->nb_tx_desc)
312                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
313         }
314
315         /*
316          * Check for wrap-around. This would only happen if we used
317          * up to the last descriptor in the ring, no more, no less.
318          */
319         if (txq->tx_tail >= txq->nb_tx_desc)
320                 txq->tx_tail = 0;
321
322         /* update tail pointer */
323         rte_wmb();
324         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
325
326         return nb_pkts;
327 }
328
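/*
 * Burst entry point of the simple path: bursts larger than
 * RTE_PMD_IXGBE_TX_MAX_BURST are split into chunks and handed to
 * tx_xmit_pkts(), stopping early once the ring cannot accept more.
 */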
329 uint16_t
330 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
331                        uint16_t nb_pkts)
332 {
333         uint16_t nb_tx;
334
335         /* Transmit the whole burst in one call if it fits in TX_MAX_BURST pkts */
336         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
337                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
338
339         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
340         nb_tx = 0;
341         while (nb_pkts) {
342                 uint16_t ret, n;
343
344                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
345                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
346                 nb_tx = (uint16_t)(nb_tx + ret);
347                 nb_pkts = (uint16_t)(nb_pkts - ret);
348                 if (ret < n)
349                         break;
350         }
351
352         return nb_tx;
353 }
354
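/*
 * Each TX queue tracks two hardware context slots (txq->ctx_curr is
 * toggled between 0 and 1 in what_advctx_update() below). This function
 * programs one slot and mirrors the written values in txq->ctx_cache so
 * that later packets with the same offload settings can reuse the slot
 * instead of spending another context descriptor.
 */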
355 static inline void
356 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
357                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
358                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
359 {
360         uint32_t type_tucmd_mlhl;
361         uint32_t mss_l4len_idx = 0;
362         uint32_t ctx_idx;
363         uint32_t vlan_macip_lens;
364         union ixgbe_tx_offload tx_offload_mask;
365         uint32_t seqnum_seed = 0;
366
367         ctx_idx = txq->ctx_curr;
368         tx_offload_mask.data[0] = 0;
369         tx_offload_mask.data[1] = 0;
370         type_tucmd_mlhl = 0;
371
372         /* Specify which HW CTX to upload. */
373         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
374
375         if (ol_flags & PKT_TX_VLAN_PKT) {
376                 tx_offload_mask.vlan_tci |= ~0;
377         }
378
379         /* check if TCP segmentation is required for this packet */
380         if (ol_flags & PKT_TX_TCP_SEG) {
381                 /* implies IP cksum in IPv4 */
382                 if (ol_flags & PKT_TX_IP_CKSUM)
383                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
384                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
385                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
386                 else
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390
391                 tx_offload_mask.l2_len |= ~0;
392                 tx_offload_mask.l3_len |= ~0;
393                 tx_offload_mask.l4_len |= ~0;
394                 tx_offload_mask.tso_segsz |= ~0;
395                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
396                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
397         } else { /* no TSO, check if hardware checksum is needed */
398                 if (ol_flags & PKT_TX_IP_CKSUM) {
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
400                         tx_offload_mask.l2_len |= ~0;
401                         tx_offload_mask.l3_len |= ~0;
402                 }
403
404                 switch (ol_flags & PKT_TX_L4_MASK) {
405                 case PKT_TX_UDP_CKSUM:
406                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
407                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
408                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
409                         tx_offload_mask.l2_len |= ~0;
410                         tx_offload_mask.l3_len |= ~0;
411                         break;
412                 case PKT_TX_TCP_CKSUM:
413                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
414                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
415                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
416                         tx_offload_mask.l2_len |= ~0;
417                         tx_offload_mask.l3_len |= ~0;
418                         break;
419                 case PKT_TX_SCTP_CKSUM:
420                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
421                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
422                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
423                         tx_offload_mask.l2_len |= ~0;
424                         tx_offload_mask.l3_len |= ~0;
425                         break;
426                 default:
427                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
428                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
429                         break;
430                 }
431         }
432
433         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
434                 tx_offload_mask.outer_l2_len |= ~0;
435                 tx_offload_mask.outer_l3_len |= ~0;
436                 tx_offload_mask.l2_len |= ~0;
437                 seqnum_seed |= tx_offload.outer_l3_len
438                                << IXGBE_ADVTXD_OUTER_IPLEN;
439                 seqnum_seed |= tx_offload.l2_len
440                                << IXGBE_ADVTXD_TUNNEL_LEN;
441         }
442
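        /*
         * Cache only the offload fields that matter for this flag
         * combination (selected by tx_offload_mask built above), so that
         * unrelated fields cannot cause spurious cache mismatches later.
         */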
443         txq->ctx_cache[ctx_idx].flags = ol_flags;
444         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
445                 tx_offload_mask.data[0] & tx_offload.data[0];
446         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
447                 tx_offload_mask.data[1] & tx_offload.data[1];
448         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
449
450         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
451         vlan_macip_lens = tx_offload.l3_len;
452         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
453                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
454                                     IXGBE_ADVTXD_MACLEN_SHIFT);
455         else
456                 vlan_macip_lens |= (tx_offload.l2_len <<
457                                     IXGBE_ADVTXD_MACLEN_SHIFT);
458         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
459         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
460         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
461         ctx_txd->seqnum_seed     = seqnum_seed;
462 }
463
464 /*
465  * Check which hardware context can be used. Use the existing match
466  * or create a new context descriptor.
467  */
468 static inline uint32_t
469 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
470                    union ixgbe_tx_offload tx_offload)
471 {
472         /* If it matches the context currently in use */
473         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
474                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
475                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
476                      & tx_offload.data[0])) &&
477                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
478                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
479                      & tx_offload.data[1]))))
480                 return txq->ctx_curr;
481
482         /* Otherwise, check if it matches the other context */
483         txq->ctx_curr ^= 1;
484         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
485                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
486                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
487                      & tx_offload.data[0])) &&
488                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
489                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
490                      & tx_offload.data[1]))))
491                 return txq->ctx_curr;
492
493         /* No match, a new context descriptor will have to be built */
494         return IXGBE_CTX_NUM;
495 }
496
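/*
 * Translate mbuf ol_flags into the POPTS bits of the data descriptor's
 * olinfo_status field: IXSM requests IP checksum insertion, TXSM requests
 * L4 checksum insertion (also needed for TSO).
 */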
497 static inline uint32_t
498 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
499 {
500         uint32_t tmp = 0;
501
502         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
503                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
504         if (ol_flags & PKT_TX_IP_CKSUM)
505                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
506         if (ol_flags & PKT_TX_TCP_SEG)
507                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
508         return tmp;
509 }
510
511 static inline uint32_t
512 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
513 {
514         uint32_t cmdtype = 0;
515
516         if (ol_flags & PKT_TX_VLAN_PKT)
517                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
518         if (ol_flags & PKT_TX_TCP_SEG)
519                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
520         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
521                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
522         return cmdtype;
523 }
524
525 /* Default RS bit threshold values */
526 #ifndef DEFAULT_TX_RS_THRESH
527 #define DEFAULT_TX_RS_THRESH   32
528 #endif
529 #ifndef DEFAULT_TX_FREE_THRESH
530 #define DEFAULT_TX_FREE_THRESH 32
531 #endif
532
533 /* Reset transmit descriptors after they have been used */
534 static inline int
535 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
536 {
537         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
538         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
539         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
540         uint16_t nb_tx_desc = txq->nb_tx_desc;
541         uint16_t desc_to_clean_to;
542         uint16_t nb_tx_to_clean;
543         uint32_t status;
544
545         /* Determine the last descriptor needing to be cleaned */
546         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
547         if (desc_to_clean_to >= nb_tx_desc)
548                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
549
550         /* Check to make sure the last descriptor to clean is done */
551         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
552         status = txr[desc_to_clean_to].wb.status;
553         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
554                 PMD_TX_FREE_LOG(DEBUG,
555                                 "TX descriptor %4u is not done "
556                                 "(port=%d queue=%d)",
557                                 desc_to_clean_to,
558                                 txq->port_id, txq->queue_id);
559                 /* Failed to clean any descriptors, better luck next time */
560                 return -(1);
561         }
562
563         /* Figure out how many descriptors will be cleaned */
564         if (last_desc_cleaned > desc_to_clean_to)
565                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
566                                                         desc_to_clean_to);
567         else
568                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
569                                                 last_desc_cleaned);
570
571         PMD_TX_FREE_LOG(DEBUG,
572                         "Cleaning %4u TX descriptors: %4u to %4u "
573                         "(port=%d queue=%d)",
574                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
575                         txq->port_id, txq->queue_id);
576
577         /*
578          * The last descriptor to clean is done, so that means all the
579          * descriptors from the last descriptor that was cleaned
580          * up to the last descriptor with the RS bit set
581          * are done. Only reset the threshold descriptor.
582          */
583         txr[desc_to_clean_to].wb.status = 0;
584
585         /* Update the txq to reflect the last descriptor that was cleaned */
586         txq->last_desc_cleaned = desc_to_clean_to;
587         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
588
589         /* No Error */
590         return 0;
591 }
592
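/*
 * Full-featured transmit path: supports multi-segment mbufs and the
 * offloads in IXGBE_TX_OFFLOAD_MASK. For each packet it may first emit a
 * context descriptor (unless the offload settings are already cached),
 * then one data descriptor per segment, setting EOP on the last segment
 * and RS at least every tx_rs_thresh descriptors.
 */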
593 uint16_t
594 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
595                 uint16_t nb_pkts)
596 {
597         struct ixgbe_tx_queue *txq;
598         struct ixgbe_tx_entry *sw_ring;
599         struct ixgbe_tx_entry *txe, *txn;
600         volatile union ixgbe_adv_tx_desc *txr;
601         volatile union ixgbe_adv_tx_desc *txd, *txp;
602         struct rte_mbuf     *tx_pkt;
603         struct rte_mbuf     *m_seg;
604         uint64_t buf_dma_addr;
605         uint32_t olinfo_status;
606         uint32_t cmd_type_len;
607         uint32_t pkt_len;
608         uint16_t slen;
609         uint64_t ol_flags;
610         uint16_t tx_id;
611         uint16_t tx_last;
612         uint16_t nb_tx;
613         uint16_t nb_used;
614         uint64_t tx_ol_req;
615         uint32_t ctx = 0;
616         uint32_t new_ctx;
617         union ixgbe_tx_offload tx_offload;
618
619         tx_offload.data[0] = 0;
620         tx_offload.data[1] = 0;
621         txq = tx_queue;
622         sw_ring = txq->sw_ring;
623         txr     = txq->tx_ring;
624         tx_id   = txq->tx_tail;
625         txe = &sw_ring[tx_id];
626         txp = NULL;
627
628         /* Determine if the descriptor ring needs to be cleaned. */
629         if (txq->nb_tx_free < txq->tx_free_thresh)
630                 ixgbe_xmit_cleanup(txq);
631
632         rte_prefetch0(&txe->mbuf->pool);
633
634         /* TX loop */
635         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
636                 new_ctx = 0;
637                 tx_pkt = *tx_pkts++;
638                 pkt_len = tx_pkt->pkt_len;
639
640                 /*
641                  * Determine how many (if any) context descriptors
642                  * are needed for offload functionality.
643                  */
644                 ol_flags = tx_pkt->ol_flags;
645
646                 /* If hardware offload is required */
647                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
648                 if (tx_ol_req) {
649                         tx_offload.l2_len = tx_pkt->l2_len;
650                         tx_offload.l3_len = tx_pkt->l3_len;
651                         tx_offload.l4_len = tx_pkt->l4_len;
652                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
653                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
654                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
655                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
656
657                         /* Check if a new context must be built or an existing one reused. */
658                         ctx = what_advctx_update(txq, tx_ol_req,
659                                 tx_offload);
660                         /* Only allocate a context descriptor if required */
661                         new_ctx = (ctx == IXGBE_CTX_NUM);
662                         ctx = txq->ctx_curr;
663                 }
664
665                 /*
666                  * Keep track of how many descriptors are used this loop.
667                  * This will always be the number of segments plus the number of
668                  * context descriptors required to transmit the packet.
669                  */
670                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
671
672                 if (txp != NULL &&
673                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
674                         /* set RS on the previous packet in the burst */
675                         txp->read.cmd_type_len |=
676                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
677
678                 /*
679                  * The number of descriptors that must be allocated for a
680                  * packet is the number of segments of that packet, plus 1
681                  * Context Descriptor for the hardware offload, if any.
682                  * Determine the last TX descriptor to allocate in the TX ring
683                  * for the packet, starting from the current position (tx_id)
684                  * in the ring.
685                  */
686                 tx_last = (uint16_t) (tx_id + nb_used - 1);
687
688                 /* Circular ring */
689                 if (tx_last >= txq->nb_tx_desc)
690                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
691
692                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
693                            " tx_first=%u tx_last=%u",
694                            (unsigned) txq->port_id,
695                            (unsigned) txq->queue_id,
696                            (unsigned) pkt_len,
697                            (unsigned) tx_id,
698                            (unsigned) tx_last);
699
700                 /*
701                  * Make sure there are enough TX descriptors available to
702                  * transmit the entire packet.
703                  * nb_used better be less than or equal to txq->tx_rs_thresh
704                  */
705                 if (nb_used > txq->nb_tx_free) {
706                         PMD_TX_FREE_LOG(DEBUG,
707                                         "Not enough free TX descriptors "
708                                         "nb_used=%4u nb_free=%4u "
709                                         "(port=%d queue=%d)",
710                                         nb_used, txq->nb_tx_free,
711                                         txq->port_id, txq->queue_id);
712
713                         if (ixgbe_xmit_cleanup(txq) != 0) {
714                                 /* Could not clean any descriptors */
715                                 if (nb_tx == 0)
716                                         return 0;
717                                 goto end_of_tx;
718                         }
719
720                         /* nb_used better be <= txq->tx_rs_thresh */
721                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
722                                 PMD_TX_FREE_LOG(DEBUG,
723                                         "The number of descriptors needed to "
724                                         "transmit the packet exceeds the "
725                                         "RS bit threshold. This will impact "
726                                         "performance. "
727                                         "nb_used=%4u nb_free=%4u "
728                                         "tx_rs_thresh=%4u. "
729                                         "(port=%d queue=%d)",
730                                         nb_used, txq->nb_tx_free,
731                                         txq->tx_rs_thresh,
732                                         txq->port_id, txq->queue_id);
733                                 /*
734                                  * Loop here until there are enough TX
735                                  * descriptors or until the ring cannot be
736                                  * cleaned.
737                                  */
738                                 while (nb_used > txq->nb_tx_free) {
739                                         if (ixgbe_xmit_cleanup(txq) != 0) {
740                                                 /*
741                                                  * Could not clean any
742                                                  * descriptors
743                                                  */
744                                                 if (nb_tx == 0)
745                                                         return 0;
746                                                 goto end_of_tx;
747                                         }
748                                 }
749                         }
750                 }
751
752                 /*
753                  * By now there are enough free TX descriptors to transmit
754                  * the packet.
755                  */
756
757                 /*
758                  * Set common flags of all TX Data Descriptors.
759                  *
760                  * The following bits must be set in all Data Descriptors:
761                  *   - IXGBE_ADVTXD_DTYP_DATA
762                  *   - IXGBE_ADVTXD_DCMD_DEXT
763                  *
764                  * The following bits must be set in the first Data Descriptor
765                  * and are ignored in the other ones:
766                  *   - IXGBE_ADVTXD_DCMD_IFCS
767                  *   - IXGBE_ADVTXD_MAC_1588
768                  *   - IXGBE_ADVTXD_DCMD_VLE
769                  *
770                  * The following bits must only be set in the last Data
771                  * Descriptor:
772                  *   - IXGBE_TXD_CMD_EOP
773                  *
774                  * The following bits can be set in any Data Descriptor, but
775                  * are only set in the last Data Descriptor:
776                  *   - IXGBE_TXD_CMD_RS
777                  */
778                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
779                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
780
781 #ifdef RTE_LIBRTE_IEEE1588
782                 if (ol_flags & PKT_TX_IEEE1588_TMST)
783                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
784 #endif
785
786                 olinfo_status = 0;
787                 if (tx_ol_req) {
788
789                         if (ol_flags & PKT_TX_TCP_SEG) {
790                                 /* when TSO is on, the paylen in the descriptor
791                                  * is not the packet len but the TCP payload len */
792                                 pkt_len -= (tx_offload.l2_len +
793                                         tx_offload.l3_len + tx_offload.l4_len);
794                         }
795
796                         /*
797                          * Setup the TX Advanced Context Descriptor if required
798                          */
799                         if (new_ctx) {
800                                 volatile struct ixgbe_adv_tx_context_desc *
801                                     ctx_txd;
802
803                                 ctx_txd = (volatile struct
804                                     ixgbe_adv_tx_context_desc *)
805                                     &txr[tx_id];
806
807                                 txn = &sw_ring[txe->next_id];
808                                 rte_prefetch0(&txn->mbuf->pool);
809
810                                 if (txe->mbuf != NULL) {
811                                         rte_pktmbuf_free_seg(txe->mbuf);
812                                         txe->mbuf = NULL;
813                                 }
814
815                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
816                                         tx_offload);
817
818                                 txe->last_id = tx_last;
819                                 tx_id = txe->next_id;
820                                 txe = txn;
821                         }
822
823                         /*
824                          * Set up the TX Advanced Data Descriptor.
825                          * This path is taken whether a new context descriptor
826                          * was built or an existing one is being reused.
827                          */
828                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
829                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
830                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
831                 }
832
833                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
834
835                 m_seg = tx_pkt;
836                 do {
837                         txd = &txr[tx_id];
838                         txn = &sw_ring[txe->next_id];
839                         rte_prefetch0(&txn->mbuf->pool);
840
841                         if (txe->mbuf != NULL)
842                                 rte_pktmbuf_free_seg(txe->mbuf);
843                         txe->mbuf = m_seg;
844
845                         /*
846                          * Set up Transmit Data Descriptor.
847                          */
848                         slen = m_seg->data_len;
849                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
850                         txd->read.buffer_addr =
851                                 rte_cpu_to_le_64(buf_dma_addr);
852                         txd->read.cmd_type_len =
853                                 rte_cpu_to_le_32(cmd_type_len | slen);
854                         txd->read.olinfo_status =
855                                 rte_cpu_to_le_32(olinfo_status);
856                         txe->last_id = tx_last;
857                         tx_id = txe->next_id;
858                         txe = txn;
859                         m_seg = m_seg->next;
860                 } while (m_seg != NULL);
861
862                 /*
863                  * The last packet data descriptor needs End Of Packet (EOP)
864                  */
865                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
866                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
867                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
868
869                 /* Set RS bit only on threshold packets' last descriptor */
870                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
871                         PMD_TX_FREE_LOG(DEBUG,
872                                         "Setting RS bit on TXD id="
873                                         "%4u (port=%d queue=%d)",
874                                         tx_last, txq->port_id, txq->queue_id);
875
876                         cmd_type_len |= IXGBE_TXD_CMD_RS;
877
878                         /* Update txq RS bit counters */
879                         txq->nb_tx_used = 0;
880                         txp = NULL;
881                 } else
882                         txp = txd;
883
884                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
885         }
886
887 end_of_tx:
888         /* set RS on last packet in the burst */
889         if (txp != NULL)
890                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
891
892         rte_wmb();
893
894         /*
895          * Set the Transmit Descriptor Tail (TDT)
896          */
897         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
898                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
899                    (unsigned) tx_id, (unsigned) nb_tx);
900         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
901         txq->tx_tail = tx_id;
902
903         return nb_tx;
904 }
905
906 /*********************************************************************
907  *
908  *  RX functions
909  *
910  **********************************************************************/
911
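/*
 * Encodings of the packet-type information extracted from the RX
 * descriptor. They are used as indexes into the translation tables in
 * ixgbe_rxd_pkt_info_to_pkt_type() below; the NVGRE/VXLAN values index
 * the separate tunnel table.
 */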
912 #define IXGBE_PACKET_TYPE_ETHER                         0X00
913 #define IXGBE_PACKET_TYPE_IPV4                          0X01
914 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
915 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
916 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
917 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
918 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
919 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
920 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
921 #define IXGBE_PACKET_TYPE_IPV6                          0X04
922 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
923 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
924 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
925 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
926 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
927 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
928 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
929 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
930 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
931 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
932 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
933 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
934 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
935 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
936 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
937 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
938 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
939 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
940 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
941 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
942 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
943 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
944 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
945
946 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
947 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
948 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
949 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
950 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
951 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
952 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
953 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
954 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
955 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
956 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
957 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
958 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
959 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
960 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
961 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
962 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
963 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
964 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
965 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
966 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
967 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
968 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
969
970 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
971 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
972 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
973 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
974 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
975 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
976 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
977 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
978 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
979 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
980 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
981 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
982 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
983 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
984 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
985 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
986 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
987 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
988 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
989 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
990 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
991 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
992 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
993
994 #define IXGBE_PACKET_TYPE_MAX               0X80
995 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
996 #define IXGBE_PACKET_TYPE_SHIFT             0X04
997
998 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
999 static inline uint32_t
1000 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1001 {
1002         /**
1003          * Use two different tables for normal packets and tunnel packets
1004          * to save space.
1005          */
1006         static const uint32_t
1007                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1008                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1009                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1010                         RTE_PTYPE_L3_IPV4,
1011                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1012                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1013                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1014                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1015                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1016                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1017                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1018                         RTE_PTYPE_L3_IPV4_EXT,
1019                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1020                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1021                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1022                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1023                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1024                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1025                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1026                         RTE_PTYPE_L3_IPV6,
1027                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1028                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1029                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1030                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1031                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1032                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1033                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1034                         RTE_PTYPE_L3_IPV6_EXT,
1035                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1036                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1037                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1038                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1039                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1040                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1041                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1042                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1043                         RTE_PTYPE_INNER_L3_IPV6,
1044                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1045                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1046                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1047                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1048                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1049                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1050                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1051                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1052                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1053                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1054                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1055                         RTE_PTYPE_INNER_L3_IPV6,
1056                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1057                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1058                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1059                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1060                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1061                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1062                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1063                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1064                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1065                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1066                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1067                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1068                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1069                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1070                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1071                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1072                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1073                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1074                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1075                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1076                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1077                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1079                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1080                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1081                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1082                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1083                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1085                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1086                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1087                         RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1089                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1090         };
1091
1092         static const uint32_t
1093                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1094                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1095                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1096                         RTE_PTYPE_INNER_L2_ETHER,
1097                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1099                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1100                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1101                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1102                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1103                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1104                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1105                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1106                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1107                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1108                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1109                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1110                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1111                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1112                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1113                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1114                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1115                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1116                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1117                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1118                         RTE_PTYPE_INNER_L4_TCP,
1119                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1120                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1121                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1122                         RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1125                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1126                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1128                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1129                         RTE_PTYPE_INNER_L4_TCP,
1130                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1131                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1132                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1133                         RTE_PTYPE_INNER_L3_IPV4,
1134                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1135                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1136                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1137                         RTE_PTYPE_INNER_L4_UDP,
1138                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1140                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1141                         RTE_PTYPE_INNER_L4_UDP,
1142                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1143                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1144                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1145                         RTE_PTYPE_INNER_L4_SCTP,
1146                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1148                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1149                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1150                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1151                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1152                         RTE_PTYPE_INNER_L4_UDP,
1153                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1154                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1155                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1156                         RTE_PTYPE_INNER_L4_SCTP,
1157                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1158                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1159                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1160                         RTE_PTYPE_INNER_L3_IPV4,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1164                         RTE_PTYPE_INNER_L4_SCTP,
1165                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1166                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1167                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1168                         RTE_PTYPE_INNER_L4_SCTP,
1169                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1171                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1172                         RTE_PTYPE_INNER_L4_TCP,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1176                         RTE_PTYPE_INNER_L4_UDP,
1177
1178                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1179                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1180                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1181                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1182                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1183                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1184                         RTE_PTYPE_INNER_L3_IPV4,
1185                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1186                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1187                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1188                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1189                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1190                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1191                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1192                         RTE_PTYPE_INNER_L3_IPV6,
1193                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1194                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1195                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1196                         RTE_PTYPE_INNER_L3_IPV4,
1197                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1198                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1199                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1200                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1201                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1202                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1203                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1204                         RTE_PTYPE_INNER_L3_IPV4,
1205                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1206                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1207                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1208                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1209                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1210                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1211                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1212                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1213                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1215                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1216                         RTE_PTYPE_INNER_L3_IPV4,
1217                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1219                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1220                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1221                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1224                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1227                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1228                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1229                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1231                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1232                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1233                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1235                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1236                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1237                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1239                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1240                         RTE_PTYPE_INNER_L3_IPV4,
1241                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1242                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1243                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1244                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1250                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1251                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1252                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1269         };
1270
1271         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1272                 return RTE_PTYPE_UNKNOWN;
1273
1274         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1275
1276         /* For tunnel packet */
1277         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1278                 /* Remove the tunnel bit to save the space. */
1279                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1280                 return ptype_table_tn[pkt_info];
1281         }
1282
1283         /**
1284          * For x550, if the packet is not tunneled,
1285          * the tunnel type bits are set to 0.
1286          * Reuse the 82599 mask.
1287          */
1288         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1289
1290         return ptype_table[pkt_info];
1291 }
1292
1293 static inline uint64_t
1294 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1295 {
1296         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1297                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1298                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1299                 PKT_RX_RSS_HASH, 0, 0, 0,
1300                 0, 0, 0,  PKT_RX_FDIR,
1301         };
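        /*
         * Note: the table above is indexed by the low 4 bits of pkt_info.
         * For example, an index of 0x1 yields PKT_RX_RSS_HASH, the last
         * entry (0xF) yields PKT_RX_FDIR, and types that carry no usable
         * hash yield 0.
         */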
1302 #ifdef RTE_LIBRTE_IEEE1588
1303         static uint64_t ip_pkt_etqf_map[8] = {
1304                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1305                 0, 0, 0, 0,
1306         };
1307
1308         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1309                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0x07] |
1310                                 ip_rss_types_map[pkt_info & 0xF];
1311         else
1312                 return ip_rss_types_map[pkt_info & 0xF];
1313 #else
1314         return ip_rss_types_map[pkt_info & 0xF];
1315 #endif
1316 }
1317
1318 static inline uint64_t
1319 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1320 {
1321         uint64_t pkt_flags;
1322
1323         /*
1324          * Check only whether a VLAN is present.
1325          * Do not check whether the L3/L4 rx checksum was done by the NIC;
1326          * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
1327          */
1328         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1329
1330 #ifdef RTE_LIBRTE_IEEE1588
1331         if (rx_status & IXGBE_RXD_STAT_TMST)
1332                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1333 #endif
1334         return pkt_flags;
1335 }
1336
1337 static inline uint64_t
1338 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1339 {
1340         uint64_t pkt_flags;
1341
1342         /*
1343          * Bit 31: IPE, IPv4 checksum error
1344          * Bit 30: L4I, L4 integrity error
1345          */
1346         static uint64_t error_to_pkt_flags_map[4] = {
1347                 0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1348                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1349         };
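        /*
         * Worked example: after the shift and mask below, bit 0 of the
         * index is the L4 integrity error and bit 1 the IPv4 checksum
         * error, so an index of 0x2 yields PKT_RX_IP_CKSUM_BAD only,
         * while 0x3 yields both PKT_RX_IP_CKSUM_BAD and
         * PKT_RX_L4_CKSUM_BAD.
         */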
1350         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1351                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1352
1353         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1354             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1355                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1356         }
1357
1358         return pkt_flags;
1359 }
1360
1361 /*
1362  * LOOK_AHEAD defines how many desc statuses to check beyond the
1363  * current descriptor.
1364  * It must be a pound define for optimal performance.
1365  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1366  * function only works with LOOK_AHEAD=8.
1367  */
1368 #define LOOK_AHEAD 8
1369 #if (LOOK_AHEAD != 8)
1370 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1371 #endif
1372 static inline int
1373 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1374 {
1375         volatile union ixgbe_adv_rx_desc *rxdp;
1376         struct ixgbe_rx_entry *rxep;
1377         struct rte_mbuf *mb;
1378         uint16_t pkt_len;
1379         uint64_t pkt_flags;
1380         int nb_dd;
1381         uint32_t s[LOOK_AHEAD];
1382         uint32_t pkt_info[LOOK_AHEAD];
1383         int i, j, nb_rx = 0;
1384         uint32_t status;
1385         uint64_t vlan_flags = rxq->vlan_flags;
1386
1387         /* get references to current descriptor and S/W ring entry */
1388         rxdp = &rxq->rx_ring[rxq->rx_tail];
1389         rxep = &rxq->sw_ring[rxq->rx_tail];
1390
1391         status = rxdp->wb.upper.status_error;
1392         /* check to make sure there is at least 1 packet to receive */
1393         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1394                 return 0;
1395
1396         /*
1397          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1398          * reference packets that are ready to be received.
1399          */
1400         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1401              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1402                 /* Read desc statuses backwards to avoid race condition */
1403                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1404                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1405
1406                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1407                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1408                                                        lo_dword.data);
1409
1410                 /* Compute how many status bits were set */
1411                 nb_dd = 0;
1412                 for (j = 0; j < LOOK_AHEAD; ++j)
1413                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
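                /*
                 * The count above relies on IXGBE_RXDADV_STAT_DD being
                 * bit 0, so each masked status contributes 0 or 1, and on
                 * descriptors being completed in order, so the set DD bits
                 * are contiguous.
                 */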
1414
1415                 nb_rx += nb_dd;
1416
1417                 /* Translate descriptor info to mbuf format */
1418                 for (j = 0; j < nb_dd; ++j) {
1419                         mb = rxep[j].mbuf;
1420                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1421                                   rxq->crc_len;
1422                         mb->data_len = pkt_len;
1423                         mb->pkt_len = pkt_len;
1424                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1425
1426                         /* convert descriptor fields to rte mbuf flags */
1427                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1428                                 vlan_flags);
1429                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1430                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1431                                         ((uint16_t)pkt_info[j]);
1432                         mb->ol_flags = pkt_flags;
1433                         mb->packet_type =
1434                                 ixgbe_rxd_pkt_info_to_pkt_type
1435                                         (pkt_info[j], rxq->pkt_type_mask);
1436
1437                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1438                                 mb->hash.rss = rte_le_to_cpu_32(
1439                                     rxdp[j].wb.lower.hi_dword.rss);
1440                         else if (pkt_flags & PKT_RX_FDIR) {
1441                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1442                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1443                                     IXGBE_ATR_HASH_MASK;
1444                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1445                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1446                         }
1447                 }
1448
1449                 /* Move mbuf pointers from the S/W ring to the stage */
1450                 for (j = 0; j < LOOK_AHEAD; ++j) {
1451                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1452                 }
1453
1454                 /* stop if not all LOOK_AHEAD descriptors were done */
1455                 if (nb_dd != LOOK_AHEAD)
1456                         break;
1457         }
1458
1459         /* clear software ring entries so we can clean up correctly */
1460         for (i = 0; i < nb_rx; ++i) {
1461                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1462         }
1463
1464
1465         return nb_rx;
1466 }
1467
1468 static inline int
1469 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1470 {
1471         volatile union ixgbe_adv_rx_desc *rxdp;
1472         struct ixgbe_rx_entry *rxep;
1473         struct rte_mbuf *mb;
1474         uint16_t alloc_idx;
1475         __le64 dma_addr;
1476         int diag, i;
1477
1478         /* allocate buffers in bulk directly into the S/W ring */
1479         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1480         rxep = &rxq->sw_ring[alloc_idx];
1481         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1482                                     rxq->rx_free_thresh);
1483         if (unlikely(diag != 0))
1484                 return -ENOMEM;
1485
1486         rxdp = &rxq->rx_ring[alloc_idx];
1487         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1488                 /* populate the static rte mbuf fields */
1489                 mb = rxep[i].mbuf;
1490                 if (reset_mbuf) {
1491                         mb->next = NULL;
1492                         mb->nb_segs = 1;
1493                         mb->port = rxq->port_id;
1494                 }
1495
1496                 rte_mbuf_refcnt_set(mb, 1);
1497                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1498
1499                 /* populate the descriptors */
1500                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1501                 rxdp[i].read.hdr_addr = 0;
1502                 rxdp[i].read.pkt_addr = dma_addr;
1503         }
1504
1505         /* update state of internal queue structure */
1506         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1507         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1508                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
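        /*
         * Worked example: with nb_rx_desc = 128, rx_free_thresh = 32 and
         * the usual initial trigger of rx_free_thresh - 1, rx_free_trigger
         * steps through 31, 63, 95 and 127; alloc_idx is always
         * trigger - 31, so buffers are refilled in aligned blocks of 32
         * and the trigger then wraps back to 31.
         */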
1509
1510         /* no errors */
1511         return 0;
1512 }
1513
1514 static inline uint16_t
1515 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1516                          uint16_t nb_pkts)
1517 {
1518         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1519         int i;
1520
1521         /* how many packets are ready to return? */
1522         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1523
1524         /* copy mbuf pointers to the application's packet list */
1525         for (i = 0; i < nb_pkts; ++i)
1526                 rx_pkts[i] = stage[i];
1527
1528         /* update internal queue state */
1529         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1530         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1531
1532         return nb_pkts;
1533 }
1534
1535 static inline uint16_t
1536 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1537              uint16_t nb_pkts)
1538 {
1539         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1540         uint16_t nb_rx = 0;
1541
1542         /* Any previously recv'd pkts will be returned from the Rx stage */
1543         if (rxq->rx_nb_avail)
1544                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1545
1546         /* Scan the H/W ring for packets to receive */
1547         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1548
1549         /* update internal queue state */
1550         rxq->rx_next_avail = 0;
1551         rxq->rx_nb_avail = nb_rx;
1552         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1553
1554         /* if required, allocate new buffers to replenish descriptors */
1555         if (rxq->rx_tail > rxq->rx_free_trigger) {
1556                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1557
1558                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1559                         int i, j;
1560
1561                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1562                                    "queue_id=%u", (unsigned) rxq->port_id,
1563                                    (unsigned) rxq->queue_id);
1564
1565                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1566                                 rxq->rx_free_thresh;
1567
1568                         /*
1569                          * Need to rewind any previous receives if we cannot
1570                          * allocate new buffers to replenish the old ones.
1571                          */
1572                         rxq->rx_nb_avail = 0;
1573                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1574                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1575                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1576
1577                         return 0;
1578                 }
1579
1580                 /* update tail pointer */
1581                 rte_wmb();
1582                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1583         }
1584
1585         if (rxq->rx_tail >= rxq->nb_rx_desc)
1586                 rxq->rx_tail = 0;
1587
1588         /* received any packets this loop? */
1589         if (rxq->rx_nb_avail)
1590                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1591
1592         return 0;
1593 }
1594
1595 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1596 uint16_t
1597 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1598                            uint16_t nb_pkts)
1599 {
1600         uint16_t nb_rx;
1601
1602         if (unlikely(nb_pkts == 0))
1603                 return 0;
1604
1605         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1606                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1607
1608         /* request is relatively large, chunk it up */
1609         nb_rx = 0;
1610         while (nb_pkts) {
1611                 uint16_t ret, n;
1612
1613                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1614                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1615                 nb_rx = (uint16_t)(nb_rx + ret);
1616                 nb_pkts = (uint16_t)(nb_pkts - ret);
1617                 if (ret < n)
1618                         break;
1619         }
1620
1621         return nb_rx;
1622 }
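/*
 * For example, assuming a burst limit of 32, an rte_eth_rx_burst() request
 * for 100 packets is served as internal bursts of 32, 32, 32 and 4, stopping
 * early if any of them comes back short.
 */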
1623
1624 uint16_t
1625 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1626                 uint16_t nb_pkts)
1627 {
1628         struct ixgbe_rx_queue *rxq;
1629         volatile union ixgbe_adv_rx_desc *rx_ring;
1630         volatile union ixgbe_adv_rx_desc *rxdp;
1631         struct ixgbe_rx_entry *sw_ring;
1632         struct ixgbe_rx_entry *rxe;
1633         struct rte_mbuf *rxm;
1634         struct rte_mbuf *nmb;
1635         union ixgbe_adv_rx_desc rxd;
1636         uint64_t dma_addr;
1637         uint32_t staterr;
1638         uint32_t pkt_info;
1639         uint16_t pkt_len;
1640         uint16_t rx_id;
1641         uint16_t nb_rx;
1642         uint16_t nb_hold;
1643         uint64_t pkt_flags;
1644         uint64_t vlan_flags;
1645
1646         nb_rx = 0;
1647         nb_hold = 0;
1648         rxq = rx_queue;
1649         rx_id = rxq->rx_tail;
1650         rx_ring = rxq->rx_ring;
1651         sw_ring = rxq->sw_ring;
1652         vlan_flags = rxq->vlan_flags;
1653         while (nb_rx < nb_pkts) {
1654                 /*
1655                  * The order of operations here is important as the DD status
1656                  * bit must not be read after any other descriptor fields.
1657                  * rx_ring and rxdp are pointing to volatile data so the order
1658                  * of accesses cannot be reordered by the compiler. If they were
1659                  * not volatile, they could be reordered which could lead to
1660                  * using invalid descriptor fields when read from rxd.
1661                  */
1662                 rxdp = &rx_ring[rx_id];
1663                 staterr = rxdp->wb.upper.status_error;
1664                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1665                         break;
1666                 rxd = *rxdp;
1667
1668                 /*
1669                  * End of packet.
1670                  *
1671                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1672                  * is likely to be invalid and to be dropped by the various
1673                  * validation checks performed by the network stack.
1674                  *
1675                  * Allocate a new mbuf to replenish the RX ring descriptor.
1676                  * If the allocation fails:
1677                  *    - arrange for that RX descriptor to be the first one
1678                  *      being parsed the next time the receive function is
1679                  *      invoked [on the same queue].
1680                  *
1681                  *    - Stop parsing the RX ring and return immediately.
1682                  *
1683                  * This policy does not drop the packet received in the RX
1684                  * descriptor for which the allocation of a new mbuf failed.
1685                  * Thus, it allows that packet to be later retrieved if
1686                  * mbufs have been freed in the meantime.
1687                  * As a side effect, holding RX descriptors instead of
1688                  * systematically giving them back to the NIC may lead to
1689                  * RX ring exhaustion situations.
1690                  * However, the NIC can gracefully prevent such situations
1691                  * from happening by sending specific "back-pressure" flow
1692                  * control frames to its peer(s).
1693                  */
1694                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1695                            "ext_err_stat=0x%08x pkt_len=%u",
1696                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1697                            (unsigned) rx_id, (unsigned) staterr,
1698                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1699
1700                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1701                 if (nmb == NULL) {
1702                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1703                                    "queue_id=%u", (unsigned) rxq->port_id,
1704                                    (unsigned) rxq->queue_id);
1705                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1706                         break;
1707                 }
1708
1709                 nb_hold++;
1710                 rxe = &sw_ring[rx_id];
1711                 rx_id++;
1712                 if (rx_id == rxq->nb_rx_desc)
1713                         rx_id = 0;
1714
1715                 /* Prefetch next mbuf while processing current one. */
1716                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1717
1718                 /*
1719                  * When next RX descriptor is on a cache-line boundary,
1720                  * prefetch the next 4 RX descriptors and the next 8 pointers
1721                  * to mbufs.
1722                  */
1723                 if ((rx_id & 0x3) == 0) {
1724                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1725                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1726                 }
1727
1728                 rxm = rxe->mbuf;
1729                 rxe->mbuf = nmb;
1730                 dma_addr =
1731                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1732                 rxdp->read.hdr_addr = 0;
1733                 rxdp->read.pkt_addr = dma_addr;
1734
1735                 /*
1736                  * Initialize the returned mbuf.
1737                  * 1) setup generic mbuf fields:
1738                  *    - number of segments,
1739                  *    - next segment,
1740                  *    - packet length,
1741                  *    - RX port identifier.
1742                  * 2) integrate hardware offload data, if any:
1743                  *    - RSS flag & hash,
1744                  *    - IP checksum flag,
1745                  *    - VLAN TCI, if any,
1746                  *    - error flags.
1747                  */
1748                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1749                                       rxq->crc_len);
1750                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1751                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1752                 rxm->nb_segs = 1;
1753                 rxm->next = NULL;
1754                 rxm->pkt_len = pkt_len;
1755                 rxm->data_len = pkt_len;
1756                 rxm->port = rxq->port_id;
1757
1758                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1759                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1760                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1761
1762                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1763                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1764                 pkt_flags = pkt_flags |
1765                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1766                 rxm->ol_flags = pkt_flags;
1767                 rxm->packet_type =
1768                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1769                                                        rxq->pkt_type_mask);
1770
1771                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1772                         rxm->hash.rss = rte_le_to_cpu_32(
1773                                                 rxd.wb.lower.hi_dword.rss);
1774                 else if (pkt_flags & PKT_RX_FDIR) {
1775                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1776                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1777                                         IXGBE_ATR_HASH_MASK;
1778                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1779                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1780                 }
1781                 /*
1782                  * Store the mbuf address into the next entry of the array
1783                  * of returned packets.
1784                  */
1785                 rx_pkts[nb_rx++] = rxm;
1786         }
1787         rxq->rx_tail = rx_id;
1788
1789         /*
1790          * If the number of free RX descriptors is greater than the RX free
1791          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1792          * register.
1793          * Update the RDT with the value of the last processed RX descriptor
1794          * minus 1, to guarantee that the RDT register is never equal to the
1795          * RDH register, which creates a "full" ring situation from the
1796          * hardware point of view...
1797          */
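        /*
         * For example, with a 128-descriptor ring, a software tail of 0
         * writes RDT = 127 and a tail of 40 writes RDT = 39, so the RDT
         * register can never catch up with the RDH register.
         */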
1798         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1799         if (nb_hold > rxq->rx_free_thresh) {
1800                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1801                            "nb_hold=%u nb_rx=%u",
1802                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1803                            (unsigned) rx_id, (unsigned) nb_hold,
1804                            (unsigned) nb_rx);
1805                 rx_id = (uint16_t) ((rx_id == 0) ?
1806                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1807                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1808                 nb_hold = 0;
1809         }
1810         rxq->nb_rx_hold = nb_hold;
1811         return nb_rx;
1812 }
1813
1814 /**
1815  * Detect an RSC descriptor.
1816  */
1817 static inline uint32_t
1818 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1819 {
1820         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1821                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1822 }
1823
1824 /**
1825  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1826  *
1827  * Fill the following info in the HEAD buffer of the Rx cluster:
1828  *    - RX port identifier
1829  *    - hardware offload data, if any:
1830  *      - RSS flag & hash
1831  *      - IP checksum flag
1832  *      - VLAN TCI, if any
1833  *      - error flags
1834  * @head HEAD of the packet cluster
1835  * @desc HW descriptor to get data from
1836  * @rxq Pointer to the Rx queue
1837  */
1838 static inline void
1839 ixgbe_fill_cluster_head_buf(
1840         struct rte_mbuf *head,
1841         union ixgbe_adv_rx_desc *desc,
1842         struct ixgbe_rx_queue *rxq,
1843         uint32_t staterr)
1844 {
1845         uint32_t pkt_info;
1846         uint64_t pkt_flags;
1847
1848         head->port = rxq->port_id;
1849
1850         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1851          * set in the pkt_flags field.
1852          */
1853         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1854         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1855         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1856         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1857         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1858         head->ol_flags = pkt_flags;
1859         head->packet_type =
1860                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1861
1862         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1863                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1864         else if (pkt_flags & PKT_RX_FDIR) {
1865                 head->hash.fdir.hash =
1866                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1867                                                           & IXGBE_ATR_HASH_MASK;
1868                 head->hash.fdir.id =
1869                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1870         }
1871 }
1872
1873 /**
1874  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1875  *
1876  * @rx_queue Rx queue handle
1877  * @rx_pkts table of received packets
1878  * @nb_pkts size of rx_pkts table
1879  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1880  *
1881  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1882  * additional ring of ixgbe_scattered_rx_entry's that holds the RSC info.
1883  *
1884  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1885  * 1) When non-EOP RSC completion arrives:
1886  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1887  *       segment's data length.
1888  *    b) Set the "next" pointer of the current segment to point to the segment
1889  *       at the NEXTP index.
1890  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1891  *       in the sw_sc_ring.
1892  * 2) When EOP arrives we just update the cluster's total length and offload
1893  *    flags and deliver the cluster up to the upper layers. In our case - put it
1894  *    in the rx_pkts table.
1895  *
1896  * Returns the number of received packets/clusters (according to the "bulk
1897  * receive" interface).
1898  */
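/*
 * Illustrative example: for an RSC aggregation spread over descriptors
 * 5 -> 9 -> 12, the non-EOP completions at 5 and 9 each link their mbuf to
 * the mbuf at the NEXTP index and pass the HEAD pointer forward through
 * sw_sc_ring[9] and sw_sc_ring[12]; the EOP completion at 12 then fixes up
 * the HEAD (length, flags, packet type) and returns it as a single
 * multi-segment packet.
 */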
1899 static inline uint16_t
1900 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1901                     bool bulk_alloc)
1902 {
1903         struct ixgbe_rx_queue *rxq = rx_queue;
1904         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1905         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1906         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1907         uint16_t rx_id = rxq->rx_tail;
1908         uint16_t nb_rx = 0;
1909         uint16_t nb_hold = rxq->nb_rx_hold;
1910         uint16_t prev_id = rxq->rx_tail;
1911
1912         while (nb_rx < nb_pkts) {
1913                 bool eop;
1914                 struct ixgbe_rx_entry *rxe;
1915                 struct ixgbe_scattered_rx_entry *sc_entry;
1916                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1917                 struct ixgbe_rx_entry *next_rxe = NULL;
1918                 struct rte_mbuf *first_seg;
1919                 struct rte_mbuf *rxm;
1920                 struct rte_mbuf *nmb;
1921                 union ixgbe_adv_rx_desc rxd;
1922                 uint16_t data_len;
1923                 uint16_t next_id;
1924                 volatile union ixgbe_adv_rx_desc *rxdp;
1925                 uint32_t staterr;
1926
1927 next_desc:
1928                 /*
1929                  * The code in this whole file uses the volatile pointer to
1930                  * ensure the read ordering of the status and the rest of the
1931                  * descriptor fields (on the compiler level only!!!). This is so
1932                  * UGLY - why not just use the compiler barrier instead? DPDK
1933                  * even has the rte_compiler_barrier() for that.
1934                  *
1935                  * But most importantly this is just wrong because this doesn't
1936                  * ensure memory ordering in a general case at all. For
1937                  * instance, DPDK is supposed to work on Power CPUs where
1938                  * compiler barrier may just not be enough!
1939                  *
1940                  * I tried to write only this function properly to have a
1941                  * starting point (as a part of an LRO/RSC series) but the
1942                  * compiler cursed at me when I tried to cast away the
1943                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1944                  * keeping it the way it is for now.
1945                  *
1946                  * The code in this file is broken in so many other places and
1947                  * will just not work on a big endian CPU anyway therefore the
1948                  * lines below will have to be revisited together with the rest
1949                  * of the ixgbe PMD.
1950                  *
1951                  * TODO:
1952                  *    - Get rid of "volatile" crap and let the compiler do its
1953                  *      job.
1954                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1955                  *      memory ordering below.
1956                  */
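                /*
                 * A minimal sketch of what the TODO above suggests: read
                 * status_error, test IXGBE_RXDADV_STAT_DD, then issue
                 * rte_rmb() before copying the rest of the descriptor, so
                 * the DD check is ordered before the payload reads without
                 * relying on the volatile qualifier.
                 */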
1957                 rxdp = &rx_ring[rx_id];
1958                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1959
1960                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1961                         break;
1962
1963                 rxd = *rxdp;
1964
1965                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1966                                   "staterr=0x%x data_len=%u",
1967                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1968                            rte_le_to_cpu_16(rxd.wb.upper.length));
1969
1970                 if (!bulk_alloc) {
1971                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1972                         if (nmb == NULL) {
1973                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1974                                                   "port_id=%u queue_id=%u",
1975                                            rxq->port_id, rxq->queue_id);
1976
1977                                 rte_eth_devices[rxq->port_id].data->
1978                                                         rx_mbuf_alloc_failed++;
1979                                 break;
1980                         }
1981                 } else if (nb_hold > rxq->rx_free_thresh) {
1982                         uint16_t next_rdt = rxq->rx_free_trigger;
1983
1984                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1985                                 rte_wmb();
1986                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1987                                                     next_rdt);
1988                                 nb_hold -= rxq->rx_free_thresh;
1989                         } else {
1990                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1991                                                   "port_id=%u queue_id=%u",
1992                                            rxq->port_id, rxq->queue_id);
1993
1994                                 rte_eth_devices[rxq->port_id].data->
1995                                                         rx_mbuf_alloc_failed++;
1996                                 break;
1997                         }
1998                 }
1999
2000                 nb_hold++;
2001                 rxe = &sw_ring[rx_id];
2002                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2003
2004                 next_id = rx_id + 1;
2005                 if (next_id == rxq->nb_rx_desc)
2006                         next_id = 0;
2007
2008                 /* Prefetch next mbuf while processing current one. */
2009                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2010
2011                 /*
2012                  * When next RX descriptor is on a cache-line boundary,
2013                  * prefetch the next 4 RX descriptors and the next 8 pointers
2014                  * to mbufs.
2015                  */
2016                 if ((next_id & 0x3) == 0) {
2017                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2018                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2019                 }
2020
2021                 rxm = rxe->mbuf;
2022
2023                 if (!bulk_alloc) {
2024                         __le64 dma =
2025                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2026                         /*
2027                          * Update RX descriptor with the physical address of the
2028                          * new data buffer of the new allocated mbuf.
2029                          */
2030                         rxe->mbuf = nmb;
2031
2032                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2033                         rxdp->read.hdr_addr = 0;
2034                         rxdp->read.pkt_addr = dma;
2035                 } else
2036                         rxe->mbuf = NULL;
2037
2038                 /*
2039                  * Set data length & data buffer address of mbuf.
2040                  */
2041                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2042                 rxm->data_len = data_len;
2043
2044                 if (!eop) {
2045                         uint16_t nextp_id;
2046                         /*
2047                          * Get next descriptor index:
2048                          *  - For RSC it's in the NEXTP field.
2049                          *  - For a scattered packet - it's just a following
2050                  *  - For a scattered packet - it's simply the following
2051                          */
2052                         if (ixgbe_rsc_count(&rxd))
2053                                 nextp_id =
2054                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2055                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2056                         else
2057                                 nextp_id = next_id;
2058
2059                         next_sc_entry = &sw_sc_ring[nextp_id];
2060                         next_rxe = &sw_ring[nextp_id];
2061                         rte_ixgbe_prefetch(next_rxe);
2062                 }
2063
2064                 sc_entry = &sw_sc_ring[rx_id];
2065                 first_seg = sc_entry->fbuf;
2066                 sc_entry->fbuf = NULL;
2067
2068                 /*
2069                  * If this is the first buffer of the received packet,
2070                  * set the pointer to the first mbuf of the packet and
2071                  * initialize its context.
2072                  * Otherwise, update the total length and the number of segments
2073                  * of the current scattered packet, and update the pointer to
2074                  * the last mbuf of the current packet.
2075                  */
2076                 if (first_seg == NULL) {
2077                         first_seg = rxm;
2078                         first_seg->pkt_len = data_len;
2079                         first_seg->nb_segs = 1;
2080                 } else {
2081                         first_seg->pkt_len += data_len;
2082                         first_seg->nb_segs++;
2083                 }
2084
2085                 prev_id = rx_id;
2086                 rx_id = next_id;
2087
2088                 /*
2089                  * If this is not the last buffer of the received packet, update
2090                  * the pointer to the first mbuf at the NEXTP entry in the
2091                  * sw_sc_ring and continue to parse the RX ring.
2092                  */
2093                 if (!eop && next_rxe) {
2094                         rxm->next = next_rxe->mbuf;
2095                         next_sc_entry->fbuf = first_seg;
2096                         goto next_desc;
2097                 }
2098
2099                 /*
2100                  * This is the last buffer of the received packet - return
2101                  * the current cluster to the user.
2102                  */
2103                 rxm->next = NULL;
2104
2105                 /* Initialize the first mbuf of the returned packet */
2106                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2107
2108                 /*
2109                  * Deal with the case when HW CRC strip is disabled.
2110                  * That can't happen when LRO is enabled, but it can still
2111                  * happen in scattered RX mode.
2112                  */
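                /*
                 * Worked example: with a 4-byte CRC and a last segment
                 * holding only 2 bytes of it, the branch below frees that
                 * segment, trims the remaining 2 CRC bytes from the previous
                 * segment and decrements nb_segs; otherwise the CRC is
                 * simply trimmed from the last segment.
                 */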
2113                 first_seg->pkt_len -= rxq->crc_len;
2114                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2115                         struct rte_mbuf *lp;
2116
2117                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2118                                 ;
2119
2120                         first_seg->nb_segs--;
2121                         lp->data_len -= rxq->crc_len - rxm->data_len;
2122                         lp->next = NULL;
2123                         rte_pktmbuf_free_seg(rxm);
2124                 } else
2125                         rxm->data_len -= rxq->crc_len;
2126
2127                 /* Prefetch data of first segment, if configured to do so. */
2128                 rte_packet_prefetch((char *)first_seg->buf_addr +
2129                         first_seg->data_off);
2130
2131                 /*
2132                  * Store the mbuf address into the next entry of the array
2133                  * of returned packets.
2134                  */
2135                 rx_pkts[nb_rx++] = first_seg;
2136         }
2137
2138         /*
2139          * Record index of the next RX descriptor to probe.
2140          */
2141         rxq->rx_tail = rx_id;
2142
2143         /*
2144          * If the number of free RX descriptors is greater than the RX free
2145          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2146          * register.
2147          * Update the RDT with the value of the last processed RX descriptor
2148          * minus 1, to guarantee that the RDT register is never equal to the
2149          * RDH register, which creates a "full" ring situation from the
2150          * hardware point of view...
2151          */
2152         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2153                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2154                            "nb_hold=%u nb_rx=%u",
2155                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2156
2157                 rte_wmb();
2158                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2159                 nb_hold = 0;
2160         }
2161
2162         rxq->nb_rx_hold = nb_hold;
2163         return nb_rx;
2164 }
2165
2166 uint16_t
2167 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2168                                  uint16_t nb_pkts)
2169 {
2170         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2171 }
2172
2173 uint16_t
2174 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2175                                uint16_t nb_pkts)
2176 {
2177         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2178 }
2179
2180 /*********************************************************************
2181  *
2182  *  Queue management functions
2183  *
2184  **********************************************************************/
2185
2186 static void __attribute__((cold))
2187 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2188 {
2189         unsigned i;
2190
2191         if (txq->sw_ring != NULL) {
2192                 for (i = 0; i < txq->nb_tx_desc; i++) {
2193                         if (txq->sw_ring[i].mbuf != NULL) {
2194                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2195                                 txq->sw_ring[i].mbuf = NULL;
2196                         }
2197                 }
2198         }
2199 }
2200
2201 static void __attribute__((cold))
2202 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2203 {
2204         if (txq != NULL &&
2205             txq->sw_ring != NULL)
2206                 rte_free(txq->sw_ring);
2207 }
2208
2209 static void __attribute__((cold))
2210 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2211 {
2212         if (txq != NULL && txq->ops != NULL) {
2213                 txq->ops->release_mbufs(txq);
2214                 txq->ops->free_swring(txq);
2215                 rte_free(txq);
2216         }
2217 }
2218
2219 void __attribute__((cold))
2220 ixgbe_dev_tx_queue_release(void *txq)
2221 {
2222         ixgbe_tx_queue_release(txq);
2223 }
2224
2225 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2226 static void __attribute__((cold))
2227 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2228 {
2229         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2230         struct ixgbe_tx_entry *txe = txq->sw_ring;
2231         uint16_t prev, i;
2232
2233         /* Zero out HW ring memory */
2234         for (i = 0; i < txq->nb_tx_desc; i++) {
2235                 txq->tx_ring[i] = zeroed_desc;
2236         }
2237
2238         /* Initialize SW ring entries */
2239         prev = (uint16_t) (txq->nb_tx_desc - 1);
2240         for (i = 0; i < txq->nb_tx_desc; i++) {
2241                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2242
2243                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2244                 txe[i].mbuf = NULL;
2245                 txe[i].last_id = i;
2246                 txe[prev].next_id = i;
2247                 prev = i;
2248         }
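        /*
         * For a 4-entry ring the loop above yields txe[0..2].next_id = 1..3
         * and txe[3].next_id = 0, i.e. a circular list, with each last_id
         * initially pointing at the entry itself and every descriptor
         * starting with its DD bit set so the cleanup path treats it as
         * already done.
         */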
2249
2250         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2251         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2252
2253         txq->tx_tail = 0;
2254         txq->nb_tx_used = 0;
2255         /*
2256          * Always allow 1 descriptor to be un-allocated to avoid
2257          * a H/W race condition
2258          */
2259         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2260         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2261         txq->ctx_curr = 0;
2262         memset((void *)&txq->ctx_cache, 0,
2263                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2264 }
2265
2266 static const struct ixgbe_txq_ops def_txq_ops = {
2267         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2268         .free_swring = ixgbe_tx_free_swring,
2269         .reset = ixgbe_reset_tx_queue,
2270 };
2271
2272 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2273  * the queue parameters. Used in tx_queue_setup by primary process and then
2274  * in dev_init by secondary process when attaching to an existing ethdev.
2275  */
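/*
 * Illustrative usage sketch (flag and threshold values are assumptions,
 * not taken from this file): a queue configured with
 *
 *     struct rte_eth_txconf txc = {
 *             .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 32,
 *     };
 *     rte_eth_tx_queue_setup(port_id, queue_id, 512, rte_socket_id(), &txc);
 *
 * is expected to satisfy IXGBE_SIMPLE_FLAGS and take the simple (and, where
 * built, vector) Tx path below; any other combination falls back to the
 * full-featured ixgbe_xmit_pkts.
 */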
2276 void __attribute__((cold))
2277 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2278 {
2279         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2280         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2281                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2282                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2283 #ifdef RTE_IXGBE_INC_VECTOR
2284                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2285                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2286                                         ixgbe_txq_vec_setup(txq) == 0)) {
2287                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2288                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2289                 } else
2290 #endif
2291                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2292         } else {
2293                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2294                 PMD_INIT_LOG(DEBUG,
2295                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2296                                 (unsigned long)txq->txq_flags,
2297                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2298                 PMD_INIT_LOG(DEBUG,
2299                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2300                                 (unsigned long)txq->tx_rs_thresh,
2301                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2302                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2303         }
2304 }
2305
2306 int __attribute__((cold))
2307 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2308                          uint16_t queue_idx,
2309                          uint16_t nb_desc,
2310                          unsigned int socket_id,
2311                          const struct rte_eth_txconf *tx_conf)
2312 {
2313         const struct rte_memzone *tz;
2314         struct ixgbe_tx_queue *txq;
2315         struct ixgbe_hw     *hw;
2316         uint16_t tx_rs_thresh, tx_free_thresh;
2317
2318         PMD_INIT_FUNC_TRACE();
2319         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2320
2321         /*
2322          * Validate number of transmit descriptors.
2323          * It must not exceed hardware maximum, and must be multiple
2324          * of IXGBE_ALIGN.
2325          */
2326         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2327                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2328                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2329                 return -EINVAL;
2330         }
2331
2332         /*
2333          * The following two parameters control the setting of the RS bit on
2334          * transmit descriptors.
2335          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2336          * descriptors have been used.
2337          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2338          * descriptors are used or if the number of descriptors required
2339          * to transmit a packet is greater than the number of free TX
2340          * descriptors.
2341          * The following constraints must be satisfied:
2342          *  tx_rs_thresh must be greater than 0.
2343          *  tx_rs_thresh must be less than the size of the ring minus 2.
2344          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2345          *  tx_rs_thresh must be a divisor of the ring size.
2346          *  tx_free_thresh must be greater than 0.
2347          *  tx_free_thresh must be less than the size of the ring minus 3.
2348          * One descriptor in the TX ring is used as a sentinel to avoid a
2349          * H/W race condition, hence the maximum threshold constraints.
2350          * When set to zero use default values.
2351          */
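        /*
         * Worked example (assuming the driver defaults of 32 for both
         * thresholds): for a 512-descriptor ring, tx_rs_thresh = 32 and
         * tx_free_thresh = 32 satisfy all of the above (32 < 510, 32 <= 32,
         * 512 % 32 == 0, 32 < 509), while tx_rs_thresh = 48 would be
         * rejected because 512 is not a multiple of 48.
         */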
2352         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2353                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2354         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2355                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2356         if (tx_rs_thresh >= (nb_desc - 2)) {
2357                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2358                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2359                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2360                         (int)dev->data->port_id, (int)queue_idx);
2361                 return -(EINVAL);
2362         }
2363         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2364                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2365                         "(tx_rs_thresh=%u port=%d queue=%d)",
2366                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2367                         (int)dev->data->port_id, (int)queue_idx);
2368                 return -(EINVAL);
2369         }
2370         if (tx_free_thresh >= (nb_desc - 3)) {
2371                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2372                              "tx_free_thresh must be less than the number of "
2373                              "TX descriptors minus 3. (tx_free_thresh=%u "
2374                              "port=%d queue=%d)",
2375                              (unsigned int)tx_free_thresh,
2376                              (int)dev->data->port_id, (int)queue_idx);
2377                 return -(EINVAL);
2378         }
2379         if (tx_rs_thresh > tx_free_thresh) {
2380                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2381                              "tx_free_thresh. (tx_free_thresh=%u "
2382                              "tx_rs_thresh=%u port=%d queue=%d)",
2383                              (unsigned int)tx_free_thresh,
2384                              (unsigned int)tx_rs_thresh,
2385                              (int)dev->data->port_id,
2386                              (int)queue_idx);
2387                 return -(EINVAL);
2388         }
2389         if ((nb_desc % tx_rs_thresh) != 0) {
2390                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2391                              "number of TX descriptors. (tx_rs_thresh=%u "
2392                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2393                              (int)dev->data->port_id, (int)queue_idx);
2394                 return -(EINVAL);
2395         }
2396
2397         /*
2398          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2399          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2400          * by the NIC and all descriptors are written back after the NIC
2401          * accumulates WTHRESH descriptors.
2402          */
2403         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2404                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2405                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2406                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2407                              (int)dev->data->port_id, (int)queue_idx);
2408                 return -(EINVAL);
2409         }
2410
2411         /* Free memory prior to re-allocation if needed... */
2412         if (dev->data->tx_queues[queue_idx] != NULL) {
2413                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2414                 dev->data->tx_queues[queue_idx] = NULL;
2415         }
2416
2417         /* First allocate the tx queue data structure */
2418         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2419                                  RTE_CACHE_LINE_SIZE, socket_id);
2420         if (txq == NULL)
2421                 return -ENOMEM;
2422
2423         /*
2424          * Allocate TX ring hardware descriptors. A memzone large enough to
2425          * handle the maximum ring size is allocated in order to allow for
2426          * resizing in later calls to the queue setup function.
2427          */
2428         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2429                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2430                         IXGBE_ALIGN, socket_id);
2431         if (tz == NULL) {
2432                 ixgbe_tx_queue_release(txq);
2433                 return -ENOMEM;
2434         }
2435
2436         txq->nb_tx_desc = nb_desc;
2437         txq->tx_rs_thresh = tx_rs_thresh;
2438         txq->tx_free_thresh = tx_free_thresh;
2439         txq->pthresh = tx_conf->tx_thresh.pthresh;
2440         txq->hthresh = tx_conf->tx_thresh.hthresh;
2441         txq->wthresh = tx_conf->tx_thresh.wthresh;
2442         txq->queue_id = queue_idx;
2443         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2444                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2445         txq->port_id = dev->data->port_id;
2446         txq->txq_flags = tx_conf->txq_flags;
2447         txq->ops = &def_txq_ops;
2448         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2449
2450         /*
2451          * Use VFTDT as the TX tail register when running as a virtual function
2452          */
2453         if (hw->mac.type == ixgbe_mac_82599_vf ||
2454             hw->mac.type == ixgbe_mac_X540_vf ||
2455             hw->mac.type == ixgbe_mac_X550_vf ||
2456             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2457             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2458                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2459         else
2460                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2461
2462         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2463         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2464
2465         /* Allocate software ring */
2466         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2467                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2468                                 RTE_CACHE_LINE_SIZE, socket_id);
2469         if (txq->sw_ring == NULL) {
2470                 ixgbe_tx_queue_release(txq);
2471                 return -ENOMEM;
2472         }
2473         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2474                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2475
2476         /* set up vector or scalar TX function as appropriate */
2477         ixgbe_set_tx_function(dev, txq);
2478
2479         txq->ops->reset(txq);
2480
2481         dev->data->tx_queues[queue_idx] = txq;
2482
2483
2484         return 0;
2485 }
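
/*
 * Application-side usage (an illustrative sketch only; the queue, ring size
 * and threshold values below are arbitrary, not values mandated by this PMD):
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 32,
 *     };
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                      &txconf);
 *
 * rte_eth_tx_queue_setup() reaches this function through the PMD's
 * eth_dev_ops, so the constraints documented above apply to these values.
 * Passing NULL instead of &txconf selects the defaults reported by
 * rte_eth_dev_info_get().
 */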
2486
2487 /**
2488  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2489  *
2490  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2491  * in the sw_rsc_ring is not set to NULL but rather points to the next
2492  * mbuf of this RSC aggregation (that has not been completed yet and still
2493  * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2494  * just free the first "nb_segs" segments of the cluster explicitly by
2495  * calling rte_pktmbuf_free_seg() on each of them.
2496  *
2497  * @m scattered cluster head
2498  */
2499 static void __attribute__((cold))
2500 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2501 {
2502         uint8_t i, nb_segs = m->nb_segs;
2503         struct rte_mbuf *next_seg;
2504
2505         for (i = 0; i < nb_segs; i++) {
2506                 next_seg = m->next;
2507                 rte_pktmbuf_free_seg(m);
2508                 m = next_seg;
2509         }
2510 }
2511
2512 static void __attribute__((cold))
2513 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2514 {
2515         unsigned i;
2516
2517 #ifdef RTE_IXGBE_INC_VECTOR
2518         /* SSE Vector driver has a different way of releasing mbufs. */
2519         if (rxq->rx_using_sse) {
2520                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2521                 return;
2522         }
2523 #endif
2524
2525         if (rxq->sw_ring != NULL) {
2526                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2527                         if (rxq->sw_ring[i].mbuf != NULL) {
2528                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2529                                 rxq->sw_ring[i].mbuf = NULL;
2530                         }
2531                 }
2532                 if (rxq->rx_nb_avail) {
2533                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2534                                 struct rte_mbuf *mb;
2535
2536                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2537                                 rte_pktmbuf_free_seg(mb);
2538                         }
2539                         rxq->rx_nb_avail = 0;
2540                 }
2541         }
2542
2543         if (rxq->sw_sc_ring)
2544                 for (i = 0; i < rxq->nb_rx_desc; i++)
2545                         if (rxq->sw_sc_ring[i].fbuf) {
2546                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2547                                 rxq->sw_sc_ring[i].fbuf = NULL;
2548                         }
2549 }
2550
2551 static void __attribute__((cold))
2552 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2553 {
2554         if (rxq != NULL) {
2555                 ixgbe_rx_queue_release_mbufs(rxq);
2556                 rte_free(rxq->sw_ring);
2557                 rte_free(rxq->sw_sc_ring);
2558                 rte_free(rxq);
2559         }
2560 }
2561
2562 void __attribute__((cold))
2563 ixgbe_dev_rx_queue_release(void *rxq)
2564 {
2565         ixgbe_rx_queue_release(rxq);
2566 }
2567
2568 /*
2569  * Check if Rx Burst Bulk Alloc function can be used.
2570  * Return
2571  *        0: the preconditions are satisfied and the bulk allocation function
2572  *           can be used.
2573  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2574  *           function must be used.
2575  */
2576 static inline int __attribute__((cold))
2577 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2578 {
2579         int ret = 0;
2580
2581         /*
2582          * Make sure the following pre-conditions are satisfied:
2583          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2584          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2585          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2586          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2587          * Scattered packets are not supported.  This should be checked
2588          * outside of this function.
2589          */
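        /*
         * For example, nb_rx_desc = 512 with rx_free_thresh = 32 satisfies
         * all four conditions (with the usual RTE_PMD_IXGBE_RX_MAX_BURST of
         * 32), so the bulk allocation path remains available.
         */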
2590         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2591                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2592                              "rxq->rx_free_thresh=%d, "
2593                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2594                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2595                 ret = -EINVAL;
2596         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2597                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2598                              "rxq->rx_free_thresh=%d, "
2599                              "rxq->nb_rx_desc=%d",
2600                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2601                 ret = -EINVAL;
2602         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2603                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2604                              "rxq->nb_rx_desc=%d, "
2605                              "rxq->rx_free_thresh=%d",
2606                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2607                 ret = -EINVAL;
2608         } else if (!(rxq->nb_rx_desc <
2609                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2610                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2611                              "rxq->nb_rx_desc=%d, "
2612                              "IXGBE_MAX_RING_DESC=%d, "
2613                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2614                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2615                              RTE_PMD_IXGBE_RX_MAX_BURST);
2616                 ret = -EINVAL;
2617         }
2618
2619         return ret;
2620 }
2621
2622 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2623 static void __attribute__((cold))
2624 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2625 {
2626         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2627         unsigned i;
2628         uint16_t len = rxq->nb_rx_desc;
2629
2630         /*
2631          * By default, the Rx queue setup function allocates enough memory for
2632          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2633          * extra memory at the end of the descriptor ring to be zeroed out. A
2634          * pre-condition for using the Rx burst bulk alloc function is that the
2635          * number of descriptors is less than or equal to
2636          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2637          * constraints here to see if we need to zero out memory after the end
2638          * of the H/W descriptor ring.
2639          */
2640         if (adapter->rx_bulk_alloc_allowed)
2641                 /* zero out extra memory */
2642                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2643
2644         /*
2645          * Zero out HW ring memory. Zero out extra memory at the end of
2646          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2647          * reads extra memory as zeros.
2648          */
2649         for (i = 0; i < len; i++) {
2650                 rxq->rx_ring[i] = zeroed_desc;
2651         }
2652
2653         /*
2654          * initialize extra software ring entries. Space for these extra
2655          * entries is always allocated.
2656          */
2657         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2658         for (i = rxq->nb_rx_desc; i < len; ++i) {
2659                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2660         }
2661
2662         rxq->rx_nb_avail = 0;
2663         rxq->rx_next_avail = 0;
2664         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2665         rxq->rx_tail = 0;
2666         rxq->nb_rx_hold = 0;
2667         rxq->pkt_first_seg = NULL;
2668         rxq->pkt_last_seg = NULL;
2669
2670 #ifdef RTE_IXGBE_INC_VECTOR
2671         rxq->rxrearm_start = 0;
2672         rxq->rxrearm_nb = 0;
2673 #endif
2674 }
2675
2676 int __attribute__((cold))
2677 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2678                          uint16_t queue_idx,
2679                          uint16_t nb_desc,
2680                          unsigned int socket_id,
2681                          const struct rte_eth_rxconf *rx_conf,
2682                          struct rte_mempool *mp)
2683 {
2684         const struct rte_memzone *rz;
2685         struct ixgbe_rx_queue *rxq;
2686         struct ixgbe_hw     *hw;
2687         uint16_t len;
2688         struct ixgbe_adapter *adapter =
2689                 (struct ixgbe_adapter *)dev->data->dev_private;
2690
2691         PMD_INIT_FUNC_TRACE();
2692         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2693
2694         /*
2695          * Validate number of receive descriptors.
2696          * It must not exceed hardware maximum, and must be multiple
2697          * of IXGBE_ALIGN.
2698          */
2699         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2700                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2701                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2702                 return -EINVAL;
2703         }
2704
2705         /* Free memory prior to re-allocation if needed... */
2706         if (dev->data->rx_queues[queue_idx] != NULL) {
2707                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2708                 dev->data->rx_queues[queue_idx] = NULL;
2709         }
2710
2711         /* First allocate the rx queue data structure */
2712         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2713                                  RTE_CACHE_LINE_SIZE, socket_id);
2714         if (rxq == NULL)
2715                 return -ENOMEM;
2716         rxq->mb_pool = mp;
2717         rxq->nb_rx_desc = nb_desc;
2718         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2719         rxq->queue_id = queue_idx;
2720         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2721                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2722         rxq->port_id = dev->data->port_id;
2723         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2724                                                         0 : ETHER_CRC_LEN);
2725         rxq->drop_en = rx_conf->rx_drop_en;
2726         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2727
2728         /*
2729          * The packet type in RX descriptor is different for different NICs.
2730          * Some bits are used for x550 but reserved for other NICs.
2731          * So set different masks for different NICs.
2732          */
2733         if (hw->mac.type == ixgbe_mac_X550 ||
2734             hw->mac.type == ixgbe_mac_X550EM_x ||
2735             hw->mac.type == ixgbe_mac_X550EM_a ||
2736             hw->mac.type == ixgbe_mac_X550_vf ||
2737             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2738             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2739                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2740         else
2741                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2742
2743         /*
2744          * Allocate RX ring hardware descriptors. A memzone large enough to
2745          * handle the maximum ring size is allocated in order to allow for
2746          * resizing in later calls to the queue setup function.
2747          */
2748         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2749                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2750         if (rz == NULL) {
2751                 ixgbe_rx_queue_release(rxq);
2752                 return -ENOMEM;
2753         }
2754
2755         /*
2756          * Zero init all the descriptors in the ring.
2757          */
2758         memset(rz->addr, 0, RX_RING_SZ);
2759
2760         /*
2761          * Use VFRDT/VFRDH as the ring registers when running as a virtual function
2762          */
2763         if (hw->mac.type == ixgbe_mac_82599_vf ||
2764             hw->mac.type == ixgbe_mac_X540_vf ||
2765             hw->mac.type == ixgbe_mac_X550_vf ||
2766             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2767             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2768                 rxq->rdt_reg_addr =
2769                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2770                 rxq->rdh_reg_addr =
2771                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2772         } else {
2773                 rxq->rdt_reg_addr =
2774                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2775                 rxq->rdh_reg_addr =
2776                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2777         }
2778
2779         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2780         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2781
2782         /*
2783          * Certain constraints must be met in order to use the bulk buffer
2784          * allocation Rx burst function. If any Rx queue doesn't meet them,
2785          * the feature should be disabled for the whole port.
2786          */
2787         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2788                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2789                                     "preconditions - canceling the feature for "
2790                                     "the whole port[%d]",
2791                              rxq->queue_id, rxq->port_id);
2792                 adapter->rx_bulk_alloc_allowed = false;
2793         }
2794
2795         /*
2796          * Allocate software ring. Allow for space at the end of the
2797          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2798          * function does not access an invalid memory region.
2799          */
2800         len = nb_desc;
2801         if (adapter->rx_bulk_alloc_allowed)
2802                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2803
2804         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2805                                           sizeof(struct ixgbe_rx_entry) * len,
2806                                           RTE_CACHE_LINE_SIZE, socket_id);
2807         if (!rxq->sw_ring) {
2808                 ixgbe_rx_queue_release(rxq);
2809                 return -ENOMEM;
2810         }
2811
2812         /*
2813          * Always allocate even if it's not going to be needed in order to
2814          * simplify the code.
2815          *
2816          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2817          * be requested in ixgbe_dev_rx_init(), which is called later from
2818          * dev_start() flow.
2819          */
2820         rxq->sw_sc_ring =
2821                 rte_zmalloc_socket("rxq->sw_sc_ring",
2822                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2823                                    RTE_CACHE_LINE_SIZE, socket_id);
2824         if (!rxq->sw_sc_ring) {
2825                 ixgbe_rx_queue_release(rxq);
2826                 return -ENOMEM;
2827         }
2828
2829         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2830                             "dma_addr=0x%"PRIx64,
2831                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2832                      rxq->rx_ring_phys_addr);
2833
2834         if (!rte_is_power_of_2(nb_desc)) {
2835                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2836                                     "preconditions - canceling the feature for "
2837                                     "the whole port[%d]",
2838                              rxq->queue_id, rxq->port_id);
2839                 adapter->rx_vec_allowed = false;
2840         } else
2841                 ixgbe_rxq_vec_setup(rxq);
2842
2843         dev->data->rx_queues[queue_idx] = rxq;
2844
2845         ixgbe_reset_rx_queue(adapter, rxq);
2846
2847         return 0;
2848 }
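
/*
 * Application-side usage (an illustrative sketch only; the pool name, pool
 * size and descriptor count are arbitrary, not values required by this PMD):
 *
 *     struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 8191, 256,
 *                     0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *     int ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                     NULL, mp);
 *
 * Passing NULL for rx_conf selects the defaults reported by
 * rte_eth_dev_info_get(). A power-of-two descriptor count keeps the vector
 * Rx path available (see the rte_is_power_of_2() check above).
 */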
2849
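/*
 * Return an estimate of the number of used descriptors on the given Rx queue.
 * The ring is sampled every IXGBE_RXQ_SCAN_INTERVAL descriptors, so the value
 * returned is a multiple of that interval rather than an exact count.
 */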
2850 uint32_t
2851 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2852 {
2853 #define IXGBE_RXQ_SCAN_INTERVAL 4
2854         volatile union ixgbe_adv_rx_desc *rxdp;
2855         struct ixgbe_rx_queue *rxq;
2856         uint32_t desc = 0;
2857
2858         if (rx_queue_id >= dev->data->nb_rx_queues) {
2859                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2860                 return 0;
2861         }
2862
2863         rxq = dev->data->rx_queues[rx_queue_id];
2864         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2865
2866         while ((desc < rxq->nb_rx_desc) &&
2867                 (rxdp->wb.upper.status_error &
2868                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2869                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2870                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2871                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2872                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2873                                 desc - rxq->nb_rx_desc]);
2874         }
2875
2876         return desc;
2877 }
2878
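/*
 * Check whether the descriptor at rx_tail + offset (modulo the ring size)
 * has its DD bit set, i.e. has already been written back by the hardware.
 */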
2879 int
2880 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2881 {
2882         volatile union ixgbe_adv_rx_desc *rxdp;
2883         struct ixgbe_rx_queue *rxq = rx_queue;
2884         uint32_t desc;
2885
2886         if (unlikely(offset >= rxq->nb_rx_desc))
2887                 return 0;
2888         desc = rxq->rx_tail + offset;
2889         if (desc >= rxq->nb_rx_desc)
2890                 desc -= rxq->nb_rx_desc;
2891
2892         rxdp = &rxq->rx_ring[desc];
2893         return !!(rxdp->wb.upper.status_error &
2894                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2895 }
2896
2897 void __attribute__((cold))
2898 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2899 {
2900         unsigned i;
2901         struct ixgbe_adapter *adapter =
2902                 (struct ixgbe_adapter *)dev->data->dev_private;
2903
2904         PMD_INIT_FUNC_TRACE();
2905
2906         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2907                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2908
2909                 if (txq != NULL) {
2910                         txq->ops->release_mbufs(txq);
2911                         txq->ops->reset(txq);
2912                 }
2913         }
2914
2915         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2916                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2917
2918                 if (rxq != NULL) {
2919                         ixgbe_rx_queue_release_mbufs(rxq);
2920                         ixgbe_reset_rx_queue(adapter, rxq);
2921                 }
2922         }
2923 }
2924
2925 void
2926 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2927 {
2928         unsigned i;
2929
2930         PMD_INIT_FUNC_TRACE();
2931
2932         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2933                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2934                 dev->data->rx_queues[i] = NULL;
2935         }
2936         dev->data->nb_rx_queues = 0;
2937
2938         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2939                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2940                 dev->data->tx_queues[i] = NULL;
2941         }
2942         dev->data->nb_tx_queues = 0;
2943 }
2944
2945 /*********************************************************************
2946  *
2947  *  Device RX/TX init functions
2948  *
2949  **********************************************************************/
2950
2951 /**
2952  * Receive Side Scaling (RSS)
2953  * See section 7.1.2.8 in the following document:
2954  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2955  *
2956  * Principles:
2957  * The source and destination IP addresses of the IP header and the source
2958  * and destination ports of TCP/UDP headers, if any, of received packets are
2959  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2960  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2961  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
2962  * RSS output index which is used as the RX queue index where to store the
2963  * received packets.
2964  * The following output is supplied in the RX write-back descriptor:
2965  *     - 32-bit result of the Microsoft RSS hash function,
2966  *     - 4-bit RSS type field.
2967  */
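
/*
 * For example (illustrative numbers only): a hash result of 0x1234ab78 has
 * 0x78 & 0x7F = 0x78 = 120 as its seven LSBs, so RETA entry 120 selects the
 * Rx queue for that packet.
 */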
2968
2969 /*
2970  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2971  * Used as the default key.
2972  */
2973 static uint8_t rss_intel_key[40] = {
2974         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2975         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2976         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2977         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2978         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2979 };
2980
2981 static void
2982 ixgbe_rss_disable(struct rte_eth_dev *dev)
2983 {
2984         struct ixgbe_hw *hw;
2985         uint32_t mrqc;
2986         uint32_t mrqc_reg;
2987
2988         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2989         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2990         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2991         mrqc &= ~IXGBE_MRQC_RSSEN;
2992         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2993 }
2994
2995 static void
2996 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2997 {
2998         uint8_t  *hash_key;
2999         uint32_t mrqc;
3000         uint32_t rss_key;
3001         uint64_t rss_hf;
3002         uint16_t i;
3003         uint32_t mrqc_reg;
3004         uint32_t rssrk_reg;
3005
3006         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3007         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3008
3009         hash_key = rss_conf->rss_key;
3010         if (hash_key != NULL) {
3011                 /* Fill in RSS hash key */
3012                 for (i = 0; i < 10; i++) {
3013                         rss_key  = hash_key[(i * 4)];
3014                         rss_key |= hash_key[(i * 4) + 1] << 8;
3015                         rss_key |= hash_key[(i * 4) + 2] << 16;
3016                         rss_key |= hash_key[(i * 4) + 3] << 24;
3017                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3018                 }
3019         }
3020
3021         /* Set configured hashing protocols in MRQC register */
3022         rss_hf = rss_conf->rss_hf;
3023         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3024         if (rss_hf & ETH_RSS_IPV4)
3025                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3026         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3027                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3028         if (rss_hf & ETH_RSS_IPV6)
3029                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3030         if (rss_hf & ETH_RSS_IPV6_EX)
3031                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3032         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3033                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3034         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3035                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3036         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3037                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3038         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3039                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3040         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3041                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3042         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3043 }
3044
3045 int
3046 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3047                           struct rte_eth_rss_conf *rss_conf)
3048 {
3049         struct ixgbe_hw *hw;
3050         uint32_t mrqc;
3051         uint64_t rss_hf;
3052         uint32_t mrqc_reg;
3053
3054         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3055
3056         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3057                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3058                         "NIC.");
3059                 return -ENOTSUP;
3060         }
3061         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3062
3063         /*
3064          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3065          *     "RSS enabling cannot be done dynamically while it must be
3066          *      preceded by a software reset"
3067          * Before changing anything, first check that the update RSS operation
3068          * does not attempt to disable RSS, if RSS was enabled at
3069          * initialization time, or does not attempt to enable RSS, if RSS was
3070          * disabled at initialization time.
3071          */
3072         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3073         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3074         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3075                 if (rss_hf != 0) /* Enable RSS */
3076                         return -(EINVAL);
3077                 return 0; /* Nothing to do */
3078         }
3079         /* RSS enabled */
3080         if (rss_hf == 0) /* Disable RSS */
3081                 return -(EINVAL);
3082         ixgbe_hw_rss_hash_set(hw, rss_conf);
3083         return 0;
3084 }
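
/*
 * Caller-side sketch (illustrative only; the selected hash functions are
 * arbitrary, and RSS must already be enabled on the port as checked above):
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     int ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 *
 * A NULL rss_key leaves the currently programmed key untouched, while a
 * non-zero rss_hf reprograms the MRQC hash-field selection.
 */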
3085
3086 int
3087 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3088                             struct rte_eth_rss_conf *rss_conf)
3089 {
3090         struct ixgbe_hw *hw;
3091         uint8_t *hash_key;
3092         uint32_t mrqc;
3093         uint32_t rss_key;
3094         uint64_t rss_hf;
3095         uint16_t i;
3096         uint32_t mrqc_reg;
3097         uint32_t rssrk_reg;
3098
3099         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3100         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3101         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3102         hash_key = rss_conf->rss_key;
3103         if (hash_key != NULL) {
3104                 /* Return RSS hash key */
3105                 for (i = 0; i < 10; i++) {
3106                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3107                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3108                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3109                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3110                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3111                 }
3112         }
3113
3114         /* Get RSS functions configured in MRQC register */
3115         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3116         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3117                 rss_conf->rss_hf = 0;
3118                 return 0;
3119         }
3120         rss_hf = 0;
3121         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3122                 rss_hf |= ETH_RSS_IPV4;
3123         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3124                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3125         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3126                 rss_hf |= ETH_RSS_IPV6;
3127         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3128                 rss_hf |= ETH_RSS_IPV6_EX;
3129         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3130                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3131         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3132                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3133         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3134                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3135         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3136                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3137         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3138                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3139         rss_conf->rss_hf = rss_hf;
3140         return 0;
3141 }
3142
3143 static void
3144 ixgbe_rss_configure(struct rte_eth_dev *dev)
3145 {
3146         struct rte_eth_rss_conf rss_conf;
3147         struct ixgbe_hw *hw;
3148         uint32_t reta;
3149         uint16_t i;
3150         uint16_t j;
3151         uint16_t sp_reta_size;
3152         uint32_t reta_reg;
3153
3154         PMD_INIT_FUNC_TRACE();
3155         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3156
3157         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3158
3159         /*
3160          * Fill in redirection table
3161          * The byte-swap is needed because NIC registers are in
3162          * little-endian order.
3163          */
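        /*
         * For example, with 4 Rx queues the redirection table entries are
         * filled with the repeating pattern 0, 1, 2, 3, ...; four consecutive
         * entries are packed into one 32-bit register and written
         * byte-swapped.
         */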
3164         reta = 0;
3165         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3166                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3167
3168                 if (j == dev->data->nb_rx_queues)
3169                         j = 0;
3170                 reta = (reta << 8) | j;
3171                 if ((i & 3) == 3)
3172                         IXGBE_WRITE_REG(hw, reta_reg,
3173                                         rte_bswap32(reta));
3174         }
3175
3176         /*
3177          * Configure the RSS key and the RSS protocols used to compute
3178          * the RSS hash of input packets.
3179          */
3180         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3181         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3182                 ixgbe_rss_disable(dev);
3183                 return;
3184         }
3185         if (rss_conf.rss_key == NULL)
3186                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3187         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3188 }
3189
3190 #define NUM_VFTA_REGISTERS 128
3191 #define NIC_RX_BUFFER_SIZE 0x200
3192 #define X550_RX_BUFFER_SIZE 0x180
3193
3194 static void
3195 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3196 {
3197         struct rte_eth_vmdq_dcb_conf *cfg;
3198         struct ixgbe_hw *hw;
3199         enum rte_eth_nb_pools num_pools;
3200         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3201         uint16_t pbsize;
3202         uint8_t nb_tcs; /* number of traffic classes */
3203         int i;
3204
3205         PMD_INIT_FUNC_TRACE();
3206         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3207         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3208         num_pools = cfg->nb_queue_pools;
3209         /* Check we have a valid number of pools */
3210         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3211                 ixgbe_rss_disable(dev);
3212                 return;
3213         }
3214         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3215         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3216
3217         /*
3218          * RXPBSIZE
3219          * split the rx buffer up into sections, one for each traffic class
3220          */
3221         switch (hw->mac.type) {
3222         case ixgbe_mac_X550:
3223         case ixgbe_mac_X550EM_x:
3224         case ixgbe_mac_X550EM_a:
3225                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3226                 break;
3227         default:
3228                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3229                 break;
3230         }
3231         for (i = 0; i < nb_tcs; i++) {
3232                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3233
3234                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3235                 /* clear 10 bits. */
3236                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3237                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3238         }
3239         /* zero alloc all unused TCs */
3240         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3241                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3242
3243                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3244                 /* clear 10 bits. */
3245                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3246         }
3247
3248         /* MRQC: enable vmdq and dcb */
3249         mrqc = (num_pools == ETH_16_POOLS) ?
3250                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3251         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3252
3253         /* PFVTCTL: turn on virtualisation and set the default pool */
3254         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3255         if (cfg->enable_default_pool) {
3256                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3257         } else {
3258                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3259         }
3260
3261         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3262
3263         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3264         queue_mapping = 0;
3265         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3266                 /*
3267                  * mapping is done with 3 bits per priority,
3268                  * so shift by i*3 each time
3269                  */
3270                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3271
3272         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3273
3274         /* RTRPCS: DCB related */
3275         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3276
3277         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3278         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3279         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3280         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3281
3282         /* VFTA - enable all vlan filters */
3283         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3284                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3285         }
3286
3287         /* VFRE: pool enabling for receive - 16 or 32 */
3288         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3289                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3290
3291         /*
3292          * MPSAR - allow pools to read specific mac addresses
3293          * In this case, all pools should be able to read from mac addr 0
3294          */
3295         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3296         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3297
3298         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3299         for (i = 0; i < cfg->nb_pool_maps; i++) {
3300                 /* set vlan id in VF register and set the valid bit */
3301                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3302                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3303                 /*
3304                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3305                  * pools, we only need to use the first half of the register
3306                  * i.e. bits 0-31
3307                  */
3308                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3309         }
3310 }
3311
3312 /**
3313  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3314  * @hw: pointer to hardware structure
3315  * @dcb_config: pointer to ixgbe_dcb_config structure
3316  */
3317 static void
3318 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
3319                        struct ixgbe_dcb_config *dcb_config)
3320 {
3321         uint32_t reg;
3322         uint32_t q;
3323
3324         PMD_INIT_FUNC_TRACE();
3325         if (hw->mac.type != ixgbe_mac_82598EB) {
3326                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3327                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3328                 reg |= IXGBE_RTTDCS_ARBDIS;
3329                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3330
3331                 /* Enable DCB for Tx with 8 TCs */
3332                 if (dcb_config->num_tcs.pg_tcs == 8) {
3333                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3334                 } else {
3335                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3336                 }
3337                 if (dcb_config->vt_mode)
3338                         reg |= IXGBE_MTQC_VT_ENA;
3339                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3340
3341                 /* Disable drop for all queues */
3342                 for (q = 0; q < 128; q++)
3343                         IXGBE_WRITE_REG(hw, IXGBE_QDE,
3344                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3345
3346                 /* Enable the Tx desc arbiter */
3347                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3348                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3349                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3350
3351                 /* Enable Security TX Buffer IFG for DCB */
3352                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3353                 reg |= IXGBE_SECTX_DCB;
3354                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3355         }
3356 }
3357
3358 /**
3359  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3360  * @dev: pointer to rte_eth_dev structure
3361  * @dcb_config: pointer to ixgbe_dcb_config structure
3362  */
3363 static void
3364 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3365                         struct ixgbe_dcb_config *dcb_config)
3366 {
3367         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3368                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3369         struct ixgbe_hw *hw =
3370                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3371
3372         PMD_INIT_FUNC_TRACE();
3373         if (hw->mac.type != ixgbe_mac_82598EB)
3374                 /*PF VF Transmit Enable*/
3375                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3376                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3377
3378         /*Configure general DCB TX parameters*/
3379         ixgbe_dcb_tx_hw_config(hw, dcb_config);
3380 }
3381
3382 static void
3383 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3384                         struct ixgbe_dcb_config *dcb_config)
3385 {
3386         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3387                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3388         struct ixgbe_dcb_tc_config *tc;
3389         uint8_t i, j;
3390
3391         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3392         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3393                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3394                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3395         } else {
3396                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3397                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3398         }
3399         /* User Priority to Traffic Class mapping */
3400         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3401                 j = vmdq_rx_conf->dcb_tc[i];
3402                 tc = &dcb_config->tc_config[j];
3403                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3404                                                 (uint8_t)(1 << j);
3405         }
3406 }
3407
3408 static void
3409 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3410                         struct ixgbe_dcb_config *dcb_config)
3411 {
3412         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3413                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3414         struct ixgbe_dcb_tc_config *tc;
3415         uint8_t i, j;
3416
3417         /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3418         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3419                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3420                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3421         } else {
3422                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3423                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3424         }
3425
3426         /* User Priority to Traffic Class mapping */
3427         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3428                 j = vmdq_tx_conf->dcb_tc[i];
3429                 tc = &dcb_config->tc_config[j];
3430                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3431                                                 (uint8_t)(1 << j);
3432         }
3433 }
3434
3435 static void
3436 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3437                 struct ixgbe_dcb_config *dcb_config)
3438 {
3439         struct rte_eth_dcb_rx_conf *rx_conf =
3440                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3441         struct ixgbe_dcb_tc_config *tc;
3442         uint8_t i, j;
3443
3444         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3445         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3446
3447         /* User Priority to Traffic Class mapping */
3448         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3449                 j = rx_conf->dcb_tc[i];
3450                 tc = &dcb_config->tc_config[j];
3451                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3452                                                 (uint8_t)(1 << j);
3453         }
3454 }
3455
3456 static void
3457 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3458                 struct ixgbe_dcb_config *dcb_config)
3459 {
3460         struct rte_eth_dcb_tx_conf *tx_conf =
3461                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3462         struct ixgbe_dcb_tc_config *tc;
3463         uint8_t i, j;
3464
3465         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3466         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3467
3468         /* User Priority to Traffic Class mapping */
3469         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3470                 j = tx_conf->dcb_tc[i];
3471                 tc = &dcb_config->tc_config[j];
3472                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3473                                                 (uint8_t)(1 << j);
3474         }
3475 }
3476
3477 /**
3478  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3479  * @hw: pointer to hardware structure
3480  * @dcb_config: pointer to ixgbe_dcb_config structure
3481  */
3482 static void
3483 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3484                struct ixgbe_dcb_config *dcb_config)
3485 {
3486         uint32_t reg;
3487         uint32_t vlanctrl;
3488         uint8_t i;
3489
3490         PMD_INIT_FUNC_TRACE();
3491         /*
3492          * Disable the arbiter before changing parameters
3493          * (always enable recycle mode; WSP)
3494          */
3495         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3496         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3497
3498         if (hw->mac.type != ixgbe_mac_82598EB) {
3499                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3500                 if (dcb_config->num_tcs.pg_tcs == 4) {
3501                         if (dcb_config->vt_mode)
3502                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3503                                         IXGBE_MRQC_VMDQRT4TCEN;
3504                         else {
3505                                 /* no matter the mode is DCB or DCB_RSS, just
3506                                  * set the MRQE to RSSXTCEN. RSS is controlled
3507                                  * by RSS_FIELD
3508                                  */
3509                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3510                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3511                                         IXGBE_MRQC_RTRSS4TCEN;
3512                         }
3513                 }
3514                 if (dcb_config->num_tcs.pg_tcs == 8) {
3515                         if (dcb_config->vt_mode)
3516                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3517                                         IXGBE_MRQC_VMDQRT8TCEN;
3518                         else {
3519                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3520                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3521                                         IXGBE_MRQC_RTRSS8TCEN;
3522                         }
3523                 }
3524
3525                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3526         }
3527
3528         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3529         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3530         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3531         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3532
3533         /* VFTA - enable all vlan filters */
3534         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3535                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3536         }
3537
3538         /*
3539          * Configure Rx packet plane (recycle mode; WSP) and
3540          * enable arbiter
3541          */
3542         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3543         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3544 }
3545
3546 static void
3547 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3548                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3549 {
3550         switch (hw->mac.type) {
3551         case ixgbe_mac_82598EB:
3552                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3553                 break;
3554         case ixgbe_mac_82599EB:
3555         case ixgbe_mac_X540:
3556         case ixgbe_mac_X550:
3557         case ixgbe_mac_X550EM_x:
3558         case ixgbe_mac_X550EM_a:
3559                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3560                                                   tsa, map);
3561                 break;
3562         default:
3563                 break;
3564         }
3565 }
3566
3567 static void
3568 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3569                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3570 {
3571         switch (hw->mac.type) {
3572         case ixgbe_mac_82598EB:
3573                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3574                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3575                 break;
3576         case ixgbe_mac_82599EB:
3577         case ixgbe_mac_X540:
3578         case ixgbe_mac_X550:
3579         case ixgbe_mac_X550EM_x:
3580         case ixgbe_mac_X550EM_a:
3581                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3582                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3583                 break;
3584         default:
3585                 break;
3586         }
3587 }
3588
3589 #define DCB_RX_CONFIG  1
3590 #define DCB_TX_CONFIG  1
3591 #define DCB_TX_PB      1024
3592 /**
3593  * ixgbe_dcb_hw_configure - Enable DCB and configure
3594  * general DCB in VT mode and non-VT mode parameters
3595  * @dev: pointer to rte_eth_dev structure
3596  * @dcb_config: pointer to ixgbe_dcb_config structure
3597  */
3598 static int
3599 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3600                         struct ixgbe_dcb_config *dcb_config)
3601 {
3602         int     ret = 0;
3603         uint8_t i, pfc_en, nb_tcs;
3604         uint16_t pbsize, rx_buffer_size;
3605         uint8_t config_dcb_rx = 0;
3606         uint8_t config_dcb_tx = 0;
3607         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3608         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3609         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3610         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3611         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3612         struct ixgbe_dcb_tc_config *tc;
3613         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3614         struct ixgbe_hw *hw =
3615                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3616
3617         switch (dev->data->dev_conf.rxmode.mq_mode) {
3618         case ETH_MQ_RX_VMDQ_DCB:
3619                 dcb_config->vt_mode = true;
3620                 if (hw->mac.type != ixgbe_mac_82598EB) {
3621                         config_dcb_rx = DCB_RX_CONFIG;
3622                         /*
3623                          * get DCB and VT RX configuration parameters
3624                          * from rte_eth_conf
3625                          */
3626                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3627                         /*Configure general VMDQ and DCB RX parameters*/
3628                         ixgbe_vmdq_dcb_configure(dev);
3629                 }
3630                 break;
3631         case ETH_MQ_RX_DCB:
3632         case ETH_MQ_RX_DCB_RSS:
3633                 dcb_config->vt_mode = false;
3634                 config_dcb_rx = DCB_RX_CONFIG;
3635                 /* Get DCB RX configuration parameters from rte_eth_conf */
3636                 ixgbe_dcb_rx_config(dev, dcb_config);
3637                 /*Configure general DCB RX parameters*/
3638                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3639                 break;
3640         default:
3641                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3642                 break;
3643         }
3644         switch (dev->data->dev_conf.txmode.mq_mode) {
3645         case ETH_MQ_TX_VMDQ_DCB:
3646                 dcb_config->vt_mode = true;
3647                 config_dcb_tx = DCB_TX_CONFIG;
3648                 /* get DCB and VT TX configuration parameters
3649                  * from rte_eth_conf
3650                  */
3651                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3652                 /*Configure general VMDQ and DCB TX parameters*/
3653                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3654                 break;
3655
3656         case ETH_MQ_TX_DCB:
3657                 dcb_config->vt_mode = false;
3658                 config_dcb_tx = DCB_TX_CONFIG;
3659                 /*get DCB TX configuration parameters from rte_eth_conf*/
3660                 ixgbe_dcb_tx_config(dev, dcb_config);
3661                 /*Configure general DCB TX parameters*/
3662                 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3663                 break;
3664         default:
3665                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3666                 break;
3667         }
3668
3669         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3670         /* Unpack map */
3671         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3672         if (nb_tcs == ETH_4_TCS) {
3673                 /* Avoid un-configured priority mapping to TC0 */
3674                 uint8_t j = 4;
3675                 uint8_t mask = 0xFF;
3676
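                /*
                 * With only 4 TCs enabled, priorities 4-7 have no explicit
                 * mapping; spread them across the TCs not already claimed by
                 * priorities 0-3 instead of letting them all land on TC0.
                 */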
3677                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3678                         mask = (uint8_t)(mask & (~(1 << map[i])));
3679                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3680                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3681                                 map[j++] = i;
3682                         mask >>= 1;
3683                 }
3684                 /* Re-configure 4 TCs BW */
3685                 for (i = 0; i < nb_tcs; i++) {
3686                         tc = &dcb_config->tc_config[i];
3687                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3688                                                 (uint8_t)(100 / nb_tcs);
3689                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3690                                                 (uint8_t)(100 / nb_tcs);
3691                 }
3692                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3693                         tc = &dcb_config->tc_config[i];
3694                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3695                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3696                 }
3697         }
3698
3699         switch (hw->mac.type) {
3700         case ixgbe_mac_X550:
3701         case ixgbe_mac_X550EM_x:
3702         case ixgbe_mac_X550EM_a:
3703                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3704                 break;
3705         default:
3706                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3707                 break;
3708         }
3709
3710         if (config_dcb_rx) {
3711                 /* Set RX buffer size */
3712                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3713                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3714
3715                 for (i = 0; i < nb_tcs; i++) {
3716                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3717                 }
3718                 /* Zero the buffer size of all unused TCs */
3719                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3720                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3721                 }
3722         }
3723         if (config_dcb_tx) {
3724                 /* Only an equally distributed Tx packet buffer
3725                  * strategy is supported.
3726                  */
3727                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3728                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3729
3730                 for (i = 0; i < nb_tcs; i++) {
3731                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3732                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3733                 }
3734                 /* Clear unused TCs, if any, to zero buffer size */
3735                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3736                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3737                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3738                 }
3739         }
3740
3741         /* Calculate traffic class credits */
3742         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3743                                 IXGBE_DCB_TX_CONFIG);
3744         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3745                                 IXGBE_DCB_RX_CONFIG);
3746
3747         if (config_dcb_rx) {
3748                 /* Unpack CEE standard containers */
3749                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3750                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3751                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3752                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3753                 /* Configure PG(ETS) RX */
3754                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3755         }
3756
3757         if (config_dcb_tx) {
3758                 /* Unpack CEE standard containers */
3759                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3760                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3761                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3762                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3763                 /* Configure PG(ETS) TX */
3764                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3765         }
3766
3767         /* Configure queue statistics registers */
3768         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3769
3770         /* Check if the PFC is supported */
3771         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3772                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3773                 for (i = 0; i < nb_tcs; i++) {
3774                         /*
3775                          * If the TC count is 8 and the default high_water is 48,
3776                          * then the default low_water is 16.
3777                          */
3778                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3779                         hw->fc.low_water[i] = pbsize / 4;
3780                         /* Enable pfc for this TC */
3781                         tc = &dcb_config->tc_config[i];
3782                         tc->pfc = ixgbe_dcb_pfc_enabled;
3783                 }
3784                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3785                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3786                         pfc_en &= 0x0F;
3787                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3788         }
3789
3790         return ret;
3791 }
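
/*
 * Worked example (illustrative only): the PFC thresholds above follow from
 * the per-TC packet buffer, pbsize = rx_buffer_size / nb_tcs, with
 * high_water = 3 * pbsize / 4 and low_water = pbsize / 4. For the 8-TC case
 * mentioned in the comment above this implies a 64 KB per-TC buffer and the
 * default 48/16 pair.
 */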
3792
3793 /**
3794  * ixgbe_configure_dcb - Configure DCB hardware
3795  * @dev: pointer to rte_eth_dev
3796  */
3797 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3798 {
3799         struct ixgbe_dcb_config *dcb_cfg =
3800                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3801         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3802
3803         PMD_INIT_FUNC_TRACE();
3804
3805         /* check support mq_mode for DCB */
3806         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3807             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3808             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3809                 return;
3810
3811         if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3812                 return;
3813
3814         /* Configure DCB hardware */
3815         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3816 }
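
/*
 * Illustrative sketch (not compiled): roughly how an application requests
 * the DCB path handled by ixgbe_configure_dcb() above. The helper name and
 * the chosen values are examples only.
 */
#if 0
static int
example_configure_dcb_port(uint8_t port_id)
{
        struct rte_eth_conf conf;

        memset(&conf, 0, sizeof(conf));
        conf.rxmode.mq_mode = ETH_MQ_RX_DCB;
        conf.txmode.mq_mode = ETH_MQ_TX_DCB;
        conf.rx_adv_conf.dcb_rx_conf.nb_tcs = ETH_4_TCS;
        conf.tx_adv_conf.dcb_tx_conf.nb_tcs = ETH_4_TCS;

        /* ixgbe_configure_dcb() also requires nb_rx_queues == ETH_DCB_NUM_QUEUES */
        return rte_eth_dev_configure(port_id, ETH_DCB_NUM_QUEUES,
                                     ETH_DCB_NUM_QUEUES, &conf);
}
#endif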
3817
3818 /*
3819  * VMDq is only supported on 10 GbE NICs.
3820  */
3821 static void
3822 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3823 {
3824         struct rte_eth_vmdq_rx_conf *cfg;
3825         struct ixgbe_hw *hw;
3826         enum rte_eth_nb_pools num_pools;
3827         uint32_t mrqc, vt_ctl, vlanctrl;
3828         uint32_t vmolr = 0;
3829         int i;
3830
3831         PMD_INIT_FUNC_TRACE();
3832         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3833         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3834         num_pools = cfg->nb_queue_pools;
3835
3836         ixgbe_rss_disable(dev);
3837
3838         /* MRQC: enable vmdq */
3839         mrqc = IXGBE_MRQC_VMDQEN;
3840         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3841
3842         /* PFVTCTL: turn on virtualisation and set the default pool */
3843         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3844         if (cfg->enable_default_pool)
3845                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3846         else
3847                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3848
3849         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3850
3851         for (i = 0; i < (int)num_pools; i++) {
3852                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3853                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3854         }
3855
3856         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3857         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3858         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3859         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3860
3861         /* VFTA - enable all vlan filters */
3862         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3863                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3864
3865         /* VFRE: pool enabling for receive - 64 */
3866         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3867         if (num_pools == ETH_64_POOLS)
3868                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3869
3870         /*
3871          * MPSAR - allow pools to read specific mac addresses
3872          * In this case, all pools should be able to read from mac addr 0
3873          */
3874         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3875         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3876
3877         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3878         for (i = 0; i < cfg->nb_pool_maps; i++) {
3879                 /* set vlan id in VF register and set the valid bit */
3880                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3881                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3882                 /*
3883                  * Put the allowed pools in the VLVFB register pair: the low
3884                  * 32-bit register covers pools 0-31 and the high one covers
3885                  * pools 32-63.
3886                  */
3887                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3888                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3889                                         (cfg->pool_map[i].pools & UINT32_MAX));
3890                 else
3891                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3892                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3893
3894         }
3895
3896         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3897         if (cfg->enable_loop_back) {
3898                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3899                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3900                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3901         }
3902
3903         IXGBE_WRITE_FLUSH(hw);
3904 }
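
/*
 * Illustrative sketch (not compiled): the rte_eth_conf fields consumed by
 * ixgbe_vmdq_rx_hw_configure() above, filled roughly the way the VMDq
 * example application does. The helper name and values are examples only.
 */
#if 0
static void
example_fill_vmdq_rx_conf(struct rte_eth_conf *conf)
{
        struct rte_eth_vmdq_rx_conf *vmdq = &conf->rx_adv_conf.vmdq_rx_conf;

        conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
        vmdq->nb_queue_pools = ETH_64_POOLS;
        vmdq->enable_default_pool = 0;
        vmdq->enable_loop_back = 0;
        vmdq->nb_pool_maps = 1;
        vmdq->pool_map[0].vlan_id = 100;      /* admit VLAN 100 ...   */
        vmdq->pool_map[0].pools = 1ULL << 0;  /* ... into pool 0 only */
}
#endif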
3905
3906 /*
3907  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3908  * @hw: pointer to hardware structure
3909  */
3910 static void
3911 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3912 {
3913         uint32_t reg;
3914         uint32_t q;
3915
3916         PMD_INIT_FUNC_TRACE();
3917         /* PF/VF Transmit Enable */
3918         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3919         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3920
3921         /* Disable the Tx desc arbiter so that MTQC can be changed */
3922         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3923         reg |= IXGBE_RTTDCS_ARBDIS;
3924         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3925
3926         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3927         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3928
3929         /* Disable drop for all queues */
3930         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3931                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3932                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3933
3934         /* Enable the Tx desc arbiter */
3935         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3936         reg &= ~IXGBE_RTTDCS_ARBDIS;
3937         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3938
3939         IXGBE_WRITE_FLUSH(hw);
3940 }
3941
3942 static int __attribute__((cold))
3943 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3944 {
3945         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3946         uint64_t dma_addr;
3947         unsigned int i;
3948
3949         /* Initialize software ring entries */
3950         for (i = 0; i < rxq->nb_rx_desc; i++) {
3951                 volatile union ixgbe_adv_rx_desc *rxd;
3952                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
3953
3954                 if (mbuf == NULL) {
3955                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3956                                      (unsigned) rxq->queue_id);
3957                         return -ENOMEM;
3958                 }
3959
3960                 rte_mbuf_refcnt_set(mbuf, 1);
3961                 mbuf->next = NULL;
3962                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3963                 mbuf->nb_segs = 1;
3964                 mbuf->port = rxq->port_id;
3965
3966                 dma_addr =
3967                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
3968                 rxd = &rxq->rx_ring[i];
3969                 rxd->read.hdr_addr = 0;
3970                 rxd->read.pkt_addr = dma_addr;
3971                 rxe[i].mbuf = mbuf;
3972         }
3973
3974         return 0;
3975 }
3976
3977 static int
3978 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3979 {
3980         struct ixgbe_hw *hw;
3981         uint32_t mrqc;
3982
3983         ixgbe_rss_configure(dev);
3984
3985         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3986
3987         /* MRQC: enable VF RSS */
3988         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3989         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3990         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3991         case ETH_64_POOLS:
3992                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3993                 break;
3994
3995         case ETH_32_POOLS:
3996                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3997                 break;
3998
3999         default:
4000                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4001                 return -EINVAL;
4002         }
4003
4004         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4005
4006         return 0;
4007 }
4008
4009 static int
4010 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4011 {
4012         struct ixgbe_hw *hw =
4013                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4014
4015         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4016         case ETH_64_POOLS:
4017                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4018                         IXGBE_MRQC_VMDQEN);
4019                 break;
4020
4021         case ETH_32_POOLS:
4022                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4023                         IXGBE_MRQC_VMDQRT4TCEN);
4024                 break;
4025
4026         case ETH_16_POOLS:
4027                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4028                         IXGBE_MRQC_VMDQRT8TCEN);
4029                 break;
4030         default:
4031                 PMD_INIT_LOG(ERR,
4032                         "invalid pool number in IOV mode");
4033                 break;
4034         }
4035         return 0;
4036 }
4037
4038 static int
4039 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4040 {
4041         struct ixgbe_hw *hw =
4042                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4043
4044         if (hw->mac.type == ixgbe_mac_82598EB)
4045                 return 0;
4046
4047         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4048                 /*
4049                  * SRIOV inactive scheme
4050                  * any DCB/RSS w/o VMDq multi-queue setting
4051                  */
4052                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4053                 case ETH_MQ_RX_RSS:
4054                 case ETH_MQ_RX_DCB_RSS:
4055                 case ETH_MQ_RX_VMDQ_RSS:
4056                         ixgbe_rss_configure(dev);
4057                         break;
4058
4059                 case ETH_MQ_RX_VMDQ_DCB:
4060                         ixgbe_vmdq_dcb_configure(dev);
4061                         break;
4062
4063                 case ETH_MQ_RX_VMDQ_ONLY:
4064                         ixgbe_vmdq_rx_hw_configure(dev);
4065                         break;
4066
4067                 case ETH_MQ_RX_NONE:
4068                 default:
4069                         /* if mq_mode is none, disable RSS mode. */
4070                         ixgbe_rss_disable(dev);
4071                         break;
4072                 }
4073         } else {
4074                 /*
4075                  * SRIOV active scheme
4076                  * Support RSS together with VMDq & SRIOV
4077                  */
4078                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4079                 case ETH_MQ_RX_RSS:
4080                 case ETH_MQ_RX_VMDQ_RSS:
4081                         ixgbe_config_vf_rss(dev);
4082                         break;
4083
4084                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4085                 case ETH_MQ_RX_VMDQ_DCB:
4086                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4087                         PMD_INIT_LOG(ERR,
4088                                 "DCB is not supported with VMDq & SRIOV");
4089                         return -1;
4090                 default:
4091                         ixgbe_config_vf_default(dev);
4092                         break;
4093                 }
4094         }
4095
4096         return 0;
4097 }
4098
4099 static int
4100 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4101 {
4102         struct ixgbe_hw *hw =
4103                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4104         uint32_t mtqc;
4105         uint32_t rttdcs;
4106
4107         if (hw->mac.type == ixgbe_mac_82598EB)
4108                 return 0;
4109
4110         /* disable arbiter before setting MTQC */
4111         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4112         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4113         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4114
4115         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4116                 /*
4117                  * SRIOV inactive scheme
4118                  * any DCB w/o VMDq multi-queue setting
4119                  */
4120                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4121                         ixgbe_vmdq_tx_hw_configure(hw);
4122                 else {
4123                         mtqc = IXGBE_MTQC_64Q_1PB;
4124                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4125                 }
4126         } else {
4127                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4128
4129                 /*
4130                  * SRIOV active scheme
4131                  * FIXME if support DCB together with VMDq & SRIOV
4132                  */
4133                 case ETH_64_POOLS:
4134                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4135                         break;
4136                 case ETH_32_POOLS:
4137                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4138                         break;
4139                 case ETH_16_POOLS:
4140                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4141                                 IXGBE_MTQC_8TC_8TQ;
4142                         break;
4143                 default:
4144                         mtqc = IXGBE_MTQC_64Q_1PB;
4145                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4146                 }
4147                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4148         }
4149
4150         /* re-enable arbiter */
4151         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4152         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4153
4154         return 0;
4155 }
4156
4157 /**
4158  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4159  *
4160  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4161  * spec rev. 3.0 chapter 8.2.3.8.13.
4162  *
4163  * @pool Memory pool of the Rx queue
4164  */
4165 static inline uint32_t
4166 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4167 {
4168         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4169
4170         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4171         uint16_t maxdesc =
4172                 IPV4_MAX_PKT_LEN /
4173                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4174
4175         if (maxdesc >= 16)
4176                 return IXGBE_RSCCTL_MAXDESC_16;
4177         else if (maxdesc >= 8)
4178                 return IXGBE_RSCCTL_MAXDESC_8;
4179         else if (maxdesc >= 4)
4180                 return IXGBE_RSCCTL_MAXDESC_4;
4181         else
4182                 return IXGBE_RSCCTL_MAXDESC_1;
4183 }
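
/*
 * Worked example (illustrative only), assuming the common default mbuf
 * layout (2048 bytes of data room after the 128-byte headroom):
 * maxdesc = 65535 / 2048 = 31, so the function returns
 * IXGBE_RSCCTL_MAXDESC_16, and 16 * 2 KB = 32 KB stays below the
 * 64 KB - 1 limit noted above.
 */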
4184
4185 /**
4186  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4187  * interrupt
4188  *
4189  * (Taken from FreeBSD tree)
4190  * (yes this is all very magic and confusing :)
4191  *
4192  * @dev port handle
4193  * @entry the register array entry
4194  * @vector the MSIX vector for this queue
4195  * @type RX/TX/MISC
4196  */
4197 static void
4198 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4199 {
4200         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4201         u32 ivar, index;
4202
4203         vector |= IXGBE_IVAR_ALLOC_VAL;
4204
4205         switch (hw->mac.type) {
4206
4207         case ixgbe_mac_82598EB:
4208                 if (type == -1)
4209                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4210                 else
4211                         entry += (type * 64);
4212                 index = (entry >> 2) & 0x1F;
4213                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4214                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4215                 ivar |= (vector << (8 * (entry & 0x3)));
4216                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4217                 break;
4218
4219         case ixgbe_mac_82599EB:
4220         case ixgbe_mac_X540:
4221                 if (type == -1) { /* MISC IVAR */
4222                         index = (entry & 1) * 8;
4223                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4224                         ivar &= ~(0xFF << index);
4225                         ivar |= (vector << index);
4226                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4227                 } else {        /* RX/TX IVARS */
4228                         index = (16 * (entry & 1)) + (8 * type);
4229                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4230                         ivar &= ~(0xFF << index);
4231                         ivar |= (vector << index);
4232                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4233                 }
4234
4235                 break;
4236
4237         default:
4238                 break;
4239         }
4240 }
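
/*
 * Example (illustrative only) of the 82599/X540 RX/TX IVAR layout above:
 * entry = 5, type = 0 (an Rx queue) gives index = 16 * (5 & 1) + 8 * 0 = 16,
 * so the vector lands in bits 23:16 of IVAR(5 >> 1) = IVAR(2). On the 82598
 * the entry is first offset by type * 64 and then packed four entries per
 * register instead.
 */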
4241
4242 void __attribute__((cold))
4243 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4244 {
4245         uint16_t i, rx_using_sse;
4246         struct ixgbe_adapter *adapter =
4247                 (struct ixgbe_adapter *)dev->data->dev_private;
4248
4249         /*
4250          * In order to allow Vector Rx there are a few configuration
4251          * conditions to be met and Rx Bulk Allocation should be allowed.
4252          */
4253         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4254             !adapter->rx_bulk_alloc_allowed) {
4255                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4256                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4257                                     "not enabled",
4258                              dev->data->port_id);
4259
4260                 adapter->rx_vec_allowed = false;
4261         }
4262
4263         /*
4264          * Initialize the appropriate LRO callback.
4265          *
4266          * If all queues satisfy the bulk allocation preconditions
4267          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4268          * Otherwise use a single allocation version.
4269          */
4270         if (dev->data->lro) {
4271                 if (adapter->rx_bulk_alloc_allowed) {
4272                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4273                                            "allocation version");
4274                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4275                 } else {
4276                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4277                                            "allocation version");
4278                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4279                 }
4280         } else if (dev->data->scattered_rx) {
4281                 /*
4282                  * Set the non-LRO scattered callback: there are Vector,
4283                  * bulk and single allocation versions.
4284                  */
4285                 if (adapter->rx_vec_allowed) {
4286                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4287                                             "callback (port=%d).",
4288                                      dev->data->port_id);
4289
4290                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4291                 } else if (adapter->rx_bulk_alloc_allowed) {
4292                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx with bulk "
4293                                            "allocation callback (port=%d).",
4294                                      dev->data->port_id);
4295                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4296                 } else {
4297                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4298                                             "single allocation) "
4299                                             "Scattered Rx callback "
4300                                             "(port=%d).",
4301                                      dev->data->port_id);
4302
4303                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4304                 }
4305         /*
4306          * Below we set "simple" callbacks according to port/queues parameters.
4307          * If parameters allow we are going to choose between the following
4308          * callbacks:
4309          *    - Vector
4310          *    - Bulk Allocation
4311          *    - Single buffer allocation (the simplest one)
4312          */
4313         } else if (adapter->rx_vec_allowed) {
4314                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4315                                     "burst size is no less than %d (port=%d).",
4316                              RTE_IXGBE_DESCS_PER_LOOP,
4317                              dev->data->port_id);
4318
4319                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4320         } else if (adapter->rx_bulk_alloc_allowed) {
4321                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4322                                     "satisfied. Rx Burst Bulk Alloc function "
4323                                     "will be used on port=%d.",
4324                              dev->data->port_id);
4325
4326                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4327         } else {
4328                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4329                                     "satisfied, or Scattered Rx is requested "
4330                                     "(port=%d).",
4331                              dev->data->port_id);
4332
4333                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4334         }
4335
4336         /* Propagate information about RX function choice through all queues. */
4337
4338         rx_using_sse =
4339                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4340                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4341
4342         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4343                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4344
4345                 rxq->rx_using_sse = rx_using_sse;
4346         }
4347 }
4348
4349 /**
4350  * ixgbe_set_rsc - configure RSC related port HW registers
4351  *
4352  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4353  * of 82599 Spec (x540 configuration is virtually the same).
4354  *
4355  * @dev port handle
4356  *
4357  * Returns 0 in case of success or a non-zero error code
4358  */
4359 static int
4360 ixgbe_set_rsc(struct rte_eth_dev *dev)
4361 {
4362         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4363         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4364         struct rte_eth_dev_info dev_info = { 0 };
4365         bool rsc_capable = false;
4366         uint16_t i;
4367         uint32_t rdrxctl;
4368
4369         /* Sanity check */
4370         dev->dev_ops->dev_infos_get(dev, &dev_info);
4371         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4372                 rsc_capable = true;
4373
4374         if (!rsc_capable && rx_conf->enable_lro) {
4375                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4376                                    "support it");
4377                 return -EINVAL;
4378         }
4379
4380         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4381
4382         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4383                 /*
4384                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4385                  * RSC configuration requires HW CRC stripping to be
4386                  * enabled. If the user requested both HW CRC stripping off
4387                  * and RSC on, return an error.
4388                  */
4389                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4390                                     "is disabled");
4391                 return -EINVAL;
4392         }
4393
4394         /* RFCTL configuration  */
4395         if (rsc_capable) {
4396                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4397
4398                 if (rx_conf->enable_lro)
4399                         /*
4400                          * Since NFS packet coalescing is not supported, clear
4401                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4402                          * enabled.
4403                          */
4404                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4405                                    IXGBE_RFCTL_NFSR_DIS);
4406                 else
4407                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4408
4409                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4410         }
4411
4412         /* If LRO hasn't been requested - we are done here. */
4413         if (!rx_conf->enable_lro)
4414                 return 0;
4415
4416         /* Set RDRXCTL.RSCACKC bit */
4417         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4418         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4419         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4420
4421         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4422         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4423                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4424                 uint32_t srrctl =
4425                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4426                 uint32_t rscctl =
4427                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4428                 uint32_t psrtype =
4429                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4430                 uint32_t eitr =
4431                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4432
4433                 /*
4434                  * ixgbe PMD doesn't support header-split at the moment.
4435                  *
4436                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4437                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4438                  * should be configured even if header split is not
4439                  * enabled. We will configure it to 128 bytes following the
4440                  * recommendation in the spec.
4441                  */
4442                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4443                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4444                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4445
4446                 /*
4447                  * TODO: Consider setting the Receive Descriptor Minimum
4448                  * Threshold Size for an RSC case. This is not an obviously
4449                  * beneficial option, but it is worth considering...
4450                  */
4451
4452                 rscctl |= IXGBE_RSCCTL_RSCEN;
4453                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4454                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4455
4456                 /*
4457                  * RSC: Set ITR interval corresponding to 2K ints/s.
4458                  *
4459                  * Full-sized RSC aggregations for a 10Gb/s link will
4460                  * arrive at about 20K aggregation/s rate.
4461                  *
4462                  * A 2K ints/s rate will cause only 10% of the
4463                  * aggregations to be closed due to interrupt timer
4464                  * expiration when streaming at wire speed.
4465                  *
4466                  * For a sparse streaming case this setting will yield
4467                  * at most 500us latency for a single RSC aggregation.
4468                  */
4469                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4470                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4471
4472                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4473                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4474                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4475                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4476
4477                 /*
4478                  * RSC requires the mapping of the queue to the
4479                  * interrupt vector.
4480                  */
4481                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4482         }
4483
4484         dev->data->lro = 1;
4485
4486         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4487
4488         return 0;
4489 }
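
/*
 * Worked example (illustrative only) for the EITR setting above: a 500 us
 * interval is at most 2000 interrupts/s, so with ~20K full-sized
 * aggregations/s on a 10Gb/s link the timer closes at most
 * 2000 / 20000 = 10% of them, as stated in the per-queue comment.
 */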
4490
4491 /*
4492  * Initializes Receive Unit.
4493  */
4494 int __attribute__((cold))
4495 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4496 {
4497         struct ixgbe_hw     *hw;
4498         struct ixgbe_rx_queue *rxq;
4499         uint64_t bus_addr;
4500         uint32_t rxctrl;
4501         uint32_t fctrl;
4502         uint32_t hlreg0;
4503         uint32_t maxfrs;
4504         uint32_t srrctl;
4505         uint32_t rdrxctl;
4506         uint32_t rxcsum;
4507         uint16_t buf_size;
4508         uint16_t i;
4509         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4510         int rc;
4511
4512         PMD_INIT_FUNC_TRACE();
4513         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4514
4515         /*
4516          * Make sure receives are disabled while setting
4517          * up the RX context (registers, descriptor rings, etc.).
4518          */
4519         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4520         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4521
4522         /* Enable receipt of broadcast frames */
4523         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4524         fctrl |= IXGBE_FCTRL_BAM;
4525         fctrl |= IXGBE_FCTRL_DPF;
4526         fctrl |= IXGBE_FCTRL_PMCF;
4527         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4528
4529         /*
4530          * Configure CRC stripping, if any.
4531          */
4532         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4533         if (rx_conf->hw_strip_crc)
4534                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4535         else
4536                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4537
4538         /*
4539          * Configure jumbo frame support, if any.
4540          */
4541         if (rx_conf->jumbo_frame == 1) {
4542                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4543                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4544                 maxfrs &= 0x0000FFFF;
4545                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4546                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4547         } else
4548                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4549
4550         /*
4551          * If loopback mode is configured for 82599, set LPBK bit.
4552          */
4553         if (hw->mac.type == ixgbe_mac_82599EB &&
4554                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4555                 hlreg0 |= IXGBE_HLREG0_LPBK;
4556         else
4557                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4558
4559         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4560
4561         /* Setup RX queues */
4562         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4563                 rxq = dev->data->rx_queues[i];
4564
4565                 /*
4566                  * Reset crc_len in case it was changed after queue setup by a
4567                  * call to configure.
4568                  */
4569                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4570
4571                 /* Setup the Base and Length of the Rx Descriptor Rings */
4572                 bus_addr = rxq->rx_ring_phys_addr;
4573                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4574                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4575                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4576                                 (uint32_t)(bus_addr >> 32));
4577                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4578                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4579                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4580                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4581
4582                 /* Configure the SRRCTL register */
4583 #ifdef RTE_HEADER_SPLIT_ENABLE
4584                 /*
4585                  * Configure Header Split
4586                  */
4587                 if (rx_conf->header_split) {
4588                         if (hw->mac.type == ixgbe_mac_82599EB) {
4589                                 /* Must setup the PSRTYPE register */
4590                                 uint32_t psrtype;
4591
4592                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4593                                         IXGBE_PSRTYPE_UDPHDR   |
4594                                         IXGBE_PSRTYPE_IPV4HDR  |
4595                                         IXGBE_PSRTYPE_IPV6HDR;
4596                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4597                         }
4598                         srrctl = ((rx_conf->split_hdr_size <<
4599                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4600                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4601                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4602                 } else
4603 #endif
4604                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4605
4606                 /* Set whether packets are dropped when no descriptors are available */
4607                 if (rxq->drop_en)
4608                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4609
4610                 /*
4611                  * Configure the RX buffer size in the BSIZEPACKET field of
4612                  * the SRRCTL register of the queue.
4613                  * The value is in 1 KB resolution. Valid values can be from
4614                  * 1 KB to 16 KB.
4615                  */
4616                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4617                         RTE_PKTMBUF_HEADROOM);
4618                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4619                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4620
4621                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4622
4623                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4624                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4625
4626                 /* Account for two VLAN tags (QinQ) when checking the buffer size */
4627                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4628                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4629                         dev->data->scattered_rx = 1;
4630         }
4631
4632         if (rx_conf->enable_scatter)
4633                 dev->data->scattered_rx = 1;
4634
4635         /*
4636          * Device configured with multiple RX queues.
4637          */
4638         ixgbe_dev_mq_rx_configure(dev);
4639
4640         /*
4641          * Setup the Checksum Register.
4642          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4643          * Enable IP/L4 checksum computation by hardware if requested to do so.
4644          */
4645         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4646         rxcsum |= IXGBE_RXCSUM_PCSD;
4647         if (rx_conf->hw_ip_checksum)
4648                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4649         else
4650                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4651
4652         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4653
4654         if (hw->mac.type == ixgbe_mac_82599EB ||
4655             hw->mac.type == ixgbe_mac_X540) {
4656                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4657                 if (rx_conf->hw_strip_crc)
4658                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4659                 else
4660                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4661                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4662                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4663         }
4664
4665         rc = ixgbe_set_rsc(dev);
4666         if (rc)
4667                 return rc;
4668
4669         ixgbe_set_rx_function(dev);
4670
4671         return 0;
4672 }
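
/*
 * Worked example (illustrative only) of the scattered Rx decision above,
 * assuming a 2048-byte SRRCTL.BSIZEPKT buffer: a standard 1518-byte
 * max_rx_pkt_len plus 2 * IXGBE_VLAN_TAG_SIZE (8 bytes) fits in a single
 * buffer, so single-buffer Rx is kept; a 9000-byte jumbo setting does not,
 * so scattered_rx is enabled.
 */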
4673
4674 /*
4675  * Initializes Transmit Unit.
4676  */
4677 void __attribute__((cold))
4678 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4679 {
4680         struct ixgbe_hw     *hw;
4681         struct ixgbe_tx_queue *txq;
4682         uint64_t bus_addr;
4683         uint32_t hlreg0;
4684         uint32_t txctrl;
4685         uint16_t i;
4686
4687         PMD_INIT_FUNC_TRACE();
4688         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4689
4690         /* Enable TX CRC (checksum offload requirement) and hw padding
4691          * (TSO requirement)
4692          */
4693         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4694         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4695         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4696
4697         /* Setup the Base and Length of the Tx Descriptor Rings */
4698         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4699                 txq = dev->data->tx_queues[i];
4700
4701                 bus_addr = txq->tx_ring_phys_addr;
4702                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4703                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4704                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4705                                 (uint32_t)(bus_addr >> 32));
4706                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4707                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4708                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4709                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4710                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4711
4712                 /*
4713                  * Disable Tx Head Writeback RO bit, since this hoses
4714                  * bookkeeping if things aren't delivered in order.
4715                  */
4716                 switch (hw->mac.type) {
4717                 case ixgbe_mac_82598EB:
4718                         txctrl = IXGBE_READ_REG(hw,
4719                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4720                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4721                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4722                                         txctrl);
4723                         break;
4724
4725                 case ixgbe_mac_82599EB:
4726                 case ixgbe_mac_X540:
4727                 case ixgbe_mac_X550:
4728                 case ixgbe_mac_X550EM_x:
4729                 case ixgbe_mac_X550EM_a:
4730                 default:
4731                         txctrl = IXGBE_READ_REG(hw,
4732                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4733                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4734                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4735                                         txctrl);
4736                         break;
4737                 }
4738         }
4739
4740         /* Device configured with multiple TX queues. */
4741         ixgbe_dev_mq_tx_configure(dev);
4742 }
4743
4744 /*
4745  * Set up link for 82599 loopback mode Tx->Rx.
4746  */
4747 static inline void __attribute__((cold))
4748 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4749 {
4750         PMD_INIT_FUNC_TRACE();
4751
4752         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4753                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4754                                 IXGBE_SUCCESS) {
4755                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4756                         /* ignore error */
4757                         return;
4758                 }
4759         }
4760
4761         /* Restart link */
4762         IXGBE_WRITE_REG(hw,
4763                         IXGBE_AUTOC,
4764                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4765         ixgbe_reset_pipeline_82599(hw);
4766
4767         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4768         msec_delay(50);
4769 }
4770
4771
4772 /*
4773  * Start Transmit and Receive Units.
4774  */
4775 int __attribute__((cold))
4776 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4777 {
4778         struct ixgbe_hw     *hw;
4779         struct ixgbe_tx_queue *txq;
4780         struct ixgbe_rx_queue *rxq;
4781         uint32_t txdctl;
4782         uint32_t dmatxctl;
4783         uint32_t rxctrl;
4784         uint16_t i;
4785         int ret = 0;
4786
4787         PMD_INIT_FUNC_TRACE();
4788         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4789
4790         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4791                 txq = dev->data->tx_queues[i];
4792                 /* Setup Transmit Threshold Registers */
4793                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4794                 txdctl |= txq->pthresh & 0x7F;
4795                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4796                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4797                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4798         }
4799
4800         if (hw->mac.type != ixgbe_mac_82598EB) {
4801                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4802                 dmatxctl |= IXGBE_DMATXCTL_TE;
4803                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4804         }
4805
4806         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4807                 txq = dev->data->tx_queues[i];
4808                 if (!txq->tx_deferred_start) {
4809                         ret = ixgbe_dev_tx_queue_start(dev, i);
4810                         if (ret < 0)
4811                                 return ret;
4812                 }
4813         }
4814
4815         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4816                 rxq = dev->data->rx_queues[i];
4817                 if (!rxq->rx_deferred_start) {
4818                         ret = ixgbe_dev_rx_queue_start(dev, i);
4819                         if (ret < 0)
4820                                 return ret;
4821                 }
4822         }
4823
4824         /* Enable Receive engine */
4825         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4826         if (hw->mac.type == ixgbe_mac_82598EB)
4827                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4828         rxctrl |= IXGBE_RXCTRL_RXEN;
4829         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4830
4831         /* If loopback mode is enabled for 82599, set up the link accordingly */
4832         if (hw->mac.type == ixgbe_mac_82599EB &&
4833                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4834                 ixgbe_setup_loopback_link_82599(hw);
4835
4836         return 0;
4837 }
4838
4839 /*
4840  * Start Receive Units for specified queue.
4841  */
4842 int __attribute__((cold))
4843 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4844 {
4845         struct ixgbe_hw     *hw;
4846         struct ixgbe_rx_queue *rxq;
4847         uint32_t rxdctl;
4848         int poll_ms;
4849
4850         PMD_INIT_FUNC_TRACE();
4851         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4852
4853         if (rx_queue_id < dev->data->nb_rx_queues) {
4854                 rxq = dev->data->rx_queues[rx_queue_id];
4855
4856                 /* Allocate buffers for descriptor rings */
4857                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4858                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4859                                      rx_queue_id);
4860                         return -1;
4861                 }
4862                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4863                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4864                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4865
4866                 /* Wait until RX Enable ready */
4867                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4868                 do {
4869                         rte_delay_ms(1);
4870                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4871                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4872                 if (!poll_ms)
4873                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4874                                      rx_queue_id);
4875                 rte_wmb();
4876                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4877                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4878                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4879         } else
4880                 return -1;
4881
4882         return 0;
4883 }
4884
4885 /*
4886  * Stop Receive Units for specified queue.
4887  */
4888 int __attribute__((cold))
4889 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4890 {
4891         struct ixgbe_hw     *hw;
4892         struct ixgbe_adapter *adapter =
4893                 (struct ixgbe_adapter *)dev->data->dev_private;
4894         struct ixgbe_rx_queue *rxq;
4895         uint32_t rxdctl;
4896         int poll_ms;
4897
4898         PMD_INIT_FUNC_TRACE();
4899         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4900
4901         if (rx_queue_id < dev->data->nb_rx_queues) {
4902                 rxq = dev->data->rx_queues[rx_queue_id];
4903
4904                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4905                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4906                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4907
4908                 /* Wait until RX Enable bit clear */
4909                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4910                 do {
4911                         rte_delay_ms(1);
4912                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4913                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4914                 if (!poll_ms)
4915                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4916                                      rx_queue_id);
4917
4918                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4919
4920                 ixgbe_rx_queue_release_mbufs(rxq);
4921                 ixgbe_reset_rx_queue(adapter, rxq);
4922                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4923         } else
4924                 return -1;
4925
4926         return 0;
4927 }
4928
4929
4930 /*
4931  * Start Transmit Units for specified queue.
4932  */
4933 int __attribute__((cold))
4934 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4935 {
4936         struct ixgbe_hw     *hw;
4937         struct ixgbe_tx_queue *txq;
4938         uint32_t txdctl;
4939         int poll_ms;
4940
4941         PMD_INIT_FUNC_TRACE();
4942         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4943
4944         if (tx_queue_id < dev->data->nb_tx_queues) {
4945                 txq = dev->data->tx_queues[tx_queue_id];
4946                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4947                 txdctl |= IXGBE_TXDCTL_ENABLE;
4948                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4949
4950                 /* Wait until TX Enable ready */
4951                 if (hw->mac.type == ixgbe_mac_82599EB) {
4952                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4953                         do {
4954                                 rte_delay_ms(1);
4955                                 txdctl = IXGBE_READ_REG(hw,
4956                                         IXGBE_TXDCTL(txq->reg_idx));
4957                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4958                         if (!poll_ms)
4959                                 PMD_INIT_LOG(ERR, "Could not enable "
4960                                              "Tx Queue %d", tx_queue_id);
4961                 }
4962                 rte_wmb();
4963                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4964                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4965                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4966         } else
4967                 return -1;
4968
4969         return 0;
4970 }
4971
4972 /*
4973  * Stop Transmit Units for specified queue.
4974  */
4975 int __attribute__((cold))
4976 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4977 {
4978         struct ixgbe_hw     *hw;
4979         struct ixgbe_tx_queue *txq;
4980         uint32_t txdctl;
4981         uint32_t txtdh, txtdt;
4982         int poll_ms;
4983
4984         PMD_INIT_FUNC_TRACE();
4985         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4986
4987         if (tx_queue_id >= dev->data->nb_tx_queues)
4988                 return -1;
4989
4990         txq = dev->data->tx_queues[tx_queue_id];
4991
4992         /* Wait until TX queue is empty */
4993         if (hw->mac.type == ixgbe_mac_82599EB) {
4994                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4995                 do {
4996                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
4997                         txtdh = IXGBE_READ_REG(hw,
4998                                                IXGBE_TDH(txq->reg_idx));
4999                         txtdt = IXGBE_READ_REG(hw,
5000                                                IXGBE_TDT(txq->reg_idx));
5001                 } while (--poll_ms && (txtdh != txtdt));
5002                 if (!poll_ms)
5003                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5004                                      "when stopping.", tx_queue_id);
5005         }
5006
5007         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5008         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5009         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5010
5011         /* Wait until TX Enable bit clear */
5012         if (hw->mac.type == ixgbe_mac_82599EB) {
5013                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5014                 do {
5015                         rte_delay_ms(1);
5016                         txdctl = IXGBE_READ_REG(hw,
5017                                                 IXGBE_TXDCTL(txq->reg_idx));
5018                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5019                 if (!poll_ms)
5020                         PMD_INIT_LOG(ERR, "Could not disable "
5021                                      "Tx Queue %d", tx_queue_id);
5022         }
5023
5024         if (txq->ops != NULL) {
5025                 txq->ops->release_mbufs(txq);
5026                 txq->ops->reset(txq);
5027         }
5028         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5029
5030         return 0;
5031 }
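
/*
 * Illustrative sketch (not part of the driver): how an application reaches
 * the queue start/stop handlers above through the ethdev API when a Tx queue
 * is configured with deferred start.  It assumes the port has already been
 * configured with rte_eth_dev_configure(); the helper name, the descriptor
 * count and the IXGBE_RXTX_DOC_EXAMPLES guard (which keeps the sketch out of
 * the build) are hypothetical.
 */
#ifdef IXGBE_RXTX_DOC_EXAMPLES
static int
example_deferred_tx_queue(uint8_t port_id, uint16_t queue_id)
{
        struct rte_eth_txconf txconf;
        int ret;

        memset(&txconf, 0, sizeof(txconf));
        txconf.tx_deferred_start = 1; /* skip this queue in rte_eth_dev_start() */

        ret = rte_eth_tx_queue_setup(port_id, queue_id, 512,
                        rte_eth_dev_socket_id(port_id), &txconf);
        if (ret != 0)
                return ret;

        /* ... rte_eth_dev_start(port_id) ... */

        /* Dispatches to ixgbe_dev_tx_queue_start() on ixgbe ports. */
        ret = rte_eth_dev_tx_queue_start(port_id, queue_id);
        if (ret != 0)
                return ret;

        /* And later to ixgbe_dev_tx_queue_stop(). */
        return rte_eth_dev_tx_queue_stop(port_id, queue_id);
}
#endif /* IXGBE_RXTX_DOC_EXAMPLES */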
5032
5033 void
5034 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5035         struct rte_eth_rxq_info *qinfo)
5036 {
5037         struct ixgbe_rx_queue *rxq;
5038
5039         rxq = dev->data->rx_queues[queue_id];
5040
5041         qinfo->mp = rxq->mb_pool;
5042         qinfo->scattered_rx = dev->data->scattered_rx;
5043         qinfo->nb_desc = rxq->nb_rx_desc;
5044
5045         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5046         qinfo->conf.rx_drop_en = rxq->drop_en;
5047         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5048 }
5049
5050 void
5051 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5052         struct rte_eth_txq_info *qinfo)
5053 {
5054         struct ixgbe_tx_queue *txq;
5055
5056         txq = dev->data->tx_queues[queue_id];
5057
5058         qinfo->nb_desc = txq->nb_tx_desc;
5059
5060         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5061         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5062         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5063
5064         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5065         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5066         qinfo->conf.txq_flags = txq->txq_flags;
5067         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5068 }
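
/*
 * Illustrative sketch (not part of the driver): the two info callbacks above
 * are reached through rte_eth_rx_queue_info_get() and
 * rte_eth_tx_queue_info_get().  The dump helper below is hypothetical and is
 * guarded by a hypothetical IXGBE_RXTX_DOC_EXAMPLES macro so it is not built.
 */
#ifdef IXGBE_RXTX_DOC_EXAMPLES
static void
example_dump_queue_info(uint8_t port_id, uint16_t queue_id)
{
        struct rte_eth_rxq_info rx_qinfo;
        struct rte_eth_txq_info tx_qinfo;

        if (rte_eth_rx_queue_info_get(port_id, queue_id, &rx_qinfo) == 0)
                printf("rxq %u: %u descriptors, drop_en=%u\n",
                       (unsigned)queue_id, (unsigned)rx_qinfo.nb_desc,
                       (unsigned)rx_qinfo.conf.rx_drop_en);

        if (rte_eth_tx_queue_info_get(port_id, queue_id, &tx_qinfo) == 0)
                printf("txq %u: %u descriptors, tx_free_thresh=%u\n",
                       (unsigned)queue_id, (unsigned)tx_qinfo.nb_desc,
                       (unsigned)tx_qinfo.conf.tx_free_thresh);
}
#endif /* IXGBE_RXTX_DOC_EXAMPLES */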
5069
5070 /*
5071  * [VF] Initializes Receive Unit.
5072  */
5073 int __attribute__((cold))
5074 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5075 {
5076         struct ixgbe_hw     *hw;
5077         struct ixgbe_rx_queue *rxq;
5078         uint64_t bus_addr;
5079         uint32_t srrctl, psrtype = 0;
5080         uint16_t buf_size;
5081         uint16_t i;
5082         int ret;
5083
5084         PMD_INIT_FUNC_TRACE();
5085         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5086
5087         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5088                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5089                         "it must be a power of 2");
5090                 return -1;
5091         }
5092
5093         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5094                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5095                         "it must be less than or equal to %d",
5096                         hw->mac.max_rx_queues);
5097                 return -1;
5098         }
5099
5100         /*
5101          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5102          * disables VF packet reception if the PF MTU is > 1500.
5103          * This is done to deal with the 82599 limitation that forces the PF
5104          * and all VFs to share the same MTU.
5105          * The PF driver re-enables VF packet reception only when the VF
5106          * driver issues an IXGBE_VF_SET_LPE request.
5107          * Until then, the VF device cannot be used, even if the VF driver
5108          * and the guest VM network stack are ready to accept packets of a
5109          * size up to the PF MTU.
5110          * As a workaround for this PF behaviour, always call
5111          * ixgbevf_rlpml_set_vf, even if jumbo frames are not used. This way,
5112          * VF packet reception works in all cases.
5113          */
5114         ixgbevf_rlpml_set_vf(hw,
5115                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
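        /*
         * For example (illustrative values): even with a standard 1500-byte
         * MTU, max_rx_pkt_len is typically ETHER_MAX_LEN (1518 bytes), and
         * the IXGBE_VF_SET_LPE mailbox message is still sent so that the PF
         * re-enables VF packet reception.
         */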
5116
5117         /* Setup RX queues */
5118         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5119                 rxq = dev->data->rx_queues[i];
5120
5121                 /* Allocate buffers for descriptor rings */
5122                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5123                 if (ret)
5124                         return ret;
5125
5126                 /* Setup the Base and Length of the Rx Descriptor Rings */
5127                 bus_addr = rxq->rx_ring_phys_addr;
5128
5129                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5130                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5131                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5132                                 (uint32_t)(bus_addr >> 32));
5133                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5134                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5135                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5136                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5137
5139                 /* Configure the SRRCTL register */
5140 #ifdef RTE_HEADER_SPLIT_ENABLE
5141                 /*
5142                  * Configure Header Split
5143                  */
5144                 if (dev->data->dev_conf.rxmode.header_split) {
5145                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5146                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5147                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5148                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5149                 } else
5150 #endif
5151                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5152
5153                 /* If configured, drop packets when no Rx descriptors are available */
5154                 if (rxq->drop_en)
5155                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5156
5157                 /*
5158                  * Configure the RX buffer size in the BSIZEPACKET field of
5159                  * the SRRCTL register of the queue.
5160                  * The value is in 1 KB resolution. Valid values can be from
5161                  * 1 KB to 16 KB.
5162                  */
5163                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5164                         RTE_PKTMBUF_HEADROOM);
5165                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5166                            IXGBE_SRRCTL_BSIZEPKT_MASK);
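                /*
                 * Worked example (illustrative, assuming a pool created with
                 * the default 2176-byte RTE_MBUF_DEFAULT_BUF_SIZE and a
                 * 128-byte RTE_PKTMBUF_HEADROOM): buf_size = 2176 - 128 =
                 * 2048, which programs BSIZEPACKET = 2 (1 KB units); the
                 * read-back below then yields 2048, so a standard 1518-byte
                 * frame plus two VLAN tags does not force scattered Rx.
                 */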
5167
5168                 /*
5169                  * VF modification to write virtual function SRRCTL register
5170                  */
5171                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5172
5173                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5174                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5175
5176                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5177                     /* Add the length of two VLAN tags to support dual VLAN (QinQ) */
5178                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5179                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5180                         if (!dev->data->scattered_rx)
5181                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5182                         dev->data->scattered_rx = 1;
5183                 }
5184         }
5185
5186 #ifdef RTE_HEADER_SPLIT_ENABLE
5187         if (dev->data->dev_conf.rxmode.header_split)
5188                 /* Must setup the PSRTYPE register */
5189                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5190                         IXGBE_PSRTYPE_UDPHDR   |
5191                         IXGBE_PSRTYPE_IPV4HDR  |
5192                         IXGBE_PSRTYPE_IPV6HDR;
5193 #endif
5194
5195         /* Set RQPL for VF RSS according to the number of Rx queues */
5196         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5197                 IXGBE_PSRTYPE_RQPL_SHIFT;
5198         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5199
5200         ixgbe_set_rx_function(dev);
5201
5202         return 0;
5203 }
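
/*
 * Illustrative sketch (not part of the driver): application-side setup that
 * satisfies the constraints checked in ixgbevf_dev_rx_init() above (a
 * power-of-two Rx queue count that does not exceed the VF maximum).  The
 * helper name, the descriptor counts and the IXGBE_RXTX_DOC_EXAMPLES guard
 * are hypothetical; error handling is abbreviated.
 */
#ifdef IXGBE_RXTX_DOC_EXAMPLES
static int
example_configure_vf_rx(uint8_t port_id, uint16_t nb_rx_queues,
                        struct rte_mempool *mb_pool)
{
        struct rte_eth_conf port_conf;
        uint16_t q;
        int ret;

        /* ixgbevf_dev_rx_init() rejects non-power-of-two queue counts. */
        if (!rte_is_power_of_2(nb_rx_queues))
                return -EINVAL;

        memset(&port_conf, 0, sizeof(port_conf));
        ret = rte_eth_dev_configure(port_id, nb_rx_queues, 1, &port_conf);
        if (ret != 0)
                return ret;

        for (q = 0; q < nb_rx_queues; q++) {
                ret = rte_eth_rx_queue_setup(port_id, q, 512,
                                rte_eth_dev_socket_id(port_id),
                                NULL, mb_pool);
                if (ret != 0)
                        return ret;
        }

        ret = rte_eth_tx_queue_setup(port_id, 0, 512,
                        rte_eth_dev_socket_id(port_id), NULL);
        if (ret != 0)
                return ret;

        /* rte_eth_dev_start() ends up invoking ixgbevf_dev_rx_init(). */
        return rte_eth_dev_start(port_id);
}
#endif /* IXGBE_RXTX_DOC_EXAMPLES */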
5204
5205 /*
5206  * [VF] Initializes Transmit Unit.
5207  */
5208 void __attribute__((cold))
5209 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5210 {
5211         struct ixgbe_hw     *hw;
5212         struct ixgbe_tx_queue *txq;
5213         uint64_t bus_addr;
5214         uint32_t txctrl;
5215         uint16_t i;
5216
5217         PMD_INIT_FUNC_TRACE();
5218         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5219
5220         /* Setup the Base and Length of the Tx Descriptor Rings */
5221         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5222                 txq = dev->data->tx_queues[i];
5223                 bus_addr = txq->tx_ring_phys_addr;
5224                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5225                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5226                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5227                                 (uint32_t)(bus_addr >> 32));
5228                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5229                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5230                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5231                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5232                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5233
5234                 /*
5235                  * Disable the Tx head write-back relaxed-ordering (RO) bit,
5236                  * since relaxed ordering breaks the bookkeeping when
5237                  * completions arrive out of order.
5238                  */
5238                 txctrl = IXGBE_READ_REG(hw,
5239                                 IXGBE_VFDCA_TXCTRL(i));
5240                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5241                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5242                                 txctrl);
5243         }
5244 }
5245
5246 /*
5247  * [VF] Start Transmit and Receive Units.
5248  */
5249 void __attribute__((cold))
5250 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5251 {
5252         struct ixgbe_hw     *hw;
5253         struct ixgbe_tx_queue *txq;
5254         struct ixgbe_rx_queue *rxq;
5255         uint32_t txdctl;
5256         uint32_t rxdctl;
5257         uint16_t i;
5258         int poll_ms;
5259
5260         PMD_INIT_FUNC_TRACE();
5261         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5262
5263         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5264                 txq = dev->data->tx_queues[i];
5265                 /* Setup Transmit Threshold Registers */
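                /*
                 * TXDCTL packs the prefetch (PTHRESH, bits 6:0), host
                 * (HTHRESH, bits 14:8) and write-back (WTHRESH, bits 22:16)
                 * thresholds, which is what the shifts below implement.
                 */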
5266                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5267                 txdctl |= txq->pthresh & 0x7F;
5268                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5269                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5270                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5271         }
5272
5273         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5274
5275                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5276                 txdctl |= IXGBE_TXDCTL_ENABLE;
5277                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5278
5279                 poll_ms = 10;
5280                 /* Wait until TX Enable ready */
5281                 do {
5282                         rte_delay_ms(1);
5283                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5284                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5285                 if (!poll_ms)
5286                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5287         }
5288         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5289
5290                 rxq = dev->data->rx_queues[i];
5291
5292                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5293                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5294                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5295
5296                 /* Wait until RX Enable ready */
5297                 poll_ms = 10;
5298                 do {
5299                         rte_delay_ms(1);
5300                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5301                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5302                 if (!poll_ms)
5303                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5304                 rte_wmb();
5305                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5306
5307         }
5308 }
5309
5310 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5311 int __attribute__((weak))
5312 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5313 {
5314         return -1;
5315 }
5316
5317 uint16_t __attribute__((weak))
5318 ixgbe_recv_pkts_vec(
5319         void __rte_unused *rx_queue,
5320         struct rte_mbuf __rte_unused **rx_pkts,
5321         uint16_t __rte_unused nb_pkts)
5322 {
5323         return 0;
5324 }
5325
5326 uint16_t __attribute__((weak))
5327 ixgbe_recv_scattered_pkts_vec(
5328         void __rte_unused *rx_queue,
5329         struct rte_mbuf __rte_unused **rx_pkts,
5330         uint16_t __rte_unused nb_pkts)
5331 {
5332         return 0;
5333 }
5334
5335 int __attribute__((weak))
5336 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5337 {
5338         return -1;
5339 }
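
/*
 * Illustrative note (not part of the driver): when the vector Rx path is
 * compiled in, the vector implementation file provides strong definitions
 * with the same names and prototypes, e.g.:
 *
 *     uint16_t
 *     ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 *                         uint16_t nb_pkts);
 *
 * The linker then prefers those strong symbols over the weak stubs above,
 * which exist only so that a build without vector support still links.
 */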