mbuf: add new Rx flags for stripped VLAN
[dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73 #include <rte_ip.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask of the offload flags that require building a TX context descriptor */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG |                 \
89                 PKT_TX_OUTER_IP_CKSUM)
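/*
 * Within ixgbe_xmit_pkts(), a packet whose ol_flags intersect this mask
 * requires hardware offload processing and may consume an additional
 * context descriptor (see ixgbe_set_xmit_ctx() below).
 */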
90
91 #if 1
92 #define RTE_PMD_USE_PREFETCH
93 #endif
94
95 #ifdef RTE_PMD_USE_PREFETCH
96 /*
97  * Prefetch a cache line into all cache levels.
98  */
99 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
100 #else
101 #define rte_ixgbe_prefetch(p)   do {} while (0)
102 #endif
103
104 /*********************************************************************
105  *
106  *  TX functions
107  *
108  **********************************************************************/
109
110 /*
111  * Check for descriptors with their DD bit set and free mbufs.
112  * Return the total number of buffers freed.
113  */
114 static inline int __attribute__((always_inline))
115 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
116 {
117         struct ixgbe_tx_entry *txep;
118         uint32_t status;
119         int i, nb_free = 0;
120         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
121
122         /* check DD bit on threshold descriptor */
123         status = txq->tx_ring[txq->tx_next_dd].wb.status;
124         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
125                 return 0;
126
127         /*
128          * first buffer to free from S/W ring is at index
129          * tx_next_dd - (tx_rs_thresh-1)
130          */
131         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
132
133         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
134                 /* free buffers one at a time */
135                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
136                 txep->mbuf = NULL;
137
138                 if (unlikely(m == NULL))
139                         continue;
140
141                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
142                     (nb_free > 0 && m->pool != free[0]->pool)) {
143                         rte_mempool_put_bulk(free[0]->pool,
144                                              (void **)free, nb_free);
145                         nb_free = 0;
146                 }
147
148                 free[nb_free++] = m;
149         }
150
151         if (nb_free > 0)
152                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
153
154         /* buffers were freed, update counters */
155         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
156         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
157         if (txq->tx_next_dd >= txq->nb_tx_desc)
158                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
159
160         return txq->tx_rs_thresh;
161 }
162
163 /* Populate 4 descriptors with data from 4 mbufs */
164 static inline void
165 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
166 {
167         uint64_t buf_dma_addr;
168         uint32_t pkt_len;
169         int i;
170
171         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
172                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
173                 pkt_len = (*pkts)->data_len;
174
175                 /* write data to descriptor */
176                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
177
178                 txdp->read.cmd_type_len =
179                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
180
181                 txdp->read.olinfo_status =
182                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
183
184                 rte_prefetch0(&(*pkts)->pool);
185         }
186 }
187
188 /* Populate 1 descriptor with data from 1 mbuf */
189 static inline void
190 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
191 {
192         uint64_t buf_dma_addr;
193         uint32_t pkt_len;
194
195         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
196         pkt_len = (*pkts)->data_len;
197
198         /* write data to descriptor */
199         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
200         txdp->read.cmd_type_len =
201                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
202         txdp->read.olinfo_status =
203                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
204         rte_prefetch0(&(*pkts)->pool);
205 }
206
207 /*
208  * Fill H/W descriptor ring with mbuf data.
209  * Copy mbuf pointers to the S/W ring.
210  */
211 static inline void
212 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
213                       uint16_t nb_pkts)
214 {
215         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
216         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
217         const int N_PER_LOOP = 4;
218         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
219         int mainpart, leftover;
220         int i, j;
221
222         /*
223          * Process most of the packets in chunks of N pkts.  Any
224          * leftover packets will get processed one at a time.
225          */
226         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
227         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
228         for (i = 0; i < mainpart; i += N_PER_LOOP) {
229                 /* Copy N mbuf pointers to the S/W ring */
230                 for (j = 0; j < N_PER_LOOP; ++j) {
231                         (txep + i + j)->mbuf = *(pkts + i + j);
232                 }
233                 tx4(txdp + i, pkts + i);
234         }
235
236         if (unlikely(leftover > 0)) {
237                 for (i = 0; i < leftover; ++i) {
238                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
239                         tx1(txdp + mainpart + i, pkts + mainpart + i);
240                 }
241         }
242 }
243
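/*
 * Simple TX burst path: assumes single-segment mbufs with no offload
 * requirements, so data descriptors can be filled back-to-back without
 * any context descriptors.
 */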
244 static inline uint16_t
245 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
246              uint16_t nb_pkts)
247 {
248         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
249         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
250         uint16_t n = 0;
251
252         /*
253          * Begin scanning the H/W ring for done descriptors when the
254          * number of available descriptors drops below tx_free_thresh.  For
255          * each done descriptor, free the associated buffer.
256          */
257         if (txq->nb_tx_free < txq->tx_free_thresh)
258                 ixgbe_tx_free_bufs(txq);
259
260         /* Only use descriptors that are available */
261         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
262         if (unlikely(nb_pkts == 0))
263                 return 0;
264
265         /* Use exactly nb_pkts descriptors */
266         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
267
268         /*
269          * At this point, we know there are enough descriptors in the
270          * ring to transmit all the packets.  This assumes that each
271          * mbuf contains a single segment, and that no new offloads
272          * are expected, which would require a new context descriptor.
273          */
274
275         /*
276          * See if we're going to wrap-around. If so, handle the top
277          * of the descriptor ring first, then do the bottom.  If not,
278          * the processing looks just like the "bottom" part anyway...
279          */
280         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
281                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
282                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
283
284                 /*
285                  * We know that the last descriptor in the ring will need to
286                  * have its RS bit set because tx_rs_thresh has to be
287                  * a divisor of the ring size
288                  */
289                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
290                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
291                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
292
293                 txq->tx_tail = 0;
294         }
295
296         /* Fill H/W descriptor ring with mbuf data */
297         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
298         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
299
300         /*
301          * Determine if RS bit should be set
302          * This is what we actually want:
303          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
304          * but instead of subtracting 1 and doing >=, we can just do
305          * greater than without subtracting.
306          */
307         if (txq->tx_tail > txq->tx_next_rs) {
308                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
309                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
310                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
311                                                 txq->tx_rs_thresh);
312                 if (txq->tx_next_rs >= txq->nb_tx_desc)
313                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
314         }
315
316         /*
317          * Check for wrap-around. This would only happen if we used
318          * up to the last descriptor in the ring, no more, no less.
319          */
320         if (txq->tx_tail >= txq->nb_tx_desc)
321                 txq->tx_tail = 0;
322
323         /* update tail pointer */
324         rte_wmb();
325         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
326
327         return nb_pkts;
328 }
329
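/*
 * Simple TX entry point: bursts larger than RTE_PMD_IXGBE_TX_MAX_BURST are
 * split into chunks of at most that size; transmission stops early if a
 * chunk cannot be sent in full.
 */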
330 uint16_t
331 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
332                        uint16_t nb_pkts)
333 {
334         uint16_t nb_tx;
335
336         /* Transmit directly if the burst fits within TX_MAX_BURST packets */
337         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
338                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
339
340         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
341         nb_tx = 0;
342         while (nb_pkts) {
343                 uint16_t ret, n;
344
345                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
346                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
347                 nb_tx = (uint16_t)(nb_tx + ret);
348                 nb_pkts = (uint16_t)(nb_pkts - ret);
349                 if (ret < n)
350                         break;
351         }
352
353         return nb_tx;
354 }
355
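/*
 * Write an advanced context descriptor carrying the offload parameters
 * (VLAN tag, L2/L3/L4 lengths, TSO MSS, outer IP length) and cache them in
 * txq->ctx_cache[] so that subsequent packets with identical offloads can
 * reuse the context instead of consuming another descriptor.
 */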
356 static inline void
357 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
358                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
359                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
360 {
361         uint32_t type_tucmd_mlhl;
362         uint32_t mss_l4len_idx = 0;
363         uint32_t ctx_idx;
364         uint32_t vlan_macip_lens;
365         union ixgbe_tx_offload tx_offload_mask;
366         uint32_t seqnum_seed = 0;
367
368         ctx_idx = txq->ctx_curr;
369         tx_offload_mask.data[0] = 0;
370         tx_offload_mask.data[1] = 0;
371         type_tucmd_mlhl = 0;
372
373         /* Specify which HW CTX to upload. */
374         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
375
376         if (ol_flags & PKT_TX_VLAN_PKT) {
377                 tx_offload_mask.vlan_tci |= ~0;
378         }
379
380         /* check if TCP segmentation required for this packet */
381         if (ol_flags & PKT_TX_TCP_SEG) {
382                 /* implies IP cksum in IPv4 */
383                 if (ol_flags & PKT_TX_IP_CKSUM)
384                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
385                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
386                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
387                 else
388                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
389                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
390                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
391
392                 tx_offload_mask.l2_len |= ~0;
393                 tx_offload_mask.l3_len |= ~0;
394                 tx_offload_mask.l4_len |= ~0;
395                 tx_offload_mask.tso_segsz |= ~0;
396                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
397                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
398         } else { /* no TSO, check if hardware checksum is needed */
399                 if (ol_flags & PKT_TX_IP_CKSUM) {
400                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
401                         tx_offload_mask.l2_len |= ~0;
402                         tx_offload_mask.l3_len |= ~0;
403                 }
404
405                 switch (ol_flags & PKT_TX_L4_MASK) {
406                 case PKT_TX_UDP_CKSUM:
407                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
408                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
409                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
410                         tx_offload_mask.l2_len |= ~0;
411                         tx_offload_mask.l3_len |= ~0;
412                         break;
413                 case PKT_TX_TCP_CKSUM:
414                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
415                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
416                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
417                         tx_offload_mask.l2_len |= ~0;
418                         tx_offload_mask.l3_len |= ~0;
419                         break;
420                 case PKT_TX_SCTP_CKSUM:
421                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
422                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
423                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
424                         tx_offload_mask.l2_len |= ~0;
425                         tx_offload_mask.l3_len |= ~0;
426                         break;
427                 default:
428                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
429                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
430                         break;
431                 }
432         }
433
434         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
435                 tx_offload_mask.outer_l2_len |= ~0;
436                 tx_offload_mask.outer_l3_len |= ~0;
437                 tx_offload_mask.l2_len |= ~0;
438                 seqnum_seed |= tx_offload.outer_l3_len
439                                << IXGBE_ADVTXD_OUTER_IPLEN;
440                 seqnum_seed |= tx_offload.l2_len
441                                << IXGBE_ADVTXD_TUNNEL_LEN;
442         }
443
444         txq->ctx_cache[ctx_idx].flags = ol_flags;
445         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
446                 tx_offload_mask.data[0] & tx_offload.data[0];
447         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
448                 tx_offload_mask.data[1] & tx_offload.data[1];
449         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
450
451         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
452         vlan_macip_lens = tx_offload.l3_len;
453         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
454                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
455                                     IXGBE_ADVTXD_MACLEN_SHIFT);
456         else
457                 vlan_macip_lens |= (tx_offload.l2_len <<
458                                     IXGBE_ADVTXD_MACLEN_SHIFT);
459         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
460         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
461         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
462         ctx_txd->seqnum_seed     = seqnum_seed;
463 }
464
465 /*
466  * Check which hardware context can be used. Use the existing match
467  * or create a new context descriptor.
468  */
469 static inline uint32_t
470 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
471                    union ixgbe_tx_offload tx_offload)
472 {
473         /* If match with the current used context */
474         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
475                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
476                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
477                      & tx_offload.data[0])) &&
478                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
479                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
480                      & tx_offload.data[1]))))
481                 return txq->ctx_curr;
482
483         /* Otherwise, check whether the other cached context matches */
484         txq->ctx_curr ^= 1;
485         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
486                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
487                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
488                      & tx_offload.data[0])) &&
489                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
490                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
491                      & tx_offload.data[1]))))
492                 return txq->ctx_curr;
493
494         /* Neither cached context matches: a new context descriptor is needed */
495         return IXGBE_CTX_NUM;
496 }
497
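/*
 * Translate mbuf checksum/TSO request flags into the POPTS bits of the
 * data descriptor's olinfo_status field.
 */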
498 static inline uint32_t
499 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
500 {
501         uint32_t tmp = 0;
502
503         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
504                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
505         if (ol_flags & PKT_TX_IP_CKSUM)
506                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
507         if (ol_flags & PKT_TX_TCP_SEG)
508                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
509         return tmp;
510 }
511
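/*
 * Translate mbuf offload flags into the cmd_type_len bits of the data
 * descriptor (VLAN insertion, TSO, outer IP checksum offload).
 */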
512 static inline uint32_t
513 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
514 {
515         uint32_t cmdtype = 0;
516
517         if (ol_flags & PKT_TX_VLAN_PKT)
518                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
519         if (ol_flags & PKT_TX_TCP_SEG)
520                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
521         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
522                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
523         return cmdtype;
524 }
525
526 /* Default RS bit threshold values */
527 #ifndef DEFAULT_TX_RS_THRESH
528 #define DEFAULT_TX_RS_THRESH   32
529 #endif
530 #ifndef DEFAULT_TX_FREE_THRESH
531 #define DEFAULT_TX_FREE_THRESH 32
532 #endif
533
534 /* Reset transmit descriptors after they have been used */
535 static inline int
536 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
537 {
538         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
539         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
540         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
541         uint16_t nb_tx_desc = txq->nb_tx_desc;
542         uint16_t desc_to_clean_to;
543         uint16_t nb_tx_to_clean;
544         uint32_t status;
545
546         /* Determine the last descriptor needing to be cleaned */
547         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
548         if (desc_to_clean_to >= nb_tx_desc)
549                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
550
551         /* Check to make sure the last descriptor to clean is done */
552         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
553         status = txr[desc_to_clean_to].wb.status;
554         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
555                 PMD_TX_FREE_LOG(DEBUG,
556                                 "TX descriptor %4u is not done "
557                                 "(port=%d queue=%d)",
558                                 desc_to_clean_to,
559                                 txq->port_id, txq->queue_id);
560                 /* Failed to clean any descriptors, better luck next time */
561                 return -(1);
562         }
563
564         /* Figure out how many descriptors will be cleaned */
565         if (last_desc_cleaned > desc_to_clean_to)
566                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
567                                                         desc_to_clean_to);
568         else
569                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
570                                                 last_desc_cleaned);
571
572         PMD_TX_FREE_LOG(DEBUG,
573                         "Cleaning %4u TX descriptors: %4u to %4u "
574                         "(port=%d queue=%d)",
575                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
576                         txq->port_id, txq->queue_id);
577
578         /*
579          * The last descriptor to clean is done, so that means all the
580          * descriptors from the last descriptor that was cleaned
581          * up to the last descriptor with the RS bit set
582          * are done. Only reset the threshold descriptor.
583          */
584         txr[desc_to_clean_to].wb.status = 0;
585
586         /* Update the txq to reflect the last descriptor that was cleaned */
587         txq->last_desc_cleaned = desc_to_clean_to;
588         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
589
590         /* No Error */
591         return 0;
592 }
593
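/*
 * Full-featured TX path: handles multi-segment packets and offloads that may
 * require a context descriptor (VLAN insertion, L3/L4 checksum, TSO, outer
 * IP checksum), cleaning the descriptor ring as needed to make room.
 */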
594 uint16_t
595 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
596                 uint16_t nb_pkts)
597 {
598         struct ixgbe_tx_queue *txq;
599         struct ixgbe_tx_entry *sw_ring;
600         struct ixgbe_tx_entry *txe, *txn;
601         volatile union ixgbe_adv_tx_desc *txr;
602         volatile union ixgbe_adv_tx_desc *txd, *txp;
603         struct rte_mbuf     *tx_pkt;
604         struct rte_mbuf     *m_seg;
605         uint64_t buf_dma_addr;
606         uint32_t olinfo_status;
607         uint32_t cmd_type_len;
608         uint32_t pkt_len;
609         uint16_t slen;
610         uint64_t ol_flags;
611         uint16_t tx_id;
612         uint16_t tx_last;
613         uint16_t nb_tx;
614         uint16_t nb_used;
615         uint64_t tx_ol_req;
616         uint32_t ctx = 0;
617         uint32_t new_ctx;
618         union ixgbe_tx_offload tx_offload;
619
620         tx_offload.data[0] = 0;
621         tx_offload.data[1] = 0;
622         txq = tx_queue;
623         sw_ring = txq->sw_ring;
624         txr     = txq->tx_ring;
625         tx_id   = txq->tx_tail;
626         txe = &sw_ring[tx_id];
627         txp = NULL;
628
629         /* Determine if the descriptor ring needs to be cleaned. */
630         if (txq->nb_tx_free < txq->tx_free_thresh)
631                 ixgbe_xmit_cleanup(txq);
632
633         rte_prefetch0(&txe->mbuf->pool);
634
635         /* TX loop */
636         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
637                 new_ctx = 0;
638                 tx_pkt = *tx_pkts++;
639                 pkt_len = tx_pkt->pkt_len;
640
641                 /*
642                  * Determine how many (if any) context descriptors
643                  * are needed for offload functionality.
644                  */
645                 ol_flags = tx_pkt->ol_flags;
646
647                 /* If hardware offload required */
648                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
649                 if (tx_ol_req) {
650                         tx_offload.l2_len = tx_pkt->l2_len;
651                         tx_offload.l3_len = tx_pkt->l3_len;
652                         tx_offload.l4_len = tx_pkt->l4_len;
653                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
654                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
655                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
656                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
657
658                         /* Decide whether a new context descriptor must be built or an existing one can be reused. */
659                         ctx = what_advctx_update(txq, tx_ol_req,
660                                 tx_offload);
661                         /* Only allocate a context descriptor if required */
662                         new_ctx = (ctx == IXGBE_CTX_NUM);
663                         ctx = txq->ctx_curr;
664                 }
665
666                 /*
667                  * Keep track of how many descriptors are used in this loop.
668                  * This is always the number of segments plus the number of
669                  * context descriptors required to transmit the packet.
670                  */
671                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
672
673                 if (txp != NULL &&
674                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
675                         /* set RS on the previous packet in the burst */
676                         txp->read.cmd_type_len |=
677                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
678
679                 /*
680                  * The number of descriptors that must be allocated for a
681                  * packet is the number of segments of that packet, plus 1
682                  * Context Descriptor for the hardware offload, if any.
683                  * Determine the last TX descriptor to allocate in the TX ring
684                  * for the packet, starting from the current position (tx_id)
685                  * in the ring.
686                  */
687                 tx_last = (uint16_t) (tx_id + nb_used - 1);
688
689                 /* Circular ring */
690                 if (tx_last >= txq->nb_tx_desc)
691                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
692
693                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
694                            " tx_first=%u tx_last=%u",
695                            (unsigned) txq->port_id,
696                            (unsigned) txq->queue_id,
697                            (unsigned) pkt_len,
698                            (unsigned) tx_id,
699                            (unsigned) tx_last);
700
701                 /*
702                  * Make sure there are enough TX descriptors available to
703                  * transmit the entire packet.
704                  * nb_used better be less than or equal to txq->tx_rs_thresh
705                  */
706                 if (nb_used > txq->nb_tx_free) {
707                         PMD_TX_FREE_LOG(DEBUG,
708                                         "Not enough free TX descriptors "
709                                         "nb_used=%4u nb_free=%4u "
710                                         "(port=%d queue=%d)",
711                                         nb_used, txq->nb_tx_free,
712                                         txq->port_id, txq->queue_id);
713
714                         if (ixgbe_xmit_cleanup(txq) != 0) {
715                                 /* Could not clean any descriptors */
716                                 if (nb_tx == 0)
717                                         return 0;
718                                 goto end_of_tx;
719                         }
720
721                         /* nb_used better be <= txq->tx_rs_thresh */
722                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
723                                 PMD_TX_FREE_LOG(DEBUG,
724                                         "The number of descriptors needed to "
725                                         "transmit the packet exceeds the "
726                                         "RS bit threshold. This will impact "
727                                         "performance. "
728                                         "nb_used=%4u nb_free=%4u "
729                                         "tx_rs_thresh=%4u. "
730                                         "(port=%d queue=%d)",
731                                         nb_used, txq->nb_tx_free,
732                                         txq->tx_rs_thresh,
733                                         txq->port_id, txq->queue_id);
734                                 /*
735                                  * Loop here until there are enough TX
736                                  * descriptors or until the ring cannot be
737                                  * cleaned.
738                                  */
739                                 while (nb_used > txq->nb_tx_free) {
740                                         if (ixgbe_xmit_cleanup(txq) != 0) {
741                                                 /*
742                                                  * Could not clean any
743                                                  * descriptors
744                                                  */
745                                                 if (nb_tx == 0)
746                                                         return 0;
747                                                 goto end_of_tx;
748                                         }
749                                 }
750                         }
751                 }
752
753                 /*
754                  * By now there are enough free TX descriptors to transmit
755                  * the packet.
756                  */
757
758                 /*
759                  * Set common flags of all TX Data Descriptors.
760                  *
761                  * The following bits must be set in all Data Descriptors:
762                  *   - IXGBE_ADVTXD_DTYP_DATA
763                  *   - IXGBE_ADVTXD_DCMD_DEXT
764                  *
765                  * The following bits must be set in the first Data Descriptor
766                  * and are ignored in the other ones:
767                  *   - IXGBE_ADVTXD_DCMD_IFCS
768                  *   - IXGBE_ADVTXD_MAC_1588
769                  *   - IXGBE_ADVTXD_DCMD_VLE
770                  *
771                  * The following bits must only be set in the last Data
772                  * Descriptor:
773                  *   - IXGBE_TXD_CMD_EOP
774                  *
775                  * The following bits can be set in any Data Descriptor, but
776                  * are only set in the last Data Descriptor:
777                  *   - IXGBE_TXD_CMD_RS
778                  */
779                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
780                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
781
782 #ifdef RTE_LIBRTE_IEEE1588
783                 if (ol_flags & PKT_TX_IEEE1588_TMST)
784                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
785 #endif
786
787                 olinfo_status = 0;
788                 if (tx_ol_req) {
789
790                         if (ol_flags & PKT_TX_TCP_SEG) {
791                                 /* when TSO is on, the paylen in the descriptor is
792                                  * not the packet length but the TCP payload length */
793                                 pkt_len -= (tx_offload.l2_len +
794                                         tx_offload.l3_len + tx_offload.l4_len);
795                         }
796
797                         /*
798                          * Setup the TX Advanced Context Descriptor if required
799                          */
800                         if (new_ctx) {
801                                 volatile struct ixgbe_adv_tx_context_desc *
802                                     ctx_txd;
803
804                                 ctx_txd = (volatile struct
805                                     ixgbe_adv_tx_context_desc *)
806                                     &txr[tx_id];
807
808                                 txn = &sw_ring[txe->next_id];
809                                 rte_prefetch0(&txn->mbuf->pool);
810
811                                 if (txe->mbuf != NULL) {
812                                         rte_pktmbuf_free_seg(txe->mbuf);
813                                         txe->mbuf = NULL;
814                                 }
815
816                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
817                                         tx_offload);
818
819                                 txe->last_id = tx_last;
820                                 tx_id = txe->next_id;
821                                 txe = txn;
822                         }
823
824                         /*
825                          * Set up the TX advanced data descriptor.
826                          * This path is taken whether a new context
827                          * descriptor was built or an existing one is reused.
828                          */
829                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
830                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
831                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
832                 }
833
834                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
835
836                 m_seg = tx_pkt;
837                 do {
838                         txd = &txr[tx_id];
839                         txn = &sw_ring[txe->next_id];
840                         rte_prefetch0(&txn->mbuf->pool);
841
842                         if (txe->mbuf != NULL)
843                                 rte_pktmbuf_free_seg(txe->mbuf);
844                         txe->mbuf = m_seg;
845
846                         /*
847                          * Set up Transmit Data Descriptor.
848                          */
849                         slen = m_seg->data_len;
850                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
851                         txd->read.buffer_addr =
852                                 rte_cpu_to_le_64(buf_dma_addr);
853                         txd->read.cmd_type_len =
854                                 rte_cpu_to_le_32(cmd_type_len | slen);
855                         txd->read.olinfo_status =
856                                 rte_cpu_to_le_32(olinfo_status);
857                         txe->last_id = tx_last;
858                         tx_id = txe->next_id;
859                         txe = txn;
860                         m_seg = m_seg->next;
861                 } while (m_seg != NULL);
862
863                 /*
864                  * The last packet data descriptor needs End Of Packet (EOP)
865                  */
866                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
867                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
868                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
869
870                 /* Set RS bit only on threshold packets' last descriptor */
871                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
872                         PMD_TX_FREE_LOG(DEBUG,
873                                         "Setting RS bit on TXD id="
874                                         "%4u (port=%d queue=%d)",
875                                         tx_last, txq->port_id, txq->queue_id);
876
877                         cmd_type_len |= IXGBE_TXD_CMD_RS;
878
879                         /* Update txq RS bit counters */
880                         txq->nb_tx_used = 0;
881                         txp = NULL;
882                 } else
883                         txp = txd;
884
885                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
886         }
887
888 end_of_tx:
889         /* set RS on last packet in the burst */
890         if (txp != NULL)
891                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
892
893         rte_wmb();
894
895         /*
896          * Set the Transmit Descriptor Tail (TDT)
897          */
898         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
899                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
900                    (unsigned) tx_id, (unsigned) nb_tx);
901         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
902         txq->tx_tail = tx_id;
903
904         return nb_tx;
905 }
906
907 /*********************************************************************
908  *
909  *  RX functions
910  *
911  **********************************************************************/
912 #define IXGBE_PACKET_TYPE_IPV4              0X01
913 #define IXGBE_PACKET_TYPE_IPV4_TCP          0X11
914 #define IXGBE_PACKET_TYPE_IPV4_UDP          0X21
915 #define IXGBE_PACKET_TYPE_IPV4_SCTP         0X41
916 #define IXGBE_PACKET_TYPE_IPV4_EXT          0X03
917 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP     0X43
918 #define IXGBE_PACKET_TYPE_IPV6              0X04
919 #define IXGBE_PACKET_TYPE_IPV6_TCP          0X14
920 #define IXGBE_PACKET_TYPE_IPV6_UDP          0X24
921 #define IXGBE_PACKET_TYPE_IPV6_EXT          0X0C
922 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP      0X1C
923 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP      0X2C
924 #define IXGBE_PACKET_TYPE_IPV4_IPV6         0X05
925 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP     0X15
926 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP     0X25
927 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
928 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
929 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
930
931 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
932 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
933 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
934 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
935 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
936 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
937 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
938 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
939 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
940 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
941 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
942 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
943 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
944 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
945 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
946 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
947 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
948 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
949 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
950
951 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
952 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
953 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
954 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
955 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
956 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
957 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
958 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
959 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
960 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
961 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
962 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
963 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
964 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
965 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
966 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
967 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
968 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
969 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
970
971 #define IXGBE_PACKET_TYPE_MAX               0X80
972 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
973 #define IXGBE_PACKET_TYPE_SHIFT             0X04
974
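/*
 * The RX descriptor packet type field is shifted right by
 * IXGBE_PACKET_TYPE_SHIFT and masked; the tunnel bit then selects between
 * the plain lookup table and the tunnel (NVGRE/VXLAN) lookup table below.
 */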
975 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
976 static inline uint32_t
977 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
978 {
979         /**
980          * Use two different tables for normal packets and tunnel packets
981          * to save space.
982          */
983         static const uint32_t
984                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
985                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
986                         RTE_PTYPE_L3_IPV4,
987                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
988                         RTE_PTYPE_L3_IPV4_EXT,
989                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
990                         RTE_PTYPE_L3_IPV6,
991                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
992                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
993                         RTE_PTYPE_INNER_L3_IPV6,
994                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
995                         RTE_PTYPE_L3_IPV6_EXT,
996                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
997                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
998                         RTE_PTYPE_INNER_L3_IPV6_EXT,
999                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1000                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1001                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1002                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1003                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1004                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1005                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1006                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1007                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1008                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1009                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1010                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1011                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1012                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1013                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1014                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1015                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1016                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1017                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1018                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1019                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1020                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1021                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1022                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1023                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1024                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1025                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1026                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1027         };
1028
1029         static const uint32_t
1030                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1031                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1032                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1033                         RTE_PTYPE_INNER_L2_ETHER,
1034                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1035                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1036                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1037                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1038                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1039                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1040                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1041                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1042                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1043                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1044                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1045                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1046                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1047                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1048                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1049                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1050                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1051                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1052                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1053                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1054                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1055                         RTE_PTYPE_INNER_L4_TCP,
1056                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1057                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1058                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1059                         RTE_PTYPE_INNER_L4_TCP,
1060                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1061                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1062                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1063                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1064                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1065                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1066                         RTE_PTYPE_INNER_L4_TCP,
1067                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1068                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1069                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1070                         RTE_PTYPE_INNER_L3_IPV4,
1071                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1072                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1073                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1074                         RTE_PTYPE_INNER_L4_UDP,
1075                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1077                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1078                         RTE_PTYPE_INNER_L4_UDP,
1079                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1080                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1081                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1082                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1083                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1084                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1085                         RTE_PTYPE_INNER_L4_UDP,
1086                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1087                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1088                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1089                         RTE_PTYPE_INNER_L3_IPV4,
1090                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1091                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1092                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1093                         RTE_PTYPE_INNER_L4_SCTP,
1094                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1095                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1096                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1097                         RTE_PTYPE_INNER_L4_SCTP,
1098
1099                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1101                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1102                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1103                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1104                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1105                         RTE_PTYPE_INNER_L3_IPV4,
1106                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1107                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1108                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1109                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1110                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1111                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1112                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1113                         RTE_PTYPE_INNER_L3_IPV6,
1114                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1116                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1117                         RTE_PTYPE_INNER_L3_IPV4,
1118                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1119                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1120                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1121                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1122                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1123                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1124                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1125                         RTE_PTYPE_INNER_L3_IPV4,
1126                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1128                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1129                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1130                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1131                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1132                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1133                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1134                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1135                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1136                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1137                         RTE_PTYPE_INNER_L3_IPV4,
1138                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1140                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1141                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1142                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1143                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1144                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1145                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1146                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1148                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1149                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1150                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1151                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1152                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1153                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1154                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1155                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1156                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1157                         RTE_PTYPE_INNER_L3_IPV4,
1158                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1160                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1161                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1162                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1163                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1164                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1165                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1166                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1167                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1168                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1169                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1170                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1172                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1173                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1174         };
1175
1176         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1177                 return RTE_PTYPE_UNKNOWN;
1178
1179         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1180
1181         /* For tunnel packet */
1182         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1183                 /* Remove the tunnel bit to save table space. */
1184                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1185                 return ptype_table_tn[pkt_info];
1186         }
1187
1188         /**
1189          * For x550, if the packet is not a tunnel packet,
1190          * the tunnel type bit should be 0.
1191          * Reuse the 82599 mask.
1192          */
1193         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1194
1195         return ptype_table[pkt_info];
1196 }
1197
1198 static inline uint64_t
1199 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1200 {
1201         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1202                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1203                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1204                 PKT_RX_RSS_HASH, 0, 0, 0,
1205                 0, 0, 0,  PKT_RX_FDIR,
1206         };
1207 #ifdef RTE_LIBRTE_IEEE1588
1208         static uint64_t ip_pkt_etqf_map[8] = {
1209                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1210                 0, 0, 0, 0,
1211         };
1212
1213         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1214                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0x07] |
1215                                 ip_rss_types_map[pkt_info & 0xF];
1216         else
1217                 return ip_rss_types_map[pkt_info & 0xF];
1218 #else
1219         return ip_rss_types_map[pkt_info & 0xF];
1220 #endif
1221 }
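/*
 * Illustrative example, derived only from the table above: the low four
 * bits of pkt_info select the RSS type reported by the NIC, e.g.
 *     (pkt_info & 0xF) == 0x0  ->  0                (no hash reported)
 *     (pkt_info & 0xF) == 0x1  ->  PKT_RX_RSS_HASH  (hash.rss is valid)
 *     (pkt_info & 0xF) == 0xF  ->  PKT_RX_FDIR      (hash.fdir is valid)
 * Callers therefore test PKT_RX_RSS_HASH or PKT_RX_FDIR before reading the
 * corresponding hash union member of the mbuf.
 */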
1222
1223 static inline uint64_t
1224 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1225 {
1226         uint64_t pkt_flags;
1227
1228         /*
1229          * Check only whether a VLAN is present.
1230          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1231          * that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
1232          */
1233         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1234
1235 #ifdef RTE_LIBRTE_IEEE1588
1236         if (rx_status & IXGBE_RXD_STAT_TMST)
1237                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1238 #endif
1239         return pkt_flags;
1240 }
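/*
 * Illustrative example, assuming vlan_flags carries the per-queue value
 * taken from rxq->vlan_flags by the callers in this file:
 *     IXGBE_RXD_STAT_VP set in rx_status   -> vlan_flags is returned
 *     IXGBE_RXD_STAT_VP clear in rx_status -> 0 is returned
 * i.e. the descriptor's "VLAN present" bit alone decides whether the
 * queue's VLAN-related mbuf flags are reported for this packet.
 */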
1241
1242 static inline uint64_t
1243 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1244 {
1245         uint64_t pkt_flags;
1246
1247         /*
1248          * Bit 31: IPE, IPv4 checksum error
1249          * Bit 30: L4I, L4 integrity error
1250          */
1251         static uint64_t error_to_pkt_flags_map[4] = {
1252                 0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1253                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1254         };
1255         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1256                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1257
1258         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1259             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1260                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1261         }
1262
1263         return pkt_flags;
1264 }
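/*
 * Illustrative walk-through of the lookup above, assuming the two error
 * bits described in the comment land in the index as (IPE << 1) | L4I:
 *     (IPE, L4I) = (0, 0) -> index 0 -> no checksum error flags
 *     (IPE, L4I) = (0, 1) -> index 1 -> PKT_RX_L4_CKSUM_BAD
 *     (IPE, L4I) = (1, 0) -> index 2 -> PKT_RX_IP_CKSUM_BAD
 *     (IPE, L4I) = (1, 1) -> index 3 -> both flags set
 */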
1265
1266 /*
1267  * LOOK_AHEAD defines how many descriptor statuses to check beyond the
1268  * current descriptor.
1269  * It must be a compile-time #define for optimal performance.
1270  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1271  * function only works with LOOK_AHEAD=8.
1272  */
1273 #define LOOK_AHEAD 8
1274 #if (LOOK_AHEAD != 8)
1275 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1276 #endif
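/*
 * Sketch of how the scan below proceeds, assuming the usual value of 32
 * for RTE_PMD_IXGBE_RX_MAX_BURST: descriptors are examined in groups of
 * LOOK_AHEAD (8), i.e. at most four groups per call.  For each group the
 * DD bits are read (backwards), the number of completed descriptors nb_dd
 * is counted, only those nb_dd descriptors are translated into mbufs, and
 * the scan stops at the first group that is not fully completed.
 */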
1277 static inline int
1278 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1279 {
1280         volatile union ixgbe_adv_rx_desc *rxdp;
1281         struct ixgbe_rx_entry *rxep;
1282         struct rte_mbuf *mb;
1283         uint16_t pkt_len;
1284         uint64_t pkt_flags;
1285         int nb_dd;
1286         uint32_t s[LOOK_AHEAD];
1287         uint32_t pkt_info[LOOK_AHEAD];
1288         int i, j, nb_rx = 0;
1289         uint32_t status;
1290         uint64_t vlan_flags = rxq->vlan_flags;
1291
1292         /* get references to current descriptor and S/W ring entry */
1293         rxdp = &rxq->rx_ring[rxq->rx_tail];
1294         rxep = &rxq->sw_ring[rxq->rx_tail];
1295
1296         status = rxdp->wb.upper.status_error;
1297         /* check to make sure there is at least 1 packet to receive */
1298         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1299                 return 0;
1300
1301         /*
1302          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1303          * reference packets that are ready to be received.
1304          */
1305         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1306              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1307                 /* Read descriptor statuses backwards to avoid a race condition */
1308                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1309                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1310
1311                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1312                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1313                                                        lo_dword.data);
1314
1315                 /* Compute how many status bits were set */
1316                 nb_dd = 0;
1317                 for (j = 0; j < LOOK_AHEAD; ++j)
1318                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1319
1320                 nb_rx += nb_dd;
1321
1322                 /* Translate descriptor info to mbuf format */
1323                 for (j = 0; j < nb_dd; ++j) {
1324                         mb = rxep[j].mbuf;
1325                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1326                                   rxq->crc_len;
1327                         mb->data_len = pkt_len;
1328                         mb->pkt_len = pkt_len;
1329                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1330
1331                         /* convert descriptor fields to rte mbuf flags */
1332                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1333                                 vlan_flags);
1334                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1335                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1336                                         ((uint16_t)pkt_info[j]);
1337                         mb->ol_flags = pkt_flags;
1338                         mb->packet_type =
1339                                 ixgbe_rxd_pkt_info_to_pkt_type
1340                                         (pkt_info[j], rxq->pkt_type_mask);
1341
1342                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1343                                 mb->hash.rss = rte_le_to_cpu_32(
1344                                     rxdp[j].wb.lower.hi_dword.rss);
1345                         else if (pkt_flags & PKT_RX_FDIR) {
1346                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1347                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1348                                     IXGBE_ATR_HASH_MASK;
1349                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1350                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1351                         }
1352                 }
1353
1354                 /* Move mbuf pointers from the S/W ring to the stage */
1355                 for (j = 0; j < LOOK_AHEAD; ++j) {
1356                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1357                 }
1358
1359                 /* stop at the first group that is not fully completed */
1360                 if (nb_dd != LOOK_AHEAD)
1361                         break;
1362         }
1363
1364         /* clear software ring entries so we can cleanup correctly */
1365         for (i = 0; i < nb_rx; ++i) {
1366                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1367         }
1368
1369
1370         return nb_rx;
1371 }
1372
1373 static inline int
1374 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1375 {
1376         volatile union ixgbe_adv_rx_desc *rxdp;
1377         struct ixgbe_rx_entry *rxep;
1378         struct rte_mbuf *mb;
1379         uint16_t alloc_idx;
1380         __le64 dma_addr;
1381         int diag, i;
1382
1383         /* allocate buffers in bulk directly into the S/W ring */
1384         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1385         rxep = &rxq->sw_ring[alloc_idx];
1386         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1387                                     rxq->rx_free_thresh);
1388         if (unlikely(diag != 0))
1389                 return -ENOMEM;
1390
1391         rxdp = &rxq->rx_ring[alloc_idx];
1392         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1393                 /* populate the static rte mbuf fields */
1394                 mb = rxep[i].mbuf;
1395                 if (reset_mbuf) {
1396                         mb->next = NULL;
1397                         mb->nb_segs = 1;
1398                         mb->port = rxq->port_id;
1399                 }
1400
1401                 rte_mbuf_refcnt_set(mb, 1);
1402                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1403
1404                 /* populate the descriptors */
1405                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1406                 rxdp[i].read.hdr_addr = 0;
1407                 rxdp[i].read.pkt_addr = dma_addr;
1408         }
1409
1410         /* update state of internal queue structure */
1411         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1412         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1413                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1414
1415         /* no errors */
1416         return 0;
1417 }
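/*
 * Worked example of the rx_free_trigger arithmetic above, with a
 * hypothetical ring of 128 descriptors and rx_free_thresh = 32, assuming
 * the trigger starts at rx_free_thresh - 1 (the value the wrap above
 * resets it to):
 *     trigger = 31  -> alloc_idx = 0,  refill descriptors 0..31,  trigger -> 63
 *     trigger = 63  -> alloc_idx = 32, refill descriptors 32..63, trigger -> 95
 *     ...
 *     trigger = 127 -> refill 96..127; 127 + 32 >= 128, so it wraps to 31
 */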
1418
1419 static inline uint16_t
1420 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1421                          uint16_t nb_pkts)
1422 {
1423         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1424         int i;
1425
1426         /* how many packets are ready to return? */
1427         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1428
1429         /* copy mbuf pointers to the application's packet list */
1430         for (i = 0; i < nb_pkts; ++i)
1431                 rx_pkts[i] = stage[i];
1432
1433         /* update internal queue state */
1434         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1435         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1436
1437         return nb_pkts;
1438 }
1439
1440 static inline uint16_t
1441 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1442              uint16_t nb_pkts)
1443 {
1444         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1445         uint16_t nb_rx = 0;
1446
1447         /* Any previously recv'd pkts will be returned from the Rx stage */
1448         if (rxq->rx_nb_avail)
1449                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1450
1451         /* Scan the H/W ring for packets to receive */
1452         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1453
1454         /* update internal queue state */
1455         rxq->rx_next_avail = 0;
1456         rxq->rx_nb_avail = nb_rx;
1457         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1458
1459         /* if required, allocate new buffers to replenish descriptors */
1460         if (rxq->rx_tail > rxq->rx_free_trigger) {
1461                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1462
1463                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1464                         int i, j;
1465
1466                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1467                                    "queue_id=%u", (unsigned) rxq->port_id,
1468                                    (unsigned) rxq->queue_id);
1469
1470                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1471                                 rxq->rx_free_thresh;
1472
1473                         /*
1474                          * Need to rewind any previous receives if we cannot
1475                          * allocate new buffers to replenish the old ones.
1476                          */
1477                         rxq->rx_nb_avail = 0;
1478                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1479                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1480                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1481
1482                         return 0;
1483                 }
1484
1485                 /* update tail pointer */
1486                 rte_wmb();
1487                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1488         }
1489
1490         if (rxq->rx_tail >= rxq->nb_rx_desc)
1491                 rxq->rx_tail = 0;
1492
1493         /* received any packets this loop? */
1494         if (rxq->rx_nb_avail)
1495                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1496
1497         return 0;
1498 }
1499
1500 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1501 uint16_t
1502 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1503                            uint16_t nb_pkts)
1504 {
1505         uint16_t nb_rx;
1506
1507         if (unlikely(nb_pkts == 0))
1508                 return 0;
1509
1510         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1511                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1512
1513         /* request is relatively large, chunk it up */
1514         nb_rx = 0;
1515         while (nb_pkts) {
1516                 uint16_t ret, n;
1517
1518                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1519                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1520                 nb_rx = (uint16_t)(nb_rx + ret);
1521                 nb_pkts = (uint16_t)(nb_pkts - ret);
1522                 if (ret < n)
1523                         break;
1524         }
1525
1526         return nb_rx;
1527 }
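/*
 * Usage sketch (illustrative): a request for 100 packets, with the usual
 * RTE_PMD_IXGBE_RX_MAX_BURST of 32, is served as up to four calls to
 * rx_recv_pkts() for 32, 32, 32 and 4 packets; the loop stops early as
 * soon as one chunk returns fewer packets than requested, so the function
 * never spins when the ring runs dry.
 */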
1528
1529 uint16_t
1530 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1531                 uint16_t nb_pkts)
1532 {
1533         struct ixgbe_rx_queue *rxq;
1534         volatile union ixgbe_adv_rx_desc *rx_ring;
1535         volatile union ixgbe_adv_rx_desc *rxdp;
1536         struct ixgbe_rx_entry *sw_ring;
1537         struct ixgbe_rx_entry *rxe;
1538         struct rte_mbuf *rxm;
1539         struct rte_mbuf *nmb;
1540         union ixgbe_adv_rx_desc rxd;
1541         uint64_t dma_addr;
1542         uint32_t staterr;
1543         uint32_t pkt_info;
1544         uint16_t pkt_len;
1545         uint16_t rx_id;
1546         uint16_t nb_rx;
1547         uint16_t nb_hold;
1548         uint64_t pkt_flags;
1549         uint64_t vlan_flags;
1550
1551         nb_rx = 0;
1552         nb_hold = 0;
1553         rxq = rx_queue;
1554         rx_id = rxq->rx_tail;
1555         rx_ring = rxq->rx_ring;
1556         sw_ring = rxq->sw_ring;
1557         vlan_flags = rxq->vlan_flags;
1558         while (nb_rx < nb_pkts) {
1559                 /*
1560                  * The order of operations here is important as the DD status
1561                  * bit must not be read after any other descriptor fields.
1562                  * rx_ring and rxdp are pointing to volatile data so the order
1563                  * of accesses cannot be reordered by the compiler. If they were
1564                  * not volatile, they could be reordered which could lead to
1565                  * using invalid descriptor fields when read from rxd.
1566                  */
1567                 rxdp = &rx_ring[rx_id];
1568                 staterr = rxdp->wb.upper.status_error;
1569                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1570                         break;
1571                 rxd = *rxdp;
1572
1573                 /*
1574                  * End of packet.
1575                  *
1576                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1577                  * is likely to be invalid and to be dropped by the various
1578                  * validation checks performed by the network stack.
1579                  *
1580                  * Allocate a new mbuf to replenish the RX ring descriptor.
1581                  * If the allocation fails:
1582                  *    - arrange for that RX descriptor to be the first one
1583                  *      being parsed the next time the receive function is
1584                  *      invoked [on the same queue].
1585                  *
1586                  *    - Stop parsing the RX ring and return immediately.
1587                  *
1588                  * This policy does not drop the packet received in the RX
1589                  * descriptor for which the allocation of a new mbuf failed.
1590                  * Thus, it allows that packet to be retrieved later, once
1591                  * mbufs have been freed in the meantime.
1592                  * As a side effect, holding RX descriptors instead of
1593                  * systematically giving them back to the NIC may lead to
1594                  * RX ring exhaustion situations.
1595                  * However, the NIC can gracefully prevent such situations
1596                  * from happening by sending specific "back-pressure" flow control
1597                  * frames to its peer(s).
1598                  */
1599                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1600                            "ext_err_stat=0x%08x pkt_len=%u",
1601                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1602                            (unsigned) rx_id, (unsigned) staterr,
1603                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1604
1605                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1606                 if (nmb == NULL) {
1607                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1608                                    "queue_id=%u", (unsigned) rxq->port_id,
1609                                    (unsigned) rxq->queue_id);
1610                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1611                         break;
1612                 }
1613
1614                 nb_hold++;
1615                 rxe = &sw_ring[rx_id];
1616                 rx_id++;
1617                 if (rx_id == rxq->nb_rx_desc)
1618                         rx_id = 0;
1619
1620                 /* Prefetch next mbuf while processing current one. */
1621                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1622
1623                 /*
1624                  * When next RX descriptor is on a cache-line boundary,
1625                  * prefetch the next 4 RX descriptors and the next 8 pointers
1626                  * to mbufs.
1627                  */
1628                 if ((rx_id & 0x3) == 0) {
1629                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1630                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1631                 }
1632
1633                 rxm = rxe->mbuf;
1634                 rxe->mbuf = nmb;
1635                 dma_addr =
1636                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1637                 rxdp->read.hdr_addr = 0;
1638                 rxdp->read.pkt_addr = dma_addr;
1639
1640                 /*
1641                  * Initialize the returned mbuf.
1642                  * 1) setup generic mbuf fields:
1643                  *    - number of segments,
1644                  *    - next segment,
1645                  *    - packet length,
1646                  *    - RX port identifier.
1647                  * 2) integrate hardware offload data, if any:
1648                  *    - RSS flag & hash,
1649                  *    - IP checksum flag,
1650                  *    - VLAN TCI, if any,
1651                  *    - error flags.
1652                  */
1653                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1654                                       rxq->crc_len);
1655                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1656                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1657                 rxm->nb_segs = 1;
1658                 rxm->next = NULL;
1659                 rxm->pkt_len = pkt_len;
1660                 rxm->data_len = pkt_len;
1661                 rxm->port = rxq->port_id;
1662
1663                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1664                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1665                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1666
1667                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1668                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1669                 pkt_flags = pkt_flags |
1670                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1671                 rxm->ol_flags = pkt_flags;
1672                 rxm->packet_type =
1673                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1674                                                        rxq->pkt_type_mask);
1675
1676                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1677                         rxm->hash.rss = rte_le_to_cpu_32(
1678                                                 rxd.wb.lower.hi_dword.rss);
1679                 else if (pkt_flags & PKT_RX_FDIR) {
1680                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1681                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1682                                         IXGBE_ATR_HASH_MASK;
1683                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1684                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1685                 }
1686                 /*
1687                  * Store the mbuf address into the next entry of the array
1688                  * of returned packets.
1689                  */
1690                 rx_pkts[nb_rx++] = rxm;
1691         }
1692         rxq->rx_tail = rx_id;
1693
1694         /*
1695          * If the number of free RX descriptors is greater than the RX free
1696          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1697          * register.
1698          * Update the RDT with the value of the last processed RX descriptor
1699          * minus 1, to guarantee that the RDT register is never equal to the
1700          * RDH register, which creates a "full" ring situation from the
1701          * hardware point of view...
1702          */
1703         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1704         if (nb_hold > rxq->rx_free_thresh) {
1705                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1706                            "nb_hold=%u nb_rx=%u",
1707                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1708                            (unsigned) rx_id, (unsigned) nb_hold,
1709                            (unsigned) nb_rx);
1710                 rx_id = (uint16_t) ((rx_id == 0) ?
1711                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1712                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1713                 nb_hold = 0;
1714         }
1715         rxq->nb_rx_hold = nb_hold;
1716         return nb_rx;
1717 }
1718
1719 /**
1720  * Detect an RSC descriptor.
1721  */
1722 static inline uint32_t
1723 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1724 {
1725         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1726                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1727 }
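/*
 * A non-zero return value means the descriptor belongs to an RSC (LRO)
 * aggregation; in that case the hardware supplies the index of the next
 * descriptor of the aggregation in the NEXTP field, which the LRO receive
 * path below uses to chain the segments together.
 */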
1728
1729 /**
1730  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1731  *
1732  * Fill the following info in the HEAD buffer of the Rx cluster:
1733  *    - RX port identifier
1734  *    - hardware offload data, if any:
1735  *      - RSS flag & hash
1736  *      - IP checksum flag
1737  *      - VLAN TCI, if any
1738  *      - error flags
1739  * @head HEAD of the packet cluster
1740  * @desc HW descriptor to get data from
1741  * @rxq Pointer to the Rx queue
1742  */
1743 static inline void
1744 ixgbe_fill_cluster_head_buf(
1745         struct rte_mbuf *head,
1746         union ixgbe_adv_rx_desc *desc,
1747         struct ixgbe_rx_queue *rxq,
1748         uint32_t staterr)
1749 {
1750         uint32_t pkt_info;
1751         uint64_t pkt_flags;
1752
1753         head->port = rxq->port_id;
1754
1755         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1756          * set in the pkt_flags field.
1757          */
1758         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1759         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1760         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1761         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1762         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1763         head->ol_flags = pkt_flags;
1764         head->packet_type =
1765                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1766
1767         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1768                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1769         else if (pkt_flags & PKT_RX_FDIR) {
1770                 head->hash.fdir.hash =
1771                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1772                                                           & IXGBE_ATR_HASH_MASK;
1773                 head->hash.fdir.id =
1774                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1775         }
1776 }
1777
1778 /**
1779  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1780  *
1781  * @rx_queue Rx queue handle
1782  * @rx_pkts table of received packets
1783  * @nb_pkts size of rx_pkts table
1784  * @bulk_alloc if TRUE, bulk allocation is used to refill the HW ring
1785  *
1786  * Handles the Rx HW ring completions when the RSC feature is configured.
1787  * Uses an additional ring (sw_sc_ring) of ixgbe_scattered_rx_entry elements
1788  * that holds the relevant RSC info.
1788  *
1789  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1790  * 1) When non-EOP RSC completion arrives:
1791  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1792  *       segment's data length.
1793  *    b) Set the "next" pointer of the current segment to point to the segment
1794  *       at the NEXTP index.
1795  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1796  *       in the sw_sc_ring.
1797  * 2) When EOP arrives we just update the cluster's total length and offload
1798  *    flags and deliver the cluster up to the upper layers. In our case - put it
1799  *    in the rx_pkts table.
1800  *
1801  * Returns the number of received packets/clusters (according to the "bulk
1802  * receive" interface).
1803  */
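/*
 * Illustrative flow with hypothetical descriptor indices: assume an RSC
 * aggregation written back at descriptors 5, 9 and 14, where 5 and 9 are
 * non-EOP with NEXTP pointing at 9 and 14 respectively, and 14 carries EOP:
 *  - at 5:  first_seg is created and sw_sc_ring[9].fbuf  = first_seg
 *  - at 9:  pkt_len/nb_segs are updated, sw_sc_ring[14].fbuf = first_seg
 *  - at 14: (EOP) the head buffer is filled in, CRC handling is applied
 *           and first_seg is returned to the caller in rx_pkts[].
 */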
1804 static inline uint16_t
1805 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1806                     bool bulk_alloc)
1807 {
1808         struct ixgbe_rx_queue *rxq = rx_queue;
1809         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1810         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1811         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1812         uint16_t rx_id = rxq->rx_tail;
1813         uint16_t nb_rx = 0;
1814         uint16_t nb_hold = rxq->nb_rx_hold;
1815         uint16_t prev_id = rxq->rx_tail;
1816
1817         while (nb_rx < nb_pkts) {
1818                 bool eop;
1819                 struct ixgbe_rx_entry *rxe;
1820                 struct ixgbe_scattered_rx_entry *sc_entry;
1821                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1822                 struct ixgbe_rx_entry *next_rxe = NULL;
1823                 struct rte_mbuf *first_seg;
1824                 struct rte_mbuf *rxm;
1825                 struct rte_mbuf *nmb;
1826                 union ixgbe_adv_rx_desc rxd;
1827                 uint16_t data_len;
1828                 uint16_t next_id;
1829                 volatile union ixgbe_adv_rx_desc *rxdp;
1830                 uint32_t staterr;
1831
1832 next_desc:
1833                 /*
1834                  * The code in this whole file uses the volatile pointer to
1835                  * ensure the read ordering of the status and the rest of the
1836                  * descriptor fields (on the compiler level only!!!). This is so
1837                  * UGLY - why not just use the compiler barrier instead? DPDK
1838                  * even has the rte_compiler_barrier() for that.
1839                  *
1840                  * But most importantly this is just wrong because this doesn't
1841                  * ensure memory ordering in a general case at all. For
1842                  * instance, DPDK is supposed to work on Power CPUs where
1843                  * compiler barrier may just not be enough!
1844                  *
1845                  * I tried to write only this function properly to have a
1846                  * starting point (as a part of an LRO/RSC series) but the
1847                  * compiler cursed at me when I tried to cast away the
1848                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1849                  * keeping it the way it is for now.
1850                  *
1851                  * The code in this file is broken in so many other places and
1852                  * will just not work on a big endian CPU anyway therefore the
1853                  * lines below will have to be revisited together with the rest
1854                  * of the ixgbe PMD.
1855                  *
1856                  * TODO:
1857                  *    - Get rid of "volatile" crap and let the compiler do its
1858                  *      job.
1859                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1860                  *      memory ordering below.
1861                  */
1862                 rxdp = &rx_ring[rx_id];
1863                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1864
1865                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1866                         break;
1867
1868                 rxd = *rxdp;
1869
1870                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1871                                   "staterr=0x%x data_len=%u",
1872                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1873                            rte_le_to_cpu_16(rxd.wb.upper.length));
1874
1875                 if (!bulk_alloc) {
1876                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1877                         if (nmb == NULL) {
1878                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1879                                                   "port_id=%u queue_id=%u",
1880                                            rxq->port_id, rxq->queue_id);
1881
1882                                 rte_eth_devices[rxq->port_id].data->
1883                                                         rx_mbuf_alloc_failed++;
1884                                 break;
1885                         }
1886                 } else if (nb_hold > rxq->rx_free_thresh) {
1887                         uint16_t next_rdt = rxq->rx_free_trigger;
1888
1889                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1890                                 rte_wmb();
1891                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1892                                                     next_rdt);
1893                                 nb_hold -= rxq->rx_free_thresh;
1894                         } else {
1895                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1896                                                   "port_id=%u queue_id=%u",
1897                                            rxq->port_id, rxq->queue_id);
1898
1899                                 rte_eth_devices[rxq->port_id].data->
1900                                                         rx_mbuf_alloc_failed++;
1901                                 break;
1902                         }
1903                 }
1904
1905                 nb_hold++;
1906                 rxe = &sw_ring[rx_id];
1907                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1908
1909                 next_id = rx_id + 1;
1910                 if (next_id == rxq->nb_rx_desc)
1911                         next_id = 0;
1912
1913                 /* Prefetch next mbuf while processing current one. */
1914                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
1915
1916                 /*
1917                  * When next RX descriptor is on a cache-line boundary,
1918                  * prefetch the next 4 RX descriptors and the next 4 pointers
1919                  * to mbufs.
1920                  */
1921                 if ((next_id & 0x3) == 0) {
1922                         rte_ixgbe_prefetch(&rx_ring[next_id]);
1923                         rte_ixgbe_prefetch(&sw_ring[next_id]);
1924                 }
1925
1926                 rxm = rxe->mbuf;
1927
1928                 if (!bulk_alloc) {
1929                         __le64 dma =
1930                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1931                         /*
1932                          * Update RX descriptor with the physical address of the
1933                          * new data buffer of the new allocated mbuf.
1934                          */
1935                         rxe->mbuf = nmb;
1936
1937                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
1938                         rxdp->read.hdr_addr = 0;
1939                         rxdp->read.pkt_addr = dma;
1940                 } else
1941                         rxe->mbuf = NULL;
1942
1943                 /*
1944                  * Set data length & data buffer address of mbuf.
1945                  */
1946                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1947                 rxm->data_len = data_len;
1948
1949                 if (!eop) {
1950                         uint16_t nextp_id;
1951                         /*
1952                          * Get next descriptor index:
1953                          *  - For RSC it's in the NEXTP field.
1954                          *  - For a scattered packet - it's just a following
1955                          *    descriptor.
1956                          */
1957                         if (ixgbe_rsc_count(&rxd))
1958                                 nextp_id =
1959                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1960                                                        IXGBE_RXDADV_NEXTP_SHIFT;
1961                         else
1962                                 nextp_id = next_id;
1963
1964                         next_sc_entry = &sw_sc_ring[nextp_id];
1965                         next_rxe = &sw_ring[nextp_id];
1966                         rte_ixgbe_prefetch(next_rxe);
1967                 }
1968
1969                 sc_entry = &sw_sc_ring[rx_id];
1970                 first_seg = sc_entry->fbuf;
1971                 sc_entry->fbuf = NULL;
1972
1973                 /*
1974                  * If this is the first buffer of the received packet,
1975                  * set the pointer to the first mbuf of the packet and
1976                  * initialize its context.
1977                  * Otherwise, update the total length and the number of segments
1978                  * of the current scattered packet, and update the pointer to
1979                  * the last mbuf of the current packet.
1980                  */
1981                 if (first_seg == NULL) {
1982                         first_seg = rxm;
1983                         first_seg->pkt_len = data_len;
1984                         first_seg->nb_segs = 1;
1985                 } else {
1986                         first_seg->pkt_len += data_len;
1987                         first_seg->nb_segs++;
1988                 }
1989
1990                 prev_id = rx_id;
1991                 rx_id = next_id;
1992
1993                 /*
1994                  * If this is not the last buffer of the received packet, update
1995                  * the pointer to the first mbuf at the NEXTP entry in the
1996                  * sw_sc_ring and continue to parse the RX ring.
1997                  */
1998                 if (!eop && next_rxe) {
1999                         rxm->next = next_rxe->mbuf;
2000                         next_sc_entry->fbuf = first_seg;
2001                         goto next_desc;
2002                 }
2003
2004                 /*
2005                  * This is the last buffer of the received packet - return
2006                  * the current cluster to the user.
2007                  */
2008                 rxm->next = NULL;
2009
2010                 /* Initialize the first mbuf of the returned packet */
2011                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2012
2013                 /*
2014                  * Deal with the case when HW CRC strip is disabled.
2015                  * That cannot happen when LRO is enabled, but it could
2016                  * still happen in scattered Rx mode.
2017                  */
2018                 first_seg->pkt_len -= rxq->crc_len;
2019                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2020                         struct rte_mbuf *lp;
2021
2022                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2023                                 ;
2024
2025                         first_seg->nb_segs--;
2026                         lp->data_len -= rxq->crc_len - rxm->data_len;
2027                         lp->next = NULL;
2028                         rte_pktmbuf_free_seg(rxm);
2029                 } else
2030                         rxm->data_len -= rxq->crc_len;
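                /*
                 * Example of the branch above with hypothetical sizes: with
                 * a 4-byte CRC and a last segment holding only 2 bytes, the
                 * last segment is freed, the preceding segment is trimmed by
                 * the remaining 2 bytes and nb_segs is decremented;
                 * otherwise the CRC is simply removed from the last segment.
                 */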
2031
2032                 /* Prefetch data of first segment, if configured to do so. */
2033                 rte_packet_prefetch((char *)first_seg->buf_addr +
2034                         first_seg->data_off);
2035
2036                 /*
2037                  * Store the mbuf address into the next entry of the array
2038                  * of returned packets.
2039                  */
2040                 rx_pkts[nb_rx++] = first_seg;
2041         }
2042
2043         /*
2044          * Record index of the next RX descriptor to probe.
2045          */
2046         rxq->rx_tail = rx_id;
2047
2048         /*
2049          * If the number of free RX descriptors is greater than the RX free
2050          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2051          * register.
2052          * Update the RDT with the value of the last processed RX descriptor
2053          * minus 1, to guarantee that the RDT register is never equal to the
2054          * RDH register, which creates a "full" ring situation from the
2055          * hardware point of view...
2056          */
2057         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2058                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2059                            "nb_hold=%u nb_rx=%u",
2060                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2061
2062                 rte_wmb();
2063                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2064                 nb_hold = 0;
2065         }
2066
2067         rxq->nb_rx_hold = nb_hold;
2068         return nb_rx;
2069 }
2070
2071 uint16_t
2072 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2073                                  uint16_t nb_pkts)
2074 {
2075         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2076 }
2077
2078 uint16_t
2079 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2080                                uint16_t nb_pkts)
2081 {
2082         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2083 }
2084
2085 /*********************************************************************
2086  *
2087  *  Queue management functions
2088  *
2089  **********************************************************************/
2090
2091 static void __attribute__((cold))
2092 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2093 {
2094         unsigned i;
2095
2096         if (txq->sw_ring != NULL) {
2097                 for (i = 0; i < txq->nb_tx_desc; i++) {
2098                         if (txq->sw_ring[i].mbuf != NULL) {
2099                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2100                                 txq->sw_ring[i].mbuf = NULL;
2101                         }
2102                 }
2103         }
2104 }
2105
2106 static void __attribute__((cold))
2107 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2108 {
2109         if (txq != NULL &&
2110             txq->sw_ring != NULL)
2111                 rte_free(txq->sw_ring);
2112 }
2113
2114 static void __attribute__((cold))
2115 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2116 {
2117         if (txq != NULL && txq->ops != NULL) {
2118                 txq->ops->release_mbufs(txq);
2119                 txq->ops->free_swring(txq);
2120                 rte_free(txq);
2121         }
2122 }
2123
2124 void __attribute__((cold))
2125 ixgbe_dev_tx_queue_release(void *txq)
2126 {
2127         ixgbe_tx_queue_release(txq);
2128 }
2129
2130 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2131 static void __attribute__((cold))
2132 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2133 {
2134         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2135         struct ixgbe_tx_entry *txe = txq->sw_ring;
2136         uint16_t prev, i;
2137
2138         /* Zero out HW ring memory */
2139         for (i = 0; i < txq->nb_tx_desc; i++) {
2140                 txq->tx_ring[i] = zeroed_desc;
2141         }
2142
2143         /* Initialize SW ring entries */
2144         prev = (uint16_t) (txq->nb_tx_desc - 1);
2145         for (i = 0; i < txq->nb_tx_desc; i++) {
2146                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2147
2148                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2149                 txe[i].mbuf = NULL;
2150                 txe[i].last_id = i;
2151                 txe[prev].next_id = i;
2152                 prev = i;
2153         }
2154
2155         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2156         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2157
2158         txq->tx_tail = 0;
2159         txq->nb_tx_used = 0;
2160         /*
2161          * Always allow 1 descriptor to be un-allocated to avoid
2162          * a H/W race condition
2163          */
2164         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2165         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2166         txq->ctx_curr = 0;
2167         memset((void *)&txq->ctx_cache, 0,
2168                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2169 }
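/*
 * Example of the sw-ring initialization above with a hypothetical 4-entry
 * ring: prev starts at 3, so the loop produces
 *     txe[3].next_id = 0, txe[0].next_id = 1,
 *     txe[1].next_id = 2, txe[2].next_id = 3
 * i.e. a circular list in ascending order, while each last_id initially
 * points at the entry itself.
 */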
2170
2171 static const struct ixgbe_txq_ops def_txq_ops = {
2172         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2173         .free_swring = ixgbe_tx_free_swring,
2174         .reset = ixgbe_reset_tx_queue,
2175 };
2176
2177 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2178  * the queue parameters. Used in tx_queue_setup by primary process and then
2179  * in dev_init by secondary process when attaching to an existing ethdev.
2180  */
2181 void __attribute__((cold))
2182 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2183 {
2184         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2185         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2186                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2187                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2188 #ifdef RTE_IXGBE_INC_VECTOR
2189                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2190                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2191                                         ixgbe_txq_vec_setup(txq) == 0)) {
2192                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2193                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2194                 } else
2195 #endif
2196                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2197         } else {
2198                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2199                 PMD_INIT_LOG(DEBUG,
2200                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2201                                 (unsigned long)txq->txq_flags,
2202                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2203                 PMD_INIT_LOG(DEBUG,
2204                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2205                                 (unsigned long)txq->tx_rs_thresh,
2206                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2207                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2208         }
2209 }
2210
2211 int __attribute__((cold))
2212 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2213                          uint16_t queue_idx,
2214                          uint16_t nb_desc,
2215                          unsigned int socket_id,
2216                          const struct rte_eth_txconf *tx_conf)
2217 {
2218         const struct rte_memzone *tz;
2219         struct ixgbe_tx_queue *txq;
2220         struct ixgbe_hw     *hw;
2221         uint16_t tx_rs_thresh, tx_free_thresh;
2222
2223         PMD_INIT_FUNC_TRACE();
2224         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2225
2226         /*
2227          * Validate number of transmit descriptors.
2228          * It must not exceed hardware maximum, and must be multiple
2229          * It must not exceed the hardware maximum and must be a multiple
2230          * of IXGBE_TXD_ALIGN.
2231         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2232                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2233                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2234                 return -EINVAL;
2235         }
2236
2237         /*
2238          * The following two parameters control the setting of the RS bit on
2239          * transmit descriptors.
2240          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2241          * descriptors have been used.
2242          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2243          * descriptors are used or if the number of descriptors required
2244          * to transmit a packet is greater than the number of free TX
2245          * descriptors.
2246          * The following constraints must be satisfied:
2247          *  tx_rs_thresh must be greater than 0.
2248          *  tx_rs_thresh must be less than the size of the ring minus 2.
2249          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2250          *  tx_rs_thresh must be a divisor of the ring size.
2251          *  tx_free_thresh must be greater than 0.
2252          *  tx_free_thresh must be less than the size of the ring minus 3.
2253          * One descriptor in the TX ring is used as a sentinel to avoid a
2254          * H/W race condition, hence the maximum threshold constraints.
2255          * When set to zero use default values.
2256          */
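        /*
         * Worked example of these constraints with hypothetical values: for
         * a 512-entry ring and tx_rs_thresh = tx_free_thresh = 32, the
         * checks below pass since 32 < 510, 32 <= 32, 512 % 32 == 0 and
         * 32 < 509; the RS bit is then set once every 32 descriptors.
         */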
2257         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2258                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2259         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2260                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2261         if (tx_rs_thresh >= (nb_desc - 2)) {
2262                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2263                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2264                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2265                         (int)dev->data->port_id, (int)queue_idx);
2266                 return -(EINVAL);
2267         }
2268         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2269                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2270                         "(tx_rs_thresh=%u port=%d queue=%d)",
2271                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2272                         (int)dev->data->port_id, (int)queue_idx);
2273                 return -(EINVAL);
2274         }
2275         if (tx_free_thresh >= (nb_desc - 3)) {
2276                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2277                              "number of TX descriptors minus 3. "
2278                              "(tx_free_thresh=%u "
2279                              "port=%d queue=%d)",
2280                              (unsigned int)tx_free_thresh,
2281                              (int)dev->data->port_id, (int)queue_idx);
2282                 return -(EINVAL);
2283         }
2284         if (tx_rs_thresh > tx_free_thresh) {
2285                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2286                              "tx_free_thresh. (tx_free_thresh=%u "
2287                              "tx_rs_thresh=%u port=%d queue=%d)",
2288                              (unsigned int)tx_free_thresh,
2289                              (unsigned int)tx_rs_thresh,
2290                              (int)dev->data->port_id,
2291                              (int)queue_idx);
2292                 return -(EINVAL);
2293         }
2294         if ((nb_desc % tx_rs_thresh) != 0) {
2295                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2296                              "number of TX descriptors. (tx_rs_thresh=%u "
2297                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2298                              (int)dev->data->port_id, (int)queue_idx);
2299                 return -(EINVAL);
2300         }
2301
2302         /*
2303          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2304          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2305          * by the NIC and all descriptors are written back after the NIC
2306          * accumulates WTHRESH descriptors.
2307          */
2308         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2309                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2310                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2311                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2312                              (int)dev->data->port_id, (int)queue_idx);
2313                 return -(EINVAL);
2314         }
2315
2316         /* Free memory prior to re-allocation if needed... */
2317         if (dev->data->tx_queues[queue_idx] != NULL) {
2318                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2319                 dev->data->tx_queues[queue_idx] = NULL;
2320         }
2321
2322         /* First allocate the tx queue data structure */
2323         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2324                                  RTE_CACHE_LINE_SIZE, socket_id);
2325         if (txq == NULL)
2326                 return -ENOMEM;
2327
2328         /*
2329          * Allocate TX ring hardware descriptors. A memzone large enough to
2330          * handle the maximum ring size is allocated in order to allow for
2331          * resizing in later calls to the queue setup function.
2332          */
2333         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2334                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2335                         IXGBE_ALIGN, socket_id);
2336         if (tz == NULL) {
2337                 ixgbe_tx_queue_release(txq);
2338                 return -ENOMEM;
2339         }
2340
2341         txq->nb_tx_desc = nb_desc;
2342         txq->tx_rs_thresh = tx_rs_thresh;
2343         txq->tx_free_thresh = tx_free_thresh;
2344         txq->pthresh = tx_conf->tx_thresh.pthresh;
2345         txq->hthresh = tx_conf->tx_thresh.hthresh;
2346         txq->wthresh = tx_conf->tx_thresh.wthresh;
2347         txq->queue_id = queue_idx;
2348         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2349                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2350         txq->port_id = dev->data->port_id;
2351         txq->txq_flags = tx_conf->txq_flags;
2352         txq->ops = &def_txq_ops;
2353         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2354
2355         /*
2356          * Use the VF tail register (VFTDT) when a virtual function is detected.
2357          */
2358         if (hw->mac.type == ixgbe_mac_82599_vf ||
2359             hw->mac.type == ixgbe_mac_X540_vf ||
2360             hw->mac.type == ixgbe_mac_X550_vf ||
2361             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2362             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2363                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2364         else
2365                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2366
2367         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2368         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2369
2370         /* Allocate software ring */
2371         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2372                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2373                                 RTE_CACHE_LINE_SIZE, socket_id);
2374         if (txq->sw_ring == NULL) {
2375                 ixgbe_tx_queue_release(txq);
2376                 return -ENOMEM;
2377         }
2378         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2379                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2380
2381         /* set up vector or scalar TX function as appropriate */
2382         ixgbe_set_tx_function(dev, txq);
2383
2384         txq->ops->reset(txq);
2385
2386         dev->data->tx_queues[queue_idx] = txq;
2387
2388
2389         return 0;
2390 }
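/*
 * Usage sketch (illustrative only; applications reach this function through
 * the generic ethdev API rather than by calling it directly):
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_rs_thresh = 32,     // hypothetical values
 *             .tx_free_thresh = 32,
 *     };
 *     ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                  &txconf);
 *
 * which ends up here with nb_desc = 512 and the thresholds shown above.
 */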
2391
2392 /**
2393  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2394  *
2395  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2396  * in the sw_sc_ring is not set to NULL but rather points to the next
2397  * mbuf of this RSC aggregation (that has not been completed yet and still
2398  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2399  * just free the first "nb_segs" segments of the cluster explicitly by
2400  * calling rte_pktmbuf_free_seg().
2401  *
2402  * @m scattered cluster head
2403  */
2404 static void __attribute__((cold))
2405 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2406 {
2407         uint8_t i, nb_segs = m->nb_segs;
2408         struct rte_mbuf *next_seg;
2409
2410         for (i = 0; i < nb_segs; i++) {
2411                 next_seg = m->next;
2412                 rte_pktmbuf_free_seg(m);
2413                 m = next_seg;
2414         }
2415 }
2416
2417 static void __attribute__((cold))
2418 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2419 {
2420         unsigned i;
2421
2422 #ifdef RTE_IXGBE_INC_VECTOR
2423         /* SSE Vector driver has a different way of releasing mbufs. */
2424         if (rxq->rx_using_sse) {
2425                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2426                 return;
2427         }
2428 #endif
2429
2430         if (rxq->sw_ring != NULL) {
2431                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2432                         if (rxq->sw_ring[i].mbuf != NULL) {
2433                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2434                                 rxq->sw_ring[i].mbuf = NULL;
2435                         }
2436                 }
2437                 if (rxq->rx_nb_avail) {
2438                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2439                                 struct rte_mbuf *mb;
2440
2441                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2442                                 rte_pktmbuf_free_seg(mb);
2443                         }
2444                         rxq->rx_nb_avail = 0;
2445                 }
2446         }
2447
2448         if (rxq->sw_sc_ring)
2449                 for (i = 0; i < rxq->nb_rx_desc; i++)
2450                         if (rxq->sw_sc_ring[i].fbuf) {
2451                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2452                                 rxq->sw_sc_ring[i].fbuf = NULL;
2453                         }
2454 }
2455
2456 static void __attribute__((cold))
2457 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2458 {
2459         if (rxq != NULL) {
2460                 ixgbe_rx_queue_release_mbufs(rxq);
2461                 rte_free(rxq->sw_ring);
2462                 rte_free(rxq->sw_sc_ring);
2463                 rte_free(rxq);
2464         }
2465 }
2466
2467 void __attribute__((cold))
2468 ixgbe_dev_rx_queue_release(void *rxq)
2469 {
2470         ixgbe_rx_queue_release(rxq);
2471 }
2472
2473 /*
2474  * Check if Rx Burst Bulk Alloc function can be used.
2475  * Return
2476  *        0: the preconditions are satisfied and the bulk allocation function
2477  *           can be used.
2478  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2479  *           function must be used.
2480  */
2481 static inline int __attribute__((cold))
2482 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2483 {
2484         int ret = 0;
2485
2486         /*
2487          * Make sure the following pre-conditions are satisfied:
2488          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2489          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2490          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2491          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2492          * Scattered packets are not supported.  This should be checked
2493          * outside of this function.
2494          */
2495         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2496                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2497                              "rxq->rx_free_thresh=%d, "
2498                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2499                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2500                 ret = -EINVAL;
2501         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2502                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2503                              "rxq->rx_free_thresh=%d, "
2504                              "rxq->nb_rx_desc=%d",
2505                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2506                 ret = -EINVAL;
2507         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2508                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2509                              "rxq->nb_rx_desc=%d, "
2510                              "rxq->rx_free_thresh=%d",
2511                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2512                 ret = -EINVAL;
2513         } else if (!(rxq->nb_rx_desc <
2514                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2515                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2516                              "rxq->nb_rx_desc=%d, "
2517                              "IXGBE_MAX_RING_DESC=%d, "
2518                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2519                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2520                              RTE_PMD_IXGBE_RX_MAX_BURST);
2521                 ret = -EINVAL;
2522         }
2523
2524         return ret;
2525 }
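
/*
 * Worked example for the checks above, assuming the usual values
 * RTE_PMD_IXGBE_RX_MAX_BURST == 32 and IXGBE_MAX_RING_DESC == 4096:
 * a queue with nb_rx_desc = 512 and rx_free_thresh = 32 satisfies every
 * precondition, since 32 >= 32, 32 < 512, 512 % 32 == 0 and
 * 512 < 4096 - 32.
 */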
2526
2527 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2528 static void __attribute__((cold))
2529 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2530 {
2531         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2532         unsigned i;
2533         uint16_t len = rxq->nb_rx_desc;
2534
2535         /*
2536          * By default, the Rx queue setup function allocates enough memory for
2537          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2538          * extra memory at the end of the descriptor ring to be zero'd out. A
2539          * extra memory at the end of the descriptor ring to be zeroed out. A
2540          * number of descriptors is less than or equal to
2541          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2542          * constraints here to see if we need to zero out memory after the end
2543          * of the H/W descriptor ring.
2544          */
2545         if (adapter->rx_bulk_alloc_allowed)
2546                 /* zero out extra memory */
2547                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2548
2549         /*
2550          * Zero out HW ring memory. Zero out extra memory at the end of
2551          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2552          * reads extra memory as zeros.
2553          */
2554         for (i = 0; i < len; i++) {
2555                 rxq->rx_ring[i] = zeroed_desc;
2556         }
2557
2558         /*
2559          * initialize extra software ring entries. Space for these extra
2560          * entries is always allocated
2561          */
2562         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2563         for (i = rxq->nb_rx_desc; i < len; ++i) {
2564                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2565         }
2566
2567         rxq->rx_nb_avail = 0;
2568         rxq->rx_next_avail = 0;
2569         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2570         rxq->rx_tail = 0;
2571         rxq->nb_rx_hold = 0;
2572         rxq->pkt_first_seg = NULL;
2573         rxq->pkt_last_seg = NULL;
2574
2575 #ifdef RTE_IXGBE_INC_VECTOR
2576         rxq->rxrearm_start = 0;
2577         rxq->rxrearm_nb = 0;
2578 #endif
2579 }
2580
2581 int __attribute__((cold))
2582 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2583                          uint16_t queue_idx,
2584                          uint16_t nb_desc,
2585                          unsigned int socket_id,
2586                          const struct rte_eth_rxconf *rx_conf,
2587                          struct rte_mempool *mp)
2588 {
2589         const struct rte_memzone *rz;
2590         struct ixgbe_rx_queue *rxq;
2591         struct ixgbe_hw     *hw;
2592         uint16_t len;
2593         struct ixgbe_adapter *adapter =
2594                 (struct ixgbe_adapter *)dev->data->dev_private;
2595
2596         PMD_INIT_FUNC_TRACE();
2597         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2598
2599         /*
2600          * Validate number of receive descriptors.
2601          * It must not exceed the hardware maximum and must be a multiple
2602          * of IXGBE_RXD_ALIGN.
2603          */
2604         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2605                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2606                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2607                 return -EINVAL;
2608         }
2609
2610         /* Free memory prior to re-allocation if needed... */
2611         if (dev->data->rx_queues[queue_idx] != NULL) {
2612                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2613                 dev->data->rx_queues[queue_idx] = NULL;
2614         }
2615
2616         /* First allocate the rx queue data structure */
2617         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2618                                  RTE_CACHE_LINE_SIZE, socket_id);
2619         if (rxq == NULL)
2620                 return -ENOMEM;
2621         rxq->mb_pool = mp;
2622         rxq->nb_rx_desc = nb_desc;
2623         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2624         rxq->queue_id = queue_idx;
2625         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2626                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2627         rxq->port_id = dev->data->port_id;
2628         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2629                                                         0 : ETHER_CRC_LEN);
2630         rxq->drop_en = rx_conf->rx_drop_en;
2631         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2632
2633         /*
2634          * The packet type in the RX descriptor differs between NICs:
2635          * some bits are used on x550 but reserved on other NICs,
2636          * so set a different mask per NIC family.
2637          */
2638         if (hw->mac.type == ixgbe_mac_X550 ||
2639             hw->mac.type == ixgbe_mac_X550EM_x ||
2640             hw->mac.type == ixgbe_mac_X550EM_a ||
2641             hw->mac.type == ixgbe_mac_X550_vf ||
2642             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2643             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2644                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2645         else
2646                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2647
2648         /*
2649          * Allocate RX ring hardware descriptors. A memzone large enough to
2650          * handle the maximum ring size is allocated in order to allow for
2651          * resizing in later calls to the queue setup function.
2652          */
2653         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2654                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2655         if (rz == NULL) {
2656                 ixgbe_rx_queue_release(rxq);
2657                 return -ENOMEM;
2658         }
2659
2660         /*
2661          * Zero init all the descriptors in the ring.
2662          */
2663         memset(rz->addr, 0, RX_RING_SZ);
2664
2665         /*
2666          * Use the VF head/tail registers (VFRDT/VFRDH) when the device is a virtual function
2667          */
2668         if (hw->mac.type == ixgbe_mac_82599_vf ||
2669             hw->mac.type == ixgbe_mac_X540_vf ||
2670             hw->mac.type == ixgbe_mac_X550_vf ||
2671             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2672             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2673                 rxq->rdt_reg_addr =
2674                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2675                 rxq->rdh_reg_addr =
2676                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2677         } else {
2678                 rxq->rdt_reg_addr =
2679                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2680                 rxq->rdh_reg_addr =
2681                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2682         }
2683
2684         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2685         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2686
2687         /*
2688          * Certain constraints must be met in order to use the bulk buffer
2689          * allocation Rx burst function. If any Rx queue doesn't meet them,
2690          * the feature must be disabled for the whole port.
2691          */
2692         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2693                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2694                                     "preconditions - canceling the feature for "
2695                                     "the whole port[%d]",
2696                              rxq->queue_id, rxq->port_id);
2697                 adapter->rx_bulk_alloc_allowed = false;
2698         }
2699
2700         /*
2701          * Allocate software ring. Allow for space at the end of the
2702          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2703          * function does not access an invalid memory region.
2704          */
2705         len = nb_desc;
2706         if (adapter->rx_bulk_alloc_allowed)
2707                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2708
2709         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2710                                           sizeof(struct ixgbe_rx_entry) * len,
2711                                           RTE_CACHE_LINE_SIZE, socket_id);
2712         if (!rxq->sw_ring) {
2713                 ixgbe_rx_queue_release(rxq);
2714                 return -ENOMEM;
2715         }
2716
2717         /*
2718          * Always allocate even if it's not going to be needed in order to
2719          * simplify the code.
2720          *
2721          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2722          * be requested in ixgbe_dev_rx_init(), which is called later from
2723          * dev_start() flow.
2724          */
2725         rxq->sw_sc_ring =
2726                 rte_zmalloc_socket("rxq->sw_sc_ring",
2727                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2728                                    RTE_CACHE_LINE_SIZE, socket_id);
2729         if (!rxq->sw_sc_ring) {
2730                 ixgbe_rx_queue_release(rxq);
2731                 return -ENOMEM;
2732         }
2733
2734         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2735                             "dma_addr=0x%"PRIx64,
2736                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2737                      rxq->rx_ring_phys_addr);
2738
2739         if (!rte_is_power_of_2(nb_desc)) {
2740                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2741                                     "preconditions - canceling the feature for "
2742                                     "the whole port[%d]",
2743                              rxq->queue_id, rxq->port_id);
2744                 adapter->rx_vec_allowed = false;
2745         } else
2746                 ixgbe_rxq_vec_setup(rxq);
2747
2748         dev->data->rx_queues[queue_idx] = rxq;
2749
2750         ixgbe_reset_rx_queue(adapter, rxq);
2751
2752         return 0;
2753 }
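
/*
 * Illustrative sketch, not part of the driver: an application-side RX queue
 * setup that ends up in the function above.  The mempool name and sizes are
 * example values; passing a NULL rte_eth_rxconf lets the PMD apply its
 * defaults, and 512 descriptors keeps both the bulk-alloc precondition
 * (multiple of rx_free_thresh) and the vector precondition (power of two)
 * easy to meet.
 */
static int __attribute__((unused))
example_app_rx_queue_setup(uint8_t port_id, uint16_t queue_id,
			   unsigned int socket_id)
{
	struct rte_mempool *mp;

	/* 8192 mbufs with the default data room size, NUMA-local */
	mp = rte_pktmbuf_pool_create("example_rx_pool", 8192, 256, 0,
				     RTE_MBUF_DEFAULT_BUF_SIZE, socket_id);
	if (mp == NULL)
		return -ENOMEM;

	return rte_eth_rx_queue_setup(port_id, queue_id, 512, socket_id,
				      NULL, mp);
}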
2754
2755 uint32_t
2756 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2757 {
2758 #define IXGBE_RXQ_SCAN_INTERVAL 4
2759         volatile union ixgbe_adv_rx_desc *rxdp;
2760         struct ixgbe_rx_queue *rxq;
2761         uint32_t desc = 0;
2762
2763         if (rx_queue_id >= dev->data->nb_rx_queues) {
2764                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2765                 return 0;
2766         }
2767
2768         rxq = dev->data->rx_queues[rx_queue_id];
2769         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2770
2771         while ((desc < rxq->nb_rx_desc) &&
2772                 (rxdp->wb.upper.status_error &
2773                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2774                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2775                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2776                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2777                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2778                                 desc - rxq->nb_rx_desc]);
2779         }
2780
2781         return desc;
2782 }
2783
2784 int
2785 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2786 {
2787         volatile union ixgbe_adv_rx_desc *rxdp;
2788         struct ixgbe_rx_queue *rxq = rx_queue;
2789         uint32_t desc;
2790
2791         if (unlikely(offset >= rxq->nb_rx_desc))
2792                 return 0;
2793         desc = rxq->rx_tail + offset;
2794         if (desc >= rxq->nb_rx_desc)
2795                 desc -= rxq->nb_rx_desc;
2796
2797         rxdp = &rxq->rx_ring[desc];
2798         return !!(rxdp->wb.upper.status_error &
2799                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2800 }
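
/*
 * Illustrative sketch, not part of the driver: polling queue occupancy
 * through the ethdev wrappers that call the two functions above.  Note that
 * ixgbe_dev_rx_queue_count() walks the ring in steps of
 * IXGBE_RXQ_SCAN_INTERVAL descriptors, so the value it reports is an
 * estimate rounded to that granularity rather than an exact count.
 */
static void __attribute__((unused))
example_poll_rx_status(uint8_t port_id, uint16_t queue_id)
{
	/* approximate number of descriptors already filled by the NIC */
	uint32_t used = rte_eth_rx_queue_count(port_id, queue_id);

	/* has the descriptor 16 slots past the tail been written back? */
	int done = rte_eth_rx_descriptor_done(port_id, queue_id, 16);

	(void)used;
	(void)done;
}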
2801
2802 void __attribute__((cold))
2803 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2804 {
2805         unsigned i;
2806         struct ixgbe_adapter *adapter =
2807                 (struct ixgbe_adapter *)dev->data->dev_private;
2808
2809         PMD_INIT_FUNC_TRACE();
2810
2811         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2812                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2813
2814                 if (txq != NULL) {
2815                         txq->ops->release_mbufs(txq);
2816                         txq->ops->reset(txq);
2817                 }
2818         }
2819
2820         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2821                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2822
2823                 if (rxq != NULL) {
2824                         ixgbe_rx_queue_release_mbufs(rxq);
2825                         ixgbe_reset_rx_queue(adapter, rxq);
2826                 }
2827         }
2828 }
2829
2830 void
2831 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2832 {
2833         unsigned i;
2834
2835         PMD_INIT_FUNC_TRACE();
2836
2837         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2838                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2839                 dev->data->rx_queues[i] = NULL;
2840         }
2841         dev->data->nb_rx_queues = 0;
2842
2843         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2844                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2845                 dev->data->tx_queues[i] = NULL;
2846         }
2847         dev->data->nb_tx_queues = 0;
2848 }
2849
2850 /*********************************************************************
2851  *
2852  *  Device RX/TX init functions
2853  *
2854  **********************************************************************/
2855
2856 /**
2857  * Receive Side Scaling (RSS)
2858  * See section 7.1.2.8 in the following document:
2859  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2860  *
2861  * Principles:
2862  * The source and destination IP addresses of the IP header and the source
2863  * and destination ports of TCP/UDP headers, if any, of received packets are
2864  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2865  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2866  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
2867  * RSS output index which is used as the RX queue index in which to store the
2868  * received packets.
2869  * The following output is supplied in the RX write-back descriptor:
2870  *     - 32-bit result of the Microsoft RSS hash function,
2871  *     - 4-bit RSS type field.
2872  */
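
/*
 * Illustrative sketch, not part of the driver: how the 32-bit hash reported
 * in mbuf->hash.rss maps to an Rx queue, given a software copy of the
 * 128-entry redirection table (for instance one read back with
 * rte_eth_dev_rss_reta_query()).  Only the seven LSBs of the hash are used
 * as the RETA index.
 */
static uint16_t __attribute__((unused))
example_rss_queue_from_hash(uint32_t rss_hash, const uint8_t reta[128])
{
	return reta[rss_hash & 0x7F];
}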
2873
2874 /*
2875  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2876  * Used as the default key.
2877  */
2878 static uint8_t rss_intel_key[40] = {
2879         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2880         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2881         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2882         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2883         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2884 };
2885
2886 static void
2887 ixgbe_rss_disable(struct rte_eth_dev *dev)
2888 {
2889         struct ixgbe_hw *hw;
2890         uint32_t mrqc;
2891         uint32_t mrqc_reg;
2892
2893         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2894         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2895         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2896         mrqc &= ~IXGBE_MRQC_RSSEN;
2897         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2898 }
2899
2900 static void
2901 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2902 {
2903         uint8_t  *hash_key;
2904         uint32_t mrqc;
2905         uint32_t rss_key;
2906         uint64_t rss_hf;
2907         uint16_t i;
2908         uint32_t mrqc_reg;
2909         uint32_t rssrk_reg;
2910
2911         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2912         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2913
2914         hash_key = rss_conf->rss_key;
2915         if (hash_key != NULL) {
2916                 /* Fill in RSS hash key */
2917                 for (i = 0; i < 10; i++) {
2918                         rss_key  = hash_key[(i * 4)];
2919                         rss_key |= hash_key[(i * 4) + 1] << 8;
2920                         rss_key |= hash_key[(i * 4) + 2] << 16;
2921                         rss_key |= hash_key[(i * 4) + 3] << 24;
2922                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
2923                 }
2924         }
2925
2926         /* Set configured hashing protocols in MRQC register */
2927         rss_hf = rss_conf->rss_hf;
2928         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2929         if (rss_hf & ETH_RSS_IPV4)
2930                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2931         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2932                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2933         if (rss_hf & ETH_RSS_IPV6)
2934                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2935         if (rss_hf & ETH_RSS_IPV6_EX)
2936                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2937         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2938                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2939         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2940                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2941         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2942                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2943         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2944                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2945         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2946                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2947         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2948 }
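
/*
 * Worked example for the key programming above: bytes are packed
 * little-endian, four per RSSRK register, so the first four bytes of
 * rss_intel_key (0x6D, 0x5A, 0x56, 0xDA) are written to RSSRK[0] as
 * 0xDA565A6D.
 */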
2949
2950 int
2951 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2952                           struct rte_eth_rss_conf *rss_conf)
2953 {
2954         struct ixgbe_hw *hw;
2955         uint32_t mrqc;
2956         uint64_t rss_hf;
2957         uint32_t mrqc_reg;
2958
2959         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2960
2961         if (!ixgbe_rss_update_sp(hw->mac.type)) {
2962                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2963                         "NIC.");
2964                 return -ENOTSUP;
2965         }
2966         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2967
2968         /*
2969          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2970          *     "RSS enabling cannot be done dynamically while it must be
2971          *      preceded by a software reset"
2972          * Before changing anything, first check that the update RSS operation
2973          * does not attempt to disable RSS, if RSS was enabled at
2974          * initialization time, or does not attempt to enable RSS, if RSS was
2975          * disabled at initialization time.
2976          */
2977         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2978         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2979         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2980                 if (rss_hf != 0) /* Enable RSS */
2981                         return -(EINVAL);
2982                 return 0; /* Nothing to do */
2983         }
2984         /* RSS enabled */
2985         if (rss_hf == 0) /* Disable RSS */
2986                 return -(EINVAL);
2987         ixgbe_hw_rss_hash_set(hw, rss_conf);
2988         return 0;
2989 }
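
/*
 * Illustrative sketch, not part of the driver: changing the RSS hash types
 * at runtime via the ethdev call that lands in ixgbe_dev_rss_hash_update().
 * As explained above, this can only adjust the hash fields (and key) while
 * RSS stays enabled; it cannot turn RSS on or off after initialization.
 */
static int __attribute__((unused))
example_update_rss_hash_types(uint8_t port_id)
{
	struct rte_eth_rss_conf rss_conf = {
		.rss_key = NULL, /* keep the currently programmed key */
		.rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP |
			  ETH_RSS_NONFRAG_IPV4_UDP,
	};

	return rte_eth_dev_rss_hash_update(port_id, &rss_conf);
}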
2990
2991 int
2992 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2993                             struct rte_eth_rss_conf *rss_conf)
2994 {
2995         struct ixgbe_hw *hw;
2996         uint8_t *hash_key;
2997         uint32_t mrqc;
2998         uint32_t rss_key;
2999         uint64_t rss_hf;
3000         uint16_t i;
3001         uint32_t mrqc_reg;
3002         uint32_t rssrk_reg;
3003
3004         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3005         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3006         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3007         hash_key = rss_conf->rss_key;
3008         if (hash_key != NULL) {
3009                 /* Return RSS hash key */
3010                 for (i = 0; i < 10; i++) {
3011                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3012                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3013                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3014                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3015                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3016                 }
3017         }
3018
3019         /* Get RSS functions configured in MRQC register */
3020         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3021         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3022                 rss_conf->rss_hf = 0;
3023                 return 0;
3024         }
3025         rss_hf = 0;
3026         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3027                 rss_hf |= ETH_RSS_IPV4;
3028         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3029                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3030         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3031                 rss_hf |= ETH_RSS_IPV6;
3032         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3033                 rss_hf |= ETH_RSS_IPV6_EX;
3034         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3035                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3036         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3037                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3038         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3039                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3040         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3041                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3042         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3043                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3044         rss_conf->rss_hf = rss_hf;
3045         return 0;
3046 }
3047
3048 static void
3049 ixgbe_rss_configure(struct rte_eth_dev *dev)
3050 {
3051         struct rte_eth_rss_conf rss_conf;
3052         struct ixgbe_hw *hw;
3053         uint32_t reta;
3054         uint16_t i;
3055         uint16_t j;
3056         uint16_t sp_reta_size;
3057         uint32_t reta_reg;
3058
3059         PMD_INIT_FUNC_TRACE();
3060         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3061
3062         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3063
3064         /*
3065          * Fill in redirection table
3066          * The byte-swap is needed because NIC registers are in
3067          * little-endian order.
3068          */
3069         reta = 0;
3070         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3071                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3072
3073                 if (j == dev->data->nb_rx_queues)
3074                         j = 0;
3075                 reta = (reta << 8) | j;
3076                 if ((i & 3) == 3)
3077                         IXGBE_WRITE_REG(hw, reta_reg,
3078                                         rte_bswap32(reta));
3079         }
3080
3081         /*
3082          * Configure the RSS key and the RSS protocols used to compute
3083          * the RSS hash of input packets.
3084          */
3085         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3086         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3087                 ixgbe_rss_disable(dev);
3088                 return;
3089         }
3090         if (rss_conf.rss_key == NULL)
3091                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3092         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3093 }
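
/*
 * Worked example for the RETA fill above: with nb_rx_queues == 4, the first
 * four iterations accumulate reta == 0x00010203, and rte_bswap32() writes
 * the register as 0x03020100, i.e. entry 0 -> queue 0 in the least
 * significant byte up to entry 3 -> queue 3 in the most significant byte.
 */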
3094
3095 #define NUM_VFTA_REGISTERS 128
3096 #define NIC_RX_BUFFER_SIZE 0x200
3097 #define X550_RX_BUFFER_SIZE 0x180
3098
3099 static void
3100 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3101 {
3102         struct rte_eth_vmdq_dcb_conf *cfg;
3103         struct ixgbe_hw *hw;
3104         enum rte_eth_nb_pools num_pools;
3105         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3106         uint16_t pbsize;
3107         uint8_t nb_tcs; /* number of traffic classes */
3108         int i;
3109
3110         PMD_INIT_FUNC_TRACE();
3111         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3112         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3113         num_pools = cfg->nb_queue_pools;
3114         /* Check we have a valid number of pools */
3115         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3116                 ixgbe_rss_disable(dev);
3117                 return;
3118         }
3119         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3120         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3121
3122         /*
3123          * RXPBSIZE
3124          * split the Rx packet buffer into sections, one per traffic class
3125          */
3126         switch (hw->mac.type) {
3127         case ixgbe_mac_X550:
3128         case ixgbe_mac_X550EM_x:
3129         case ixgbe_mac_X550EM_a:
3130                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3131                 break;
3132         default:
3133                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3134                 break;
3135         }
3136         for (i = 0; i < nb_tcs; i++) {
3137                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3138
3139                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3140                 /* clear 10 bits. */
3141                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3142                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3143         }
3144         /* zero alloc all unused TCs */
3145         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3146                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3147
3148                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3149                 /* clear 10 bits. */
3150                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3151         }
3152
3153         /* MRQC: enable vmdq and dcb */
3154         mrqc = (num_pools == ETH_16_POOLS) ?
3155                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3156         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3157
3158         /* PFVTCTL: turn on virtualisation and set the default pool */
3159         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3160         if (cfg->enable_default_pool) {
3161                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3162         } else {
3163                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3164         }
3165
3166         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3167
3168         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3169         queue_mapping = 0;
3170         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3171                 /*
3172                  * mapping is done with 3 bits per priority,
3173                  * so shift by i*3 each time
3174                  */
3175                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3176
3177         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3178
3179         /* RTRPCS: DCB related */
3180         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3181
3182         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3183         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3184         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3185         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3186
3187         /* VFTA - enable all vlan filters */
3188         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3189                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3190         }
3191
3192         /* VFRE: pool enabling for receive - 16 or 32 */
3193         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3194                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3195
3196         /*
3197          * MPSAR - allow pools to read specific mac addresses
3198          * In this case, all pools should be able to read from mac addr 0
3199          */
3200         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3201         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3202
3203         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3204         for (i = 0; i < cfg->nb_pool_maps; i++) {
3205                 /* set vlan id in VF register and set the valid bit */
3206                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3207                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3208                 /*
3209                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3210                  * pools, we only need to use the first half of the register
3211                  * i.e. bits 0-31
3212                  */
3213                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3214         }
3215 }
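
/*
 * Worked example for the configuration above: with 16 pools there are
 * 8 traffic classes, so a non-X550 NIC splits its 0x200 Rx packet buffer
 * into 0x40 per TC.  With a priority map dcb_tc = {0, 0, 0, 0, 1, 1, 1, 1}
 * (UPs 0-3 on TC0, UPs 4-7 on TC1), the 3-bits-per-priority packing gives
 * RTRUP2TC = (1 << 12) | (1 << 15) | (1 << 18) | (1 << 21) = 0x249000.
 */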
3216
3217 /**
3218  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3219  * @hw: pointer to hardware structure
3220  * @dcb_config: pointer to ixgbe_dcb_config structure
3221  */
3222 static void
3223 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
3224                        struct ixgbe_dcb_config *dcb_config)
3225 {
3226         uint32_t reg;
3227         uint32_t q;
3228
3229         PMD_INIT_FUNC_TRACE();
3230         if (hw->mac.type != ixgbe_mac_82598EB) {
3231                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3232                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3233                 reg |= IXGBE_RTTDCS_ARBDIS;
3234                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3235
3236                 /* Enable DCB for Tx with 8 TCs */
3237                 if (dcb_config->num_tcs.pg_tcs == 8) {
3238                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3239                 } else {
3240                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3241                 }
3242                 if (dcb_config->vt_mode)
3243                         reg |= IXGBE_MTQC_VT_ENA;
3244                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3245
3246                 /* Disable drop for all queues */
3247                 for (q = 0; q < 128; q++)
3248                         IXGBE_WRITE_REG(hw, IXGBE_QDE,
3249                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3250
3251                 /* Enable the Tx desc arbiter */
3252                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3253                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3254                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3255
3256                 /* Enable Security TX Buffer IFG for DCB */
3257                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3258                 reg |= IXGBE_SECTX_DCB;
3259                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3260         }
3261 }
3262
3263 /**
3264  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3265  * @dev: pointer to rte_eth_dev structure
3266  * @dcb_config: pointer to ixgbe_dcb_config structure
3267  */
3268 static void
3269 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3270                         struct ixgbe_dcb_config *dcb_config)
3271 {
3272         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3273                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3274         struct ixgbe_hw *hw =
3275                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3276
3277         PMD_INIT_FUNC_TRACE();
3278         if (hw->mac.type != ixgbe_mac_82598EB)
3279                 /*PF VF Transmit Enable*/
3280                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3281                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3282
3283         /*Configure general DCB TX parameters*/
3284         ixgbe_dcb_tx_hw_config(hw, dcb_config);
3285 }
3286
3287 static void
3288 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3289                         struct ixgbe_dcb_config *dcb_config)
3290 {
3291         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3292                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3293         struct ixgbe_dcb_tc_config *tc;
3294         uint8_t i, j;
3295
3296         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3297         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3298                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3299                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3300         } else {
3301                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3302                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3303         }
3304         /* User Priority to Traffic Class mapping */
3305         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3306                 j = vmdq_rx_conf->dcb_tc[i];
3307                 tc = &dcb_config->tc_config[j];
3308                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3309                                                 (uint8_t)(1 << j);
3310         }
3311 }
3312
3313 static void
3314 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3315                         struct ixgbe_dcb_config *dcb_config)
3316 {
3317         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3318                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3319         struct ixgbe_dcb_tc_config *tc;
3320         uint8_t i, j;
3321
3322         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3323         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3324                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3325                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3326         } else {
3327                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3328                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3329         }
3330
3331         /* User Priority to Traffic Class mapping */
3332         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3333                 j = vmdq_tx_conf->dcb_tc[i];
3334                 tc = &dcb_config->tc_config[j];
3335                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3336                                                 (uint8_t)(1 << j);
3337         }
3338 }
3339
3340 static void
3341 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3342                 struct ixgbe_dcb_config *dcb_config)
3343 {
3344         struct rte_eth_dcb_rx_conf *rx_conf =
3345                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3346         struct ixgbe_dcb_tc_config *tc;
3347         uint8_t i, j;
3348
3349         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3350         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3351
3352         /* User Priority to Traffic Class mapping */
3353         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3354                 j = rx_conf->dcb_tc[i];
3355                 tc = &dcb_config->tc_config[j];
3356                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3357                                                 (uint8_t)(1 << j);
3358         }
3359 }
3360
3361 static void
3362 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3363                 struct ixgbe_dcb_config *dcb_config)
3364 {
3365         struct rte_eth_dcb_tx_conf *tx_conf =
3366                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3367         struct ixgbe_dcb_tc_config *tc;
3368         uint8_t i, j;
3369
3370         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3371         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3372
3373         /* User Priority to Traffic Class mapping */
3374         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3375                 j = tx_conf->dcb_tc[i];
3376                 tc = &dcb_config->tc_config[j];
3377                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3378                                                 (uint8_t)(1 << j);
3379         }
3380 }
3381
3382 /**
3383  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3384  * @hw: pointer to hardware structure
3385  * @dcb_config: pointer to ixgbe_dcb_config structure
3386  */
3387 static void
3388 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3389                struct ixgbe_dcb_config *dcb_config)
3390 {
3391         uint32_t reg;
3392         uint32_t vlanctrl;
3393         uint8_t i;
3394
3395         PMD_INIT_FUNC_TRACE();
3396         /*
3397          * Disable the arbiter before changing parameters
3398          * (always enable recycle mode; WSP)
3399          */
3400         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3401         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3402
3403         if (hw->mac.type != ixgbe_mac_82598EB) {
3404                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3405                 if (dcb_config->num_tcs.pg_tcs == 4) {
3406                         if (dcb_config->vt_mode)
3407                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3408                                         IXGBE_MRQC_VMDQRT4TCEN;
3409                         else {
3410                                 /* whether the mode is DCB or DCB_RSS, just
3411                                  * set the MRQE to RTRSSxTCEN; RSS itself is
3412                                  * controlled by the RSS_FIELD bits
3413                                  */
3414                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3415                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3416                                         IXGBE_MRQC_RTRSS4TCEN;
3417                         }
3418                 }
3419                 if (dcb_config->num_tcs.pg_tcs == 8) {
3420                         if (dcb_config->vt_mode)
3421                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3422                                         IXGBE_MRQC_VMDQRT8TCEN;
3423                         else {
3424                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3425                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3426                                         IXGBE_MRQC_RTRSS8TCEN;
3427                         }
3428                 }
3429
3430                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3431         }
3432
3433         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3434         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3435         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3436         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3437
3438         /* VFTA - enable all vlan filters */
3439         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3440                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3441         }
3442
3443         /*
3444          * Configure Rx packet plane (recycle mode; WSP) and
3445          * enable arbiter
3446          */
3447         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3448         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3449 }
3450
3451 static void
3452 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3453                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3454 {
3455         switch (hw->mac.type) {
3456         case ixgbe_mac_82598EB:
3457                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3458                 break;
3459         case ixgbe_mac_82599EB:
3460         case ixgbe_mac_X540:
3461         case ixgbe_mac_X550:
3462         case ixgbe_mac_X550EM_x:
3463         case ixgbe_mac_X550EM_a:
3464                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3465                                                   tsa, map);
3466                 break;
3467         default:
3468                 break;
3469         }
3470 }
3471
3472 static void
3473 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3474                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3475 {
3476         switch (hw->mac.type) {
3477         case ixgbe_mac_82598EB:
3478                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3479                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3480                 break;
3481         case ixgbe_mac_82599EB:
3482         case ixgbe_mac_X540:
3483         case ixgbe_mac_X550:
3484         case ixgbe_mac_X550EM_x:
3485         case ixgbe_mac_X550EM_a:
3486                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3487                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3488                 break;
3489         default:
3490                 break;
3491         }
3492 }
3493
3494 #define DCB_RX_CONFIG  1
3495 #define DCB_TX_CONFIG  1
3496 #define DCB_TX_PB      1024
3497 /**
3498  * ixgbe_dcb_hw_configure - Enable DCB and configure
3499  * general DCB in VT mode and non-VT mode parameters
3500  * @dev: pointer to rte_eth_dev structure
3501  * @dcb_config: pointer to ixgbe_dcb_config structure
3502  */
3503 static int
3504 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3505                         struct ixgbe_dcb_config *dcb_config)
3506 {
3507         int     ret = 0;
3508         uint8_t i, pfc_en, nb_tcs;
3509         uint16_t pbsize, rx_buffer_size;
3510         uint8_t config_dcb_rx = 0;
3511         uint8_t config_dcb_tx = 0;
3512         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3513         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3514         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3515         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3516         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3517         struct ixgbe_dcb_tc_config *tc;
3518         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3519         struct ixgbe_hw *hw =
3520                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3521
3522         switch (dev->data->dev_conf.rxmode.mq_mode) {
3523         case ETH_MQ_RX_VMDQ_DCB:
3524                 dcb_config->vt_mode = true;
3525                 if (hw->mac.type != ixgbe_mac_82598EB) {
3526                         config_dcb_rx = DCB_RX_CONFIG;
3527                         /*
3528                          *get dcb and VT rx configuration parameters
3529                          *from rte_eth_conf
3530                          */
3531                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3532                         /*Configure general VMDQ and DCB RX parameters*/
3533                         ixgbe_vmdq_dcb_configure(dev);
3534                 }
3535                 break;
3536         case ETH_MQ_RX_DCB:
3537         case ETH_MQ_RX_DCB_RSS:
3538                 dcb_config->vt_mode = false;
3539                 config_dcb_rx = DCB_RX_CONFIG;
3540                 /* Get DCB RX configuration parameters from rte_eth_conf */
3541                 ixgbe_dcb_rx_config(dev, dcb_config);
3542                 /*Configure general DCB RX parameters*/
3543                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3544                 break;
3545         default:
3546                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3547                 break;
3548         }
3549         switch (dev->data->dev_conf.txmode.mq_mode) {
3550         case ETH_MQ_TX_VMDQ_DCB:
3551                 dcb_config->vt_mode = true;
3552                 config_dcb_tx = DCB_TX_CONFIG;
3553                 /* get DCB and VT TX configuration parameters
3554                  * from rte_eth_conf
3555                  */
3556                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3557                 /*Configure general VMDQ and DCB TX parameters*/
3558                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3559                 break;
3560
3561         case ETH_MQ_TX_DCB:
3562                 dcb_config->vt_mode = false;
3563                 config_dcb_tx = DCB_TX_CONFIG;
3564                 /*get DCB TX configuration parameters from rte_eth_conf*/
3565                 ixgbe_dcb_tx_config(dev, dcb_config);
3566                 /*Configure general DCB TX parameters*/
3567                 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3568                 break;
3569         default:
3570                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3571                 break;
3572         }
3573
3574         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3575         /* Unpack map */
3576         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3577         if (nb_tcs == ETH_4_TCS) {
3578                 /* Avoid un-configured priority mapping to TC0 */
3579                 uint8_t j = 4;
3580                 uint8_t mask = 0xFF;
3581
3582                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3583                         mask = (uint8_t)(mask & (~(1 << map[i])));
3584                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3585                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3586                                 map[j++] = i;
3587                         mask >>= 1;
3588                 }
3589                 /* Re-configure 4 TCs BW */
3590                 for (i = 0; i < nb_tcs; i++) {
3591                         tc = &dcb_config->tc_config[i];
3592                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3593                                                 (uint8_t)(100 / nb_tcs);
3594                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3595                                                 (uint8_t)(100 / nb_tcs);
3596                 }
3597                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3598                         tc = &dcb_config->tc_config[i];
3599                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3600                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3601                 }
3602         }
3603
3604         switch (hw->mac.type) {
3605         case ixgbe_mac_X550:
3606         case ixgbe_mac_X550EM_x:
3607         case ixgbe_mac_X550EM_a:
3608                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3609                 break;
3610         default:
3611                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3612                 break;
3613         }
3614
3615         if (config_dcb_rx) {
3616                 /* Set RX buffer size */
3617                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3618                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3619
3620                 for (i = 0; i < nb_tcs; i++) {
3621                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3622                 }
3623                 /* zero alloc all unused TCs */
3624                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3625                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3626                 }
3627         }
3628         if (config_dcb_tx) {
3629                 /* Only an equally distributed Tx packet
3630                  * buffer strategy is supported.
3631                  */
3632                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3633                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3634
3635                 for (i = 0; i < nb_tcs; i++) {
3636                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3637                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3638                 }
3639                 /* Clear unused TCs, if any, to zero buffer size*/
3640                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3641                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3642                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3643                 }
3644         }
3645
3646         /*Calculates traffic class credits*/
3647         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3648                                 IXGBE_DCB_TX_CONFIG);
3649         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3650                                 IXGBE_DCB_RX_CONFIG);
3651
3652         if (config_dcb_rx) {
3653                 /* Unpack CEE standard containers */
3654                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3655                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3656                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3657                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3658                 /* Configure PG(ETS) RX */
3659                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3660         }
3661
3662         if (config_dcb_tx) {
3663                 /* Unpack CEE standard containers */
3664                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3665                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3666                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3667                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3668                 /* Configure PG(ETS) TX */
3669                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3670         }
3671
3672         /*Configure queue statistics registers*/
3673         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3674
3675         /* Check if the PFC is supported */
3676         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3677                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3678                 for (i = 0; i < nb_tcs; i++) {
3679                         /*
3680                         * e.g. with 8 TCs and rx_buffer_size 0x200, pbsize is 64,
3681                         * so the default high_water is 48 (3/4) and low_water is 16 (1/4).
3682                         */
3683                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3684                         hw->fc.low_water[i] = pbsize / 4;
3685                         /* Enable pfc for this TC */
3686                         tc = &dcb_config->tc_config[i];
3687                         tc->pfc = ixgbe_dcb_pfc_enabled;
3688                 }
3689                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3690                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3691                         pfc_en &= 0x0F;
3692                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3693         }
3694
3695         return ret;
3696 }
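
/*
 * Illustrative sketch, not part of the driver: the rte_eth_conf fields an
 * application would fill so that the DCB branches above are taken
 * (ETH_MQ_RX_DCB / ETH_MQ_TX_DCB with PFC enabled).  The 4-TC setup and the
 * user-priority map below are example values only.
 */
static void __attribute__((unused))
example_fill_dcb_conf(struct rte_eth_conf *conf)
{
	uint8_t i;

	memset(conf, 0, sizeof(*conf));
	conf->rxmode.mq_mode = ETH_MQ_RX_DCB;
	conf->txmode.mq_mode = ETH_MQ_TX_DCB;
	conf->dcb_capability_en = ETH_DCB_PFC_SUPPORT;

	conf->rx_adv_conf.dcb_rx_conf.nb_tcs = ETH_4_TCS;
	conf->tx_adv_conf.dcb_tx_conf.nb_tcs = ETH_4_TCS;
	for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
		/* spread user priorities 0-7 across TC0..TC3 */
		conf->rx_adv_conf.dcb_rx_conf.dcb_tc[i] = i % 4;
		conf->tx_adv_conf.dcb_tx_conf.dcb_tc[i] = i % 4;
	}
}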
3697
3698 /**
3699  * ixgbe_configure_dcb - Configure DCB  Hardware
3700  * @dev: pointer to rte_eth_dev
3701  */
3702 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3703 {
3704         struct ixgbe_dcb_config *dcb_cfg =
3705                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3706         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3707
3708         PMD_INIT_FUNC_TRACE();
3709
3710         /* check support mq_mode for DCB */
3711         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3712             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3713             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3714                 return;
3715
3716         if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3717                 return;
3718
3719         /** Configure DCB hardware **/
3720         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3721 }
3722
3723 /*
3724  * VMDq is only supported on 10 GbE NICs.
3725  */
3726 static void
3727 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3728 {
3729         struct rte_eth_vmdq_rx_conf *cfg;
3730         struct ixgbe_hw *hw;
3731         enum rte_eth_nb_pools num_pools;
3732         uint32_t mrqc, vt_ctl, vlanctrl;
3733         uint32_t vmolr = 0;
3734         int i;
3735
3736         PMD_INIT_FUNC_TRACE();
3737         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3738         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3739         num_pools = cfg->nb_queue_pools;
3740
3741         ixgbe_rss_disable(dev);
3742
3743         /* MRQC: enable vmdq */
3744         mrqc = IXGBE_MRQC_VMDQEN;
3745         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3746
3747         /* PFVTCTL: turn on virtualisation and set the default pool */
3748         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3749         if (cfg->enable_default_pool)
3750                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3751         else
3752                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3753
3754         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3755
3756         for (i = 0; i < (int)num_pools; i++) {
3757                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3758                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3759         }
3760
3761         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3762         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3763         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3764         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3765
3766         /* VFTA - enable all vlan filters */
3767         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3768                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3769
3770         /* VFRE: pool enabling for receive - 64 */
3771         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3772         if (num_pools == ETH_64_POOLS)
3773                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3774
3775         /*
3776          * MPSAR - allow pools to read specific mac addresses
3777          * In this case, all pools should be able to read from mac addr 0
3778          */
3779         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3780         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3781
3782         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3783         for (i = 0; i < cfg->nb_pool_maps; i++) {
3784                 /* set vlan id in VF register and set the valid bit */
3785                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3786                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3787                 /*
3788                  * Put the allowed pools in the VLVFB register pair: pool
3789                  * bits 0-31 go to VLVFB(2 * i) and pool bits 32-63 go to
3790                  * VLVFB(2 * i + 1).
3791                  */
3792                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3793                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3794                                         (cfg->pool_map[i].pools & UINT32_MAX));
3795                 else
3796                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3797                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3798
3799         }
3800
3801         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
3802         if (cfg->enable_loop_back) {
3803                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3804                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3805                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3806         }
3807
3808         IXGBE_WRITE_FLUSH(hw);
3809 }
3810
3811 /*
3812  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3813  * @hw: pointer to hardware structure
3814  */
3815 static void
3816 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3817 {
3818         uint32_t reg;
3819         uint32_t q;
3820
3821         PMD_INIT_FUNC_TRACE();
3822         /* PF VF Transmit Enable */
3823         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3824         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3825
3826         /* Disable the Tx desc arbiter so that MTQC can be changed */
3827         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3828         reg |= IXGBE_RTTDCS_ARBDIS;
3829         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3830
3831         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3832         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3833
3834         /* Disable drop for all queues */
3835         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3836                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3837                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3838
3839         /* Enable the Tx desc arbiter */
3840         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3841         reg &= ~IXGBE_RTTDCS_ARBDIS;
3842         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3843
3844         IXGBE_WRITE_FLUSH(hw);
3845 }
3846
3847 static int __attribute__((cold))
3848 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3849 {
3850         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3851         uint64_t dma_addr;
3852         unsigned int i;
3853
3854         /* Initialize software ring entries */
3855         for (i = 0; i < rxq->nb_rx_desc; i++) {
3856                 volatile union ixgbe_adv_rx_desc *rxd;
3857                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
3858
3859                 if (mbuf == NULL) {
3860                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3861                                      (unsigned) rxq->queue_id);
3862                         return -ENOMEM;
3863                 }
3864
3865                 rte_mbuf_refcnt_set(mbuf, 1);
3866                 mbuf->next = NULL;
3867                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3868                 mbuf->nb_segs = 1;
3869                 mbuf->port = rxq->port_id;
3870
3871                 dma_addr =
3872                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
3873                 rxd = &rxq->rx_ring[i];
3874                 rxd->read.hdr_addr = 0;
3875                 rxd->read.pkt_addr = dma_addr;
3876                 rxe[i].mbuf = mbuf;
3877         }
3878
3879         return 0;
3880 }
3881
3882 static int
3883 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3884 {
3885         struct ixgbe_hw *hw;
3886         uint32_t mrqc;
3887
3888         ixgbe_rss_configure(dev);
3889
3890         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3891
3892         /* MRQC: enable VF RSS */
3893         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3894         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3895         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3896         case ETH_64_POOLS:
3897                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3898                 break;
3899
3900         case ETH_32_POOLS:
3901                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3902                 break;
3903
3904         default:
3905                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3906                 return -EINVAL;
3907         }
3908
3909         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3910
3911         return 0;
3912 }
3913
3914 static int
3915 ixgbe_config_vf_default(struct rte_eth_dev *dev)
3916 {
3917         struct ixgbe_hw *hw =
3918                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3919
3920         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3921         case ETH_64_POOLS:
3922                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3923                         IXGBE_MRQC_VMDQEN);
3924                 break;
3925
3926         case ETH_32_POOLS:
3927                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3928                         IXGBE_MRQC_VMDQRT4TCEN);
3929                 break;
3930
3931         case ETH_16_POOLS:
3932                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3933                         IXGBE_MRQC_VMDQRT8TCEN);
3934                 break;
3935         default:
3936                 PMD_INIT_LOG(ERR,
3937                         "invalid pool number in IOV mode");
3938                 break;
3939         }
3940         return 0;
3941 }
3942
3943 static int
3944 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3945 {
3946         struct ixgbe_hw *hw =
3947                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3948
3949         if (hw->mac.type == ixgbe_mac_82598EB)
3950                 return 0;
3951
3952         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3953                 /*
3954                  * SRIOV inactive scheme
3955                  * any DCB/RSS w/o VMDq multi-queue setting
3956                  */
3957                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3958                 case ETH_MQ_RX_RSS:
3959                 case ETH_MQ_RX_DCB_RSS:
3960                 case ETH_MQ_RX_VMDQ_RSS:
3961                         ixgbe_rss_configure(dev);
3962                         break;
3963
3964                 case ETH_MQ_RX_VMDQ_DCB:
3965                         ixgbe_vmdq_dcb_configure(dev);
3966                         break;
3967
3968                 case ETH_MQ_RX_VMDQ_ONLY:
3969                         ixgbe_vmdq_rx_hw_configure(dev);
3970                         break;
3971
3972                 case ETH_MQ_RX_NONE:
3973                 default:
3974                         /* if mq_mode is none, disable RSS mode. */
3975                         ixgbe_rss_disable(dev);
3976                         break;
3977                 }
3978         } else {
3979                 /*
3980                  * SRIOV active scheme
3981                  * Support RSS together with VMDq & SRIOV
3982                  */
3983                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3984                 case ETH_MQ_RX_RSS:
3985                 case ETH_MQ_RX_VMDQ_RSS:
3986                         ixgbe_config_vf_rss(dev);
3987                         break;
3988
3989                 /* FIXME: add support for DCB/RSS together with VMDq & SRIOV */
3990                 case ETH_MQ_RX_VMDQ_DCB:
3991                 case ETH_MQ_RX_VMDQ_DCB_RSS:
3992                         PMD_INIT_LOG(ERR,
3993                                 "DCB is not supported with VMDq & SRIOV");
3994                         return -1;
3995                 default:
3996                         ixgbe_config_vf_default(dev);
3997                         break;
3998                 }
3999         }
4000
4001         return 0;
4002 }
4003
4004 static int
4005 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4006 {
4007         struct ixgbe_hw *hw =
4008                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4009         uint32_t mtqc;
4010         uint32_t rttdcs;
4011
4012         if (hw->mac.type == ixgbe_mac_82598EB)
4013                 return 0;
4014
4015         /* disable arbiter before setting MTQC */
4016         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4017         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4018         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4019
4020         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4021                 /*
4022                  * SRIOV inactive scheme
4023                  * any DCB w/o VMDq multi-queue setting
4024                  */
4025                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4026                         ixgbe_vmdq_tx_hw_configure(hw);
4027                 else {
4028                         mtqc = IXGBE_MTQC_64Q_1PB;
4029                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4030                 }
4031         } else {
4032                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4033
4034                 /*
4035                  * SRIOV active scheme
4036                  * FIXME: add support for DCB together with VMDq & SRIOV
4037                  */
4038                 case ETH_64_POOLS:
4039                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4040                         break;
4041                 case ETH_32_POOLS:
4042                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4043                         break;
4044                 case ETH_16_POOLS:
4045                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4046                                 IXGBE_MTQC_8TC_8TQ;
4047                         break;
4048                 default:
4049                         mtqc = IXGBE_MTQC_64Q_1PB;
4050                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4051                 }
4052                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4053         }
4054
4055         /* re-enable arbiter */
4056         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4057         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4058
4059         return 0;
4060 }
4061
4062 /**
4063  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4064  *
4065  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4066  * spec rev. 3.0 chapter 8.2.3.8.13.
4067  *
4068  * @pool Memory pool of the Rx queue
4069  */
4070 static inline uint32_t
4071 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4072 {
4073         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4074
4075         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4076         uint16_t maxdesc =
4077                 IPV4_MAX_PKT_LEN /
4078                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
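             /*
              * Illustrative example only, assuming a pool created with the
              * default RTE_MBUF_DEFAULT_BUF_SIZE (2176 B data room) and 128 B
              * of headroom: maxdesc = IPV4_MAX_PKT_LEN (65535) / 2048 = 31,
              * so MAXDESC_16 is returned and 16 * BSIZEPKT stays well below
              * the 64 KB - 1 limit.
              */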
4079
4080         if (maxdesc >= 16)
4081                 return IXGBE_RSCCTL_MAXDESC_16;
4082         else if (maxdesc >= 8)
4083                 return IXGBE_RSCCTL_MAXDESC_8;
4084         else if (maxdesc >= 4)
4085                 return IXGBE_RSCCTL_MAXDESC_4;
4086         else
4087                 return IXGBE_RSCCTL_MAXDESC_1;
4088 }
4089
4090 /**
4091  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4092  * interrupt
4093  *
4094  * (Taken from FreeBSD tree)
4095  * (yes this is all very magic and confusing :)
4096  *
4097  * @dev port handle
4098  * @entry the register array entry
4099  * @vector the MSIX vector for this queue
4100  * @type RX/TX/MISC
4101  */
4102 static void
4103 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4104 {
4105         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4106         u32 ivar, index;
4107
4108         vector |= IXGBE_IVAR_ALLOC_VAL;
4109
4110         switch (hw->mac.type) {
4111
4112         case ixgbe_mac_82598EB:
4113                 if (type == -1)
4114                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4115                 else
4116                         entry += (type * 64);
4117                 index = (entry >> 2) & 0x1F;
4118                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4119                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4120                 ivar |= (vector << (8 * (entry & 0x3)));
4121                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4122                 break;
4123
4124         case ixgbe_mac_82599EB:
4125         case ixgbe_mac_X540:
4126                 if (type == -1) { /* MISC IVAR */
4127                         index = (entry & 1) * 8;
4128                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4129                         ivar &= ~(0xFF << index);
4130                         ivar |= (vector << index);
4131                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4132                 } else {        /* RX/TX IVARS */
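                             /*
                              * For example, Rx queue 5 (entry = 5, type = 0) maps
                              * to bit offset 16 * (5 & 1) + 8 * 0 = 16, i.e. the
                              * vector goes into byte 2 of IVAR(5 >> 1) = IVAR(2).
                              */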
4133                         index = (16 * (entry & 1)) + (8 * type);
4134                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4135                         ivar &= ~(0xFF << index);
4136                         ivar |= (vector << index);
4137                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4138                 }
4139
4140                 break;
4141
4142         default:
4143                 break;
4144         }
4145 }
4146
4147 void __attribute__((cold))
4148 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4149 {
4150         uint16_t i, rx_using_sse;
4151         struct ixgbe_adapter *adapter =
4152                 (struct ixgbe_adapter *)dev->data->dev_private;
4153
4154         /*
4155          * In order to allow Vector Rx there are a few configuration
4156          * conditions to be met and Rx Bulk Allocation should be allowed.
4157          */
4158         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4159             !adapter->rx_bulk_alloc_allowed) {
4160                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4161                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4162                                     "not enabled",
4163                              dev->data->port_id);
4164
4165                 adapter->rx_vec_allowed = false;
4166         }
4167
4168         /*
4169          * Initialize the appropriate LRO callback.
4170          *
4171          * If all queues satisfy the bulk allocation preconditions
4172          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4173          * Otherwise use a single allocation version.
4174          */
4175         if (dev->data->lro) {
4176                 if (adapter->rx_bulk_alloc_allowed) {
4177                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4178                                            "allocation version");
4179                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4180                 } else {
4181                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4182                                            "allocation version");
4183                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4184                 }
4185         } else if (dev->data->scattered_rx) {
4186                 /*
4187                  * Set the non-LRO scattered callback: there are Vector and
4188                  * single allocation versions.
4189                  */
4190                 if (adapter->rx_vec_allowed) {
4191                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4192                                             "callback (port=%d).",
4193                                      dev->data->port_id);
4194
4195                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4196                 } else if (adapter->rx_bulk_alloc_allowed) {
4197                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback "
4198                                            "with bulk allocation (port=%d).",
4199                                      dev->data->port_id);
4200                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4201                 } else {
4202                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4203                                             "single allocation) "
4204                                             "Scattered Rx callback "
4205                                             "(port=%d).",
4206                                      dev->data->port_id);
4207
4208                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4209                 }
4210         /*
4211          * Below we set "simple" callbacks according to port/queue parameters.
4212          * If parameters allow we are going to choose between the following
4213          * callbacks:
4214          *    - Vector
4215          *    - Bulk Allocation
4216          *    - Single buffer allocation (the simplest one)
4217          */
4218         } else if (adapter->rx_vec_allowed) {
4219                 PMD_INIT_LOG(DEBUG, "Vector Rx enabled, please make sure the Rx "
4220                                     "burst size is no less than %d (port=%d).",
4221                              RTE_IXGBE_DESCS_PER_LOOP,
4222                              dev->data->port_id);
4223
4224                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4225         } else if (adapter->rx_bulk_alloc_allowed) {
4226                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4227                                     "satisfied. Rx Burst Bulk Alloc function "
4228                                     "will be used on port=%d.",
4229                              dev->data->port_id);
4230
4231                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4232         } else {
4233                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4234                                     "satisfied, or Scattered Rx is requested "
4235                                     "(port=%d).",
4236                              dev->data->port_id);
4237
4238                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4239         }
4240
4241         /* Propagate information about RX function choice through all queues. */
4242
4243         rx_using_sse =
4244                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4245                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4246
4247         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4248                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4249
4250                 rxq->rx_using_sse = rx_using_sse;
4251         }
4252 }
4253
4254 /**
4255  * ixgbe_set_rsc - configure RSC related port HW registers
4256  *
4257  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4258  * of 82599 Spec (x540 configuration is virtually the same).
4259  *
4260  * @dev port handle
4261  *
4262  * Returns 0 in case of success or a non-zero error code
4263  */
4264 static int
4265 ixgbe_set_rsc(struct rte_eth_dev *dev)
4266 {
4267         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4268         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4269         struct rte_eth_dev_info dev_info = { 0 };
4270         bool rsc_capable = false;
4271         uint16_t i;
4272         uint32_t rdrxctl;
4273
4274         /* Sanity check */
4275         dev->dev_ops->dev_infos_get(dev, &dev_info);
4276         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4277                 rsc_capable = true;
4278
4279         if (!rsc_capable && rx_conf->enable_lro) {
4280                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4281                                    "support it");
4282                 return -EINVAL;
4283         }
4284
4285         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4286
4287         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4288                 /*
4289                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4290                  * RSC configuration requires HW CRC stripping to be
4291                  * enabled. If the user requested both HW CRC stripping off
4292                  * and RSC on, return an error.
4293                  */
4294                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4295                                     "is disabled");
4296                 return -EINVAL;
4297         }
4298
4299         /* RFCTL configuration  */
4300         if (rsc_capable) {
4301                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4302
4303                 if (rx_conf->enable_lro)
4304                         /*
4305                          * Since NFS packet coalescing is not supported, clear
4306                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4307                          * enabled.
4308                          */
4309                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4310                                    IXGBE_RFCTL_NFSR_DIS);
4311                 else
4312                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4313
4314                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4315         }
4316
4317         /* If LRO hasn't been requested - we are done here. */
4318         if (!rx_conf->enable_lro)
4319                 return 0;
4320
4321         /* Set RDRXCTL.RSCACKC bit */
4322         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4323         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4324         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4325
4326         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4327         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4328                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4329                 uint32_t srrctl =
4330                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4331                 uint32_t rscctl =
4332                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4333                 uint32_t psrtype =
4334                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4335                 uint32_t eitr =
4336                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4337
4338                 /*
4339                  * ixgbe PMD doesn't support header-split at the moment.
4340                  *
4341                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4342                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER field
4343                  * should be configured even if header split is not
4344                  * enabled. We configure it to 128 bytes, following the
4345                  * recommendation in the spec.
4346                  */
4347                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4348                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4349                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4350
4351                 /*
4352                  * TODO: Consider setting the Receive Descriptor Minimum
4353                  * Threshold Size for the RSC case. This is not an obviously
4354                  * beneficial option, but it is worth considering...
4355                  */
4356
4357                 rscctl |= IXGBE_RSCCTL_RSCEN;
4358                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4359                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4360
4361                 /*
4362                  * RSC: Set ITR interval corresponding to 2K ints/s.
4363                  *
4364                  * Full-sized RSC aggregations for a 10Gb/s link will
4365                  * arrive at about 20K aggregation/s rate.
4366                  *
4367                  * A 2K ints/s rate (one interrupt every 500us) will cause
4368                  * only about 10% of the aggregations to be closed by the
4369                  * interrupt timer expiration when streaming at wire speed.
4370                  *
4371                  * For a sparse streaming case this setting will yield
4372                  * at most 500us of latency for a single RSC aggregation.
4373                  */
4374                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4375                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4376
4377                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4378                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4379                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4380                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4381
4382                 /*
4383                  * RSC requires the mapping of the queue to the
4384                  * interrupt vector.
4385                  */
4386                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4387         }
4388
4389         dev->data->lro = 1;
4390
4391         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4392
4393         return 0;
4394 }
4395
4396 /*
4397  * Initializes Receive Unit.
4398  */
4399 int __attribute__((cold))
4400 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4401 {
4402         struct ixgbe_hw     *hw;
4403         struct ixgbe_rx_queue *rxq;
4404         uint64_t bus_addr;
4405         uint32_t rxctrl;
4406         uint32_t fctrl;
4407         uint32_t hlreg0;
4408         uint32_t maxfrs;
4409         uint32_t srrctl;
4410         uint32_t rdrxctl;
4411         uint32_t rxcsum;
4412         uint16_t buf_size;
4413         uint16_t i;
4414         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4415         int rc;
4416
4417         PMD_INIT_FUNC_TRACE();
4418         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4419
4420         /*
4421          * Make sure receives are disabled while setting
4422          * up the RX context (registers, descriptor rings, etc.).
4423          */
4424         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4425         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4426
4427         /* Enable receipt of broadcast frames */
4428         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4429         fctrl |= IXGBE_FCTRL_BAM;
4430         fctrl |= IXGBE_FCTRL_DPF;
4431         fctrl |= IXGBE_FCTRL_PMCF;
4432         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4433
4434         /*
4435          * Configure CRC stripping, if any.
4436          */
4437         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4438         if (rx_conf->hw_strip_crc)
4439                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4440         else
4441                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4442
4443         /*
4444          * Configure jumbo frame support, if any.
4445          */
4446         if (rx_conf->jumbo_frame == 1) {
4447                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4448                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4449                 maxfrs &= 0x0000FFFF;
4450                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
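                     /*
                      * MAXFRS carries the maximum frame size in bits 31:16;
                      * e.g. a max_rx_pkt_len of 9000 programs a 9000-byte MFS.
                      */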
4451                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4452         } else
4453                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4454
4455         /*
4456          * If loopback mode is configured for 82599, set LPBK bit.
4457          */
4458         if (hw->mac.type == ixgbe_mac_82599EB &&
4459                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4460                 hlreg0 |= IXGBE_HLREG0_LPBK;
4461         else
4462                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4463
4464         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4465
4466         /* Setup RX queues */
4467         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4468                 rxq = dev->data->rx_queues[i];
4469
4470                 /*
4471                  * Reset crc_len in case it was changed after queue setup by a
4472                  * call to configure.
4473                  */
4474                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4475
4476                 /* Setup the Base and Length of the Rx Descriptor Rings */
4477                 bus_addr = rxq->rx_ring_phys_addr;
4478                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4479                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4480                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4481                                 (uint32_t)(bus_addr >> 32));
4482                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4483                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4484                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4485                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4486
4487                 /* Configure the SRRCTL register */
4488 #ifdef RTE_HEADER_SPLIT_ENABLE
4489                 /*
4490                  * Configure Header Split
4491                  */
4492                 if (rx_conf->header_split) {
4493                         if (hw->mac.type == ixgbe_mac_82599EB) {
4494                                 /* Must setup the PSRTYPE register */
4495                                 uint32_t psrtype;
4496
4497                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4498                                         IXGBE_PSRTYPE_UDPHDR   |
4499                                         IXGBE_PSRTYPE_IPV4HDR  |
4500                                         IXGBE_PSRTYPE_IPV6HDR;
4501                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4502                         }
4503                         srrctl = ((rx_conf->split_hdr_size <<
4504                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4505                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4506                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4507                 } else
4508 #endif
4509                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4510
4511                 /* Set if packets are dropped when no descriptors available */
4512                 if (rxq->drop_en)
4513                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4514
4515                 /*
4516                  * Configure the RX buffer size in the BSIZEPACKET field of
4517                  * the SRRCTL register of the queue.
4518                  * The value is in 1 KB resolution. Valid values can be from
4519                  * 1 KB to 16 KB.
4520                  */
4521                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4522                         RTE_PKTMBUF_HEADROOM);
4523                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4524                            IXGBE_SRRCTL_BSIZEPKT_MASK);
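                     /*
                      * For example, assuming a pool created with the default
                      * RTE_MBUF_DEFAULT_BUF_SIZE (2176 B) and 128 B of headroom,
                      * buf_size is 2048 and BSIZEPKT is programmed to 2, i.e.
                      * 2 KB receive buffers.
                      */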
4525
4526                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4527
4528                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4529                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4530
4531                 /* Add double (QinQ) VLAN tag length when checking the buffer size */
4532                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4533                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4534                         dev->data->scattered_rx = 1;
4535         }
4536
4537         if (rx_conf->enable_scatter)
4538                 dev->data->scattered_rx = 1;
4539
4540         /*
4541          * Device configured with multiple RX queues.
4542          */
4543         ixgbe_dev_mq_rx_configure(dev);
4544
4545         /*
4546          * Setup the Checksum Register.
4547          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4548          * Enable IP/L4 checksum computation by hardware if requested to do so.
4549          */
4550         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4551         rxcsum |= IXGBE_RXCSUM_PCSD;
4552         if (rx_conf->hw_ip_checksum)
4553                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4554         else
4555                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4556
4557         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4558
4559         if (hw->mac.type == ixgbe_mac_82599EB ||
4560             hw->mac.type == ixgbe_mac_X540) {
4561                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4562                 if (rx_conf->hw_strip_crc)
4563                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4564                 else
4565                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4566                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4567                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4568         }
4569
4570         rc = ixgbe_set_rsc(dev);
4571         if (rc)
4572                 return rc;
4573
4574         ixgbe_set_rx_function(dev);
4575
4576         return 0;
4577 }
4578
4579 /*
4580  * Initializes Transmit Unit.
4581  */
4582 void __attribute__((cold))
4583 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4584 {
4585         struct ixgbe_hw     *hw;
4586         struct ixgbe_tx_queue *txq;
4587         uint64_t bus_addr;
4588         uint32_t hlreg0;
4589         uint32_t txctrl;
4590         uint16_t i;
4591
4592         PMD_INIT_FUNC_TRACE();
4593         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4594
4595         /* Enable TX CRC (checksum offload requirement) and hw padding
4596          * (TSO requirement)
4597          */
4598         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4599         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4600         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4601
4602         /* Setup the Base and Length of the Tx Descriptor Rings */
4603         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4604                 txq = dev->data->tx_queues[i];
4605
4606                 bus_addr = txq->tx_ring_phys_addr;
4607                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4608                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4609                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4610                                 (uint32_t)(bus_addr >> 32));
4611                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4612                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4613                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4614                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4615                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4616
4617                 /*
4618                  * Disable Tx Head Writeback RO bit, since this hoses
4619                  * bookkeeping if things aren't delivered in order.
4620                  */
4621                 switch (hw->mac.type) {
4622                 case ixgbe_mac_82598EB:
4623                         txctrl = IXGBE_READ_REG(hw,
4624                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4625                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4626                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4627                                         txctrl);
4628                         break;
4629
4630                 case ixgbe_mac_82599EB:
4631                 case ixgbe_mac_X540:
4632                 case ixgbe_mac_X550:
4633                 case ixgbe_mac_X550EM_x:
4634                 case ixgbe_mac_X550EM_a:
4635                 default:
4636                         txctrl = IXGBE_READ_REG(hw,
4637                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4638                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4639                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4640                                         txctrl);
4641                         break;
4642                 }
4643         }
4644
4645         /* Device configured with multiple TX queues. */
4646         ixgbe_dev_mq_tx_configure(dev);
4647 }
4648
4649 /*
4650  * Set up link for 82599 loopback mode Tx->Rx.
4651  */
4652 static inline void __attribute__((cold))
4653 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4654 {
4655         PMD_INIT_FUNC_TRACE();
4656
4657         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4658                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4659                                 IXGBE_SUCCESS) {
4660                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4661                         /* ignore error */
4662                         return;
4663                 }
4664         }
4665
4666         /* Restart link */
4667         IXGBE_WRITE_REG(hw,
4668                         IXGBE_AUTOC,
4669                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4670         ixgbe_reset_pipeline_82599(hw);
4671
4672         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4673         msec_delay(50);
4674 }
4675
4676
4677 /*
4678  * Start Transmit and Receive Units.
4679  */
4680 int __attribute__((cold))
4681 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4682 {
4683         struct ixgbe_hw     *hw;
4684         struct ixgbe_tx_queue *txq;
4685         struct ixgbe_rx_queue *rxq;
4686         uint32_t txdctl;
4687         uint32_t dmatxctl;
4688         uint32_t rxctrl;
4689         uint16_t i;
4690         int ret = 0;
4691
4692         PMD_INIT_FUNC_TRACE();
4693         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4694
4695         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4696                 txq = dev->data->tx_queues[i];
4697                 /* Setup Transmit Threshold Registers */
4698                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4699                 txdctl |= txq->pthresh & 0x7F;
4700                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4701                 txdctl |= ((txq->wthresh & 0x7F) << 16);
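                     /* TXDCTL: PTHRESH in bits 6:0, HTHRESH in 14:8, WTHRESH in 22:16 */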
4702                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4703         }
4704
4705         if (hw->mac.type != ixgbe_mac_82598EB) {
4706                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4707                 dmatxctl |= IXGBE_DMATXCTL_TE;
4708                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4709         }
4710
4711         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4712                 txq = dev->data->tx_queues[i];
4713                 if (!txq->tx_deferred_start) {
4714                         ret = ixgbe_dev_tx_queue_start(dev, i);
4715                         if (ret < 0)
4716                                 return ret;
4717                 }
4718         }
4719
4720         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4721                 rxq = dev->data->rx_queues[i];
4722                 if (!rxq->rx_deferred_start) {
4723                         ret = ixgbe_dev_rx_queue_start(dev, i);
4724                         if (ret < 0)
4725                                 return ret;
4726                 }
4727         }
4728
4729         /* Enable Receive engine */
4730         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4731         if (hw->mac.type == ixgbe_mac_82598EB)
4732                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4733         rxctrl |= IXGBE_RXCTRL_RXEN;
4734         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4735
4736         /* If loopback mode is enabled for 82599, set up the link accordingly */
4737         if (hw->mac.type == ixgbe_mac_82599EB &&
4738                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4739                 ixgbe_setup_loopback_link_82599(hw);
4740
4741         return 0;
4742 }
4743
4744 /*
4745  * Start Receive Units for specified queue.
4746  */
4747 int __attribute__((cold))
4748 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4749 {
4750         struct ixgbe_hw     *hw;
4751         struct ixgbe_rx_queue *rxq;
4752         uint32_t rxdctl;
4753         int poll_ms;
4754
4755         PMD_INIT_FUNC_TRACE();
4756         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4757
4758         if (rx_queue_id < dev->data->nb_rx_queues) {
4759                 rxq = dev->data->rx_queues[rx_queue_id];
4760
4761                 /* Allocate buffers for descriptor rings */
4762                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4763                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4764                                      rx_queue_id);
4765                         return -1;
4766                 }
4767                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4768                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4769                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4770
4771                 /* Wait until RX Enable ready */
4772                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4773                 do {
4774                         rte_delay_ms(1);
4775                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4776                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4777                 if (!poll_ms)
4778                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4779                                      rx_queue_id);
4780                 rte_wmb();
4781                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4782                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
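                     /*
                      * Writing RDT = nb_rx_desc - 1 hands all but one descriptor
                      * to the hardware; keeping one descriptor back ensures a
                      * full ring is never confused with an empty one (RDH == RDT).
                      */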
4783                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4784         } else
4785                 return -1;
4786
4787         return 0;
4788 }
4789
4790 /*
4791  * Stop Receive Units for specified queue.
4792  */
4793 int __attribute__((cold))
4794 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4795 {
4796         struct ixgbe_hw     *hw;
4797         struct ixgbe_adapter *adapter =
4798                 (struct ixgbe_adapter *)dev->data->dev_private;
4799         struct ixgbe_rx_queue *rxq;
4800         uint32_t rxdctl;
4801         int poll_ms;
4802
4803         PMD_INIT_FUNC_TRACE();
4804         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4805
4806         if (rx_queue_id < dev->data->nb_rx_queues) {
4807                 rxq = dev->data->rx_queues[rx_queue_id];
4808
4809                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4810                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4811                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4812
4813                 /* Wait until RX Enable bit clear */
4814                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4815                 do {
4816                         rte_delay_ms(1);
4817                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4818                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4819                 if (!poll_ms)
4820                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4821                                      rx_queue_id);
4822
4823                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4824
4825                 ixgbe_rx_queue_release_mbufs(rxq);
4826                 ixgbe_reset_rx_queue(adapter, rxq);
4827                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4828         } else
4829                 return -1;
4830
4831         return 0;
4832 }
4833
4834
4835 /*
4836  * Start Transmit Units for specified queue.
4837  */
4838 int __attribute__((cold))
4839 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4840 {
4841         struct ixgbe_hw     *hw;
4842         struct ixgbe_tx_queue *txq;
4843         uint32_t txdctl;
4844         int poll_ms;
4845
4846         PMD_INIT_FUNC_TRACE();
4847         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4848
4849         if (tx_queue_id < dev->data->nb_tx_queues) {
4850                 txq = dev->data->tx_queues[tx_queue_id];
4851                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4852                 txdctl |= IXGBE_TXDCTL_ENABLE;
4853                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4854
4855                 /* Wait until TX Enable ready */
4856                 if (hw->mac.type == ixgbe_mac_82599EB) {
4857                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4858                         do {
4859                                 rte_delay_ms(1);
4860                                 txdctl = IXGBE_READ_REG(hw,
4861                                         IXGBE_TXDCTL(txq->reg_idx));
4862                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4863                         if (!poll_ms)
4864                                 PMD_INIT_LOG(ERR, "Could not enable "
4865                                              "Tx Queue %d", tx_queue_id);
4866                 }
4867                 rte_wmb();
4868                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4869                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4870                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4871         } else
4872                 return -1;
4873
4874         return 0;
4875 }
4876
4877 /*
4878  * Stop Transmit Units for specified queue.
4879  */
4880 int __attribute__((cold))
4881 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4882 {
4883         struct ixgbe_hw     *hw;
4884         struct ixgbe_tx_queue *txq;
4885         uint32_t txdctl;
4886         uint32_t txtdh, txtdt;
4887         int poll_ms;
4888
4889         PMD_INIT_FUNC_TRACE();
4890         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4891
4892         if (tx_queue_id >= dev->data->nb_tx_queues)
4893                 return -1;
4894
4895         txq = dev->data->tx_queues[tx_queue_id];
4896
4897         /* Wait until TX queue is empty */
4898         if (hw->mac.type == ixgbe_mac_82599EB) {
4899                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4900                 do {
4901                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
4902                         txtdh = IXGBE_READ_REG(hw,
4903                                                IXGBE_TDH(txq->reg_idx));
4904                         txtdt = IXGBE_READ_REG(hw,
4905                                                IXGBE_TDT(txq->reg_idx));
4906                 } while (--poll_ms && (txtdh != txtdt));
4907                 if (!poll_ms)
4908                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
4909                                      "when stopping.", tx_queue_id);
4910         }
4911
4912         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4913         txdctl &= ~IXGBE_TXDCTL_ENABLE;
4914         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4915
4916         /* Wait until TX Enable bit clear */
4917         if (hw->mac.type == ixgbe_mac_82599EB) {
4918                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4919                 do {
4920                         rte_delay_ms(1);
4921                         txdctl = IXGBE_READ_REG(hw,
4922                                                 IXGBE_TXDCTL(txq->reg_idx));
4923                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
4924                 if (!poll_ms)
4925                         PMD_INIT_LOG(ERR, "Could not disable "
4926                                      "Tx Queue %d", tx_queue_id);
4927         }
4928
4929         if (txq->ops != NULL) {
4930                 txq->ops->release_mbufs(txq);
4931                 txq->ops->reset(txq);
4932         }
4933         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4934
4935         return 0;
4936 }
4937
4938 void
4939 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4940         struct rte_eth_rxq_info *qinfo)
4941 {
4942         struct ixgbe_rx_queue *rxq;
4943
4944         rxq = dev->data->rx_queues[queue_id];
4945
4946         qinfo->mp = rxq->mb_pool;
4947         qinfo->scattered_rx = dev->data->scattered_rx;
4948         qinfo->nb_desc = rxq->nb_rx_desc;
4949
4950         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4951         qinfo->conf.rx_drop_en = rxq->drop_en;
4952         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4953 }
4954
4955 void
4956 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4957         struct rte_eth_txq_info *qinfo)
4958 {
4959         struct ixgbe_tx_queue *txq;
4960
4961         txq = dev->data->tx_queues[queue_id];
4962
4963         qinfo->nb_desc = txq->nb_tx_desc;
4964
4965         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4966         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4967         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4968
4969         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4970         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
4971         qinfo->conf.txq_flags = txq->txq_flags;
4972         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
4973 }
4974
4975 /*
4976  * [VF] Initializes Receive Unit.
4977  */
4978 int __attribute__((cold))
4979 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
4980 {
4981         struct ixgbe_hw     *hw;
4982         struct ixgbe_rx_queue *rxq;
4983         uint64_t bus_addr;
4984         uint32_t srrctl, psrtype = 0;
4985         uint16_t buf_size;
4986         uint16_t i;
4987         int ret;
4988
4989         PMD_INIT_FUNC_TRACE();
4990         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4991
4992         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
4993                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
4994                         "it should be a power of 2");
4995                 return -1;
4996         }
4997
4998         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
4999                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5000                         "it should be less than or equal to %d",
5001                         hw->mac.max_rx_queues);
5002                 return -1;
5003         }
5004
5005         /*
5006          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5007          * disables VF receipt of packets if the PF MTU is > 1500.
5008          * This is done to deal with the 82599 limitation that forces
5009          * the PF and all VFs to share the same MTU.
5010          * The PF driver re-enables VF packet receipt only when the VF driver
5011          * issues an IXGBE_VF_SET_LPE request.
5012          * In the meantime, the VF device cannot be used, even if the VF driver
5013          * and the Guest VM network stack are ready to accept packets with a
5014          * size up to the PF MTU.
5015          * As a work-around for this PF behaviour, force the call to
5016          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5017          * VF packet reception works in all cases.
5018          */
5019         ixgbevf_rlpml_set_vf(hw,
5020                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5021
5022         /* Setup RX queues */
5023         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5024                 rxq = dev->data->rx_queues[i];
5025
5026                 /* Allocate buffers for descriptor rings */
5027                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5028                 if (ret)
5029                         return ret;
5030
5031                 /* Setup the Base and Length of the Rx Descriptor Rings */
5032                 bus_addr = rxq->rx_ring_phys_addr;
5033
5034                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5035                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5036                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5037                                 (uint32_t)(bus_addr >> 32));
5038                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5039                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5040                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5041                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5042
5043
5044                 /* Configure the SRRCTL register */
5045 #ifdef RTE_HEADER_SPLIT_ENABLE
5046                 /*
5047                  * Configure Header Split
5048                  */
5049                 if (dev->data->dev_conf.rxmode.header_split) {
5050                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5051                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5052                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5053                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5054                 } else
5055 #endif
5056                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5057
5058                 /* Set if packets are dropped when no descriptors available */
5059                 if (rxq->drop_en)
5060                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5061
5062                 /*
5063                  * Configure the RX buffer size in the BSIZEPACKET field of
5064                  * the SRRCTL register of the queue.
5065                  * The value is in 1 KB resolution. Valid values can be from
5066                  * 1 KB to 16 KB.
5067                  */
5068                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5069                         RTE_PKTMBUF_HEADROOM);
5070                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5071                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5072
5073                 /*
5074                  * VF modification: write the per-VF SRRCTL register (VFSRRCTL)
5075                  */
5076                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5077
5078                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5079                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5080
5081                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5082                     /* add double (QinQ) VLAN tag length when checking the buffer size */
5083                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5084                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5085                         if (!dev->data->scattered_rx)
5086                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5087                         dev->data->scattered_rx = 1;
5088                 }
5089         }
5090
5091 #ifdef RTE_HEADER_SPLIT_ENABLE
5092         if (dev->data->dev_conf.rxmode.header_split)
5093                 /* Must setup the PSRTYPE register */
5094                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5095                         IXGBE_PSRTYPE_UDPHDR   |
5096                         IXGBE_PSRTYPE_IPV4HDR  |
5097                         IXGBE_PSRTYPE_IPV6HDR;
5098 #endif
5099
5100         /* Set RQPL (RSS queues per pool) from the Rx queue count */
5101         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5102                 IXGBE_PSRTYPE_RQPL_SHIFT;
5103         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
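             /*
              * RQPL selects the number of RSS queues per pool; the right shift
              * by one is assumed here to map 1, 2 and 4 Rx queues onto the
              * corresponding field encodings.
              */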
5104
5105         ixgbe_set_rx_function(dev);
5106
5107         return 0;
5108 }
5109
5110 /*
5111  * [VF] Initializes Transmit Unit.
5112  */
5113 void __attribute__((cold))
5114 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5115 {
5116         struct ixgbe_hw     *hw;
5117         struct ixgbe_tx_queue *txq;
5118         uint64_t bus_addr;
5119         uint32_t txctrl;
5120         uint16_t i;
5121
5122         PMD_INIT_FUNC_TRACE();
5123         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5124
5125         /* Setup the Base and Length of the Tx Descriptor Rings */
5126         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5127                 txq = dev->data->tx_queues[i];
5128                 bus_addr = txq->tx_ring_phys_addr;
5129                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5130                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5131                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5132                                 (uint32_t)(bus_addr >> 32));
5133                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5134                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5135                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5136                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5137                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
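                     /*
                      * With head and tail both at zero the Tx ring starts out
                      * empty; the transmit path fills descriptors and advances
                      * VFTDT as packets are queued.
                      */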
5138
5139                 /*
5140                  * Disable the Tx head write-back RO (relaxed ordering) bit:
5141                  * out-of-order write-backs corrupt the head bookkeeping.
5142                  */
5143                 txctrl = IXGBE_READ_REG(hw,
5144                                 IXGBE_VFDCA_TXCTRL(i));
5145                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5146                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5147                                 txctrl);
5148         }
5149 }
5150
5151 /*
5152  * [VF] Start Transmit and Receive Units.
5153  */
5154 void __attribute__((cold))
5155 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5156 {
5157         struct ixgbe_hw     *hw;
5158         struct ixgbe_tx_queue *txq;
5159         struct ixgbe_rx_queue *rxq;
5160         uint32_t txdctl;
5161         uint32_t rxdctl;
5162         uint16_t i;
5163         int poll_ms;
5164
5165         PMD_INIT_FUNC_TRACE();
5166         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5167
5168         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5169                 txq = dev->data->tx_queues[i];
5170                 /* Setup Transmit Threshold Registers */
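                     /*
                      * TXDCTL packs the prefetch, host and write-back thresholds
                      * into bits 6:0, 14:8 and 22:16, which is what the 0x7F
                      * masks and the shifts by 8 and 16 below encode.
                      */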
5171                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5172                 txdctl |= txq->pthresh & 0x7F;
5173                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5174                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5175                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5176         }
5177
5178         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5179
5180                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5181                 txdctl |= IXGBE_TXDCTL_ENABLE;
5182                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5183
5184                 poll_ms = 10;
5185                 /* Wait until TX Enable ready */
5186                 do {
5187                         rte_delay_ms(1);
5188                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5189                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5190                 if (!poll_ms)
5191                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5192         }
5193         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5194
5195                 rxq = dev->data->rx_queues[i];
5196
5197                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5198                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5199                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5200
5201                 /* Wait until RX Enable ready */
5202                 poll_ms = 10;
5203                 do {
5204                         rte_delay_ms(1);
5205                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5206                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5207                 if (!poll_ms)
5208                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
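                     /*
                      * Publish the ring to the device: the write barrier orders
                      * the descriptor and mbuf writes ahead of the tail update,
                      * and a tail of nb_rx_desc - 1 hands all but one descriptor
                      * to hardware.
                      */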
5209                 rte_wmb();
5210                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5211
5212         }
5213 }
5214
5215 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
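     /*
      * The stubs report failure (-1) or return no packets, so the driver falls
      * back to the scalar Rx paths when the vector code is not built in.
      */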
5216 int __attribute__((weak))
5217 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5218 {
5219         return -1;
5220 }
5221
5222 uint16_t __attribute__((weak))
5223 ixgbe_recv_pkts_vec(
5224         void __rte_unused *rx_queue,
5225         struct rte_mbuf __rte_unused **rx_pkts,
5226         uint16_t __rte_unused nb_pkts)
5227 {
5228         return 0;
5229 }
5230
5231 uint16_t __attribute__((weak))
5232 ixgbe_recv_scattered_pkts_vec(
5233         void __rte_unused *rx_queue,
5234         struct rte_mbuf __rte_unused **rx_pkts,
5235         uint16_t __rte_unused nb_pkts)
5236 {
5237         return 0;
5238 }
5239
5240 int __attribute__((weak))
5241 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5242 {
5243         return -1;
5244 }