mbuf: add raw allocation function
dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_malloc.h>
64 #include <rte_mbuf.h>
65 #include <rte_ether.h>
66 #include <rte_ethdev.h>
67 #include <rte_prefetch.h>
68 #include <rte_udp.h>
69 #include <rte_tcp.h>
70 #include <rte_sctp.h>
71 #include <rte_string_fns.h>
72 #include <rte_errno.h>
73 #include <rte_ip.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit Mask to indicate what bits required for building TX context */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG |                 \
89                 PKT_TX_OUTER_IP_CKSUM)
90
91 #if 1
92 #define RTE_PMD_USE_PREFETCH
93 #endif
94
95 #ifdef RTE_PMD_USE_PREFETCH
96 /*
97  * Prefetch a cache line into all cache levels.
98  */
99 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
100 #else
101 #define rte_ixgbe_prefetch(p)   do {} while (0)
102 #endif
103
104 /*********************************************************************
105  *
106  *  TX functions
107  *
108  **********************************************************************/
109
110 /*
111  * Check for descriptors with their DD bit set and free mbufs.
112  * Return the total number of buffers freed.
113  */
114 static inline int __attribute__((always_inline))
115 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
116 {
117         struct ixgbe_tx_entry *txep;
118         uint32_t status;
119         int i, nb_free = 0;
120         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
121
122         /* check DD bit on threshold descriptor */
123         status = txq->tx_ring[txq->tx_next_dd].wb.status;
124         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
125                 return 0;
126
127         /*
128          * first buffer to free from S/W ring is at index
129          * tx_next_dd - (tx_rs_thresh-1)
130          */
131         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
132
133         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
134                 /* free buffers one at a time */
135                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
136                 txep->mbuf = NULL;
137
138                 if (unlikely(m == NULL))
139                         continue;
140
141                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
142                     (nb_free > 0 && m->pool != free[0]->pool)) {
143                         rte_mempool_put_bulk(free[0]->pool,
144                                              (void **)free, nb_free);
145                         nb_free = 0;
146                 }
147
148                 free[nb_free++] = m;
149         }
150
151         if (nb_free > 0)
152                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
153
154         /* buffers were freed, update counters */
155         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
156         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
157         if (txq->tx_next_dd >= txq->nb_tx_desc)
158                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
159
160         return txq->tx_rs_thresh;
161 }
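
/*
 * Illustrative note on the fast free path above (values are the driver
 * defaults, used only as an example): because tx_rs_thresh must be a
 * divisor of the ring size, a set DD bit on the threshold descriptor at
 * tx_next_dd means the whole preceding group of tx_rs_thresh buffers has
 * completed.  With tx_rs_thresh = 32 and tx_next_dd = 31, sw_ring[0..31]
 * are returned with a single rte_mempool_put_bulk() call when they all
 * come from the same pool, and tx_next_dd advances to 63.  Mbufs for which
 * __rte_pktmbuf_prefree_seg() returns NULL are still referenced elsewhere
 * and are simply skipped.
 */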
162
163 /* Populate 4 descriptors with data from 4 mbufs */
164 static inline void
165 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
166 {
167         uint64_t buf_dma_addr;
168         uint32_t pkt_len;
169         int i;
170
171         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
172                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
173                 pkt_len = (*pkts)->data_len;
174
175                 /* write data to descriptor */
176                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
177
178                 txdp->read.cmd_type_len =
179                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
180
181                 txdp->read.olinfo_status =
182                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
183
184                 rte_prefetch0(&(*pkts)->pool);
185         }
186 }
187
188 /* Populate 1 descriptor with data from 1 mbuf */
189 static inline void
190 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
191 {
192         uint64_t buf_dma_addr;
193         uint32_t pkt_len;
194
195         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
196         pkt_len = (*pkts)->data_len;
197
198         /* write data to descriptor */
199         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
200         txdp->read.cmd_type_len =
201                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
202         txdp->read.olinfo_status =
203                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
204         rte_prefetch0(&(*pkts)->pool);
205 }
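
/*
 * Note: tx4()/tx1() are only used on the simple transmit path, where every
 * packet is a single segment with no offloads, so one packet always maps to
 * exactly one data descriptor.  DCMD_DTYP_FLAGS (defined in ixgbe_rxtx.h) is
 * assumed here to carry the fixed per-descriptor command bits (advanced data
 * descriptor type plus DEXT/IFCS/EOP), which is why no per-packet flags are
 * computed in these helpers.
 */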
206
207 /*
208  * Fill H/W descriptor ring with mbuf data.
209  * Copy mbuf pointers to the S/W ring.
210  */
211 static inline void
212 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
213                       uint16_t nb_pkts)
214 {
215         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
216         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
217         const int N_PER_LOOP = 4;
218         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
219         int mainpart, leftover;
220         int i, j;
221
222         /*
223          * Process most of the packets in chunks of N pkts.  Any
224          * leftover packets will get processed one at a time.
225          */
226         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
227         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
228         for (i = 0; i < mainpart; i += N_PER_LOOP) {
229                 /* Copy N mbuf pointers to the S/W ring */
230                 for (j = 0; j < N_PER_LOOP; ++j) {
231                         (txep + i + j)->mbuf = *(pkts + i + j);
232                 }
233                 tx4(txdp + i, pkts + i);
234         }
235
236         if (unlikely(leftover > 0)) {
237                 for (i = 0; i < leftover; ++i) {
238                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
239                         tx1(txdp + mainpart + i, pkts + mainpart + i);
240                 }
241         }
242 }
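
/*
 * Worked example for the fill loop above: for nb_pkts = 13, mainpart is
 * 13 & ~3 = 12 and leftover is 13 & 3 = 1, so three tx4() calls populate
 * descriptors in groups of four and a final tx1() call handles the last
 * packet.  The S/W ring entry for each packet is recorded so the mbuf can
 * be freed later by ixgbe_tx_free_bufs().
 */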
243
244 static inline uint16_t
245 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
246              uint16_t nb_pkts)
247 {
248         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
249         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
250         uint16_t n = 0;
251
252         /*
253          * Begin scanning the H/W ring for done descriptors when the
254          * number of available descriptors drops below tx_free_thresh.  For
255          * each done descriptor, free the associated buffer.
256          */
257         if (txq->nb_tx_free < txq->tx_free_thresh)
258                 ixgbe_tx_free_bufs(txq);
259
260         /* Only use descriptors that are available */
261         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
262         if (unlikely(nb_pkts == 0))
263                 return 0;
264
265         /* Use exactly nb_pkts descriptors */
266         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
267
268         /*
269          * At this point, we know there are enough descriptors in the
270          * ring to transmit all the packets.  This assumes that each
271          * mbuf contains a single segment, and that no new offloads
272          * are expected, which would require a new context descriptor.
273          */
274
275         /*
276          * See if we're going to wrap-around. If so, handle the top
277          * of the descriptor ring first, then do the bottom.  If not,
278          * the processing looks just like the "bottom" part anyway...
279          */
280         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
281                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
282                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
283
284                 /*
285                  * We know that the last descriptor in the ring will need to
286                  * have its RS bit set because tx_rs_thresh has to be
287                  * a divisor of the ring size
288                  */
289                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
290                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
291                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
292
293                 txq->tx_tail = 0;
294         }
295
296         /* Fill H/W descriptor ring with mbuf data */
297         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
298         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
299
300         /*
301          * Determine if RS bit should be set
302          * This is what we actually want:
303          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
304          * but instead of subtracting 1 and doing >=, we can just do
305          * greater than without subtracting.
306          */
307         if (txq->tx_tail > txq->tx_next_rs) {
308                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
309                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
310                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
311                                                 txq->tx_rs_thresh);
312                 if (txq->tx_next_rs >= txq->nb_tx_desc)
313                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
314         }
315
316         /*
317          * Check for wrap-around. This would only happen if we used
318          * up to the last descriptor in the ring, no more, no less.
319          */
320         if (txq->tx_tail >= txq->nb_tx_desc)
321                 txq->tx_tail = 0;
322
323         /* update tail pointer */
324         rte_wmb();
325         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
326
327         return nb_pkts;
328 }
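
/*
 * Example of the RS logic above (hypothetical values): with
 * tx_rs_thresh = 32 and tx_next_rs currently 31, the RS bit is requested on
 * descriptor 31 once the tail has moved past it (tx_tail > tx_next_rs), and
 * tx_next_rs advances to 63.  On a wrap-around, the top of the ring is
 * filled first, RS is set on the current threshold descriptor, tx_next_rs
 * is reset to tx_rs_thresh - 1 and the remaining packets start again from
 * descriptor 0.
 */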
329
330 uint16_t
331 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
332                        uint16_t nb_pkts)
333 {
334         uint16_t nb_tx;
335
336         /* Transmit the whole burst at once if it fits within TX_MAX_BURST pkts */
337         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
338                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
339
340         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
341         nb_tx = 0;
342         while (nb_pkts) {
343                 uint16_t ret, n;
344
345                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
346                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
347                 nb_tx = (uint16_t)(nb_tx + ret);
348                 nb_pkts = (uint16_t)(nb_pkts - ret);
349                 if (ret < n)
350                         break;
351         }
352
353         return nb_tx;
354 }
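
/*
 * Example of the burst splitting above (assuming RTE_PMD_IXGBE_TX_MAX_BURST
 * is 32): a 100-packet burst is submitted as chunks of 32, 32, 32 and 4.
 * If a chunk is only partially accepted because the ring ran out of free
 * descriptors (ret < n), the loop stops and the number of packets actually
 * queued so far is returned to the caller.
 */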
355
356 static inline void
357 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
358                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
359                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
360 {
361         uint32_t type_tucmd_mlhl;
362         uint32_t mss_l4len_idx = 0;
363         uint32_t ctx_idx;
364         uint32_t vlan_macip_lens;
365         union ixgbe_tx_offload tx_offload_mask;
366         uint32_t seqnum_seed = 0;
367
368         ctx_idx = txq->ctx_curr;
369         tx_offload_mask.data[0] = 0;
370         tx_offload_mask.data[1] = 0;
371         type_tucmd_mlhl = 0;
372
373         /* Specify which HW CTX to upload. */
374         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
375
376         if (ol_flags & PKT_TX_VLAN_PKT) {
377                 tx_offload_mask.vlan_tci |= ~0;
378         }
379
380         /* check if TCP segmentation required for this packet */
381         if (ol_flags & PKT_TX_TCP_SEG) {
382                 /* implies IP cksum in IPv4 */
383                 if (ol_flags & PKT_TX_IP_CKSUM)
384                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
385                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
386                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
387                 else
388                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
389                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
390                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
391
392                 tx_offload_mask.l2_len |= ~0;
393                 tx_offload_mask.l3_len |= ~0;
394                 tx_offload_mask.l4_len |= ~0;
395                 tx_offload_mask.tso_segsz |= ~0;
396                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
397                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
398         } else { /* no TSO, check if hardware checksum is needed */
399                 if (ol_flags & PKT_TX_IP_CKSUM) {
400                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
401                         tx_offload_mask.l2_len |= ~0;
402                         tx_offload_mask.l3_len |= ~0;
403                 }
404
405                 switch (ol_flags & PKT_TX_L4_MASK) {
406                 case PKT_TX_UDP_CKSUM:
407                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
408                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
409                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
410                         tx_offload_mask.l2_len |= ~0;
411                         tx_offload_mask.l3_len |= ~0;
412                         break;
413                 case PKT_TX_TCP_CKSUM:
414                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
415                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
416                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
417                         tx_offload_mask.l2_len |= ~0;
418                         tx_offload_mask.l3_len |= ~0;
419                         break;
420                 case PKT_TX_SCTP_CKSUM:
421                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
422                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
423                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
424                         tx_offload_mask.l2_len |= ~0;
425                         tx_offload_mask.l3_len |= ~0;
426                         break;
427                 default:
428                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
429                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
430                         break;
431                 }
432         }
433
434         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
435                 tx_offload_mask.outer_l2_len |= ~0;
436                 tx_offload_mask.outer_l3_len |= ~0;
437                 tx_offload_mask.l2_len |= ~0;
438                 seqnum_seed |= tx_offload.outer_l3_len
439                                << IXGBE_ADVTXD_OUTER_IPLEN;
440                 seqnum_seed |= tx_offload.l2_len
441                                << IXGBE_ADVTXD_TUNNEL_LEN;
442         }
443
444         txq->ctx_cache[ctx_idx].flags = ol_flags;
445         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
446                 tx_offload_mask.data[0] & tx_offload.data[0];
447         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
448                 tx_offload_mask.data[1] & tx_offload.data[1];
449         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
450
451         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
452         vlan_macip_lens = tx_offload.l3_len;
453         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
454                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
455                                     IXGBE_ADVTXD_MACLEN_SHIFT);
456         else
457                 vlan_macip_lens |= (tx_offload.l2_len <<
458                                     IXGBE_ADVTXD_MACLEN_SHIFT);
459         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
460         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
461         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
462         ctx_txd->seqnum_seed     = seqnum_seed;
463 }
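
/*
 * Note on the context descriptor built above: tx_offload_mask records which
 * offload fields actually matter for the requested flags, and only the
 * masked fields are stored in ctx_cache[].  This lets what_advctx_update()
 * ignore irrelevant differences between packets.  For example, for plain
 * PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM traffic only l2_len and l3_len are
 * masked in, so a change in tso_segsz alone does not force a new context.
 */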
464
465 /*
466  * Check which hardware context can be used. Use the existing match
467  * or create a new context descriptor.
468  */
469 static inline uint32_t
470 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
471                    union ixgbe_tx_offload tx_offload)
472 {
473         /* Check whether it matches the context currently in use */
474         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
475                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
476                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
477                      & tx_offload.data[0])) &&
478                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
479                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
480                      & tx_offload.data[1]))))
481                 return txq->ctx_curr;
482
483         /* Otherwise check whether the other context slot matches */
484         txq->ctx_curr ^= 1;
485         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
486                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
487                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
488                      & tx_offload.data[0])) &&
489                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
490                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
491                      & tx_offload.data[1]))))
492                 return txq->ctx_curr;
493
494         /* Neither slot matches: a new context descriptor is needed */
495         return IXGBE_CTX_NUM;
496 }
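
/*
 * The hardware keeps IXGBE_CTX_NUM (two) offload context slots per TX
 * queue.  The check above first tries the slot currently in use, then
 * toggles ctx_curr and tries the other one; returning IXGBE_CTX_NUM tells
 * the caller that neither slot matches and a fresh context descriptor must
 * be written into the slot that ctx_curr now points to.
 */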
497
498 static inline uint32_t
499 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
500 {
501         uint32_t tmp = 0;
502
503         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
504                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
505         if (ol_flags & PKT_TX_IP_CKSUM)
506                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
507         if (ol_flags & PKT_TX_TCP_SEG)
508                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
509         return tmp;
510 }
511
512 static inline uint32_t
513 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
514 {
515         uint32_t cmdtype = 0;
516
517         if (ol_flags & PKT_TX_VLAN_PKT)
518                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
519         if (ol_flags & PKT_TX_TCP_SEG)
520                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
521         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
522                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
523         return cmdtype;
524 }
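
/*
 * The two small helpers above translate mbuf ol_flags into descriptor bits:
 * tx_desc_cksum_flags_to_olinfo() sets IXSM/TXSM so the hardware inserts the
 * IP and/or L4 checksum, and tx_desc_ol_flags_to_cmdtype() sets VLE (VLAN
 * insertion), TSE (TCP segmentation) and the outer-IP-checksum bit.  The
 * results are OR-ed into cmd_type_len/olinfo_status for every data
 * descriptor of the packet in ixgbe_xmit_pkts() below.
 */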
525
526 /* Default RS bit threshold values */
527 #ifndef DEFAULT_TX_RS_THRESH
528 #define DEFAULT_TX_RS_THRESH   32
529 #endif
530 #ifndef DEFAULT_TX_FREE_THRESH
531 #define DEFAULT_TX_FREE_THRESH 32
532 #endif
533
534 /* Reset transmit descriptors after they have been used */
535 static inline int
536 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
537 {
538         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
539         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
540         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
541         uint16_t nb_tx_desc = txq->nb_tx_desc;
542         uint16_t desc_to_clean_to;
543         uint16_t nb_tx_to_clean;
544         uint32_t status;
545
546         /* Determine the last descriptor needing to be cleaned */
547         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
548         if (desc_to_clean_to >= nb_tx_desc)
549                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
550
551         /* Check to make sure the last descriptor to clean is done */
552         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
553         status = txr[desc_to_clean_to].wb.status;
554         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
555                 PMD_TX_FREE_LOG(DEBUG,
556                                 "TX descriptor %4u is not done "
557                                 "(port=%d queue=%d)",
558                                 desc_to_clean_to,
559                                 txq->port_id, txq->queue_id);
560                 /* Failed to clean any descriptors, better luck next time */
561                 return -(1);
562         }
563
564         /* Figure out how many descriptors will be cleaned */
565         if (last_desc_cleaned > desc_to_clean_to)
566                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
567                                                         desc_to_clean_to);
568         else
569                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
570                                                 last_desc_cleaned);
571
572         PMD_TX_FREE_LOG(DEBUG,
573                         "Cleaning %4u TX descriptors: %4u to %4u "
574                         "(port=%d queue=%d)",
575                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
576                         txq->port_id, txq->queue_id);
577
578         /*
579          * The last descriptor to clean is done, so that means all the
580          * descriptors from the last descriptor that was cleaned
581          * up to the last descriptor with the RS bit set
582          * are done. Only reset the threshold descriptor.
583          */
584         txr[desc_to_clean_to].wb.status = 0;
585
586         /* Update the txq to reflect the last descriptor that was cleaned */
587         txq->last_desc_cleaned = desc_to_clean_to;
588         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
589
590         /* No Error */
591         return 0;
592 }
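
/*
 * Worked example for the cleanup above (hypothetical values, with packet
 * boundaries aligned to the threshold): if last_desc_cleaned is 31 and
 * tx_rs_thresh is 32, the function probes descriptor 63 (after redirecting
 * through sw_ring[63].last_id, which points at the last descriptor of the
 * packet occupying that slot).  If its DD bit is set, nb_tx_free grows by
 * 32; if not, -1 is returned and the caller either gives up on this burst
 * or retries after more descriptors complete.
 */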
593
594 uint16_t
595 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
596                 uint16_t nb_pkts)
597 {
598         struct ixgbe_tx_queue *txq;
599         struct ixgbe_tx_entry *sw_ring;
600         struct ixgbe_tx_entry *txe, *txn;
601         volatile union ixgbe_adv_tx_desc *txr;
602         volatile union ixgbe_adv_tx_desc *txd, *txp;
603         struct rte_mbuf     *tx_pkt;
604         struct rte_mbuf     *m_seg;
605         uint64_t buf_dma_addr;
606         uint32_t olinfo_status;
607         uint32_t cmd_type_len;
608         uint32_t pkt_len;
609         uint16_t slen;
610         uint64_t ol_flags;
611         uint16_t tx_id;
612         uint16_t tx_last;
613         uint16_t nb_tx;
614         uint16_t nb_used;
615         uint64_t tx_ol_req;
616         uint32_t ctx = 0;
617         uint32_t new_ctx;
618         union ixgbe_tx_offload tx_offload;
619
620         tx_offload.data[0] = 0;
621         tx_offload.data[1] = 0;
622         txq = tx_queue;
623         sw_ring = txq->sw_ring;
624         txr     = txq->tx_ring;
625         tx_id   = txq->tx_tail;
626         txe = &sw_ring[tx_id];
627         txp = NULL;
628
629         /* Determine if the descriptor ring needs to be cleaned. */
630         if (txq->nb_tx_free < txq->tx_free_thresh)
631                 ixgbe_xmit_cleanup(txq);
632
633         rte_prefetch0(&txe->mbuf->pool);
634
635         /* TX loop */
636         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
637                 new_ctx = 0;
638                 tx_pkt = *tx_pkts++;
639                 pkt_len = tx_pkt->pkt_len;
640
641                 /*
642                  * Determine how many (if any) context descriptors
643                  * are needed for offload functionality.
644                  */
645                 ol_flags = tx_pkt->ol_flags;
646
647                 /* If hardware offload required */
648                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
649                 if (tx_ol_req) {
650                         tx_offload.l2_len = tx_pkt->l2_len;
651                         tx_offload.l3_len = tx_pkt->l3_len;
652                         tx_offload.l4_len = tx_pkt->l4_len;
653                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
654                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
655                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
656                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
657
658                         /* Check whether a new context must be built or an existing one can be reused. */
659                         ctx = what_advctx_update(txq, tx_ol_req,
660                                 tx_offload);
661                         /* Only allocate a context descriptor if required */
662                         new_ctx = (ctx == IXGBE_CTX_NUM);
663                         ctx = txq->ctx_curr;
664                 }
665
666                 /*
667                  * Keep track of how many descriptors are used this loop
668                  * This will always be the number of segments + the number of
669                  * Context descriptors required to transmit the packet
670                  */
671                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
672
673                 if (txp != NULL &&
674                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
675                         /* set RS on the previous packet in the burst */
676                         txp->read.cmd_type_len |=
677                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
678
679                 /*
680                  * The number of descriptors that must be allocated for a
681                  * packet is the number of segments of that packet, plus 1
682                  * Context Descriptor for the hardware offload, if any.
683                  * Determine the last TX descriptor to allocate in the TX ring
684                  * for the packet, starting from the current position (tx_id)
685                  * in the ring.
686                  */
687                 tx_last = (uint16_t) (tx_id + nb_used - 1);
688
689                 /* Circular ring */
690                 if (tx_last >= txq->nb_tx_desc)
691                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
692
693                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
694                            " tx_first=%u tx_last=%u",
695                            (unsigned) txq->port_id,
696                            (unsigned) txq->queue_id,
697                            (unsigned) pkt_len,
698                            (unsigned) tx_id,
699                            (unsigned) tx_last);
700
701                 /*
702                  * Make sure there are enough TX descriptors available to
703                  * transmit the entire packet.
704                  * nb_used better be less than or equal to txq->tx_rs_thresh
705                  */
706                 if (nb_used > txq->nb_tx_free) {
707                         PMD_TX_FREE_LOG(DEBUG,
708                                         "Not enough free TX descriptors "
709                                         "nb_used=%4u nb_free=%4u "
710                                         "(port=%d queue=%d)",
711                                         nb_used, txq->nb_tx_free,
712                                         txq->port_id, txq->queue_id);
713
714                         if (ixgbe_xmit_cleanup(txq) != 0) {
715                                 /* Could not clean any descriptors */
716                                 if (nb_tx == 0)
717                                         return 0;
718                                 goto end_of_tx;
719                         }
720
721                         /* nb_used better be <= txq->tx_rs_thresh */
722                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
723                                 PMD_TX_FREE_LOG(DEBUG,
724                                         "The number of descriptors needed to "
725                                         "transmit the packet exceeds the "
726                                         "RS bit threshold. This will impact "
727                                         "performance."
728                                         "nb_used=%4u nb_free=%4u "
729                                         "tx_rs_thresh=%4u. "
730                                         "(port=%d queue=%d)",
731                                         nb_used, txq->nb_tx_free,
732                                         txq->tx_rs_thresh,
733                                         txq->port_id, txq->queue_id);
734                                 /*
735                                  * Loop here until there are enough TX
736                                  * descriptors or until the ring cannot be
737                                  * cleaned.
738                                  */
739                                 while (nb_used > txq->nb_tx_free) {
740                                         if (ixgbe_xmit_cleanup(txq) != 0) {
741                                                 /*
742                                                  * Could not clean any
743                                                  * descriptors
744                                                  */
745                                                 if (nb_tx == 0)
746                                                         return 0;
747                                                 goto end_of_tx;
748                                         }
749                                 }
750                         }
751                 }
752
753                 /*
754                  * By now there are enough free TX descriptors to transmit
755                  * the packet.
756                  */
757
758                 /*
759                  * Set common flags of all TX Data Descriptors.
760                  *
761                  * The following bits must be set in all Data Descriptors:
762                  *   - IXGBE_ADVTXD_DTYP_DATA
763                  *   - IXGBE_ADVTXD_DCMD_DEXT
764                  *
765                  * The following bits must be set in the first Data Descriptor
766                  * and are ignored in the other ones:
767                  *   - IXGBE_ADVTXD_DCMD_IFCS
768                  *   - IXGBE_ADVTXD_MAC_1588
769                  *   - IXGBE_ADVTXD_DCMD_VLE
770                  *
771                  * The following bits must only be set in the last Data
772                  * Descriptor:
773                  *   - IXGBE_TXD_CMD_EOP
774                  *
775                  * The following bits can be set in any Data Descriptor, but
776                  * are only set in the last Data Descriptor:
777                  *   - IXGBE_TXD_CMD_RS
778                  */
779                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
780                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
781
782 #ifdef RTE_LIBRTE_IEEE1588
783                 if (ol_flags & PKT_TX_IEEE1588_TMST)
784                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
785 #endif
786
787                 olinfo_status = 0;
788                 if (tx_ol_req) {
789
790                         if (ol_flags & PKT_TX_TCP_SEG) {
791                                 /* when TSO is on, the paylen in the descriptor
792                                  * is not the packet len but the TCP payload len */
793                                 pkt_len -= (tx_offload.l2_len +
794                                         tx_offload.l3_len + tx_offload.l4_len);
795                         }
796
797                         /*
798                          * Setup the TX Advanced Context Descriptor if required
799                          */
800                         if (new_ctx) {
801                                 volatile struct ixgbe_adv_tx_context_desc *
802                                     ctx_txd;
803
804                                 ctx_txd = (volatile struct
805                                     ixgbe_adv_tx_context_desc *)
806                                     &txr[tx_id];
807
808                                 txn = &sw_ring[txe->next_id];
809                                 rte_prefetch0(&txn->mbuf->pool);
810
811                                 if (txe->mbuf != NULL) {
812                                         rte_pktmbuf_free_seg(txe->mbuf);
813                                         txe->mbuf = NULL;
814                                 }
815
816                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
817                                         tx_offload);
818
819                                 txe->last_id = tx_last;
820                                 tx_id = txe->next_id;
821                                 txe = txn;
822                         }
823
824                         /*
825                          * Set up the TX Advanced Data Descriptor.
826                          * This path is taken whether a new context
827                          * descriptor was built or an existing one is reused.
828                          */
829                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
830                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
831                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
832                 }
833
834                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
835
836                 m_seg = tx_pkt;
837                 do {
838                         txd = &txr[tx_id];
839                         txn = &sw_ring[txe->next_id];
840                         rte_prefetch0(&txn->mbuf->pool);
841
842                         if (txe->mbuf != NULL)
843                                 rte_pktmbuf_free_seg(txe->mbuf);
844                         txe->mbuf = m_seg;
845
846                         /*
847                          * Set up Transmit Data Descriptor.
848                          */
849                         slen = m_seg->data_len;
850                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
851                         txd->read.buffer_addr =
852                                 rte_cpu_to_le_64(buf_dma_addr);
853                         txd->read.cmd_type_len =
854                                 rte_cpu_to_le_32(cmd_type_len | slen);
855                         txd->read.olinfo_status =
856                                 rte_cpu_to_le_32(olinfo_status);
857                         txe->last_id = tx_last;
858                         tx_id = txe->next_id;
859                         txe = txn;
860                         m_seg = m_seg->next;
861                 } while (m_seg != NULL);
862
863                 /*
864                  * The last packet data descriptor needs End Of Packet (EOP)
865                  */
866                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
867                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
868                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
869
870                 /* Set RS bit only on threshold packets' last descriptor */
871                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
872                         PMD_TX_FREE_LOG(DEBUG,
873                                         "Setting RS bit on TXD id="
874                                         "%4u (port=%d queue=%d)",
875                                         tx_last, txq->port_id, txq->queue_id);
876
877                         cmd_type_len |= IXGBE_TXD_CMD_RS;
878
879                         /* Update txq RS bit counters */
880                         txq->nb_tx_used = 0;
881                         txp = NULL;
882                 } else
883                         txp = txd;
884
885                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
886         }
887
888 end_of_tx:
889         /* set RS on last packet in the burst */
890         if (txp != NULL)
891                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
892
893         rte_wmb();
894
895         /*
896          * Set the Transmit Descriptor Tail (TDT)
897          */
898         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
899                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
900                    (unsigned) tx_id, (unsigned) nb_tx);
901         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
902         txq->tx_tail = tx_id;
903
904         return nb_tx;
905 }
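
/*
 * Summary of the full-featured transmit path above: each packet consumes
 * one data descriptor per mbuf segment, plus one context descriptor when
 * the offload context has to change.  EOP is set on the last segment and
 * RS is requested only about once every tx_rs_thresh descriptors to limit
 * write-backs.  Applications normally reach this function indirectly, e.g.
 * (illustrative call, names as in the ethdev API):
 *
 *     nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *
 * which the ethdev layer typically dispatches to ixgbe_xmit_pkts() when the
 * queue is configured to allow multi-segment packets or TX offloads.
 */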
906
907 /*********************************************************************
908  *
909  *  RX functions
910  *
911  **********************************************************************/
912 #define IXGBE_PACKET_TYPE_IPV4              0X01
913 #define IXGBE_PACKET_TYPE_IPV4_TCP          0X11
914 #define IXGBE_PACKET_TYPE_IPV4_UDP          0X21
915 #define IXGBE_PACKET_TYPE_IPV4_SCTP         0X41
916 #define IXGBE_PACKET_TYPE_IPV4_EXT          0X03
917 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP     0X43
918 #define IXGBE_PACKET_TYPE_IPV6              0X04
919 #define IXGBE_PACKET_TYPE_IPV6_TCP          0X14
920 #define IXGBE_PACKET_TYPE_IPV6_UDP          0X24
921 #define IXGBE_PACKET_TYPE_IPV6_EXT          0X0C
922 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP      0X1C
923 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP      0X2C
924 #define IXGBE_PACKET_TYPE_IPV4_IPV6         0X05
925 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP     0X15
926 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP     0X25
927 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
928 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
929 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
930
931 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
932 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
933 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
934 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
935 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
936 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
937 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
938 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
939 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
940 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
941 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
942 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
943 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
944 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
945 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
946 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
947 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
948 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
949 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
950
951 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
952 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
953 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
954 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
955 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
956 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
957 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
958 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
959 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
960 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
961 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
962 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
963 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
964 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
965 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
966 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
967 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
968 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
969 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
970
971 #define IXGBE_PACKET_TYPE_MAX               0X80
972 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
973 #define IXGBE_PACKET_TYPE_SHIFT             0X04
974
975 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
976 static inline uint32_t
977 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
978 {
979         /**
980          * Use two different tables for normal packets and tunnel packets
981          * to save space.
982          */
983         static const uint32_t
984                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
985                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
986                         RTE_PTYPE_L3_IPV4,
987                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
988                         RTE_PTYPE_L3_IPV4_EXT,
989                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
990                         RTE_PTYPE_L3_IPV6,
991                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
992                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
993                         RTE_PTYPE_INNER_L3_IPV6,
994                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
995                         RTE_PTYPE_L3_IPV6_EXT,
996                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
997                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
998                         RTE_PTYPE_INNER_L3_IPV6_EXT,
999                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1000                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1001                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1002                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1003                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1004                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1005                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1006                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1007                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1008                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1009                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1010                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1011                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1012                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1013                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1014                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1015                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1016                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1017                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1018                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1019                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1020                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1021                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1022                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1023                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1024                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1025                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1026                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1027         };
1028
1029         static const uint32_t
1030                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1031                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1032                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1033                         RTE_PTYPE_INNER_L2_ETHER,
1034                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1035                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1036                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1037                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1038                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1039                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1040                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1041                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1042                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1043                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1044                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1045                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1046                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1047                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1048                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1049                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1050                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1051                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1052                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1053                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1054                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1055                         RTE_PTYPE_INNER_L4_TCP,
1056                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1057                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1058                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1059                         RTE_PTYPE_INNER_L4_TCP,
1060                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1061                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1062                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1063                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1064                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1065                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1066                         RTE_PTYPE_INNER_L4_TCP,
1067                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1068                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1069                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1070                         RTE_PTYPE_INNER_L3_IPV4,
1071                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1072                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1073                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1074                         RTE_PTYPE_INNER_L4_UDP,
1075                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1077                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1078                         RTE_PTYPE_INNER_L4_UDP,
1079                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1080                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1081                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1082                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1083                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1084                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1085                         RTE_PTYPE_INNER_L4_UDP,
1086                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1087                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1088                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1089                         RTE_PTYPE_INNER_L3_IPV4,
1090                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1091                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1092                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1093                         RTE_PTYPE_INNER_L4_SCTP,
1094                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1095                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1096                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1097                         RTE_PTYPE_INNER_L4_SCTP,
1098
1099                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1101                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1102                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1103                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1104                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1105                         RTE_PTYPE_INNER_L3_IPV4,
1106                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1107                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1108                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1109                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1110                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1111                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1112                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1113                         RTE_PTYPE_INNER_L3_IPV6,
1114                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1116                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1117                         RTE_PTYPE_INNER_L3_IPV4,
1118                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1119                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1120                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1121                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1122                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1123                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1124                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1125                         RTE_PTYPE_INNER_L3_IPV4,
1126                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1128                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1129                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1130                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1131                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1132                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1133                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1134                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1135                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1136                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1137                         RTE_PTYPE_INNER_L3_IPV4,
1138                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1140                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1141                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1142                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1143                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1144                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1145                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1146                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1148                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1149                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1150                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1151                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1152                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1153                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1154                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1155                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1156                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1157                         RTE_PTYPE_INNER_L3_IPV4,
1158                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1160                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1161                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1162                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1163                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1164                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1165                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1166                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1167                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1168                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1169                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1170                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1172                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1173                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1174         };
1175
1176         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1177                 return RTE_PTYPE_UNKNOWN;
1178
1179         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1180
1181         /* For tunnel packet */
1182         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1183                 /* Remove the tunnel bit to save space. */
1184                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1185                 return ptype_table_tn[pkt_info];
1186         }
1187
1188         /**
1189          * For x550, if the packet is not a tunnel packet,
1190          * the tunnel type bits should be 0.
1191          * Reuse the 82599 mask.
1192          */
1193         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1194
1195         return ptype_table[pkt_info];
1196 }
1197
1198 static inline uint64_t
1199 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1200 {
1201         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1202                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1203                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1204                 PKT_RX_RSS_HASH, 0, 0, 0,
1205                 0, 0, 0,  PKT_RX_FDIR,
1206         };
1207 #ifdef RTE_LIBRTE_IEEE1588
1208         static uint64_t ip_pkt_etqf_map[8] = {
1209                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1210                 0, 0, 0, 0,
1211         };
1212
1213         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1214                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0x07] |
1215                                 ip_rss_types_map[pkt_info & 0xF];
1216         else
1217                 return ip_rss_types_map[pkt_info & 0xF];
1218 #else
1219         return ip_rss_types_map[pkt_info & 0xF];
1220 #endif
1221 }
1222
1223 static inline uint64_t
1224 rx_desc_status_to_pkt_flags(uint32_t rx_status)
1225 {
1226         uint64_t pkt_flags;
1227
1228         /*
1229          * Check only whether a VLAN tag is present.
1230          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1231          * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
1232          */
1233         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  PKT_RX_VLAN_PKT : 0;
1234
1235 #ifdef RTE_LIBRTE_IEEE1588
1236         if (rx_status & IXGBE_RXD_STAT_TMST)
1237                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1238 #endif
1239         return pkt_flags;
1240 }
1241
1242 static inline uint64_t
1243 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1244 {
1245         uint64_t pkt_flags;
1246
1247         /*
1248          * Bit 31: IPE, IPv4 checksum error
1249          * Bit 30: L4I, L4 integrity error
1250          */
1251         static uint64_t error_to_pkt_flags_map[4] = {
1252                 0,  PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD,
1253                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1254         };
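        /*
         * Worked example (illustrative, assuming the usual
         * IXGBE_RXDADV_ERR_CKSUM_BIT of 30 and a 2-bit mask): a status word
         * with only IPE (bit 31) set indexes entry 2 of the map above and
         * yields PKT_RX_IP_CKSUM_BAD, an L4 error alone (bit 30) indexes
         * entry 1 and yields PKT_RX_L4_CKSUM_BAD, and both bits set index
         * entry 3.
         */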
1255         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1256                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1257
1258         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1259             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1260                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1261         }
1262
1263         return pkt_flags;
1264 }
1265
1266 /*
1267  * LOOK_AHEAD defines how many desc statuses to check beyond the
1268  * current descriptor.
1269  * It must be a compile-time constant (#define) for optimal performance.
1270  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1271  * function only works with LOOK_AHEAD=8.
1272  */
1273 #define LOOK_AHEAD 8
1274 #if (LOOK_AHEAD != 8)
1275 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1276 #endif
1277 static inline int
1278 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1279 {
1280         volatile union ixgbe_adv_rx_desc *rxdp;
1281         struct ixgbe_rx_entry *rxep;
1282         struct rte_mbuf *mb;
1283         uint16_t pkt_len;
1284         uint64_t pkt_flags;
1285         int nb_dd;
1286         uint32_t s[LOOK_AHEAD];
1287         uint32_t pkt_info[LOOK_AHEAD];
1288         int i, j, nb_rx = 0;
1289         uint32_t status;
1290
1291         /* get references to current descriptor and S/W ring entry */
1292         rxdp = &rxq->rx_ring[rxq->rx_tail];
1293         rxep = &rxq->sw_ring[rxq->rx_tail];
1294
1295         status = rxdp->wb.upper.status_error;
1296         /* check to make sure there is at least 1 packet to receive */
1297         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1298                 return 0;
1299
1300         /*
1301          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1302          * reference packets that are ready to be received.
1303          */
1304         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1305              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1306                 /* Read desc statuses backwards to avoid race condition */
1307                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1308                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1309
1310                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1311                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1312                                                        lo_dword.data);
1313
1314                 /* Compute how many status bits were set */
1315                 nb_dd = 0;
1316                 for (j = 0; j < LOOK_AHEAD; ++j)
1317                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1318
1319                 nb_rx += nb_dd;
1320
1321                 /* Translate descriptor info to mbuf format */
1322                 for (j = 0; j < nb_dd; ++j) {
1323                         mb = rxep[j].mbuf;
1324                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1325                                   rxq->crc_len;
1326                         mb->data_len = pkt_len;
1327                         mb->pkt_len = pkt_len;
1328                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1329
1330                         /* convert descriptor fields to rte mbuf flags */
1331                         pkt_flags = rx_desc_status_to_pkt_flags(s[j]);
1332                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1333                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1334                                         ((uint16_t)pkt_info[j]);
1335                         mb->ol_flags = pkt_flags;
1336                         mb->packet_type =
1337                                 ixgbe_rxd_pkt_info_to_pkt_type
1338                                         (pkt_info[j], rxq->pkt_type_mask);
1339
1340                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1341                                 mb->hash.rss = rte_le_to_cpu_32(
1342                                     rxdp[j].wb.lower.hi_dword.rss);
1343                         else if (pkt_flags & PKT_RX_FDIR) {
1344                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1345                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1346                                     IXGBE_ATR_HASH_MASK;
1347                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1348                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1349                         }
1350                 }
1351
1352                 /* Move mbuf pointers from the S/W ring to the stage */
1353                 for (j = 0; j < LOOK_AHEAD; ++j) {
1354                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1355                 }
1356
1357                 /* stop scanning if not all LOOK_AHEAD descriptors were done */
1358                 if (nb_dd != LOOK_AHEAD)
1359                         break;
1360         }
1361
1362         /* clear software ring entries so we can clean up correctly */
1363         for (i = 0; i < nb_rx; ++i) {
1364                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1365         }
1366
1367
1368         return nb_rx;
1369 }
1370
1371 static inline int
1372 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1373 {
1374         volatile union ixgbe_adv_rx_desc *rxdp;
1375         struct ixgbe_rx_entry *rxep;
1376         struct rte_mbuf *mb;
1377         uint16_t alloc_idx;
1378         __le64 dma_addr;
1379         int diag, i;
1380
1381         /* allocate buffers in bulk directly into the S/W ring */
1382         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1383         rxep = &rxq->sw_ring[alloc_idx];
1384         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1385                                     rxq->rx_free_thresh);
1386         if (unlikely(diag != 0))
1387                 return -ENOMEM;
1388
1389         rxdp = &rxq->rx_ring[alloc_idx];
1390         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1391                 /* populate the static rte mbuf fields */
1392                 mb = rxep[i].mbuf;
1393                 if (reset_mbuf) {
1394                         mb->next = NULL;
1395                         mb->nb_segs = 1;
1396                         mb->port = rxq->port_id;
1397                 }
1398
1399                 rte_mbuf_refcnt_set(mb, 1);
1400                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1401
1402                 /* populate the descriptors */
1403                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1404                 rxdp[i].read.hdr_addr = 0;
1405                 rxdp[i].read.pkt_addr = dma_addr;
1406         }
1407
1408         /* update state of internal queue structure */
1409         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1410         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1411                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1412
1413         /* no errors */
1414         return 0;
1415 }
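
/*
 * Worked example for the refill arithmetic in ixgbe_rx_alloc_bufs() above
 * (illustrative): with nb_rx_desc = 128 and rx_free_thresh = 32, and assuming
 * the queue reset code starts rx_free_trigger at rx_free_thresh - 1 = 31,
 * successive calls compute alloc_idx = 0, 32, 64, 96 and move rx_free_trigger
 * to 63, 95, 127 and back to 31, i.e. the ring is always replenished in
 * aligned chunks of rx_free_thresh buffers.
 */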
1416
1417 static inline uint16_t
1418 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1419                          uint16_t nb_pkts)
1420 {
1421         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1422         int i;
1423
1424         /* how many packets are ready to return? */
1425         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1426
1427         /* copy mbuf pointers to the application's packet list */
1428         for (i = 0; i < nb_pkts; ++i)
1429                 rx_pkts[i] = stage[i];
1430
1431         /* update internal queue state */
1432         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1433         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1434
1435         return nb_pkts;
1436 }
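
/*
 * Illustrative sketch of how the helpers above are glued together by
 * rx_recv_pkts() below (bulk-alloc receive path):
 *
 *     nb = ixgbe_rx_scan_hw_ring(rxq);            - harvest completed
 *                                                   descriptors into rx_stage[]
 *     if (rx_tail passed rx_free_trigger)
 *             ixgbe_rx_alloc_bufs(rxq, true);     - refill S/W and H/W rings
 *     return ixgbe_rx_fill_from_stage(rxq, ...);  - hand staged mbufs to the
 *                                                   application
 */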
1437
1438 static inline uint16_t
1439 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1440              uint16_t nb_pkts)
1441 {
1442         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1443         uint16_t nb_rx = 0;
1444
1445         /* Any previously recv'd pkts will be returned from the Rx stage */
1446         if (rxq->rx_nb_avail)
1447                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1448
1449         /* Scan the H/W ring for packets to receive */
1450         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1451
1452         /* update internal queue state */
1453         rxq->rx_next_avail = 0;
1454         rxq->rx_nb_avail = nb_rx;
1455         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1456
1457         /* if required, allocate new buffers to replenish descriptors */
1458         if (rxq->rx_tail > rxq->rx_free_trigger) {
1459                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1460
1461                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1462                         int i, j;
1463
1464                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1465                                    "queue_id=%u", (unsigned) rxq->port_id,
1466                                    (unsigned) rxq->queue_id);
1467
1468                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1469                                 rxq->rx_free_thresh;
1470
1471                         /*
1472                          * Need to rewind any previous receives if we cannot
1473                          * allocate new buffers to replenish the old ones.
1474                          */
1475                         rxq->rx_nb_avail = 0;
1476                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1477                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1478                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1479
1480                         return 0;
1481                 }
1482
1483                 /* update tail pointer */
1484                 rte_wmb();
1485                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1486         }
1487
1488         if (rxq->rx_tail >= rxq->nb_rx_desc)
1489                 rxq->rx_tail = 0;
1490
1491         /* received any packets this loop? */
1492         if (rxq->rx_nb_avail)
1493                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1494
1495         return 0;
1496 }
1497
1498 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1499 uint16_t
1500 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1501                            uint16_t nb_pkts)
1502 {
1503         uint16_t nb_rx;
1504
1505         if (unlikely(nb_pkts == 0))
1506                 return 0;
1507
1508         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1509                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1510
1511         /* request is relatively large, chunk it up */
1512         nb_rx = 0;
1513         while (nb_pkts) {
1514                 uint16_t ret, n;
1515
1516                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1517                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1518                 nb_rx = (uint16_t)(nb_rx + ret);
1519                 nb_pkts = (uint16_t)(nb_pkts - ret);
1520                 if (ret < n)
1521                         break;
1522         }
1523
1524         return nb_rx;
1525 }
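
/*
 * Example of the chunking above (illustrative, assuming the usual
 * RTE_PMD_IXGBE_RX_MAX_BURST of 32): a request for 100 packets is served as
 * up to four calls to rx_recv_pkts() for 32, 32, 32 and 4 packets; the loop
 * stops early as soon as one of those calls returns fewer packets than it
 * asked for.
 */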
1526
1527 uint16_t
1528 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1529                 uint16_t nb_pkts)
1530 {
1531         struct ixgbe_rx_queue *rxq;
1532         volatile union ixgbe_adv_rx_desc *rx_ring;
1533         volatile union ixgbe_adv_rx_desc *rxdp;
1534         struct ixgbe_rx_entry *sw_ring;
1535         struct ixgbe_rx_entry *rxe;
1536         struct rte_mbuf *rxm;
1537         struct rte_mbuf *nmb;
1538         union ixgbe_adv_rx_desc rxd;
1539         uint64_t dma_addr;
1540         uint32_t staterr;
1541         uint32_t pkt_info;
1542         uint16_t pkt_len;
1543         uint16_t rx_id;
1544         uint16_t nb_rx;
1545         uint16_t nb_hold;
1546         uint64_t pkt_flags;
1547
1548         nb_rx = 0;
1549         nb_hold = 0;
1550         rxq = rx_queue;
1551         rx_id = rxq->rx_tail;
1552         rx_ring = rxq->rx_ring;
1553         sw_ring = rxq->sw_ring;
1554         while (nb_rx < nb_pkts) {
1555                 /*
1556                  * The order of operations here is important as the DD status
1557                  * bit must not be read after any other descriptor fields.
1558                  * rx_ring and rxdp are pointing to volatile data so the order
1559                  * of accesses cannot be reordered by the compiler. If they were
1560                  * not volatile, they could be reordered which could lead to
1561                  * using invalid descriptor fields when read from rxd.
1562                  */
1563                 rxdp = &rx_ring[rx_id];
1564                 staterr = rxdp->wb.upper.status_error;
1565                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1566                         break;
1567                 rxd = *rxdp;
1568
1569                 /*
1570                  * End of packet.
1571                  *
1572                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1573                  * is likely to be invalid and to be dropped by the various
1574                  * validation checks performed by the network stack.
1575                  *
1576                  * Allocate a new mbuf to replenish the RX ring descriptor.
1577                  * If the allocation fails:
1578                  *    - arrange for that RX descriptor to be the first one
1579                  *      being parsed the next time the receive function is
1580                  *      invoked [on the same queue].
1581                  *
1582                  *    - Stop parsing the RX ring and return immediately.
1583                  *
1584                  * This policy does not drop the packet received in the RX
1585                  * descriptor for which the allocation of a new mbuf failed.
1586                  * Thus, it allows that packet to be retrieved later once
1587                  * mbufs have been freed in the meantime.
1588                  * As a side effect, holding RX descriptors instead of
1589                  * systematically giving them back to the NIC may lead to
1590                  * RX ring exhaustion situations.
1591                  * However, the NIC can gracefully prevent such situations
1592                  * from happening by sending specific "back-pressure" flow
1593                  * control frames to its peer(s).
1594                  */
1595                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1596                            "ext_err_stat=0x%08x pkt_len=%u",
1597                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1598                            (unsigned) rx_id, (unsigned) staterr,
1599                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1600
1601                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1602                 if (nmb == NULL) {
1603                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1604                                    "queue_id=%u", (unsigned) rxq->port_id,
1605                                    (unsigned) rxq->queue_id);
1606                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1607                         break;
1608                 }
1609
1610                 nb_hold++;
1611                 rxe = &sw_ring[rx_id];
1612                 rx_id++;
1613                 if (rx_id == rxq->nb_rx_desc)
1614                         rx_id = 0;
1615
1616                 /* Prefetch next mbuf while processing current one. */
1617                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1618
1619                 /*
1620                  * When next RX descriptor is on a cache-line boundary,
1621                  * prefetch the next 4 RX descriptors and the next 8 pointers
1622                  * to mbufs.
1623                  */
1624                 if ((rx_id & 0x3) == 0) {
1625                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1626                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1627                 }
1628
1629                 rxm = rxe->mbuf;
1630                 rxe->mbuf = nmb;
1631                 dma_addr =
1632                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1633                 rxdp->read.hdr_addr = 0;
1634                 rxdp->read.pkt_addr = dma_addr;
1635
1636                 /*
1637                  * Initialize the returned mbuf.
1638                  * 1) setup generic mbuf fields:
1639                  *    - number of segments,
1640                  *    - next segment,
1641                  *    - packet length,
1642                  *    - RX port identifier.
1643                  * 2) integrate hardware offload data, if any:
1644                  *    - RSS flag & hash,
1645                  *    - IP checksum flag,
1646                  *    - VLAN TCI, if any,
1647                  *    - error flags.
1648                  */
1649                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1650                                       rxq->crc_len);
1651                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1652                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1653                 rxm->nb_segs = 1;
1654                 rxm->next = NULL;
1655                 rxm->pkt_len = pkt_len;
1656                 rxm->data_len = pkt_len;
1657                 rxm->port = rxq->port_id;
1658
1659                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1660                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1661                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1662
1663                 pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1664                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1665                 pkt_flags = pkt_flags |
1666                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1667                 rxm->ol_flags = pkt_flags;
1668                 rxm->packet_type =
1669                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1670                                                        rxq->pkt_type_mask);
1671
1672                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1673                         rxm->hash.rss = rte_le_to_cpu_32(
1674                                                 rxd.wb.lower.hi_dword.rss);
1675                 else if (pkt_flags & PKT_RX_FDIR) {
1676                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1677                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1678                                         IXGBE_ATR_HASH_MASK;
1679                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1680                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1681                 }
1682                 /*
1683                  * Store the mbuf address into the next entry of the array
1684                  * of returned packets.
1685                  */
1686                 rx_pkts[nb_rx++] = rxm;
1687         }
1688         rxq->rx_tail = rx_id;
1689
1690         /*
1691          * If the number of free RX descriptors is greater than the RX free
1692          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1693          * register.
1694          * Update the RDT with the value of the last processed RX descriptor
1695          * minus 1, to guarantee that the RDT register is never equal to the
1696          * RDH register, which creates a "full" ring situation from the
1697          * hardware point of view...
1698          */
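        /*
         * Illustrative example: with nb_rx_desc = 128, a wrap to rx_id == 0
         * makes the code below write 127 to RDT, so the tail always trails
         * the next descriptor software will process by at least one slot.
         */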
1699         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1700         if (nb_hold > rxq->rx_free_thresh) {
1701                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1702                            "nb_hold=%u nb_rx=%u",
1703                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1704                            (unsigned) rx_id, (unsigned) nb_hold,
1705                            (unsigned) nb_rx);
1706                 rx_id = (uint16_t) ((rx_id == 0) ?
1707                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1708                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1709                 nb_hold = 0;
1710         }
1711         rxq->nb_rx_hold = nb_hold;
1712         return nb_rx;
1713 }
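
/*
 * Usage sketch (illustrative, not part of the driver): applications never
 * call ixgbe_recv_pkts() directly; rte_eth_rx_burst() dispatches to whichever
 * ixgbe_recv_pkts* handler was installed for the queue:
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t k, nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 *     for (k = 0; k < nb; k++) {
 *             handle_packet(pkts[k]);    - hypothetical application hook
 *             rte_pktmbuf_free(pkts[k]);
 *     }
 */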
1714
1715 /**
1716  * Detect an RSC descriptor.
1717  */
1718 static inline uint32_t
1719 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1720 {
1721         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1722                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1723 }
1724
1725 /**
1726  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1727  *
1728  * Fill the following info in the HEAD buffer of the Rx cluster:
1729  *    - RX port identifier
1730  *    - hardware offload data, if any:
1731  *      - RSS flag & hash
1732  *      - IP checksum flag
1733  *      - VLAN TCI, if any
1734  *      - error flags
1735  * @head HEAD of the packet cluster
1736  * @desc HW descriptor to get data from
1737  * @rxq Pointer to the Rx queue
1738  */
1739 static inline void
1740 ixgbe_fill_cluster_head_buf(
1741         struct rte_mbuf *head,
1742         union ixgbe_adv_rx_desc *desc,
1743         struct ixgbe_rx_queue *rxq,
1744         uint32_t staterr)
1745 {
1746         uint32_t pkt_info;
1747         uint64_t pkt_flags;
1748
1749         head->port = rxq->port_id;
1750
1751         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1752          * set in the pkt_flags field.
1753          */
1754         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1755         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1756         pkt_flags = rx_desc_status_to_pkt_flags(staterr);
1757         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1758         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1759         head->ol_flags = pkt_flags;
1760         head->packet_type =
1761                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1762
1763         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1764                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1765         else if (pkt_flags & PKT_RX_FDIR) {
1766                 head->hash.fdir.hash =
1767                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1768                                                           & IXGBE_ATR_HASH_MASK;
1769                 head->hash.fdir.id =
1770                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1771         }
1772 }
1773
1774 /**
1775  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1776  *
1777  * @rx_queue Rx queue handle
1778  * @rx_pkts table of received packets
1779  * @nb_pkts size of rx_pkts table
1780  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1781  *
1782  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1783  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1784  *
1785  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1786  * 1) When non-EOP RSC completion arrives:
1787  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1788  *       segment's data length.
1789  *    b) Set the "next" pointer of the current segment to point to the segment
1790  *       at the NEXTP index.
1791  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1792  *       in the sw_sc_ring.
1793  * 2) When EOP arrives we just update the cluster's total length and offload
1794  *    flags and deliver the cluster up to the upper layers. In our case - put it
1795  *    in the rx_pkts table.
1796  *
1797  * Returns the number of received packets/clusters (according to the "bulk
1798  * receive" interface).
1799  */
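/*
 * Illustrative sketch of the bookkeeping described above: for an RSC cluster
 * spread over descriptors A -> B -> C (C carries EOP), the loop below
 * effectively performs
 *
 *     sw_ring[A].mbuf->next = sw_ring[B].mbuf;      - step 1b
 *     sw_sc_ring[B].fbuf = head;                    - step 1c
 *     head->pkt_len += data_len; head->nb_segs++;   - per segment
 *     rx_pkts[nb_rx++] = head;                      - at EOP (step 2)
 */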
1800 static inline uint16_t
1801 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1802                     bool bulk_alloc)
1803 {
1804         struct ixgbe_rx_queue *rxq = rx_queue;
1805         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1806         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1807         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1808         uint16_t rx_id = rxq->rx_tail;
1809         uint16_t nb_rx = 0;
1810         uint16_t nb_hold = rxq->nb_rx_hold;
1811         uint16_t prev_id = rxq->rx_tail;
1812
1813         while (nb_rx < nb_pkts) {
1814                 bool eop;
1815                 struct ixgbe_rx_entry *rxe;
1816                 struct ixgbe_scattered_rx_entry *sc_entry;
1817                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1818                 struct ixgbe_rx_entry *next_rxe = NULL;
1819                 struct rte_mbuf *first_seg;
1820                 struct rte_mbuf *rxm;
1821                 struct rte_mbuf *nmb;
1822                 union ixgbe_adv_rx_desc rxd;
1823                 uint16_t data_len;
1824                 uint16_t next_id;
1825                 volatile union ixgbe_adv_rx_desc *rxdp;
1826                 uint32_t staterr;
1827
1828 next_desc:
1829                 /*
1830                  * The code in this whole file uses the volatile pointer to
1831                  * ensure the read ordering of the status and the rest of the
1832                  * descriptor fields (on the compiler level only!!!). This is so
1833                  * UGLY - why not just use a compiler barrier instead? DPDK
1834                  * even has the rte_compiler_barrier() for that.
1835                  *
1836                  * But most importantly this is just wrong because this doesn't
1837                  * ensure memory ordering in a general case at all. For
1838                  * instance, DPDK is supposed to work on Power CPUs where
1839                  * a compiler barrier may just not be enough!
1840                  *
1841                  * I tried to write only this function properly to have a
1842                  * starting point (as a part of an LRO/RSC series) but the
1843                  * compiler cursed at me when I tried to cast away the
1844                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1845                  * keeping it the way it is for now.
1846                  *
1847                  * The code in this file is broken in so many other places and
1848                  * will just not work on a big endian CPU anyway therefore the
1849                  * lines below will have to be revisited together with the rest
1850                  * of the ixgbe PMD.
1851                  *
1852                  * TODO:
1853                  *    - Get rid of "volatile" crap and let the compiler do its
1854                  *      job.
1855                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
1856                  *      memory ordering below.
1857                  */
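                /*
                 * A minimal sketch of the rte_rmb()-based alternative the
                 * TODO above asks for (illustrative, not enabled here):
                 *
                 *     staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
                 *     if (!(staterr & IXGBE_RXDADV_STAT_DD))
                 *             break;
                 *     rte_rmb();    - order the DD check before reading
                 *     rxd = *rxdp;  - the remaining descriptor fields
                 */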
1858                 rxdp = &rx_ring[rx_id];
1859                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
1860
1861                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
1862                         break;
1863
1864                 rxd = *rxdp;
1865
1866                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1867                                   "staterr=0x%x data_len=%u",
1868                            rxq->port_id, rxq->queue_id, rx_id, staterr,
1869                            rte_le_to_cpu_16(rxd.wb.upper.length));
1870
1871                 if (!bulk_alloc) {
1872                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1873                         if (nmb == NULL) {
1874                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1875                                                   "port_id=%u queue_id=%u",
1876                                            rxq->port_id, rxq->queue_id);
1877
1878                                 rte_eth_devices[rxq->port_id].data->
1879                                                         rx_mbuf_alloc_failed++;
1880                                 break;
1881                         }
1882                 } else if (nb_hold > rxq->rx_free_thresh) {
1883                         uint16_t next_rdt = rxq->rx_free_trigger;
1884
1885                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
1886                                 rte_wmb();
1887                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
1888                                                     next_rdt);
1889                                 nb_hold -= rxq->rx_free_thresh;
1890                         } else {
1891                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1892                                                   "port_id=%u queue_id=%u",
1893                                            rxq->port_id, rxq->queue_id);
1894
1895                                 rte_eth_devices[rxq->port_id].data->
1896                                                         rx_mbuf_alloc_failed++;
1897                                 break;
1898                         }
1899                 }
1900
1901                 nb_hold++;
1902                 rxe = &sw_ring[rx_id];
1903                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
1904
1905                 next_id = rx_id + 1;
1906                 if (next_id == rxq->nb_rx_desc)
1907                         next_id = 0;
1908
1909                 /* Prefetch next mbuf while processing current one. */
1910                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
1911
1912                 /*
1913                  * When next RX descriptor is on a cache-line boundary,
1914                  * prefetch the next 4 RX descriptors and the next 8 pointers
1915                  * to mbufs.
1916                  */
1917                 if ((next_id & 0x3) == 0) {
1918                         rte_ixgbe_prefetch(&rx_ring[next_id]);
1919                         rte_ixgbe_prefetch(&sw_ring[next_id]);
1920                 }
1921
1922                 rxm = rxe->mbuf;
1923
1924                 if (!bulk_alloc) {
1925                         __le64 dma =
1926                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1927                         /*
1928                          * Update RX descriptor with the physical address of the
1929                          * new data buffer of the new allocated mbuf.
1930                          */
1931                         rxe->mbuf = nmb;
1932
1933                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
1934                         rxdp->read.hdr_addr = 0;
1935                         rxdp->read.pkt_addr = dma;
1936                 } else
1937                         rxe->mbuf = NULL;
1938
1939                 /*
1940                  * Set data length & data buffer address of mbuf.
1941                  */
1942                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1943                 rxm->data_len = data_len;
1944
1945                 if (!eop) {
1946                         uint16_t nextp_id;
1947                         /*
1948                          * Get next descriptor index:
1949                          *  - For RSC it's in the NEXTP field.
1950                          *  - For a scattered packet - it's just a following
1951                          *    descriptor.
1952                          */
1953                         if (ixgbe_rsc_count(&rxd))
1954                                 nextp_id =
1955                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1956                                                        IXGBE_RXDADV_NEXTP_SHIFT;
1957                         else
1958                                 nextp_id = next_id;
1959
1960                         next_sc_entry = &sw_sc_ring[nextp_id];
1961                         next_rxe = &sw_ring[nextp_id];
1962                         rte_ixgbe_prefetch(next_rxe);
1963                 }
1964
1965                 sc_entry = &sw_sc_ring[rx_id];
1966                 first_seg = sc_entry->fbuf;
1967                 sc_entry->fbuf = NULL;
1968
1969                 /*
1970                  * If this is the first buffer of the received packet,
1971                  * set the pointer to the first mbuf of the packet and
1972                  * initialize its context.
1973                  * Otherwise, update the total length and the number of segments
1974                  * of the current scattered packet, and update the pointer to
1975                  * the last mbuf of the current packet.
1976                  */
1977                 if (first_seg == NULL) {
1978                         first_seg = rxm;
1979                         first_seg->pkt_len = data_len;
1980                         first_seg->nb_segs = 1;
1981                 } else {
1982                         first_seg->pkt_len += data_len;
1983                         first_seg->nb_segs++;
1984                 }
1985
1986                 prev_id = rx_id;
1987                 rx_id = next_id;
1988
1989                 /*
1990                  * If this is not the last buffer of the received packet, update
1991                  * the pointer to the first mbuf at the NEXTP entry in the
1992                  * sw_sc_ring and continue to parse the RX ring.
1993                  */
1994                 if (!eop && next_rxe) {
1995                         rxm->next = next_rxe->mbuf;
1996                         next_sc_entry->fbuf = first_seg;
1997                         goto next_desc;
1998                 }
1999
2000                 /*
2001                  * This is the last buffer of the received packet - return
2002                  * the current cluster to the user.
2003                  */
2004                 rxm->next = NULL;
2005
2006                 /* Initialize the first mbuf of the returned packet */
2007                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2008
2009                 /*
2010                  * Deal with the case when HW CRC strip is disabled.
2011                  * That can't happen when LRO is enabled, but still could
2012                  * happen for scattered RX mode.
2013                  */
2014                 first_seg->pkt_len -= rxq->crc_len;
2015                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2016                         struct rte_mbuf *lp;
2017
2018                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2019                                 ;
2020
2021                         first_seg->nb_segs--;
2022                         lp->data_len -= rxq->crc_len - rxm->data_len;
2023                         lp->next = NULL;
2024                         rte_pktmbuf_free_seg(rxm);
2025                 } else
2026                         rxm->data_len -= rxq->crc_len;
2027
2028                 /* Prefetch data of first segment, if configured to do so. */
2029                 rte_packet_prefetch((char *)first_seg->buf_addr +
2030                         first_seg->data_off);
2031
2032                 /*
2033                  * Store the mbuf address into the next entry of the array
2034                  * of returned packets.
2035                  */
2036                 rx_pkts[nb_rx++] = first_seg;
2037         }
2038
2039         /*
2040          * Record index of the next RX descriptor to probe.
2041          */
2042         rxq->rx_tail = rx_id;
2043
2044         /*
2045          * If the number of free RX descriptors is greater than the RX free
2046          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2047          * register.
2048          * Update the RDT with the value of the last processed RX descriptor
2049          * minus 1, to guarantee that the RDT register is never equal to the
2050          * RDH register, which creates a "full" ring situation from the
2051          * hardware point of view...
2052          */
2053         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2054                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2055                            "nb_hold=%u nb_rx=%u",
2056                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2057
2058                 rte_wmb();
2059                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2060                 nb_hold = 0;
2061         }
2062
2063         rxq->nb_rx_hold = nb_hold;
2064         return nb_rx;
2065 }
2066
2067 uint16_t
2068 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2069                                  uint16_t nb_pkts)
2070 {
2071         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2072 }
2073
2074 uint16_t
2075 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2076                                uint16_t nb_pkts)
2077 {
2078         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2079 }
2080
2081 /*********************************************************************
2082  *
2083  *  Queue management functions
2084  *
2085  **********************************************************************/
2086
2087 static void __attribute__((cold))
2088 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2089 {
2090         unsigned i;
2091
2092         if (txq->sw_ring != NULL) {
2093                 for (i = 0; i < txq->nb_tx_desc; i++) {
2094                         if (txq->sw_ring[i].mbuf != NULL) {
2095                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2096                                 txq->sw_ring[i].mbuf = NULL;
2097                         }
2098                 }
2099         }
2100 }
2101
2102 static void __attribute__((cold))
2103 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2104 {
2105         if (txq != NULL &&
2106             txq->sw_ring != NULL)
2107                 rte_free(txq->sw_ring);
2108 }
2109
2110 static void __attribute__((cold))
2111 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2112 {
2113         if (txq != NULL && txq->ops != NULL) {
2114                 txq->ops->release_mbufs(txq);
2115                 txq->ops->free_swring(txq);
2116                 rte_free(txq);
2117         }
2118 }
2119
2120 void __attribute__((cold))
2121 ixgbe_dev_tx_queue_release(void *txq)
2122 {
2123         ixgbe_tx_queue_release(txq);
2124 }
2125
2126 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2127 static void __attribute__((cold))
2128 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2129 {
2130         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2131         struct ixgbe_tx_entry *txe = txq->sw_ring;
2132         uint16_t prev, i;
2133
2134         /* Zero out HW ring memory */
2135         for (i = 0; i < txq->nb_tx_desc; i++) {
2136                 txq->tx_ring[i] = zeroed_desc;
2137         }
2138
2139         /* Initialize SW ring entries */
2140         prev = (uint16_t) (txq->nb_tx_desc - 1);
2141         for (i = 0; i < txq->nb_tx_desc; i++) {
2142                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2143
2144                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2145                 txe[i].mbuf = NULL;
2146                 txe[i].last_id = i;
2147                 txe[prev].next_id = i;
2148                 prev = i;
2149         }
2150
2151         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2152         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2153
2154         txq->tx_tail = 0;
2155         txq->nb_tx_used = 0;
2156         /*
2157          * Always allow 1 descriptor to be un-allocated to avoid
2158          * a H/W race condition
2159          */
2160         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2161         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2162         txq->ctx_curr = 0;
2163         memset((void *)&txq->ctx_cache, 0,
2164                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2165 }
2166
2167 static const struct ixgbe_txq_ops def_txq_ops = {
2168         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2169         .free_swring = ixgbe_tx_free_swring,
2170         .reset = ixgbe_reset_tx_queue,
2171 };
2172
2173 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2174  * the queue parameters. Used in tx_queue_setup by primary process and then
2175  * in dev_init by secondary process when attaching to an existing ethdev.
2176  */
2177 void __attribute__((cold))
2178 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2179 {
2180         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2181         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2182                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2183                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2184 #ifdef RTE_IXGBE_INC_VECTOR
2185                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2186                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2187                                         ixgbe_txq_vec_setup(txq) == 0)) {
2188                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2189                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2190                 } else
2191 #endif
2192                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2193         } else {
2194                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2195                 PMD_INIT_LOG(DEBUG,
2196                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2197                                 (unsigned long)txq->txq_flags,
2198                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2199                 PMD_INIT_LOG(DEBUG,
2200                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2201                                 (unsigned long)txq->tx_rs_thresh,
2202                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2203                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2204         }
2205 }
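
/*
 * Example (illustrative): a queue configured with txq_flags covering
 * IXGBE_SIMPLE_FLAGS (no offloads, no multi-segment) and the usual default
 * tx_rs_thresh of 32 (which meets the RTE_PMD_IXGBE_TX_MAX_BURST bound) gets
 * ixgbe_xmit_pkts_simple, or the vector variant when it can be set up; any
 * other configuration falls back to the full-featured ixgbe_xmit_pkts.
 */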
2206
2207 int __attribute__((cold))
2208 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2209                          uint16_t queue_idx,
2210                          uint16_t nb_desc,
2211                          unsigned int socket_id,
2212                          const struct rte_eth_txconf *tx_conf)
2213 {
2214         const struct rte_memzone *tz;
2215         struct ixgbe_tx_queue *txq;
2216         struct ixgbe_hw     *hw;
2217         uint16_t tx_rs_thresh, tx_free_thresh;
2218
2219         PMD_INIT_FUNC_TRACE();
2220         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2221
2222         /*
2223          * Validate number of transmit descriptors.
2224          * It must not exceed hardware maximum, and must be multiple
2225          * of IXGBE_ALIGN.
2226          */
2227         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2228                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2229                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2230                 return -EINVAL;
2231         }
2232
2233         /*
2234          * The following two parameters control the setting of the RS bit on
2235          * transmit descriptors.
2236          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2237          * descriptors have been used.
2238          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2239          * descriptors are used or if the number of descriptors required
2240          * to transmit a packet is greater than the number of free TX
2241          * descriptors.
2242          * The following constraints must be satisfied:
2243          *  tx_rs_thresh must be greater than 0.
2244          *  tx_rs_thresh must be less than the size of the ring minus 2.
2245          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2246          *  tx_rs_thresh must be a divisor of the ring size.
2247          *  tx_free_thresh must be greater than 0.
2248          *  tx_free_thresh must be less than the size of the ring minus 3.
2249          * One descriptor in the TX ring is used as a sentinel to avoid a
2250          * H/W race condition, hence the maximum threshold constraints.
2251          * When set to zero use default values.
2252          */
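        /*
         * For example (illustrative): with nb_desc = 512, tx_rs_thresh = 32
         * and tx_free_thresh = 64 satisfy all of the above (32 divides 512,
         * 32 <= 64 and 64 < 512 - 3), while tx_rs_thresh = 24 would be
         * rejected because it is not a divisor of 512.
         */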
2253         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2254                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2255         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2256                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2257         if (tx_rs_thresh >= (nb_desc - 2)) {
2258                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2259                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2260                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2261                         (int)dev->data->port_id, (int)queue_idx);
2262                 return -(EINVAL);
2263         }
2264         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2265                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2266                         "(tx_rs_thresh=%u port=%d queue=%d)",
2267                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2268                         (int)dev->data->port_id, (int)queue_idx);
2269                 return -(EINVAL);
2270         }
2271         if (tx_free_thresh >= (nb_desc - 3)) {
2272                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2273                              "number of "
2274                              "TX descriptors minus 3. (tx_free_thresh=%u "
2275                              "port=%d queue=%d)",
2276                              (unsigned int)tx_free_thresh,
2277                              (int)dev->data->port_id, (int)queue_idx);
2278                 return -(EINVAL);
2279         }
2280         if (tx_rs_thresh > tx_free_thresh) {
2281                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2282                              "tx_free_thresh. (tx_free_thresh=%u "
2283                              "tx_rs_thresh=%u port=%d queue=%d)",
2284                              (unsigned int)tx_free_thresh,
2285                              (unsigned int)tx_rs_thresh,
2286                              (int)dev->data->port_id,
2287                              (int)queue_idx);
2288                 return -(EINVAL);
2289         }
2290         if ((nb_desc % tx_rs_thresh) != 0) {
2291                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2292                              "number of TX descriptors. (tx_rs_thresh=%u "
2293                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2294                              (int)dev->data->port_id, (int)queue_idx);
2295                 return -(EINVAL);
2296         }
2297
2298         /*
2299          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2300          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2301          * by the NIC and all descriptors are written back after the NIC
2302          * accumulates WTHRESH descriptors.
2303          */
2304         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2305                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2306                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2307                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2308                              (int)dev->data->port_id, (int)queue_idx);
2309                 return -(EINVAL);
2310         }
2311
2312         /* Free memory prior to re-allocation if needed... */
2313         if (dev->data->tx_queues[queue_idx] != NULL) {
2314                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2315                 dev->data->tx_queues[queue_idx] = NULL;
2316         }
2317
2318         /* First allocate the tx queue data structure */
2319         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2320                                  RTE_CACHE_LINE_SIZE, socket_id);
2321         if (txq == NULL)
2322                 return -ENOMEM;
2323
2324         /*
2325          * Allocate TX ring hardware descriptors. A memzone large enough to
2326          * handle the maximum ring size is allocated in order to allow for
2327          * resizing in later calls to the queue setup function.
2328          */
2329         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2330                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2331                         IXGBE_ALIGN, socket_id);
2332         if (tz == NULL) {
2333                 ixgbe_tx_queue_release(txq);
2334                 return -ENOMEM;
2335         }
2336
2337         txq->nb_tx_desc = nb_desc;
2338         txq->tx_rs_thresh = tx_rs_thresh;
2339         txq->tx_free_thresh = tx_free_thresh;
2340         txq->pthresh = tx_conf->tx_thresh.pthresh;
2341         txq->hthresh = tx_conf->tx_thresh.hthresh;
2342         txq->wthresh = tx_conf->tx_thresh.wthresh;
2343         txq->queue_id = queue_idx;
2344         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2345                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2346         txq->port_id = dev->data->port_id;
2347         txq->txq_flags = tx_conf->txq_flags;
2348         txq->ops = &def_txq_ops;
2349         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2350
2351         /*
2352          * Use VFTDT as the tail register if a virtual function is detected.
2353          */
2354         if (hw->mac.type == ixgbe_mac_82599_vf ||
2355             hw->mac.type == ixgbe_mac_X540_vf ||
2356             hw->mac.type == ixgbe_mac_X550_vf ||
2357             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2358             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2359                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2360         else
2361                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2362
2363         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2364         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2365
2366         /* Allocate software ring */
2367         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2368                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2369                                 RTE_CACHE_LINE_SIZE, socket_id);
2370         if (txq->sw_ring == NULL) {
2371                 ixgbe_tx_queue_release(txq);
2372                 return -ENOMEM;
2373         }
2374         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2375                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2376
2377         /* set up vector or scalar TX function as appropriate */
2378         ixgbe_set_tx_function(dev, txq);
2379
2380         txq->ops->reset(txq);
2381
2382         dev->data->tx_queues[queue_idx] = txq;
2383
2384
2385         return 0;
2386 }
2387
2388 /**
2389  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2390  *
2391  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2392  * in the sw_rsc_ring is not set to NULL but rather points to the next
2393  * in the sw_sc_ring is not set to NULL but rather points to the next
2394  * mbuf of this RSC aggregation (that has not been completed yet and still
2395  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2396  * just free the first "nb_segs" segments of the cluster explicitly by calling
2397  * rte_pktmbuf_free_seg() on each of them.
2398  * @m scattered cluster head
2399  */
2400 static void __attribute__((cold))
2401 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2402 {
2403         uint8_t i, nb_segs = m->nb_segs;
2404         struct rte_mbuf *next_seg;
2405
2406         for (i = 0; i < nb_segs; i++) {
2407                 next_seg = m->next;
2408                 rte_pktmbuf_free_seg(m);
2409                 m = next_seg;
2410         }
2411 }
2412
2413 static void __attribute__((cold))
2414 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2415 {
2416         unsigned i;
2417
2418 #ifdef RTE_IXGBE_INC_VECTOR
2419         /* SSE Vector driver has a different way of releasing mbufs. */
2420         if (rxq->rx_using_sse) {
2421                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2422                 return;
2423         }
2424 #endif
2425
2426         if (rxq->sw_ring != NULL) {
2427                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2428                         if (rxq->sw_ring[i].mbuf != NULL) {
2429                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2430                                 rxq->sw_ring[i].mbuf = NULL;
2431                         }
2432                 }
2433                 if (rxq->rx_nb_avail) {
2434                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2435                                 struct rte_mbuf *mb;
2436
2437                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2438                                 rte_pktmbuf_free_seg(mb);
2439                         }
2440                         rxq->rx_nb_avail = 0;
2441                 }
2442         }
2443
2444         if (rxq->sw_sc_ring)
2445                 for (i = 0; i < rxq->nb_rx_desc; i++)
2446                         if (rxq->sw_sc_ring[i].fbuf) {
2447                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2448                                 rxq->sw_sc_ring[i].fbuf = NULL;
2449                         }
2450 }
2451
2452 static void __attribute__((cold))
2453 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2454 {
2455         if (rxq != NULL) {
2456                 ixgbe_rx_queue_release_mbufs(rxq);
2457                 rte_free(rxq->sw_ring);
2458                 rte_free(rxq->sw_sc_ring);
2459                 rte_free(rxq);
2460         }
2461 }
2462
2463 void __attribute__((cold))
2464 ixgbe_dev_rx_queue_release(void *rxq)
2465 {
2466         ixgbe_rx_queue_release(rxq);
2467 }
2468
2469 /*
2470  * Check if Rx Burst Bulk Alloc function can be used.
2471  * Return
2472  *        0: the preconditions are satisfied and the bulk allocation function
2473  *           can be used.
2474  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2475  *           function must be used.
2476  */
2477 static inline int __attribute__((cold))
2478 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2479 {
2480         int ret = 0;
2481
2482         /*
2483          * Make sure the following pre-conditions are satisfied:
2484          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2485          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2486          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2487          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2488          * Scattered packets are not supported.  This should be checked
2489          * outside of this function.
2490          */
2491         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2492                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2493                              "rxq->rx_free_thresh=%d, "
2494                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2495                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2496                 ret = -EINVAL;
2497         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2498                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2499                              "rxq->rx_free_thresh=%d, "
2500                              "rxq->nb_rx_desc=%d",
2501                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2502                 ret = -EINVAL;
2503         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2504                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2505                              "rxq->nb_rx_desc=%d, "
2506                              "rxq->rx_free_thresh=%d",
2507                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2508                 ret = -EINVAL;
2509         } else if (!(rxq->nb_rx_desc <
2510                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2511                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2512                              "rxq->nb_rx_desc=%d, "
2513                              "IXGBE_MAX_RING_DESC=%d, "
2514                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2515                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2516                              RTE_PMD_IXGBE_RX_MAX_BURST);
2517                 ret = -EINVAL;
2518         }
2519
2520         return ret;
2521 }
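
/*
 * Worked example (illustrative only) of a configuration that satisfies the
 * preconditions above, assuming the default RTE_PMD_IXGBE_RX_MAX_BURST of 32
 * and IXGBE_MAX_RING_DESC of 4096; the concrete numbers are just examples:
 *
 *   nb_rx_desc     = 1024     (< 4096 - 32)
 *   rx_free_thresh = 32       (>= 32, < 1024, and 1024 % 32 == 0)
 *
 * A queue configured with rx_free_thresh = 48 instead would fail the
 * "nb_rx_desc % rx_free_thresh == 0" check and fall back to the default
 * (non bulk-alloc) Rx burst function for the whole port.
 */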
2522
2523 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2524 static void __attribute__((cold))
2525 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2526 {
2527         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2528         unsigned i;
2529         uint16_t len = rxq->nb_rx_desc;
2530
2531         /*
2532          * By default, the Rx queue setup function allocates enough memory for
2533          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2534          * extra memory at the end of the descriptor ring to be zero'd out. A
2535          * pre-condition for using the Rx burst bulk alloc function is that the
2536          * number of descriptors is less than or equal to
2537          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2538          * constraints here to see if we need to zero out memory after the end
2539          * of the H/W descriptor ring.
2540          */
2541         if (adapter->rx_bulk_alloc_allowed)
2542                 /* zero out extra memory */
2543                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2544
2545         /*
2546          * Zero out HW ring memory. Zero out extra memory at the end of
2547          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2548          * reads extra memory as zeros.
2549          */
2550         for (i = 0; i < len; i++) {
2551                 rxq->rx_ring[i] = zeroed_desc;
2552         }
2553
2554         /*
2555          * initialize extra software ring entries. Space for these extra
2556          * entries is always allocated
2557          */
2558         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2559         for (i = rxq->nb_rx_desc; i < len; ++i) {
2560                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2561         }
2562
2563         rxq->rx_nb_avail = 0;
2564         rxq->rx_next_avail = 0;
2565         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2566         rxq->rx_tail = 0;
2567         rxq->nb_rx_hold = 0;
2568         rxq->pkt_first_seg = NULL;
2569         rxq->pkt_last_seg = NULL;
2570
2571 #ifdef RTE_IXGBE_INC_VECTOR
2572         rxq->rxrearm_start = 0;
2573         rxq->rxrearm_nb = 0;
2574 #endif
2575 }
2576
2577 int __attribute__((cold))
2578 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2579                          uint16_t queue_idx,
2580                          uint16_t nb_desc,
2581                          unsigned int socket_id,
2582                          const struct rte_eth_rxconf *rx_conf,
2583                          struct rte_mempool *mp)
2584 {
2585         const struct rte_memzone *rz;
2586         struct ixgbe_rx_queue *rxq;
2587         struct ixgbe_hw     *hw;
2588         uint16_t len;
2589         struct ixgbe_adapter *adapter =
2590                 (struct ixgbe_adapter *)dev->data->dev_private;
2591
2592         PMD_INIT_FUNC_TRACE();
2593         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2594
2595         /*
2596          * Validate number of receive descriptors.
2597          * It must not exceed the hardware maximum and must be a multiple
2598          * of IXGBE_RXD_ALIGN.
2599          */
2600         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2601                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2602                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2603                 return -EINVAL;
2604         }
2605
2606         /* Free memory prior to re-allocation if needed... */
2607         if (dev->data->rx_queues[queue_idx] != NULL) {
2608                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2609                 dev->data->rx_queues[queue_idx] = NULL;
2610         }
2611
2612         /* First allocate the rx queue data structure */
2613         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2614                                  RTE_CACHE_LINE_SIZE, socket_id);
2615         if (rxq == NULL)
2616                 return -ENOMEM;
2617         rxq->mb_pool = mp;
2618         rxq->nb_rx_desc = nb_desc;
2619         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2620         rxq->queue_id = queue_idx;
2621         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2622                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2623         rxq->port_id = dev->data->port_id;
2624         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2625                                                         0 : ETHER_CRC_LEN);
2626         rxq->drop_en = rx_conf->rx_drop_en;
2627         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2628
2629         /*
2630          * The packet type field in the RX descriptor differs between NICs:
2631          * some bits are used by x550 but reserved on other NICs,
2632          * so set a different mask per NIC type.
2633          */
2634         if (hw->mac.type == ixgbe_mac_X550 ||
2635             hw->mac.type == ixgbe_mac_X550EM_x ||
2636             hw->mac.type == ixgbe_mac_X550EM_a ||
2637             hw->mac.type == ixgbe_mac_X550_vf ||
2638             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2639             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2640                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2641         else
2642                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2643
2644         /*
2645          * Allocate RX ring hardware descriptors. A memzone large enough to
2646          * handle the maximum ring size is allocated in order to allow for
2647          * resizing in later calls to the queue setup function.
2648          */
2649         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2650                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2651         if (rz == NULL) {
2652                 ixgbe_rx_queue_release(rxq);
2653                 return -ENOMEM;
2654         }
2655
2656         /*
2657          * Zero init all the descriptors in the ring.
2658          */
2659         memset(rz->addr, 0, RX_RING_SZ);
2660
2661         /*
2662          * Use VFRDT/VFRDH (the VF ring registers) for a virtual function
2663          */
2664         if (hw->mac.type == ixgbe_mac_82599_vf ||
2665             hw->mac.type == ixgbe_mac_X540_vf ||
2666             hw->mac.type == ixgbe_mac_X550_vf ||
2667             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2668             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2669                 rxq->rdt_reg_addr =
2670                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2671                 rxq->rdh_reg_addr =
2672                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2673         } else {
2674                 rxq->rdt_reg_addr =
2675                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2676                 rxq->rdh_reg_addr =
2677                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2678         }
2679
2680         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2681         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2682
2683         /*
2684          * Certain constraints must be met in order to use the bulk buffer
2685          * allocation Rx burst function. If any Rx queue doesn't meet them,
2686          * the feature is disabled for the whole port.
2687          */
2688         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2689                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2690                                     "preconditions - canceling the feature for "
2691                                     "the whole port[%d]",
2692                              rxq->queue_id, rxq->port_id);
2693                 adapter->rx_bulk_alloc_allowed = false;
2694         }
2695
2696         /*
2697          * Allocate software ring. Allow for space at the end of the
2698          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2699          * function does not access an invalid memory region.
2700          */
2701         len = nb_desc;
2702         if (adapter->rx_bulk_alloc_allowed)
2703                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2704
2705         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2706                                           sizeof(struct ixgbe_rx_entry) * len,
2707                                           RTE_CACHE_LINE_SIZE, socket_id);
2708         if (!rxq->sw_ring) {
2709                 ixgbe_rx_queue_release(rxq);
2710                 return -ENOMEM;
2711         }
2712
2713         /*
2714          * Always allocate even if it's not going to be needed in order to
2715          * simplify the code.
2716          *
2717          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2718          * be requested in ixgbe_dev_rx_init(), which is called later from
2719          * dev_start() flow.
2720          */
2721         rxq->sw_sc_ring =
2722                 rte_zmalloc_socket("rxq->sw_sc_ring",
2723                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2724                                    RTE_CACHE_LINE_SIZE, socket_id);
2725         if (!rxq->sw_sc_ring) {
2726                 ixgbe_rx_queue_release(rxq);
2727                 return -ENOMEM;
2728         }
2729
2730         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2731                             "dma_addr=0x%"PRIx64,
2732                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2733                      rxq->rx_ring_phys_addr);
2734
2735         if (!rte_is_power_of_2(nb_desc)) {
2736                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2737                                     "preconditions - canceling the feature for "
2738                                     "the whole port[%d]",
2739                              rxq->queue_id, rxq->port_id);
2740                 adapter->rx_vec_allowed = false;
2741         } else
2742                 ixgbe_rxq_vec_setup(rxq);
2743
2744         dev->data->rx_queues[queue_idx] = rxq;
2745
2746         ixgbe_reset_rx_queue(adapter, rxq);
2747
2748         return 0;
2749 }
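
/*
 * Illustrative sketch (not part of the driver): how an application typically
 * reaches the RX queue setup above through the generic ethdev API.  The
 * "port_id" variable and the mempool sizing are assumptions for the example;
 * nb_desc has to respect the IXGBE_MIN_RING_DESC/IXGBE_MAX_RING_DESC bounds
 * and the IXGBE_RXD_ALIGN multiple checked at the top of the function.
 *
 *   struct rte_mempool *mb_pool;
 *   struct rte_eth_rxconf rxconf = { .rx_free_thresh = 32 };
 *
 *   mb_pool = rte_pktmbuf_pool_create("rx_pool", 8192, 256, 0,
 *                                     RTE_MBUF_DEFAULT_BUF_SIZE,
 *                                     rte_eth_dev_socket_id(port_id));
 *   ret = rte_eth_rx_queue_setup(port_id, 0, 1024,
 *                                rte_eth_dev_socket_id(port_id),
 *                                &rxconf, mb_pool);
 */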
2750
2751 uint32_t
2752 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2753 {
2754 #define IXGBE_RXQ_SCAN_INTERVAL 4
2755         volatile union ixgbe_adv_rx_desc *rxdp;
2756         struct ixgbe_rx_queue *rxq;
2757         uint32_t desc = 0;
2758
2759         if (rx_queue_id >= dev->data->nb_rx_queues) {
2760                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2761                 return 0;
2762         }
2763
2764         rxq = dev->data->rx_queues[rx_queue_id];
2765         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2766
2767         while ((desc < rxq->nb_rx_desc) &&
2768                 (rxdp->wb.upper.status_error &
2769                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2770                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2771                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2772                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2773                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2774                                 desc - rxq->nb_rx_desc]);
2775         }
2776
2777         return desc;
2778 }
2779
2780 int
2781 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2782 {
2783         volatile union ixgbe_adv_rx_desc *rxdp;
2784         struct ixgbe_rx_queue *rxq = rx_queue;
2785         uint32_t desc;
2786
2787         if (unlikely(offset >= rxq->nb_rx_desc))
2788                 return 0;
2789         desc = rxq->rx_tail + offset;
2790         if (desc >= rxq->nb_rx_desc)
2791                 desc -= rxq->nb_rx_desc;
2792
2793         rxdp = &rxq->rx_ring[desc];
2794         return !!(rxdp->wb.upper.status_error &
2795                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2796 }
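
/*
 * Illustrative sketch (not part of the driver): the two queries above are
 * normally reached through the generic ethdev helpers; "port_id" is an
 * assumption for the example.
 *
 *   int used = rte_eth_rx_queue_count(port_id, 0);
 *   int done = rte_eth_rx_descriptor_done(port_id, 0, 0);
 *
 * "used" is an approximation because the ring is scanned in steps of
 * IXGBE_RXQ_SCAN_INTERVAL descriptors, while "done" reports whether the
 * descriptor at the given offset past the software tail has its DD bit set.
 */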
2797
2798 void __attribute__((cold))
2799 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2800 {
2801         unsigned i;
2802         struct ixgbe_adapter *adapter =
2803                 (struct ixgbe_adapter *)dev->data->dev_private;
2804
2805         PMD_INIT_FUNC_TRACE();
2806
2807         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2808                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2809
2810                 if (txq != NULL) {
2811                         txq->ops->release_mbufs(txq);
2812                         txq->ops->reset(txq);
2813                 }
2814         }
2815
2816         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2817                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2818
2819                 if (rxq != NULL) {
2820                         ixgbe_rx_queue_release_mbufs(rxq);
2821                         ixgbe_reset_rx_queue(adapter, rxq);
2822                 }
2823         }
2824 }
2825
2826 void
2827 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2828 {
2829         unsigned i;
2830
2831         PMD_INIT_FUNC_TRACE();
2832
2833         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2834                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2835                 dev->data->rx_queues[i] = NULL;
2836         }
2837         dev->data->nb_rx_queues = 0;
2838
2839         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2840                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2841                 dev->data->tx_queues[i] = NULL;
2842         }
2843         dev->data->nb_tx_queues = 0;
2844 }
2845
2846 /*********************************************************************
2847  *
2848  *  Device RX/TX init functions
2849  *
2850  **********************************************************************/
2851
2852 /**
2853  * Receive Side Scaling (RSS)
2854  * See section 7.1.2.8 in the following document:
2855  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
2856  *
2857  * Principles:
2858  * The source and destination IP addresses of the IP header and the source
2859  * and destination ports of TCP/UDP headers, if any, of received packets are
2860  * hashed against a configurable random key to compute a 32-bit RSS hash result.
2861  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2862  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
2863  * RSS output index which is used as the RX queue index in which to store the
2864  * received packets.
2865  * The following output is supplied in the RX write-back descriptor:
2866  *     - 32-bit result of the Microsoft RSS hash function,
2867  *     - 4-bit RSS type field.
2868  */
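
/*
 * Worked example (illustrative only): for a packet whose 32-bit RSS hash is
 * 0x1234abcd, the RETA index is the seven LSBs, 0x1234abcd & 0x7f = 0x4d = 77,
 * so the packet is steered to the RX queue stored in RETA entry 77.  The full
 * 32-bit hash is also written back in the descriptor and ends up in
 * mbuf->hash.rss on the receive path.
 */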
2869
2870 /*
2871  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
2872  * Used as the default key.
2873  */
2874 static uint8_t rss_intel_key[40] = {
2875         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2876         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2877         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2878         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2879         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2880 };
2881
2882 static void
2883 ixgbe_rss_disable(struct rte_eth_dev *dev)
2884 {
2885         struct ixgbe_hw *hw;
2886         uint32_t mrqc;
2887         uint32_t mrqc_reg;
2888
2889         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2890         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2891         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2892         mrqc &= ~IXGBE_MRQC_RSSEN;
2893         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2894 }
2895
2896 static void
2897 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
2898 {
2899         uint8_t  *hash_key;
2900         uint32_t mrqc;
2901         uint32_t rss_key;
2902         uint64_t rss_hf;
2903         uint16_t i;
2904         uint32_t mrqc_reg;
2905         uint32_t rssrk_reg;
2906
2907         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2908         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
2909
2910         hash_key = rss_conf->rss_key;
2911         if (hash_key != NULL) {
2912                 /* Fill in RSS hash key */
2913                 for (i = 0; i < 10; i++) {
2914                         rss_key  = hash_key[(i * 4)];
2915                         rss_key |= hash_key[(i * 4) + 1] << 8;
2916                         rss_key |= hash_key[(i * 4) + 2] << 16;
2917                         rss_key |= hash_key[(i * 4) + 3] << 24;
2918                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
2919                 }
2920         }
2921
2922         /* Set configured hashing protocols in MRQC register */
2923         rss_hf = rss_conf->rss_hf;
2924         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
2925         if (rss_hf & ETH_RSS_IPV4)
2926                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
2927         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2928                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
2929         if (rss_hf & ETH_RSS_IPV6)
2930                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
2931         if (rss_hf & ETH_RSS_IPV6_EX)
2932                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
2933         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
2934                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
2935         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
2936                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
2937         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2938                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
2939         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
2940                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
2941         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
2942                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
2943         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
2944 }
2945
2946 int
2947 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2948                           struct rte_eth_rss_conf *rss_conf)
2949 {
2950         struct ixgbe_hw *hw;
2951         uint32_t mrqc;
2952         uint64_t rss_hf;
2953         uint32_t mrqc_reg;
2954
2955         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2956
2957         if (!ixgbe_rss_update_sp(hw->mac.type)) {
2958                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2959                         "NIC.");
2960                 return -ENOTSUP;
2961         }
2962         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
2963
2964         /*
2965          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
2966          *     "RSS enabling cannot be done dynamically while it must be
2967          *      preceded by a software reset"
2968          * Before changing anything, first check that the update RSS operation
2969          * does not attempt to disable RSS, if RSS was enabled at
2970          * initialization time, or does not attempt to enable RSS, if RSS was
2971          * disabled at initialization time.
2972          */
2973         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
2974         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
2975         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
2976                 if (rss_hf != 0) /* Enable RSS */
2977                         return -(EINVAL);
2978                 return 0; /* Nothing to do */
2979         }
2980         /* RSS enabled */
2981         if (rss_hf == 0) /* Disable RSS */
2982                 return -(EINVAL);
2983         ixgbe_hw_rss_hash_set(hw, rss_conf);
2984         return 0;
2985 }
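
/*
 * Illustrative sketch (not part of the driver): updating the hash protocols
 * at runtime from an application, assuming RSS was already enabled at
 * initialization time (see the MRQC check above).  "port_id" is an assumption
 * for the example; leaving rss_key as NULL keeps the currently programmed key.
 *
 *   struct rte_eth_rss_conf rss_conf = {
 *           .rss_key = NULL,
 *           .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP |
 *                     ETH_RSS_NONFRAG_IPV4_UDP,
 *   };
 *
 *   ret = rte_eth_dev_rss_hash_update(port_id, &rss_conf);
 */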
2986
2987 int
2988 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2989                             struct rte_eth_rss_conf *rss_conf)
2990 {
2991         struct ixgbe_hw *hw;
2992         uint8_t *hash_key;
2993         uint32_t mrqc;
2994         uint32_t rss_key;
2995         uint64_t rss_hf;
2996         uint16_t i;
2997         uint32_t mrqc_reg;
2998         uint32_t rssrk_reg;
2999
3000         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3001         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3002         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3003         hash_key = rss_conf->rss_key;
3004         if (hash_key != NULL) {
3005                 /* Return RSS hash key */
3006                 for (i = 0; i < 10; i++) {
3007                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3008                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3009                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3010                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3011                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3012                 }
3013         }
3014
3015         /* Get RSS functions configured in MRQC register */
3016         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3017         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3018                 rss_conf->rss_hf = 0;
3019                 return 0;
3020         }
3021         rss_hf = 0;
3022         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3023                 rss_hf |= ETH_RSS_IPV4;
3024         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3025                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3026         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3027                 rss_hf |= ETH_RSS_IPV6;
3028         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3029                 rss_hf |= ETH_RSS_IPV6_EX;
3030         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3031                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3032         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3033                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3034         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3035                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3036         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3037                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3038         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3039                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3040         rss_conf->rss_hf = rss_hf;
3041         return 0;
3042 }
3043
3044 static void
3045 ixgbe_rss_configure(struct rte_eth_dev *dev)
3046 {
3047         struct rte_eth_rss_conf rss_conf;
3048         struct ixgbe_hw *hw;
3049         uint32_t reta;
3050         uint16_t i;
3051         uint16_t j;
3052         uint16_t sp_reta_size;
3053         uint32_t reta_reg;
3054
3055         PMD_INIT_FUNC_TRACE();
3056         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3057
3058         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3059
3060         /*
3061          * Fill in redirection table
3062          * The byte-swap is needed because NIC registers are in
3063          * little-endian order.
3064          */
3065         reta = 0;
3066         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3067                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3068
3069                 if (j == dev->data->nb_rx_queues)
3070                         j = 0;
3071                 reta = (reta << 8) | j;
3072                 if ((i & 3) == 3)
3073                         IXGBE_WRITE_REG(hw, reta_reg,
3074                                         rte_bswap32(reta));
3075         }
3076
3077         /*
3078          * Configure the RSS key and the RSS protocols used to compute
3079          * the RSS hash of input packets.
3080          */
3081         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3082         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3083                 ixgbe_rss_disable(dev);
3084                 return;
3085         }
3086         if (rss_conf.rss_key == NULL)
3087                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3088         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3089 }
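
/*
 * Illustrative sketch (not part of the driver): the loop above fills the RETA
 * round-robin over the configured RX queues; an application can repartition
 * it later through the generic API.  "port_id" and the 4-queue layout are
 * assumptions for the example, and 128 matches the 82599/X540 table size
 * (X550 exposes 512 entries, as reported by rte_eth_dev_info_get()).
 *
 *   struct rte_eth_rss_reta_entry64 reta_conf[2];
 *   uint16_t i;
 *
 *   memset(reta_conf, 0, sizeof(reta_conf));
 *   for (i = 0; i < 128; i++) {
 *           uint16_t idx = i / RTE_RETA_GROUP_SIZE;
 *           uint16_t shift = i % RTE_RETA_GROUP_SIZE;
 *
 *           reta_conf[idx].mask |= 1ULL << shift;
 *           reta_conf[idx].reta[shift] = i % 4;
 *   }
 *   ret = rte_eth_dev_rss_reta_update(port_id, reta_conf, 128);
 */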
3090
3091 #define NUM_VFTA_REGISTERS 128
3092 #define NIC_RX_BUFFER_SIZE 0x200
3093 #define X550_RX_BUFFER_SIZE 0x180
3094
3095 static void
3096 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3097 {
3098         struct rte_eth_vmdq_dcb_conf *cfg;
3099         struct ixgbe_hw *hw;
3100         enum rte_eth_nb_pools num_pools;
3101         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3102         uint16_t pbsize;
3103         uint8_t nb_tcs; /* number of traffic classes */
3104         int i;
3105
3106         PMD_INIT_FUNC_TRACE();
3107         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3108         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3109         num_pools = cfg->nb_queue_pools;
3110         /* Check we have a valid number of pools */
3111         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3112                 ixgbe_rss_disable(dev);
3113                 return;
3114         }
3115         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3116         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3117
3118         /*
3119          * RXPBSIZE
3120          * split rx buffer up into sections, each for 1 traffic class
3121          */
3122         switch (hw->mac.type) {
3123         case ixgbe_mac_X550:
3124         case ixgbe_mac_X550EM_x:
3125         case ixgbe_mac_X550EM_a:
3126                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3127                 break;
3128         default:
3129                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3130                 break;
3131         }
3132         for (i = 0; i < nb_tcs; i++) {
3133                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3134
3135                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3136                 /* clear 10 bits. */
3137                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3138                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3139         }
3140         /* zero alloc all unused TCs */
3141         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3142                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3143
3144                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3145                 /* clear 10 bits. */
3146                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3147         }
3148
3149         /* MRQC: enable vmdq and dcb */
3150         mrqc = (num_pools == ETH_16_POOLS) ?
3151                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3152         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3153
3154         /* PFVTCTL: turn on virtualisation and set the default pool */
3155         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3156         if (cfg->enable_default_pool) {
3157                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3158         } else {
3159                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3160         }
3161
3162         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3163
3164         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3165         queue_mapping = 0;
3166         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3167                 /*
3168                  * mapping is done with 3 bits per priority,
3169                  * so shift by i*3 each time
3170                  */
3171                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3172
3173         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3174
3175         /* RTRPCS: DCB related */
3176         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3177
3178         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3179         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3180         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3181         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3182
3183         /* VFTA - enable all vlan filters */
3184         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3185                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3186         }
3187
3188         /* VFRE: pool enabling for receive - 16 or 32 */
3189         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3190                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3191
3192         /*
3193          * MPSAR - allow pools to read specific mac addresses
3194          * In this case, all pools should be able to read from mac addr 0
3195          */
3196         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3197         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3198
3199         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3200         for (i = 0; i < cfg->nb_pool_maps; i++) {
3201                 /* set vlan id in VF register and set the valid bit */
3202                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3203                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3204                 /*
3205                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3206                  * pools, we only need to use the first half of the register
3207                  * i.e. bits 0-31
3208                  */
3209                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3210         }
3211 }
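
/*
 * Illustrative sketch (not part of the driver): the rte_eth_conf fragment an
 * application would supply to reach this function (ETH_MQ_RX_VMDQ_DCB).  The
 * pool count, VLAN id and priority mapping below are assumptions for the
 * example.
 *
 *   struct rte_eth_conf port_conf = {
 *           .rxmode = { .mq_mode = ETH_MQ_RX_VMDQ_DCB },
 *           .rx_adv_conf.vmdq_dcb_conf = {
 *                   .nb_queue_pools = ETH_16_POOLS,
 *                   .enable_default_pool = 0,
 *                   .nb_pool_maps = 1,
 *                   .pool_map = { { .vlan_id = 100, .pools = 0x1 } },
 *                   .dcb_tc = { 0, 1, 2, 3, 4, 5, 6, 7 },
 *           },
 *   };
 *
 * With 16 pools each pool gets 8 queues (one per TC), and the dcb_tc[] map
 * above packs into RTRUP2TC as 3 bits per user priority, i.e. the register
 * value 0xFAC688 (7 << 21 | 6 << 18 | ... | 1 << 3 | 0 << 0).
 */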
3212
3213 /**
3214  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3215  * @hw: pointer to hardware structure
3216  * @dcb_config: pointer to ixgbe_dcb_config structure
3217  */
3218 static void
3219 ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw,
3220                        struct ixgbe_dcb_config *dcb_config)
3221 {
3222         uint32_t reg;
3223         uint32_t q;
3224
3225         PMD_INIT_FUNC_TRACE();
3226         if (hw->mac.type != ixgbe_mac_82598EB) {
3227                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3228                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3229                 reg |= IXGBE_RTTDCS_ARBDIS;
3230                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3231
3232                 /* Enable DCB for Tx with 8 TCs */
3233                 if (dcb_config->num_tcs.pg_tcs == 8) {
3234                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3235                 } else {
3236                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3237                 }
3238                 if (dcb_config->vt_mode)
3239                         reg |= IXGBE_MTQC_VT_ENA;
3240                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3241
3242                 /* Disable drop for all queues */
3243                 for (q = 0; q < 128; q++)
3244                         IXGBE_WRITE_REG(hw, IXGBE_QDE,
3245                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3246
3247                 /* Enable the Tx desc arbiter */
3248                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3249                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3250                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3251
3252                 /* Enable Security TX Buffer IFG for DCB */
3253                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3254                 reg |= IXGBE_SECTX_DCB;
3255                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3256         }
3257 }
3258
3259 /**
3260  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3261  * @dev: pointer to rte_eth_dev structure
3262  * @dcb_config: pointer to ixgbe_dcb_config structure
3263  */
3264 static void
3265 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3266                         struct ixgbe_dcb_config *dcb_config)
3267 {
3268         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3269                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3270         struct ixgbe_hw *hw =
3271                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3272
3273         PMD_INIT_FUNC_TRACE();
3274         if (hw->mac.type != ixgbe_mac_82598EB)
3275                 /*PF VF Transmit Enable*/
3276                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3277                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3278
3279         /*Configure general DCB TX parameters*/
3280         ixgbe_dcb_tx_hw_config(hw, dcb_config);
3281 }
3282
3283 static void
3284 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3285                         struct ixgbe_dcb_config *dcb_config)
3286 {
3287         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3288                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3289         struct ixgbe_dcb_tc_config *tc;
3290         uint8_t i, j;
3291
3292         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3293         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3294                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3295                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3296         } else {
3297                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3298                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3299         }
3300         /* User Priority to Traffic Class mapping */
3301         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3302                 j = vmdq_rx_conf->dcb_tc[i];
3303                 tc = &dcb_config->tc_config[j];
3304                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3305                                                 (uint8_t)(1 << j);
3306         }
3307 }
3308
3309 static void
3310 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3311                         struct ixgbe_dcb_config *dcb_config)
3312 {
3313         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3314                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3315         struct ixgbe_dcb_tc_config *tc;
3316         uint8_t i, j;
3317
3318         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3319         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3320                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3321                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3322         } else {
3323                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3324                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3325         }
3326
3327         /* User Priority to Traffic Class mapping */
3328         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3329                 j = vmdq_tx_conf->dcb_tc[i];
3330                 tc = &dcb_config->tc_config[j];
3331                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3332                                                 (uint8_t)(1 << j);
3333         }
3334 }
3335
3336 static void
3337 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3338                 struct ixgbe_dcb_config *dcb_config)
3339 {
3340         struct rte_eth_dcb_rx_conf *rx_conf =
3341                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3342         struct ixgbe_dcb_tc_config *tc;
3343         uint8_t i, j;
3344
3345         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3346         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3347
3348         /* User Priority to Traffic Class mapping */
3349         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3350                 j = rx_conf->dcb_tc[i];
3351                 tc = &dcb_config->tc_config[j];
3352                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3353                                                 (uint8_t)(1 << j);
3354         }
3355 }
3356
3357 static void
3358 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3359                 struct ixgbe_dcb_config *dcb_config)
3360 {
3361         struct rte_eth_dcb_tx_conf *tx_conf =
3362                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3363         struct ixgbe_dcb_tc_config *tc;
3364         uint8_t i, j;
3365
3366         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3367         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3368
3369         /* User Priority to Traffic Class mapping */
3370         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3371                 j = tx_conf->dcb_tc[i];
3372                 tc = &dcb_config->tc_config[j];
3373                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3374                                                 (uint8_t)(1 << j);
3375         }
3376 }
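
/*
 * Worked example (illustrative only) for the mapping helpers above: assuming
 * an application sets dcb_tc[] = {0, 0, 0, 0, 1, 1, 2, 3}, user priorities
 * 0-3 select TC0, 4-5 select TC1, 6 selects TC2 and 7 selects TC3.  For each
 * referenced TC j the code writes (1 << j) into that TC's up_to_tc_bitmap,
 * i.e. 0x01, 0x02, 0x04 and 0x08 for TC0..TC3 in this example.
 */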
3377
3378 /**
3379  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3380  * @hw: pointer to hardware structure
3381  * @dcb_config: pointer to ixgbe_dcb_config structure
3382  */
3383 static void
3384 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3385                struct ixgbe_dcb_config *dcb_config)
3386 {
3387         uint32_t reg;
3388         uint32_t vlanctrl;
3389         uint8_t i;
3390
3391         PMD_INIT_FUNC_TRACE();
3392         /*
3393          * Disable the arbiter before changing parameters
3394          * (always enable recycle mode; WSP)
3395          */
3396         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3397         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3398
3399         if (hw->mac.type != ixgbe_mac_82598EB) {
3400                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3401                 if (dcb_config->num_tcs.pg_tcs == 4) {
3402                         if (dcb_config->vt_mode)
3403                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3404                                         IXGBE_MRQC_VMDQRT4TCEN;
3405                         else {
3406                                 /* Whether the mode is DCB or DCB_RSS, just
3407                                  * set MRQE to RTRSSxTCEN; RSS itself is
3408                                  * controlled by the RSS_FIELD bits.
3409                                  */
3410                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3411                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3412                                         IXGBE_MRQC_RTRSS4TCEN;
3413                         }
3414                 }
3415                 if (dcb_config->num_tcs.pg_tcs == 8) {
3416                         if (dcb_config->vt_mode)
3417                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3418                                         IXGBE_MRQC_VMDQRT8TCEN;
3419                         else {
3420                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3421                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3422                                         IXGBE_MRQC_RTRSS8TCEN;
3423                         }
3424                 }
3425
3426                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3427         }
3428
3429         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3430         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3431         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3432         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3433
3434         /* VFTA - enable all vlan filters */
3435         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3436                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3437         }
3438
3439         /*
3440          * Configure Rx packet plane (recycle mode; WSP) and
3441          * enable arbiter
3442          */
3443         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3444         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3445 }
3446
3447 static void
3448 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3449                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3450 {
3451         switch (hw->mac.type) {
3452         case ixgbe_mac_82598EB:
3453                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3454                 break;
3455         case ixgbe_mac_82599EB:
3456         case ixgbe_mac_X540:
3457         case ixgbe_mac_X550:
3458         case ixgbe_mac_X550EM_x:
3459         case ixgbe_mac_X550EM_a:
3460                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3461                                                   tsa, map);
3462                 break;
3463         default:
3464                 break;
3465         }
3466 }
3467
3468 static void
3469 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3470                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3471 {
3472         switch (hw->mac.type) {
3473         case ixgbe_mac_82598EB:
3474                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3475                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3476                 break;
3477         case ixgbe_mac_82599EB:
3478         case ixgbe_mac_X540:
3479         case ixgbe_mac_X550:
3480         case ixgbe_mac_X550EM_x:
3481         case ixgbe_mac_X550EM_a:
3482                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3483                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3484                 break;
3485         default:
3486                 break;
3487         }
3488 }
3489
3490 #define DCB_RX_CONFIG  1
3491 #define DCB_TX_CONFIG  1
3492 #define DCB_TX_PB      1024
3493 /**
3494  * ixgbe_dcb_hw_configure - Enable DCB and configure
3495  * general DCB in VT mode and non-VT mode parameters
3496  * @dev: pointer to rte_eth_dev structure
3497  * @dcb_config: pointer to ixgbe_dcb_config structure
3498  */
3499 static int
3500 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3501                         struct ixgbe_dcb_config *dcb_config)
3502 {
3503         int     ret = 0;
3504         uint8_t i, pfc_en, nb_tcs;
3505         uint16_t pbsize, rx_buffer_size;
3506         uint8_t config_dcb_rx = 0;
3507         uint8_t config_dcb_tx = 0;
3508         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3509         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3510         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3511         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3512         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3513         struct ixgbe_dcb_tc_config *tc;
3514         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3515         struct ixgbe_hw *hw =
3516                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3517
3518         switch (dev->data->dev_conf.rxmode.mq_mode) {
3519         case ETH_MQ_RX_VMDQ_DCB:
3520                 dcb_config->vt_mode = true;
3521                 if (hw->mac.type != ixgbe_mac_82598EB) {
3522                         config_dcb_rx = DCB_RX_CONFIG;
3523                         /*
3524                          * get DCB and VT RX configuration parameters
3525                          * from rte_eth_conf
3526                          */
3527                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3528                         /*Configure general VMDQ and DCB RX parameters*/
3529                         ixgbe_vmdq_dcb_configure(dev);
3530                 }
3531                 break;
3532         case ETH_MQ_RX_DCB:
3533         case ETH_MQ_RX_DCB_RSS:
3534                 dcb_config->vt_mode = false;
3535                 config_dcb_rx = DCB_RX_CONFIG;
3536                 /* Get DCB RX configuration parameters from rte_eth_conf */
3537                 ixgbe_dcb_rx_config(dev, dcb_config);
3538                 /*Configure general DCB RX parameters*/
3539                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3540                 break;
3541         default:
3542                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3543                 break;
3544         }
3545         switch (dev->data->dev_conf.txmode.mq_mode) {
3546         case ETH_MQ_TX_VMDQ_DCB:
3547                 dcb_config->vt_mode = true;
3548                 config_dcb_tx = DCB_TX_CONFIG;
3549                 /* get DCB and VT TX configuration parameters
3550                  * from rte_eth_conf
3551                  */
3552                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3553                 /*Configure general VMDQ and DCB TX parameters*/
3554                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3555                 break;
3556
3557         case ETH_MQ_TX_DCB:
3558                 dcb_config->vt_mode = false;
3559                 config_dcb_tx = DCB_TX_CONFIG;
3560                 /*get DCB TX configuration parameters from rte_eth_conf*/
3561                 ixgbe_dcb_tx_config(dev, dcb_config);
3562                 /*Configure general DCB TX parameters*/
3563                 ixgbe_dcb_tx_hw_config(hw, dcb_config);
3564                 break;
3565         default:
3566                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3567                 break;
3568         }
3569
3570         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3571         /* Unpack map */
3572         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3573         if (nb_tcs == ETH_4_TCS) {
3574                 /* Avoid un-configured priority mapping to TC0 */
3575                 uint8_t j = 4;
3576                 uint8_t mask = 0xFF;
3577
3578                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3579                         mask = (uint8_t)(mask & (~(1 << map[i])));
3580                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3581                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3582                                 map[j++] = i;
3583                         mask >>= 1;
3584                 }
3585                 /* Re-configure 4 TCs BW */
3586                 for (i = 0; i < nb_tcs; i++) {
3587                         tc = &dcb_config->tc_config[i];
3588                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3589                                                 (uint8_t)(100 / nb_tcs);
3590                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3591                                                 (uint8_t)(100 / nb_tcs);
3592                 }
3593                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3594                         tc = &dcb_config->tc_config[i];
3595                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3596                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3597                 }
3598         }
3599
3600         switch (hw->mac.type) {
3601         case ixgbe_mac_X550:
3602         case ixgbe_mac_X550EM_x:
3603         case ixgbe_mac_X550EM_a:
3604                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3605                 break;
3606         default:
3607                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3608                 break;
3609         }
3610
3611         if (config_dcb_rx) {
3612                 /* Set RX buffer size */
3613                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3614                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3615
3616                 for (i = 0; i < nb_tcs; i++) {
3617                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3618                 }
3619                 /* zero alloc all unused TCs */
3620                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3621                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3622                 }
3623         }
3624         if (config_dcb_tx) {
3625                 /* Only an equally distributed Tx packet buffer
3626                  * strategy is supported.
3627                  */
3628                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3629                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3630
3631                 for (i = 0; i < nb_tcs; i++) {
3632                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3633                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3634                 }
3635                 /* Clear unused TCs, if any, to zero buffer size*/
3636                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3637                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3638                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3639                 }
3640         }
3641
3642         /*Calculates traffic class credits*/
3643         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3644                                 IXGBE_DCB_TX_CONFIG);
3645         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3646                                 IXGBE_DCB_RX_CONFIG);
3647
3648         if (config_dcb_rx) {
3649                 /* Unpack CEE standard containers */
3650                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3651                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3652                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3653                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3654                 /* Configure PG(ETS) RX */
3655                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3656         }
3657
3658         if (config_dcb_tx) {
3659                 /* Unpack CEE standard containers */
3660                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3661                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3662                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3663                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3664                 /* Configure PG(ETS) TX */
3665                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3666         }
3667
3668         /*Configure queue statistics registers*/
3669         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3670
3671         /* Check if the PFC is supported */
3672         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3673                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3674                 for (i = 0; i < nb_tcs; i++) {
3675                         /*
3676                          * For example, with 8 TCs the default high_water is
3677                          * 48 and the default low_water is 16 (KB units).
3678                          */
3679                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3680                         hw->fc.low_water[i] = pbsize / 4;
3681                         /* Enable pfc for this TC */
3682                         tc = &dcb_config->tc_config[i];
3683                         tc->pfc = ixgbe_dcb_pfc_enabled;
3684                 }
3685                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3686                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3687                         pfc_en &= 0x0F;
3688                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3689         }
3690
3691         return ret;
3692 }
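
/*
 * Worked example (illustrative only) of the watermark arithmetic above for a
 * non-X550 device (rx_buffer_size = NIC_RX_BUFFER_SIZE = 0x200) with 8
 * traffic classes:
 *
 *   pbsize        = 0x200 / 8    = 64   (KB units, as in RXPBSIZE)
 *   high_water[i] = (64 * 3) / 4 = 48
 *   low_water[i]  = 64 / 4       = 16
 *
 * which matches the default 48/16 values mentioned in the comment above.
 */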
3693
3694 /**
3695  * ixgbe_configure_dcb - Configure DCB hardware
3696  * @dev: pointer to rte_eth_dev
3697  */
3698 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3699 {
3700         struct ixgbe_dcb_config *dcb_cfg =
3701                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3702         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3703
3704         PMD_INIT_FUNC_TRACE();
3705
3706         /* check support mq_mode for DCB */
3707         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3708             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3709             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3710                 return;
3711
3712         if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES)
3713                 return;
3714
3715         /** Configure DCB hardware **/
3716         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3717 }
3718
3719 /*
3720  * VMDq is only supported on 10 GbE NICs.
3721  */
3722 static void
3723 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3724 {
3725         struct rte_eth_vmdq_rx_conf *cfg;
3726         struct ixgbe_hw *hw;
3727         enum rte_eth_nb_pools num_pools;
3728         uint32_t mrqc, vt_ctl, vlanctrl;
3729         uint32_t vmolr = 0;
3730         int i;
3731
3732         PMD_INIT_FUNC_TRACE();
3733         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3734         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3735         num_pools = cfg->nb_queue_pools;
3736
3737         ixgbe_rss_disable(dev);
3738
3739         /* MRQC: enable vmdq */
3740         mrqc = IXGBE_MRQC_VMDQEN;
3741         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3742
3743         /* PFVTCTL: turn on virtualisation and set the default pool */
3744         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3745         if (cfg->enable_default_pool)
3746                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3747         else
3748                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3749
3750         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3751
3752         for (i = 0; i < (int)num_pools; i++) {
3753                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3754                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3755         }
3756
3757         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3758         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3759         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3760         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3761
3762         /* VFTA - enable all vlan filters */
3763         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3764                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3765
3766         /* VFRE: pool enabling for receive - 64 */
3767         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3768         if (num_pools == ETH_64_POOLS)
3769                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3770
3771         /*
3772          * MPSAR - allow pools to read specific mac addresses
3773          * In this case, all pools should be able to read from mac addr 0
3774          */
3775         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3776         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3777
3778         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3779         for (i = 0; i < cfg->nb_pool_maps; i++) {
3780                 /* set vlan id in VF register and set the valid bit */
3781                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3782                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3783                 /*
3784                  * Put the allowed pools in the VLVFB register. As we only have
3785                  * 16 or 64 pools, we only need to use the first half of the
3786                  * register, i.e. bits 0-31.
3787                  */
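                     /*
                      * Illustration (hypothetical values): a pool_map entry with
                      * pools == 0x5 grants pools 0 and 2 access to this VLAN; the
                      * bitmap simply carries one bit per pool index.
                      */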
3788                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3789                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3790                                         (cfg->pool_map[i].pools & UINT32_MAX));
3791                 else
3792                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3793                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3794
3795         }
3796
3797         /* PFDMA Tx General Switch Control: enable VMDq loopback if requested */
3798         if (cfg->enable_loop_back) {
3799                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3800                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3801                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3802         }
3803
3804         IXGBE_WRITE_FLUSH(hw);
3805 }
3806
3807 /*
3808  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3809  * @hw: pointer to hardware structure
3810  */
3811 static void
3812 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3813 {
3814         uint32_t reg;
3815         uint32_t q;
3816
3817         PMD_INIT_FUNC_TRACE();
3818         /* PF VF Transmit Enable */
3819         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3820         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3821
3822         /* Disable the Tx desc arbiter so that MTQC can be changed */
3823         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3824         reg |= IXGBE_RTTDCS_ARBDIS;
3825         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3826
3827         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3828         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3829
3830         /* Disable drop for all queues */
3831         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3832                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3833                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3834
3835         /* Enable the Tx desc arbiter */
3836         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3837         reg &= ~IXGBE_RTTDCS_ARBDIS;
3838         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3839
3840         IXGBE_WRITE_FLUSH(hw);
3841 }
3842
3843 static int __attribute__((cold))
3844 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
3845 {
3846         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
3847         uint64_t dma_addr;
3848         unsigned int i;
3849
3850         /* Initialize software ring entries */
3851         for (i = 0; i < rxq->nb_rx_desc; i++) {
3852                 volatile union ixgbe_adv_rx_desc *rxd;
3853                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
3854
3855                 if (mbuf == NULL) {
3856                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3857                                      (unsigned) rxq->queue_id);
3858                         return -ENOMEM;
3859                 }
3860
3861                 rte_mbuf_refcnt_set(mbuf, 1);
3862                 mbuf->next = NULL;
3863                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3864                 mbuf->nb_segs = 1;
3865                 mbuf->port = rxq->port_id;
3866
3867                 dma_addr =
3868                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
3869                 rxd = &rxq->rx_ring[i];
3870                 rxd->read.hdr_addr = 0;
3871                 rxd->read.pkt_addr = dma_addr;
3872                 rxe[i].mbuf = mbuf;
3873         }
3874
3875         return 0;
3876 }
3877
3878 static int
3879 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
3880 {
3881         struct ixgbe_hw *hw;
3882         uint32_t mrqc;
3883
3884         ixgbe_rss_configure(dev);
3885
3886         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3887
3888         /* MRQC: enable VF RSS */
3889         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
3890         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
3891         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3892         case ETH_64_POOLS:
3893                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
3894                 break;
3895
3896         case ETH_32_POOLS:
3897                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
3898                 break;
3899
3900         default:
3901                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3902                 return -EINVAL;
3903         }
3904
3905         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3906
3907         return 0;
3908 }
3909
3910 static int
3911 ixgbe_config_vf_default(struct rte_eth_dev *dev)
3912 {
3913         struct ixgbe_hw *hw =
3914                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3915
3916         switch (RTE_ETH_DEV_SRIOV(dev).active) {
3917         case ETH_64_POOLS:
3918                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3919                         IXGBE_MRQC_VMDQEN);
3920                 break;
3921
3922         case ETH_32_POOLS:
3923                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3924                         IXGBE_MRQC_VMDQRT4TCEN);
3925                 break;
3926
3927         case ETH_16_POOLS:
3928                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
3929                         IXGBE_MRQC_VMDQRT8TCEN);
3930                 break;
3931         default:
3932                 PMD_INIT_LOG(ERR,
3933                         "invalid pool number in IOV mode");
3934                 break;
3935         }
3936         return 0;
3937 }
3938
3939 static int
3940 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3941 {
3942         struct ixgbe_hw *hw =
3943                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3944
3945         if (hw->mac.type == ixgbe_mac_82598EB)
3946                 return 0;
3947
3948         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3949                 /*
3950                  * SRIOV inactive scheme
3951                  * any DCB/RSS w/o VMDq multi-queue setting
3952                  */
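                     /*
                      * A minimal application-side sketch (illustrative only, not part
                      * of this driver; port_id/nb_rxq/nb_txq are placeholders):
                      * selecting plain RSS here would typically be done as
                      *
                      *     struct rte_eth_conf conf = {
                      *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
                      *     };
                      *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
                      *
                      * which makes the switch below call ixgbe_rss_configure().
                      */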
3953                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3954                 case ETH_MQ_RX_RSS:
3955                 case ETH_MQ_RX_DCB_RSS:
3956                 case ETH_MQ_RX_VMDQ_RSS:
3957                         ixgbe_rss_configure(dev);
3958                         break;
3959
3960                 case ETH_MQ_RX_VMDQ_DCB:
3961                         ixgbe_vmdq_dcb_configure(dev);
3962                         break;
3963
3964                 case ETH_MQ_RX_VMDQ_ONLY:
3965                         ixgbe_vmdq_rx_hw_configure(dev);
3966                         break;
3967
3968                 case ETH_MQ_RX_NONE:
3969                 default:
3970                         /* if mq_mode is none, disable RSS mode. */
3971                         ixgbe_rss_disable(dev);
3972                         break;
3973                 }
3974         } else {
3975                 /*
3976                  * SRIOV active scheme
3977                  * Support RSS together with VMDq & SRIOV
3978                  */
3979                 switch (dev->data->dev_conf.rxmode.mq_mode) {
3980                 case ETH_MQ_RX_RSS:
3981                 case ETH_MQ_RX_VMDQ_RSS:
3982                         ixgbe_config_vf_rss(dev);
3983                         break;
3984
3985                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
3986                 case ETH_MQ_RX_VMDQ_DCB:
3987                 case ETH_MQ_RX_VMDQ_DCB_RSS:
3988                         PMD_INIT_LOG(ERR,
3989                                 "DCB is not supported together with VMDq & SRIOV");
3990                         return -1;
3991                 default:
3992                         ixgbe_config_vf_default(dev);
3993                         break;
3994                 }
3995         }
3996
3997         return 0;
3998 }
3999
4000 static int
4001 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4002 {
4003         struct ixgbe_hw *hw =
4004                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4005         uint32_t mtqc;
4006         uint32_t rttdcs;
4007
4008         if (hw->mac.type == ixgbe_mac_82598EB)
4009                 return 0;
4010
4011         /* disable arbiter before setting MTQC */
4012         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4013         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4014         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4015
4016         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4017                 /*
4018                  * SRIOV inactive scheme
4019                  * any DCB w/o VMDq multi-queue setting
4020                  */
4021                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4022                         ixgbe_vmdq_tx_hw_configure(hw);
4023                 else {
4024                         mtqc = IXGBE_MTQC_64Q_1PB;
4025                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4026                 }
4027         } else {
4028                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4029
4030                 /*
4031                  * SRIOV active scheme
4032                  * FIXME if support DCB together with VMDq & SRIOV
4033                  */
4034                 case ETH_64_POOLS:
4035                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4036                         break;
4037                 case ETH_32_POOLS:
4038                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4039                         break;
4040                 case ETH_16_POOLS:
4041                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4042                                 IXGBE_MTQC_8TC_8TQ;
4043                         break;
4044                 default:
4045                         mtqc = IXGBE_MTQC_64Q_1PB;
4046                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4047                 }
4048                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4049         }
4050
4051         /* re-enable arbiter */
4052         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4053         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4054
4055         return 0;
4056 }
4057
4058 /**
4059  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4060  *
4061  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4062  * spec rev. 3.0 chapter 8.2.3.8.13.
4063  *
4064  * @pool Memory pool of the Rx queue
4065  */
4066 static inline uint32_t
4067 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4068 {
4069         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4070
4071         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4072         uint16_t maxdesc =
4073                 IPV4_MAX_PKT_LEN /
4074                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4075
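             /*
              * Worked example (assuming the common defaults of a 2176-byte data
              * room and a 128-byte headroom): 65535 / 2048 = 31, which is >= 16,
              * so RSCCTL[n].MAXDESC is programmed as 16 below.
              */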
4076         if (maxdesc >= 16)
4077                 return IXGBE_RSCCTL_MAXDESC_16;
4078         else if (maxdesc >= 8)
4079                 return IXGBE_RSCCTL_MAXDESC_8;
4080         else if (maxdesc >= 4)
4081                 return IXGBE_RSCCTL_MAXDESC_4;
4082         else
4083                 return IXGBE_RSCCTL_MAXDESC_1;
4084 }
4085
4086 /**
4087  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4088  * interrupt
4089  *
4090  * (Taken from FreeBSD tree)
4091  * (yes this is all very magic and confusing :)
4092  *
4093  * @dev port handle
4094  * @entry the register array entry
4095  * @vector the MSIX vector for this queue
4096  * @type RX/TX/MISC
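      *
      * Worked example (hypothetical values, 82599/X540 path): for an Rx queue
      * with entry = 5, vector = 3 and type = 0, index = 16 * (5 & 1) + 8 * 0 = 16,
      * so the vector (with IXGBE_IVAR_ALLOC_VAL set) lands in bits 23:16 of
      * IVAR(5 >> 1) = IVAR(2).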
4097  */
4098 static void
4099 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4100 {
4101         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4102         u32 ivar, index;
4103
4104         vector |= IXGBE_IVAR_ALLOC_VAL;
4105
4106         switch (hw->mac.type) {
4107
4108         case ixgbe_mac_82598EB:
4109                 if (type == -1)
4110                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4111                 else
4112                         entry += (type * 64);
4113                 index = (entry >> 2) & 0x1F;
4114                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4115                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4116                 ivar |= (vector << (8 * (entry & 0x3)));
4117                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4118                 break;
4119
4120         case ixgbe_mac_82599EB:
4121         case ixgbe_mac_X540:
4122                 if (type == -1) { /* MISC IVAR */
4123                         index = (entry & 1) * 8;
4124                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4125                         ivar &= ~(0xFF << index);
4126                         ivar |= (vector << index);
4127                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4128                 } else {        /* RX/TX IVARS */
4129                         index = (16 * (entry & 1)) + (8 * type);
4130                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4131                         ivar &= ~(0xFF << index);
4132                         ivar |= (vector << index);
4133                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4134                 }
4135
4136                 break;
4137
4138         default:
4139                 break;
4140         }
4141 }
4142
4143 void __attribute__((cold))
4144 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4145 {
4146         uint16_t i, rx_using_sse;
4147         struct ixgbe_adapter *adapter =
4148                 (struct ixgbe_adapter *)dev->data->dev_private;
4149
4150         /*
4151          * In order to allow Vector Rx there are a few configuration
4152          * conditions to be met and Rx Bulk Allocation should be allowed.
4153          */
4154         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4155             !adapter->rx_bulk_alloc_allowed) {
4156                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4157                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4158                                     "not enabled",
4159                              dev->data->port_id);
4160
4161                 adapter->rx_vec_allowed = false;
4162         }
4163
4164         /*
4165          * Initialize the appropriate LRO callback.
4166          *
4167          * If all queues satisfy the bulk allocation preconditions
4168          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4169          * Otherwise use a single allocation version.
4170          */
4171         if (dev->data->lro) {
4172                 if (adapter->rx_bulk_alloc_allowed) {
4173                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4174                                            "allocation version");
4175                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4176                 } else {
4177                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4178                                            "allocation version");
4179                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4180                 }
4181         } else if (dev->data->scattered_rx) {
4182                 /*
4183                  * Set the non-LRO scattered callback: there are Vector, bulk
4184                  * allocation and single allocation versions.
4185                  */
4186                 if (adapter->rx_vec_allowed) {
4187                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4188                                             "callback (port=%d).",
4189                                      dev->data->port_id);
4190
4191                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4192                 } else if (adapter->rx_bulk_alloc_allowed) {
4193                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback with "
4194                                            "bulk allocation (port=%d).",
4195                                      dev->data->port_id);
4196                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4197                 } else {
4198                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4199                                             "single allocation) "
4200                                             "Scattered Rx callback "
4201                                             "(port=%d).",
4202                                      dev->data->port_id);
4203
4204                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4205                 }
4206         /*
4207          * Below we set "simple" callbacks according to port/queues parameters.
4208          * If parameters allow we are going to choose between the following
4209          * callbacks:
4210          *    - Vector
4211          *    - Bulk Allocation
4212          *    - Single buffer allocation (the simplest one)
4213          */
4214         } else if (adapter->rx_vec_allowed) {
4215                 PMD_INIT_LOG(DEBUG, "Vector Rx enabled, please make sure RX "
4216                                     "burst size is no less than %d (port=%d).",
4217                              RTE_IXGBE_DESCS_PER_LOOP,
4218                              dev->data->port_id);
4219
4220                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4221         } else if (adapter->rx_bulk_alloc_allowed) {
4222                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4223                                     "satisfied. Rx Burst Bulk Alloc function "
4224                                     "will be used on port=%d.",
4225                              dev->data->port_id);
4226
4227                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4228         } else {
4229                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4230                                     "satisfied, or Scattered Rx is requested "
4231                                     "(port=%d).",
4232                              dev->data->port_id);
4233
4234                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4235         }
4236
4237         /* Propagate information about RX function choice through all queues. */
4238
4239         rx_using_sse =
4240                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4241                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4242
4243         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4244                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4245
4246                 rxq->rx_using_sse = rx_using_sse;
4247         }
4248 }
4249
4250 /**
4251  * ixgbe_set_rsc - configure RSC related port HW registers
4252  *
4253  * Configures the port's RSC-related registers according to chapter 4.6.7.2
4254  * of the 82599 Spec (x540 configuration is virtually the same).
4255  *
4256  * @dev port handle
4257  *
4258  * Returns 0 in case of success or a non-zero error code
4259  */
4260 static int
4261 ixgbe_set_rsc(struct rte_eth_dev *dev)
4262 {
4263         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4264         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4265         struct rte_eth_dev_info dev_info = { 0 };
4266         bool rsc_capable = false;
4267         uint16_t i;
4268         uint32_t rdrxctl;
4269
4270         /* Sanity check */
4271         dev->dev_ops->dev_infos_get(dev, &dev_info);
4272         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4273                 rsc_capable = true;
4274
4275         if (!rsc_capable && rx_conf->enable_lro) {
4276                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4277                                    "support it");
4278                 return -EINVAL;
4279         }
4280
4281         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4282
4283         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4284                 /*
4285                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4286                  * RSC configuration requires HW CRC stripping to be
4287                  * enabled. If the user requested both HW CRC stripping
4288                  * off and RSC on - return an error.
4289                  */
4290                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4291                                     "stripping is disabled");
4292                 return -EINVAL;
4293         }
4294
4295         /* RFCTL configuration */
4296         if (rsc_capable) {
4297                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4298
4299                 if (rx_conf->enable_lro)
4300                         /*
4301                          * Since coalescing of NFS packets is not supported, clear
4302                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4303                          * enabled.
4304                          */
4305                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4306                                    IXGBE_RFCTL_NFSR_DIS);
4307                 else
4308                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4309
4310                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4311         }
4312
4313         /* If LRO hasn't been requested - we are done here. */
4314         if (!rx_conf->enable_lro)
4315                 return 0;
4316
4317         /* Set RDRXCTL.RSCACKC bit */
4318         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4319         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4320         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4321
4322         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4323         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4324                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4325                 uint32_t srrctl =
4326                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4327                 uint32_t rscctl =
4328                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4329                 uint32_t psrtype =
4330                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4331                 uint32_t eitr =
4332                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4333
4334                 /*
4335                  * ixgbe PMD doesn't support header-split at the moment.
4336                  *
4337                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4338                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER field
4339                  * should be configured even if header split is not
4340                  * enabled. We configure it to 128 bytes, following the
4341                  * recommendation in the spec.
4342                  */
4343                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4344                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4345                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4346
4347                 /*
4348                  * TODO: Consider setting the Receive Descriptor Minimum
4349                  * Threshold Size for an RSC case. This is not an obviously
4350                  * beneficial option but one worth considering...
4351                  */
4352
4353                 rscctl |= IXGBE_RSCCTL_RSCEN;
4354                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4355                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4356
4357                 /*
4358                  * RSC: Set ITR interval corresponding to 2K ints/s.
4359                  *
4360                  * Full-sized RSC aggregations for a 10Gb/s link will
4361                  * arrive at about 20K aggregation/s rate.
4362                  *
4363                  * A 2K ints/s rate will cause only about 10% of the
4364                  * aggregations to be closed due to the interrupt timer
4365                  * expiration in the wire-speed streaming case.
4366                  *
4367                  * For a sparse streaming case this setting will yield
4368                  * at most 500us latency for a single RSC aggregation.
4369                  */
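                     /*
                      * Illustrative arithmetic: a 500 us EITR interval allows at most
                      * 1 s / 500 us = 2000 interrupts per second, i.e. the 2K ints/s
                      * figure above; at ~20K aggregations/s that is roughly one timer
                      * expiration per 10 aggregations.
                      */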
4370                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4371                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4372
4373                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4374                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4375                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4376                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4377
4378                 /*
4379                  * RSC requires the mapping of the queue to the
4380                  * interrupt vector.
4381                  */
4382                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4383         }
4384
4385         dev->data->lro = 1;
4386
4387         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4388
4389         return 0;
4390 }
4391
4392 /*
4393  * Initializes Receive Unit.
4394  */
4395 int __attribute__((cold))
4396 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4397 {
4398         struct ixgbe_hw     *hw;
4399         struct ixgbe_rx_queue *rxq;
4400         uint64_t bus_addr;
4401         uint32_t rxctrl;
4402         uint32_t fctrl;
4403         uint32_t hlreg0;
4404         uint32_t maxfrs;
4405         uint32_t srrctl;
4406         uint32_t rdrxctl;
4407         uint32_t rxcsum;
4408         uint16_t buf_size;
4409         uint16_t i;
4410         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4411         int rc;
4412
4413         PMD_INIT_FUNC_TRACE();
4414         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4415
4416         /*
4417          * Make sure receives are disabled while setting
4418          * up the RX context (registers, descriptor rings, etc.).
4419          */
4420         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4421         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4422
4423         /* Enable receipt of broadcast frames */
4424         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4425         fctrl |= IXGBE_FCTRL_BAM;
4426         fctrl |= IXGBE_FCTRL_DPF;
4427         fctrl |= IXGBE_FCTRL_PMCF;
4428         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4429
4430         /*
4431          * Configure CRC stripping, if any.
4432          */
4433         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4434         if (rx_conf->hw_strip_crc)
4435                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4436         else
4437                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4438
4439         /*
4440          * Configure jumbo frame support, if any.
4441          */
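             /*
              * Illustration (hypothetical value): with max_rx_pkt_len = 9000 the
              * write below places 9000 in the upper 16 bits of MAXFRS (the MFS
              * field), so frames up to 9000 bytes are accepted.
              */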
4442         if (rx_conf->jumbo_frame == 1) {
4443                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4444                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4445                 maxfrs &= 0x0000FFFF;
4446                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4447                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4448         } else
4449                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4450
4451         /*
4452          * If loopback mode is configured for 82599, set LPBK bit.
4453          */
4454         if (hw->mac.type == ixgbe_mac_82599EB &&
4455                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4456                 hlreg0 |= IXGBE_HLREG0_LPBK;
4457         else
4458                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4459
4460         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4461
4462         /* Setup RX queues */
4463         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4464                 rxq = dev->data->rx_queues[i];
4465
4466                 /*
4467                  * Reset crc_len in case it was changed after queue setup by a
4468                  * call to configure.
4469                  */
4470                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4471
4472                 /* Setup the Base and Length of the Rx Descriptor Rings */
4473                 bus_addr = rxq->rx_ring_phys_addr;
4474                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4475                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4476                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4477                                 (uint32_t)(bus_addr >> 32));
4478                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4479                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4480                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4481                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4482
4483                 /* Configure the SRRCTL register */
4484 #ifdef RTE_HEADER_SPLIT_ENABLE
4485                 /*
4486                  * Configure Header Split
4487                  */
4488                 if (rx_conf->header_split) {
4489                         if (hw->mac.type == ixgbe_mac_82599EB) {
4490                                 /* Must setup the PSRTYPE register */
4491                                 uint32_t psrtype;
4492
4493                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4494                                         IXGBE_PSRTYPE_UDPHDR   |
4495                                         IXGBE_PSRTYPE_IPV4HDR  |
4496                                         IXGBE_PSRTYPE_IPV6HDR;
4497                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4498                         }
4499                         srrctl = ((rx_conf->split_hdr_size <<
4500                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4501                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4502                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4503                 } else
4504 #endif
4505                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4506
4507                 /* Set if packets are dropped when no descriptors available */
4508                 if (rxq->drop_en)
4509                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4510
4511                 /*
4512                  * Configure the RX buffer size in the BSIZEPACKET field of
4513                  * the SRRCTL register of the queue.
4514                  * The value is in 1 KB resolution. Valid values can be from
4515                  * 1 KB to 16 KB.
4516                  */
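                     /*
                      * Worked example (assuming the common defaults of a 2176-byte
                      * data room and a 128-byte headroom): buf_size = 2048 bytes,
                      * and shifting by IXGBE_SRRCTL_BSIZEPKT_SHIFT converts that to
                      * the register's 1 KB units, i.e. a BSIZEPACKET value of 2.
                      */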
4517                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4518                         RTE_PKTMBUF_HEADROOM);
4519                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4520                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4521
4522                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4523
4524                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4525                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4526
4527                 /* Add dual VLAN tag length to account for dual-VLAN (QinQ) frames */
4528                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4529                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4530                         dev->data->scattered_rx = 1;
4531         }
4532
4533         if (rx_conf->enable_scatter)
4534                 dev->data->scattered_rx = 1;
4535
4536         /*
4537          * Device configured with multiple RX queues.
4538          */
4539         ixgbe_dev_mq_rx_configure(dev);
4540
4541         /*
4542          * Setup the Checksum Register.
4543          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4544          * Enable IP/L4 checksum computation by hardware if requested to do so.
4545          */
4546         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4547         rxcsum |= IXGBE_RXCSUM_PCSD;
4548         if (rx_conf->hw_ip_checksum)
4549                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4550         else
4551                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4552
4553         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4554
4555         if (hw->mac.type == ixgbe_mac_82599EB ||
4556             hw->mac.type == ixgbe_mac_X540) {
4557                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4558                 if (rx_conf->hw_strip_crc)
4559                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4560                 else
4561                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4562                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4563                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4564         }
4565
4566         rc = ixgbe_set_rsc(dev);
4567         if (rc)
4568                 return rc;
4569
4570         ixgbe_set_rx_function(dev);
4571
4572         return 0;
4573 }
4574
4575 /*
4576  * Initializes Transmit Unit.
4577  */
4578 void __attribute__((cold))
4579 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4580 {
4581         struct ixgbe_hw     *hw;
4582         struct ixgbe_tx_queue *txq;
4583         uint64_t bus_addr;
4584         uint32_t hlreg0;
4585         uint32_t txctrl;
4586         uint16_t i;
4587
4588         PMD_INIT_FUNC_TRACE();
4589         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4590
4591         /* Enable TX CRC (checksum offload requirement) and hw padding
4592          * (TSO requirement)
4593          */
4594         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4595         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4596         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4597
4598         /* Setup the Base and Length of the Tx Descriptor Rings */
4599         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4600                 txq = dev->data->tx_queues[i];
4601
4602                 bus_addr = txq->tx_ring_phys_addr;
4603                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4604                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4605                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4606                                 (uint32_t)(bus_addr >> 32));
4607                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4608                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4609                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4610                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4611                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4612
4613                 /*
4614                  * Disable Tx Head Writeback RO bit, since this hoses
4615                  * bookkeeping if things aren't delivered in order.
4616                  */
4617                 switch (hw->mac.type) {
4618                 case ixgbe_mac_82598EB:
4619                         txctrl = IXGBE_READ_REG(hw,
4620                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4621                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4622                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4623                                         txctrl);
4624                         break;
4625
4626                 case ixgbe_mac_82599EB:
4627                 case ixgbe_mac_X540:
4628                 case ixgbe_mac_X550:
4629                 case ixgbe_mac_X550EM_x:
4630                 case ixgbe_mac_X550EM_a:
4631                 default:
4632                         txctrl = IXGBE_READ_REG(hw,
4633                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4634                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4635                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4636                                         txctrl);
4637                         break;
4638                 }
4639         }
4640
4641         /* Device configured with multiple TX queues. */
4642         ixgbe_dev_mq_tx_configure(dev);
4643 }
4644
4645 /*
4646  * Set up link for 82599 loopback mode Tx->Rx.
4647  */
4648 static inline void __attribute__((cold))
4649 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4650 {
4651         PMD_INIT_FUNC_TRACE();
4652
4653         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4654                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4655                                 IXGBE_SUCCESS) {
4656                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4657                         /* ignore error */
4658                         return;
4659                 }
4660         }
4661
4662         /* Restart link */
4663         IXGBE_WRITE_REG(hw,
4664                         IXGBE_AUTOC,
4665                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4666         ixgbe_reset_pipeline_82599(hw);
4667
4668         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4669         msec_delay(50);
4670 }
4671
4672
4673 /*
4674  * Start Transmit and Receive Units.
4675  */
4676 int __attribute__((cold))
4677 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4678 {
4679         struct ixgbe_hw     *hw;
4680         struct ixgbe_tx_queue *txq;
4681         struct ixgbe_rx_queue *rxq;
4682         uint32_t txdctl;
4683         uint32_t dmatxctl;
4684         uint32_t rxctrl;
4685         uint16_t i;
4686         int ret = 0;
4687
4688         PMD_INIT_FUNC_TRACE();
4689         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4690
4691         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4692                 txq = dev->data->tx_queues[i];
4693                 /* Setup Transmit Threshold Registers */
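                     /*
                      * TXDCTL packs the prefetch, host and write-back thresholds into
                      * bits 6:0, 14:8 and 22:16 respectively; as a hypothetical
                      * illustration, pthresh = 32, hthresh = 0, wthresh = 0 yields a
                      * threshold field value of 0x20.
                      */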
4694                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4695                 txdctl |= txq->pthresh & 0x7F;
4696                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4697                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4698                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4699         }
4700
4701         if (hw->mac.type != ixgbe_mac_82598EB) {
4702                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4703                 dmatxctl |= IXGBE_DMATXCTL_TE;
4704                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4705         }
4706
4707         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4708                 txq = dev->data->tx_queues[i];
4709                 if (!txq->tx_deferred_start) {
4710                         ret = ixgbe_dev_tx_queue_start(dev, i);
4711                         if (ret < 0)
4712                                 return ret;
4713                 }
4714         }
4715
4716         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4717                 rxq = dev->data->rx_queues[i];
4718                 if (!rxq->rx_deferred_start) {
4719                         ret = ixgbe_dev_rx_queue_start(dev, i);
4720                         if (ret < 0)
4721                                 return ret;
4722                 }
4723         }
4724
4725         /* Enable Receive engine */
4726         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4727         if (hw->mac.type == ixgbe_mac_82598EB)
4728                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4729         rxctrl |= IXGBE_RXCTRL_RXEN;
4730         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4731
4732         /* If loopback mode is enabled for 82599, set up the link accordingly */
4733         if (hw->mac.type == ixgbe_mac_82599EB &&
4734                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4735                 ixgbe_setup_loopback_link_82599(hw);
4736
4737         return 0;
4738 }
4739
4740 /*
4741  * Start Receive Units for specified queue.
4742  */
4743 int __attribute__((cold))
4744 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4745 {
4746         struct ixgbe_hw     *hw;
4747         struct ixgbe_rx_queue *rxq;
4748         uint32_t rxdctl;
4749         int poll_ms;
4750
4751         PMD_INIT_FUNC_TRACE();
4752         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4753
4754         if (rx_queue_id < dev->data->nb_rx_queues) {
4755                 rxq = dev->data->rx_queues[rx_queue_id];
4756
4757                 /* Allocate buffers for descriptor rings */
4758                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4759                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4760                                      rx_queue_id);
4761                         return -1;
4762                 }
4763                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4764                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4765                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4766
4767                 /* Wait until RX Enable ready */
4768                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4769                 do {
4770                         rte_delay_ms(1);
4771                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4772                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4773                 if (!poll_ms)
4774                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4775                                      rx_queue_id);
4776                 rte_wmb();
4777                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4778                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4779                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4780         } else
4781                 return -1;
4782
4783         return 0;
4784 }
4785
4786 /*
4787  * Stop Receive Units for specified queue.
4788  */
4789 int __attribute__((cold))
4790 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4791 {
4792         struct ixgbe_hw     *hw;
4793         struct ixgbe_adapter *adapter =
4794                 (struct ixgbe_adapter *)dev->data->dev_private;
4795         struct ixgbe_rx_queue *rxq;
4796         uint32_t rxdctl;
4797         int poll_ms;
4798
4799         PMD_INIT_FUNC_TRACE();
4800         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4801
4802         if (rx_queue_id < dev->data->nb_rx_queues) {
4803                 rxq = dev->data->rx_queues[rx_queue_id];
4804
4805                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4806                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4807                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4808
4809                 /* Wait until RX Enable bit clear */
4810                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4811                 do {
4812                         rte_delay_ms(1);
4813                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4814                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4815                 if (!poll_ms)
4816                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4817                                      rx_queue_id);
4818
4819                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4820
4821                 ixgbe_rx_queue_release_mbufs(rxq);
4822                 ixgbe_reset_rx_queue(adapter, rxq);
4823                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4824         } else
4825                 return -1;
4826
4827         return 0;
4828 }
4829
4830
4831 /*
4832  * Start Transmit Units for specified queue.
4833  */
4834 int __attribute__((cold))
4835 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4836 {
4837         struct ixgbe_hw     *hw;
4838         struct ixgbe_tx_queue *txq;
4839         uint32_t txdctl;
4840         int poll_ms;
4841
4842         PMD_INIT_FUNC_TRACE();
4843         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4844
4845         if (tx_queue_id < dev->data->nb_tx_queues) {
4846                 txq = dev->data->tx_queues[tx_queue_id];
4847                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4848                 txdctl |= IXGBE_TXDCTL_ENABLE;
4849                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4850
4851                 /* Wait until TX Enable ready */
4852                 if (hw->mac.type == ixgbe_mac_82599EB) {
4853                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4854                         do {
4855                                 rte_delay_ms(1);
4856                                 txdctl = IXGBE_READ_REG(hw,
4857                                         IXGBE_TXDCTL(txq->reg_idx));
4858                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
4859                         if (!poll_ms)
4860                                 PMD_INIT_LOG(ERR, "Could not enable "
4861                                              "Tx Queue %d", tx_queue_id);
4862                 }
4863                 rte_wmb();
4864                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4865                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4866                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4867         } else
4868                 return -1;
4869
4870         return 0;
4871 }
4872
4873 /*
4874  * Stop Transmit Units for specified queue.
4875  */
4876 int __attribute__((cold))
4877 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4878 {
4879         struct ixgbe_hw     *hw;
4880         struct ixgbe_tx_queue *txq;
4881         uint32_t txdctl;
4882         uint32_t txtdh, txtdt;
4883         int poll_ms;
4884
4885         PMD_INIT_FUNC_TRACE();
4886         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4887
4888         if (tx_queue_id >= dev->data->nb_tx_queues)
4889                 return -1;
4890
4891         txq = dev->data->tx_queues[tx_queue_id];
4892
4893         /* Wait until TX queue is empty */
4894         if (hw->mac.type == ixgbe_mac_82599EB) {
4895                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4896                 do {
4897                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
4898                         txtdh = IXGBE_READ_REG(hw,
4899                                                IXGBE_TDH(txq->reg_idx));
4900                         txtdt = IXGBE_READ_REG(hw,
4901                                                IXGBE_TDT(txq->reg_idx));
4902                 } while (--poll_ms && (txtdh != txtdt));
4903                 if (!poll_ms)
4904                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
4905                                      "when stopping.", tx_queue_id);
4906         }
4907
4908         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4909         txdctl &= ~IXGBE_TXDCTL_ENABLE;
4910         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4911
4912         /* Wait until TX Enable bit clear */
4913         if (hw->mac.type == ixgbe_mac_82599EB) {
4914                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4915                 do {
4916                         rte_delay_ms(1);
4917                         txdctl = IXGBE_READ_REG(hw,
4918                                                 IXGBE_TXDCTL(txq->reg_idx));
4919                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
4920                 if (!poll_ms)
4921                         PMD_INIT_LOG(ERR, "Could not disable "
4922                                      "Tx Queue %d", tx_queue_id);
4923         }
4924
4925         if (txq->ops != NULL) {
4926                 txq->ops->release_mbufs(txq);
4927                 txq->ops->reset(txq);
4928         }
4929         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4930
4931         return 0;
4932 }
4933
4934 void
4935 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4936         struct rte_eth_rxq_info *qinfo)
4937 {
4938         struct ixgbe_rx_queue *rxq;
4939
4940         rxq = dev->data->rx_queues[queue_id];
4941
4942         qinfo->mp = rxq->mb_pool;
4943         qinfo->scattered_rx = dev->data->scattered_rx;
4944         qinfo->nb_desc = rxq->nb_rx_desc;
4945
4946         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4947         qinfo->conf.rx_drop_en = rxq->drop_en;
4948         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4949 }
4950
4951 void
4952 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4953         struct rte_eth_txq_info *qinfo)
4954 {
4955         struct ixgbe_tx_queue *txq;
4956
4957         txq = dev->data->tx_queues[queue_id];
4958
4959         qinfo->nb_desc = txq->nb_tx_desc;
4960
4961         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4962         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4963         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4964
4965         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4966         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
4967         qinfo->conf.txq_flags = txq->txq_flags;
4968         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
4969 }
4970
4971 /*
4972  * [VF] Initializes Receive Unit.
4973  */
4974 int __attribute__((cold))
4975 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
4976 {
4977         struct ixgbe_hw     *hw;
4978         struct ixgbe_rx_queue *rxq;
4979         uint64_t bus_addr;
4980         uint32_t srrctl, psrtype = 0;
4981         uint16_t buf_size;
4982         uint16_t i;
4983         int ret;
4984
4985         PMD_INIT_FUNC_TRACE();
4986         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4987
4988         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
4989                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
4990                         "it should be a power of 2");
4991                 return -1;
4992         }
4993
4994         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
4995                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
4996                         "it should be equal to or less than %d",
4997                         hw->mac.max_rx_queues);
4998                 return -1;
4999         }
5000
5001         /*
5002          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5003          * disables VF receipt of packets if the PF MTU is > 1500.
5004          * This is done to deal with the 82599 limitation that requires
5005          * the PF and all VFs to share the same MTU.
5006          * Then, the PF driver re-enables VF packet receipt when the VF
5007          * driver issues an IXGBE_VF_SET_LPE request.
5008          * In the meantime, the VF device cannot be used, even if the VF driver
5009          * and the Guest VM network stack are ready to accept packets with a
5010          * size up to the PF MTU.
5011          * As a work-around to this PF behaviour, force the call to
5012          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5013          * VF packet reception works in all cases.
5014          */
5015         ixgbevf_rlpml_set_vf(hw,
5016                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5017
5018         /* Setup RX queues */
5019         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5020                 rxq = dev->data->rx_queues[i];
5021
5022                 /* Allocate buffers for descriptor rings */
5023                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5024                 if (ret)
5025                         return ret;
5026
5027                 /* Setup the Base and Length of the Rx Descriptor Rings */
5028                 bus_addr = rxq->rx_ring_phys_addr;
5029
5030                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5031                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5032                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5033                                 (uint32_t)(bus_addr >> 32));
5034                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5035                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5036                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5037                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5038
5039
5040                 /* Configure the SRRCTL register */
5041 #ifdef RTE_HEADER_SPLIT_ENABLE
5042                 /*
5043                  * Configure Header Split
5044                  */
5045                 if (dev->data->dev_conf.rxmode.header_split) {
5046                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5047                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5048                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5049                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5050                 } else
5051 #endif
5052                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5053
5054                 /* Set if packets are dropped when no descriptors available */
5055                 if (rxq->drop_en)
5056                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5057
5058                 /*
5059                  * Configure the RX buffer size in the BSIZEPACKET field of
5060                  * the SRRCTL register of the queue.
5061                  * The value is in 1 KB resolution. Valid values can be from
5062                  * 1 KB to 16 KB.
5063                  */
5064                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5065                         RTE_PKTMBUF_HEADROOM);
5066                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5067                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5068
5069                 /*
5070                  * VF modification to write virtual function SRRCTL register
5071                  */
5072                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5073
5074                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5075                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5076
5077                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5078                     /* Add dual VLAN tag length to account for dual-VLAN (QinQ) frames */
5079                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5080                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5081                         if (!dev->data->scattered_rx)
5082                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5083                         dev->data->scattered_rx = 1;
5084                 }
5085         }
5086
5087 #ifdef RTE_HEADER_SPLIT_ENABLE
5088         if (dev->data->dev_conf.rxmode.header_split)
5089                 /* Must setup the PSRTYPE register */
5090                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5091                         IXGBE_PSRTYPE_UDPHDR   |
5092                         IXGBE_PSRTYPE_IPV4HDR  |
5093                         IXGBE_PSRTYPE_IPV6HDR;
5094 #endif
5095
5096         /* Set the RQPL field for VF RSS according to the number of Rx queues */
5097         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5098                 IXGBE_PSRTYPE_RQPL_SHIFT;
5099         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5100
5101         ixgbe_set_rx_function(dev);
5102
5103         return 0;
5104 }
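/*
 * Worked example for the BSIZEPACKET/scatter logic above (all numbers are
 * illustrative assumptions, not taken from this file): with a mempool whose
 * data room is RTE_MBUF_DEFAULT_BUF_SIZE (2048 B of data plus a 128 B
 * RTE_PKTMBUF_HEADROOM on common configurations), buf_size is
 * 2176 - 128 = 2048, so BSIZEPACKET = 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT
 * (1 KB units) = 2 and the queue uses 2 KB receive buffers.  A
 * max_rx_pkt_len of 1518 plus 2 * IXGBE_VLAN_TAG_SIZE (8 B) still fits in
 * one buffer, whereas a jumbo setting such as 9000 B exceeds it and forces
 * scattered_rx.  The function name below is hypothetical.
 */
#if 0   /* illustrative sketch only, never compiled */
static uint32_t
example_vf_bsizepkt(uint16_t data_room, uint16_t headroom)
{
        /* e.g. data_room = 2176, headroom = 128 -> returns 2 (2 KB buffers) */
        uint16_t buf_size = (uint16_t)(data_room - headroom);

        return (buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
                IXGBE_SRRCTL_BSIZEPKT_MASK;
}
#endif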
5105
5106 /*
5107  * [VF] Initializes Transmit Unit.
5108  */
5109 void __attribute__((cold))
5110 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5111 {
5112         struct ixgbe_hw     *hw;
5113         struct ixgbe_tx_queue *txq;
5114         uint64_t bus_addr;
5115         uint32_t txctrl;
5116         uint16_t i;
5117
5118         PMD_INIT_FUNC_TRACE();
5119         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5120
5121         /* Setup the Base and Length of the Tx Descriptor Rings */
5122         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5123                 txq = dev->data->tx_queues[i];
5124                 bus_addr = txq->tx_ring_phys_addr;
5125                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5126                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5127                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5128                                 (uint32_t)(bus_addr >> 32));
5129                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5130                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5131                 /* Set up the HW Tx Head and Tx Tail descriptor pointers */
5132                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5133                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5134
5135                 /*
5136                  * Disable the Tx head write-back relaxed ordering (RO) bit;
5137                  * out-of-order descriptor write-backs break the bookkeeping.
5138                  */
5139                 txctrl = IXGBE_READ_REG(hw,
5140                                 IXGBE_VFDCA_TXCTRL(i));
5141                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5142                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5143                                 txctrl);
5144         }
5145 }
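/*
 * Worked example for the base-address programming above (address and ring
 * size are illustrative assumptions): a Tx ring of 512 descriptors placed at
 * bus address 0x0000000123456000 is programmed as VFTDBAL = 0x23456000
 * (low 32 bits), VFTDBAH = 0x00000001 (high 32 bits) and
 * VFTDLEN = 512 * sizeof(union ixgbe_adv_tx_desc) = 512 * 16 = 8192 bytes.
 * The helper name below is hypothetical.
 */
#if 0   /* illustrative sketch only, never compiled */
static void
example_split_bus_addr(uint64_t bus_addr, uint32_t *bal, uint32_t *bah)
{
        *bal = (uint32_t)(bus_addr & 0x00000000ffffffffULL);   /* 0x23456000 */
        *bah = (uint32_t)(bus_addr >> 32);                      /* 0x00000001 */
}
#endif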
5146
5147 /*
5148  * [VF] Start Transmit and Receive Units.
5149  */
5150 void __attribute__((cold))
5151 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5152 {
5153         struct ixgbe_hw     *hw;
5154         struct ixgbe_tx_queue *txq;
5155         struct ixgbe_rx_queue *rxq;
5156         uint32_t txdctl;
5157         uint32_t rxdctl;
5158         uint16_t i;
5159         int poll_ms;
5160
5161         PMD_INIT_FUNC_TRACE();
5162         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5163
5164         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5165                 txq = dev->data->tx_queues[i];
5166                 /* Setup Transmit Threshold Registers */
5167                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5168                 txdctl |= txq->pthresh & 0x7F;
5169                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5170                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5171                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5172         }
5173
5174         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5176                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5177                 txdctl |= IXGBE_TXDCTL_ENABLE;
5178                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5179
5180                 poll_ms = 10;
5181                 /* Wait until the Tx queue is enabled */
5182                 do {
5183                         rte_delay_ms(1);
5184                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5185                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5186                 if (!poll_ms)
5187                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5188         }
5189         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5191                 rxq = dev->data->rx_queues[i];
5192
5193                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5194                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5195                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5196
5197                 /* Wait until the Rx queue is enabled */
5198                 poll_ms = 10;
5199                 do {
5200                         rte_delay_ms(1);
5201                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5202                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5203                 if (!poll_ms)
5204                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5205                 rte_wmb();
5206                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5208         }
5209 }
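/*
 * For context, a minimal sketch of the application-side calls that lead into
 * ixgbevf_dev_rx_init(), ixgbevf_dev_tx_init() and ixgbevf_dev_rxtx_start()
 * via rte_eth_dev_start().  The port id, single queue pair, 512-descriptor
 * rings and the example_* name are assumptions for illustration only.
 */
#if 0   /* illustrative sketch only, never compiled */
static int
example_vf_port_setup(uint8_t port_id, struct rte_mempool *mb_pool)
{
        struct rte_eth_conf conf;
        int ret;

        memset(&conf, 0, sizeof(conf));
        ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
        if (ret < 0)
                return ret;
        ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
                                     NULL, mb_pool);
        if (ret < 0)
                return ret;
        ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), NULL);
        if (ret < 0)
                return ret;
        /* rte_eth_dev_start() invokes the VF Rx/Tx init and start paths */
        return rte_eth_dev_start(port_id);
}
#endif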
5210
5211 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5212 int __attribute__((weak))
5213 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5214 {
5215         return -1;
5216 }
5217
5218 uint16_t __attribute__((weak))
5219 ixgbe_recv_pkts_vec(
5220         void __rte_unused *rx_queue,
5221         struct rte_mbuf __rte_unused **rx_pkts,
5222         uint16_t __rte_unused nb_pkts)
5223 {
5224         return 0;
5225 }
5226
5227 uint16_t __attribute__((weak))
5228 ixgbe_recv_scattered_pkts_vec(
5229         void __rte_unused *rx_queue,
5230         struct rte_mbuf __rte_unused **rx_pkts,
5231         uint16_t __rte_unused nb_pkts)
5232 {
5233         return 0;
5234 }
5235
5236 int __attribute__((weak))
5237 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5238 {
5239         return -1;
5240 }
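/*
 * Note on the weak stubs above: when the vector Rx object file is built,
 * its strong definitions of these symbols take precedence at link time;
 * otherwise the stubs are linked and their error returns keep
 * ixgbe_set_rx_function() on the scalar receive paths.  A minimal sketch of
 * that linker behaviour (hypothetical symbol name, not part of this driver):
 */
#if 0   /* illustrative sketch only, never compiled */
/* in the always-built object file: weak fallback */
int __attribute__((weak)) example_probe(void) { return -1; }

/* in the optionally built object file: strong definition the linker picks */
int example_probe(void) { return 0; }
#endif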