1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit mask to indicate which bits are required for building the TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 PKT_TX_SEC_OFFLOAD |     \
97                 IXGBE_TX_IEEE1588_TMST)
98
99 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
100                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
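/*
 * Any flag outside IXGBE_TX_OFFLOAD_MASK is not handled by this driver;
 * ixgbe_prep_pkts() uses this complement mask to reject such packets
 * before they reach the transmit path.
 */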
101
102 #if 1
103 #define RTE_PMD_USE_PREFETCH
104 #endif
105
106 #ifdef RTE_PMD_USE_PREFETCH
107 /*
108  * Prefetch a cache line into all cache levels.
109  */
110 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
111 #else
112 #define rte_ixgbe_prefetch(p)   do {} while (0)
113 #endif
114
115 #ifdef RTE_IXGBE_INC_VECTOR
116 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
117                                     uint16_t nb_pkts);
118 #endif
119
120 /*********************************************************************
121  *
122  *  TX functions
123  *
124  **********************************************************************/
125
126 /*
127  * Check for descriptors with their DD bit set and free mbufs.
128  * Return the total number of buffers freed.
129  */
130 static __rte_always_inline int
131 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
132 {
133         struct ixgbe_tx_entry *txep;
134         uint32_t status;
135         int i, nb_free = 0;
136         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
137
138         /* check DD bit on threshold descriptor */
139         status = txq->tx_ring[txq->tx_next_dd].wb.status;
140         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
141                 return 0;
142
143         /*
144          * first buffer to free from S/W ring is at index
145          * tx_next_dd - (tx_rs_thresh-1)
146          */
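        /*
         * Worked example (illustrative values): with tx_rs_thresh = 32 and
         * tx_next_dd = 31, the loop below frees sw_ring entries 0..31, i.e.
         * one tx_rs_thresh-sized block ending at the threshold descriptor
         * whose DD bit was just checked.
         */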
147         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
148
149         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
150                 /* free buffers one at a time */
151                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
152                 txep->mbuf = NULL;
153
154                 if (unlikely(m == NULL))
155                         continue;
156
157                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
158                     (nb_free > 0 && m->pool != free[0]->pool)) {
159                         rte_mempool_put_bulk(free[0]->pool,
160                                              (void **)free, nb_free);
161                         nb_free = 0;
162                 }
163
164                 free[nb_free++] = m;
165         }
166
167         if (nb_free > 0)
168                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
169
170         /* buffers were freed, update counters */
171         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
172         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
173         if (txq->tx_next_dd >= txq->nb_tx_desc)
174                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
175
176         return txq->tx_rs_thresh;
177 }
178
179 /* Populate 4 descriptors with data from 4 mbufs */
180 static inline void
181 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
182 {
183         uint64_t buf_dma_addr;
184         uint32_t pkt_len;
185         int i;
186
187         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
188                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
189                 pkt_len = (*pkts)->data_len;
190
191                 /* write data to descriptor */
192                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
193
194                 txdp->read.cmd_type_len =
195                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
196
197                 txdp->read.olinfo_status =
198                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
199
200                 rte_prefetch0(&(*pkts)->pool);
201         }
202 }
203
204 /* Populate 1 descriptor with data from 1 mbuf */
205 static inline void
206 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
207 {
208         uint64_t buf_dma_addr;
209         uint32_t pkt_len;
210
211         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
212         pkt_len = (*pkts)->data_len;
213
214         /* write data to descriptor */
215         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
216         txdp->read.cmd_type_len =
217                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
218         txdp->read.olinfo_status =
219                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
220         rte_prefetch0(&(*pkts)->pool);
221 }
222
223 /*
224  * Fill H/W descriptor ring with mbuf data.
225  * Copy mbuf pointers to the S/W ring.
226  */
227 static inline void
228 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
229                       uint16_t nb_pkts)
230 {
231         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
232         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
233         const int N_PER_LOOP = 4;
234         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
235         int mainpart, leftover;
236         int i, j;
237
238         /*
239          * Process most of the packets in chunks of N pkts.  Any
240          * leftover packets will get processed one at a time.
241          */
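        /*
         * Example (illustrative values): with nb_pkts = 10 and N_PER_LOOP = 4,
         * mainpart = 8 (two tx4() calls) and leftover = 2 (two tx1() calls).
         */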
242         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
243         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
244         for (i = 0; i < mainpart; i += N_PER_LOOP) {
245                 /* Copy N mbuf pointers to the S/W ring */
246                 for (j = 0; j < N_PER_LOOP; ++j) {
247                         (txep + i + j)->mbuf = *(pkts + i + j);
248                 }
249                 tx4(txdp + i, pkts + i);
250         }
251
252         if (unlikely(leftover > 0)) {
253                 for (i = 0; i < leftover; ++i) {
254                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
255                         tx1(txdp + mainpart + i, pkts + mainpart + i);
256                 }
257         }
258 }
259
260 static inline uint16_t
261 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
262              uint16_t nb_pkts)
263 {
264         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
265         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
266         uint16_t n = 0;
267
268         /*
269          * Begin scanning the H/W ring for done descriptors when the
270          * number of available descriptors drops below tx_free_thresh.  For
271          * each done descriptor, free the associated buffer.
272          */
273         if (txq->nb_tx_free < txq->tx_free_thresh)
274                 ixgbe_tx_free_bufs(txq);
275
276         /* Only use descriptors that are available */
277         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
278         if (unlikely(nb_pkts == 0))
279                 return 0;
280
281         /* Use exactly nb_pkts descriptors */
282         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
283
284         /*
285          * At this point, we know there are enough descriptors in the
286          * ring to transmit all the packets.  This assumes that each
287          * mbuf contains a single segment, and that no new offloads
288          * are expected, which would require a new context descriptor.
289          */
290
291         /*
292          * See if we're going to wrap-around. If so, handle the top
293          * of the descriptor ring first, then do the bottom.  If not,
294          * the processing looks just like the "bottom" part anyway...
295          */
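        /*
         * Illustrative values: with nb_tx_desc = 512, tx_tail = 510 and
         * nb_pkts = 4, n = 2 descriptors are filled at the top of the ring,
         * tx_tail wraps to 0, and the remaining 2 packets are filled from
         * index 0 below.
         */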
296         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
297                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
298                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
299
300                 /*
301                  * We know that the last descriptor in the ring will need to
302                  * have its RS bit set because tx_rs_thresh has to be
303                  * a divisor of the ring size
304                  */
305                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
306                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
307                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
308
309                 txq->tx_tail = 0;
310         }
311
312         /* Fill H/W descriptor ring with mbuf data */
313         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
314         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
315
316         /*
317          * Determine if RS bit should be set
318          * This is what we actually want:
319          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
320          * but instead of subtracting 1 and doing >=, we can just do
321          * greater than without subtracting.
322          */
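        /*
         * Example (illustrative values): with tx_rs_thresh = 32 and
         * tx_next_rs = 31, a burst that advances tx_tail to 40 satisfies
         * 40 > 31, so RS is set on descriptor 31 and tx_next_rs moves to 63.
         */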
323         if (txq->tx_tail > txq->tx_next_rs) {
324                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
325                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
326                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
327                                                 txq->tx_rs_thresh);
328                 if (txq->tx_next_rs >= txq->nb_tx_desc)
329                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
330         }
331
332         /*
333          * Check for wrap-around. This would only happen if we used
334          * up to the last descriptor in the ring, no more, no less.
335          */
336         if (txq->tx_tail >= txq->nb_tx_desc)
337                 txq->tx_tail = 0;
338
339         /* update tail pointer */
340         rte_wmb();
341         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
342
343         return nb_pkts;
344 }
345
346 uint16_t
347 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
348                        uint16_t nb_pkts)
349 {
350         uint16_t nb_tx;
351
352         /* Transmit the whole burst directly if it fits within TX_MAX_BURST pkts */
353         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
354                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
355
356         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
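        /*
         * For example, assuming the default RTE_PMD_IXGBE_TX_MAX_BURST of 32,
         * nb_pkts = 100 is sent as chunks of 32, 32, 32 and 4; the loop stops
         * early if a chunk is only partially transmitted.
         */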
357         nb_tx = 0;
358         while (nb_pkts) {
359                 uint16_t ret, n;
360
361                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
362                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
363                 nb_tx = (uint16_t)(nb_tx + ret);
364                 nb_pkts = (uint16_t)(nb_pkts - ret);
365                 if (ret < n)
366                         break;
367         }
368
369         return nb_tx;
370 }
371
372 #ifdef RTE_IXGBE_INC_VECTOR
373 static uint16_t
374 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
375                     uint16_t nb_pkts)
376 {
377         uint16_t nb_tx = 0;
378         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
379
380         while (nb_pkts) {
381                 uint16_t ret, num;
382
383                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
384                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
385                                                  num);
386                 nb_tx += ret;
387                 nb_pkts -= ret;
388                 if (ret < num)
389                         break;
390         }
391
392         return nb_tx;
393 }
394 #endif
395
396 static inline void
397 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
398                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
399                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
400                 union ixgbe_crypto_tx_desc_md *mdata)
401 {
402         uint32_t type_tucmd_mlhl;
403         uint32_t mss_l4len_idx = 0;
404         uint32_t ctx_idx;
405         uint32_t vlan_macip_lens;
406         union ixgbe_tx_offload tx_offload_mask;
407         uint32_t seqnum_seed = 0;
408
409         ctx_idx = txq->ctx_curr;
410         tx_offload_mask.data[0] = 0;
411         tx_offload_mask.data[1] = 0;
412         type_tucmd_mlhl = 0;
413
414         /* Specify which HW CTX to upload. */
415         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
416
417         if (ol_flags & PKT_TX_VLAN_PKT) {
418                 tx_offload_mask.vlan_tci |= ~0;
419         }
420
421         /* check if TCP segmentation required for this packet */
422         if (ol_flags & PKT_TX_TCP_SEG) {
423                 /* implies IP cksum in IPv4 */
424                 if (ol_flags & PKT_TX_IP_CKSUM)
425                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
426                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
427                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428                 else
429                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
430                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
431                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
432
433                 tx_offload_mask.l2_len |= ~0;
434                 tx_offload_mask.l3_len |= ~0;
435                 tx_offload_mask.l4_len |= ~0;
436                 tx_offload_mask.tso_segsz |= ~0;
437                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
438                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
439         } else { /* no TSO, check if hardware checksum is needed */
440                 if (ol_flags & PKT_TX_IP_CKSUM) {
441                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
442                         tx_offload_mask.l2_len |= ~0;
443                         tx_offload_mask.l3_len |= ~0;
444                 }
445
446                 switch (ol_flags & PKT_TX_L4_MASK) {
447                 case PKT_TX_UDP_CKSUM:
448                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
449                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
450                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
451                         tx_offload_mask.l2_len |= ~0;
452                         tx_offload_mask.l3_len |= ~0;
453                         break;
454                 case PKT_TX_TCP_CKSUM:
455                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
456                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
457                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
458                         tx_offload_mask.l2_len |= ~0;
459                         tx_offload_mask.l3_len |= ~0;
460                         break;
461                 case PKT_TX_SCTP_CKSUM:
462                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
463                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
464                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
465                         tx_offload_mask.l2_len |= ~0;
466                         tx_offload_mask.l3_len |= ~0;
467                         break;
468                 default:
469                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
470                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
471                         break;
472                 }
473         }
474
475         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
476                 tx_offload_mask.outer_l2_len |= ~0;
477                 tx_offload_mask.outer_l3_len |= ~0;
478                 tx_offload_mask.l2_len |= ~0;
479                 seqnum_seed |= tx_offload.outer_l3_len
480                                << IXGBE_ADVTXD_OUTER_IPLEN;
481                 seqnum_seed |= tx_offload.l2_len
482                                << IXGBE_ADVTXD_TUNNEL_LEN;
483         }
484         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
485                 seqnum_seed |=
486                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & mdata->sa_idx);
487                 type_tucmd_mlhl |= mdata->enc ?
488                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
489                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
490                 type_tucmd_mlhl |=
491                         (mdata->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
492                 tx_offload_mask.sa_idx |= ~0;
493                 tx_offload_mask.sec_pad_len |= ~0;
494         }
495
496         txq->ctx_cache[ctx_idx].flags = ol_flags;
497         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
498                 tx_offload_mask.data[0] & tx_offload.data[0];
499         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
500                 tx_offload_mask.data[1] & tx_offload.data[1];
501         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
502
503         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
504         vlan_macip_lens = tx_offload.l3_len;
505         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
506                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
507                                     IXGBE_ADVTXD_MACLEN_SHIFT);
508         else
509                 vlan_macip_lens |= (tx_offload.l2_len <<
510                                     IXGBE_ADVTXD_MACLEN_SHIFT);
511         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
512         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
513         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
514         ctx_txd->seqnum_seed     = seqnum_seed;
515 }
516
517 /*
518  * Check which hardware context can be used. Use the existing match
519  * or create a new context descriptor.
520  */
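/*
 * Note: each queue caches IXGBE_CTX_NUM (two) context descriptors and
 * txq->ctx_curr toggles between them below.  Returning IXGBE_CTX_NUM tells
 * the caller that neither cached context matches, so a new context
 * descriptor has to be written.
 */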
521 static inline uint32_t
522 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
523                    union ixgbe_tx_offload tx_offload)
524 {
525         /* Check for a match with the currently used context */
526         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
527                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
528                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
529                      & tx_offload.data[0])) &&
530                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
531                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
532                      & tx_offload.data[1]))))
533                 return txq->ctx_curr;
534
535         /* Otherwise check whether the other cached context matches */
536         txq->ctx_curr ^= 1;
537         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
538                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
539                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
540                      & tx_offload.data[0])) &&
541                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
542                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
543                      & tx_offload.data[1]))))
544                 return txq->ctx_curr;
545
546         /* Neither cached context matches: a new context descriptor is needed */
547         return IXGBE_CTX_NUM;
548 }
549
550 static inline uint32_t
551 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
552 {
553         uint32_t tmp = 0;
554
555         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
556                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
557         if (ol_flags & PKT_TX_IP_CKSUM)
558                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
559         if (ol_flags & PKT_TX_TCP_SEG)
560                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
561         return tmp;
562 }
563
564 static inline uint32_t
565 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
566 {
567         uint32_t cmdtype = 0;
568
569         if (ol_flags & PKT_TX_VLAN_PKT)
570                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
571         if (ol_flags & PKT_TX_TCP_SEG)
572                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
573         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
574                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
575         if (ol_flags & PKT_TX_MACSEC)
576                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
577         return cmdtype;
578 }
579
580 /* Default RS bit threshold values */
581 #ifndef DEFAULT_TX_RS_THRESH
582 #define DEFAULT_TX_RS_THRESH   32
583 #endif
584 #ifndef DEFAULT_TX_FREE_THRESH
585 #define DEFAULT_TX_FREE_THRESH 32
586 #endif
587
588 /* Reset transmit descriptors after they have been used */
589 static inline int
590 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
591 {
592         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
593         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
594         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
595         uint16_t nb_tx_desc = txq->nb_tx_desc;
596         uint16_t desc_to_clean_to;
597         uint16_t nb_tx_to_clean;
598         uint32_t status;
599
600         /* Determine the last descriptor needing to be cleaned */
601         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
602         if (desc_to_clean_to >= nb_tx_desc)
603                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
604
605         /* Check to make sure the last descriptor to clean is done */
606         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
607         status = txr[desc_to_clean_to].wb.status;
608         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
609                 PMD_TX_FREE_LOG(DEBUG,
610                                 "TX descriptor %4u is not done "
611                                 "(port=%d queue=%d)",
612                                 desc_to_clean_to,
613                                 txq->port_id, txq->queue_id);
614                 /* Failed to clean any descriptors, better luck next time */
615                 return -(1);
616         }
617
618         /* Figure out how many descriptors will be cleaned */
619         if (last_desc_cleaned > desc_to_clean_to)
620                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
621                                                         desc_to_clean_to);
622         else
623                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
624                                                 last_desc_cleaned);
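        /*
         * Wrap-around example (illustrative values): with nb_tx_desc = 512,
         * last_desc_cleaned = 500 and desc_to_clean_to = 20, the first branch
         * gives nb_tx_to_clean = (512 - 500) + 20 = 32.
         */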
625
626         PMD_TX_FREE_LOG(DEBUG,
627                         "Cleaning %4u TX descriptors: %4u to %4u "
628                         "(port=%d queue=%d)",
629                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
630                         txq->port_id, txq->queue_id);
631
632         /*
633          * The last descriptor to clean is done, so that means all the
634          * descriptors from the last descriptor that was cleaned
635          * up to the last descriptor with the RS bit set
636          * are done. Only reset the threshold descriptor.
637          */
638         txr[desc_to_clean_to].wb.status = 0;
639
640         /* Update the txq to reflect the last descriptor that was cleaned */
641         txq->last_desc_cleaned = desc_to_clean_to;
642         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
643
644         /* No Error */
645         return 0;
646 }
647
648 uint16_t
649 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
650                 uint16_t nb_pkts)
651 {
652         struct ixgbe_tx_queue *txq;
653         struct ixgbe_tx_entry *sw_ring;
654         struct ixgbe_tx_entry *txe, *txn;
655         volatile union ixgbe_adv_tx_desc *txr;
656         volatile union ixgbe_adv_tx_desc *txd, *txp;
657         struct rte_mbuf     *tx_pkt;
658         struct rte_mbuf     *m_seg;
659         uint64_t buf_dma_addr;
660         uint32_t olinfo_status;
661         uint32_t cmd_type_len;
662         uint32_t pkt_len;
663         uint16_t slen;
664         uint64_t ol_flags;
665         uint16_t tx_id;
666         uint16_t tx_last;
667         uint16_t nb_tx;
668         uint16_t nb_used;
669         uint64_t tx_ol_req;
670         uint32_t ctx = 0;
671         uint32_t new_ctx;
672         union ixgbe_tx_offload tx_offload;
673         uint8_t use_ipsec;
674
675         tx_offload.data[0] = 0;
676         tx_offload.data[1] = 0;
677         txq = tx_queue;
678         sw_ring = txq->sw_ring;
679         txr     = txq->tx_ring;
680         tx_id   = txq->tx_tail;
681         txe = &sw_ring[tx_id];
682         txp = NULL;
683
684         /* Determine if the descriptor ring needs to be cleaned. */
685         if (txq->nb_tx_free < txq->tx_free_thresh)
686                 ixgbe_xmit_cleanup(txq);
687
688         rte_prefetch0(&txe->mbuf->pool);
689
690         /* TX loop */
691         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
692                 new_ctx = 0;
693                 tx_pkt = *tx_pkts++;
694                 pkt_len = tx_pkt->pkt_len;
695
696                 /*
697                  * Determine how many (if any) context descriptors
698                  * are needed for offload functionality.
699                  */
700                 ol_flags = tx_pkt->ol_flags;
701                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
702
703                 /* If hardware offload required */
704                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
705                 if (tx_ol_req) {
706                         tx_offload.l2_len = tx_pkt->l2_len;
707                         tx_offload.l3_len = tx_pkt->l3_len;
708                         tx_offload.l4_len = tx_pkt->l4_len;
709                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
710                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
711                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
712                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
713                         if (use_ipsec) {
714                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
715                                         (union ixgbe_crypto_tx_desc_md *)
716                                                         &tx_pkt->udata64;
717                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
718                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
719                         }
720
721                         /* Decide whether a new context must be built or an existing one reused. */
722                         ctx = what_advctx_update(txq, tx_ol_req,
723                                 tx_offload);
724                         /* Only allocate a context descriptor if required */
725                         new_ctx = (ctx == IXGBE_CTX_NUM);
726                         ctx = txq->ctx_curr;
727                 }
728
729                 /*
730                  * Keep track of how many descriptors are used this loop
731                  * This will always be the number of segments + the number of
732                  * Context descriptors required to transmit the packet
733                  */
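                /*
                 * For instance, a 3-segment packet that also needs a fresh
                 * context descriptor (new_ctx == 1) consumes 4 descriptors.
                 */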
734                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
735
736                 if (txp != NULL &&
737                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
738                         /* set RS on the previous packet in the burst */
739                         txp->read.cmd_type_len |=
740                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
741
742                 /*
743                  * The number of descriptors that must be allocated for a
744                  * packet is the number of segments of that packet, plus 1
745                  * Context Descriptor for the hardware offload, if any.
746                  * Determine the last TX descriptor to allocate in the TX ring
747                  * for the packet, starting from the current position (tx_id)
748                  * in the ring.
749                  */
750                 tx_last = (uint16_t) (tx_id + nb_used - 1);
751
752                 /* Circular ring */
753                 if (tx_last >= txq->nb_tx_desc)
754                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
755
756                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
757                            " tx_first=%u tx_last=%u",
758                            (unsigned) txq->port_id,
759                            (unsigned) txq->queue_id,
760                            (unsigned) pkt_len,
761                            (unsigned) tx_id,
762                            (unsigned) tx_last);
763
764                 /*
765                  * Make sure there are enough TX descriptors available to
766                  * transmit the entire packet.
767                  * nb_used better be less than or equal to txq->tx_rs_thresh
768                  */
769                 if (nb_used > txq->nb_tx_free) {
770                         PMD_TX_FREE_LOG(DEBUG,
771                                         "Not enough free TX descriptors "
772                                         "nb_used=%4u nb_free=%4u "
773                                         "(port=%d queue=%d)",
774                                         nb_used, txq->nb_tx_free,
775                                         txq->port_id, txq->queue_id);
776
777                         if (ixgbe_xmit_cleanup(txq) != 0) {
778                                 /* Could not clean any descriptors */
779                                 if (nb_tx == 0)
780                                         return 0;
781                                 goto end_of_tx;
782                         }
783
784                         /* nb_used better be <= txq->tx_rs_thresh */
785                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
786                                 PMD_TX_FREE_LOG(DEBUG,
787                                         "The number of descriptors needed to "
788                                         "transmit the packet exceeds the "
789                                         "RS bit threshold. This will impact "
790                                         "performance."
791                                         "nb_used=%4u nb_free=%4u "
792                                         "tx_rs_thresh=%4u. "
793                                         "(port=%d queue=%d)",
794                                         nb_used, txq->nb_tx_free,
795                                         txq->tx_rs_thresh,
796                                         txq->port_id, txq->queue_id);
797                                 /*
798                                  * Loop here until there are enough TX
799                                  * descriptors or until the ring cannot be
800                                  * cleaned.
801                                  */
802                                 while (nb_used > txq->nb_tx_free) {
803                                         if (ixgbe_xmit_cleanup(txq) != 0) {
804                                                 /*
805                                                  * Could not clean any
806                                                  * descriptors
807                                                  */
808                                                 if (nb_tx == 0)
809                                                         return 0;
810                                                 goto end_of_tx;
811                                         }
812                                 }
813                         }
814                 }
815
816                 /*
817                  * By now there are enough free TX descriptors to transmit
818                  * the packet.
819                  */
820
821                 /*
822                  * Set common flags of all TX Data Descriptors.
823                  *
824                  * The following bits must be set in all Data Descriptors:
825                  *   - IXGBE_ADVTXD_DTYP_DATA
826                  *   - IXGBE_ADVTXD_DCMD_DEXT
827                  *
828                  * The following bits must be set in the first Data Descriptor
829                  * and are ignored in the other ones:
830                  *   - IXGBE_ADVTXD_DCMD_IFCS
831                  *   - IXGBE_ADVTXD_MAC_1588
832                  *   - IXGBE_ADVTXD_DCMD_VLE
833                  *
834                  * The following bits must only be set in the last Data
835                  * Descriptor:
836                  *   - IXGBE_TXD_CMD_EOP
837                  *
838                  * The following bits can be set in any Data Descriptor, but
839                  * are only set in the last Data Descriptor:
840                  *   - IXGBE_TXD_CMD_RS
841                  */
842                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
843                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
844
845 #ifdef RTE_LIBRTE_IEEE1588
846                 if (ol_flags & PKT_TX_IEEE1588_TMST)
847                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
848 #endif
849
850                 olinfo_status = 0;
851                 if (tx_ol_req) {
852
853                         if (ol_flags & PKT_TX_TCP_SEG) {
854                                 /* when TSO is on, paylen in the descriptor is
855                                  * not the packet len but the TCP payload len */
856                                 pkt_len -= (tx_offload.l2_len +
857                                         tx_offload.l3_len + tx_offload.l4_len);
858                         }
859
860                         /*
861                          * Setup the TX Advanced Context Descriptor if required
862                          */
863                         if (new_ctx) {
864                                 volatile struct ixgbe_adv_tx_context_desc *
865                                     ctx_txd;
866
867                                 ctx_txd = (volatile struct
868                                     ixgbe_adv_tx_context_desc *)
869                                     &txr[tx_id];
870
871                                 txn = &sw_ring[txe->next_id];
872                                 rte_prefetch0(&txn->mbuf->pool);
873
874                                 if (txe->mbuf != NULL) {
875                                         rte_pktmbuf_free_seg(txe->mbuf);
876                                         txe->mbuf = NULL;
877                                 }
878
879                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
880                                         tx_offload,
881                                         (union ixgbe_crypto_tx_desc_md *)
882                                         &tx_pkt->udata64);
883
884                                 txe->last_id = tx_last;
885                                 tx_id = txe->next_id;
886                                 txe = txn;
887                         }
888
889                         /*
890                          * Set up the TX Advanced Data Descriptor.
891                          * This path is taken whether the context
892                          * descriptor is newly built or reused.
893                          */
894                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
895                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
896                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
897                 }
898
899                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
900                 if (use_ipsec)
901                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
902
903                 m_seg = tx_pkt;
904                 do {
905                         txd = &txr[tx_id];
906                         txn = &sw_ring[txe->next_id];
907                         rte_prefetch0(&txn->mbuf->pool);
908
909                         if (txe->mbuf != NULL)
910                                 rte_pktmbuf_free_seg(txe->mbuf);
911                         txe->mbuf = m_seg;
912
913                         /*
914                          * Set up Transmit Data Descriptor.
915                          */
916                         slen = m_seg->data_len;
917                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
918                         txd->read.buffer_addr =
919                                 rte_cpu_to_le_64(buf_dma_addr);
920                         txd->read.cmd_type_len =
921                                 rte_cpu_to_le_32(cmd_type_len | slen);
922                         txd->read.olinfo_status =
923                                 rte_cpu_to_le_32(olinfo_status);
924                         txe->last_id = tx_last;
925                         tx_id = txe->next_id;
926                         txe = txn;
927                         m_seg = m_seg->next;
928                 } while (m_seg != NULL);
929
930                 /*
931                  * The last packet data descriptor needs End Of Packet (EOP)
932                  */
933                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
934                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
935                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
936
937                 /* Set RS bit only on threshold packets' last descriptor */
938                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
939                         PMD_TX_FREE_LOG(DEBUG,
940                                         "Setting RS bit on TXD id="
941                                         "%4u (port=%d queue=%d)",
942                                         tx_last, txq->port_id, txq->queue_id);
943
944                         cmd_type_len |= IXGBE_TXD_CMD_RS;
945
946                         /* Update txq RS bit counters */
947                         txq->nb_tx_used = 0;
948                         txp = NULL;
949                 } else
950                         txp = txd;
951
952                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
953         }
954
955 end_of_tx:
956         /* set RS on last packet in the burst */
957         if (txp != NULL)
958                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
959
960         rte_wmb();
961
962         /*
963          * Set the Transmit Descriptor Tail (TDT)
964          */
965         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
966                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
967                    (unsigned) tx_id, (unsigned) nb_tx);
968         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
969         txq->tx_tail = tx_id;
970
971         return nb_tx;
972 }
973
974 /*********************************************************************
975  *
976  *  TX prep functions
977  *
978  **********************************************************************/
979 uint16_t
980 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
981 {
982         int i, ret;
983         uint64_t ol_flags;
984         struct rte_mbuf *m;
985         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
986
987         for (i = 0; i < nb_pkts; i++) {
988                 m = tx_pkts[i];
989                 ol_flags = m->ol_flags;
990
991                 /**
992                  * Check if packet meets requirements for number of segments
993                  *
994                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
995                  *       non-TSO
996                  */
997
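                /*
                 * With IXGBE_TX_MAX_SEG of 40 and a WTHRESH of 0 (the usual
                 * configuration), a packet may carry at most 40 segments.
                 */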
998                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
999                         rte_errno = EINVAL;
1000                         return i;
1001                 }
1002
1003                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
1004                         rte_errno = ENOTSUP;
1005                         return i;
1006                 }
1007
1008 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1009                 ret = rte_validate_tx_offload(m);
1010                 if (ret != 0) {
1011                         rte_errno = -ret;
1012                         return i;
1013                 }
1014 #endif
1015                 ret = rte_net_intel_cksum_prepare(m);
1016                 if (ret != 0) {
1017                         rte_errno = -ret;
1018                         return i;
1019                 }
1020         }
1021
1022         return i;
1023 }
1024
1025 /*********************************************************************
1026  *
1027  *  RX functions
1028  *
1029  **********************************************************************/
1030
1031 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1032 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1033 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1034 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1035 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1036 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1037 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1038 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1039 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1040 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1041 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1042 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1043 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1044 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1045 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1046 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1047 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1049 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1050 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1051 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1053 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1054 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1055 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1056 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1057 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1058 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1059 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1060 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1061 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1062 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1063 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1064
1065 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1077 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1078 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1079 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1080 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1081 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1082 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1083 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1084 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1085 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1086 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1087 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1088
1089 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1101 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1102 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1103 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1104 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1105 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1106 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1107 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1108 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1109 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1110 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1111 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1112
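/*
 * The IXGBE_PACKET_TYPE_* indices above are bit-coded: bit 4 marks TCP,
 * bit 5 UDP and bit 6 SCTP, which is why e.g. IXGBE_PACKET_TYPE_IPV4 is
 * 0x01 while IXGBE_PACKET_TYPE_IPV4_TCP is 0x11.  The RX path uses these
 * indices to translate the descriptor's packet-type field into the
 * RTE_PTYPE_* values stored in the tables below.
 */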
1113 /**
1114  * Use two different tables for normal packets and tunneled packets
1115  * to save space.
1116  */
1117 const uint32_t
1118         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1119         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1120         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4,
1122         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1124         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1126         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1128         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV4_EXT,
1130         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1132         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1134         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1136         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6,
1138         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1140         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1142         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1144         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1145                 RTE_PTYPE_L3_IPV6_EXT,
1146         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1147                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1148         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1150         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1151                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1152         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1153                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1154                 RTE_PTYPE_INNER_L3_IPV6,
1155         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1156                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1157                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1158         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1159                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1160                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1161         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1162                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1163                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1164         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1165                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1166                 RTE_PTYPE_INNER_L3_IPV6,
1167         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1168                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1169                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1170         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1171                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1172                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1173         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1174                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1175                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1176         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1178                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1179         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1180                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1181                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1182         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1183                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1184                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1185         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1186                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1187                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1188         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1189                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1190                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1191         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1192                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1193                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1194         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1195                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1196                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1197         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1198                 RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1200                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1201 };
1202
1203 const uint32_t
1204         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1205         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1206                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1207                 RTE_PTYPE_INNER_L2_ETHER,
1208         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1209                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1211         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1212                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1214         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1215                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1216                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1217         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1218                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1220         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1221                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1223         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1224                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1225                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1226         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1227                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1228                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1229                 RTE_PTYPE_INNER_L4_TCP,
1230         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1231                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1232                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1233                 RTE_PTYPE_INNER_L4_TCP,
1234         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1235                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1236                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1237         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1238                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1240                 RTE_PTYPE_INNER_L4_TCP,
1241         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1242                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1243                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1244                 RTE_PTYPE_INNER_L3_IPV4,
1245         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1246                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1247                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1248                 RTE_PTYPE_INNER_L4_UDP,
1249         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1250                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1251                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1252                 RTE_PTYPE_INNER_L4_UDP,
1253         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1254                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1255                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1256                 RTE_PTYPE_INNER_L4_SCTP,
1257         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1258                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1259                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1260         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1261                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1262                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1263                 RTE_PTYPE_INNER_L4_UDP,
1264         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1265                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1266                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1267                 RTE_PTYPE_INNER_L4_SCTP,
1268         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1269                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1270                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1271                 RTE_PTYPE_INNER_L3_IPV4,
1272         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1273                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1274                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1275                 RTE_PTYPE_INNER_L4_SCTP,
1276         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1277                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1278                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1279                 RTE_PTYPE_INNER_L4_SCTP,
1280         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1281                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1282                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1283                 RTE_PTYPE_INNER_L4_TCP,
1284         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1285                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1286                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1287                 RTE_PTYPE_INNER_L4_UDP,
1288
1289         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1290                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1292         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1293                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1294                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1295                 RTE_PTYPE_INNER_L3_IPV4,
1296         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1297                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1298                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1299                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1300         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1301                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1302                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1303                 RTE_PTYPE_INNER_L3_IPV6,
1304         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1305                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1306                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1307                 RTE_PTYPE_INNER_L3_IPV4,
1308         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1309                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1310                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1311                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1312         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1313                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1314                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1315                 RTE_PTYPE_INNER_L3_IPV4,
1316         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1317                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1318                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1319                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1320         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1321                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1322                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1323                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1324         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1325                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1326                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1327                 RTE_PTYPE_INNER_L3_IPV4,
1328         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1329                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1330                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1331                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1332         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1333                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1334                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1335                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1336         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1337                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1338                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1339                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1340         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1341                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1342                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1343                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1344         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1345                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1346                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1347                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1348         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1349                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1350                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1351                 RTE_PTYPE_INNER_L3_IPV4,
1352         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1353                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1354                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1355                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1356         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1357                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1358                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1359                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1360         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1361                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1362                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1363                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1364         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1365                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1366                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1367                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1368         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1369                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1370                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1371                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1372         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1373                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1374                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1375                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1376         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1377                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1378                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1379                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1380 };
1381
1382 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1383 static inline uint32_t
1384 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1385 {
1386
1387         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1388                 return RTE_PTYPE_UNKNOWN;
1389
1390         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1391
1392         /* For tunnel packet */
1393         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1394                 /* Mask off the tunnel bit to keep the lookup table small. */
1395                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1396                 return ptype_table_tn[pkt_info];
1397         }
1398
1399         /**
1400          * For x550, if the packet is not a tunnel packet,
1401          * the tunnel type bit should be set to 0.
1402          * Reuse the 82599 mask.
1403          */
1404         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1405
1406         return ptype_table[pkt_info];
1407 }
1408
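/*
 * Translate the packet-info word of an Rx descriptor into mbuf offload
 * flags: the 4-bit RSS-type field selects PKT_RX_RSS_HASH (or PKT_RX_FDIR
 * for the flow-director slot) and, when IEEE1588 support is compiled in,
 * ETQF-matched PTP packets additionally get PKT_RX_IEEE1588_PTP.
 */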
1409 static inline uint64_t
1410 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1411 {
1412         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1413                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1414                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1415                 PKT_RX_RSS_HASH, 0, 0, 0,
1416                 0, 0, 0,  PKT_RX_FDIR,
1417         };
1418 #ifdef RTE_LIBRTE_IEEE1588
1419         static uint64_t ip_pkt_etqf_map[8] = {
1420                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1421                 0, 0, 0, 0,
1422         };
1423
1424         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1425                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1426                                 ip_rss_types_map[pkt_info & 0XF];
1427         else
1428                 return ip_rss_types_map[pkt_info & 0XF];
1429 #else
1430         return ip_rss_types_map[pkt_info & 0XF];
1431 #endif
1432 }
1433
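/*
 * Translate Rx descriptor status bits into mbuf offload flags: the VP bit
 * selects the queue's configured VLAN flags and, when IEEE1588 support is
 * compiled in, the TMST bit adds PKT_RX_IEEE1588_TMST.
 */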
1434 static inline uint64_t
1435 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1436 {
1437         uint64_t pkt_flags;
1438
1439         /*
1440          * Check only whether a VLAN tag is present.
1441          * Do not check whether the L3/L4 Rx checksum was verified by the NIC;
1442          * that can be determined from the rte_eth_rxmode.hw_ip_checksum flag.
1443          */
1444         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1445
1446 #ifdef RTE_LIBRTE_IEEE1588
1447         if (rx_status & IXGBE_RXD_STAT_TMST)
1448                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1449 #endif
1450         return pkt_flags;
1451 }
1452
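/*
 * Translate Rx descriptor error bits into mbuf offload flags: the two
 * checksum error bits (IPE/L4E) index a table of IP/L4 checksum GOOD/BAD
 * flags, while outer-IP checksum errors and security offload status are
 * reported on top of that.
 */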
1453 static inline uint64_t
1454 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1455 {
1456         uint64_t pkt_flags;
1457
1458         /*
1459          * Bit 31: IPE, IPv4 checksum error
1460          * Bit 30: L4I, L4 integrity error
1461          */
1462         static uint64_t error_to_pkt_flags_map[4] = {
1463                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1464                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1465                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1466                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1467         };
1468         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1469                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1470
1471         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1472             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1473                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1474         }
1475
1476         if (rx_status & IXGBE_RXD_STAT_SECP) {
1477                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1478                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1479                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1480         }
1481
1482         return pkt_flags;
1483 }
1484
1485 /*
1486  * LOOK_AHEAD defines how many desc statuses to check beyond the
1487  * current descriptor.
1488  * It must be a #define (compile-time constant) for optimal performance.
1489  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1490  * function only works with LOOK_AHEAD=8.
1491  */
1492 #define LOOK_AHEAD 8
1493 #if (LOOK_AHEAD != 8)
1494 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1495 #endif
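/*
 * Scan up to RTE_PMD_IXGBE_RX_MAX_BURST descriptors, LOOK_AHEAD at a time,
 * convert every completed descriptor into its mbuf (length, VLAN TCI,
 * offload flags, packet type, RSS/FDIR hash) and stage the mbuf pointers
 * in rxq->rx_stage. No new buffers are allocated here; the caller is
 * responsible for replenishing the ring.
 */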
1496 static inline int
1497 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1498 {
1499         volatile union ixgbe_adv_rx_desc *rxdp;
1500         struct ixgbe_rx_entry *rxep;
1501         struct rte_mbuf *mb;
1502         uint16_t pkt_len;
1503         uint64_t pkt_flags;
1504         int nb_dd;
1505         uint32_t s[LOOK_AHEAD];
1506         uint32_t pkt_info[LOOK_AHEAD];
1507         int i, j, nb_rx = 0;
1508         uint32_t status;
1509         uint64_t vlan_flags = rxq->vlan_flags;
1510
1511         /* get references to current descriptor and S/W ring entry */
1512         rxdp = &rxq->rx_ring[rxq->rx_tail];
1513         rxep = &rxq->sw_ring[rxq->rx_tail];
1514
1515         status = rxdp->wb.upper.status_error;
1516         /* check to make sure there is at least 1 packet to receive */
1517         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1518                 return 0;
1519
1520         /*
1521          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1522          * reference packets that are ready to be received.
1523          */
1524         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1525              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1526                 /* Read desc statuses backwards to avoid race condition */
1527                 for (j = 0; j < LOOK_AHEAD; j++)
1528                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1529
1530                 rte_smp_rmb();
1531
1532                 /* Count how many consecutive descriptors have their DD bit set */
1533                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1534                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1535                         ;
1536
1537                 for (j = 0; j < nb_dd; j++)
1538                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1539                                                        lo_dword.data);
1540
1541                 nb_rx += nb_dd;
1542
1543                 /* Translate descriptor info to mbuf format */
1544                 for (j = 0; j < nb_dd; ++j) {
1545                         mb = rxep[j].mbuf;
1546                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1547                                   rxq->crc_len;
1548                         mb->data_len = pkt_len;
1549                         mb->pkt_len = pkt_len;
1550                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1551
1552                         /* convert descriptor fields to rte mbuf flags */
1553                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1554                                 vlan_flags);
1555                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1556                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1557                                         ((uint16_t)pkt_info[j]);
1558                         mb->ol_flags = pkt_flags;
1559                         mb->packet_type =
1560                                 ixgbe_rxd_pkt_info_to_pkt_type
1561                                         (pkt_info[j], rxq->pkt_type_mask);
1562
1563                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1564                                 mb->hash.rss = rte_le_to_cpu_32(
1565                                     rxdp[j].wb.lower.hi_dword.rss);
1566                         else if (pkt_flags & PKT_RX_FDIR) {
1567                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1568                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1569                                     IXGBE_ATR_HASH_MASK;
1570                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1571                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1572                         }
1573                 }
1574
1575                 /* Move mbuf pointers from the S/W ring to the stage */
1576                 for (j = 0; j < LOOK_AHEAD; ++j) {
1577                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1578                 }
1579
1580                 /* stop if the hardware did not complete a full LOOK_AHEAD batch */
1581                 if (nb_dd != LOOK_AHEAD)
1582                         break;
1583         }
1584
1585         /* clear software ring entries so we can cleanup correctly */
1586         for (i = 0; i < nb_rx; ++i) {
1587                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1588         }
1589
1590
1591         return nb_rx;
1592 }
1593
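/*
 * Bulk-allocate rxq->rx_free_thresh mbufs from the queue's mempool into the
 * S/W ring starting at the current free trigger, reset their refcount and
 * headroom, and program the matching Rx descriptors with the new buffer
 * addresses. Returns -ENOMEM if the mempool cannot satisfy the request.
 */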
1594 static inline int
1595 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1596 {
1597         volatile union ixgbe_adv_rx_desc *rxdp;
1598         struct ixgbe_rx_entry *rxep;
1599         struct rte_mbuf *mb;
1600         uint16_t alloc_idx;
1601         __le64 dma_addr;
1602         int diag, i;
1603
1604         /* allocate buffers in bulk directly into the S/W ring */
1605         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1606         rxep = &rxq->sw_ring[alloc_idx];
1607         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1608                                     rxq->rx_free_thresh);
1609         if (unlikely(diag != 0))
1610                 return -ENOMEM;
1611
1612         rxdp = &rxq->rx_ring[alloc_idx];
1613         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1614                 /* populate the static rte mbuf fields */
1615                 mb = rxep[i].mbuf;
1616                 if (reset_mbuf) {
1617                         mb->port = rxq->port_id;
1618                 }
1619
1620                 rte_mbuf_refcnt_set(mb, 1);
1621                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1622
1623                 /* populate the descriptors */
1624                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1625                 rxdp[i].read.hdr_addr = 0;
1626                 rxdp[i].read.pkt_addr = dma_addr;
1627         }
1628
1629         /* update state of internal queue structure */
1630         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1631         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1632                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1633
1634         /* no errors */
1635         return 0;
1636 }
1637
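/*
 * Hand at most nb_pkts previously staged mbufs over to the caller's rx_pkts
 * array and advance the stage bookkeeping (rx_nb_avail / rx_next_avail).
 */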
1638 static inline uint16_t
1639 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1640                          uint16_t nb_pkts)
1641 {
1642         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1643         int i;
1644
1645         /* how many packets are ready to return? */
1646         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1647
1648         /* copy mbuf pointers to the application's packet list */
1649         for (i = 0; i < nb_pkts; ++i)
1650                 rx_pkts[i] = stage[i];
1651
1652         /* update internal queue state */
1653         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1654         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1655
1656         return nb_pkts;
1657 }
1658
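/*
 * Burst receive path behind ixgbe_recv_pkts_bulk_alloc: drain any staged
 * mbufs first, then scan the H/W ring, replenish descriptors in bulk once
 * rx_tail passes the free trigger (rewinding everything if the allocation
 * fails) and finally return packets from the stage.
 */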
1659 static inline uint16_t
1660 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1661              uint16_t nb_pkts)
1662 {
1663         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1664         uint16_t nb_rx = 0;
1665
1666         /* Any previously recv'd pkts will be returned from the Rx stage */
1667         if (rxq->rx_nb_avail)
1668                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1669
1670         /* Scan the H/W ring for packets to receive */
1671         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1672
1673         /* update internal queue state */
1674         rxq->rx_next_avail = 0;
1675         rxq->rx_nb_avail = nb_rx;
1676         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1677
1678         /* if required, allocate new buffers to replenish descriptors */
1679         if (rxq->rx_tail > rxq->rx_free_trigger) {
1680                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1681
1682                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1683                         int i, j;
1684
1685                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1686                                    "queue_id=%u", (unsigned) rxq->port_id,
1687                                    (unsigned) rxq->queue_id);
1688
1689                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1690                                 rxq->rx_free_thresh;
1691
1692                         /*
1693                          * Need to rewind any previous receives if we cannot
1694                          * allocate new buffers to replenish the old ones.
1695                          */
1696                         rxq->rx_nb_avail = 0;
1697                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1698                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1699                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1700
1701                         return 0;
1702                 }
1703
1704                 /* update tail pointer */
1705                 rte_wmb();
1706                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1707                                             cur_free_trigger);
1708         }
1709
1710         if (rxq->rx_tail >= rxq->nb_rx_desc)
1711                 rxq->rx_tail = 0;
1712
1713         /* received any packets this loop? */
1714         if (rxq->rx_nb_avail)
1715                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1716
1717         return 0;
1718 }
1719
1720 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1721 uint16_t
1722 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1723                            uint16_t nb_pkts)
1724 {
1725         uint16_t nb_rx;
1726
1727         if (unlikely(nb_pkts == 0))
1728                 return 0;
1729
1730         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1731                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1732
1733         /* request is relatively large, chunk it up */
1734         nb_rx = 0;
1735         while (nb_pkts) {
1736                 uint16_t ret, n;
1737
1738                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1739                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1740                 nb_rx = (uint16_t)(nb_rx + ret);
1741                 nb_pkts = (uint16_t)(nb_pkts - ret);
1742                 if (ret < n)
1743                         break;
1744         }
1745
1746         return nb_rx;
1747 }
1748
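/*
 * Default (single-mbuf, non-bulk-allocation) receive handler: one descriptor
 * per packet. Each returned mbuf is immediately replaced with a freshly
 * allocated one, and the RDT register is written back only after more than
 * rx_free_thresh descriptors have been held.
 */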
1749 uint16_t
1750 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1751                 uint16_t nb_pkts)
1752 {
1753         struct ixgbe_rx_queue *rxq;
1754         volatile union ixgbe_adv_rx_desc *rx_ring;
1755         volatile union ixgbe_adv_rx_desc *rxdp;
1756         struct ixgbe_rx_entry *sw_ring;
1757         struct ixgbe_rx_entry *rxe;
1758         struct rte_mbuf *rxm;
1759         struct rte_mbuf *nmb;
1760         union ixgbe_adv_rx_desc rxd;
1761         uint64_t dma_addr;
1762         uint32_t staterr;
1763         uint32_t pkt_info;
1764         uint16_t pkt_len;
1765         uint16_t rx_id;
1766         uint16_t nb_rx;
1767         uint16_t nb_hold;
1768         uint64_t pkt_flags;
1769         uint64_t vlan_flags;
1770
1771         nb_rx = 0;
1772         nb_hold = 0;
1773         rxq = rx_queue;
1774         rx_id = rxq->rx_tail;
1775         rx_ring = rxq->rx_ring;
1776         sw_ring = rxq->sw_ring;
1777         vlan_flags = rxq->vlan_flags;
1778         while (nb_rx < nb_pkts) {
1779                 /*
1780                  * The order of operations here is important as the DD status
1781                  * bit must not be read after any other descriptor fields.
1782                  * rx_ring and rxdp are pointing to volatile data so the order
1783                  * of accesses cannot be reordered by the compiler. If they were
1784                  * not volatile, they could be reordered which could lead to
1785                  * using invalid descriptor fields when read from rxd.
1786                  */
1787                 rxdp = &rx_ring[rx_id];
1788                 staterr = rxdp->wb.upper.status_error;
1789                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1790                         break;
1791                 rxd = *rxdp;
1792
1793                 /*
1794                  * End of packet.
1795                  *
1796                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1797                  * is likely to be invalid and to be dropped by the various
1798                  * validation checks performed by the network stack.
1799                  *
1800                  * Allocate a new mbuf to replenish the RX ring descriptor.
1801                  * If the allocation fails:
1802                  *    - arrange for that RX descriptor to be the first one
1803                  *      being parsed the next time the receive function is
1804                  *      invoked [on the same queue].
1805                  *
1806                  *    - Stop parsing the RX ring and return immediately.
1807                  *
1808                  * This policy does not drop the packet received in the RX
1809                  * descriptor for which the allocation of a new mbuf failed.
1810                  * Thus, it allows that packet to be retrieved later if
1811                  * mbufs have been freed in the meantime.
1812                  * As a side effect, holding RX descriptors instead of
1813                  * systematically giving them back to the NIC may lead to
1814                  * RX ring exhaustion situations.
1815                  * However, the NIC can gracefully prevent such situations
1816                  * from happening by sending specific "back-pressure" flow control
1817                  * frames to its peer(s).
1818                  */
1819                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1820                            "ext_err_stat=0x%08x pkt_len=%u",
1821                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1822                            (unsigned) rx_id, (unsigned) staterr,
1823                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1824
1825                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1826                 if (nmb == NULL) {
1827                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1828                                    "queue_id=%u", (unsigned) rxq->port_id,
1829                                    (unsigned) rxq->queue_id);
1830                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1831                         break;
1832                 }
1833
1834                 nb_hold++;
1835                 rxe = &sw_ring[rx_id];
1836                 rx_id++;
1837                 if (rx_id == rxq->nb_rx_desc)
1838                         rx_id = 0;
1839
1840                 /* Prefetch next mbuf while processing current one. */
1841                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1842
1843                 /*
1844                  * When next RX descriptor is on a cache-line boundary,
1845                  * prefetch the next 4 RX descriptors and the next 8 pointers
1846                  * to mbufs.
1847                  */
1848                 if ((rx_id & 0x3) == 0) {
1849                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1850                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1851                 }
1852
1853                 rxm = rxe->mbuf;
1854                 rxe->mbuf = nmb;
1855                 dma_addr =
1856                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1857                 rxdp->read.hdr_addr = 0;
1858                 rxdp->read.pkt_addr = dma_addr;
1859
1860                 /*
1861                  * Initialize the returned mbuf.
1862                  * 1) setup generic mbuf fields:
1863                  *    - number of segments,
1864                  *    - next segment,
1865                  *    - packet length,
1866                  *    - RX port identifier.
1867                  * 2) integrate hardware offload data, if any:
1868                  *    - RSS flag & hash,
1869                  *    - IP checksum flag,
1870                  *    - VLAN TCI, if any,
1871                  *    - error flags.
1872                  */
1873                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1874                                       rxq->crc_len);
1875                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1876                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1877                 rxm->nb_segs = 1;
1878                 rxm->next = NULL;
1879                 rxm->pkt_len = pkt_len;
1880                 rxm->data_len = pkt_len;
1881                 rxm->port = rxq->port_id;
1882
1883                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1884                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1885                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1886
1887                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1888                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1889                 pkt_flags = pkt_flags |
1890                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1891                 rxm->ol_flags = pkt_flags;
1892                 rxm->packet_type =
1893                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1894                                                        rxq->pkt_type_mask);
1895
1896                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1897                         rxm->hash.rss = rte_le_to_cpu_32(
1898                                                 rxd.wb.lower.hi_dword.rss);
1899                 else if (pkt_flags & PKT_RX_FDIR) {
1900                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1901                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1902                                         IXGBE_ATR_HASH_MASK;
1903                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1904                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1905                 }
1906                 /*
1907                  * Store the mbuf address into the next entry of the array
1908                  * of returned packets.
1909                  */
1910                 rx_pkts[nb_rx++] = rxm;
1911         }
1912         rxq->rx_tail = rx_id;
1913
1914         /*
1915          * If the number of free RX descriptors is greater than the RX free
1916          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1917          * register.
1918          * Update the RDT with the value of the last processed RX descriptor
1919          * minus 1, to guarantee that the RDT register is never equal to the
1920          * RDH register, which creates a "full" ring situation from the
1921          * hardware point of view...
1922          */
1923         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1924         if (nb_hold > rxq->rx_free_thresh) {
1925                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1926                            "nb_hold=%u nb_rx=%u",
1927                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1928                            (unsigned) rx_id, (unsigned) nb_hold,
1929                            (unsigned) nb_rx);
1930                 rx_id = (uint16_t) ((rx_id == 0) ?
1931                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1932                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1933                 nb_hold = 0;
1934         }
1935         rxq->nb_rx_hold = nb_hold;
1936         return nb_rx;
1937 }
1938
1939 /**
1940  * Detect an RSC descriptor.
1941  */
1942 static inline uint32_t
1943 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1944 {
1945         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1946                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1947 }
1948
1949 /**
1950  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1951  *
1952  * Fill the following info in the HEAD buffer of the Rx cluster:
1953  *    - RX port identifier
1954  *    - hardware offload data, if any:
1955  *      - RSS flag & hash
1956  *      - IP checksum flag
1957  *      - VLAN TCI, if any
1958  *      - error flags
1959  * @head HEAD of the packet cluster
1960  * @desc HW descriptor to get data from
1961  * @rxq Pointer to the Rx queue
1962  */
1963 static inline void
1964 ixgbe_fill_cluster_head_buf(
1965         struct rte_mbuf *head,
1966         union ixgbe_adv_rx_desc *desc,
1967         struct ixgbe_rx_queue *rxq,
1968         uint32_t staterr)
1969 {
1970         uint32_t pkt_info;
1971         uint64_t pkt_flags;
1972
1973         head->port = rxq->port_id;
1974
1975         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1976          * set in the pkt_flags field.
1977          */
1978         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1979         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1980         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1981         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1982         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1983         head->ol_flags = pkt_flags;
1984         head->packet_type =
1985                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1986
1987         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1988                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1989         else if (pkt_flags & PKT_RX_FDIR) {
1990                 head->hash.fdir.hash =
1991                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1992                                                           & IXGBE_ATR_HASH_MASK;
1993                 head->hash.fdir.id =
1994                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1995         }
1996 }
1997
1998 /**
1999  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2000  *
2001  * @rx_queue Rx queue handle
2002  * @rx_pkts table of received packets
2003  * @nb_pkts size of rx_pkts table
2004  * @bulk_alloc if TRUE, bulk allocation is used to refill the HW ring
2005  *
2006  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2007  * additional ring of ixgbe_scattered_rx_entry's that holds the relevant RSC info.
2008  *
2009  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2010  * 1) When non-EOP RSC completion arrives:
2011  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2012  *       segment's data length.
2013  *    b) Set the "next" pointer of the current segment to point to the segment
2014  *       at the NEXTP index.
2015  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2016  *       in the sw_sc_ring.
2017  * 2) When EOP arrives we just update the cluster's total length and offload
2018  *    flags and deliver the cluster up to the upper layers. In our case - put it
2019  *    in the rx_pkts table.
2020  *
2021  * Returns the number of received packets/clusters (according to the "bulk
2022  * receive" interface).
2023  */
2024 static inline uint16_t
2025 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2026                     bool bulk_alloc)
2027 {
2028         struct ixgbe_rx_queue *rxq = rx_queue;
2029         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2030         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2031         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2032         uint16_t rx_id = rxq->rx_tail;
2033         uint16_t nb_rx = 0;
2034         uint16_t nb_hold = rxq->nb_rx_hold;
2035         uint16_t prev_id = rxq->rx_tail;
2036
2037         while (nb_rx < nb_pkts) {
2038                 bool eop;
2039                 struct ixgbe_rx_entry *rxe;
2040                 struct ixgbe_scattered_rx_entry *sc_entry;
2041                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2042                 struct ixgbe_rx_entry *next_rxe = NULL;
2043                 struct rte_mbuf *first_seg;
2044                 struct rte_mbuf *rxm;
2045                 struct rte_mbuf *nmb;
2046                 union ixgbe_adv_rx_desc rxd;
2047                 uint16_t data_len;
2048                 uint16_t next_id;
2049                 volatile union ixgbe_adv_rx_desc *rxdp;
2050                 uint32_t staterr;
2051
2052 next_desc:
2053                 /*
2054                  * The code in this whole file uses the volatile pointer to
2055                  * ensure the read ordering of the status and the rest of the
2056                  * descriptor fields (on the compiler level only!!!). This is so
2057                  * UGLY - why not just use the compiler barrier instead? DPDK
2058                  * even has the rte_compiler_barrier() for that.
2059                  *
2060                  * But most importantly this is just wrong because this doesn't
2061                  * ensure memory ordering in a general case at all. For
2062                  * instance, DPDK is supposed to work on Power CPUs where
2063                  * compiler barrier may just not be enough!
2064                  *
2065                  * I tried to write only this function properly to have a
2066                  * starting point (as a part of an LRO/RSC series) but the
2067                  * compiler cursed at me when I tried to cast away the
2068                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2069                  * keeping it the way it is for now.
2070                  *
2071                  * The code in this file is broken in so many other places and
2072                  * will just not work on a big endian CPU anyway therefore the
2073                  * lines below will have to be revisited together with the rest
2074                  * of the ixgbe PMD.
2075                  *
2076                  * TODO:
2077                  *    - Get rid of "volatile" crap and let the compiler do its
2078                  *      job.
2079                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2080                  *      memory ordering below.
2081                  */
2082                 rxdp = &rx_ring[rx_id];
2083                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2084
2085                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2086                         break;
2087
2088                 rxd = *rxdp;
2089
2090                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2091                                   "staterr=0x%x data_len=%u",
2092                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2093                            rte_le_to_cpu_16(rxd.wb.upper.length));
2094
2095                 if (!bulk_alloc) {
2096                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2097                         if (nmb == NULL) {
2098                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2099                                                   "port_id=%u queue_id=%u",
2100                                            rxq->port_id, rxq->queue_id);
2101
2102                                 rte_eth_devices[rxq->port_id].data->
2103                                                         rx_mbuf_alloc_failed++;
2104                                 break;
2105                         }
2106                 } else if (nb_hold > rxq->rx_free_thresh) {
2107                         uint16_t next_rdt = rxq->rx_free_trigger;
2108
2109                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2110                                 rte_wmb();
2111                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2112                                                             next_rdt);
2113                                 nb_hold -= rxq->rx_free_thresh;
2114                         } else {
2115                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2116                                                   "port_id=%u queue_id=%u",
2117                                            rxq->port_id, rxq->queue_id);
2118
2119                                 rte_eth_devices[rxq->port_id].data->
2120                                                         rx_mbuf_alloc_failed++;
2121                                 break;
2122                         }
2123                 }
2124
2125                 nb_hold++;
2126                 rxe = &sw_ring[rx_id];
2127                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2128
2129                 next_id = rx_id + 1;
2130                 if (next_id == rxq->nb_rx_desc)
2131                         next_id = 0;
2132
2133                 /* Prefetch next mbuf while processing current one. */
2134                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2135
2136                 /*
2137                  * When next RX descriptor is on a cache-line boundary,
2138                  * prefetch the next 4 RX descriptors and the next 4 pointers
2139                  * to mbufs.
2140                  */
2141                 if ((next_id & 0x3) == 0) {
2142                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2143                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2144                 }
2145
2146                 rxm = rxe->mbuf;
2147
2148                 if (!bulk_alloc) {
2149                         __le64 dma =
2150                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2151                         /*
2152                          * Update RX descriptor with the physical address of the
2153                          * new data buffer of the newly allocated mbuf.
2154                          */
2155                         rxe->mbuf = nmb;
2156
2157                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2158                         rxdp->read.hdr_addr = 0;
2159                         rxdp->read.pkt_addr = dma;
2160                 } else
2161                         rxe->mbuf = NULL;
2162
2163                 /*
2164                  * Set data length & data buffer address of mbuf.
2165                  */
2166                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2167                 rxm->data_len = data_len;
2168
2169                 if (!eop) {
2170                         uint16_t nextp_id;
2171                         /*
2172                          * Get next descriptor index:
2173                          *  - For RSC it's in the NEXTP field.
2174                          *  - For a scattered packet - it's just a following
2175                          *    descriptor.
2176                          */
2177                         if (ixgbe_rsc_count(&rxd))
2178                                 nextp_id =
2179                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2180                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2181                         else
2182                                 nextp_id = next_id;
2183
2184                         next_sc_entry = &sw_sc_ring[nextp_id];
2185                         next_rxe = &sw_ring[nextp_id];
2186                         rte_ixgbe_prefetch(next_rxe);
2187                 }
2188
2189                 sc_entry = &sw_sc_ring[rx_id];
2190                 first_seg = sc_entry->fbuf;
2191                 sc_entry->fbuf = NULL;
2192
2193                 /*
2194                  * If this is the first buffer of the received packet,
2195                  * set the pointer to the first mbuf of the packet and
2196                  * initialize its context.
2197                  * Otherwise, update the total length and the number of segments
2198                  * of the current scattered packet, and update the pointer to
2199                  * the last mbuf of the current packet.
2200                  */
2201                 if (first_seg == NULL) {
2202                         first_seg = rxm;
2203                         first_seg->pkt_len = data_len;
2204                         first_seg->nb_segs = 1;
2205                 } else {
2206                         first_seg->pkt_len += data_len;
2207                         first_seg->nb_segs++;
2208                 }
2209
2210                 prev_id = rx_id;
2211                 rx_id = next_id;
2212
2213                 /*
2214                  * If this is not the last buffer of the received packet, update
2215                  * the pointer to the first mbuf at the NEXTP entry in the
2216                  * sw_sc_ring and continue to parse the RX ring.
2217                  */
2218                 if (!eop && next_rxe) {
2219                         rxm->next = next_rxe->mbuf;
2220                         next_sc_entry->fbuf = first_seg;
2221                         goto next_desc;
2222                 }
2223
2224                 /* Initialize the first mbuf of the returned packet */
2225                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2226
2227                 /*
2228                  * Deal with the case when HW CRC strip is disabled.
2229                  * That can't happen when LRO is enabled, but still could
2230                  * happen for scattered RX mode.
2231                  */
2232                 first_seg->pkt_len -= rxq->crc_len;
2233                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2234                         struct rte_mbuf *lp;
2235
2236                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2237                                 ;
2238
2239                         first_seg->nb_segs--;
2240                         lp->data_len -= rxq->crc_len - rxm->data_len;
2241                         lp->next = NULL;
2242                         rte_pktmbuf_free_seg(rxm);
2243                 } else
2244                         rxm->data_len -= rxq->crc_len;
2245
2246                 /* Prefetch data of first segment, if configured to do so. */
2247                 rte_packet_prefetch((char *)first_seg->buf_addr +
2248                         first_seg->data_off);
2249
2250                 /*
2251                  * Store the mbuf address into the next entry of the array
2252                  * of returned packets.
2253                  */
2254                 rx_pkts[nb_rx++] = first_seg;
2255         }
2256
2257         /*
2258          * Record index of the next RX descriptor to probe.
2259          */
2260         rxq->rx_tail = rx_id;
2261
2262         /*
2263          * If the number of free RX descriptors is greater than the RX free
2264          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2265          * register.
2266          * Update the RDT with the value of the last processed RX descriptor
2267          * minus 1, to guarantee that the RDT register is never equal to the
2268          * RDH register, which creates a "full" ring situation from the
2269          * hardware point of view...
2270          */
2271         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2272                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2273                            "nb_hold=%u nb_rx=%u",
2274                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2275
2276                 rte_wmb();
2277                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2278                 nb_hold = 0;
2279         }
2280
2281         rxq->nb_rx_hold = nb_hold;
2282         return nb_rx;
2283 }
2284
2285 uint16_t
2286 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2287                                  uint16_t nb_pkts)
2288 {
2289         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2290 }
2291
2292 uint16_t
2293 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2294                                uint16_t nb_pkts)
2295 {
2296         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2297 }
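
/*
 * Note (illustrative only): applications do not call the receive handlers
 * above directly. The PMD installs the appropriate one in dev->rx_pkt_burst
 * and packets are then fetched through the generic ethdev API, e.g.:
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 * where port_id/queue_id refer to an already configured and started device.
 */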
2298
2299 /*********************************************************************
2300  *
2301  *  Queue management functions
2302  *
2303  **********************************************************************/
2304
2305 static void __attribute__((cold))
2306 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2307 {
2308         unsigned i;
2309
2310         if (txq->sw_ring != NULL) {
2311                 for (i = 0; i < txq->nb_tx_desc; i++) {
2312                         if (txq->sw_ring[i].mbuf != NULL) {
2313                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2314                                 txq->sw_ring[i].mbuf = NULL;
2315                         }
2316                 }
2317         }
2318 }
2319
2320 static void __attribute__((cold))
2321 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2322 {
2323         if (txq != NULL &&
2324             txq->sw_ring != NULL)
2325                 rte_free(txq->sw_ring);
2326 }
2327
2328 static void __attribute__((cold))
2329 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2330 {
2331         if (txq != NULL && txq->ops != NULL) {
2332                 txq->ops->release_mbufs(txq);
2333                 txq->ops->free_swring(txq);
2334                 rte_free(txq);
2335         }
2336 }
2337
2338 void __attribute__((cold))
2339 ixgbe_dev_tx_queue_release(void *txq)
2340 {
2341         ixgbe_tx_queue_release(txq);
2342 }
2343
2344 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2345 static void __attribute__((cold))
2346 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2347 {
2348         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2349         struct ixgbe_tx_entry *txe = txq->sw_ring;
2350         uint16_t prev, i;
2351
2352         /* Zero out HW ring memory */
2353         for (i = 0; i < txq->nb_tx_desc; i++) {
2354                 txq->tx_ring[i] = zeroed_desc;
2355         }
2356
2357         /* Initialize SW ring entries */
2358         prev = (uint16_t) (txq->nb_tx_desc - 1);
2359         for (i = 0; i < txq->nb_tx_desc; i++) {
2360                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2361
2362                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2363                 txe[i].mbuf = NULL;
2364                 txe[i].last_id = i;
2365                 txe[prev].next_id = i;
2366                 prev = i;
2367         }
2368
2369         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2370         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2371
2372         txq->tx_tail = 0;
2373         txq->nb_tx_used = 0;
2374         /*
2375          * Always allow 1 descriptor to be un-allocated to avoid
2376          * a H/W race condition
2377          */
2378         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2379         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2380         txq->ctx_curr = 0;
2381         memset((void *)&txq->ctx_cache, 0,
2382                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2383 }
2384
2385 static const struct ixgbe_txq_ops def_txq_ops = {
2386         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2387         .free_swring = ixgbe_tx_free_swring,
2388         .reset = ixgbe_reset_tx_queue,
2389 };
2390
2391 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2392  * the queue parameters. Used in tx_queue_setup by the primary process and then
2393  * in dev_init by secondary processes when attaching to an existing ethdev.
2394  */
2395 void __attribute__((cold))
2396 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2397 {
2398         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2399         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
2400                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) &&
2401                         !(dev->data->dev_conf.txmode.offloads
2402                                         & DEV_TX_OFFLOAD_SECURITY)) {
2403                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2404                 dev->tx_pkt_prepare = NULL;
2405 #ifdef RTE_IXGBE_INC_VECTOR
2406                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2407                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2408                                         ixgbe_txq_vec_setup(txq) == 0)) {
2409                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2410                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2411                 } else
2412 #endif
2413                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2414         } else {
2415                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2416                 PMD_INIT_LOG(DEBUG,
2417                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2418                                 (unsigned long)txq->txq_flags,
2419                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2420                 PMD_INIT_LOG(DEBUG,
2421                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2422                                 (unsigned long)txq->tx_rs_thresh,
2423                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2424                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2425                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2426         }
2427 }
2428
2429 int __attribute__((cold))
2430 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2431                          uint16_t queue_idx,
2432                          uint16_t nb_desc,
2433                          unsigned int socket_id,
2434                          const struct rte_eth_txconf *tx_conf)
2435 {
2436         const struct rte_memzone *tz;
2437         struct ixgbe_tx_queue *txq;
2438         struct ixgbe_hw     *hw;
2439         uint16_t tx_rs_thresh, tx_free_thresh;
2440
2441         PMD_INIT_FUNC_TRACE();
2442         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2443
2444         /*
2445          * Validate number of transmit descriptors.
2446          * It must not exceed the hardware maximum and must be a multiple
2447          * of IXGBE_TXD_ALIGN.
2448          */
2449         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2450                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2451                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2452                 return -EINVAL;
2453         }
2454
2455         /*
2456          * The following two parameters control the setting of the RS bit on
2457          * transmit descriptors.
2458          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2459          * descriptors have been used.
2460          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2461          * descriptors are used or if the number of descriptors required
2462          * to transmit a packet is greater than the number of free TX
2463          * descriptors.
2464          * The following constraints must be satisfied:
2465          *  tx_rs_thresh must be greater than 0.
2466          *  tx_rs_thresh must be less than the size of the ring minus 2.
2467          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2468          *  tx_rs_thresh must be a divisor of the ring size.
2469          *  tx_free_thresh must be greater than 0.
2470          *  tx_free_thresh must be less than the size of the ring minus 3.
2471          * One descriptor in the TX ring is used as a sentinel to avoid a
2472          * H/W race condition, hence the maximum threshold constraints.
2473          * When set to zero use default values.
2474          */
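        /*
         * Illustrative example (not used by the code): with nb_desc = 512
         * and, e.g., tx_rs_thresh = 32 and tx_free_thresh = 32, all of the
         * constraints above hold: 32 > 0, 32 < 510, 32 <= 32, 32 < 509 and
         * 512 % 32 == 0.  A tx_rs_thresh of 48 would be rejected because it
         * does not evenly divide 512.
         */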
2475         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2476                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2477         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2478                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2479         if (tx_rs_thresh >= (nb_desc - 2)) {
2480                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2481                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2482                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2483                         (int)dev->data->port_id, (int)queue_idx);
2484                 return -(EINVAL);
2485         }
2486         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2487                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2488                         "(tx_rs_thresh=%u port=%d queue=%d)",
2489                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2490                         (int)dev->data->port_id, (int)queue_idx);
2491                 return -(EINVAL);
2492         }
2493         if (tx_free_thresh >= (nb_desc - 3)) {
2494                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2495                              "number of TX descriptors minus 3. "
2496                              "(tx_free_thresh=%u "
2497                              "port=%d queue=%d)",
2498                              (unsigned int)tx_free_thresh,
2499                              (int)dev->data->port_id, (int)queue_idx);
2500                 return -(EINVAL);
2501         }
2502         if (tx_rs_thresh > tx_free_thresh) {
2503                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2504                              "tx_free_thresh. (tx_free_thresh=%u "
2505                              "tx_rs_thresh=%u port=%d queue=%d)",
2506                              (unsigned int)tx_free_thresh,
2507                              (unsigned int)tx_rs_thresh,
2508                              (int)dev->data->port_id,
2509                              (int)queue_idx);
2510                 return -(EINVAL);
2511         }
2512         if ((nb_desc % tx_rs_thresh) != 0) {
2513                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2514                              "number of TX descriptors. (tx_rs_thresh=%u "
2515                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2516                              (int)dev->data->port_id, (int)queue_idx);
2517                 return -(EINVAL);
2518         }
2519
2520         /*
2521          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2522          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2523          * by the NIC and all descriptors are written back after the NIC
2524          * accumulates WTHRESH descriptors.
2525          */
2526         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2527                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2528                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2529                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2530                              (int)dev->data->port_id, (int)queue_idx);
2531                 return -(EINVAL);
2532         }
2533
2534         /* Free memory prior to re-allocation if needed... */
2535         if (dev->data->tx_queues[queue_idx] != NULL) {
2536                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2537                 dev->data->tx_queues[queue_idx] = NULL;
2538         }
2539
2540         /* First allocate the tx queue data structure */
2541         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2542                                  RTE_CACHE_LINE_SIZE, socket_id);
2543         if (txq == NULL)
2544                 return -ENOMEM;
2545
2546         /*
2547          * Allocate TX ring hardware descriptors. A memzone large enough to
2548          * handle the maximum ring size is allocated in order to allow for
2549          * resizing in later calls to the queue setup function.
2550          */
2551         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2552                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2553                         IXGBE_ALIGN, socket_id);
2554         if (tz == NULL) {
2555                 ixgbe_tx_queue_release(txq);
2556                 return -ENOMEM;
2557         }
2558
2559         txq->nb_tx_desc = nb_desc;
2560         txq->tx_rs_thresh = tx_rs_thresh;
2561         txq->tx_free_thresh = tx_free_thresh;
2562         txq->pthresh = tx_conf->tx_thresh.pthresh;
2563         txq->hthresh = tx_conf->tx_thresh.hthresh;
2564         txq->wthresh = tx_conf->tx_thresh.wthresh;
2565         txq->queue_id = queue_idx;
2566         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2567                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2568         txq->port_id = dev->data->port_id;
2569         txq->txq_flags = tx_conf->txq_flags;
2570         txq->ops = &def_txq_ops;
2571         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2572         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2573                         DEV_TX_OFFLOAD_SECURITY);
2574
2575         /*
2576          * Set the TDT (tail) register address; virtual functions use VFTDT.
2577          */
2578         if (hw->mac.type == ixgbe_mac_82599_vf ||
2579             hw->mac.type == ixgbe_mac_X540_vf ||
2580             hw->mac.type == ixgbe_mac_X550_vf ||
2581             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2582             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2583                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2584         else
2585                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2586
2587         txq->tx_ring_phys_addr = tz->phys_addr;
2588         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2589
2590         /* Allocate software ring */
2591         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2592                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2593                                 RTE_CACHE_LINE_SIZE, socket_id);
2594         if (txq->sw_ring == NULL) {
2595                 ixgbe_tx_queue_release(txq);
2596                 return -ENOMEM;
2597         }
2598         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2599                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2600
2601         /* set up vector or scalar TX function as appropriate */
2602         ixgbe_set_tx_function(dev, txq);
2603
2604         txq->ops->reset(txq);
2605
2606         dev->data->tx_queues[queue_idx] = txq;
2607
2608
2609         return 0;
2610 }
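
/*
 * Application-side sketch (not part of the driver) of how the parameters
 * validated above typically reach ixgbe_dev_tx_queue_setup() through the
 * ethdev API; the port id, ring size and thresholds are arbitrary examples:
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 32,
 *             .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
 *     };
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                      &txconf);
 */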
2611
2612 /**
2613  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2614  *
2615  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2616  * in the sw_rsc_ring is not set to NULL but rather points to the next
2617  * mbuf of this RSC aggregation (that has not been completed yet and still
2618  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2619  * free only the first "nb_segs" segments of the cluster explicitly by
2620  * calling rte_pktmbuf_free_seg() on each of them.
2621  *
2622  * @m scattered cluster head
2623  */
2624 static void __attribute__((cold))
2625 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2626 {
2627         uint8_t i, nb_segs = m->nb_segs;
2628         struct rte_mbuf *next_seg;
2629
2630         for (i = 0; i < nb_segs; i++) {
2631                 next_seg = m->next;
2632                 rte_pktmbuf_free_seg(m);
2633                 m = next_seg;
2634         }
2635 }
2636
2637 static void __attribute__((cold))
2638 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2639 {
2640         unsigned i;
2641
2642 #ifdef RTE_IXGBE_INC_VECTOR
2643         /* SSE Vector driver has a different way of releasing mbufs. */
2644         if (rxq->rx_using_sse) {
2645                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2646                 return;
2647         }
2648 #endif
2649
2650         if (rxq->sw_ring != NULL) {
2651                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2652                         if (rxq->sw_ring[i].mbuf != NULL) {
2653                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2654                                 rxq->sw_ring[i].mbuf = NULL;
2655                         }
2656                 }
2657                 if (rxq->rx_nb_avail) {
2658                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2659                                 struct rte_mbuf *mb;
2660
2661                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2662                                 rte_pktmbuf_free_seg(mb);
2663                         }
2664                         rxq->rx_nb_avail = 0;
2665                 }
2666         }
2667
2668         if (rxq->sw_sc_ring)
2669                 for (i = 0; i < rxq->nb_rx_desc; i++)
2670                         if (rxq->sw_sc_ring[i].fbuf) {
2671                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2672                                 rxq->sw_sc_ring[i].fbuf = NULL;
2673                         }
2674 }
2675
2676 static void __attribute__((cold))
2677 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2678 {
2679         if (rxq != NULL) {
2680                 ixgbe_rx_queue_release_mbufs(rxq);
2681                 rte_free(rxq->sw_ring);
2682                 rte_free(rxq->sw_sc_ring);
2683                 rte_free(rxq);
2684         }
2685 }
2686
2687 void __attribute__((cold))
2688 ixgbe_dev_rx_queue_release(void *rxq)
2689 {
2690         ixgbe_rx_queue_release(rxq);
2691 }
2692
2693 /*
2694  * Check if Rx Burst Bulk Alloc function can be used.
2695  * Return
2696  *        0: the preconditions are satisfied and the bulk allocation function
2697  *           can be used.
2698  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2699  *           function must be used.
2700  */
2701 static inline int __attribute__((cold))
2702 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2703 {
2704         int ret = 0;
2705
2706         /*
2707          * Make sure the following pre-conditions are satisfied:
2708          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2709          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2710          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2711          * Scattered packets are not supported.  This should be checked
2712          * outside of this function.
2713          */
2714         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2715                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2716                              "rxq->rx_free_thresh=%d, "
2717                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2718                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2719                 ret = -EINVAL;
2720         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2721                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2722                              "rxq->rx_free_thresh=%d, "
2723                              "rxq->nb_rx_desc=%d",
2724                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2725                 ret = -EINVAL;
2726         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2727                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2728                              "rxq->nb_rx_desc=%d, "
2729                              "rxq->rx_free_thresh=%d",
2730                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2731                 ret = -EINVAL;
2732         }
2733
2734         return ret;
2735 }
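
/*
 * Worked example (illustrative only) for the preconditions checked above:
 * assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32, an Rx queue with
 * nb_rx_desc = 512 and rx_free_thresh = 64 qualifies (64 >= 32, 64 < 512,
 * 512 % 64 == 0), whereas rx_free_thresh = 60 would fail the divisibility
 * check and disable the bulk allocation path for the whole port.
 */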
2736
2737 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2738 static void __attribute__((cold))
2739 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2740 {
2741         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2742         unsigned i;
2743         uint16_t len = rxq->nb_rx_desc;
2744
2745         /*
2746          * By default, the Rx queue setup function allocates enough memory for
2747          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2748          * extra memory at the end of the descriptor ring to be zeroed out.
2749          */
2750         if (adapter->rx_bulk_alloc_allowed)
2751                 /* zero out extra memory */
2752                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2753
2754         /*
2755          * Zero out HW ring memory. Zero out extra memory at the end of
2756          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2757          * reads extra memory as zeros.
2758          */
2759         for (i = 0; i < len; i++) {
2760                 rxq->rx_ring[i] = zeroed_desc;
2761         }
2762
2763         /*
2764          * initialize extra software ring entries. Space for these extra
2765          * entries is always allocated
2766          */
2767         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2768         for (i = rxq->nb_rx_desc; i < len; ++i) {
2769                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2770         }
2771
2772         rxq->rx_nb_avail = 0;
2773         rxq->rx_next_avail = 0;
2774         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2775         rxq->rx_tail = 0;
2776         rxq->nb_rx_hold = 0;
2777         rxq->pkt_first_seg = NULL;
2778         rxq->pkt_last_seg = NULL;
2779
2780 #ifdef RTE_IXGBE_INC_VECTOR
2781         rxq->rxrearm_start = 0;
2782         rxq->rxrearm_nb = 0;
2783 #endif
2784 }
2785
2786 int __attribute__((cold))
2787 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2788                          uint16_t queue_idx,
2789                          uint16_t nb_desc,
2790                          unsigned int socket_id,
2791                          const struct rte_eth_rxconf *rx_conf,
2792                          struct rte_mempool *mp)
2793 {
2794         const struct rte_memzone *rz;
2795         struct ixgbe_rx_queue *rxq;
2796         struct ixgbe_hw     *hw;
2797         uint16_t len;
2798         struct ixgbe_adapter *adapter =
2799                 (struct ixgbe_adapter *)dev->data->dev_private;
2800
2801         PMD_INIT_FUNC_TRACE();
2802         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2803
2804         /*
2805          * Validate number of receive descriptors.
2806          * It must not exceed the hardware maximum and must be a multiple
2807          * of IXGBE_RXD_ALIGN.
2808          */
2809         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2810                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2811                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2812                 return -EINVAL;
2813         }
2814
2815         /* Free memory prior to re-allocation if needed... */
2816         if (dev->data->rx_queues[queue_idx] != NULL) {
2817                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2818                 dev->data->rx_queues[queue_idx] = NULL;
2819         }
2820
2821         /* First allocate the rx queue data structure */
2822         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2823                                  RTE_CACHE_LINE_SIZE, socket_id);
2824         if (rxq == NULL)
2825                 return -ENOMEM;
2826         rxq->mb_pool = mp;
2827         rxq->nb_rx_desc = nb_desc;
2828         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2829         rxq->queue_id = queue_idx;
2830         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2831                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2832         rxq->port_id = dev->data->port_id;
2833         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2834                                                         0 : ETHER_CRC_LEN);
2835         rxq->drop_en = rx_conf->rx_drop_en;
2836         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2837
2838         /*
2839          * The packet type in RX descriptor is different for different NICs.
2840          * Some bits are used for x550 but reserved for other NICs.
2841          * So set different masks for different NICs.
2842          */
2843         if (hw->mac.type == ixgbe_mac_X550 ||
2844             hw->mac.type == ixgbe_mac_X550EM_x ||
2845             hw->mac.type == ixgbe_mac_X550EM_a ||
2846             hw->mac.type == ixgbe_mac_X550_vf ||
2847             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2848             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2849                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2850         else
2851                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2852
2853         /*
2854          * Allocate RX ring hardware descriptors. A memzone large enough to
2855          * handle the maximum ring size is allocated in order to allow for
2856          * resizing in later calls to the queue setup function.
2857          */
2858         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2859                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2860         if (rz == NULL) {
2861                 ixgbe_rx_queue_release(rxq);
2862                 return -ENOMEM;
2863         }
2864
2865         /*
2866          * Zero init all the descriptors in the ring.
2867          */
2868         memset(rz->addr, 0, RX_RING_SZ);
2869
2870         /*
2871          * Set the RDT/RDH register addresses; virtual functions use VFRDT/VFRDH.
2872          */
2873         if (hw->mac.type == ixgbe_mac_82599_vf ||
2874             hw->mac.type == ixgbe_mac_X540_vf ||
2875             hw->mac.type == ixgbe_mac_X550_vf ||
2876             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2877             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2878                 rxq->rdt_reg_addr =
2879                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2880                 rxq->rdh_reg_addr =
2881                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2882         } else {
2883                 rxq->rdt_reg_addr =
2884                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2885                 rxq->rdh_reg_addr =
2886                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2887         }
2888
2889         rxq->rx_ring_phys_addr = rz->phys_addr;
2890         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2891
2892         /*
2893          * Certain constraints must be met in order to use the bulk buffer
2894          * allocation Rx burst function. If any of the Rx queues doesn't meet
2895          * these constraints, the feature is disabled for the whole port.
2896          */
2897         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2898                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2899                                     "preconditions - canceling the feature for "
2900                                     "the whole port[%d]",
2901                              rxq->queue_id, rxq->port_id);
2902                 adapter->rx_bulk_alloc_allowed = false;
2903         }
2904
2905         /*
2906          * Allocate software ring. Allow for space at the end of the
2907          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2908          * function does not access an invalid memory region.
2909          */
2910         len = nb_desc;
2911         if (adapter->rx_bulk_alloc_allowed)
2912                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2913
2914         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2915                                           sizeof(struct ixgbe_rx_entry) * len,
2916                                           RTE_CACHE_LINE_SIZE, socket_id);
2917         if (!rxq->sw_ring) {
2918                 ixgbe_rx_queue_release(rxq);
2919                 return -ENOMEM;
2920         }
2921
2922         /*
2923          * Always allocate even if it's not going to be needed in order to
2924          * simplify the code.
2925          *
2926          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2927          * be requested in ixgbe_dev_rx_init(), which is called later from
2928          * dev_start() flow.
2929          */
2930         rxq->sw_sc_ring =
2931                 rte_zmalloc_socket("rxq->sw_sc_ring",
2932                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2933                                    RTE_CACHE_LINE_SIZE, socket_id);
2934         if (!rxq->sw_sc_ring) {
2935                 ixgbe_rx_queue_release(rxq);
2936                 return -ENOMEM;
2937         }
2938
2939         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2940                             "dma_addr=0x%"PRIx64,
2941                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2942                      rxq->rx_ring_phys_addr);
2943
2944         if (!rte_is_power_of_2(nb_desc)) {
2945                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2946                                     "preconditions - canceling the feature for "
2947                                     "the whole port[%d]",
2948                              rxq->queue_id, rxq->port_id);
2949                 adapter->rx_vec_allowed = false;
2950         } else
2951                 ixgbe_rxq_vec_setup(rxq);
2952
2953         dev->data->rx_queues[queue_idx] = rxq;
2954
2955         ixgbe_reset_rx_queue(adapter, rxq);
2956
2957         return 0;
2958 }
2959
2960 uint32_t
2961 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2962 {
2963 #define IXGBE_RXQ_SCAN_INTERVAL 4
2964         volatile union ixgbe_adv_rx_desc *rxdp;
2965         struct ixgbe_rx_queue *rxq;
2966         uint32_t desc = 0;
2967
2968         rxq = dev->data->rx_queues[rx_queue_id];
2969         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2970
2971         while ((desc < rxq->nb_rx_desc) &&
2972                 (rxdp->wb.upper.status_error &
2973                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2974                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2975                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2976                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2977                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2978                                 desc - rxq->nb_rx_desc]);
2979         }
2980
2981         return desc;
2982 }
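
/*
 * Note: the count returned above is an estimate with a granularity of
 * IXGBE_RXQ_SCAN_INTERVAL (4) descriptors, since the ring is probed every
 * fourth descriptor starting from rx_tail.
 */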
2983
2984 int
2985 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2986 {
2987         volatile union ixgbe_adv_rx_desc *rxdp;
2988         struct ixgbe_rx_queue *rxq = rx_queue;
2989         uint32_t desc;
2990
2991         if (unlikely(offset >= rxq->nb_rx_desc))
2992                 return 0;
2993         desc = rxq->rx_tail + offset;
2994         if (desc >= rxq->nb_rx_desc)
2995                 desc -= rxq->nb_rx_desc;
2996
2997         rxdp = &rxq->rx_ring[desc];
2998         return !!(rxdp->wb.upper.status_error &
2999                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3000 }
3001
3002 int
3003 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3004 {
3005         struct ixgbe_rx_queue *rxq = rx_queue;
3006         volatile uint32_t *status;
3007         uint32_t nb_hold, desc;
3008
3009         if (unlikely(offset >= rxq->nb_rx_desc))
3010                 return -EINVAL;
3011
3012 #ifdef RTE_IXGBE_INC_VECTOR
3013         if (rxq->rx_using_sse)
3014                 nb_hold = rxq->rxrearm_nb;
3015         else
3016 #endif
3017                 nb_hold = rxq->nb_rx_hold;
3018         if (offset >= rxq->nb_rx_desc - nb_hold)
3019                 return RTE_ETH_RX_DESC_UNAVAIL;
3020
3021         desc = rxq->rx_tail + offset;
3022         if (desc >= rxq->nb_rx_desc)
3023                 desc -= rxq->nb_rx_desc;
3024
3025         status = &rxq->rx_ring[desc].wb.upper.status_error;
3026         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3027                 return RTE_ETH_RX_DESC_DONE;
3028
3029         return RTE_ETH_RX_DESC_AVAIL;
3030 }
3031
3032 int
3033 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3034 {
3035         struct ixgbe_tx_queue *txq = tx_queue;
3036         volatile uint32_t *status;
3037         uint32_t desc;
3038
3039         if (unlikely(offset >= txq->nb_tx_desc))
3040                 return -EINVAL;
3041
3042         desc = txq->tx_tail + offset;
3043         /* go to next desc that has the RS bit */
3044         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3045                 txq->tx_rs_thresh;
3046         if (desc >= txq->nb_tx_desc) {
3047                 desc -= txq->nb_tx_desc;
3048                 if (desc >= txq->nb_tx_desc)
3049                         desc -= txq->nb_tx_desc;
3050         }
3051
3052         status = &txq->tx_ring[desc].wb.status;
3053         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3054                 return RTE_ETH_TX_DESC_DONE;
3055
3056         return RTE_ETH_TX_DESC_FULL;
3057 }
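
/*
 * Example of the RS-bit rounding above (illustrative): with tx_rs_thresh = 32,
 * tx_tail = 10 and offset = 5, desc becomes 15 and is rounded up to 32,
 * because only descriptors carrying the RS bit have their DD status written
 * back by the hardware.
 */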
3058
3059 void __attribute__((cold))
3060 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3061 {
3062         unsigned i;
3063         struct ixgbe_adapter *adapter =
3064                 (struct ixgbe_adapter *)dev->data->dev_private;
3065
3066         PMD_INIT_FUNC_TRACE();
3067
3068         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3069                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3070
3071                 if (txq != NULL) {
3072                         txq->ops->release_mbufs(txq);
3073                         txq->ops->reset(txq);
3074                 }
3075         }
3076
3077         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3078                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3079
3080                 if (rxq != NULL) {
3081                         ixgbe_rx_queue_release_mbufs(rxq);
3082                         ixgbe_reset_rx_queue(adapter, rxq);
3083                 }
3084         }
3085 }
3086
3087 void
3088 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3089 {
3090         unsigned i;
3091
3092         PMD_INIT_FUNC_TRACE();
3093
3094         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3095                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3096                 dev->data->rx_queues[i] = NULL;
3097         }
3098         dev->data->nb_rx_queues = 0;
3099
3100         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3101                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3102                 dev->data->tx_queues[i] = NULL;
3103         }
3104         dev->data->nb_tx_queues = 0;
3105 }
3106
3107 /*********************************************************************
3108  *
3109  *  Device RX/TX init functions
3110  *
3111  **********************************************************************/
3112
3113 /**
3114  * Receive Side Scaling (RSS)
3115  * See section 7.1.2.8 in the following document:
3116  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3117  *
3118  * Principles:
3119  * The source and destination IP addresses of the IP header and the source
3120  * and destination ports of TCP/UDP headers, if any, of received packets are
3121  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3122  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3123  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3124  * RSS output index which is used as the RX queue index where to store the
3125  * received packets.
3126  * The following output is supplied in the RX write-back descriptor:
3127  *     - 32-bit result of the Microsoft RSS hash function,
3128  *     - 4-bit RSS type field.
3129  */
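
/*
 * Conceptually, queue selection works as sketched below (illustrative
 * pseudo-code only, not the hardware implementation):
 *
 *     uint32_t hash  = rss_hash(pkt, rss_key);   // Microsoft RSS hash
 *     uint8_t  index = hash & 0x7F;              // 7 LSBs -> RETA index
 *     uint8_t  queue = reta[index];              // RETA entry -> Rx queue
 *
 * The hash result and the RSS type are reported in the Rx write-back
 * descriptor, as described above.
 */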
3130
3131 /*
3132  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3133  * Used as the default key.
3134  */
3135 static uint8_t rss_intel_key[40] = {
3136         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3137         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3138         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3139         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3140         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3141 };
3142
3143 static void
3144 ixgbe_rss_disable(struct rte_eth_dev *dev)
3145 {
3146         struct ixgbe_hw *hw;
3147         uint32_t mrqc;
3148         uint32_t mrqc_reg;
3149
3150         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3151         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3152         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3153         mrqc &= ~IXGBE_MRQC_RSSEN;
3154         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3155 }
3156
3157 static void
3158 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3159 {
3160         uint8_t  *hash_key;
3161         uint32_t mrqc;
3162         uint32_t rss_key;
3163         uint64_t rss_hf;
3164         uint16_t i;
3165         uint32_t mrqc_reg;
3166         uint32_t rssrk_reg;
3167
3168         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3169         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3170
3171         hash_key = rss_conf->rss_key;
3172         if (hash_key != NULL) {
3173                 /* Fill in RSS hash key */
3174                 for (i = 0; i < 10; i++) {
3175                         rss_key  = hash_key[(i * 4)];
3176                         rss_key |= hash_key[(i * 4) + 1] << 8;
3177                         rss_key |= hash_key[(i * 4) + 2] << 16;
3178                         rss_key |= hash_key[(i * 4) + 3] << 24;
3179                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3180                 }
3181         }
3182
3183         /* Set configured hashing protocols in MRQC register */
3184         rss_hf = rss_conf->rss_hf;
3185         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3186         if (rss_hf & ETH_RSS_IPV4)
3187                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3188         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3189                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3190         if (rss_hf & ETH_RSS_IPV6)
3191                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3192         if (rss_hf & ETH_RSS_IPV6_EX)
3193                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3194         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3195                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3196         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3197                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3198         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3199                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3200         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3201                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3202         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3203                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3204         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3205 }
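
/*
 * Application-side sketch (not part of the driver) of how the RSS hash
 * configuration is typically updated through the ethdev API; the selected
 * hash functions are an arbitrary example:
 *
 *     struct rte_eth_rss_conf rss_conf = {
 *             .rss_key = NULL,   // keep the currently programmed key
 *             .rss_hf  = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     int ret = rte_eth_dev_rss_hash_update(port_id, &rss_conf);
 *
 * For ixgbe ports this call ends up in ixgbe_dev_rss_hash_update() below.
 */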
3206
3207 int
3208 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3209                           struct rte_eth_rss_conf *rss_conf)
3210 {
3211         struct ixgbe_hw *hw;
3212         uint32_t mrqc;
3213         uint64_t rss_hf;
3214         uint32_t mrqc_reg;
3215
3216         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3217
3218         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3219                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3220                         "NIC.");
3221                 return -ENOTSUP;
3222         }
3223         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3224
3225         /*
3226          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3227          *     "RSS enabling cannot be done dynamically while it must be
3228          *      preceded by a software reset"
3229          * Before changing anything, first check that the update RSS operation
3230          * does not attempt to disable RSS, if RSS was enabled at
3231          * initialization time, or does not attempt to enable RSS, if RSS was
3232          * disabled at initialization time.
3233          */
3234         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3235         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3236         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3237                 if (rss_hf != 0) /* Enable RSS */
3238                         return -(EINVAL);
3239                 return 0; /* Nothing to do */
3240         }
3241         /* RSS enabled */
3242         if (rss_hf == 0) /* Disable RSS */
3243                 return -(EINVAL);
3244         ixgbe_hw_rss_hash_set(hw, rss_conf);
3245         return 0;
3246 }
3247
3248 int
3249 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3250                             struct rte_eth_rss_conf *rss_conf)
3251 {
3252         struct ixgbe_hw *hw;
3253         uint8_t *hash_key;
3254         uint32_t mrqc;
3255         uint32_t rss_key;
3256         uint64_t rss_hf;
3257         uint16_t i;
3258         uint32_t mrqc_reg;
3259         uint32_t rssrk_reg;
3260
3261         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3262         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3263         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3264         hash_key = rss_conf->rss_key;
3265         if (hash_key != NULL) {
3266                 /* Return RSS hash key */
3267                 for (i = 0; i < 10; i++) {
3268                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3269                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3270                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3271                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3272                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3273                 }
3274         }
3275
3276         /* Get RSS functions configured in MRQC register */
3277         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3278         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3279                 rss_conf->rss_hf = 0;
3280                 return 0;
3281         }
3282         rss_hf = 0;
3283         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3284                 rss_hf |= ETH_RSS_IPV4;
3285         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3286                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3287         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3288                 rss_hf |= ETH_RSS_IPV6;
3289         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3290                 rss_hf |= ETH_RSS_IPV6_EX;
3291         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3292                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3293         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3294                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3295         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3296                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3297         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3298                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3299         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3300                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3301         rss_conf->rss_hf = rss_hf;
3302         return 0;
3303 }
3304
3305 static void
3306 ixgbe_rss_configure(struct rte_eth_dev *dev)
3307 {
3308         struct rte_eth_rss_conf rss_conf;
3309         struct ixgbe_hw *hw;
3310         uint32_t reta;
3311         uint16_t i;
3312         uint16_t j;
3313         uint16_t sp_reta_size;
3314         uint32_t reta_reg;
3315
3316         PMD_INIT_FUNC_TRACE();
3317         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3318
3319         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3320
3321         /*
3322          * Fill in redirection table
3323          * The byte-swap is needed because NIC registers are in
3324          * little-endian order.
3325          */
3326         reta = 0;
3327         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3328                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3329
3330                 if (j == dev->data->nb_rx_queues)
3331                         j = 0;
3332                 reta = (reta << 8) | j;
3333                 if ((i & 3) == 3)
3334                         IXGBE_WRITE_REG(hw, reta_reg,
3335                                         rte_bswap32(reta));
3336         }
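        /*
         * Example (illustrative): with 4 Rx queues the loop above fills the
         * table with the repeating pattern 0,1,2,3,... and writes one 32-bit
         * RETA register on every fourth iteration, each register holding four
         * one-byte entries (byte-swapped to match the register layout).
         */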
3337
3338         /*
3339          * Configure the RSS key and the RSS protocols used to compute
3340          * the RSS hash of input packets.
3341          */
3342         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3343         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3344                 ixgbe_rss_disable(dev);
3345                 return;
3346         }
3347         if (rss_conf.rss_key == NULL)
3348                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3349         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3350 }
3351
3352 #define NUM_VFTA_REGISTERS 128
3353 #define NIC_RX_BUFFER_SIZE 0x200
3354 #define X550_RX_BUFFER_SIZE 0x180
3355
3356 static void
3357 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3358 {
3359         struct rte_eth_vmdq_dcb_conf *cfg;
3360         struct ixgbe_hw *hw;
3361         enum rte_eth_nb_pools num_pools;
3362         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3363         uint16_t pbsize;
3364         uint8_t nb_tcs; /* number of traffic classes */
3365         int i;
3366
3367         PMD_INIT_FUNC_TRACE();
3368         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3369         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3370         num_pools = cfg->nb_queue_pools;
3371         /* Check we have a valid number of pools */
3372         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3373                 ixgbe_rss_disable(dev);
3374                 return;
3375         }
3376         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3377         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3378
3379         /*
3380          * RXPBSIZE
3381          * split rx buffer up into sections, each for 1 traffic class
3382          */
3383         switch (hw->mac.type) {
3384         case ixgbe_mac_X550:
3385         case ixgbe_mac_X550EM_x:
3386         case ixgbe_mac_X550EM_a:
3387                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3388                 break;
3389         default:
3390                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3391                 break;
3392         }
3393         for (i = 0; i < nb_tcs; i++) {
3394                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3395
3396                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3397                 /* clear 10 bits. */
3398                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3399                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3400         }
3401         /* zero alloc all unused TCs */
3402         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3403                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3404
3405                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3406                 /* clear 10 bits. */
3407                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3408         }
3409
3410         /* MRQC: enable vmdq and dcb */
3411         mrqc = (num_pools == ETH_16_POOLS) ?
3412                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3413         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3414
3415         /* PFVTCTL: turn on virtualisation and set the default pool */
3416         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3417         if (cfg->enable_default_pool) {
3418                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3419         } else {
3420                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3421         }
3422
3423         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3424
3425         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3426         queue_mapping = 0;
3427         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3428                 /*
3429                  * mapping is done with 3 bits per priority,
3430                  * so shift by i*3 each time
3431                  */
3432                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3433
3434         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3435
3436         /* RTRPCS: DCB related */
3437         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3438
3439         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3440         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3441         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3442         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3443
3444         /* VFTA - enable all vlan filters */
3445         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3446                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3447         }
3448
3449         /* VFRE: pool enabling for receive - 16 or 32 */
3450         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3451                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3452
3453         /*
3454          * MPSAR - allow pools to read specific mac addresses
3455          * In this case, all pools should be able to read from mac addr 0
3456          */
3457         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3458         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3459
3460         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3461         for (i = 0; i < cfg->nb_pool_maps; i++) {
3462                 /* set vlan id in VF register and set the valid bit */
3463                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3464                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3465                 /*
3466                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3467                  * pools, we only need to use the first half of the register
3468                  * i.e. bits 0-31
3469                  */
3470                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3471         }
3472 }
3473
3474 /**
3475  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3476  * @dev: pointer to eth_dev structure
3477  * @dcb_config: pointer to ixgbe_dcb_config structure
3478  */
3479 static void
3480 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3481                        struct ixgbe_dcb_config *dcb_config)
3482 {
3483         uint32_t reg;
3484         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3485
3486         PMD_INIT_FUNC_TRACE();
3487         if (hw->mac.type != ixgbe_mac_82598EB) {
3488                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3489                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3490                 reg |= IXGBE_RTTDCS_ARBDIS;
3491                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3492
3493                 /* Enable DCB for Tx with 8 TCs */
3494                 if (dcb_config->num_tcs.pg_tcs == 8) {
3495                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3496                 } else {
3497                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3498                 }
3499                 if (dcb_config->vt_mode)
3500                         reg |= IXGBE_MTQC_VT_ENA;
3501                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3502
3503                 /* Enable the Tx desc arbiter */
3504                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3505                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3506                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3507
3508                 /* Enable Security TX Buffer IFG for DCB */
3509                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3510                 reg |= IXGBE_SECTX_DCB;
3511                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3512         }
3513 }
3514
3515 /**
3516  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3517  * @dev: pointer to rte_eth_dev structure
3518  * @dcb_config: pointer to ixgbe_dcb_config structure
3519  */
3520 static void
3521 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3522                         struct ixgbe_dcb_config *dcb_config)
3523 {
3524         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3525                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3526         struct ixgbe_hw *hw =
3527                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3528
3529         PMD_INIT_FUNC_TRACE();
3530         if (hw->mac.type != ixgbe_mac_82598EB)
3531                 /*PF VF Transmit Enable*/
3532                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3533                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3534
3535         /*Configure general DCB TX parameters*/
3536         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3537 }
3538
3539 static void
3540 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3541                         struct ixgbe_dcb_config *dcb_config)
3542 {
3543         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3544                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3545         struct ixgbe_dcb_tc_config *tc;
3546         uint8_t i, j;
3547
3548         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3549         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3550                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3551                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3552         } else {
3553                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3554                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3555         }
3556
3557         /* Initialize User Priority to Traffic Class mapping */
3558         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3559                 tc = &dcb_config->tc_config[j];
3560                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3561         }
3562
3563         /* User Priority to Traffic Class mapping */
3564         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3565                 j = vmdq_rx_conf->dcb_tc[i];
3566                 tc = &dcb_config->tc_config[j];
3567                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3568                                                 (uint8_t)(1 << i);
3569         }
3570 }
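
/*
 * Worked example for the UP -> TC mapping above (illustrative): with
 * dcb_tc[] = { 0, 0, 0, 0, 1, 1, 1, 1 }, user priorities 0-3 map to traffic
 * class 0 and priorities 4-7 to traffic class 1, so TC0 ends up with an
 * up_to_tc_bitmap of 0x0F and TC1 with 0xF0.
 */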
3571
3572 static void
3573 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3574                         struct ixgbe_dcb_config *dcb_config)
3575 {
3576         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3577                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3578         struct ixgbe_dcb_tc_config *tc;
3579         uint8_t i, j;
3580
3581         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3582         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3583                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3584                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3585         } else {
3586                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3587                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3588         }
3589
3590         /* Initialize User Priority to Traffic Class mapping */
3591         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3592                 tc = &dcb_config->tc_config[j];
3593                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3594         }
3595
3596         /* User Priority to Traffic Class mapping */
3597         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3598                 j = vmdq_tx_conf->dcb_tc[i];
3599                 tc = &dcb_config->tc_config[j];
3600                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3601                                                 (uint8_t)(1 << i);
3602         }
3603 }
3604
3605 static void
3606 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3607                 struct ixgbe_dcb_config *dcb_config)
3608 {
3609         struct rte_eth_dcb_rx_conf *rx_conf =
3610                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3611         struct ixgbe_dcb_tc_config *tc;
3612         uint8_t i, j;
3613
3614         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3615         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3616
3617         /* Initialize User Priority to Traffic Class mapping */
3618         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3619                 tc = &dcb_config->tc_config[j];
3620                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3621         }
3622
3623         /* User Priority to Traffic Class mapping */
3624         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3625                 j = rx_conf->dcb_tc[i];
3626                 tc = &dcb_config->tc_config[j];
3627                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3628                                                 (uint8_t)(1 << i);
3629         }
3630 }
3631
3632 static void
3633 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3634                 struct ixgbe_dcb_config *dcb_config)
3635 {
3636         struct rte_eth_dcb_tx_conf *tx_conf =
3637                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3638         struct ixgbe_dcb_tc_config *tc;
3639         uint8_t i, j;
3640
3641         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3642         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3643
3644         /* Initialize User Priority to Traffic Class mapping */
3645         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3646                 tc = &dcb_config->tc_config[j];
3647                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3648         }
3649
3650         /* User Priority to Traffic Class mapping */
3651         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3652                 j = tx_conf->dcb_tc[i];
3653                 tc = &dcb_config->tc_config[j];
3654                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3655                                                 (uint8_t)(1 << i);
3656         }
3657 }
3658
3659 /**
3660  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3661  * @dev: pointer to eth_dev structure
3662  * @dcb_config: pointer to ixgbe_dcb_config structure
3663  */
3664 static void
3665 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3666                        struct ixgbe_dcb_config *dcb_config)
3667 {
3668         uint32_t reg;
3669         uint32_t vlanctrl;
3670         uint8_t i;
3671         uint32_t q;
3672         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3673
3674         PMD_INIT_FUNC_TRACE();
3675         /*
3676          * Disable the arbiter before changing parameters
3677          * (always enable recycle mode; WSP)
3678          */
3679         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3680         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3681
3682         if (hw->mac.type != ixgbe_mac_82598EB) {
3683                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3684                 if (dcb_config->num_tcs.pg_tcs == 4) {
3685                         if (dcb_config->vt_mode)
3686                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3687                                         IXGBE_MRQC_VMDQRT4TCEN;
3688                         else {
3689                                 /* No matter whether the mode is DCB or DCB_RSS,
3690                                  * just set MRQE to RTRSSxTCEN. RSS itself is
3691                                  * controlled by the RSS_FIELD bits.
3692                                  */
3693                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3694                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3695                                         IXGBE_MRQC_RTRSS4TCEN;
3696                         }
3697                 }
3698                 if (dcb_config->num_tcs.pg_tcs == 8) {
3699                         if (dcb_config->vt_mode)
3700                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3701                                         IXGBE_MRQC_VMDQRT8TCEN;
3702                         else {
3703                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3704                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3705                                         IXGBE_MRQC_RTRSS8TCEN;
3706                         }
3707                 }
3708
3709                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3710
3711                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3712                         /* Disable drop for all queues in VMDQ mode*/
3713                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3714                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3715                                                 (IXGBE_QDE_WRITE |
3716                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3717                 } else {
3718                         /* Enable drop for all queues in SRIOV mode */
3719                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3720                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3721                                                 (IXGBE_QDE_WRITE |
3722                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3723                                                  IXGBE_QDE_ENABLE));
3724                 }
3725         }
3726
3727         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3728         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3729         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3730         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3731
3732         /* VFTA - enable all vlan filters */
3733         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3734                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3735         }
3736
3737         /*
3738          * Configure Rx packet plane (recycle mode; WSP) and
3739          * enable arbiter
3740          */
3741         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3742         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3743 }
3744
3745 static void
3746 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3747                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3748 {
3749         switch (hw->mac.type) {
3750         case ixgbe_mac_82598EB:
3751                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3752                 break;
3753         case ixgbe_mac_82599EB:
3754         case ixgbe_mac_X540:
3755         case ixgbe_mac_X550:
3756         case ixgbe_mac_X550EM_x:
3757         case ixgbe_mac_X550EM_a:
3758                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3759                                                   tsa, map);
3760                 break;
3761         default:
3762                 break;
3763         }
3764 }
3765
3766 static void
3767 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3768                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3769 {
3770         switch (hw->mac.type) {
3771         case ixgbe_mac_82598EB:
3772                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3773                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3774                 break;
3775         case ixgbe_mac_82599EB:
3776         case ixgbe_mac_X540:
3777         case ixgbe_mac_X550:
3778         case ixgbe_mac_X550EM_x:
3779         case ixgbe_mac_X550EM_a:
3780                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3781                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3782                 break;
3783         default:
3784                 break;
3785         }
3786 }
3787
3788 #define DCB_RX_CONFIG  1
3789 #define DCB_TX_CONFIG  1
3790 #define DCB_TX_PB      1024
3791 /**
3792  * ixgbe_dcb_hw_configure - Enable DCB and configure
3793  * general DCB in VT mode and non-VT mode parameters
3794  * @dev: pointer to rte_eth_dev structure
3795  * @dcb_config: pointer to ixgbe_dcb_config structure
3796  */
3797 static int
3798 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3799                         struct ixgbe_dcb_config *dcb_config)
3800 {
3801         int     ret = 0;
3802         uint8_t i, pfc_en, nb_tcs;
3803         uint16_t pbsize, rx_buffer_size;
3804         uint8_t config_dcb_rx = 0;
3805         uint8_t config_dcb_tx = 0;
3806         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3807         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3808         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3809         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3810         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3811         struct ixgbe_dcb_tc_config *tc;
3812         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3813         struct ixgbe_hw *hw =
3814                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3815         struct ixgbe_bw_conf *bw_conf =
3816                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3817
3818         switch (dev->data->dev_conf.rxmode.mq_mode) {
3819         case ETH_MQ_RX_VMDQ_DCB:
3820                 dcb_config->vt_mode = true;
3821                 if (hw->mac.type != ixgbe_mac_82598EB) {
3822                         config_dcb_rx = DCB_RX_CONFIG;
3823                         /*
3824                          * Get DCB and VT RX configuration parameters
3825                          * from rte_eth_conf.
3826                          */
3827                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3828                         /* Configure general VMDQ and DCB RX parameters */
3829                         ixgbe_vmdq_dcb_configure(dev);
3830                 }
3831                 break;
3832         case ETH_MQ_RX_DCB:
3833         case ETH_MQ_RX_DCB_RSS:
3834                 dcb_config->vt_mode = false;
3835                 config_dcb_rx = DCB_RX_CONFIG;
3836                 /* Get DCB RX configuration parameters from rte_eth_conf */
3837                 ixgbe_dcb_rx_config(dev, dcb_config);
3838                 /* Configure general DCB RX parameters */
3839                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3840                 break;
3841         default:
3842                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3843                 break;
3844         }
3845         switch (dev->data->dev_conf.txmode.mq_mode) {
3846         case ETH_MQ_TX_VMDQ_DCB:
3847                 dcb_config->vt_mode = true;
3848                 config_dcb_tx = DCB_TX_CONFIG;
3849                 /* Get DCB and VT TX configuration parameters
3850                  * from rte_eth_conf.
3851                  */
3852                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3853                 /* Configure general VMDQ and DCB TX parameters */
3854                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3855                 break;
3856
3857         case ETH_MQ_TX_DCB:
3858                 dcb_config->vt_mode = false;
3859                 config_dcb_tx = DCB_TX_CONFIG;
3860                 /* Get DCB TX configuration parameters from rte_eth_conf */
3861                 ixgbe_dcb_tx_config(dev, dcb_config);
3862                 /* Configure general DCB TX parameters */
3863                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3864                 break;
3865         default:
3866                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3867                 break;
3868         }
3869
3870         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3871         /* Unpack map */
3872         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3873         if (nb_tcs == ETH_4_TCS) {
3874                 /* Avoid un-configured priority mapping to TC0 */
3875                 uint8_t j = 4;
3876                 uint8_t mask = 0xFF;
3877
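                     /*
                      * First clear, in "mask", the bit of every TC already
                      * used by user priorities 0-3, then hand the remaining
                      * TCs out to the still unmapped priorities 4-7.
                      */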
3878                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3879                         mask = (uint8_t)(mask & (~(1 << map[i])));
3880                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3881                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3882                                 map[j++] = i;
3883                         mask >>= 1;
3884                 }
3885                 /* Re-configure 4 TCs BW */
3886                 for (i = 0; i < nb_tcs; i++) {
3887                         tc = &dcb_config->tc_config[i];
3888                         if (bw_conf->tc_num != nb_tcs)
3889                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3890                                         (uint8_t)(100 / nb_tcs);
3891                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3892                                                 (uint8_t)(100 / nb_tcs);
3893                 }
3894                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3895                         tc = &dcb_config->tc_config[i];
3896                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3897                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3898                 }
3899         } else {
3900                 /* Re-configure 8 TCs BW */
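                     /*
                      * 100 does not divide evenly by 8, so the (i & 1) term
                      * gives every other TC an extra percent (12/13
                      * alternating) to keep the total at 100.
                      */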
3901                 for (i = 0; i < nb_tcs; i++) {
3902                         tc = &dcb_config->tc_config[i];
3903                         if (bw_conf->tc_num != nb_tcs)
3904                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3905                                         (uint8_t)(100 / nb_tcs + (i & 1));
3906                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3907                                 (uint8_t)(100 / nb_tcs + (i & 1));
3908                 }
3909         }
3910
3911         switch (hw->mac.type) {
3912         case ixgbe_mac_X550:
3913         case ixgbe_mac_X550EM_x:
3914         case ixgbe_mac_X550EM_a:
3915                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3916                 break;
3917         default:
3918                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3919                 break;
3920         }
3921
3922         if (config_dcb_rx) {
3923                 /* Set RX buffer size */
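                     /*
                      * pbsize is the per-TC share of the RX packet buffer;
                      * the shift below moves it into the size field of the
                      * RXPBSIZE register.
                      */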
3924                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3925                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3926
3927                 for (i = 0; i < nb_tcs; i++) {
3928                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3929                 }
3930                 /* Allocate zero buffer space to all unused TCs */
3931                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3932                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3933                 }
3934         }
3935         if (config_dcb_tx) {
3936                 /* Only support an equally distributed
3937                  *  Tx packet buffer strategy.
3938                  */
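                     /*
                      * DCB_TX_PB (1024) converts txpktsize from bytes to the
                      * KB units used by TXPBTHRESH; IXGBE_TXPKT_SIZE_MAX is
                      * then subtracted so the threshold leaves room for one
                      * maximum-sized packet.
                      */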
3939                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3940                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3941
3942                 for (i = 0; i < nb_tcs; i++) {
3943                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3944                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3945                 }
3946                 /* Clear unused TCs, if any, to zero buffer size */
3947                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3948                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3949                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3950                 }
3951         }
3952
3953         /* Calculate traffic class credits */
3954         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3955                                 IXGBE_DCB_TX_CONFIG);
3956         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3957                                 IXGBE_DCB_RX_CONFIG);
3958
3959         if (config_dcb_rx) {
3960                 /* Unpack CEE standard containers */
3961                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3962                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3963                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3964                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3965                 /* Configure PG(ETS) RX */
3966                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3967         }
3968
3969         if (config_dcb_tx) {
3970                 /* Unpack CEE standard containers */
3971                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3972                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3973                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3974                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3975                 /* Configure PG(ETS) TX */
3976                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3977         }
3978
3979         /* Configure queue statistics registers */
3980         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3981
3982         /* Check if the PFC is supported */
3983         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3984                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3985                 for (i = 0; i < nb_tcs; i++) {
3986                         /*
3987                          * high_water is set to 3/4 and low_water to 1/4 of the
3988                          * per-TC buffer (e.g. 48 and 16 with the default buffer and 8 TCs).
3989                          */
3990                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3991                         hw->fc.low_water[i] = pbsize / 4;
3992                         /* Enable pfc for this TC */
3993                         tc = &dcb_config->tc_config[i];
3994                         tc->pfc = ixgbe_dcb_pfc_enabled;
3995                 }
3996                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3997                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3998                         pfc_en &= 0x0F;
3999                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4000         }
4001
4002         return ret;
4003 }
4004
4005 /**
4006  * ixgbe_configure_dcb - Configure DCB  Hardware
4007  * @dev: pointer to rte_eth_dev
4008  */
4009 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4010 {
4011         struct ixgbe_dcb_config *dcb_cfg =
4012                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4013         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4014
4015         PMD_INIT_FUNC_TRACE();
4016
4017         /* Check whether the configured mq_mode supports DCB */
4018         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4019             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4020             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4021                 return;
4022
4023         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4024                 return;
4025
4026         /* Configure DCB hardware */
4027         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4028 }
4029
4030 /*
4031  * VMDq is only supported on 10 GbE NICs.
4032  */
4033 static void
4034 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4035 {
4036         struct rte_eth_vmdq_rx_conf *cfg;
4037         struct ixgbe_hw *hw;
4038         enum rte_eth_nb_pools num_pools;
4039         uint32_t mrqc, vt_ctl, vlanctrl;
4040         uint32_t vmolr = 0;
4041         int i;
4042
4043         PMD_INIT_FUNC_TRACE();
4044         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4045         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4046         num_pools = cfg->nb_queue_pools;
4047
4048         ixgbe_rss_disable(dev);
4049
4050         /* MRQC: enable vmdq */
4051         mrqc = IXGBE_MRQC_VMDQEN;
4052         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4053
4054         /* PFVTCTL: turn on virtualisation and set the default pool */
4055         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4056         if (cfg->enable_default_pool)
4057                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4058         else
4059                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4060
4061         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4062
4063         for (i = 0; i < (int)num_pools; i++) {
4064                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4065                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4066         }
4067
4068         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4069         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4070         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4071         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4072
4073         /* VFTA - enable all vlan filters */
4074         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4075                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4076
4077         /* VFRE: pool enabling for receive - 64 */
4078         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4079         if (num_pools == ETH_64_POOLS)
4080                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4081
4082         /*
4083          * MPSAR - allow pools to read specific mac addresses
4084          * In this case, all pools should be able to read from mac addr 0
4085          */
4086         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4087         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4088
4089         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4090         for (i = 0; i < cfg->nb_pool_maps; i++) {
4091                 /* set vlan id in VF register and set the valid bit */
4092                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4093                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4094                 /*
4095                  * Put the allowed pools in the VLVFB registers. Each VLAN
4096                  * filter has two VLVFB registers covering pool bits 0-31
4097                  * and 32-63; write whichever half of the 64-bit pool map
4098                  * is populated.
4099                  */
4099                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4100                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4101                                         (cfg->pool_map[i].pools & UINT32_MAX));
4102                 else
4103                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4104                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4105
4106         }
4107
4108         /* PFDMA Tx General Switch Control: enable VMDQ loopback */
4109         if (cfg->enable_loop_back) {
4110                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4111                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4112                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4113         }
4114
4115         IXGBE_WRITE_FLUSH(hw);
4116 }
4117
4118 /*
4119  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4120  * @hw: pointer to hardware structure
4121  */
4122 static void
4123 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4124 {
4125         uint32_t reg;
4126         uint32_t q;
4127
4128         PMD_INIT_FUNC_TRACE();
4129         /* PF VF Transmit Enable */
4130         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4131         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4132
4133         /* Disable the Tx desc arbiter so that MTQC can be changed */
4134         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4135         reg |= IXGBE_RTTDCS_ARBDIS;
4136         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4137
4138         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4139         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4140
4141         /* Disable drop for all queues */
4142         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4143                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4144                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4145
4146         /* Enable the Tx desc arbiter */
4147         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4148         reg &= ~IXGBE_RTTDCS_ARBDIS;
4149         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4150
4151         IXGBE_WRITE_FLUSH(hw);
4152 }
4153
4154 static int __attribute__((cold))
4155 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4156 {
4157         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4158         uint64_t dma_addr;
4159         unsigned int i;
4160
4161         /* Initialize software ring entries */
4162         for (i = 0; i < rxq->nb_rx_desc; i++) {
4163                 volatile union ixgbe_adv_rx_desc *rxd;
4164                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4165
4166                 if (mbuf == NULL) {
4167                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4168                                      (unsigned) rxq->queue_id);
4169                         return -ENOMEM;
4170                 }
4171
4172                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4173                 mbuf->port = rxq->port_id;
4174
4175                 dma_addr =
4176                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4177                 rxd = &rxq->rx_ring[i];
4178                 rxd->read.hdr_addr = 0;
4179                 rxd->read.pkt_addr = dma_addr;
4180                 rxe[i].mbuf = mbuf;
4181         }
4182
4183         return 0;
4184 }
4185
4186 static int
4187 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4188 {
4189         struct ixgbe_hw *hw;
4190         uint32_t mrqc;
4191
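             /*
              * ixgbe_rss_configure() programs the RSS key and redirection
              * table; MRQC below then selects the VMDq + RSS mode matching
              * the number of active SR-IOV pools.
              */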
4192         ixgbe_rss_configure(dev);
4193
4194         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4195
4196         /* MRQC: enable VF RSS */
4197         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4198         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4199         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4200         case ETH_64_POOLS:
4201                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4202                 break;
4203
4204         case ETH_32_POOLS:
4205                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4206                 break;
4207
4208         default:
4209                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4210                 return -EINVAL;
4211         }
4212
4213         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4214
4215         return 0;
4216 }
4217
4218 static int
4219 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4220 {
4221         struct ixgbe_hw *hw =
4222                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4223
4224         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4225         case ETH_64_POOLS:
4226                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4227                         IXGBE_MRQC_VMDQEN);
4228                 break;
4229
4230         case ETH_32_POOLS:
4231                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4232                         IXGBE_MRQC_VMDQRT4TCEN);
4233                 break;
4234
4235         case ETH_16_POOLS:
4236                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4237                         IXGBE_MRQC_VMDQRT8TCEN);
4238                 break;
4239         default:
4240                 PMD_INIT_LOG(ERR,
4241                         "invalid pool number in IOV mode");
4242                 break;
4243         }
4244         return 0;
4245 }
4246
4247 static int
4248 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4249 {
4250         struct ixgbe_hw *hw =
4251                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4252
4253         if (hw->mac.type == ixgbe_mac_82598EB)
4254                 return 0;
4255
4256         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4257                 /*
4258                  * SRIOV inactive scheme
4259                  * any DCB/RSS w/o VMDq multi-queue setting
4260                  */
4261                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4262                 case ETH_MQ_RX_RSS:
4263                 case ETH_MQ_RX_DCB_RSS:
4264                 case ETH_MQ_RX_VMDQ_RSS:
4265                         ixgbe_rss_configure(dev);
4266                         break;
4267
4268                 case ETH_MQ_RX_VMDQ_DCB:
4269                         ixgbe_vmdq_dcb_configure(dev);
4270                         break;
4271
4272                 case ETH_MQ_RX_VMDQ_ONLY:
4273                         ixgbe_vmdq_rx_hw_configure(dev);
4274                         break;
4275
4276                 case ETH_MQ_RX_NONE:
4277                 default:
4278                         /* If mq_mode is none, disable RSS. */
4279                         ixgbe_rss_disable(dev);
4280                         break;
4281                 }
4282         } else {
4283                 /* SRIOV active scheme
4284                  * Support RSS together with SRIOV.
4285                  */
4286                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4287                 case ETH_MQ_RX_RSS:
4288                 case ETH_MQ_RX_VMDQ_RSS:
4289                         ixgbe_config_vf_rss(dev);
4290                         break;
4291                 case ETH_MQ_RX_VMDQ_DCB:
4292                 case ETH_MQ_RX_DCB:
4293                 /* In SRIOV, the configuration is the same as VMDq case */
4294                         ixgbe_vmdq_dcb_configure(dev);
4295                         break;
4296                 /* DCB/RSS together with SRIOV is not supported */
4297                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4298                 case ETH_MQ_RX_DCB_RSS:
4299                         PMD_INIT_LOG(ERR,
4300                                 "DCB/RSS with VMDq & SRIOV is not supported");
4301                         return -1;
4302                 default:
4303                         ixgbe_config_vf_default(dev);
4304                         break;
4305                 }
4306         }
4307
4308         return 0;
4309 }
4310
4311 static int
4312 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4313 {
4314         struct ixgbe_hw *hw =
4315                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4316         uint32_t mtqc;
4317         uint32_t rttdcs;
4318
4319         if (hw->mac.type == ixgbe_mac_82598EB)
4320                 return 0;
4321
4322         /* disable arbiter before setting MTQC */
4323         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4324         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4325         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4326
4327         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4328                 /*
4329                  * SRIOV inactive scheme
4330                  * any DCB w/o VMDq multi-queue setting
4331                  */
4332                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4333                         ixgbe_vmdq_tx_hw_configure(hw);
4334                 else {
4335                         mtqc = IXGBE_MTQC_64Q_1PB;
4336                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4337                 }
4338         } else {
4339                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4340
4341                 /*
4342                  * SRIOV active scheme
4343                  * FIXME if support DCB together with VMDq & SRIOV
4344                  */
4345                 case ETH_64_POOLS:
4346                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4347                         break;
4348                 case ETH_32_POOLS:
4349                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4350                         break;
4351                 case ETH_16_POOLS:
4352                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4353                                 IXGBE_MTQC_8TC_8TQ;
4354                         break;
4355                 default:
4356                         mtqc = IXGBE_MTQC_64Q_1PB;
4357                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4358                 }
4359                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4360         }
4361
4362         /* re-enable arbiter */
4363         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4364         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4365
4366         return 0;
4367 }
4368
4369 /**
4370  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4371  *
4372  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4373  * spec rev. 3.0 chapter 8.2.3.8.13.
4374  *
4375  * @pool Memory pool of the Rx queue
4376  */
4377 static inline uint32_t
4378 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4379 {
4380         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4381
4382         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4383         uint16_t maxdesc =
4384                 IPV4_MAX_PKT_LEN /
4385                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4386
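             /*
              * maxdesc is how many full mbuf data buffers fit into one 64 KB
              * RSC aggregation; round it down to the nearest MAXDESC encoding
              * the hardware supports (16, 8, 4 or 1).
              */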
4387         if (maxdesc >= 16)
4388                 return IXGBE_RSCCTL_MAXDESC_16;
4389         else if (maxdesc >= 8)
4390                 return IXGBE_RSCCTL_MAXDESC_8;
4391         else if (maxdesc >= 4)
4392                 return IXGBE_RSCCTL_MAXDESC_4;
4393         else
4394                 return IXGBE_RSCCTL_MAXDESC_1;
4395 }
4396
4397 /**
4398  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4399  * interrupt
4400  *
4401  * (Taken from FreeBSD tree)
4402  * (yes this is all very magic and confusing :)
4403  *
4404  * @dev port handle
4405  * @entry the register array entry
4406  * @vector the MSIX vector for this queue
4407  * @type RX/TX/MISC
4408  */
4409 static void
4410 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4411 {
4412         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4413         u32 ivar, index;
4414
4415         vector |= IXGBE_IVAR_ALLOC_VAL;
4416
4417         switch (hw->mac.type) {
4418
4419         case ixgbe_mac_82598EB:
4420                 if (type == -1)
4421                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4422                 else
4423                         entry += (type * 64);
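                     /*
                      * Each 82598 IVAR register packs four 8-bit entries:
                      * entry >> 2 selects the register and entry & 0x3
                      * selects the byte lane holding this vector.
                      */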
4424                 index = (entry >> 2) & 0x1F;
4425                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4426                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4427                 ivar |= (vector << (8 * (entry & 0x3)));
4428                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4429                 break;
4430
4431         case ixgbe_mac_82599EB:
4432         case ixgbe_mac_X540:
4433                 if (type == -1) { /* MISC IVAR */
4434                         index = (entry & 1) * 8;
4435                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4436                         ivar &= ~(0xFF << index);
4437                         ivar |= (vector << index);
4438                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4439                 } else {        /* RX/TX IVARS */
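                             /*
                              * Each IVAR register covers two queues:
                              * (entry & 1) selects the 16-bit half and type
                              * (0 for RX) selects the byte within it.
                              */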
4440                         index = (16 * (entry & 1)) + (8 * type);
4441                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4442                         ivar &= ~(0xFF << index);
4443                         ivar |= (vector << index);
4444                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4445                 }
4446
4447                 break;
4448
4449         default:
4450                 break;
4451         }
4452 }
4453
4454 void __attribute__((cold))
4455 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4456 {
4457         uint16_t i, rx_using_sse;
4458         struct ixgbe_adapter *adapter =
4459                 (struct ixgbe_adapter *)dev->data->dev_private;
4460
4461         /*
4462          * In order to allow Vector Rx there are a few configuration
4463          * conditions to be met and Rx Bulk Allocation should be allowed.
4464          */
4465         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4466             !adapter->rx_bulk_alloc_allowed) {
4467                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4468                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4469                                     "not enabled",
4470                              dev->data->port_id);
4471
4472                 adapter->rx_vec_allowed = false;
4473         }
4474
4475         /*
4476          * Initialize the appropriate LRO callback.
4477          *
4478          * If all queues satisfy the bulk allocation preconditions
4479          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4480          * Otherwise use a single allocation version.
4481          */
4482         if (dev->data->lro) {
4483                 if (adapter->rx_bulk_alloc_allowed) {
4484                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4485                                            "allocation version");
4486                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4487                 } else {
4488                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4489                                            "allocation version");
4490                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4491                 }
4492         } else if (dev->data->scattered_rx) {
4493                 /*
4494                  * Set the non-LRO scattered callback: there are Vector and
4495                  * single allocation versions.
4496                  */
4497                 if (adapter->rx_vec_allowed) {
4498                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4499                                             "callback (port=%d).",
4500                                      dev->data->port_id);
4501
4502                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4503                 } else if (adapter->rx_bulk_alloc_allowed) {
4504                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4505                                            "allocation callback (port=%d).",
4506                                      dev->data->port_id);
4507                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4508                 } else {
4509                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4510                                             "single allocation) "
4511                                             "Scattered Rx callback "
4512                                             "(port=%d).",
4513                                      dev->data->port_id);
4514
4515                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4516                 }
4517         /*
4518          * Below we set "simple" callbacks according to port/queues parameters.
4519          * If parameters allow we are going to choose between the following
4520          * callbacks:
4521          *    - Vector
4522          *    - Bulk Allocation
4523          *    - Single buffer allocation (the simplest one)
4524          */
4525         } else if (adapter->rx_vec_allowed) {
4526                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4527                                     "burst size is no less than %d (port=%d).",
4528                              RTE_IXGBE_DESCS_PER_LOOP,
4529                              dev->data->port_id);
4530
4531                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4532         } else if (adapter->rx_bulk_alloc_allowed) {
4533                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4534                                     "satisfied. Rx Burst Bulk Alloc function "
4535                                     "will be used on port=%d.",
4536                              dev->data->port_id);
4537
4538                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4539         } else {
4540                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4541                                     "satisfied, or Scattered Rx is requested "
4542                                     "(port=%d).",
4543                              dev->data->port_id);
4544
4545                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4546         }
4547
4548         /* Propagate information about RX function choice through all queues. */
4549
4550         rx_using_sse =
4551                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4552                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4553
4554         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4555                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4556
4557                 rxq->rx_using_sse = rx_using_sse;
4558                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4559                                 DEV_RX_OFFLOAD_SECURITY);
4560         }
4561 }
4562
4563 /**
4564  * ixgbe_set_rsc - configure RSC related port HW registers
4565  *
4566  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4567  * of 82599 Spec (x540 configuration is virtually the same).
4568  *
4569  * @dev port handle
4570  *
4571  * Returns 0 in case of success or a non-zero error code
4572  */
4573 static int
4574 ixgbe_set_rsc(struct rte_eth_dev *dev)
4575 {
4576         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4577         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4578         struct rte_eth_dev_info dev_info = { 0 };
4579         bool rsc_capable = false;
4580         uint16_t i;
4581         uint32_t rdrxctl;
4582         uint32_t rfctl;
4583
4584         /* Sanity check */
4585         dev->dev_ops->dev_infos_get(dev, &dev_info);
4586         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4587                 rsc_capable = true;
4588
4589         if (!rsc_capable && rx_conf->enable_lro) {
4590                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4591                                    "support it");
4592                 return -EINVAL;
4593         }
4594
4595         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4596
4597         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4598                 /*
4599                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4600                  * 3.0 RSC configuration requires HW CRC stripping being
4601                  * enabled. If user requested both HW CRC stripping off
4602                  * and RSC on - return an error.
4603                  */
4604                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4605                                     "is disabled");
4606                 return -EINVAL;
4607         }
4608
4609         /* RFCTL configuration  */
4610         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4611         if ((rsc_capable) && (rx_conf->enable_lro))
4612                 /*
4613                  * Since NFS packets coalescing is not supported - clear
4614                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4615                  * enabled.
4616                  */
4617                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4618                            IXGBE_RFCTL_NFSR_DIS);
4619         else
4620                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4621         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4622
4623         /* If LRO hasn't been requested - we are done here. */
4624         if (!rx_conf->enable_lro)
4625                 return 0;
4626
4627         /* Set RDRXCTL.RSCACKC bit */
4628         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4629         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4630         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4631
4632         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4633         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4634                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4635                 uint32_t srrctl =
4636                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4637                 uint32_t rscctl =
4638                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4639                 uint32_t psrtype =
4640                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4641                 uint32_t eitr =
4642                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4643
4644                 /*
4645                  * ixgbe PMD doesn't support header-split at the moment.
4646                  *
4647                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4648                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4649                  * should be configured even if header split is not
4650                  * enabled. We will configure it 128 bytes following the
4651                  * recommendation in the spec.
4652                  */
4653                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4654                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4655                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4656
4657                 /*
4658                  * TODO: Consider setting the Receive Descriptor Minimum
4659                  * Threshold Size for an RSC case. This is not an obviously
4660                  * beneficial option, but one worth considering...
4661                  */
4662
4663                 rscctl |= IXGBE_RSCCTL_RSCEN;
4664                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4665                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4666
4667                 /*
4668                  * RSC: Set ITR interval corresponding to 2K ints/s.
4669                  *
4670                  * Full-sized RSC aggregations for a 10Gb/s link will
4671                  * arrive at about 20K aggregation/s rate.
4672                  *
4673                  * A 2K ints/s rate will cause only 10% of the
4674                  * aggregations to be closed by interrupt timer expiration
4675                  * when streaming at wire speed.
4676                  *
4677                  * For a sparse streaming case this setting will yield
4678                  * at most 500us latency for a single RSC aggregation.
4679                  */
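                     /*
                      * A 500 us EITR interval corresponds to the 2K
                      * interrupts/s target described above.
                      */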
4680                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4681                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4682
4683                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4684                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4685                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4686                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4687
4688                 /*
4689                  * RSC requires the mapping of the queue to the
4690                  * interrupt vector.
4691                  */
4692                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4693         }
4694
4695         dev->data->lro = 1;
4696
4697         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4698
4699         return 0;
4700 }
4701
4702 /*
4703  * Initializes Receive Unit.
4704  */
4705 int __attribute__((cold))
4706 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4707 {
4708         struct ixgbe_hw     *hw;
4709         struct ixgbe_rx_queue *rxq;
4710         uint64_t bus_addr;
4711         uint32_t rxctrl;
4712         uint32_t fctrl;
4713         uint32_t hlreg0;
4714         uint32_t maxfrs;
4715         uint32_t srrctl;
4716         uint32_t rdrxctl;
4717         uint32_t rxcsum;
4718         uint16_t buf_size;
4719         uint16_t i;
4720         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4721         int rc;
4722
4723         PMD_INIT_FUNC_TRACE();
4724         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4725
4726         /*
4727          * Make sure receives are disabled while setting
4728          * up the RX context (registers, descriptor rings, etc.).
4729          */
4730         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4731         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4732
4733         /* Enable receipt of broadcast frames */
4734         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4735         fctrl |= IXGBE_FCTRL_BAM;
4736         fctrl |= IXGBE_FCTRL_DPF;
4737         fctrl |= IXGBE_FCTRL_PMCF;
4738         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4739
4740         /*
4741          * Configure CRC stripping, if any.
4742          */
4743         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4744         if (rx_conf->hw_strip_crc)
4745                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4746         else
4747                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4748
4749         /*
4750          * Configure jumbo frame support, if any.
4751          */
4752         if (rx_conf->jumbo_frame == 1) {
4753                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4754                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4755                 maxfrs &= 0x0000FFFF;
4756                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4757                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4758         } else
4759                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4760
4761         /*
4762          * If loopback mode is configured for 82599, set LPBK bit.
4763          */
4764         if (hw->mac.type == ixgbe_mac_82599EB &&
4765                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4766                 hlreg0 |= IXGBE_HLREG0_LPBK;
4767         else
4768                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4769
4770         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4771
4772         /* Setup RX queues */
4773         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4774                 rxq = dev->data->rx_queues[i];
4775
4776                 /*
4777                  * Reset crc_len in case it was changed after queue setup by a
4778                  * call to configure.
4779                  */
4780                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4781
4782                 /* Setup the Base and Length of the Rx Descriptor Rings */
4783                 bus_addr = rxq->rx_ring_phys_addr;
4784                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4785                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4786                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4787                                 (uint32_t)(bus_addr >> 32));
4788                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4789                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4790                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4791                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4792
4793                 /* Configure the SRRCTL register */
4794 #ifdef RTE_HEADER_SPLIT_ENABLE
4795                 /*
4796                  * Configure Header Split
4797                  */
4798                 if (rx_conf->header_split) {
4799                         if (hw->mac.type == ixgbe_mac_82599EB) {
4800                                 /* Must setup the PSRTYPE register */
4801                                 uint32_t psrtype;
4802
4803                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4804                                         IXGBE_PSRTYPE_UDPHDR   |
4805                                         IXGBE_PSRTYPE_IPV4HDR  |
4806                                         IXGBE_PSRTYPE_IPV6HDR;
4807                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4808                         }
4809                         srrctl = ((rx_conf->split_hdr_size <<
4810                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4811                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4812                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4813                 } else
4814 #endif
4815                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4816
4817                 /* Set if packets are dropped when no descriptors available */
4818                 if (rxq->drop_en)
4819                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4820
4821                 /*
4822                  * Configure the RX buffer size in the BSIZEPACKET field of
4823                  * the SRRCTL register of the queue.
4824                  * The value is in 1 KB resolution. Valid values can be from
4825                  * 1 KB to 16 KB.
4826                  */
4827                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4828                         RTE_PKTMBUF_HEADROOM);
4829                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4830                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4831
4832                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4833
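                     /*
                      * Recompute the effective buffer size from the
                      * 1 KB-granular SRRCTL encoding so the scattered-RX
                      * check below uses the value the hardware will actually
                      * honour.
                      */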
4834                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4835                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4836
4837                 /* Account for two VLAN tags (QinQ) when deciding on scattered RX */
4838                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4839                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4840                         dev->data->scattered_rx = 1;
4841         }
4842
4843         if (rx_conf->enable_scatter)
4844                 dev->data->scattered_rx = 1;
4845
4846         /*
4847          * Device configured with multiple RX queues.
4848          */
4849         ixgbe_dev_mq_rx_configure(dev);
4850
4851         /*
4852          * Setup the Checksum Register.
4853          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4854          * Enable IP/L4 checksum computation by hardware if requested to do so.
4855          */
4856         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4857         rxcsum |= IXGBE_RXCSUM_PCSD;
4858         if (rx_conf->hw_ip_checksum)
4859                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4860         else
4861                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4862
4863         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4864
4865         if (hw->mac.type == ixgbe_mac_82599EB ||
4866             hw->mac.type == ixgbe_mac_X540) {
4867                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4868                 if (rx_conf->hw_strip_crc)
4869                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4870                 else
4871                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4872                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4873                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4874         }
4875
4876         rc = ixgbe_set_rsc(dev);
4877         if (rc)
4878                 return rc;
4879
4880         ixgbe_set_rx_function(dev);
4881
4882         return 0;
4883 }
4884
4885 /*
4886  * Initializes Transmit Unit.
4887  */
4888 void __attribute__((cold))
4889 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4890 {
4891         struct ixgbe_hw     *hw;
4892         struct ixgbe_tx_queue *txq;
4893         uint64_t bus_addr;
4894         uint32_t hlreg0;
4895         uint32_t txctrl;
4896         uint16_t i;
4897
4898         PMD_INIT_FUNC_TRACE();
4899         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4900
4901         /* Enable TX CRC (checksum offload requirement) and hw padding
4902          * (TSO requirement)
4903          */
4904         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4905         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4906         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4907
4908         /* Setup the Base and Length of the Tx Descriptor Rings */
4909         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4910                 txq = dev->data->tx_queues[i];
4911
4912                 bus_addr = txq->tx_ring_phys_addr;
4913                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4914                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4915                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4916                                 (uint32_t)(bus_addr >> 32));
4917                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4918                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4919                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4920                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4921                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4922
4923                 /*
4924                  * Disable Tx Head Writeback RO bit, since this hoses
4925                  * bookkeeping if things aren't delivered in order.
4926                  */
4927                 switch (hw->mac.type) {
4928                 case ixgbe_mac_82598EB:
4929                         txctrl = IXGBE_READ_REG(hw,
4930                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4931                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4932                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4933                                         txctrl);
4934                         break;
4935
4936                 case ixgbe_mac_82599EB:
4937                 case ixgbe_mac_X540:
4938                 case ixgbe_mac_X550:
4939                 case ixgbe_mac_X550EM_x:
4940                 case ixgbe_mac_X550EM_a:
4941                 default:
4942                         txctrl = IXGBE_READ_REG(hw,
4943                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4944                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4945                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4946                                         txctrl);
4947                         break;
4948                 }
4949         }
4950
4951         /* Device configured with multiple TX queues. */
4952         ixgbe_dev_mq_tx_configure(dev);
4953 }
4954
4955 /*
4956  * Set up link for 82599 loopback mode Tx->Rx.
4957  */
4958 static inline void __attribute__((cold))
4959 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4960 {
4961         PMD_INIT_FUNC_TRACE();
4962
4963         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4964                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4965                                 IXGBE_SUCCESS) {
4966                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4967                         /* ignore error */
4968                         return;
4969                 }
4970         }
4971
4972         /* Restart link */
4973         IXGBE_WRITE_REG(hw,
4974                         IXGBE_AUTOC,
4975                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4976         ixgbe_reset_pipeline_82599(hw);
4977
4978         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4979         msec_delay(50);
4980 }
4981
4982
4983 /*
4984  * Start Transmit and Receive Units.
4985  */
4986 int __attribute__((cold))
4987 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4988 {
4989         struct ixgbe_hw     *hw;
4990         struct ixgbe_tx_queue *txq;
4991         struct ixgbe_rx_queue *rxq;
4992         uint32_t txdctl;
4993         uint32_t dmatxctl;
4994         uint32_t rxctrl;
4995         uint16_t i;
4996         int ret = 0;
4997
4998         PMD_INIT_FUNC_TRACE();
4999         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5000
5001         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5002                 txq = dev->data->tx_queues[i];
5003                 /* Setup Transmit Threshold Registers */
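                     /*
                      * TXDCTL packs the prefetch, host and write-back
                      * thresholds into bits 6:0, 14:8 and 22:16 respectively.
                      */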
5004                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5005                 txdctl |= txq->pthresh & 0x7F;
5006                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5007                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5008                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5009         }
5010
5011         if (hw->mac.type != ixgbe_mac_82598EB) {
5012                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5013                 dmatxctl |= IXGBE_DMATXCTL_TE;
5014                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5015         }
5016
5017         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5018                 txq = dev->data->tx_queues[i];
5019                 if (!txq->tx_deferred_start) {
5020                         ret = ixgbe_dev_tx_queue_start(dev, i);
5021                         if (ret < 0)
5022                                 return ret;
5023                 }
5024         }
5025
5026         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5027                 rxq = dev->data->rx_queues[i];
5028                 if (!rxq->rx_deferred_start) {
5029                         ret = ixgbe_dev_rx_queue_start(dev, i);
5030                         if (ret < 0)
5031                                 return ret;
5032                 }
5033         }
5034
5035         /* Enable Receive engine */
5036         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5037         if (hw->mac.type == ixgbe_mac_82598EB)
5038                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5039         rxctrl |= IXGBE_RXCTRL_RXEN;
5040         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5041
5042         /* If loopback mode is enabled for 82599, set up the link accordingly */
5043         if (hw->mac.type == ixgbe_mac_82599EB &&
5044                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
5045                 ixgbe_setup_loopback_link_82599(hw);
5046
5047         if ((dev->data->dev_conf.rxmode.offloads &
5048                         DEV_RX_OFFLOAD_SECURITY) ||
5049                 (dev->data->dev_conf.txmode.offloads &
5050                         DEV_TX_OFFLOAD_SECURITY)) {
5051                 ret = ixgbe_crypto_enable_ipsec(dev);
5052                 if (ret != 0) {
5053                         PMD_DRV_LOG(ERR,
5054                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5055                                     ret);
5056                         return ret;
5057                 }
5058         }
5059
5060         return 0;
5061 }
5062
5063 /*
5064  * Start Receive Units for specified queue.
5065  */
5066 int __attribute__((cold))
5067 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5068 {
5069         struct ixgbe_hw     *hw;
5070         struct ixgbe_rx_queue *rxq;
5071         uint32_t rxdctl;
5072         int poll_ms;
5073
5074         PMD_INIT_FUNC_TRACE();
5075         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5076
5077         if (rx_queue_id < dev->data->nb_rx_queues) {
5078                 rxq = dev->data->rx_queues[rx_queue_id];
5079
5080                 /* Allocate buffers for descriptor rings */
5081                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5082                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5083                                      rx_queue_id);
5084                         return -1;
5085                 }
5086                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5087                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5088                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5089
5090                 /* Wait until RX Enable ready */
5091                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5092                 do {
5093                         rte_delay_ms(1);
5094                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5095                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5096                 if (!poll_ms)
5097                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
5098                                      rx_queue_id);
5099                 rte_wmb();
5100                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5101                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5102                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5103         } else
5104                 return -1;
5105
5106         return 0;
5107 }
5108
5109 /*
5110  * Stop Receive Units for specified queue.
5111  */
5112 int __attribute__((cold))
5113 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5114 {
5115         struct ixgbe_hw     *hw;
5116         struct ixgbe_adapter *adapter =
5117                 (struct ixgbe_adapter *)dev->data->dev_private;
5118         struct ixgbe_rx_queue *rxq;
5119         uint32_t rxdctl;
5120         int poll_ms;
5121
5122         PMD_INIT_FUNC_TRACE();
5123         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5124
5125         if (rx_queue_id < dev->data->nb_rx_queues) {
5126                 rxq = dev->data->rx_queues[rx_queue_id];
5127
5128                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5129                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5130                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5131
5132                 /* Wait until RX Enable bit clear */
5133                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5134                 do {
5135                         rte_delay_ms(1);
5136                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5137                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5138                 if (!poll_ms)
5139                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5140                                      rx_queue_id);
5141
5142                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5143
5144                 ixgbe_rx_queue_release_mbufs(rxq);
5145                 ixgbe_reset_rx_queue(adapter, rxq);
5146                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5147         } else
5148                 return -1;
5149
5150         return 0;
5151 }
5152
5153
5154 /*
5155  * Start Transmit Units for specified queue.
5156  */
5157 int __attribute__((cold))
5158 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5159 {
5160         struct ixgbe_hw     *hw;
5161         struct ixgbe_tx_queue *txq;
5162         uint32_t txdctl;
5163         int poll_ms;
5164
5165         PMD_INIT_FUNC_TRACE();
5166         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5167
5168         if (tx_queue_id < dev->data->nb_tx_queues) {
5169                 txq = dev->data->tx_queues[tx_queue_id];
5170                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5171                 txdctl |= IXGBE_TXDCTL_ENABLE;
5172                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5173
5174                 /* Wait until TX Enable ready */
5175                 if (hw->mac.type == ixgbe_mac_82599EB) {
5176                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5177                         do {
5178                                 rte_delay_ms(1);
5179                                 txdctl = IXGBE_READ_REG(hw,
5180                                         IXGBE_TXDCTL(txq->reg_idx));
5181                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5182                         if (!poll_ms)
5183                                 PMD_INIT_LOG(ERR, "Could not enable "
5184                                              "Tx Queue %d", tx_queue_id);
5185                 }
5186                 rte_wmb();
5187                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5188                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5189                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5190         } else
5191                 return -1;
5192
5193         return 0;
5194 }
5195
5196 /*
5197  * Stop Transmit Units for specified queue.
5198  */
5199 int __attribute__((cold))
5200 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5201 {
5202         struct ixgbe_hw     *hw;
5203         struct ixgbe_tx_queue *txq;
5204         uint32_t txdctl;
5205         uint32_t txtdh, txtdt;
5206         int poll_ms;
5207
5208         PMD_INIT_FUNC_TRACE();
5209         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5210
5211         if (tx_queue_id >= dev->data->nb_tx_queues)
5212                 return -1;
5213
5214         txq = dev->data->tx_queues[tx_queue_id];
5215
5216         /* Wait until TX queue is empty */
5217         if (hw->mac.type == ixgbe_mac_82599EB) {
5218                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5219                 do {
5220                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5221                         txtdh = IXGBE_READ_REG(hw,
5222                                                IXGBE_TDH(txq->reg_idx));
5223                         txtdt = IXGBE_READ_REG(hw,
5224                                                IXGBE_TDT(txq->reg_idx));
5225                 } while (--poll_ms && (txtdh != txtdt));
5226                 if (!poll_ms)
5227                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5228                                      "when stopping.", tx_queue_id);
5229         }
5230
5231         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5232         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5233         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5234
5235         /* Wait until TX Enable bit clear */
5236         if (hw->mac.type == ixgbe_mac_82599EB) {
5237                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5238                 do {
5239                         rte_delay_ms(1);
5240                         txdctl = IXGBE_READ_REG(hw,
5241                                                 IXGBE_TXDCTL(txq->reg_idx));
5242                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5243                 if (!poll_ms)
5244                         PMD_INIT_LOG(ERR, "Could not disable "
5245                                      "Tx Queue %d", tx_queue_id);
5246         }
5247
5248         if (txq->ops != NULL) {
5249                 txq->ops->release_mbufs(txq);
5250                 txq->ops->reset(txq);
5251         }
5252         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5253
5254         return 0;
5255 }
5256
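/* Report the configuration of an Rx queue, used by rte_eth_rx_queue_info_get(). */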
5257 void
5258 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5259         struct rte_eth_rxq_info *qinfo)
5260 {
5261         struct ixgbe_rx_queue *rxq;
5262
5263         rxq = dev->data->rx_queues[queue_id];
5264
5265         qinfo->mp = rxq->mb_pool;
5266         qinfo->scattered_rx = dev->data->scattered_rx;
5267         qinfo->nb_desc = rxq->nb_rx_desc;
5268
5269         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5270         qinfo->conf.rx_drop_en = rxq->drop_en;
5271         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5272 }
5273
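/* Report the configuration of a Tx queue, used by rte_eth_tx_queue_info_get(). */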
5274 void
5275 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5276         struct rte_eth_txq_info *qinfo)
5277 {
5278         struct ixgbe_tx_queue *txq;
5279
5280         txq = dev->data->tx_queues[queue_id];
5281
5282         qinfo->nb_desc = txq->nb_tx_desc;
5283
5284         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5285         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5286         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5287
5288         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5289         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5290         qinfo->conf.txq_flags = txq->txq_flags;
5291         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5292 }
5293
5294 /*
5295  * [VF] Initializes Receive Unit.
5296  */
5297 int __attribute__((cold))
5298 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5299 {
5300         struct ixgbe_hw     *hw;
5301         struct ixgbe_rx_queue *rxq;
5302         uint64_t bus_addr;
5303         uint32_t srrctl, psrtype = 0;
5304         uint16_t buf_size;
5305         uint16_t i;
5306         int ret;
5307
5308         PMD_INIT_FUNC_TRACE();
5309         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5310
5311         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5312                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5313                         "it must be a power of 2");
5314                 return -1;
5315         }
5316
5317         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5318                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5319                         "it must be less than or equal to %d",
5320                         hw->mac.max_rx_queues);
5321                 return -1;
5322         }
5323
5324         /*
5325          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5326          * disables VF packet reception if the PF MTU is > 1500.
5327          * This is done to deal with an 82599 limitation that requires
5328          * the PF and all VFs to share the same MTU.
5329          * The PF driver re-enables VF packet reception only when the VF
5330          * driver issues an IXGBE_VF_SET_LPE request.
5331          * In the meantime, the VF device cannot be used, even if the VF driver
5332          * and the guest VM network stack are ready to accept packets with a
5333          * size up to the PF MTU.
5334          * As a workaround for this PF behaviour, force the call to
5335          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5336          * VF packet reception works in all cases.
5337          */
5338         ixgbevf_rlpml_set_vf(hw,
5339                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
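        /*
         * The call above issues the IXGBE_VF_SET_LPE mailbox request asking
         * the PF to raise the VF receive packet-size limit to max_rx_pkt_len.
         */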
5340
5341         /* Setup RX queues */
5342         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5343                 rxq = dev->data->rx_queues[i];
5344
5345                 /* Allocate buffers for descriptor rings */
5346                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5347                 if (ret)
5348                         return ret;
5349
5350                 /* Setup the Base and Length of the Rx Descriptor Rings */
5351                 bus_addr = rxq->rx_ring_phys_addr;
5352
5353                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5354                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5355                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5356                                 (uint32_t)(bus_addr >> 32));
5357                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5358                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5359                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5360                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5361
5362
5363                 /* Configure the SRRCTL register */
5364 #ifdef RTE_HEADER_SPLIT_ENABLE
5365                 /*
5366                  * Configure Header Split
5367                  */
5368                 if (dev->data->dev_conf.rxmode.header_split) {
5369                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5370                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5371                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5372                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5373                 } else
5374 #endif
5375                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5376
5377                 /* Set if packets are dropped when no descriptors available */
5378                 if (rxq->drop_en)
5379                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5380
5381                 /*
5382                  * Configure the RX buffer size in the BSIZEPACKET field of
5383                  * the SRRCTL register of the queue.
5384                  * The value is in 1 KB resolution. Valid values can be from
5385                  * 1 KB to 16 KB.
5386                  */
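                /*
                 * Illustrative example (assuming a mempool created with
                 * RTE_MBUF_DEFAULT_BUF_SIZE): data room of 2176 B minus 128 B
                 * of headroom leaves 2048 B, and 2048 >> 10 = 2, i.e.
                 * BSIZEPACKET is programmed for 2 KB receive buffers.
                 */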
5387                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5388                         RTE_PKTMBUF_HEADROOM);
5389                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5390                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5391
5392                 /*
5393                  * Unlike the PF path, write SRRCTL through the VF register (VFSRRCTL)
5394                  */
5395                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5396
5397                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5398                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5399
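                /*
                 * For example (illustrative), with 2 KB buffers any
                 * max_rx_pkt_len above 2040 bytes (2048 minus two 4-byte
                 * VLAN tags) forces scattered Rx.
                 */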
5400                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5401                     /* Add the length of two VLAN tags to support dual VLAN (QinQ) */
5402                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5403                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5404                         if (!dev->data->scattered_rx)
5405                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5406                         dev->data->scattered_rx = 1;
5407                 }
5408         }
5409
5410 #ifdef RTE_HEADER_SPLIT_ENABLE
5411         if (dev->data->dev_conf.rxmode.header_split)
5412                 /* Must setup the PSRTYPE register */
5413                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5414                         IXGBE_PSRTYPE_UDPHDR   |
5415                         IXGBE_PSRTYPE_IPV4HDR  |
5416                         IXGBE_PSRTYPE_IPV6HDR;
5417 #endif
5418
5419         /* Set RQPL for VF RSS according to max Rx queue */
5420         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5421                 IXGBE_PSRTYPE_RQPL_SHIFT;
5422         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5423
5424         ixgbe_set_rx_function(dev);
5425
5426         return 0;
5427 }
5428
5429 /*
5430  * [VF] Initializes Transmit Unit.
5431  */
5432 void __attribute__((cold))
5433 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5434 {
5435         struct ixgbe_hw     *hw;
5436         struct ixgbe_tx_queue *txq;
5437         uint64_t bus_addr;
5438         uint32_t txctrl;
5439         uint16_t i;
5440
5441         PMD_INIT_FUNC_TRACE();
5442         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5443
5444         /* Setup the Base and Length of the Tx Descriptor Rings */
5445         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5446                 txq = dev->data->tx_queues[i];
5447                 bus_addr = txq->tx_ring_phys_addr;
5448                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5449                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5450                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5451                                 (uint32_t)(bus_addr >> 32));
5452                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5453                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5454                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5455                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5456                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5457
5458                 /*
5459                  * Disable relaxed ordering (RO) for Tx head write-back:
5460                  * out-of-order write-backs break descriptor bookkeeping.
5461                  */
5462                 txctrl = IXGBE_READ_REG(hw,
5463                                 IXGBE_VFDCA_TXCTRL(i));
5464                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5465                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5466                                 txctrl);
5467         }
5468 }
5469
5470 /*
5471  * [VF] Start Transmit and Receive Units.
5472  */
5473 void __attribute__((cold))
5474 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5475 {
5476         struct ixgbe_hw     *hw;
5477         struct ixgbe_tx_queue *txq;
5478         struct ixgbe_rx_queue *rxq;
5479         uint32_t txdctl;
5480         uint32_t rxdctl;
5481         uint16_t i;
5482         int poll_ms;
5483
5484         PMD_INIT_FUNC_TRACE();
5485         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5486
5487         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5488                 txq = dev->data->tx_queues[i];
5489                 /* Setup Transmit Threshold Registers */
5490                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5491                 txdctl |= txq->pthresh & 0x7F;
5492                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5493                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5494                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5495         }
5496
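        /* Enable each Tx queue and poll until the enable bit reads back as set. */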
5497         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5498
5499                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5500                 txdctl |= IXGBE_TXDCTL_ENABLE;
5501                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5502
5503                 poll_ms = 10;
5504                 /* Wait until TX Enable ready */
5505                 do {
5506                         rte_delay_ms(1);
5507                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5508                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5509                 if (!poll_ms)
5510                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5511         }
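        /* Enable each Rx queue, then publish the tail pointer to hand its descriptors to HW. */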
5512         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5513
5514                 rxq = dev->data->rx_queues[i];
5515
5516                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5517                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5518                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5519
5520                 /* Wait until RX Enable ready */
5521                 poll_ms = 10;
5522                 do {
5523                         rte_delay_ms(1);
5524                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5525                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5526                 if (!poll_ms)
5527                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5528                 rte_wmb();
5529                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5530
5531         }
5532 }
5533
5534 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5535 int __attribute__((weak))
5536 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5537 {
5538         return -1;
5539 }
5540
5541 uint16_t __attribute__((weak))
5542 ixgbe_recv_pkts_vec(
5543         void __rte_unused *rx_queue,
5544         struct rte_mbuf __rte_unused **rx_pkts,
5545         uint16_t __rte_unused nb_pkts)
5546 {
5547         return 0;
5548 }
5549
5550 uint16_t __attribute__((weak))
5551 ixgbe_recv_scattered_pkts_vec(
5552         void __rte_unused *rx_queue,
5553         struct rte_mbuf __rte_unused **rx_pkts,
5554         uint16_t __rte_unused nb_pkts)
5555 {
5556         return 0;
5557 }
5558
5559 int __attribute__((weak))
5560 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5561 {
5562         return -1;
5563 }