net/ixgbe: add Tx preparation
drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 /* Bit mask to indicate which bits are required for building the TX context */
84 #define IXGBE_TX_OFFLOAD_MASK (                  \
85                 PKT_TX_VLAN_PKT |                \
86                 PKT_TX_IP_CKSUM |                \
87                 PKT_TX_L4_MASK |                 \
88                 PKT_TX_TCP_SEG |                 \
89                 PKT_TX_OUTER_IP_CKSUM)
90
91 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
92                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
93
94 #if 1
95 #define RTE_PMD_USE_PREFETCH
96 #endif
97
98 #ifdef RTE_PMD_USE_PREFETCH
99 /*
100  * Prefetch a cache line into all cache levels.
101  */
102 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
103 #else
104 #define rte_ixgbe_prefetch(p)   do {} while (0)
105 #endif
106
107 /*********************************************************************
108  *
109  *  TX functions
110  *
111  **********************************************************************/
112
113 /*
114  * Check for descriptors with their DD bit set and free mbufs.
115  * Return the total number of buffers freed.
116  */
117 static inline int __attribute__((always_inline))
118 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
119 {
120         struct ixgbe_tx_entry *txep;
121         uint32_t status;
122         int i, nb_free = 0;
123         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
124
125         /* check DD bit on threshold descriptor */
126         status = txq->tx_ring[txq->tx_next_dd].wb.status;
127         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
128                 return 0;
129
130         /*
131          * first buffer to free from S/W ring is at index
132          * tx_next_dd - (tx_rs_thresh-1)
133          */
134         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
135
136         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
137                 /* free buffers one at a time */
138                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
139                 txep->mbuf = NULL;
140
141                 if (unlikely(m == NULL))
142                         continue;
143
144                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
145                     (nb_free > 0 && m->pool != free[0]->pool)) {
146                         rte_mempool_put_bulk(free[0]->pool,
147                                              (void **)free, nb_free);
148                         nb_free = 0;
149                 }
150
151                 free[nb_free++] = m;
152         }
153
154         if (nb_free > 0)
155                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
156
157         /* buffers were freed, update counters */
158         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
159         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
160         if (txq->tx_next_dd >= txq->nb_tx_desc)
161                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
162
163         return txq->tx_rs_thresh;
164 }
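
/*
 * Illustrative sketch (not part of the driver, kept out of the build with
 * "#if 0"): the same batching idea as ixgbe_tx_free_bufs() applied to a
 * plain array of mbuf segments.  Segments coming from the same mempool are
 * grouped and returned with a single rte_mempool_put_bulk() call; a pool
 * change or a full batch flushes the group.  The helper name and batch size
 * below are made up for the example.
 */
#if 0
#define SKETCH_FREE_BATCH 32

static void
sketch_bulk_free(struct rte_mbuf **mbufs, unsigned int n)
{
	void *batch[SKETCH_FREE_BATCH];
	struct rte_mempool *pool = NULL;
	unsigned int i, nb = 0;

	for (i = 0; i < n; i++) {
		struct rte_mbuf *m = __rte_pktmbuf_prefree_seg(mbufs[i]);

		if (m == NULL)
			continue;	/* still referenced, nothing more to do */

		if (nb == SKETCH_FREE_BATCH ||
		    (nb > 0 && m->pool != pool)) {
			rte_mempool_put_bulk(pool, batch, nb);
			nb = 0;
		}
		pool = m->pool;
		batch[nb++] = m;
	}
	if (nb > 0)
		rte_mempool_put_bulk(pool, batch, nb);
}
#endif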
165
166 /* Populate 4 descriptors with data from 4 mbufs */
167 static inline void
168 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
169 {
170         uint64_t buf_dma_addr;
171         uint32_t pkt_len;
172         int i;
173
174         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
175                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
176                 pkt_len = (*pkts)->data_len;
177
178                 /* write data to descriptor */
179                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
180
181                 txdp->read.cmd_type_len =
182                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
183
184                 txdp->read.olinfo_status =
185                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
186
187                 rte_prefetch0(&(*pkts)->pool);
188         }
189 }
190
191 /* Populate 1 descriptor with data from 1 mbuf */
192 static inline void
193 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
194 {
195         uint64_t buf_dma_addr;
196         uint32_t pkt_len;
197
198         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
199         pkt_len = (*pkts)->data_len;
200
201         /* write data to descriptor */
202         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
203         txdp->read.cmd_type_len =
204                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
205         txdp->read.olinfo_status =
206                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
207         rte_prefetch0(&(*pkts)->pool);
208 }
209
210 /*
211  * Fill H/W descriptor ring with mbuf data.
212  * Copy mbuf pointers to the S/W ring.
213  */
214 static inline void
215 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
216                       uint16_t nb_pkts)
217 {
218         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
219         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
220         const int N_PER_LOOP = 4;
221         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
222         int mainpart, leftover;
223         int i, j;
224
225         /*
226          * Process most of the packets in chunks of N pkts.  Any
227          * leftover packets will get processed one at a time.
228          */
229         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
230         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
231         for (i = 0; i < mainpart; i += N_PER_LOOP) {
232                 /* Copy N mbuf pointers to the S/W ring */
233                 for (j = 0; j < N_PER_LOOP; ++j) {
234                         (txep + i + j)->mbuf = *(pkts + i + j);
235                 }
236                 tx4(txdp + i, pkts + i);
237         }
238
239         if (unlikely(leftover > 0)) {
240                 for (i = 0; i < leftover; ++i) {
241                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
242                         tx1(txdp + mainpart + i, pkts + mainpart + i);
243                 }
244         }
245 }
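
/*
 * Illustrative sketch (not part of the driver, kept out of the build with
 * "#if 0"): the mainpart/leftover split above relies on N_PER_LOOP being a
 * power of two so that a bit mask can replace division and modulo.  For
 * nb_pkts = 23 and N_PER_LOOP = 4: mainpart = 23 & ~3 = 20 and
 * leftover = 23 & 3 = 3, i.e. five unrolled chunks of four plus three
 * stragglers.  The same pattern on a plain array, with a made-up helper name:
 */
#if 0
static unsigned int
sketch_chunked_sum(const unsigned int *vals, unsigned int nb_items)
{
	const unsigned int chunk = 4;	/* must be a power of two */
	unsigned int mainpart = nb_items & ~(chunk - 1);
	unsigned int leftover = nb_items & (chunk - 1);
	unsigned int i, sum = 0;

	/* bulk of the work in unrolled chunks of four */
	for (i = 0; i < mainpart; i += chunk)
		sum += vals[i] + vals[i + 1] + vals[i + 2] + vals[i + 3];

	/* stragglers one at a time */
	for (i = 0; i < leftover; i++)
		sum += vals[mainpart + i];

	return sum;
}
#endif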
246
247 static inline uint16_t
248 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
249              uint16_t nb_pkts)
250 {
251         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
252         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
253         uint16_t n = 0;
254
255         /*
256          * Begin scanning the H/W ring for done descriptors when the
257          * number of available descriptors drops below tx_free_thresh.  For
258          * each done descriptor, free the associated buffer.
259          */
260         if (txq->nb_tx_free < txq->tx_free_thresh)
261                 ixgbe_tx_free_bufs(txq);
262
263         /* Only use descriptors that are available */
264         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
265         if (unlikely(nb_pkts == 0))
266                 return 0;
267
268         /* Use exactly nb_pkts descriptors */
269         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
270
271         /*
272          * At this point, we know there are enough descriptors in the
273          * ring to transmit all the packets.  This assumes that each
274          * mbuf contains a single segment, and that no new offloads
275          * are expected, which would require a new context descriptor.
276          */
277
278         /*
279          * See if we're going to wrap-around. If so, handle the top
280          * of the descriptor ring first, then do the bottom.  If not,
281          * the processing looks just like the "bottom" part anyway...
282          */
283         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
284                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
285                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
286
287                 /*
288                  * We know that the last descriptor in the ring will need to
289                  * have its RS bit set because tx_rs_thresh has to be
290                  * a divisor of the ring size
291                  */
292                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
293                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
294                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
295
296                 txq->tx_tail = 0;
297         }
298
299         /* Fill H/W descriptor ring with mbuf data */
300         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
301         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
302
303         /*
304          * Determine if RS bit should be set
305          * This is what we actually want:
306          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
307          * but instead of subtracting 1 and doing >=, we can just do
308          * greater than without subtracting.
309          */
310         if (txq->tx_tail > txq->tx_next_rs) {
311                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
312                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
313                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
314                                                 txq->tx_rs_thresh);
315                 if (txq->tx_next_rs >= txq->nb_tx_desc)
316                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
317         }
318
319         /*
320          * Check for wrap-around. This would only happen if we used
321          * up to the last descriptor in the ring, no more, no less.
322          */
323         if (txq->tx_tail >= txq->nb_tx_desc)
324                 txq->tx_tail = 0;
325
326         /* update tail pointer */
327         rte_wmb();
328         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
329
330         return nb_pkts;
331 }
332
333 uint16_t
334 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
335                        uint16_t nb_pkts)
336 {
337         uint16_t nb_tx;
338
339         /* Transmit the burst directly if it fits within TX_MAX_BURST */
340         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
341                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
342
343         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
344         nb_tx = 0;
345         while (nb_pkts) {
346                 uint16_t ret, n;
347
348                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
349                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
350                 nb_tx = (uint16_t)(nb_tx + ret);
351                 nb_pkts = (uint16_t)(nb_pkts - ret);
352                 if (ret < n)
353                         break;
354         }
355
356         return nb_tx;
357 }
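
/*
 * Illustrative sketch (not part of the driver, kept out of the build with
 * "#if 0"): an application-side analogue of the early stop above.
 * rte_eth_tx_burst() may accept fewer packets than requested when the ring
 * is full; a caller that does not want to retry later typically frees the
 * leftovers itself.  The helper name is made up and the port_id integer
 * type follows the DPDK release in use.
 */
#if 0
static void
sketch_send_or_drop(uint8_t port_id, uint16_t queue_id,
		    struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);

	/* free whatever the ring had no room for */
	while (sent < nb_pkts)
		rte_pktmbuf_free(pkts[sent++]);
}
#endif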
358
359 static inline void
360 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
361                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
362                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
363 {
364         uint32_t type_tucmd_mlhl;
365         uint32_t mss_l4len_idx = 0;
366         uint32_t ctx_idx;
367         uint32_t vlan_macip_lens;
368         union ixgbe_tx_offload tx_offload_mask;
369         uint32_t seqnum_seed = 0;
370
371         ctx_idx = txq->ctx_curr;
372         tx_offload_mask.data[0] = 0;
373         tx_offload_mask.data[1] = 0;
374         type_tucmd_mlhl = 0;
375
376         /* Specify which HW CTX to upload. */
377         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
378
379         if (ol_flags & PKT_TX_VLAN_PKT) {
380                 tx_offload_mask.vlan_tci |= ~0;
381         }
382
383         /* check if TCP segmentation is required for this packet */
384         if (ol_flags & PKT_TX_TCP_SEG) {
385                 /* implies IP cksum in IPv4 */
386                 if (ol_flags & PKT_TX_IP_CKSUM)
387                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
388                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
389                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
390                 else
391                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
392                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
393                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
394
395                 tx_offload_mask.l2_len |= ~0;
396                 tx_offload_mask.l3_len |= ~0;
397                 tx_offload_mask.l4_len |= ~0;
398                 tx_offload_mask.tso_segsz |= ~0;
399                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
400                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
401         } else { /* no TSO, check if hardware checksum is needed */
402                 if (ol_flags & PKT_TX_IP_CKSUM) {
403                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
404                         tx_offload_mask.l2_len |= ~0;
405                         tx_offload_mask.l3_len |= ~0;
406                 }
407
408                 switch (ol_flags & PKT_TX_L4_MASK) {
409                 case PKT_TX_UDP_CKSUM:
410                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
411                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
412                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
413                         tx_offload_mask.l2_len |= ~0;
414                         tx_offload_mask.l3_len |= ~0;
415                         break;
416                 case PKT_TX_TCP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_SCTP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
427                         tx_offload_mask.l2_len |= ~0;
428                         tx_offload_mask.l3_len |= ~0;
429                         break;
430                 default:
431                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
432                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433                         break;
434                 }
435         }
436
437         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
438                 tx_offload_mask.outer_l2_len |= ~0;
439                 tx_offload_mask.outer_l3_len |= ~0;
440                 tx_offload_mask.l2_len |= ~0;
441                 seqnum_seed |= tx_offload.outer_l3_len
442                                << IXGBE_ADVTXD_OUTER_IPLEN;
443                 seqnum_seed |= tx_offload.l2_len
444                                << IXGBE_ADVTXD_TUNNEL_LEN;
445         }
446
447         txq->ctx_cache[ctx_idx].flags = ol_flags;
448         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
449                 tx_offload_mask.data[0] & tx_offload.data[0];
450         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
451                 tx_offload_mask.data[1] & tx_offload.data[1];
452         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
453
454         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
455         vlan_macip_lens = tx_offload.l3_len;
456         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
457                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
458                                     IXGBE_ADVTXD_MACLEN_SHIFT);
459         else
460                 vlan_macip_lens |= (tx_offload.l2_len <<
461                                     IXGBE_ADVTXD_MACLEN_SHIFT);
462         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
463         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
464         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
465         ctx_txd->seqnum_seed     = seqnum_seed;
466 }
467
468 /*
469  * Check which hardware context can be used. Use the existing match
470  * or create a new context descriptor.
471  */
472 static inline uint32_t
473 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
474                    union ixgbe_tx_offload tx_offload)
475 {
476         /* Check whether it matches the currently used context */
477         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
478                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
479                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
480                      & tx_offload.data[0])) &&
481                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
482                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
483                      & tx_offload.data[1]))))
484                 return txq->ctx_curr;
485
486         /* Otherwise, check whether the other (next) context matches */
487         txq->ctx_curr ^= 1;
488         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
489                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
490                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
491                      & tx_offload.data[0])) &&
492                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
493                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
494                      & tx_offload.data[1]))))
495                 return txq->ctx_curr;
496
497         /* Neither context matches: a new context descriptor must be built */
498         return IXGBE_CTX_NUM;
499 }
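
/*
 * Illustrative sketch (not part of the driver, kept out of the build with
 * "#if 0"): the driver keeps a two-entry cache of hardware context
 * descriptors per Tx queue (IXGBE_CTX_NUM of them), which is why the lookup
 * above only checks the current slot and, on a miss, toggles to the other
 * one with "ctx_curr ^= 1".  A minimal two-entry cache built on the same
 * toggle, with made-up names and a single 64-bit key standing in for the
 * (flags, masked tx_offload) pair:
 */
#if 0
struct sketch_ctx_cache {
	uint64_t key[2];	/* value currently programmed in each HW slot */
	uint32_t curr;		/* slot selected for the next packet, 0 or 1 */
};

/* Return the matching slot, or 2 when a new context must be programmed. */
static uint32_t
sketch_ctx_lookup(struct sketch_ctx_cache *c, uint64_t key)
{
	if (c->key[c->curr] == key)
		return c->curr;

	c->curr ^= 1;			/* try the other slot */
	if (c->key[c->curr] == key)
		return c->curr;

	return 2;	/* miss: the caller rebuilds c->key[c->curr] */
}
#endif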
500
501 static inline uint32_t
502 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
503 {
504         uint32_t tmp = 0;
505
506         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
507                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
508         if (ol_flags & PKT_TX_IP_CKSUM)
509                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
510         if (ol_flags & PKT_TX_TCP_SEG)
511                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
512         return tmp;
513 }
514
515 static inline uint32_t
516 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
517 {
518         uint32_t cmdtype = 0;
519
520         if (ol_flags & PKT_TX_VLAN_PKT)
521                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
522         if (ol_flags & PKT_TX_TCP_SEG)
523                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
524         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
525                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
526         return cmdtype;
527 }
528
529 /* Default RS bit threshold values */
530 #ifndef DEFAULT_TX_RS_THRESH
531 #define DEFAULT_TX_RS_THRESH   32
532 #endif
533 #ifndef DEFAULT_TX_FREE_THRESH
534 #define DEFAULT_TX_FREE_THRESH 32
535 #endif
536
537 /* Reset transmit descriptors after they have been used */
538 static inline int
539 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
540 {
541         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
542         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
543         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
544         uint16_t nb_tx_desc = txq->nb_tx_desc;
545         uint16_t desc_to_clean_to;
546         uint16_t nb_tx_to_clean;
547         uint32_t status;
548
549         /* Determine the last descriptor needing to be cleaned */
550         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
551         if (desc_to_clean_to >= nb_tx_desc)
552                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
553
554         /* Check to make sure the last descriptor to clean is done */
555         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
556         status = txr[desc_to_clean_to].wb.status;
557         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
558                 PMD_TX_FREE_LOG(DEBUG,
559                                 "TX descriptor %4u is not done "
560                                 "(port=%d queue=%d)",
561                                 desc_to_clean_to,
562                                 txq->port_id, txq->queue_id);
563                 /* Failed to clean any descriptors, better luck next time */
564                 return -(1);
565         }
566
567         /* Figure out how many descriptors will be cleaned */
568         if (last_desc_cleaned > desc_to_clean_to)
569                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
570                                                         desc_to_clean_to);
571         else
572                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
573                                                 last_desc_cleaned);
574
575         PMD_TX_FREE_LOG(DEBUG,
576                         "Cleaning %4u TX descriptors: %4u to %4u "
577                         "(port=%d queue=%d)",
578                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
579                         txq->port_id, txq->queue_id);
580
581         /*
582          * The last descriptor to clean is done, so that means all the
583          * descriptors from the last descriptor that was cleaned
584          * up to the last descriptor with the RS bit set
585          * are done. Only reset the threshold descriptor.
586          */
587         txr[desc_to_clean_to].wb.status = 0;
588
589         /* Update the txq to reflect the last descriptor that was cleaned */
590         txq->last_desc_cleaned = desc_to_clean_to;
591         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
592
593         /* No Error */
594         return 0;
595 }
596
597 uint16_t
598 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
599                 uint16_t nb_pkts)
600 {
601         struct ixgbe_tx_queue *txq;
602         struct ixgbe_tx_entry *sw_ring;
603         struct ixgbe_tx_entry *txe, *txn;
604         volatile union ixgbe_adv_tx_desc *txr;
605         volatile union ixgbe_adv_tx_desc *txd, *txp;
606         struct rte_mbuf     *tx_pkt;
607         struct rte_mbuf     *m_seg;
608         uint64_t buf_dma_addr;
609         uint32_t olinfo_status;
610         uint32_t cmd_type_len;
611         uint32_t pkt_len;
612         uint16_t slen;
613         uint64_t ol_flags;
614         uint16_t tx_id;
615         uint16_t tx_last;
616         uint16_t nb_tx;
617         uint16_t nb_used;
618         uint64_t tx_ol_req;
619         uint32_t ctx = 0;
620         uint32_t new_ctx;
621         union ixgbe_tx_offload tx_offload;
622
623         tx_offload.data[0] = 0;
624         tx_offload.data[1] = 0;
625         txq = tx_queue;
626         sw_ring = txq->sw_ring;
627         txr     = txq->tx_ring;
628         tx_id   = txq->tx_tail;
629         txe = &sw_ring[tx_id];
630         txp = NULL;
631
632         /* Determine if the descriptor ring needs to be cleaned. */
633         if (txq->nb_tx_free < txq->tx_free_thresh)
634                 ixgbe_xmit_cleanup(txq);
635
636         rte_prefetch0(&txe->mbuf->pool);
637
638         /* TX loop */
639         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
640                 new_ctx = 0;
641                 tx_pkt = *tx_pkts++;
642                 pkt_len = tx_pkt->pkt_len;
643
644                 /*
645                  * Determine how many (if any) context descriptors
646                  * are needed for offload functionality.
647                  */
648                 ol_flags = tx_pkt->ol_flags;
649
650                 /* If hardware offload required */
651                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
652                 if (tx_ol_req) {
653                         tx_offload.l2_len = tx_pkt->l2_len;
654                         tx_offload.l3_len = tx_pkt->l3_len;
655                         tx_offload.l4_len = tx_pkt->l4_len;
656                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
657                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
658                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
659                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
660
661                         /* Decide whether a new context must be built or the existing one reused. */
662                         ctx = what_advctx_update(txq, tx_ol_req,
663                                 tx_offload);
664                         /* Only allocate a context descriptor if required */
665                         new_ctx = (ctx == IXGBE_CTX_NUM);
666                         ctx = txq->ctx_curr;
667                 }
668
669                 /*
670                  * Keep track of how many descriptors are used in this loop.
671                  * This will always be the number of segments + the number of
672                  * Context descriptors required to transmit the packet
673                  */
674                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
675
676                 if (txp != NULL &&
677                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
678                         /* set RS on the previous packet in the burst */
679                         txp->read.cmd_type_len |=
680                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
681
682                 /*
683                  * The number of descriptors that must be allocated for a
684                  * packet is the number of segments of that packet, plus 1
685                  * Context Descriptor for the hardware offload, if any.
686                  * Determine the last TX descriptor to allocate in the TX ring
687                  * for the packet, starting from the current position (tx_id)
688                  * in the ring.
689                  */
690                 tx_last = (uint16_t) (tx_id + nb_used - 1);
691
692                 /* Circular ring */
693                 if (tx_last >= txq->nb_tx_desc)
694                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
695
696                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
697                            " tx_first=%u tx_last=%u",
698                            (unsigned) txq->port_id,
699                            (unsigned) txq->queue_id,
700                            (unsigned) pkt_len,
701                            (unsigned) tx_id,
702                            (unsigned) tx_last);
703
704                 /*
705                  * Make sure there are enough TX descriptors available to
706                  * transmit the entire packet.
707                  * nb_used better be less than or equal to txq->tx_rs_thresh
708                  */
709                 if (nb_used > txq->nb_tx_free) {
710                         PMD_TX_FREE_LOG(DEBUG,
711                                         "Not enough free TX descriptors "
712                                         "nb_used=%4u nb_free=%4u "
713                                         "(port=%d queue=%d)",
714                                         nb_used, txq->nb_tx_free,
715                                         txq->port_id, txq->queue_id);
716
717                         if (ixgbe_xmit_cleanup(txq) != 0) {
718                                 /* Could not clean any descriptors */
719                                 if (nb_tx == 0)
720                                         return 0;
721                                 goto end_of_tx;
722                         }
723
724                         /* nb_used better be <= txq->tx_rs_thresh */
725                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
726                                 PMD_TX_FREE_LOG(DEBUG,
727                                         "The number of descriptors needed to "
728                                         "transmit the packet exceeds the "
729                                         "RS bit threshold. This will impact "
730                                         "performance. "
731                                         "nb_used=%4u nb_free=%4u "
732                                         "tx_rs_thresh=%4u. "
733                                         "(port=%d queue=%d)",
734                                         nb_used, txq->nb_tx_free,
735                                         txq->tx_rs_thresh,
736                                         txq->port_id, txq->queue_id);
737                                 /*
738                                  * Loop here until there are enough TX
739                                  * descriptors or until the ring cannot be
740                                  * cleaned.
741                                  */
742                                 while (nb_used > txq->nb_tx_free) {
743                                         if (ixgbe_xmit_cleanup(txq) != 0) {
744                                                 /*
745                                                  * Could not clean any
746                                                  * descriptors
747                                                  */
748                                                 if (nb_tx == 0)
749                                                         return 0;
750                                                 goto end_of_tx;
751                                         }
752                                 }
753                         }
754                 }
755
756                 /*
757                  * By now there are enough free TX descriptors to transmit
758                  * the packet.
759                  */
760
761                 /*
762                  * Set common flags of all TX Data Descriptors.
763                  *
764                  * The following bits must be set in all Data Descriptors:
765                  *   - IXGBE_ADVTXD_DTYP_DATA
766                  *   - IXGBE_ADVTXD_DCMD_DEXT
767                  *
768                  * The following bits must be set in the first Data Descriptor
769                  * and are ignored in the other ones:
770                  *   - IXGBE_ADVTXD_DCMD_IFCS
771                  *   - IXGBE_ADVTXD_MAC_1588
772                  *   - IXGBE_ADVTXD_DCMD_VLE
773                  *
774                  * The following bits must only be set in the last Data
775                  * Descriptor:
776                  *   - IXGBE_TXD_CMD_EOP
777                  *
778                  * The following bits can be set in any Data Descriptor, but
779                  * are only set in the last Data Descriptor:
780                  *   - IXGBE_TXD_CMD_RS
781                  */
782                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
783                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
784
785 #ifdef RTE_LIBRTE_IEEE1588
786                 if (ol_flags & PKT_TX_IEEE1588_TMST)
787                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
788 #endif
789
790                 olinfo_status = 0;
791                 if (tx_ol_req) {
792
793                         if (ol_flags & PKT_TX_TCP_SEG) {
794                                 /* when TSO is on, the paylen in the descriptor
795                                  * is not the packet len but the tcp payload len */
796                                 pkt_len -= (tx_offload.l2_len +
797                                         tx_offload.l3_len + tx_offload.l4_len);
798                         }
799
800                         /*
801                          * Setup the TX Advanced Context Descriptor if required
802                          */
803                         if (new_ctx) {
804                                 volatile struct ixgbe_adv_tx_context_desc *
805                                     ctx_txd;
806
807                                 ctx_txd = (volatile struct
808                                     ixgbe_adv_tx_context_desc *)
809                                     &txr[tx_id];
810
811                                 txn = &sw_ring[txe->next_id];
812                                 rte_prefetch0(&txn->mbuf->pool);
813
814                                 if (txe->mbuf != NULL) {
815                                         rte_pktmbuf_free_seg(txe->mbuf);
816                                         txe->mbuf = NULL;
817                                 }
818
819                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
820                                         tx_offload);
821
822                                 txe->last_id = tx_last;
823                                 tx_id = txe->next_id;
824                                 txe = txn;
825                         }
826
827                         /*
828                          * Set up the TX Advanced Data Descriptor.
829                          * This path is taken whether the context
830                          * descriptor was newly built or reused.
831                          */
832                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
833                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
834                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
835                 }
836
837                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
838
839                 m_seg = tx_pkt;
840                 do {
841                         txd = &txr[tx_id];
842                         txn = &sw_ring[txe->next_id];
843                         rte_prefetch0(&txn->mbuf->pool);
844
845                         if (txe->mbuf != NULL)
846                                 rte_pktmbuf_free_seg(txe->mbuf);
847                         txe->mbuf = m_seg;
848
849                         /*
850                          * Set up Transmit Data Descriptor.
851                          */
852                         slen = m_seg->data_len;
853                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
854                         txd->read.buffer_addr =
855                                 rte_cpu_to_le_64(buf_dma_addr);
856                         txd->read.cmd_type_len =
857                                 rte_cpu_to_le_32(cmd_type_len | slen);
858                         txd->read.olinfo_status =
859                                 rte_cpu_to_le_32(olinfo_status);
860                         txe->last_id = tx_last;
861                         tx_id = txe->next_id;
862                         txe = txn;
863                         m_seg = m_seg->next;
864                 } while (m_seg != NULL);
865
866                 /*
867                  * The last packet data descriptor needs End Of Packet (EOP)
868                  */
869                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
870                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
871                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
872
873                 /* Set RS bit only on threshold packets' last descriptor */
874                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
875                         PMD_TX_FREE_LOG(DEBUG,
876                                         "Setting RS bit on TXD id="
877                                         "%4u (port=%d queue=%d)",
878                                         tx_last, txq->port_id, txq->queue_id);
879
880                         cmd_type_len |= IXGBE_TXD_CMD_RS;
881
882                         /* Update txq RS bit counters */
883                         txq->nb_tx_used = 0;
884                         txp = NULL;
885                 } else
886                         txp = txd;
887
888                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
889         }
890
891 end_of_tx:
892         /* set RS on last packet in the burst */
893         if (txp != NULL)
894                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
895
896         rte_wmb();
897
898         /*
899          * Set the Transmit Descriptor Tail (TDT)
900          */
901         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
902                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
903                    (unsigned) tx_id, (unsigned) nb_tx);
904         IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
905         txq->tx_tail = tx_id;
906
907         return nb_tx;
908 }
909
910 /*********************************************************************
911  *
912  *  TX prep functions
913  *
914  **********************************************************************/
915 uint16_t
916 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
917 {
918         int i, ret;
919         uint64_t ol_flags;
920         struct rte_mbuf *m;
921         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
922
923         for (i = 0; i < nb_pkts; i++) {
924                 m = tx_pkts[i];
925                 ol_flags = m->ol_flags;
926
927                 /**
928                  * Check that the packet meets the requirement on the number of segments.
929                  *
930                  * NOTE: for ixgbe the limit is always (40 - WTHRESH), for both
931                  *       TSO and non-TSO.
932                  */
933
934                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
935                         rte_errno = EINVAL;
936                         return i;
937                 }
938
939                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
940                         rte_errno = ENOTSUP;
941                         return i;
942                 }
943
944 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
945                 ret = rte_validate_tx_offload(m);
946                 if (ret != 0) {
947                         rte_errno = -ret;
948                         return i;
949                 }
950 #endif
951                 ret = rte_net_intel_cksum_prepare(m);
952                 if (ret != 0) {
953                         rte_errno = -ret;
954                         return i;
955                 }
956         }
957
958         return i;
959 }
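
/*
 * Illustrative sketch (not part of the driver, kept out of the build with
 * "#if 0"): ixgbe_prep_pkts() is reached through the generic
 * rte_eth_tx_prepare() API.  An application that relies on TSO or checksum
 * offloads is expected to run the burst through tx_prepare first and hand
 * only the packets that passed on to tx_burst, consulting rte_errno when the
 * result is partial.  The helper name is made up and the port_id integer
 * type follows the DPDK release in use.
 */
#if 0
static uint16_t
sketch_prepare_and_send(uint8_t port_id, uint16_t queue_id,
			struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id,
					      pkts, nb_pkts);

	if (nb_prep < nb_pkts)
		RTE_LOG(WARNING, PMD,
			"tx_prepare stopped at packet %u: %s\n",
			(unsigned int)nb_prep, strerror(rte_errno));

	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}
#endif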
960
961 /*********************************************************************
962  *
963  *  RX functions
964  *
965  **********************************************************************/
966
967 #define IXGBE_PACKET_TYPE_ETHER                         0X00
968 #define IXGBE_PACKET_TYPE_IPV4                          0X01
969 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
970 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
971 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
972 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
973 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
974 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
975 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
976 #define IXGBE_PACKET_TYPE_IPV6                          0X04
977 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
978 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
979 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
980 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
981 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
982 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
983 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
984 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
985 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
986 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
987 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
988 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
989 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
990 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
991 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
992 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
993 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
994 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
995 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
996 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
997 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
998 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
999 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1000
1001 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1002 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1003 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1004 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1005 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1006 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1007 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1008 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1009 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1010 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1011 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1012 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1013 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1014 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1015 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1016 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1017 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1018 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1019 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1020 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1021 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1022 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1023 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1024
1025 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1026 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1027 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1028 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1029 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1030 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1031 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1032 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1033 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1034 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1035 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1036 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1037 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1038 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1039 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1040 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1041 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1042 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1043 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1044 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1045 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1046 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1047 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1048
1049 #define IXGBE_PACKET_TYPE_MAX               0X80
1050 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1051 #define IXGBE_PACKET_TYPE_SHIFT             0X04
1052
1053 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1054 static inline uint32_t
1055 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1056 {
1057         /**
1058          * Use two different tables for normal packets and tunnel packets
1059          * to save space.
1060          */
1061         static const uint32_t
1062                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1063                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1064                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1065                         RTE_PTYPE_L3_IPV4,
1066                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1067                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1068                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1069                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1070                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1071                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1072                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1073                         RTE_PTYPE_L3_IPV4_EXT,
1074                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1075                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1076                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1077                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1078                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1079                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1080                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1081                         RTE_PTYPE_L3_IPV6,
1082                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1083                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1084                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1085                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1086                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1087                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1088                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1089                         RTE_PTYPE_L3_IPV6_EXT,
1090                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1091                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1092                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1093                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1094                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1095                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1096                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1097                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1098                         RTE_PTYPE_INNER_L3_IPV6,
1099                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1101                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1102                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1103                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1104                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1105                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1106                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1107                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1108                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1110                         RTE_PTYPE_INNER_L3_IPV6,
1111                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1112                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1113                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1114                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1116                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1117                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1118                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1119                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1120                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1122                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1123                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1125                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1126                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1128                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1129                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1130                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1131                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1132                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1134                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1135                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1136                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1137                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1138                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1140                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1141                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1142                         RTE_PTYPE_L2_ETHER |
1143                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1144                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1145         };
1146
1147         static const uint32_t
1148                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1149                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1150                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1151                         RTE_PTYPE_INNER_L2_ETHER,
1152                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1153                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1154                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1155                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1156                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1157                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1158                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1160                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1164                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1166                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1167                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1169                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1170                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1172                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1173                         RTE_PTYPE_INNER_L4_TCP,
1174                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1175                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1176                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1177                         RTE_PTYPE_INNER_L4_TCP,
1178                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1179                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1180                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1181                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1182                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1183                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1184                         RTE_PTYPE_INNER_L4_TCP,
1185                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1186                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1187                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1188                         RTE_PTYPE_INNER_L3_IPV4,
1189                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1190                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1191                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1192                         RTE_PTYPE_INNER_L4_UDP,
1193                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1194                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1195                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1196                         RTE_PTYPE_INNER_L4_UDP,
1197                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1198                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1199                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1200                         RTE_PTYPE_INNER_L4_SCTP,
1201                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1202                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1204                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1205                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1207                         RTE_PTYPE_INNER_L4_UDP,
1208                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1209                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1211                         RTE_PTYPE_INNER_L4_SCTP,
1212                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1213                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1214                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1215                         RTE_PTYPE_INNER_L3_IPV4,
1216                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1217                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1219                         RTE_PTYPE_INNER_L4_SCTP,
1220                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1221                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1223                         RTE_PTYPE_INNER_L4_SCTP,
1224                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1225                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1227                         RTE_PTYPE_INNER_L4_TCP,
1228                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1229                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1230                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1231                         RTE_PTYPE_INNER_L4_UDP,
1232
1233                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1235                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1236                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1237                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1238                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1239                         RTE_PTYPE_INNER_L3_IPV4,
1240                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1241                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1242                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1243                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1244                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1245                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1246                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1247                         RTE_PTYPE_INNER_L3_IPV6,
1248                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1249                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1250                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1251                         RTE_PTYPE_INNER_L3_IPV4,
1252                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1253                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1254                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1255                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1256                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1257                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1258                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1259                         RTE_PTYPE_INNER_L3_IPV4,
1260                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1261                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1262                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1263                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1264                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1265                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1266                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1267                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1268                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1269                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1270                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1271                         RTE_PTYPE_INNER_L3_IPV4,
1272                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1273                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1274                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1275                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1276                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1277                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1278                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1279                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1280                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1281                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1282                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1283                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1284                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1285                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1286                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1287                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1288                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1289                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1290                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1291                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1292                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1293                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1294                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1295                         RTE_PTYPE_INNER_L3_IPV4,
1296                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1297                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1298                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1299                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1300                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1301                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1302                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1303                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1304                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1305                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1306                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1307                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1308                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1309                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1310                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1311                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1312                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1313                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1314                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1315                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1316                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1317                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1318                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1319                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1320                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1321                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1322                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1323                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1324         };
1325
1326         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1327                 return RTE_PTYPE_UNKNOWN;
1328
1329         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1330
1331         /* For tunnel packet */
1332         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1333                 /* Remove the tunnel bit to save table space. */
1334                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1335                 return ptype_table_tn[pkt_info];
1336         }
1337
1338         /**
1339          * For x550, if the packet is not a tunnel packet,
1340          * the tunnel type bits are 0, so the
1341          * 82599 mask can be reused.
1342          */
1343         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1344
1345         return ptype_table[pkt_info];
1346 }
1347
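/*
 * Translate the RSS/FDIR type encoded in the low 4 bits of the Rx
 * descriptor's pkt_info field into mbuf offload flags: hashable RSS
 * types map to PKT_RX_RSS_HASH and index 15 (flow director match)
 * maps to PKT_RX_FDIR, as laid out in ip_rss_types_map below.
 */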
1348 static inline uint64_t
1349 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1350 {
1351         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1352                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1353                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1354                 PKT_RX_RSS_HASH, 0, 0, 0,
1355                 0, 0, 0,  PKT_RX_FDIR,
1356         };
1357 #ifdef RTE_LIBRTE_IEEE1588
1358         static uint64_t ip_pkt_etqf_map[8] = {
1359                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1360                 0, 0, 0, 0,
1361         };
1362
1363         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1364                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1365                                 ip_rss_types_map[pkt_info & 0XF];
1366         else
1367                 return ip_rss_types_map[pkt_info & 0XF];
1368 #else
1369         return ip_rss_types_map[pkt_info & 0XF];
1370 #endif
1371 }
1372
1373 static inline uint64_t
1374 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1375 {
1376         uint64_t pkt_flags;
1377
1378         /*
1379          * Check only whether a VLAN is present.
1380          * Do not check whether the L3/L4 RX checksum was computed by the NIC;
1381          * that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
1382          */
1383         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1384
1385 #ifdef RTE_LIBRTE_IEEE1588
1386         if (rx_status & IXGBE_RXD_STAT_TMST)
1387                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1388 #endif
1389         return pkt_flags;
1390 }
1391
1392 static inline uint64_t
1393 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1394 {
1395         uint64_t pkt_flags;
1396
1397         /*
1398          * Bit 31: IPE, IPv4 checksum error
1399          * Bit 30: L4I, L4 integrity error
1400          */
1401         static uint64_t error_to_pkt_flags_map[4] = {
1402                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1403                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1404                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1405                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1406         };
1407         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1408                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1409
1410         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1411             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1412                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1413         }
1414
1415         return pkt_flags;
1416 }
1417
1418 /*
1419  * LOOK_AHEAD defines how many desc statuses to check beyond the
1420  * current descriptor.
1421  * It must be a pound define for optimal performance.
1422  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1423  * function only works with LOOK_AHEAD=8.
1424  */
1425 #define LOOK_AHEAD 8
1426 #if (LOOK_AHEAD != 8)
1427 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1428 #endif
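/*
 * Scan up to RTE_PMD_IXGBE_RX_MAX_BURST descriptors, in groups of
 * LOOK_AHEAD, starting at rx_tail.  Descriptors with the DD bit set
 * are converted into mbufs and staged in rxq->rx_stage, and the
 * corresponding S/W ring entries are cleared.  Returns the number of
 * packets staged.
 */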
1429 static inline int
1430 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1431 {
1432         volatile union ixgbe_adv_rx_desc *rxdp;
1433         struct ixgbe_rx_entry *rxep;
1434         struct rte_mbuf *mb;
1435         uint16_t pkt_len;
1436         uint64_t pkt_flags;
1437         int nb_dd;
1438         uint32_t s[LOOK_AHEAD];
1439         uint32_t pkt_info[LOOK_AHEAD];
1440         int i, j, nb_rx = 0;
1441         uint32_t status;
1442         uint64_t vlan_flags = rxq->vlan_flags;
1443
1444         /* get references to current descriptor and S/W ring entry */
1445         rxdp = &rxq->rx_ring[rxq->rx_tail];
1446         rxep = &rxq->sw_ring[rxq->rx_tail];
1447
1448         status = rxdp->wb.upper.status_error;
1449         /* check to make sure there is at least 1 packet to receive */
1450         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1451                 return 0;
1452
1453         /*
1454          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1455          * reference packets that are ready to be received.
1456          */
1457         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1458              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1459                 /* Read desc statuses backwards to avoid race condition */
1460                 for (j = LOOK_AHEAD-1; j >= 0; --j)
1461                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1462
1463                 for (j = LOOK_AHEAD - 1; j >= 0; --j)
1464                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1465                                                        lo_dword.data);
1466
1467                 /* Compute how many status bits were set */
1468                 nb_dd = 0;
1469                 for (j = 0; j < LOOK_AHEAD; ++j)
1470                         nb_dd += s[j] & IXGBE_RXDADV_STAT_DD;
1471
1472                 nb_rx += nb_dd;
1473
1474                 /* Translate descriptor info to mbuf format */
1475                 for (j = 0; j < nb_dd; ++j) {
1476                         mb = rxep[j].mbuf;
1477                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1478                                   rxq->crc_len;
1479                         mb->data_len = pkt_len;
1480                         mb->pkt_len = pkt_len;
1481                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1482
1483                         /* convert descriptor fields to rte mbuf flags */
1484                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1485                                 vlan_flags);
1486                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1487                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1488                                         ((uint16_t)pkt_info[j]);
1489                         mb->ol_flags = pkt_flags;
1490                         mb->packet_type =
1491                                 ixgbe_rxd_pkt_info_to_pkt_type
1492                                         (pkt_info[j], rxq->pkt_type_mask);
1493
1494                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1495                                 mb->hash.rss = rte_le_to_cpu_32(
1496                                     rxdp[j].wb.lower.hi_dword.rss);
1497                         else if (pkt_flags & PKT_RX_FDIR) {
1498                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1499                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1500                                     IXGBE_ATR_HASH_MASK;
1501                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1502                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1503                         }
1504                 }
1505
1506                 /* Move mbuf pointers from the S/W ring to the stage */
1507                 for (j = 0; j < LOOK_AHEAD; ++j) {
1508                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1509                 }
1510
1511                 /* stop scanning if not all LOOK_AHEAD descriptors were done */
1512                 if (nb_dd != LOOK_AHEAD)
1513                         break;
1514         }
1515
1516         /* clear software ring entries so we can cleanup correctly */
1517         for (i = 0; i < nb_rx; ++i) {
1518                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1519         }
1520
1521
1522         return nb_rx;
1523 }
1524
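/*
 * Bulk-allocate rxq->rx_free_thresh mbufs from the mempool directly
 * into the S/W ring (the block of entries ending at rx_free_trigger)
 * and program the matching Rx descriptors.  Returns -ENOMEM if the
 * bulk get fails, leaving the ring untouched.
 */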
1525 static inline int
1526 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1527 {
1528         volatile union ixgbe_adv_rx_desc *rxdp;
1529         struct ixgbe_rx_entry *rxep;
1530         struct rte_mbuf *mb;
1531         uint16_t alloc_idx;
1532         __le64 dma_addr;
1533         int diag, i;
1534
1535         /* allocate buffers in bulk directly into the S/W ring */
1536         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1537         rxep = &rxq->sw_ring[alloc_idx];
1538         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1539                                     rxq->rx_free_thresh);
1540         if (unlikely(diag != 0))
1541                 return -ENOMEM;
1542
1543         rxdp = &rxq->rx_ring[alloc_idx];
1544         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1545                 /* populate the static rte mbuf fields */
1546                 mb = rxep[i].mbuf;
1547                 if (reset_mbuf) {
1548                         mb->next = NULL;
1549                         mb->nb_segs = 1;
1550                         mb->port = rxq->port_id;
1551                 }
1552
1553                 rte_mbuf_refcnt_set(mb, 1);
1554                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1555
1556                 /* populate the descriptors */
1557                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1558                 rxdp[i].read.hdr_addr = 0;
1559                 rxdp[i].read.pkt_addr = dma_addr;
1560         }
1561
1562         /* update state of internal queue structure */
1563         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1564         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1565                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
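        /*
         * e.g. with nb_rx_desc = 128 and rx_free_thresh = 32 (illustrative
         * values) the trigger steps 31 -> 63 -> 95 -> 127 and then wraps
         * back to 31.
         */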
1566
1567         /* no errors */
1568         return 0;
1569 }
1570
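/*
 * Copy up to nb_pkts staged mbuf pointers from rxq->rx_stage into the
 * caller's rx_pkts array and advance the stage bookkeeping.  Returns
 * the number of pointers actually copied.
 */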
1571 static inline uint16_t
1572 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1573                          uint16_t nb_pkts)
1574 {
1575         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1576         int i;
1577
1578         /* how many packets are ready to return? */
1579         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1580
1581         /* copy mbuf pointers to the application's packet list */
1582         for (i = 0; i < nb_pkts; ++i)
1583                 rx_pkts[i] = stage[i];
1584
1585         /* update internal queue state */
1586         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1587         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1588
1589         return nb_pkts;
1590 }
1591
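/*
 * Two-stage receive: first drain any mbufs previously staged by
 * ixgbe_rx_scan_hw_ring(), otherwise scan the H/W ring and stage a new
 * batch.  Descriptors are replenished once rx_tail passes
 * rx_free_trigger; if that allocation fails the scan is rolled back so
 * the same descriptors can be retried on the next call.
 */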
1592 static inline uint16_t
1593 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1594              uint16_t nb_pkts)
1595 {
1596         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1597         uint16_t nb_rx = 0;
1598
1599         /* Any previously recv'd pkts will be returned from the Rx stage */
1600         if (rxq->rx_nb_avail)
1601                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1602
1603         /* Scan the H/W ring for packets to receive */
1604         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1605
1606         /* update internal queue state */
1607         rxq->rx_next_avail = 0;
1608         rxq->rx_nb_avail = nb_rx;
1609         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1610
1611         /* if required, allocate new buffers to replenish descriptors */
1612         if (rxq->rx_tail > rxq->rx_free_trigger) {
1613                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1614
1615                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1616                         int i, j;
1617
1618                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1619                                    "queue_id=%u", (unsigned) rxq->port_id,
1620                                    (unsigned) rxq->queue_id);
1621
1622                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1623                                 rxq->rx_free_thresh;
1624
1625                         /*
1626                          * Need to rewind any previous receives if we cannot
1627                          * allocate new buffers to replenish the old ones.
1628                          */
1629                         rxq->rx_nb_avail = 0;
1630                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1631                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1632                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1633
1634                         return 0;
1635                 }
1636
1637                 /* update tail pointer */
1638                 rte_wmb();
1639                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
1640         }
1641
1642         if (rxq->rx_tail >= rxq->nb_rx_desc)
1643                 rxq->rx_tail = 0;
1644
1645         /* received any packets this loop? */
1646         if (rxq->rx_nb_avail)
1647                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1648
1649         return 0;
1650 }
1651
1652 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1653 uint16_t
1654 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1655                            uint16_t nb_pkts)
1656 {
1657         uint16_t nb_rx;
1658
1659         if (unlikely(nb_pkts == 0))
1660                 return 0;
1661
1662         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1663                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1664
1665         /* request is relatively large, chunk it up */
1666         nb_rx = 0;
1667         while (nb_pkts) {
1668                 uint16_t ret, n;
1669
1670                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1671                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1672                 nb_rx = (uint16_t)(nb_rx + ret);
1673                 nb_pkts = (uint16_t)(nb_pkts - ret);
1674                 if (ret < n)
1675                         break;
1676         }
1677
1678         return nb_rx;
1679 }
1680
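/*
 * Default (single-buffer) receive handler: one descriptor per packet.
 * Each completed descriptor is immediately re-armed with a freshly
 * allocated mbuf before the received mbuf is handed to the caller, and
 * the RDT register is updated lazily once more than rx_free_thresh
 * descriptors have been consumed.
 */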
1681 uint16_t
1682 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1683                 uint16_t nb_pkts)
1684 {
1685         struct ixgbe_rx_queue *rxq;
1686         volatile union ixgbe_adv_rx_desc *rx_ring;
1687         volatile union ixgbe_adv_rx_desc *rxdp;
1688         struct ixgbe_rx_entry *sw_ring;
1689         struct ixgbe_rx_entry *rxe;
1690         struct rte_mbuf *rxm;
1691         struct rte_mbuf *nmb;
1692         union ixgbe_adv_rx_desc rxd;
1693         uint64_t dma_addr;
1694         uint32_t staterr;
1695         uint32_t pkt_info;
1696         uint16_t pkt_len;
1697         uint16_t rx_id;
1698         uint16_t nb_rx;
1699         uint16_t nb_hold;
1700         uint64_t pkt_flags;
1701         uint64_t vlan_flags;
1702
1703         nb_rx = 0;
1704         nb_hold = 0;
1705         rxq = rx_queue;
1706         rx_id = rxq->rx_tail;
1707         rx_ring = rxq->rx_ring;
1708         sw_ring = rxq->sw_ring;
1709         vlan_flags = rxq->vlan_flags;
1710         while (nb_rx < nb_pkts) {
1711                 /*
1712                  * The order of operations here is important as the DD status
1713                  * bit must not be read after any other descriptor fields.
1714                  * rx_ring and rxdp are pointing to volatile data so the order
1715                  * of accesses cannot be reordered by the compiler. If they were
1716                  * not volatile, they could be reordered which could lead to
1717                  * using invalid descriptor fields when read from rxd.
1718                  */
1719                 rxdp = &rx_ring[rx_id];
1720                 staterr = rxdp->wb.upper.status_error;
1721                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1722                         break;
1723                 rxd = *rxdp;
1724
1725                 /*
1726                  * End of packet.
1727                  *
1728                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1729                  * is likely to be invalid and to be dropped by the various
1730                  * validation checks performed by the network stack.
1731                  *
1732                  * Allocate a new mbuf to replenish the RX ring descriptor.
1733                  * If the allocation fails:
1734                  *    - arrange for that RX descriptor to be the first one
1735                  *      being parsed the next time the receive function is
1736                  *      invoked [on the same queue].
1737                  *
1738                  *    - Stop parsing the RX ring and return immediately.
1739                  *
1740                  * This policy does not drop the packet received in the RX
1741                  * descriptor for which the allocation of a new mbuf failed.
1742                  * Thus, it allows that packet to be retrieved later, once
1743                  * mbufs have been freed in the meantime.
1744                  * As a side effect, holding RX descriptors instead of
1745                  * systematically giving them back to the NIC may lead to
1746                  * RX ring exhaustion situations.
1747                  * However, the NIC can gracefully prevent such situations
1748                  * from happening by sending specific "back-pressure" flow control
1749                  * frames to its peer(s).
1750                  */
1751                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1752                            "ext_err_stat=0x%08x pkt_len=%u",
1753                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1754                            (unsigned) rx_id, (unsigned) staterr,
1755                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1756
1757                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1758                 if (nmb == NULL) {
1759                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1760                                    "queue_id=%u", (unsigned) rxq->port_id,
1761                                    (unsigned) rxq->queue_id);
1762                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1763                         break;
1764                 }
1765
1766                 nb_hold++;
1767                 rxe = &sw_ring[rx_id];
1768                 rx_id++;
1769                 if (rx_id == rxq->nb_rx_desc)
1770                         rx_id = 0;
1771
1772                 /* Prefetch next mbuf while processing current one. */
1773                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1774
1775                 /*
1776                  * When next RX descriptor is on a cache-line boundary,
1777                  * prefetch the next 4 RX descriptors and the next 8 pointers
1778                  * to mbufs.
1779                  */
1780                 if ((rx_id & 0x3) == 0) {
1781                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1782                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1783                 }
1784
1785                 rxm = rxe->mbuf;
1786                 rxe->mbuf = nmb;
1787                 dma_addr =
1788                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1789                 rxdp->read.hdr_addr = 0;
1790                 rxdp->read.pkt_addr = dma_addr;
1791
1792                 /*
1793                  * Initialize the returned mbuf.
1794                  * 1) setup generic mbuf fields:
1795                  *    - number of segments,
1796                  *    - next segment,
1797                  *    - packet length,
1798                  *    - RX port identifier.
1799                  * 2) integrate hardware offload data, if any:
1800                  *    - RSS flag & hash,
1801                  *    - IP checksum flag,
1802                  *    - VLAN TCI, if any,
1803                  *    - error flags.
1804                  */
1805                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1806                                       rxq->crc_len);
1807                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1808                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1809                 rxm->nb_segs = 1;
1810                 rxm->next = NULL;
1811                 rxm->pkt_len = pkt_len;
1812                 rxm->data_len = pkt_len;
1813                 rxm->port = rxq->port_id;
1814
1815                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1816                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1817                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1818
1819                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1820                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1821                 pkt_flags = pkt_flags |
1822                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1823                 rxm->ol_flags = pkt_flags;
1824                 rxm->packet_type =
1825                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1826                                                        rxq->pkt_type_mask);
1827
1828                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1829                         rxm->hash.rss = rte_le_to_cpu_32(
1830                                                 rxd.wb.lower.hi_dword.rss);
1831                 else if (pkt_flags & PKT_RX_FDIR) {
1832                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1833                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1834                                         IXGBE_ATR_HASH_MASK;
1835                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1836                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1837                 }
1838                 /*
1839                  * Store the mbuf address into the next entry of the array
1840                  * of returned packets.
1841                  */
1842                 rx_pkts[nb_rx++] = rxm;
1843         }
1844         rxq->rx_tail = rx_id;
1845
1846         /*
1847          * If the number of free RX descriptors is greater than the RX free
1848          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1849          * register.
1850          * Update the RDT with the value of the last processed RX descriptor
1851          * minus 1, to guarantee that the RDT register is never equal to the
1852          * RDH register, which creates a "full" ring situation from the
1853          * hardware point of view...
1854          */
1855         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1856         if (nb_hold > rxq->rx_free_thresh) {
1857                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1858                            "nb_hold=%u nb_rx=%u",
1859                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1860                            (unsigned) rx_id, (unsigned) nb_hold,
1861                            (unsigned) nb_rx);
1862                 rx_id = (uint16_t) ((rx_id == 0) ?
1863                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1864                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1865                 nb_hold = 0;
1866         }
1867         rxq->nb_rx_hold = nb_hold;
1868         return nb_rx;
1869 }
1870
1871 /**
1872  * Extract the RSC count from a descriptor; a non-zero value identifies an RSC descriptor.
1873  */
1874 static inline uint32_t
1875 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1876 {
1877         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1878                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1879 }
1880
1881 /**
1882  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1883  *
1884  * Fill the following info in the HEAD buffer of the Rx cluster:
1885  *    - RX port identifier
1886  *    - hardware offload data, if any:
1887  *      - RSS flag & hash
1888  *      - IP checksum flag
1889  *      - VLAN TCI, if any
1890  *      - error flags
1891  * @head HEAD of the packet cluster
1892  * @desc HW descriptor to get data from
1893  * @rxq Pointer to the Rx queue
1894  */
1895 static inline void
1896 ixgbe_fill_cluster_head_buf(
1897         struct rte_mbuf *head,
1898         union ixgbe_adv_rx_desc *desc,
1899         struct ixgbe_rx_queue *rxq,
1900         uint32_t staterr)
1901 {
1902         uint32_t pkt_info;
1903         uint64_t pkt_flags;
1904
1905         head->port = rxq->port_id;
1906
1907         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1908          * set in the pkt_flags field.
1909          */
1910         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1911         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1912         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1913         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1914         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1915         head->ol_flags = pkt_flags;
1916         head->packet_type =
1917                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1918
1919         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1920                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1921         else if (pkt_flags & PKT_RX_FDIR) {
1922                 head->hash.fdir.hash =
1923                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1924                                                           & IXGBE_ATR_HASH_MASK;
1925                 head->hash.fdir.id =
1926                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1927         }
1928 }
1929
1930 /**
1931  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1932  *
1933  * @rx_queue Rx queue handle
1934  * @rx_pkts table of received packets
1935  * @nb_pkts size of rx_pkts table
1936  * @bulk_alloc if TRUE, bulk allocation is used to refill the HW ring
1937  *
1938  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1939  * additional ring of ixgbe_scattered_rx_entry's that holds the relevant RSC info.
1940  *
1941  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1942  * 1) When non-EOP RSC completion arrives:
1943  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1944  *       segment's data length.
1945  *    b) Set the "next" pointer of the current segment to point to the segment
1946  *       at the NEXTP index.
1947  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1948  *       in the sw_sc_ring.
1949  * 2) When EOP arrives we just update the cluster's total length and offload
1950  *    flags and deliver the cluster up to the upper layers. In our case - put it
1951  *    in the rx_pkts table.
1952  *
1953  * Returns the number of received packets/clusters (according to the "bulk
1954  * receive" interface).
1955  */
1956 static inline uint16_t
1957 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1958                     bool bulk_alloc)
1959 {
1960         struct ixgbe_rx_queue *rxq = rx_queue;
1961         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1962         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1963         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1964         uint16_t rx_id = rxq->rx_tail;
1965         uint16_t nb_rx = 0;
1966         uint16_t nb_hold = rxq->nb_rx_hold;
1967         uint16_t prev_id = rxq->rx_tail;
1968
1969         while (nb_rx < nb_pkts) {
1970                 bool eop;
1971                 struct ixgbe_rx_entry *rxe;
1972                 struct ixgbe_scattered_rx_entry *sc_entry;
1973                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1974                 struct ixgbe_rx_entry *next_rxe = NULL;
1975                 struct rte_mbuf *first_seg;
1976                 struct rte_mbuf *rxm;
1977                 struct rte_mbuf *nmb;
1978                 union ixgbe_adv_rx_desc rxd;
1979                 uint16_t data_len;
1980                 uint16_t next_id;
1981                 volatile union ixgbe_adv_rx_desc *rxdp;
1982                 uint32_t staterr;
1983
1984 next_desc:
1985                 /*
1986                  * The code in this whole file uses the volatile pointer to
1987                  * ensure the read ordering of the status and the rest of the
1988                  * descriptor fields (on the compiler level only!!!). This is so
1989                  * UGLY - why not to just use the compiler barrier instead? DPDK
1990                  * even has the rte_compiler_barrier() for that.
1991                  *
1992                  * But most importantly this is just wrong because this doesn't
1993                  * ensure memory ordering in a general case at all. For
1994                  * instance, DPDK is supposed to work on Power CPUs where
1995                  * compiler barrier may just not be enough!
1996                  *
1997                  * I tried to write only this function properly to have a
1998                  * starting point (as a part of an LRO/RSC series) but the
1999                  * compiler cursed at me when I tried to cast away the
2000                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2001                  * keeping it the way it is for now.
2002                  *
2003                  * The code in this file is broken in so many other places and
2004                  * will just not work on a big endian CPU anyway therefore the
2005                  * lines below will have to be revisited together with the rest
2006                  * of the ixgbe PMD.
2007                  *
2008                  * TODO:
2009                  *    - Get rid of "volatile" crap and let the compiler do its
2010                  *      job.
2011                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2012                  *      memory ordering below.
2013                  */
2014                 rxdp = &rx_ring[rx_id];
2015                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2016
2017                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2018                         break;
2019
2020                 rxd = *rxdp;
2021
2022                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2023                                   "staterr=0x%x data_len=%u",
2024                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2025                            rte_le_to_cpu_16(rxd.wb.upper.length));
2026
2027                 if (!bulk_alloc) {
2028                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2029                         if (nmb == NULL) {
2030                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2031                                                   "port_id=%u queue_id=%u",
2032                                            rxq->port_id, rxq->queue_id);
2033
2034                                 rte_eth_devices[rxq->port_id].data->
2035                                                         rx_mbuf_alloc_failed++;
2036                                 break;
2037                         }
2038                 } else if (nb_hold > rxq->rx_free_thresh) {
2039                         uint16_t next_rdt = rxq->rx_free_trigger;
2040
2041                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2042                                 rte_wmb();
2043                                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
2044                                                     next_rdt);
2045                                 nb_hold -= rxq->rx_free_thresh;
2046                         } else {
2047                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2048                                                   "port_id=%u queue_id=%u",
2049                                            rxq->port_id, rxq->queue_id);
2050
2051                                 rte_eth_devices[rxq->port_id].data->
2052                                                         rx_mbuf_alloc_failed++;
2053                                 break;
2054                         }
2055                 }
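                /*
                 * Note: in bulk_alloc mode the slot consumed below is left
                 * empty (rxe->mbuf = NULL) and refilled later in batches of
                 * rx_free_thresh by the branch above, which also advances
                 * the RDT register after each successful refill.
                 */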
2056
2057                 nb_hold++;
2058                 rxe = &sw_ring[rx_id];
2059                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2060
2061                 next_id = rx_id + 1;
2062                 if (next_id == rxq->nb_rx_desc)
2063                         next_id = 0;
2064
2065                 /* Prefetch next mbuf while processing current one. */
2066                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2067
2068                 /*
2069                  * When next RX descriptor is on a cache-line boundary,
2070                  * prefetch the next 4 RX descriptors and the next 8 pointers
2071                  * to mbufs.
2072                  */
2073                 if ((next_id & 0x3) == 0) {
2074                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2075                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2076                 }
2077
2078                 rxm = rxe->mbuf;
2079
2080                 if (!bulk_alloc) {
2081                         __le64 dma =
2082                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2083                         /*
2084                          * Update RX descriptor with the physical address of the
2085                          * new data buffer of the newly allocated mbuf.
2086                          */
2087                         rxe->mbuf = nmb;
2088
2089                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2090                         rxdp->read.hdr_addr = 0;
2091                         rxdp->read.pkt_addr = dma;
2092                 } else
2093                         rxe->mbuf = NULL;
2094
2095                 /*
2096                  * Set data length & data buffer address of mbuf.
2097                  */
2098                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2099                 rxm->data_len = data_len;
2100
2101                 if (!eop) {
2102                         uint16_t nextp_id;
2103                         /*
2104                          * Get next descriptor index:
2105                          *  - For RSC it's in the NEXTP field.
2106                          *  - For a scattered packet - it's just a following
2107                          *    descriptor.
2108                          */
2109                         if (ixgbe_rsc_count(&rxd))
2110                                 nextp_id =
2111                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2112                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2113                         else
2114                                 nextp_id = next_id;
2115
2116                         next_sc_entry = &sw_sc_ring[nextp_id];
2117                         next_rxe = &sw_ring[nextp_id];
2118                         rte_ixgbe_prefetch(next_rxe);
2119                 }
2120
2121                 sc_entry = &sw_sc_ring[rx_id];
2122                 first_seg = sc_entry->fbuf;
2123                 sc_entry->fbuf = NULL;
2124
2125                 /*
2126                  * If this is the first buffer of the received packet,
2127                  * set the pointer to the first mbuf of the packet and
2128                  * initialize its context.
2129                  * Otherwise, update the total length and the number of segments
2130                  * of the current scattered packet, and update the pointer to
2131                  * the last mbuf of the current packet.
2132                  */
2133                 if (first_seg == NULL) {
2134                         first_seg = rxm;
2135                         first_seg->pkt_len = data_len;
2136                         first_seg->nb_segs = 1;
2137                 } else {
2138                         first_seg->pkt_len += data_len;
2139                         first_seg->nb_segs++;
2140                 }
2141
2142                 prev_id = rx_id;
2143                 rx_id = next_id;
2144
2145                 /*
2146                  * If this is not the last buffer of the received packet, update
2147                  * the pointer to the first mbuf at the NEXTP entry in the
2148                  * sw_sc_ring and continue to parse the RX ring.
2149                  */
2150                 if (!eop && next_rxe) {
2151                         rxm->next = next_rxe->mbuf;
2152                         next_sc_entry->fbuf = first_seg;
2153                         goto next_desc;
2154                 }
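                /*
                 * Illustration (hypothetical indices): if a non-EOP RSC
                 * completion at descriptor 10 carries NEXTP = 25, the branch
                 * above links sw_ring[10]'s mbuf to sw_ring[25].mbuf and
                 * stores the cluster HEAD in sw_sc_ring[25].fbuf, so the
                 * chain is resumed when descriptor 25 completes.
                 */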
2155
2156                 /*
2157                  * This is the last buffer of the received packet - return
2158                  * the current cluster to the user.
2159                  */
2160                 rxm->next = NULL;
2161
2162                 /* Initialize the first mbuf of the returned packet */
2163                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2164
2165                 /*
2166                  * Deal with the case when HW CRC stripping is disabled.
2167                  * That cannot happen when LRO is enabled, but it still can
2168                  * happen in scattered RX mode.
2169                  */
2170                 first_seg->pkt_len -= rxq->crc_len;
2171                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2172                         struct rte_mbuf *lp;
2173
2174                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2175                                 ;
2176
2177                         first_seg->nb_segs--;
2178                         lp->data_len -= rxq->crc_len - rxm->data_len;
2179                         lp->next = NULL;
2180                         rte_pktmbuf_free_seg(rxm);
2181                 } else
2182                         rxm->data_len -= rxq->crc_len;
2183
2184                 /* Prefetch data of first segment, if configured to do so. */
2185                 rte_packet_prefetch((char *)first_seg->buf_addr +
2186                         first_seg->data_off);
2187
2188                 /*
2189                  * Store the mbuf address into the next entry of the array
2190                  * of returned packets.
2191                  */
2192                 rx_pkts[nb_rx++] = first_seg;
2193         }
2194
2195         /*
2196          * Record index of the next RX descriptor to probe.
2197          */
2198         rxq->rx_tail = rx_id;
2199
2200         /*
2201          * If the number of free RX descriptors is greater than the RX free
2202          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2203          * register.
2204          * Update the RDT with the value of the last processed RX descriptor
2205          * minus 1, to guarantee that the RDT register is never equal to the
2206          * RDH register, which creates a "full" ring situation from the
2207          * hardware point of view...
2208          */
2209         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2210                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2211                            "nb_hold=%u nb_rx=%u",
2212                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2213
2214                 rte_wmb();
2215                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
2216                 nb_hold = 0;
2217         }
2218
2219         rxq->nb_rx_hold = nb_hold;
2220         return nb_rx;
2221 }
2222
2223 uint16_t
2224 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2225                                  uint16_t nb_pkts)
2226 {
2227         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2228 }
2229
2230 uint16_t
2231 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2232                                uint16_t nb_pkts)
2233 {
2234         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2235 }
2236
2237 /*********************************************************************
2238  *
2239  *  Queue management functions
2240  *
2241  **********************************************************************/
2242
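/* Free any mbufs still attached to the Tx S/W ring entries. */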
2243 static void __attribute__((cold))
2244 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2245 {
2246         unsigned i;
2247
2248         if (txq->sw_ring != NULL) {
2249                 for (i = 0; i < txq->nb_tx_desc; i++) {
2250                         if (txq->sw_ring[i].mbuf != NULL) {
2251                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2252                                 txq->sw_ring[i].mbuf = NULL;
2253                         }
2254                 }
2255         }
2256 }
2257
2258 static void __attribute__((cold))
2259 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2260 {
2261         if (txq != NULL &&
2262             txq->sw_ring != NULL)
2263                 rte_free(txq->sw_ring);
2264 }
2265
2266 static void __attribute__((cold))
2267 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2268 {
2269         if (txq != NULL && txq->ops != NULL) {
2270                 txq->ops->release_mbufs(txq);
2271                 txq->ops->free_swring(txq);
2272                 rte_free(txq);
2273         }
2274 }
2275
2276 void __attribute__((cold))
2277 ixgbe_dev_tx_queue_release(void *txq)
2278 {
2279         ixgbe_tx_queue_release(txq);
2280 }
2281
2282 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2283 static void __attribute__((cold))
2284 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2285 {
2286         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2287         struct ixgbe_tx_entry *txe = txq->sw_ring;
2288         uint16_t prev, i;
2289
2290         /* Zero out HW ring memory */
2291         for (i = 0; i < txq->nb_tx_desc; i++) {
2292                 txq->tx_ring[i] = zeroed_desc;
2293         }
2294
2295         /* Initialize SW ring entries */
2296         prev = (uint16_t) (txq->nb_tx_desc - 1);
2297         for (i = 0; i < txq->nb_tx_desc; i++) {
2298                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2299
2300                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2301                 txe[i].mbuf = NULL;
2302                 txe[i].last_id = i;
2303                 txe[prev].next_id = i;
2304                 prev = i;
2305         }
2306
2307         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2308         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2309
2310         txq->tx_tail = 0;
2311         txq->nb_tx_used = 0;
2312         /*
2313          * Always allow 1 descriptor to be un-allocated to avoid
2314          * a H/W race condition
2315          */
2316         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2317         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2318         txq->ctx_curr = 0;
2319         memset((void *)&txq->ctx_cache, 0,
2320                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2321 }
2322
2323 static const struct ixgbe_txq_ops def_txq_ops = {
2324         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2325         .free_swring = ixgbe_tx_free_swring,
2326         .reset = ixgbe_reset_tx_queue,
2327 };
2328
2329 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2330  * the queue parameters. Used in tx_queue_setup by primary process and then
2331  * in dev_init by secondary process when attaching to an existing ethdev.
2332  */
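/*
 * In short: a queue qualifies for the simple (and possibly vector) Tx
 * path only when all IXGBE_SIMPLE_FLAGS bits are set in its txq_flags
 * (no offloads, no multi-segment packets) and tx_rs_thresh is at least
 * RTE_PMD_IXGBE_TX_MAX_BURST; otherwise the full-featured
 * ixgbe_xmit_pkts() is used together with ixgbe_prep_pkts() as the
 * tx_pkt_prepare callback.
 */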
2333 void __attribute__((cold))
2334 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2335 {
2336         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2337         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2338                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2339                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2340                 dev->tx_pkt_prepare = NULL;
2341 #ifdef RTE_IXGBE_INC_VECTOR
2342                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2343                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2344                                         ixgbe_txq_vec_setup(txq) == 0)) {
2345                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2346                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2347                 } else
2348 #endif
2349                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2350         } else {
2351                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2352                 PMD_INIT_LOG(DEBUG,
2353                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2354                                 (unsigned long)txq->txq_flags,
2355                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2356                 PMD_INIT_LOG(DEBUG,
2357                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2358                                 (unsigned long)txq->tx_rs_thresh,
2359                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2360                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2361                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2362         }
2363 }
2364
2365 int __attribute__((cold))
2366 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2367                          uint16_t queue_idx,
2368                          uint16_t nb_desc,
2369                          unsigned int socket_id,
2370                          const struct rte_eth_txconf *tx_conf)
2371 {
2372         const struct rte_memzone *tz;
2373         struct ixgbe_tx_queue *txq;
2374         struct ixgbe_hw     *hw;
2375         uint16_t tx_rs_thresh, tx_free_thresh;
2376
2377         PMD_INIT_FUNC_TRACE();
2378         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2379
2380         /*
2381          * Validate number of transmit descriptors.
2382          * It must be within the hardware minimum/maximum and must be a
2383          * multiple of IXGBE_TXD_ALIGN.
2384          */
2385         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2386                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2387                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2388                 return -EINVAL;
2389         }
2390
2391         /*
2392          * The following two parameters control the setting of the RS bit on
2393          * transmit descriptors.
2394          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2395          * descriptors have been used.
2396          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2397          * descriptors are used or if the number of descriptors required
2398          * to transmit a packet is greater than the number of free TX
2399          * descriptors.
2400          * The following constraints must be satisfied:
2401          *  tx_rs_thresh must be greater than 0.
2402          *  tx_rs_thresh must be less than the size of the ring minus 2.
2403          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2404          *  tx_rs_thresh must be a divisor of the ring size.
2405          *  tx_free_thresh must be greater than 0.
2406          *  tx_free_thresh must be less than the size of the ring minus 3.
2407          * One descriptor in the TX ring is used as a sentinel to avoid a
2408          * H/W race condition, hence the maximum threshold constraints.
2409          * When set to zero, defaults apply; see the sketch after this function.
2410          */
2411         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2412                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2413         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2414                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2415         if (tx_rs_thresh >= (nb_desc - 2)) {
2416                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2417                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2418                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2419                         (int)dev->data->port_id, (int)queue_idx);
2420                 return -(EINVAL);
2421         }
2422         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2423                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2424                         "(tx_rs_thresh=%u port=%d queue=%d)",
2425                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2426                         (int)dev->data->port_id, (int)queue_idx);
2427                 return -(EINVAL);
2428         }
2429         if (tx_free_thresh >= (nb_desc - 3)) {
2430                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2431                              "number of TX descriptors minus 3. "
2432                              "(tx_free_thresh=%u "
2433                              "port=%d queue=%d)",
2434                              (unsigned int)tx_free_thresh,
2435                              (int)dev->data->port_id, (int)queue_idx);
2436                 return -(EINVAL);
2437         }
2438         if (tx_rs_thresh > tx_free_thresh) {
2439                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2440                              "tx_free_thresh. (tx_free_thresh=%u "
2441                              "tx_rs_thresh=%u port=%d queue=%d)",
2442                              (unsigned int)tx_free_thresh,
2443                              (unsigned int)tx_rs_thresh,
2444                              (int)dev->data->port_id,
2445                              (int)queue_idx);
2446                 return -(EINVAL);
2447         }
2448         if ((nb_desc % tx_rs_thresh) != 0) {
2449                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2450                              "number of TX descriptors. (tx_rs_thresh=%u "
2451                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2452                              (int)dev->data->port_id, (int)queue_idx);
2453                 return -(EINVAL);
2454         }
2455
2456         /*
2457          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2458          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2459          * by the NIC and all descriptors are written back after the NIC
2460          * accumulates WTHRESH descriptors.
2461          */
2462         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2463                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2464                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2465                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2466                              (int)dev->data->port_id, (int)queue_idx);
2467                 return -(EINVAL);
2468         }
2469
2470         /* Free memory prior to re-allocation if needed... */
2471         if (dev->data->tx_queues[queue_idx] != NULL) {
2472                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2473                 dev->data->tx_queues[queue_idx] = NULL;
2474         }
2475
2476         /* First allocate the tx queue data structure */
2477         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2478                                  RTE_CACHE_LINE_SIZE, socket_id);
2479         if (txq == NULL)
2480                 return -ENOMEM;
2481
2482         /*
2483          * Allocate TX ring hardware descriptors. A memzone large enough to
2484          * handle the maximum ring size is allocated in order to allow for
2485          * resizing in later calls to the queue setup function.
2486          */
2487         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2488                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2489                         IXGBE_ALIGN, socket_id);
2490         if (tz == NULL) {
2491                 ixgbe_tx_queue_release(txq);
2492                 return -ENOMEM;
2493         }
2494
2495         txq->nb_tx_desc = nb_desc;
2496         txq->tx_rs_thresh = tx_rs_thresh;
2497         txq->tx_free_thresh = tx_free_thresh;
2498         txq->pthresh = tx_conf->tx_thresh.pthresh;
2499         txq->hthresh = tx_conf->tx_thresh.hthresh;
2500         txq->wthresh = tx_conf->tx_thresh.wthresh;
2501         txq->queue_id = queue_idx;
2502         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2503                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2504         txq->port_id = dev->data->port_id;
2505         txq->txq_flags = tx_conf->txq_flags;
2506         txq->ops = &def_txq_ops;
2507         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2508
2509         /*
2510          * Use VFTDT as the Tx tail register when running on a virtual function
2511          */
2512         if (hw->mac.type == ixgbe_mac_82599_vf ||
2513             hw->mac.type == ixgbe_mac_X540_vf ||
2514             hw->mac.type == ixgbe_mac_X550_vf ||
2515             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2516             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2517                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2518         else
2519                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2520
2521         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2522         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2523
2524         /* Allocate software ring */
2525         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2526                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2527                                 RTE_CACHE_LINE_SIZE, socket_id);
2528         if (txq->sw_ring == NULL) {
2529                 ixgbe_tx_queue_release(txq);
2530                 return -ENOMEM;
2531         }
2532         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2533                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2534
2535         /* set up vector or scalar TX function as appropriate */
2536         ixgbe_set_tx_function(dev, txq);
2537
2538         txq->ops->reset(txq);
2539
2540         dev->data->tx_queues[queue_idx] = txq;
2541
2542
2543         return 0;
2544 }
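
/*
 * Illustrative sketch (not part of the driver): a Tx queue configuration
 * that satisfies every constraint checked above for a 512-descriptor ring.
 * The example_setup_txq() name and the chosen values are hypothetical; the
 * rte_eth_tx_queue_setup() prototype (uint8_t port id) is the one of this
 * DPDK release.
 */
static int __attribute__((unused))
example_setup_txq(uint8_t port_id, uint16_t queue_id, unsigned int socket_id)
{
        struct rte_eth_txconf txconf = {
                .tx_thresh = {
                        .pthresh = 32,
                        .hthresh = 0,
                        .wthresh = 0,   /* must be 0 when tx_rs_thresh > 1 */
                },
                /* > 0, <= DEFAULT_TX_RS_THRESH, divides 512,
                 * <= tx_free_thresh
                 */
                .tx_rs_thresh = 32,
                .tx_free_thresh = 64,   /* > 0, < 512 - 3 */
                .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
        };

        /* 512 is a multiple of IXGBE_TXD_ALIGN and within the ring limits. */
        return rte_eth_tx_queue_setup(port_id, queue_id, 512, socket_id,
                                      &txconf);
}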
2545
2546 /**
2547  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2548  *
2549  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2550  * in the sw_rsc_ring is not set to NULL but rather points to the next
2551  * mbuf of this RSC aggregation (that has not been completed yet and still
2552  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2553  * free only the first "nb_segs" segments of the cluster explicitly, one at a
2554  * time, with rte_pktmbuf_free_seg().
2555  *
2556  * @m: scattered cluster head
2557  */
2558 static void __attribute__((cold))
2559 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2560 {
2561         uint8_t i, nb_segs = m->nb_segs;
2562         struct rte_mbuf *next_seg;
2563
2564         for (i = 0; i < nb_segs; i++) {
2565                 next_seg = m->next;
2566                 rte_pktmbuf_free_seg(m);
2567                 m = next_seg;
2568         }
2569 }
2570
2571 static void __attribute__((cold))
2572 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2573 {
2574         unsigned i;
2575
2576 #ifdef RTE_IXGBE_INC_VECTOR
2577         /* SSE Vector driver has a different way of releasing mbufs. */
2578         if (rxq->rx_using_sse) {
2579                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2580                 return;
2581         }
2582 #endif
2583
2584         if (rxq->sw_ring != NULL) {
2585                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2586                         if (rxq->sw_ring[i].mbuf != NULL) {
2587                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2588                                 rxq->sw_ring[i].mbuf = NULL;
2589                         }
2590                 }
2591                 if (rxq->rx_nb_avail) {
2592                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2593                                 struct rte_mbuf *mb;
2594
2595                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2596                                 rte_pktmbuf_free_seg(mb);
2597                         }
2598                         rxq->rx_nb_avail = 0;
2599                 }
2600         }
2601
2602         if (rxq->sw_sc_ring)
2603                 for (i = 0; i < rxq->nb_rx_desc; i++)
2604                         if (rxq->sw_sc_ring[i].fbuf) {
2605                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2606                                 rxq->sw_sc_ring[i].fbuf = NULL;
2607                         }
2608 }
2609
2610 static void __attribute__((cold))
2611 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2612 {
2613         if (rxq != NULL) {
2614                 ixgbe_rx_queue_release_mbufs(rxq);
2615                 rte_free(rxq->sw_ring);
2616                 rte_free(rxq->sw_sc_ring);
2617                 rte_free(rxq);
2618         }
2619 }
2620
2621 void __attribute__((cold))
2622 ixgbe_dev_rx_queue_release(void *rxq)
2623 {
2624         ixgbe_rx_queue_release(rxq);
2625 }
2626
2627 /*
2628  * Check if Rx Burst Bulk Alloc function can be used.
2629  * Return
2630  *        0: the preconditions are satisfied and the bulk allocation function
2631  *           can be used.
2632  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2633  *           function must be used.
2634  */
2635 static inline int __attribute__((cold))
2636 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2637 {
2638         int ret = 0;
2639
2640         /*
2641          * Make sure the following pre-conditions are satisfied:
2642          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2643          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2644          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2645          *   rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST)
2646          * Scattered packets are not supported.  This should be checked
2647          * outside of this function.
2648          */
2649         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2650                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2651                              "rxq->rx_free_thresh=%d, "
2652                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2653                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2654                 ret = -EINVAL;
2655         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2656                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2657                              "rxq->rx_free_thresh=%d, "
2658                              "rxq->nb_rx_desc=%d",
2659                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2660                 ret = -EINVAL;
2661         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2662                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2663                              "rxq->nb_rx_desc=%d, "
2664                              "rxq->rx_free_thresh=%d",
2665                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2666                 ret = -EINVAL;
2667         } else if (!(rxq->nb_rx_desc <
2668                (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) {
2669                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2670                              "rxq->nb_rx_desc=%d, "
2671                              "IXGBE_MAX_RING_DESC=%d, "
2672                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2673                              rxq->nb_rx_desc, IXGBE_MAX_RING_DESC,
2674                              RTE_PMD_IXGBE_RX_MAX_BURST);
2675                 ret = -EINVAL;
2676         }
2677
2678         return ret;
2679 }
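
/*
 * Illustrative sketch (not part of the driver): values that pass or fail
 * the checks above.  Assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32, a
 * 512-descriptor ring with rx_free_thresh == 32 satisfies every
 * precondition, while rx_free_thresh == 24 would fail the first one.
 * The example_bulk_alloc_precondition() name is hypothetical.
 */
static int __attribute__((unused))
example_bulk_alloc_precondition(void)
{
        struct ixgbe_rx_queue rxq = {
                .nb_rx_desc = 512,
                .rx_free_thresh = 32,
        };

        /* Returns 1 for the passing configuration above. */
        return check_rx_burst_bulk_alloc_preconditions(&rxq) == 0;
}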
2680
2681 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2682 static void __attribute__((cold))
2683 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2684 {
2685         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2686         unsigned i;
2687         uint16_t len = rxq->nb_rx_desc;
2688
2689         /*
2690          * By default, the Rx queue setup function allocates enough memory for
2691          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2692          * extra memory at the end of the descriptor ring to be zero'd out. A
2693          * pre-condition for using the Rx burst bulk alloc function is that the
2694          * number of descriptors is less than or equal to
2695          * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the
2696          * constraints here to see if we need to zero out memory after the end
2697          * of the H/W descriptor ring.
2698          */
2699         if (adapter->rx_bulk_alloc_allowed)
2700                 /* zero out extra memory */
2701                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2702
2703         /*
2704          * Zero out HW ring memory. Zero out extra memory at the end of
2705          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2706          * reads extra memory as zeros.
2707          */
2708         for (i = 0; i < len; i++) {
2709                 rxq->rx_ring[i] = zeroed_desc;
2710         }
2711
2712         /*
2713          * initialize extra software ring entries. Space for these extra
2714          * entries is always allocated
2715          */
2716         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2717         for (i = rxq->nb_rx_desc; i < len; ++i) {
2718                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2719         }
2720
2721         rxq->rx_nb_avail = 0;
2722         rxq->rx_next_avail = 0;
2723         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2724         rxq->rx_tail = 0;
2725         rxq->nb_rx_hold = 0;
2726         rxq->pkt_first_seg = NULL;
2727         rxq->pkt_last_seg = NULL;
2728
2729 #ifdef RTE_IXGBE_INC_VECTOR
2730         rxq->rxrearm_start = 0;
2731         rxq->rxrearm_nb = 0;
2732 #endif
2733 }
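
/*
 * Illustrative sketch (not part of the driver) of why the extra
 * RTE_PMD_IXGBE_RX_MAX_BURST descriptors past the ring end are zeroed above:
 * a bulk-alloc style scan that looks ahead from rx_tail never needs a wrap
 * check, because a zeroed descriptor has its DD bit clear and terminates the
 * scan.  The real scan in this file works on groups of descriptors; this
 * simplified loop only shows the principle.
 */
static uint16_t __attribute__((unused))
example_scan_done_descs(const struct ixgbe_rx_queue *rxq)
{
        volatile const union ixgbe_adv_rx_desc *rxdp =
                &rxq->rx_ring[rxq->rx_tail];
        uint16_t nb_done = 0;

        /* May read up to RTE_PMD_IXGBE_RX_MAX_BURST entries past nb_rx_desc;
         * those entries exist and are zero, so the DD test fails there and
         * the loop stops.
         */
        while (nb_done < RTE_PMD_IXGBE_RX_MAX_BURST &&
               (rxdp[nb_done].wb.upper.status_error &
                        rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
                nb_done++;

        return nb_done;
}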
2734
2735 int __attribute__((cold))
2736 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2737                          uint16_t queue_idx,
2738                          uint16_t nb_desc,
2739                          unsigned int socket_id,
2740                          const struct rte_eth_rxconf *rx_conf,
2741                          struct rte_mempool *mp)
2742 {
2743         const struct rte_memzone *rz;
2744         struct ixgbe_rx_queue *rxq;
2745         struct ixgbe_hw     *hw;
2746         uint16_t len;
2747         struct ixgbe_adapter *adapter =
2748                 (struct ixgbe_adapter *)dev->data->dev_private;
2749
2750         PMD_INIT_FUNC_TRACE();
2751         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2752
2753         /*
2754          * Validate number of receive descriptors.
2755          * It must be within the hardware minimum and maximum, and must be
2756          * a multiple of IXGBE_RXD_ALIGN.
2757          */
2758         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2759                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2760                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2761                 return -EINVAL;
2762         }
2763
2764         /* Free memory prior to re-allocation if needed... */
2765         if (dev->data->rx_queues[queue_idx] != NULL) {
2766                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2767                 dev->data->rx_queues[queue_idx] = NULL;
2768         }
2769
2770         /* First allocate the rx queue data structure */
2771         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2772                                  RTE_CACHE_LINE_SIZE, socket_id);
2773         if (rxq == NULL)
2774                 return -ENOMEM;
2775         rxq->mb_pool = mp;
2776         rxq->nb_rx_desc = nb_desc;
2777         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2778         rxq->queue_id = queue_idx;
2779         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2780                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2781         rxq->port_id = dev->data->port_id;
2782         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2783                                                         0 : ETHER_CRC_LEN);
2784         rxq->drop_en = rx_conf->rx_drop_en;
2785         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2786
2787         /*
2788          * The packet type in the RX descriptor differs between NICs:
2789          * some bits are used on x550 but reserved on other NICs,
2790          * so set a per-NIC packet type mask.
2791          */
2792         if (hw->mac.type == ixgbe_mac_X550 ||
2793             hw->mac.type == ixgbe_mac_X550EM_x ||
2794             hw->mac.type == ixgbe_mac_X550EM_a ||
2795             hw->mac.type == ixgbe_mac_X550_vf ||
2796             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2797             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2798                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2799         else
2800                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2801
2802         /*
2803          * Allocate RX ring hardware descriptors. A memzone large enough to
2804          * handle the maximum ring size is allocated in order to allow for
2805          * resizing in later calls to the queue setup function.
2806          */
2807         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2808                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2809         if (rz == NULL) {
2810                 ixgbe_rx_queue_release(rxq);
2811                 return -ENOMEM;
2812         }
2813
2814         /*
2815          * Zero init all the descriptors in the ring.
2816          */
2817         memset(rz->addr, 0, RX_RING_SZ);
2818
2819         /*
2820          * Use VFRDT/VFRDH as the Rx tail/head registers when running on a VF
2821          */
2822         if (hw->mac.type == ixgbe_mac_82599_vf ||
2823             hw->mac.type == ixgbe_mac_X540_vf ||
2824             hw->mac.type == ixgbe_mac_X550_vf ||
2825             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2826             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2827                 rxq->rdt_reg_addr =
2828                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2829                 rxq->rdh_reg_addr =
2830                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2831         } else {
2832                 rxq->rdt_reg_addr =
2833                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2834                 rxq->rdh_reg_addr =
2835                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2836         }
2837
2838         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2839         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2840
2841         /*
2842          * Certain constraints must be met in order to use the bulk buffer
2843          * allocation Rx burst function. If any of the Rx queues doesn't meet
2844          * them, the feature must be disabled for the whole port.
2845          */
2846         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2847                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2848                                     "preconditions - canceling the feature for "
2849                                     "the whole port[%d]",
2850                              rxq->queue_id, rxq->port_id);
2851                 adapter->rx_bulk_alloc_allowed = false;
2852         }
2853
2854         /*
2855          * Allocate software ring. Allow for space at the end of the
2856          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2857          * function does not access an invalid memory region.
2858          */
2859         len = nb_desc;
2860         if (adapter->rx_bulk_alloc_allowed)
2861                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2862
2863         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2864                                           sizeof(struct ixgbe_rx_entry) * len,
2865                                           RTE_CACHE_LINE_SIZE, socket_id);
2866         if (!rxq->sw_ring) {
2867                 ixgbe_rx_queue_release(rxq);
2868                 return -ENOMEM;
2869         }
2870
2871         /*
2872          * Always allocate even if it's not going to be needed in order to
2873          * simplify the code.
2874          *
2875          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2876          * be requested in ixgbe_dev_rx_init(), which is called later from
2877          * dev_start() flow.
2878          */
2879         rxq->sw_sc_ring =
2880                 rte_zmalloc_socket("rxq->sw_sc_ring",
2881                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2882                                    RTE_CACHE_LINE_SIZE, socket_id);
2883         if (!rxq->sw_sc_ring) {
2884                 ixgbe_rx_queue_release(rxq);
2885                 return -ENOMEM;
2886         }
2887
2888         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2889                             "dma_addr=0x%"PRIx64,
2890                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2891                      rxq->rx_ring_phys_addr);
2892
2893         if (!rte_is_power_of_2(nb_desc)) {
2894                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2895                                     "preconditions - canceling the feature for "
2896                                     "the whole port[%d]",
2897                              rxq->queue_id, rxq->port_id);
2898                 adapter->rx_vec_allowed = false;
2899         } else
2900                 ixgbe_rxq_vec_setup(rxq);
2901
2902         dev->data->rx_queues[queue_idx] = rxq;
2903
2904         ixgbe_reset_rx_queue(adapter, rxq);
2905
2906         return 0;
2907 }
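
/*
 * Illustrative sketch (not part of the driver): an application-side Rx
 * queue setup whose parameters keep rx_bulk_alloc_allowed (and, with a
 * power-of-two ring size, rx_vec_allowed) enabled.  The example_setup_rxq()
 * name is hypothetical; the rte_eth_rx_queue_setup() prototype is the one
 * of this DPDK release.
 */
static int __attribute__((unused))
example_setup_rxq(uint8_t port_id, uint16_t queue_id, unsigned int socket_id,
                  struct rte_mempool *mb_pool)
{
        struct rte_eth_rxconf rxconf = {
                /* >= RTE_PMD_IXGBE_RX_MAX_BURST, < nb_rx_desc and a divisor
                 * of it, so the bulk-alloc preconditions hold.
                 */
                .rx_free_thresh = 32,
                .rx_drop_en = 0,
        };

        /* 512 descriptors: a power of two (vector Rx stays enabled) and
         * below IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST.
         */
        return rte_eth_rx_queue_setup(port_id, queue_id, 512, socket_id,
                                      &rxconf, mb_pool);
}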
2908
2909 uint32_t
2910 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2911 {
2912 #define IXGBE_RXQ_SCAN_INTERVAL 4
2913         volatile union ixgbe_adv_rx_desc *rxdp;
2914         struct ixgbe_rx_queue *rxq;
2915         uint32_t desc = 0;
2916
2917         if (rx_queue_id >= dev->data->nb_rx_queues) {
2918                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2919                 return 0;
2920         }
2921
2922         rxq = dev->data->rx_queues[rx_queue_id];
2923         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2924
2925         while ((desc < rxq->nb_rx_desc) &&
2926                 (rxdp->wb.upper.status_error &
2927                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2928                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2929                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2930                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2931                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2932                                 desc - rxq->nb_rx_desc]);
2933         }
2934
2935         return desc;
2936 }
2937
2938 int
2939 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2940 {
2941         volatile union ixgbe_adv_rx_desc *rxdp;
2942         struct ixgbe_rx_queue *rxq = rx_queue;
2943         uint32_t desc;
2944
2945         if (unlikely(offset >= rxq->nb_rx_desc))
2946                 return 0;
2947         desc = rxq->rx_tail + offset;
2948         if (desc >= rxq->nb_rx_desc)
2949                 desc -= rxq->nb_rx_desc;
2950
2951         rxdp = &rxq->rx_ring[desc];
2952         return !!(rxdp->wb.upper.status_error &
2953                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2954 }
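
/*
 * Illustrative sketch (not part of the driver): how an application reaches
 * the two callbacks above through the ethdev API.  rte_eth_rx_queue_count()
 * reports how many descriptors have been written back (in steps of
 * IXGBE_RXQ_SCAN_INTERVAL here) and rte_eth_rx_descriptor_done() tests a
 * single offset.  The helper name and the 256-descriptor threshold are
 * hypothetical.
 */
static int __attribute__((unused))
example_rxq_backlog_high(uint8_t port_id, uint16_t queue_id)
{
        int nb_used = rte_eth_rx_queue_count(port_id, queue_id);

        if (nb_used < 0)
                return nb_used; /* invalid port/queue or not supported */

        /* Alternatively, probe one position directly: offset 256 done
         * implies at least 257 received descriptors are pending.
         */
        if (rte_eth_rx_descriptor_done(port_id, queue_id, 256) == 1)
                return 1;

        return nb_used > 256;
}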
2955
2956 void __attribute__((cold))
2957 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2958 {
2959         unsigned i;
2960         struct ixgbe_adapter *adapter =
2961                 (struct ixgbe_adapter *)dev->data->dev_private;
2962
2963         PMD_INIT_FUNC_TRACE();
2964
2965         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2966                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2967
2968                 if (txq != NULL) {
2969                         txq->ops->release_mbufs(txq);
2970                         txq->ops->reset(txq);
2971                 }
2972         }
2973
2974         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2975                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2976
2977                 if (rxq != NULL) {
2978                         ixgbe_rx_queue_release_mbufs(rxq);
2979                         ixgbe_reset_rx_queue(adapter, rxq);
2980                 }
2981         }
2982 }
2983
2984 void
2985 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2986 {
2987         unsigned i;
2988
2989         PMD_INIT_FUNC_TRACE();
2990
2991         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2992                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2993                 dev->data->rx_queues[i] = NULL;
2994         }
2995         dev->data->nb_rx_queues = 0;
2996
2997         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2998                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2999                 dev->data->tx_queues[i] = NULL;
3000         }
3001         dev->data->nb_tx_queues = 0;
3002 }
3003
3004 /*********************************************************************
3005  *
3006  *  Device RX/TX init functions
3007  *
3008  **********************************************************************/
3009
3010 /**
3011  * Receive Side Scaling (RSS)
3012  * See section 7.1.2.8 in the following document:
3013  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3014  *
3015  * Principles:
3016  * The source and destination IP addresses of the IP header and the source
3017  * and destination ports of TCP/UDP headers, if any, of received packets are
3018  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3019  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3020  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3021  * RSS output index which is used as the RX queue index in which to store the
3022  * received packet.
3023  * The following output is supplied in the RX write-back descriptor:
3024  *     - 32-bit result of the Microsoft RSS hash function,
3025  *     - 4-bit RSS type field.
3026  */
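
/*
 * Illustrative sketch (not part of the driver) of the lookup described
 * above: the low 7 bits of the 32-bit hash select one of the 128 RETA
 * entries, and that entry is the destination Rx queue index.  The helper
 * name and the software 'reta' array are hypothetical; in hardware the
 * table lives in the RETA registers programmed below.
 */
static inline uint8_t
example_rss_queue_from_hash(uint32_t rss_hash, const uint8_t reta[128])
{
        return reta[rss_hash & 0x7F];
}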
3027
3028 /*
3029  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3030  * Used as the default key.
3031  */
3032 static uint8_t rss_intel_key[40] = {
3033         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3034         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3035         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3036         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3037         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3038 };
3039
3040 static void
3041 ixgbe_rss_disable(struct rte_eth_dev *dev)
3042 {
3043         struct ixgbe_hw *hw;
3044         uint32_t mrqc;
3045         uint32_t mrqc_reg;
3046
3047         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3048         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3049         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3050         mrqc &= ~IXGBE_MRQC_RSSEN;
3051         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3052 }
3053
3054 static void
3055 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3056 {
3057         uint8_t  *hash_key;
3058         uint32_t mrqc;
3059         uint32_t rss_key;
3060         uint64_t rss_hf;
3061         uint16_t i;
3062         uint32_t mrqc_reg;
3063         uint32_t rssrk_reg;
3064
3065         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3066         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3067
3068         hash_key = rss_conf->rss_key;
3069         if (hash_key != NULL) {
3070                 /* Fill in RSS hash key */
3071                 for (i = 0; i < 10; i++) {
3072                         rss_key  = hash_key[(i * 4)];
3073                         rss_key |= hash_key[(i * 4) + 1] << 8;
3074                         rss_key |= hash_key[(i * 4) + 2] << 16;
3075                         rss_key |= hash_key[(i * 4) + 3] << 24;
3076                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3077                 }
3078         }
3079
3080         /* Set configured hashing protocols in MRQC register */
3081         rss_hf = rss_conf->rss_hf;
3082         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3083         if (rss_hf & ETH_RSS_IPV4)
3084                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3085         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3086                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3087         if (rss_hf & ETH_RSS_IPV6)
3088                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3089         if (rss_hf & ETH_RSS_IPV6_EX)
3090                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3091         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3092                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3093         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3094                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3095         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3096                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3097         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3098                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3099         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3100                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3101         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3102 }
3103
3104 int
3105 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3106                           struct rte_eth_rss_conf *rss_conf)
3107 {
3108         struct ixgbe_hw *hw;
3109         uint32_t mrqc;
3110         uint64_t rss_hf;
3111         uint32_t mrqc_reg;
3112
3113         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3114
3115         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3116                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3117                         "NIC.");
3118                 return -ENOTSUP;
3119         }
3120         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3121
3122         /*
3123          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3124          *     "RSS enabling cannot be done dynamically while it must be
3125          *      preceded by a software reset"
3126          * Before changing anything, first check that the update RSS operation
3127          * does not attempt to disable RSS, if RSS was enabled at
3128          * initialization time, or does not attempt to enable RSS, if RSS was
3129          * disabled at initialization time.
3130          */
3131         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3132         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3133         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3134                 if (rss_hf != 0) /* Enable RSS */
3135                         return -(EINVAL);
3136                 return 0; /* Nothing to do */
3137         }
3138         /* RSS enabled */
3139         if (rss_hf == 0) /* Disable RSS */
3140                 return -(EINVAL);
3141         ixgbe_hw_rss_hash_set(hw, rss_conf);
3142         return 0;
3143 }
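
/*
 * Illustrative sketch (not part of the driver): updating the RSS hash
 * functions from an application.  Per the check above this only succeeds
 * when RSS was already enabled at initialization time; requesting
 * rss_hf == 0 here would be rejected with -EINVAL.  The function name is
 * hypothetical.
 */
static int __attribute__((unused))
example_update_rss(uint8_t port_id)
{
        struct rte_eth_rss_conf rss_conf = {
                .rss_key = NULL,        /* keep the currently programmed key */
                .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP |
                          ETH_RSS_NONFRAG_IPV4_UDP,
        };

        return rte_eth_dev_rss_hash_update(port_id, &rss_conf);
}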
3144
3145 int
3146 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3147                             struct rte_eth_rss_conf *rss_conf)
3148 {
3149         struct ixgbe_hw *hw;
3150         uint8_t *hash_key;
3151         uint32_t mrqc;
3152         uint32_t rss_key;
3153         uint64_t rss_hf;
3154         uint16_t i;
3155         uint32_t mrqc_reg;
3156         uint32_t rssrk_reg;
3157
3158         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3159         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3160         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3161         hash_key = rss_conf->rss_key;
3162         if (hash_key != NULL) {
3163                 /* Return RSS hash key */
3164                 for (i = 0; i < 10; i++) {
3165                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3166                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3167                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3168                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3169                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3170                 }
3171         }
3172
3173         /* Get RSS functions configured in MRQC register */
3174         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3175         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3176                 rss_conf->rss_hf = 0;
3177                 return 0;
3178         }
3179         rss_hf = 0;
3180         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3181                 rss_hf |= ETH_RSS_IPV4;
3182         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3183                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3184         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3185                 rss_hf |= ETH_RSS_IPV6;
3186         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3187                 rss_hf |= ETH_RSS_IPV6_EX;
3188         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3189                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3190         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3191                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3192         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3193                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3194         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3195                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3196         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3197                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3198         rss_conf->rss_hf = rss_hf;
3199         return 0;
3200 }
3201
3202 static void
3203 ixgbe_rss_configure(struct rte_eth_dev *dev)
3204 {
3205         struct rte_eth_rss_conf rss_conf;
3206         struct ixgbe_hw *hw;
3207         uint32_t reta;
3208         uint16_t i;
3209         uint16_t j;
3210         uint16_t sp_reta_size;
3211         uint32_t reta_reg;
3212
3213         PMD_INIT_FUNC_TRACE();
3214         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3215
3216         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3217
3218         /*
3219          * Fill in redirection table
3220          * The byte-swap is needed because NIC registers are in
3221          * little-endian order.
3222          */
3223         reta = 0;
3224         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3225                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3226
3227                 if (j == dev->data->nb_rx_queues)
3228                         j = 0;
3229                 reta = (reta << 8) | j;
3230                 if ((i & 3) == 3)
3231                         IXGBE_WRITE_REG(hw, reta_reg,
3232                                         rte_bswap32(reta));
3233         }
3234
3235         /*
3236          * Configure the RSS key and the RSS protocols used to compute
3237          * the RSS hash of input packets.
3238          */
3239         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3240         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3241                 ixgbe_rss_disable(dev);
3242                 return;
3243         }
3244         if (rss_conf.rss_key == NULL)
3245                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3246         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3247 }
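
/*
 * Illustrative sketch (not part of the driver): an application can replace
 * the round-robin table programmed above through the ethdev RETA API.  This
 * spreads the 128 entries over nb_queues in the same round-robin fashion;
 * the function name is hypothetical and the 128-entry size matches the
 * non-SRIOV table handled here.
 */
static int __attribute__((unused))
example_update_reta(uint8_t port_id, uint16_t nb_queues)
{
        struct rte_eth_rss_reta_entry64 reta_conf[2];
        uint16_t i;

        memset(reta_conf, 0, sizeof(reta_conf));
        for (i = 0; i < 128; i++) {
                reta_conf[i / RTE_RETA_GROUP_SIZE].mask |=
                        1ULL << (i % RTE_RETA_GROUP_SIZE);
                reta_conf[i / RTE_RETA_GROUP_SIZE].reta[i % RTE_RETA_GROUP_SIZE] =
                        i % nb_queues;
        }

        return rte_eth_dev_rss_reta_update(port_id, reta_conf, 128);
}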
3248
3249 #define NUM_VFTA_REGISTERS 128
3250 #define NIC_RX_BUFFER_SIZE 0x200
3251 #define X550_RX_BUFFER_SIZE 0x180
3252
3253 static void
3254 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3255 {
3256         struct rte_eth_vmdq_dcb_conf *cfg;
3257         struct ixgbe_hw *hw;
3258         enum rte_eth_nb_pools num_pools;
3259         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3260         uint16_t pbsize;
3261         uint8_t nb_tcs; /* number of traffic classes */
3262         int i;
3263
3264         PMD_INIT_FUNC_TRACE();
3265         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3266         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3267         num_pools = cfg->nb_queue_pools;
3268         /* Check we have a valid number of pools */
3269         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3270                 ixgbe_rss_disable(dev);
3271                 return;
3272         }
3273         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3274         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3275
3276         /*
3277          * RXPBSIZE
3278          * split rx buffer up into sections, each for 1 traffic class
3279          */
3280         switch (hw->mac.type) {
3281         case ixgbe_mac_X550:
3282         case ixgbe_mac_X550EM_x:
3283         case ixgbe_mac_X550EM_a:
3284                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3285                 break;
3286         default:
3287                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3288                 break;
3289         }
3290         for (i = 0; i < nb_tcs; i++) {
3291                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3292
3293                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3294                 /* clear 10 bits. */
3295                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3296                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3297         }
3298         /* zero alloc all unused TCs */
3299         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3300                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3301
3302                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3303                 /* clear 10 bits. */
3304                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3305         }
3306
3307         /* MRQC: enable vmdq and dcb */
3308         mrqc = (num_pools == ETH_16_POOLS) ?
3309                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3310         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3311
3312         /* PFVTCTL: turn on virtualisation and set the default pool */
3313         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3314         if (cfg->enable_default_pool) {
3315                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3316         } else {
3317                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3318         }
3319
3320         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3321
3322         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3323         queue_mapping = 0;
3324         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3325                 /*
3326                  * mapping is done with 3 bits per priority,
3327                  * so shift by i*3 each time
3328                  */
3329                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3330
3331         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3332
3333         /* RTRPCS: DCB related */
3334         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3335
3336         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3337         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3338         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3339         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3340
3341         /* VFTA - enable all vlan filters */
3342         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3343                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3344         }
3345
3346         /* VFRE: pool enabling for receive - 16 or 32 */
3347         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3348                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3349
3350         /*
3351          * MPSAR - allow pools to read specific mac addresses
3352          * In this case, all pools should be able to read from mac addr 0
3353          */
3354         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3355         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3356
3357         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3358         for (i = 0; i < cfg->nb_pool_maps; i++) {
3359                 /* set vlan id in VF register and set the valid bit */
3360                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3361                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3362                 /*
3363                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3364                  * pools, we only need to use the first half of the register
3365                  * i.e. bits 0-31
3366                  */
3367                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3368         }
3369 }
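
/*
 * Illustrative sketch (not part of the driver): the rte_eth_conf fields an
 * application would fill so that the VMDQ+DCB path above is taken with
 * 16 pools / 8 traffic classes.  The VLAN id, pool bitmap and the helper
 * name are hypothetical.
 */
static void __attribute__((unused))
example_vmdq_dcb_conf(struct rte_eth_conf *eth_conf)
{
        struct rte_eth_vmdq_dcb_conf *cfg =
                &eth_conf->rx_adv_conf.vmdq_dcb_conf;
        uint8_t up;

        eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB;
        cfg->nb_queue_pools = ETH_16_POOLS;     /* -> 8 traffic classes */
        cfg->enable_default_pool = 0;
        cfg->default_pool = 0;
        cfg->nb_pool_maps = 1;
        cfg->pool_map[0].vlan_id = 100;         /* example VLAN */
        cfg->pool_map[0].pools = 1ULL << 0;     /* deliver it to pool 0 */

        /* Identity user-priority -> traffic-class mapping (3 bits per
         * priority in RTRUP2TC, as programmed above).
         */
        for (up = 0; up < ETH_DCB_NUM_USER_PRIORITIES; up++)
                cfg->dcb_tc[up] = up;
}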
3370
3371 /**
3372  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3373  * @dev: pointer to eth_dev structure
3374  * @dcb_config: pointer to ixgbe_dcb_config structure
3375  */
3376 static void
3377 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3378                        struct ixgbe_dcb_config *dcb_config)
3379 {
3380         uint32_t reg;
3381         uint32_t q;
3382         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3383
3384         PMD_INIT_FUNC_TRACE();
3385         if (hw->mac.type != ixgbe_mac_82598EB) {
3386                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3387                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3388                 reg |= IXGBE_RTTDCS_ARBDIS;
3389                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3390
3391                 /* Enable DCB for Tx with 8 TCs */
3392                 if (dcb_config->num_tcs.pg_tcs == 8) {
3393                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3394                 } else {
3395                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3396                 }
3397                 if (dcb_config->vt_mode)
3398                         reg |= IXGBE_MTQC_VT_ENA;
3399                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3400
3401                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3402                         /* Disable drop for all queues in VMDQ mode */
3403                         for (q = 0; q < 128; q++)
3404                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3405                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3406                 } else {
3407                         /* Enable drop for all queues in SRIOV mode */
3408                         for (q = 0; q < 128; q++)
3409                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3410                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE));
3411                 }
3412
3413                 /* Enable the Tx desc arbiter */
3414                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3415                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3416                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3417
3418                 /* Enable Security TX Buffer IFG for DCB */
3419                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3420                 reg |= IXGBE_SECTX_DCB;
3421                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3422         }
3423 }
3424
3425 /**
3426  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3427  * @dev: pointer to rte_eth_dev structure
3428  * @dcb_config: pointer to ixgbe_dcb_config structure
3429  */
3430 static void
3431 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3432                         struct ixgbe_dcb_config *dcb_config)
3433 {
3434         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3435                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3436         struct ixgbe_hw *hw =
3437                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3438
3439         PMD_INIT_FUNC_TRACE();
3440         if (hw->mac.type != ixgbe_mac_82598EB)
3441                 /* PF VF Transmit Enable */
3442                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3443                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3444
3445         /* Configure general DCB TX parameters */
3446         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3447 }
3448
3449 static void
3450 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3451                         struct ixgbe_dcb_config *dcb_config)
3452 {
3453         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3454                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3455         struct ixgbe_dcb_tc_config *tc;
3456         uint8_t i, j;
3457
3458         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3459         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3460                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3461                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3462         } else {
3463                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3464                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3465         }
3466         /* User Priority to Traffic Class mapping */
3467         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3468                 j = vmdq_rx_conf->dcb_tc[i];
3469                 tc = &dcb_config->tc_config[j];
3470                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3471                                                 (uint8_t)(1 << j);
3472         }
3473 }
3474
3475 static void
3476 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3477                         struct ixgbe_dcb_config *dcb_config)
3478 {
3479         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3480                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3481         struct ixgbe_dcb_tc_config *tc;
3482         uint8_t i, j;
3483
3484         /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3485         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3486                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3487                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3488         } else {
3489                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3490                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3491         }
3492
3493         /* User Priority to Traffic Class mapping */
3494         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3495                 j = vmdq_tx_conf->dcb_tc[i];
3496                 tc = &dcb_config->tc_config[j];
3497                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3498                                                 (uint8_t)(1 << j);
3499         }
3500 }
3501
3502 static void
3503 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3504                 struct ixgbe_dcb_config *dcb_config)
3505 {
3506         struct rte_eth_dcb_rx_conf *rx_conf =
3507                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3508         struct ixgbe_dcb_tc_config *tc;
3509         uint8_t i, j;
3510
3511         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3512         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3513
3514         /* User Priority to Traffic Class mapping */
3515         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3516                 j = rx_conf->dcb_tc[i];
3517                 tc = &dcb_config->tc_config[j];
3518                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3519                                                 (uint8_t)(1 << j);
3520         }
3521 }
3522
3523 static void
3524 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3525                 struct ixgbe_dcb_config *dcb_config)
3526 {
3527         struct rte_eth_dcb_tx_conf *tx_conf =
3528                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3529         struct ixgbe_dcb_tc_config *tc;
3530         uint8_t i, j;
3531
3532         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3533         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3534
3535         /* User Priority to Traffic Class mapping */
3536         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3537                 j = tx_conf->dcb_tc[i];
3538                 tc = &dcb_config->tc_config[j];
3539                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3540                                                 (uint8_t)(1 << j);
3541         }
3542 }
3543
3544 /**
3545  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3546  * @hw: pointer to hardware structure
3547  * @dcb_config: pointer to ixgbe_dcb_config structure
3548  */
3549 static void
3550 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3551                struct ixgbe_dcb_config *dcb_config)
3552 {
3553         uint32_t reg;
3554         uint32_t vlanctrl;
3555         uint8_t i;
3556
3557         PMD_INIT_FUNC_TRACE();
3558         /*
3559          * Disable the arbiter before changing parameters
3560          * (always enable recycle mode; WSP)
3561          */
3562         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3563         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3564
3565         if (hw->mac.type != ixgbe_mac_82598EB) {
3566                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3567                 if (dcb_config->num_tcs.pg_tcs == 4) {
3568                         if (dcb_config->vt_mode)
3569                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3570                                         IXGBE_MRQC_VMDQRT4TCEN;
3571                         else {
3572                                 /* no matter the mode is DCB or DCB_RSS, just
3573                                  * set the MRQE to RSSXTCEN. RSS is controlled
3574                                  * by RSS_FIELD
3575                                  */
3576                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3577                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3578                                         IXGBE_MRQC_RTRSS4TCEN;
3579                         }
3580                 }
3581                 if (dcb_config->num_tcs.pg_tcs == 8) {
3582                         if (dcb_config->vt_mode)
3583                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3584                                         IXGBE_MRQC_VMDQRT8TCEN;
3585                         else {
3586                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3587                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3588                                         IXGBE_MRQC_RTRSS8TCEN;
3589                         }
3590                 }
3591
3592                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3593         }
3594
3595         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3596         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3597         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3598         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3599
3600         /* VFTA - enable all vlan filters */
3601         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3602                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3603         }
3604
3605         /*
3606          * Configure Rx packet plane (recycle mode; WSP) and
3607          * enable arbiter
3608          */
3609         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3610         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3611 }
3612
3613 static void
3614 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3615                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3616 {
3617         switch (hw->mac.type) {
3618         case ixgbe_mac_82598EB:
3619                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3620                 break;
3621         case ixgbe_mac_82599EB:
3622         case ixgbe_mac_X540:
3623         case ixgbe_mac_X550:
3624         case ixgbe_mac_X550EM_x:
3625         case ixgbe_mac_X550EM_a:
3626                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3627                                                   tsa, map);
3628                 break;
3629         default:
3630                 break;
3631         }
3632 }
3633
3634 static void
3635 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3636                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3637 {
3638         switch (hw->mac.type) {
3639         case ixgbe_mac_82598EB:
3640                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3641                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3642                 break;
3643         case ixgbe_mac_82599EB:
3644         case ixgbe_mac_X540:
3645         case ixgbe_mac_X550:
3646         case ixgbe_mac_X550EM_x:
3647         case ixgbe_mac_X550EM_a:
3648                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3649                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3650                 break;
3651         default:
3652                 break;
3653         }
3654 }
3655
3656 #define DCB_RX_CONFIG  1
3657 #define DCB_TX_CONFIG  1
3658 #define DCB_TX_PB      1024
3659 /**
3660  * ixgbe_dcb_hw_configure - Enable DCB and configure
3661  * general DCB in VT mode and non-VT mode parameters
3662  * @dev: pointer to rte_eth_dev structure
3663  * @dcb_config: pointer to ixgbe_dcb_config structure
3664  */
3665 static int
3666 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3667                         struct ixgbe_dcb_config *dcb_config)
3668 {
3669         int     ret = 0;
3670         uint8_t i, pfc_en, nb_tcs;
3671         uint16_t pbsize, rx_buffer_size;
3672         uint8_t config_dcb_rx = 0;
3673         uint8_t config_dcb_tx = 0;
3674         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3675         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3676         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3677         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3678         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3679         struct ixgbe_dcb_tc_config *tc;
3680         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3681         struct ixgbe_hw *hw =
3682                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3683
3684         switch (dev->data->dev_conf.rxmode.mq_mode) {
3685         case ETH_MQ_RX_VMDQ_DCB:
3686                 dcb_config->vt_mode = true;
3687                 if (hw->mac.type != ixgbe_mac_82598EB) {
3688                         config_dcb_rx = DCB_RX_CONFIG;
3689                         /*
3690                          * get dcb and VT rx configuration parameters
3691                          * from rte_eth_conf
3692                          */
3693                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3694                         /* Configure general VMDQ and DCB RX parameters */
3695                         ixgbe_vmdq_dcb_configure(dev);
3696                 }
3697                 break;
3698         case ETH_MQ_RX_DCB:
3699         case ETH_MQ_RX_DCB_RSS:
3700                 dcb_config->vt_mode = false;
3701                 config_dcb_rx = DCB_RX_CONFIG;
3702                 /* Get DCB RX configuration parameters from rte_eth_conf */
3703                 ixgbe_dcb_rx_config(dev, dcb_config);
3704                 /* Configure general DCB RX parameters */
3705                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3706                 break;
3707         default:
3708                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3709                 break;
3710         }
3711         switch (dev->data->dev_conf.txmode.mq_mode) {
3712         case ETH_MQ_TX_VMDQ_DCB:
3713                 dcb_config->vt_mode = true;
3714                 config_dcb_tx = DCB_TX_CONFIG;
3715                 /* get DCB and VT TX configuration parameters
3716                  * from rte_eth_conf
3717                  */
3718                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3719                 /* Configure general VMDQ and DCB TX parameters */
3720                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3721                 break;
3722
3723         case ETH_MQ_TX_DCB:
3724                 dcb_config->vt_mode = false;
3725                 config_dcb_tx = DCB_TX_CONFIG;
3726                 /* get DCB TX configuration parameters from rte_eth_conf */
3727                 ixgbe_dcb_tx_config(dev, dcb_config);
3728                 /* Configure general DCB TX parameters */
3729                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3730                 break;
3731         default:
3732                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3733                 break;
3734         }
3735
3736         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3737         /* Unpack map */
3738         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3739         if (nb_tcs == ETH_4_TCS) {
3740                 /* Spread unconfigured priorities (4-7) over unused TCs instead of TC0 */
3741                 uint8_t j = 4;
3742                 uint8_t mask = 0xFF;
3743
3744                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3745                         mask = (uint8_t)(mask & (~(1 << map[i])));
3746                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3747                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3748                                 map[j++] = i;
3749                         mask >>= 1;
3750                 }
3751                 /* Re-configure 4 TCs BW */
3752                 for (i = 0; i < nb_tcs; i++) {
3753                         tc = &dcb_config->tc_config[i];
3754                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3755                                                 (uint8_t)(100 / nb_tcs);
3756                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3757                                                 (uint8_t)(100 / nb_tcs);
3758                 }
3759                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3760                         tc = &dcb_config->tc_config[i];
3761                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3762                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3763                 }
3764         }
3765
3766         switch (hw->mac.type) {
3767         case ixgbe_mac_X550:
3768         case ixgbe_mac_X550EM_x:
3769         case ixgbe_mac_X550EM_a:
3770                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3771                 break;
3772         default:
3773                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3774                 break;
3775         }
3776
3777         if (config_dcb_rx) {
3778                 /* Set RX buffer size */
3779                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3780                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3781
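                     /* For example (a sketch, assuming the usual 512 KB
                      * NIC_RX_BUFFER_SIZE): with 8 TCs each TC gets
                      * 512 / 8 = 64 KB, and the shift above places that KB
                      * value into the SIZE field of RXPBSIZE.
                      */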
3782                 for (i = 0; i < nb_tcs; i++) {
3783                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3784                 }
3785                 /* zero alloc all unused TCs */
3786                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3787                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3788                 }
3789         }
3790         if (config_dcb_tx) {
3791                 /* Only an equally distributed Tx packet buffer
3792                  * strategy is supported.
3793                  */
3794                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3795                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
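                     /* Worked example (a sketch, assuming the usual values
                      * IXGBE_TXPBSIZE_MAX = 160 KB and IXGBE_TXPKT_SIZE_MAX = 10):
                      * with 4 TCs, txpktsize = 160 KB / 4 = 40960 bytes and
                      * txpbthresh = (40960 / 1024) - 10 = 30, the value written
                      * to TXPBTHRESH for each TC below.
                      */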
3796
3797                 for (i = 0; i < nb_tcs; i++) {
3798                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3799                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3800                 }
3801                 /* Clear unused TCs, if any, to zero buffer size */
3802                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3803                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3804                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3805                 }
3806         }
3807
3808         /* Calculate traffic class credits */
3809         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3810                                 IXGBE_DCB_TX_CONFIG);
3811         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3812                                 IXGBE_DCB_RX_CONFIG);
3813
3814         if (config_dcb_rx) {
3815                 /* Unpack CEE standard containers */
3816                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3817                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3818                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3819                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3820                 /* Configure PG(ETS) RX */
3821                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3822         }
3823
3824         if (config_dcb_tx) {
3825                 /* Unpack CEE standard containers */
3826                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3827                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3828                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3829                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3830                 /* Configure PG(ETS) TX */
3831                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3832         }
3833
3834         /* Configure queue statistics registers */
3835         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3836
3837         /* Check if the PFC is supported */
3838         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3839                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3840                 for (i = 0; i < nb_tcs; i++) {
3841                         /*
3842                          * high_water is 3/4 and low_water is 1/4 of the per-TC
3843                          * buffer; e.g. with 8 TCs the defaults are 48 and 16.
3844                          */
3845                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3846                         hw->fc.low_water[i] = pbsize / 4;
3847                         /* Enable pfc for this TC */
3848                         tc = &dcb_config->tc_config[i];
3849                         tc->pfc = ixgbe_dcb_pfc_enabled;
3850                 }
3851                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3852                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3853                         pfc_en &= 0x0F;
3854                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3855         }
3856
3857         return ret;
3858 }
3859
3860 /**
3861  * ixgbe_configure_dcb - Configure DCB Hardware
3862  * @dev: pointer to rte_eth_dev
3863  */
3864 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3865 {
3866         struct ixgbe_dcb_config *dcb_cfg =
3867                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3868         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3869
3870         PMD_INIT_FUNC_TRACE();
3871
3872         /* check whether the mq_mode is supported for DCB */
3873         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3874             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3875             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3876                 return;
3877
3878         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3879                 return;
3880
3881         /* Configure DCB hardware */
3882         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3883 }
3884
3885 /*
3886  * VMDq is only supported on 10 GbE NICs.
3887  */
3888 static void
3889 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3890 {
3891         struct rte_eth_vmdq_rx_conf *cfg;
3892         struct ixgbe_hw *hw;
3893         enum rte_eth_nb_pools num_pools;
3894         uint32_t mrqc, vt_ctl, vlanctrl;
3895         uint32_t vmolr = 0;
3896         int i;
3897
3898         PMD_INIT_FUNC_TRACE();
3899         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3900         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3901         num_pools = cfg->nb_queue_pools;
3902
3903         ixgbe_rss_disable(dev);
3904
3905         /* MRQC: enable vmdq */
3906         mrqc = IXGBE_MRQC_VMDQEN;
3907         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3908
3909         /* PFVTCTL: turn on virtualisation and set the default pool */
3910         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3911         if (cfg->enable_default_pool)
3912                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3913         else
3914                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3915
3916         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3917
3918         for (i = 0; i < (int)num_pools; i++) {
3919                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3920                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3921         }
3922
3923         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3924         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3925         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3926         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3927
3928         /* VFTA - enable all vlan filters */
3929         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3930                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3931
3932         /* VFRE: pool enabling for receive - 64 */
3933         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3934         if (num_pools == ETH_64_POOLS)
3935                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3936
3937         /*
3938          * MPSAR - allow pools to read specific mac addresses
3939          * In this case, all pools should be able to read from mac addr 0
3940          */
3941         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3942         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3943
3944         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3945         for (i = 0; i < cfg->nb_pool_maps; i++) {
3946                 /* set vlan id in VF register and set the valid bit */
3947                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3948                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3949                 /*
3950                  * Put the allowed pools in the VLVFB registers. Each VLVF
3951                  * entry has two 32-bit VLVFB registers: pools 0-31 go in
3952                  * the even register, pools 32-63 in the odd one.
3953                  */
3954                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3955                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3956                                         (cfg->pool_map[i].pools & UINT32_MAX));
3957                 else
3958                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3959                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3960
3961         }
3962
3963         /* PFDMA Tx General Switch Control: enable VMDQ loopback */
3964         if (cfg->enable_loop_back) {
3965                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3966                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3967                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3968         }
3969
3970         IXGBE_WRITE_FLUSH(hw);
3971 }
3972
3973 /*
3974  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3975  * @hw: pointer to hardware structure
3976  */
3977 static void
3978 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3979 {
3980         uint32_t reg;
3981         uint32_t q;
3982
3983         PMD_INIT_FUNC_TRACE();
3984         /* PF VF Transmit Enable */
3985         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3986         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3987
3988         /* Disable the Tx desc arbiter so that MTQC can be changed */
3989         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3990         reg |= IXGBE_RTTDCS_ARBDIS;
3991         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3992
3993         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3994         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3995
3996         /* Disable drop for all queues */
3997         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3998                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3999                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4000
4001         /* Enable the Tx desc arbiter */
4002         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4003         reg &= ~IXGBE_RTTDCS_ARBDIS;
4004         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4005
4006         IXGBE_WRITE_FLUSH(hw);
4007 }
4008
4009 static int __attribute__((cold))
4010 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4011 {
4012         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4013         uint64_t dma_addr;
4014         unsigned int i;
4015
4016         /* Initialize software ring entries */
4017         for (i = 0; i < rxq->nb_rx_desc; i++) {
4018                 volatile union ixgbe_adv_rx_desc *rxd;
4019                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4020
4021                 if (mbuf == NULL) {
4022                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4023                                      (unsigned) rxq->queue_id);
4024                         return -ENOMEM;
4025                 }
4026
4027                 rte_mbuf_refcnt_set(mbuf, 1);
4028                 mbuf->next = NULL;
4029                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4030                 mbuf->nb_segs = 1;
4031                 mbuf->port = rxq->port_id;
4032
4033                 dma_addr =
4034                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4035                 rxd = &rxq->rx_ring[i];
4036                 rxd->read.hdr_addr = 0;
4037                 rxd->read.pkt_addr = dma_addr;
4038                 rxe[i].mbuf = mbuf;
4039         }
4040
4041         return 0;
4042 }
4043
4044 static int
4045 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4046 {
4047         struct ixgbe_hw *hw;
4048         uint32_t mrqc;
4049
4050         ixgbe_rss_configure(dev);
4051
4052         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4053
4054         /* MRQC: enable VF RSS */
4055         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4056         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4057         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4058         case ETH_64_POOLS:
4059                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4060                 break;
4061
4062         case ETH_32_POOLS:
4063                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4064                 break;
4065
4066         default:
4067                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4068                 return -EINVAL;
4069         }
4070
4071         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4072
4073         return 0;
4074 }
4075
4076 static int
4077 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4078 {
4079         struct ixgbe_hw *hw =
4080                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4081
4082         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4083         case ETH_64_POOLS:
4084                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4085                         IXGBE_MRQC_VMDQEN);
4086                 break;
4087
4088         case ETH_32_POOLS:
4089                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4090                         IXGBE_MRQC_VMDQRT4TCEN);
4091                 break;
4092
4093         case ETH_16_POOLS:
4094                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4095                         IXGBE_MRQC_VMDQRT8TCEN);
4096                 break;
4097         default:
4098                 PMD_INIT_LOG(ERR,
4099                         "invalid pool number in IOV mode");
4100                 break;
4101         }
4102         return 0;
4103 }
4104
4105 static int
4106 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4107 {
4108         struct ixgbe_hw *hw =
4109                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4110
4111         if (hw->mac.type == ixgbe_mac_82598EB)
4112                 return 0;
4113
4114         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4115                 /*
4116                  * SRIOV inactive scheme
4117                  * any DCB/RSS w/o VMDq multi-queue setting
4118                  */
4119                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4120                 case ETH_MQ_RX_RSS:
4121                 case ETH_MQ_RX_DCB_RSS:
4122                 case ETH_MQ_RX_VMDQ_RSS:
4123                         ixgbe_rss_configure(dev);
4124                         break;
4125
4126                 case ETH_MQ_RX_VMDQ_DCB:
4127                         ixgbe_vmdq_dcb_configure(dev);
4128                         break;
4129
4130                 case ETH_MQ_RX_VMDQ_ONLY:
4131                         ixgbe_vmdq_rx_hw_configure(dev);
4132                         break;
4133
4134                 case ETH_MQ_RX_NONE:
4135                 default:
4136                         /* if mq_mode is none, disable rss mode.*/
4137                         ixgbe_rss_disable(dev);
4138                         break;
4139                 }
4140         } else {
4141                 /*
4142                  * SRIOV active scheme
4143                  * Support RSS together with VMDq & SRIOV
4144                  */
4145                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4146                 case ETH_MQ_RX_RSS:
4147                 case ETH_MQ_RX_VMDQ_RSS:
4148                         ixgbe_config_vf_rss(dev);
4149                         break;
4150                 case ETH_MQ_RX_VMDQ_DCB:
4151                         ixgbe_vmdq_dcb_configure(dev);
4152                         break;
4153                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4154                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4155                         PMD_INIT_LOG(ERR,
4156                                 "Could not support DCB/RSS with VMDq & SRIOV");
4157                         return -1;
4158                 default:
4159                         ixgbe_config_vf_default(dev);
4160                         break;
4161                 }
4162         }
4163
4164         return 0;
4165 }
4166
4167 static int
4168 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4169 {
4170         struct ixgbe_hw *hw =
4171                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4172         uint32_t mtqc;
4173         uint32_t rttdcs;
4174
4175         if (hw->mac.type == ixgbe_mac_82598EB)
4176                 return 0;
4177
4178         /* disable arbiter before setting MTQC */
4179         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4180         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4181         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4182
4183         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4184                 /*
4185                  * SRIOV inactive scheme
4186                  * any DCB w/o VMDq multi-queue setting
4187                  */
4188                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4189                         ixgbe_vmdq_tx_hw_configure(hw);
4190                 else {
4191                         mtqc = IXGBE_MTQC_64Q_1PB;
4192                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4193                 }
4194         } else {
4195                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4196
4197                 /*
4198                  * SRIOV active scheme
4199                  * FIXME if support DCB together with VMDq & SRIOV
4200                  */
4201                 case ETH_64_POOLS:
4202                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4203                         break;
4204                 case ETH_32_POOLS:
4205                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4206                         break;
4207                 case ETH_16_POOLS:
4208                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4209                                 IXGBE_MTQC_8TC_8TQ;
4210                         break;
4211                 default:
4212                         mtqc = IXGBE_MTQC_64Q_1PB;
4213                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4214                 }
4215                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4216         }
4217
4218         /* re-enable arbiter */
4219         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4220         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4221
4222         return 0;
4223 }
4224
4225 /**
4226  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4227  *
4228  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4229  * spec rev. 3.0 chapter 8.2.3.8.13.
4230  *
4231  * @pool Memory pool of the Rx queue
4232  */
4233 static inline uint32_t
4234 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4235 {
4236         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4237
4238         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4239         uint16_t maxdesc =
4240                 IPV4_MAX_PKT_LEN /
4241                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4242
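             /* For example (a sketch, assuming the common 2 KB usable data
              * room of default mbuf pools): maxdesc = 65535 / 2048 = 31,
              * which the checks below clamp to IXGBE_RSCCTL_MAXDESC_16.
              */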
4243         if (maxdesc >= 16)
4244                 return IXGBE_RSCCTL_MAXDESC_16;
4245         else if (maxdesc >= 8)
4246                 return IXGBE_RSCCTL_MAXDESC_8;
4247         else if (maxdesc >= 4)
4248                 return IXGBE_RSCCTL_MAXDESC_4;
4249         else
4250                 return IXGBE_RSCCTL_MAXDESC_1;
4251 }
4252
4253 /**
4254  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4255  * interrupt
4256  *
4257  * (Taken from FreeBSD tree)
4258  * (yes this is all very magic and confusing :)
4259  *
4260  * @dev port handle
4261  * @entry the register array entry
4262  * @vector the MSIX vector for this queue
4263  * @type RX/TX/MISC
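      *
      * Note (derived from the code below, not from the datasheet): on
      * 82599/X540 each 32-bit IVAR register covers two queues; the even
      * queue uses bytes 0 (Rx) and 1 (Tx), the odd queue bytes 2 (Rx) and
      * 3 (Tx), while type -1 selects the separate IVAR_MISC register.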
4264  */
4265 static void
4266 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4267 {
4268         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4269         u32 ivar, index;
4270
4271         vector |= IXGBE_IVAR_ALLOC_VAL;
4272
4273         switch (hw->mac.type) {
4274
4275         case ixgbe_mac_82598EB:
4276                 if (type == -1)
4277                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4278                 else
4279                         entry += (type * 64);
4280                 index = (entry >> 2) & 0x1F;
4281                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4282                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4283                 ivar |= (vector << (8 * (entry & 0x3)));
4284                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4285                 break;
4286
4287         case ixgbe_mac_82599EB:
4288         case ixgbe_mac_X540:
4289                 if (type == -1) { /* MISC IVAR */
4290                         index = (entry & 1) * 8;
4291                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4292                         ivar &= ~(0xFF << index);
4293                         ivar |= (vector << index);
4294                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4295                 } else {        /* RX/TX IVARS */
4296                         index = (16 * (entry & 1)) + (8 * type);
4297                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4298                         ivar &= ~(0xFF << index);
4299                         ivar |= (vector << index);
4300                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4301                 }
4302
4303                 break;
4304
4305         default:
4306                 break;
4307         }
4308 }
4309
4310 void __attribute__((cold))
4311 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4312 {
4313         uint16_t i, rx_using_sse;
4314         struct ixgbe_adapter *adapter =
4315                 (struct ixgbe_adapter *)dev->data->dev_private;
4316
4317         /*
4318          * In order to allow Vector Rx there are a few configuration
4319          * conditions to be met and Rx Bulk Allocation should be allowed.
4320          */
4321         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4322             !adapter->rx_bulk_alloc_allowed) {
4323                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4324                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4325                                     "not enabled",
4326                              dev->data->port_id);
4327
4328                 adapter->rx_vec_allowed = false;
4329         }
4330
4331         /*
4332          * Initialize the appropriate LRO callback.
4333          *
4334          * If all queues satisfy the bulk allocation preconditions
4335          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4336          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4337          */
4338         if (dev->data->lro) {
4339                 if (adapter->rx_bulk_alloc_allowed) {
4340                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4341                                            "allocation version");
4342                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4343                 } else {
4344                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4345                                            "allocation version");
4346                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4347                 }
4348         } else if (dev->data->scattered_rx) {
4349                 /*
4350                  * Set the non-LRO scattered callback: there are Vector and
4351                  * single allocation versions.
4352                  */
4353                 if (adapter->rx_vec_allowed) {
4354                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4355                                             "callback (port=%d).",
4356                                      dev->data->port_id);
4357
4358                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4359                 } else if (adapter->rx_bulk_alloc_allowed) {
4360                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback with "
4361                                            "bulk allocation (port=%d).",
4362                                      dev->data->port_id);
4363                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4364                 } else {
4365                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4366                                             "single allocation) "
4367                                             "Scattered Rx callback "
4368                                             "(port=%d).",
4369                                      dev->data->port_id);
4370
4371                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4372                 }
4373         /*
4374          * Below we set "simple" callbacks according to port/queues parameters.
4375          * If parameters allow we are going to choose between the following
4376          * callbacks:
4377          *    - Vector
4378          *    - Bulk Allocation
4379          *    - Single buffer allocation (the simplest one)
4380          */
4381         } else if (adapter->rx_vec_allowed) {
4382                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4383                                     "burst size is no less than %d (port=%d).",
4384                              RTE_IXGBE_DESCS_PER_LOOP,
4385                              dev->data->port_id);
4386
4387                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4388         } else if (adapter->rx_bulk_alloc_allowed) {
4389                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4390                                     "satisfied. Rx Burst Bulk Alloc function "
4391                                     "will be used on port=%d.",
4392                              dev->data->port_id);
4393
4394                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4395         } else {
4396                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4397                                     "satisfied, or Scattered Rx is requested "
4398                                     "(port=%d).",
4399                              dev->data->port_id);
4400
4401                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4402         }
4403
4404         /* Propagate information about RX function choice through all queues. */
4405
4406         rx_using_sse =
4407                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4408                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4409
4410         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4411                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4412
4413                 rxq->rx_using_sse = rx_using_sse;
4414         }
4415 }
4416
4417 /**
4418  * ixgbe_set_rsc - configure RSC related port HW registers
4419  *
4420  * Configures the port's RSC related registers according to chapter 4.6.7.2
4421  * of the 82599 Spec (x540 configuration is virtually the same).
4422  *
4423  * @dev port handle
4424  *
4425  * Returns 0 in case of success or a non-zero error code
4426  */
4427 static int
4428 ixgbe_set_rsc(struct rte_eth_dev *dev)
4429 {
4430         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4431         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4432         struct rte_eth_dev_info dev_info = { 0 };
4433         bool rsc_capable = false;
4434         uint16_t i;
4435         uint32_t rdrxctl;
4436
4437         /* Sanity check */
4438         dev->dev_ops->dev_infos_get(dev, &dev_info);
4439         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4440                 rsc_capable = true;
4441
4442         if (!rsc_capable && rx_conf->enable_lro) {
4443                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4444                                    "support it");
4445                 return -EINVAL;
4446         }
4447
4448         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4449
4450         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4451                 /*
4452                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4453                  * RSC requires HW CRC stripping to be enabled. If the user
4454                  * requested both HW CRC stripping off and RSC on, return
4455                  * an error.
4456                  */
4457                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4458                                     "is disabled");
4459                 return -EINVAL;
4460         }
4461
4462         /* RFCTL configuration  */
4463         if (rsc_capable) {
4464                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4465
4466                 if (rx_conf->enable_lro)
4467                         /*
4468                          * Since NFS packet coalescing is not supported, clear
4469                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4470                          * enabled.
4471                          */
4472                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4473                                    IXGBE_RFCTL_NFSR_DIS);
4474                 else
4475                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4476
4477                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4478         }
4479
4480         /* If LRO hasn't been requested - we are done here. */
4481         if (!rx_conf->enable_lro)
4482                 return 0;
4483
4484         /* Set RDRXCTL.RSCACKC bit */
4485         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4486         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4487         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4488
4489         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4490         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4491                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4492                 uint32_t srrctl =
4493                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4494                 uint32_t rscctl =
4495                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4496                 uint32_t psrtype =
4497                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4498                 uint32_t eitr =
4499                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4500
4501                 /*
4502                  * ixgbe PMD doesn't support header-split at the moment.
4503                  *
4504                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4505                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER field
4506                  * should be configured even if header split is not
4507                  * enabled. We configure it to 128 bytes, following the
4508                  * recommendation in the spec.
4509                  */
4510                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4511                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4512                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4513
4514                 /*
4515                  * TODO: Consider setting the Receive Descriptor Minimum
4516                  * Threshold Size for the RSC case. This is not an obviously
4517                  * beneficial option, but one worth considering...
4518                  */
4519
4520                 rscctl |= IXGBE_RSCCTL_RSCEN;
4521                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4522                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4523
4524                 /*
4525                  * RSC: Set ITR interval corresponding to 2K ints/s.
4526                  *
4527                  * Full-sized RSC aggregations for a 10Gb/s link will
4528                  * arrive at about 20K aggregation/s rate.
4529                  *
4530                  * A 2K ints/s rate (one interrupt every 500 us) will cause
4531                  * only about 10% of the aggregations to be closed by the
4532                  * interrupt timer when streaming at wire speed.
4533                  *
4534                  * For a sparse streaming case this setting will yield
4535                  * at most 500us latency for a single RSC aggregation.
4536                  */
4537                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4538                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4539
4540                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4541                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4542                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4543                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4544
4545                 /*
4546                  * RSC requires the mapping of the queue to the
4547                  * interrupt vector.
4548                  */
4549                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4550         }
4551
4552         dev->data->lro = 1;
4553
4554         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4555
4556         return 0;
4557 }
4558
4559 /*
4560  * Initializes Receive Unit.
4561  */
4562 int __attribute__((cold))
4563 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4564 {
4565         struct ixgbe_hw     *hw;
4566         struct ixgbe_rx_queue *rxq;
4567         uint64_t bus_addr;
4568         uint32_t rxctrl;
4569         uint32_t fctrl;
4570         uint32_t hlreg0;
4571         uint32_t maxfrs;
4572         uint32_t srrctl;
4573         uint32_t rdrxctl;
4574         uint32_t rxcsum;
4575         uint16_t buf_size;
4576         uint16_t i;
4577         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4578         int rc;
4579
4580         PMD_INIT_FUNC_TRACE();
4581         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4582
4583         /*
4584          * Make sure receives are disabled while setting
4585          * up the RX context (registers, descriptor rings, etc.).
4586          */
4587         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4588         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4589
4590         /* Enable receipt of broadcast frames */
4591         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4592         fctrl |= IXGBE_FCTRL_BAM;
4593         fctrl |= IXGBE_FCTRL_DPF;
4594         fctrl |= IXGBE_FCTRL_PMCF;
4595         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4596
4597         /*
4598          * Configure CRC stripping, if any.
4599          */
4600         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4601         if (rx_conf->hw_strip_crc)
4602                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4603         else
4604                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4605
4606         /*
4607          * Configure jumbo frame support, if any.
4608          */
4609         if (rx_conf->jumbo_frame == 1) {
4610                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4611                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4612                 maxfrs &= 0x0000FFFF;
4613                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4614                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4615         } else
4616                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4617
4618         /*
4619          * If loopback mode is configured for 82599, set LPBK bit.
4620          */
4621         if (hw->mac.type == ixgbe_mac_82599EB &&
4622                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4623                 hlreg0 |= IXGBE_HLREG0_LPBK;
4624         else
4625                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4626
4627         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4628
4629         /* Setup RX queues */
4630         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4631                 rxq = dev->data->rx_queues[i];
4632
4633                 /*
4634                  * Reset crc_len in case it was changed after queue setup by a
4635                  * call to configure.
4636                  */
4637                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4638
4639                 /* Setup the Base and Length of the Rx Descriptor Rings */
4640                 bus_addr = rxq->rx_ring_phys_addr;
4641                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4642                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4643                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4644                                 (uint32_t)(bus_addr >> 32));
4645                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4646                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4647                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4648                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4649
4650                 /* Configure the SRRCTL register */
4651 #ifdef RTE_HEADER_SPLIT_ENABLE
4652                 /*
4653                  * Configure Header Split
4654                  */
4655                 if (rx_conf->header_split) {
4656                         if (hw->mac.type == ixgbe_mac_82599EB) {
4657                                 /* Must setup the PSRTYPE register */
4658                                 uint32_t psrtype;
4659
4660                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4661                                         IXGBE_PSRTYPE_UDPHDR   |
4662                                         IXGBE_PSRTYPE_IPV4HDR  |
4663                                         IXGBE_PSRTYPE_IPV6HDR;
4664                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4665                         }
4666                         srrctl = ((rx_conf->split_hdr_size <<
4667                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4668                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4669                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4670                 } else
4671 #endif
4672                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4673
4674                 /* Set if packets are dropped when no descriptors available */
4675                 if (rxq->drop_en)
4676                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4677
4678                 /*
4679                  * Configure the RX buffer size in the BSIZEPACKET field of
4680                  * the SRRCTL register of the queue.
4681                  * The value is in 1 KB resolution. Valid values can be from
4682                  * 1 KB to 16 KB.
4683                  */
4684                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4685                         RTE_PKTMBUF_HEADROOM);
4686                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4687                            IXGBE_SRRCTL_BSIZEPKT_MASK);
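                     /* For example (a sketch, assuming a 2 KB usable data room):
                      * 2048 >> 10 = 2 is written to BSIZEPKT, i.e. a 2 KB Rx
                      * buffer; the read-back below recovers 2048 bytes.
                      */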
4688
4689                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4690
4691                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4692                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4693
4694                 /* Add the dual VLAN tag length to support dual VLAN */
4695                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4696                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4697                         dev->data->scattered_rx = 1;
4698         }
4699
4700         if (rx_conf->enable_scatter)
4701                 dev->data->scattered_rx = 1;
4702
4703         /*
4704          * Device configured with multiple RX queues.
4705          */
4706         ixgbe_dev_mq_rx_configure(dev);
4707
4708         /*
4709          * Setup the Checksum Register.
4710          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4711          * Enable IP/L4 checksum computation by hardware if requested to do so.
4712          */
4713         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4714         rxcsum |= IXGBE_RXCSUM_PCSD;
4715         if (rx_conf->hw_ip_checksum)
4716                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4717         else
4718                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4719
4720         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4721
4722         if (hw->mac.type == ixgbe_mac_82599EB ||
4723             hw->mac.type == ixgbe_mac_X540) {
4724                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4725                 if (rx_conf->hw_strip_crc)
4726                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4727                 else
4728                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4729                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4730                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4731         }
4732
4733         rc = ixgbe_set_rsc(dev);
4734         if (rc)
4735                 return rc;
4736
4737         ixgbe_set_rx_function(dev);
4738
4739         return 0;
4740 }
4741
4742 /*
4743  * Initializes Transmit Unit.
4744  */
4745 void __attribute__((cold))
4746 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4747 {
4748         struct ixgbe_hw     *hw;
4749         struct ixgbe_tx_queue *txq;
4750         uint64_t bus_addr;
4751         uint32_t hlreg0;
4752         uint32_t txctrl;
4753         uint16_t i;
4754
4755         PMD_INIT_FUNC_TRACE();
4756         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4757
4758         /* Enable TX CRC (checksum offload requirement) and hw padding
4759          * (TSO requirement)
4760          */
4761         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4762         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4763         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4764
4765         /* Setup the Base and Length of the Tx Descriptor Rings */
4766         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4767                 txq = dev->data->tx_queues[i];
4768
4769                 bus_addr = txq->tx_ring_phys_addr;
4770                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4771                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4772                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4773                                 (uint32_t)(bus_addr >> 32));
4774                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4775                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4776                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4777                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4778                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4779
4780                 /*
4781                  * Disable Tx Head Writeback RO bit, since this hoses
4782                  * bookkeeping if things aren't delivered in order.
4783                  */
4784                 switch (hw->mac.type) {
4785                 case ixgbe_mac_82598EB:
4786                         txctrl = IXGBE_READ_REG(hw,
4787                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4788                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4789                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4790                                         txctrl);
4791                         break;
4792
4793                 case ixgbe_mac_82599EB:
4794                 case ixgbe_mac_X540:
4795                 case ixgbe_mac_X550:
4796                 case ixgbe_mac_X550EM_x:
4797                 case ixgbe_mac_X550EM_a:
4798                 default:
4799                         txctrl = IXGBE_READ_REG(hw,
4800                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4801                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4802                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4803                                         txctrl);
4804                         break;
4805                 }
4806         }
4807
4808         /* Device configured with multiple TX queues. */
4809         ixgbe_dev_mq_tx_configure(dev);
4810 }
4811
4812 /*
4813  * Set up link for 82599 loopback mode Tx->Rx.
4814  */
4815 static inline void __attribute__((cold))
4816 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4817 {
4818         PMD_INIT_FUNC_TRACE();
4819
4820         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4821                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4822                                 IXGBE_SUCCESS) {
4823                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4824                         /* ignore error */
4825                         return;
4826                 }
4827         }
4828
4829         /* Restart link */
4830         IXGBE_WRITE_REG(hw,
4831                         IXGBE_AUTOC,
4832                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4833         ixgbe_reset_pipeline_82599(hw);
4834
4835         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4836         msec_delay(50);
4837 }
4838
4839
4840 /*
4841  * Start Transmit and Receive Units.
4842  */
4843 int __attribute__((cold))
4844 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4845 {
4846         struct ixgbe_hw     *hw;
4847         struct ixgbe_tx_queue *txq;
4848         struct ixgbe_rx_queue *rxq;
4849         uint32_t txdctl;
4850         uint32_t dmatxctl;
4851         uint32_t rxctrl;
4852         uint16_t i;
4853         int ret = 0;
4854
4855         PMD_INIT_FUNC_TRACE();
4856         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4857
4858         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4859                 txq = dev->data->tx_queues[i];
4860                 /* Setup Transmit Threshold Registers */
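                     /* PTHRESH, HTHRESH and WTHRESH occupy bits 6:0, 14:8 and
                      * 22:16 of TXDCTL respectively (a note derived from the
                      * masks and shifts below).
                      */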
4861                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4862                 txdctl |= txq->pthresh & 0x7F;
4863                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4864                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4865                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4866         }
4867
4868         if (hw->mac.type != ixgbe_mac_82598EB) {
4869                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4870                 dmatxctl |= IXGBE_DMATXCTL_TE;
4871                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4872         }
4873
4874         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4875                 txq = dev->data->tx_queues[i];
4876                 if (!txq->tx_deferred_start) {
4877                         ret = ixgbe_dev_tx_queue_start(dev, i);
4878                         if (ret < 0)
4879                                 return ret;
4880                 }
4881         }
4882
4883         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4884                 rxq = dev->data->rx_queues[i];
4885                 if (!rxq->rx_deferred_start) {
4886                         ret = ixgbe_dev_rx_queue_start(dev, i);
4887                         if (ret < 0)
4888                                 return ret;
4889                 }
4890         }
4891
4892         /* Enable Receive engine */
4893         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4894         if (hw->mac.type == ixgbe_mac_82598EB)
4895                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4896         rxctrl |= IXGBE_RXCTRL_RXEN;
4897         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4898
4899         /* If loopback mode is enabled for 82599, set up the link accordingly */
4900         if (hw->mac.type == ixgbe_mac_82599EB &&
4901                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4902                 ixgbe_setup_loopback_link_82599(hw);
4903
4904         return 0;
4905 }
4906
4907 /*
4908  * Start Receive Units for specified queue.
4909  */
4910 int __attribute__((cold))
4911 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4912 {
4913         struct ixgbe_hw     *hw;
4914         struct ixgbe_rx_queue *rxq;
4915         uint32_t rxdctl;
4916         int poll_ms;
4917
4918         PMD_INIT_FUNC_TRACE();
4919         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4920
4921         if (rx_queue_id < dev->data->nb_rx_queues) {
4922                 rxq = dev->data->rx_queues[rx_queue_id];
4923
4924                 /* Allocate buffers for descriptor rings */
4925                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4926                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4927                                      rx_queue_id);
4928                         return -1;
4929                 }
4930                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4931                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4932                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4933
4934                 /* Wait until RX Enable ready */
4935                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4936                 do {
4937                         rte_delay_ms(1);
4938                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4939                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4940                 if (!poll_ms)
4941                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4942                                      rx_queue_id);
4943                 rte_wmb();
4944                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4945                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4946                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4947         } else
4948                 return -1;
4949
4950         return 0;
4951 }
4952
4953 /*
4954  * Stop Receive Units for specified queue.
4955  */
4956 int __attribute__((cold))
4957 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4958 {
4959         struct ixgbe_hw     *hw;
4960         struct ixgbe_adapter *adapter =
4961                 (struct ixgbe_adapter *)dev->data->dev_private;
4962         struct ixgbe_rx_queue *rxq;
4963         uint32_t rxdctl;
4964         int poll_ms;
4965
4966         PMD_INIT_FUNC_TRACE();
4967         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4968
4969         if (rx_queue_id < dev->data->nb_rx_queues) {
4970                 rxq = dev->data->rx_queues[rx_queue_id];
4971
4972                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4973                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4974                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4975
4976                 /* Wait until RX Enable bit clear */
4977                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4978                 do {
4979                         rte_delay_ms(1);
4980                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4981                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4982                 if (!poll_ms)
4983                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4984                                      rx_queue_id);
4985
4986                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4987
4988                 ixgbe_rx_queue_release_mbufs(rxq);
4989                 ixgbe_reset_rx_queue(adapter, rxq);
4990                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4991         } else
4992                 return -1;
4993
4994         return 0;
4995 }
4996
4997
4998 /*
4999  * Start Transmit Units for specified queue.
5000  */
5001 int __attribute__((cold))
5002 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5003 {
5004         struct ixgbe_hw     *hw;
5005         struct ixgbe_tx_queue *txq;
5006         uint32_t txdctl;
5007         int poll_ms;
5008
5009         PMD_INIT_FUNC_TRACE();
5010         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5011
5012         if (tx_queue_id < dev->data->nb_tx_queues) {
5013                 txq = dev->data->tx_queues[tx_queue_id];
5014                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5015                 txdctl |= IXGBE_TXDCTL_ENABLE;
5016                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5017
5018                 /* Wait until TX Enable ready */
5019                 if (hw->mac.type == ixgbe_mac_82599EB) {
5020                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5021                         do {
5022                                 rte_delay_ms(1);
5023                                 txdctl = IXGBE_READ_REG(hw,
5024                                         IXGBE_TXDCTL(txq->reg_idx));
5025                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5026                         if (!poll_ms)
5027                                 PMD_INIT_LOG(ERR, "Could not enable "
5028                                              "Tx Queue %d", tx_queue_id);
5029                 }
5030                 rte_wmb();
5031                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5032                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
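                /*
                 * Illustrative note: both pointers are programmed to 0
                 * because the queue's descriptor ring is reset before a
                 * (re)start (see txq->ops->reset in the stop path), so
                 * hardware and software begin from the same empty position.
                 */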
5033                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5034         } else
5035                 return -1;
5036
5037         return 0;
5038 }
5039
5040 /*
5041  * Stop Transmit Units for specified queue.
5042  */
5043 int __attribute__((cold))
5044 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5045 {
5046         struct ixgbe_hw     *hw;
5047         struct ixgbe_tx_queue *txq;
5048         uint32_t txdctl;
5049         uint32_t txtdh, txtdt;
5050         int poll_ms;
5051
5052         PMD_INIT_FUNC_TRACE();
5053         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5054
5055         if (tx_queue_id >= dev->data->nb_tx_queues)
5056                 return -1;
5057
5058         txq = dev->data->tx_queues[tx_queue_id];
5059
5060         /* Wait until TX queue is empty */
5061         if (hw->mac.type == ixgbe_mac_82599EB) {
5062                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5063                 do {
5064                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5065                         txtdh = IXGBE_READ_REG(hw,
5066                                                IXGBE_TDH(txq->reg_idx));
5067                         txtdt = IXGBE_READ_REG(hw,
5068                                                IXGBE_TDT(txq->reg_idx));
5069                 } while (--poll_ms && (txtdh != txtdt));
5070                 if (!poll_ms)
5071                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5072                                      "when stopping.", tx_queue_id);
5073         }
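        /*
         * Sketch of the check above: TDH is the hardware head (next
         * descriptor the NIC will process) and TDT is the software tail
         * (next descriptor software will hand over).  TDH == TDT therefore
         * means every descriptor posted so far has been consumed, i.e. the
         * ring is drained and it is safe to disable the queue.
         */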
5074
5075         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5076         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5077         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5078
5079         /* Wait until TX Enable bit clear */
5080         if (hw->mac.type == ixgbe_mac_82599EB) {
5081                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5082                 do {
5083                         rte_delay_ms(1);
5084                         txdctl = IXGBE_READ_REG(hw,
5085                                                 IXGBE_TXDCTL(txq->reg_idx));
5086                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5087                 if (!poll_ms)
5088                         PMD_INIT_LOG(ERR, "Could not disable "
5089                                      "Tx Queue %d", tx_queue_id);
5090         }
5091
5092         if (txq->ops != NULL) {
5093                 txq->ops->release_mbufs(txq);
5094                 txq->ops->reset(txq);
5095         }
5096         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5097
5098         return 0;
5099 }
5100
5101 void
5102 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5103         struct rte_eth_rxq_info *qinfo)
5104 {
5105         struct ixgbe_rx_queue *rxq;
5106
5107         rxq = dev->data->rx_queues[queue_id];
5108
5109         qinfo->mp = rxq->mb_pool;
5110         qinfo->scattered_rx = dev->data->scattered_rx;
5111         qinfo->nb_desc = rxq->nb_rx_desc;
5112
5113         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5114         qinfo->conf.rx_drop_en = rxq->drop_en;
5115         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5116 }
5117
5118 void
5119 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5120         struct rte_eth_txq_info *qinfo)
5121 {
5122         struct ixgbe_tx_queue *txq;
5123
5124         txq = dev->data->tx_queues[queue_id];
5125
5126         qinfo->nb_desc = txq->nb_tx_desc;
5127
5128         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5129         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5130         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5131
5132         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5133         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5134         qinfo->conf.txq_flags = txq->txq_flags;
5135         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5136 }
5137
5138 /*
5139  * [VF] Initializes Receive Unit.
5140  */
5141 int __attribute__((cold))
5142 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5143 {
5144         struct ixgbe_hw     *hw;
5145         struct ixgbe_rx_queue *rxq;
5146         uint64_t bus_addr;
5147         uint32_t srrctl, psrtype = 0;
5148         uint16_t buf_size;
5149         uint16_t i;
5150         int ret;
5151
5152         PMD_INIT_FUNC_TRACE();
5153         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5154
5155         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5156                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5157                         "it must be a power of 2");
5158                 return -1;
5159         }
5160
5161         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5162                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5163                         "it must not exceed %d",
5164                         hw->mac.max_rx_queues);
5165                 return -1;
5166         }
5167
5168         /*
5169          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5170          * disables VF packet reception if the PF MTU is > 1500.
5171          * This is done to deal with an 82599 limitation that forces
5172          * the PF and all VFs to share the same MTU.
5173          * The PF driver then re-enables VF packet reception when the
5174          * VF driver issues an IXGBE_VF_SET_LPE request.
5175          * In the meantime, the VF device cannot be used, even if the VF driver
5176          * and the guest VM network stack are ready to accept packets with a
5177          * size up to the PF MTU.
5178          * As a work-around to this PF behaviour, force the call to
5179          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5180          * VF packet reception works in all cases.
5181          */
5182         ixgbevf_rlpml_set_vf(hw,
5183                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
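        /*
         * A rough sketch of the exchange described above (assuming the
         * mailbox message names used by the base code):
         *
         *   VF: IXGBE_VF_RESET  ----------------------->  PF
         *       (PF MTU > 1500: PF disables VF Rx)
         *   VF: IXGBE_VF_SET_LPE(max_rx_pkt_len)  ----->  PF
         *       (PF re-enables VF Rx)
         *
         * ixgbevf_rlpml_set_vf() issues the IXGBE_VF_SET_LPE message, which
         * is why it is called unconditionally here.
         */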
5184
5185         /* Setup RX queues */
5186         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5187                 rxq = dev->data->rx_queues[i];
5188
5189                 /* Allocate buffers for descriptor rings */
5190                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5191                 if (ret)
5192                         return ret;
5193
5194                 /* Setup the Base and Length of the Rx Descriptor Rings */
5195                 bus_addr = rxq->rx_ring_phys_addr;
5196
5197                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5198                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5199                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5200                                 (uint32_t)(bus_addr >> 32));
5201                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5202                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5203                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5204                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5205
5206
5207                 /* Configure the SRRCTL register */
5208 #ifdef RTE_HEADER_SPLIT_ENABLE
5209                 /*
5210                  * Configure Header Split
5211                  */
5212                 if (dev->data->dev_conf.rxmode.header_split) {
5213                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5214                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5215                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5216                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5217                 } else
5218 #endif
5219                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5220
5221                 /* Set if packets are dropped when no descriptors available */
5222                 if (rxq->drop_en)
5223                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5224
5225                 /*
5226                  * Configure the RX buffer size in the BSIZEPACKET field of
5227                  * the SRRCTL register of the queue.
5228                  * The value is in 1 KB resolution. Valid values can be from
5229                  * 1 KB to 16 KB.
5230                  */
5231                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5232                         RTE_PKTMBUF_HEADROOM);
5233                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5234                            IXGBE_SRRCTL_BSIZEPKT_MASK);
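                /*
                 * Worked example (assuming common mempool defaults): with a
                 * data room of 2176 bytes and RTE_PKTMBUF_HEADROOM of 128,
                 * buf_size = 2048; 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT (10)
                 * yields 2, i.e. 2 KB receive buffers are programmed.
                 */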
5235
5236                 /*
5237                  * VF modification to write virtual function SRRCTL register
5238                  */
5239                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5240
5241                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5242                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5243
5244                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5245                     /* Add dual VLAN tag length to support double-tagged (QinQ) frames */
5246                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5247                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5248                         if (!dev->data->scattered_rx)
5249                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5250                         dev->data->scattered_rx = 1;
5251                 }
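                /*
                 * Example of the check above, with 2 KB buffers as in the
                 * worked example: a standard 1518-byte frame plus
                 * 2 * IXGBE_VLAN_TAG_SIZE (2 * 4 bytes) gives 1526, which
                 * fits, so scattered Rx stays off; a 9000-byte jumbo
                 * max_rx_pkt_len with the same buffers would force it on.
                 */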
5252         }
5253
5254 #ifdef RTE_HEADER_SPLIT_ENABLE
5255         if (dev->data->dev_conf.rxmode.header_split)
5256                 /* Must setup the PSRTYPE register */
5257                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5258                         IXGBE_PSRTYPE_UDPHDR   |
5259                         IXGBE_PSRTYPE_IPV4HDR  |
5260                         IXGBE_PSRTYPE_IPV6HDR;
5261 #endif
5262
5263         /* Set RQPL for VF RSS according to max Rx queue */
5264         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5265                 IXGBE_PSRTYPE_RQPL_SHIFT;
5266         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
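        /*
         * Illustrative mapping (per the RQPL encoding assumed above):
         * nb_rx_queues >> 1 gives 0 for 1 queue, 1 for 2 queues and 2 for
         * 4 queues, which matches the "queues per pool" value expected in
         * the VFPSRTYPE RQPL field.
         */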
5267
5268         ixgbe_set_rx_function(dev);
5269
5270         return 0;
5271 }
5272
5273 /*
5274  * [VF] Initializes Transmit Unit.
5275  */
5276 void __attribute__((cold))
5277 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5278 {
5279         struct ixgbe_hw     *hw;
5280         struct ixgbe_tx_queue *txq;
5281         uint64_t bus_addr;
5282         uint32_t txctrl;
5283         uint16_t i;
5284
5285         PMD_INIT_FUNC_TRACE();
5286         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5287
5288         /* Setup the Base and Length of the Tx Descriptor Rings */
5289         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5290                 txq = dev->data->tx_queues[i];
5291                 bus_addr = txq->tx_ring_phys_addr;
5292                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5293                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5294                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5295                                 (uint32_t)(bus_addr >> 32));
5296                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5297                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5298                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5299                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5300                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5301
5302                 /*
5303                  * Disable the Tx Head Writeback RO bit, since it corrupts
5304                  * the bookkeeping if completions are not delivered in order.
5305                  */
5306                 txctrl = IXGBE_READ_REG(hw,
5307                                 IXGBE_VFDCA_TXCTRL(i));
5308                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5309                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5310                                 txctrl);
5311         }
5312 }
5313
5314 /*
5315  * [VF] Start Transmit and Receive Units.
5316  */
5317 void __attribute__((cold))
5318 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5319 {
5320         struct ixgbe_hw     *hw;
5321         struct ixgbe_tx_queue *txq;
5322         struct ixgbe_rx_queue *rxq;
5323         uint32_t txdctl;
5324         uint32_t rxdctl;
5325         uint16_t i;
5326         int poll_ms;
5327
5328         PMD_INIT_FUNC_TRACE();
5329         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5330
5331         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5332                 txq = dev->data->tx_queues[i];
5333                 /* Setup Transmit Threshold Registers */
5334                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5335                 txdctl |= txq->pthresh & 0x7F;
5336                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5337                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5338                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
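                /*
                 * For reference, the packing used above places pthresh in
                 * TXDCTL bits 6:0, hthresh in bits 14:8 and wthresh in
                 * bits 22:16 (each value masked to 7 bits).
                 */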
5339         }
5340
5341         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5342
5343                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5344                 txdctl |= IXGBE_TXDCTL_ENABLE;
5345                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5346
5347                 poll_ms = 10;
5348                 /* Wait until TX Enable ready */
5349                 do {
5350                         rte_delay_ms(1);
5351                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5352                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5353                 if (!poll_ms)
5354                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5355         }
5356         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5357
5358                 rxq = dev->data->rx_queues[i];
5359
5360                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5361                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5362                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5363
5364                 /* Wait until RX Enable ready */
5365                 poll_ms = 10;
5366                 do {
5367                         rte_delay_ms(1);
5368                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5369                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5370                 if (!poll_ms)
5371                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5372                 rte_wmb();
5373                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5374
5375         }
5376 }
5377
5378 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
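/*
 * These weak definitions only satisfy the linker: when the vector PMD is
 * compiled in, the strong symbols from the vector Rx implementation override
 * them; when it is not, their error/zero return values steer the driver onto
 * the scalar code paths.
 */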
5379 int __attribute__((weak))
5380 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5381 {
5382         return -1;
5383 }
5384
5385 uint16_t __attribute__((weak))
5386 ixgbe_recv_pkts_vec(
5387         void __rte_unused *rx_queue,
5388         struct rte_mbuf __rte_unused **rx_pkts,
5389         uint16_t __rte_unused nb_pkts)
5390 {
5391         return 0;
5392 }
5393
5394 uint16_t __attribute__((weak))
5395 ixgbe_recv_scattered_pkts_vec(
5396         void __rte_unused *rx_queue,
5397         struct rte_mbuf __rte_unused **rx_pkts,
5398         uint16_t __rte_unused nb_pkts)
5399 {
5400         return 0;
5401 }
5402
5403 int __attribute__((weak))
5404 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5405 {
5406         return -1;
5407 }