4 * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5 * Copyright 2014 6WIND S.A.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 #include <sys/queue.h>
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
86 #define IXGBE_TX_IEEE1588_TMST 0
88 /* Bit Mask to indicate what bits required for building TX context */
89 #define IXGBE_TX_OFFLOAD_MASK ( \
95 PKT_TX_OUTER_IP_CKSUM | \
96 PKT_TX_SEC_OFFLOAD | \
97 IXGBE_TX_IEEE1588_TMST)
99 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
100 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
103 #define RTE_PMD_USE_PREFETCH
106 #ifdef RTE_PMD_USE_PREFETCH
108 * Prefetch a cache line into all cache levels.
110 #define rte_ixgbe_prefetch(p) rte_prefetch0(p)
112 #define rte_ixgbe_prefetch(p) do {} while (0)
115 #ifdef RTE_IXGBE_INC_VECTOR
116 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
120 /*********************************************************************
124 **********************************************************************/
127 * Check for descriptors with their DD bit set and free mbufs.
128 * Return the total number of buffers freed.
130 static __rte_always_inline int
131 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
133 struct ixgbe_tx_entry *txep;
136 struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
138 /* check DD bit on threshold descriptor */
139 status = txq->tx_ring[txq->tx_next_dd].wb.status;
140 if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
144 * first buffer to free from S/W ring is at index
145 * tx_next_dd - (tx_rs_thresh-1)
147 txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
149 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
150 /* free buffers one at a time */
151 m = rte_pktmbuf_prefree_seg(txep->mbuf);
154 if (unlikely(m == NULL))
157 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
158 (nb_free > 0 && m->pool != free[0]->pool)) {
159 rte_mempool_put_bulk(free[0]->pool,
160 (void **)free, nb_free);
168 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
170 /* buffers were freed, update counters */
171 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
172 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
173 if (txq->tx_next_dd >= txq->nb_tx_desc)
174 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
176 return txq->tx_rs_thresh;
179 /* Populate 4 descriptors with data from 4 mbufs */
181 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
183 uint64_t buf_dma_addr;
187 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
188 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
189 pkt_len = (*pkts)->data_len;
191 /* write data to descriptor */
192 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
194 txdp->read.cmd_type_len =
195 rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
197 txdp->read.olinfo_status =
198 rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
200 rte_prefetch0(&(*pkts)->pool);
204 /* Populate 1 descriptor with data from 1 mbuf */
206 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
208 uint64_t buf_dma_addr;
211 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
212 pkt_len = (*pkts)->data_len;
214 /* write data to descriptor */
215 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
216 txdp->read.cmd_type_len =
217 rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
218 txdp->read.olinfo_status =
219 rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
220 rte_prefetch0(&(*pkts)->pool);
224 * Fill H/W descriptor ring with mbuf data.
225 * Copy mbuf pointers to the S/W ring.
228 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
231 volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
232 struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
233 const int N_PER_LOOP = 4;
234 const int N_PER_LOOP_MASK = N_PER_LOOP-1;
235 int mainpart, leftover;
239 * Process most of the packets in chunks of N pkts. Any
240 * leftover packets will get processed one at a time.
242 mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
243 leftover = (nb_pkts & ((uint32_t) N_PER_LOOP_MASK));
244 for (i = 0; i < mainpart; i += N_PER_LOOP) {
245 /* Copy N mbuf pointers to the S/W ring */
246 for (j = 0; j < N_PER_LOOP; ++j) {
247 (txep + i + j)->mbuf = *(pkts + i + j);
249 tx4(txdp + i, pkts + i);
252 if (unlikely(leftover > 0)) {
253 for (i = 0; i < leftover; ++i) {
254 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
255 tx1(txdp + mainpart + i, pkts + mainpart + i);
260 static inline uint16_t
261 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
264 struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
265 volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
269 * Begin scanning the H/W ring for done descriptors when the
270 * number of available descriptors drops below tx_free_thresh. For
271 * each done descriptor, free the associated buffer.
273 if (txq->nb_tx_free < txq->tx_free_thresh)
274 ixgbe_tx_free_bufs(txq);
276 /* Only use descriptors that are available */
277 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
278 if (unlikely(nb_pkts == 0))
281 /* Use exactly nb_pkts descriptors */
282 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
285 * At this point, we know there are enough descriptors in the
286 * ring to transmit all the packets. This assumes that each
287 * mbuf contains a single segment, and that no new offloads
288 * are expected, which would require a new context descriptor.
292 * See if we're going to wrap-around. If so, handle the top
293 * of the descriptor ring first, then do the bottom. If not,
294 * the processing looks just like the "bottom" part anyway...
296 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
297 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
298 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
301 * We know that the last descriptor in the ring will need to
302 * have its RS bit set because tx_rs_thresh has to be
303 * a divisor of the ring size
305 tx_r[txq->tx_next_rs].read.cmd_type_len |=
306 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
307 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
312 /* Fill H/W descriptor ring with mbuf data */
313 ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
314 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
317 * Determine if RS bit should be set
318 * This is what we actually want:
319 * if ((txq->tx_tail - 1) >= txq->tx_next_rs)
320 * but instead of subtracting 1 and doing >=, we can just do
321 * greater than without subtracting.
323 if (txq->tx_tail > txq->tx_next_rs) {
324 tx_r[txq->tx_next_rs].read.cmd_type_len |=
325 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
326 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
328 if (txq->tx_next_rs >= txq->nb_tx_desc)
329 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
333 * Check for wrap-around. This would only happen if we used
334 * up to the last descriptor in the ring, no more, no less.
336 if (txq->tx_tail >= txq->nb_tx_desc)
339 /* update tail pointer */
341 IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
347 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
352 /* Try to transmit at least chunks of TX_MAX_BURST pkts */
353 if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
354 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
356 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
361 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
362 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
363 nb_tx = (uint16_t)(nb_tx + ret);
364 nb_pkts = (uint16_t)(nb_pkts - ret);
372 #ifdef RTE_IXGBE_INC_VECTOR
374 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
378 struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
383 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
384 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
397 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
398 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
399 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
400 union ixgbe_crypto_tx_desc_md *mdata)
402 uint32_t type_tucmd_mlhl;
403 uint32_t mss_l4len_idx = 0;
405 uint32_t vlan_macip_lens;
406 union ixgbe_tx_offload tx_offload_mask;
407 uint32_t seqnum_seed = 0;
409 ctx_idx = txq->ctx_curr;
410 tx_offload_mask.data[0] = 0;
411 tx_offload_mask.data[1] = 0;
414 /* Specify which HW CTX to upload. */
415 mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
417 if (ol_flags & PKT_TX_VLAN_PKT) {
418 tx_offload_mask.vlan_tci |= ~0;
421 /* check if TCP segmentation required for this packet */
422 if (ol_flags & PKT_TX_TCP_SEG) {
423 /* implies IP cksum in IPv4 */
424 if (ol_flags & PKT_TX_IP_CKSUM)
425 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
426 IXGBE_ADVTXD_TUCMD_L4T_TCP |
427 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
429 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
430 IXGBE_ADVTXD_TUCMD_L4T_TCP |
431 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433 tx_offload_mask.l2_len |= ~0;
434 tx_offload_mask.l3_len |= ~0;
435 tx_offload_mask.l4_len |= ~0;
436 tx_offload_mask.tso_segsz |= ~0;
437 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
438 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
439 } else { /* no TSO, check if hardware checksum is needed */
440 if (ol_flags & PKT_TX_IP_CKSUM) {
441 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
442 tx_offload_mask.l2_len |= ~0;
443 tx_offload_mask.l3_len |= ~0;
446 switch (ol_flags & PKT_TX_L4_MASK) {
447 case PKT_TX_UDP_CKSUM:
448 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
449 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
450 mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
451 tx_offload_mask.l2_len |= ~0;
452 tx_offload_mask.l3_len |= ~0;
454 case PKT_TX_TCP_CKSUM:
455 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
456 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
457 mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
458 tx_offload_mask.l2_len |= ~0;
459 tx_offload_mask.l3_len |= ~0;
461 case PKT_TX_SCTP_CKSUM:
462 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
463 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
464 mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
465 tx_offload_mask.l2_len |= ~0;
466 tx_offload_mask.l3_len |= ~0;
469 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
470 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
475 if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
476 tx_offload_mask.outer_l2_len |= ~0;
477 tx_offload_mask.outer_l3_len |= ~0;
478 tx_offload_mask.l2_len |= ~0;
479 seqnum_seed |= tx_offload.outer_l3_len
480 << IXGBE_ADVTXD_OUTER_IPLEN;
481 seqnum_seed |= tx_offload.l2_len
482 << IXGBE_ADVTXD_TUNNEL_LEN;
484 if (ol_flags & PKT_TX_SEC_OFFLOAD) {
486 (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & mdata->sa_idx);
487 type_tucmd_mlhl |= mdata->enc ?
488 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
489 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
491 (mdata->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
492 tx_offload_mask.sa_idx |= ~0;
493 tx_offload_mask.sec_pad_len |= ~0;
496 txq->ctx_cache[ctx_idx].flags = ol_flags;
497 txq->ctx_cache[ctx_idx].tx_offload.data[0] =
498 tx_offload_mask.data[0] & tx_offload.data[0];
499 txq->ctx_cache[ctx_idx].tx_offload.data[1] =
500 tx_offload_mask.data[1] & tx_offload.data[1];
501 txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
503 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
504 vlan_macip_lens = tx_offload.l3_len;
505 if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
506 vlan_macip_lens |= (tx_offload.outer_l2_len <<
507 IXGBE_ADVTXD_MACLEN_SHIFT);
509 vlan_macip_lens |= (tx_offload.l2_len <<
510 IXGBE_ADVTXD_MACLEN_SHIFT);
511 vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
512 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
513 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
514 ctx_txd->seqnum_seed = seqnum_seed;
518 * Check which hardware context can be used. Use the existing match
519 * or create a new context descriptor.
521 static inline uint32_t
522 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
523 union ixgbe_tx_offload tx_offload)
525 /* If match with the current used context */
526 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
527 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
528 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
529 & tx_offload.data[0])) &&
530 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
531 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
532 & tx_offload.data[1]))))
533 return txq->ctx_curr;
535 /* What if match with the next context */
537 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
538 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
539 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
540 & tx_offload.data[0])) &&
541 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
542 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
543 & tx_offload.data[1]))))
544 return txq->ctx_curr;
546 /* Mismatch, use the previous context */
547 return IXGBE_CTX_NUM;
550 static inline uint32_t
551 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
555 if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
556 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
557 if (ol_flags & PKT_TX_IP_CKSUM)
558 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
559 if (ol_flags & PKT_TX_TCP_SEG)
560 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
564 static inline uint32_t
565 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
567 uint32_t cmdtype = 0;
569 if (ol_flags & PKT_TX_VLAN_PKT)
570 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
571 if (ol_flags & PKT_TX_TCP_SEG)
572 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
573 if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
574 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
575 if (ol_flags & PKT_TX_MACSEC)
576 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
580 /* Default RS bit threshold values */
581 #ifndef DEFAULT_TX_RS_THRESH
582 #define DEFAULT_TX_RS_THRESH 32
584 #ifndef DEFAULT_TX_FREE_THRESH
585 #define DEFAULT_TX_FREE_THRESH 32
588 /* Reset transmit descriptors after they have been used */
590 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
592 struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
593 volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
594 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
595 uint16_t nb_tx_desc = txq->nb_tx_desc;
596 uint16_t desc_to_clean_to;
597 uint16_t nb_tx_to_clean;
600 /* Determine the last descriptor needing to be cleaned */
601 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
602 if (desc_to_clean_to >= nb_tx_desc)
603 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
605 /* Check to make sure the last descriptor to clean is done */
606 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
607 status = txr[desc_to_clean_to].wb.status;
608 if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
609 PMD_TX_FREE_LOG(DEBUG,
610 "TX descriptor %4u is not done"
611 "(port=%d queue=%d)",
613 txq->port_id, txq->queue_id);
614 /* Failed to clean any descriptors, better luck next time */
618 /* Figure out how many descriptors will be cleaned */
619 if (last_desc_cleaned > desc_to_clean_to)
620 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
623 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
626 PMD_TX_FREE_LOG(DEBUG,
627 "Cleaning %4u TX descriptors: %4u to %4u "
628 "(port=%d queue=%d)",
629 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
630 txq->port_id, txq->queue_id);
633 * The last descriptor to clean is done, so that means all the
634 * descriptors from the last descriptor that was cleaned
635 * up to the last descriptor with the RS bit set
636 * are done. Only reset the threshold descriptor.
638 txr[desc_to_clean_to].wb.status = 0;
640 /* Update the txq to reflect the last descriptor that was cleaned */
641 txq->last_desc_cleaned = desc_to_clean_to;
642 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
649 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
652 struct ixgbe_tx_queue *txq;
653 struct ixgbe_tx_entry *sw_ring;
654 struct ixgbe_tx_entry *txe, *txn;
655 volatile union ixgbe_adv_tx_desc *txr;
656 volatile union ixgbe_adv_tx_desc *txd, *txp;
657 struct rte_mbuf *tx_pkt;
658 struct rte_mbuf *m_seg;
659 uint64_t buf_dma_addr;
660 uint32_t olinfo_status;
661 uint32_t cmd_type_len;
672 union ixgbe_tx_offload tx_offload;
675 tx_offload.data[0] = 0;
676 tx_offload.data[1] = 0;
678 sw_ring = txq->sw_ring;
680 tx_id = txq->tx_tail;
681 txe = &sw_ring[tx_id];
684 /* Determine if the descriptor ring needs to be cleaned. */
685 if (txq->nb_tx_free < txq->tx_free_thresh)
686 ixgbe_xmit_cleanup(txq);
688 rte_prefetch0(&txe->mbuf->pool);
691 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
694 pkt_len = tx_pkt->pkt_len;
697 * Determine how many (if any) context descriptors
698 * are needed for offload functionality.
700 ol_flags = tx_pkt->ol_flags;
701 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
703 /* If hardware offload required */
704 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
706 tx_offload.l2_len = tx_pkt->l2_len;
707 tx_offload.l3_len = tx_pkt->l3_len;
708 tx_offload.l4_len = tx_pkt->l4_len;
709 tx_offload.vlan_tci = tx_pkt->vlan_tci;
710 tx_offload.tso_segsz = tx_pkt->tso_segsz;
711 tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
712 tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
714 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
715 (union ixgbe_crypto_tx_desc_md *)
717 tx_offload.sa_idx = ipsec_mdata->sa_idx;
718 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
721 /* If new context need be built or reuse the exist ctx. */
722 ctx = what_advctx_update(txq, tx_ol_req,
724 /* Only allocate context descriptor if required*/
725 new_ctx = (ctx == IXGBE_CTX_NUM);
730 * Keep track of how many descriptors are used this loop
731 * This will always be the number of segments + the number of
732 * Context descriptors required to transmit the packet
734 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
737 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
738 /* set RS on the previous packet in the burst */
739 txp->read.cmd_type_len |=
740 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
743 * The number of descriptors that must be allocated for a
744 * packet is the number of segments of that packet, plus 1
745 * Context Descriptor for the hardware offload, if any.
746 * Determine the last TX descriptor to allocate in the TX ring
747 * for the packet, starting from the current position (tx_id)
750 tx_last = (uint16_t) (tx_id + nb_used - 1);
753 if (tx_last >= txq->nb_tx_desc)
754 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
756 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
757 " tx_first=%u tx_last=%u",
758 (unsigned) txq->port_id,
759 (unsigned) txq->queue_id,
765 * Make sure there are enough TX descriptors available to
766 * transmit the entire packet.
767 * nb_used better be less than or equal to txq->tx_rs_thresh
769 if (nb_used > txq->nb_tx_free) {
770 PMD_TX_FREE_LOG(DEBUG,
771 "Not enough free TX descriptors "
772 "nb_used=%4u nb_free=%4u "
773 "(port=%d queue=%d)",
774 nb_used, txq->nb_tx_free,
775 txq->port_id, txq->queue_id);
777 if (ixgbe_xmit_cleanup(txq) != 0) {
778 /* Could not clean any descriptors */
784 /* nb_used better be <= txq->tx_rs_thresh */
785 if (unlikely(nb_used > txq->tx_rs_thresh)) {
786 PMD_TX_FREE_LOG(DEBUG,
787 "The number of descriptors needed to "
788 "transmit the packet exceeds the "
789 "RS bit threshold. This will impact "
791 "nb_used=%4u nb_free=%4u "
793 "(port=%d queue=%d)",
794 nb_used, txq->nb_tx_free,
796 txq->port_id, txq->queue_id);
798 * Loop here until there are enough TX
799 * descriptors or until the ring cannot be
802 while (nb_used > txq->nb_tx_free) {
803 if (ixgbe_xmit_cleanup(txq) != 0) {
805 * Could not clean any
817 * By now there are enough free TX descriptors to transmit
822 * Set common flags of all TX Data Descriptors.
824 * The following bits must be set in all Data Descriptors:
825 * - IXGBE_ADVTXD_DTYP_DATA
826 * - IXGBE_ADVTXD_DCMD_DEXT
828 * The following bits must be set in the first Data Descriptor
829 * and are ignored in the other ones:
830 * - IXGBE_ADVTXD_DCMD_IFCS
831 * - IXGBE_ADVTXD_MAC_1588
832 * - IXGBE_ADVTXD_DCMD_VLE
834 * The following bits must only be set in the last Data
836 * - IXGBE_TXD_CMD_EOP
838 * The following bits can be set in any Data Descriptor, but
839 * are only set in the last Data Descriptor:
842 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
843 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
845 #ifdef RTE_LIBRTE_IEEE1588
846 if (ol_flags & PKT_TX_IEEE1588_TMST)
847 cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
853 if (ol_flags & PKT_TX_TCP_SEG) {
854 /* when TSO is on, paylen in descriptor is the
855 * not the packet len but the tcp payload len */
856 pkt_len -= (tx_offload.l2_len +
857 tx_offload.l3_len + tx_offload.l4_len);
861 * Setup the TX Advanced Context Descriptor if required
864 volatile struct ixgbe_adv_tx_context_desc *
867 ctx_txd = (volatile struct
868 ixgbe_adv_tx_context_desc *)
871 txn = &sw_ring[txe->next_id];
872 rte_prefetch0(&txn->mbuf->pool);
874 if (txe->mbuf != NULL) {
875 rte_pktmbuf_free_seg(txe->mbuf);
879 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
881 (union ixgbe_crypto_tx_desc_md *)
884 txe->last_id = tx_last;
885 tx_id = txe->next_id;
890 * Setup the TX Advanced Data Descriptor,
891 * This path will go through
892 * whatever new/reuse the context descriptor
894 cmd_type_len |= tx_desc_ol_flags_to_cmdtype(ol_flags);
895 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
896 olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
899 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
901 olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
906 txn = &sw_ring[txe->next_id];
907 rte_prefetch0(&txn->mbuf->pool);
909 if (txe->mbuf != NULL)
910 rte_pktmbuf_free_seg(txe->mbuf);
914 * Set up Transmit Data Descriptor.
916 slen = m_seg->data_len;
917 buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
918 txd->read.buffer_addr =
919 rte_cpu_to_le_64(buf_dma_addr);
920 txd->read.cmd_type_len =
921 rte_cpu_to_le_32(cmd_type_len | slen);
922 txd->read.olinfo_status =
923 rte_cpu_to_le_32(olinfo_status);
924 txe->last_id = tx_last;
925 tx_id = txe->next_id;
928 } while (m_seg != NULL);
931 * The last packet data descriptor needs End Of Packet (EOP)
933 cmd_type_len |= IXGBE_TXD_CMD_EOP;
934 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
935 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
937 /* Set RS bit only on threshold packets' last descriptor */
938 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
939 PMD_TX_FREE_LOG(DEBUG,
940 "Setting RS bit on TXD id="
941 "%4u (port=%d queue=%d)",
942 tx_last, txq->port_id, txq->queue_id);
944 cmd_type_len |= IXGBE_TXD_CMD_RS;
946 /* Update txq RS bit counters */
952 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
956 /* set RS on last packet in the burst */
958 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
963 * Set the Transmit Descriptor Tail (TDT)
965 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
966 (unsigned) txq->port_id, (unsigned) txq->queue_id,
967 (unsigned) tx_id, (unsigned) nb_tx);
968 IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
969 txq->tx_tail = tx_id;
974 /*********************************************************************
978 **********************************************************************/
980 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
985 struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
987 for (i = 0; i < nb_pkts; i++) {
989 ol_flags = m->ol_flags;
992 * Check if packet meets requirements for number of segments
994 * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
998 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
1003 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
1004 rte_errno = -ENOTSUP;
1008 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1009 ret = rte_validate_tx_offload(m);
1015 ret = rte_net_intel_cksum_prepare(m);
1025 /*********************************************************************
1029 **********************************************************************/
1031 #define IXGBE_PACKET_TYPE_ETHER 0X00
1032 #define IXGBE_PACKET_TYPE_IPV4 0X01
1033 #define IXGBE_PACKET_TYPE_IPV4_TCP 0X11
1034 #define IXGBE_PACKET_TYPE_IPV4_UDP 0X21
1035 #define IXGBE_PACKET_TYPE_IPV4_SCTP 0X41
1036 #define IXGBE_PACKET_TYPE_IPV4_EXT 0X03
1037 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP 0X13
1038 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP 0X23
1039 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP 0X43
1040 #define IXGBE_PACKET_TYPE_IPV6 0X04
1041 #define IXGBE_PACKET_TYPE_IPV6_TCP 0X14
1042 #define IXGBE_PACKET_TYPE_IPV6_UDP 0X24
1043 #define IXGBE_PACKET_TYPE_IPV6_SCTP 0X44
1044 #define IXGBE_PACKET_TYPE_IPV6_EXT 0X0C
1045 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP 0X1C
1046 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP 0X2C
1047 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP 0X4C
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6 0X05
1049 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP 0X15
1050 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP 0X25
1051 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP 0X45
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6 0X07
1053 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP 0X17
1054 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP 0X27
1055 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP 0X47
1056 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
1057 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
1058 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
1059 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP 0X4D
1060 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT 0X0F
1061 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP 0X1F
1062 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP 0X2F
1063 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP 0X4F
1065 #define IXGBE_PACKET_TYPE_NVGRE 0X00
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV4 0X01
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP 0X11
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP 0X21
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP 0X41
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT 0X03
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP 0X13
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP 0X23
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP 0X43
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV6 0X04
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP 0X14
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP 0X24
1077 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP 0X44
1078 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT 0X0C
1079 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP 0X1C
1080 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP 0X2C
1081 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP 0X4C
1082 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6 0X05
1083 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP 0X15
1084 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP 0X25
1085 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT 0X0D
1086 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1087 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1089 #define IXGBE_PACKET_TYPE_VXLAN 0X80
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV4 0X81
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP 0x91
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP 0xA1
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP 0xC1
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT 0x83
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP 0X93
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP 0XA3
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP 0XC3
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV6 0X84
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP 0X94
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP 0XA4
1101 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP 0XC4
1102 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT 0X8C
1103 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP 0X9C
1104 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP 0XAC
1105 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP 0XCC
1106 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6 0X85
1107 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP 0X95
1108 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP 0XA5
1109 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT 0X8D
1110 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1111 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1114 * Use 2 different table for normal packet and tunnel packet
1115 * to save the space.
1118 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1119 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1120 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1122 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1123 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1124 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1125 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1126 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1127 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1128 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1129 RTE_PTYPE_L3_IPV4_EXT,
1130 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1131 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1132 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1133 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1134 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1135 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1136 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1138 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1139 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1140 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1141 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1142 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1143 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1144 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1145 RTE_PTYPE_L3_IPV6_EXT,
1146 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1147 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1148 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1149 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1150 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1151 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1152 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1153 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1154 RTE_PTYPE_INNER_L3_IPV6,
1155 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1156 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1157 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1158 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1159 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1160 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1161 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1162 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1163 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1164 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1165 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1166 RTE_PTYPE_INNER_L3_IPV6,
1167 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1168 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1169 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1170 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1171 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1172 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1173 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1174 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1175 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1176 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1178 RTE_PTYPE_INNER_L3_IPV6_EXT,
1179 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1180 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1181 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1182 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1183 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1184 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1185 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1186 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1187 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1188 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1189 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1190 RTE_PTYPE_INNER_L3_IPV6_EXT,
1191 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1192 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1193 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1194 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1195 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1196 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1197 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1198 RTE_PTYPE_L2_ETHER |
1199 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1200 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1204 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1205 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1206 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1207 RTE_PTYPE_INNER_L2_ETHER,
1208 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1209 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1211 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1212 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1214 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1215 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1216 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1217 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1218 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1220 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1221 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1223 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1224 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1225 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1226 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1227 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1228 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1229 RTE_PTYPE_INNER_L4_TCP,
1230 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1231 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1232 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1233 RTE_PTYPE_INNER_L4_TCP,
1234 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1235 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1236 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1237 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1238 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1240 RTE_PTYPE_INNER_L4_TCP,
1241 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1242 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1243 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1244 RTE_PTYPE_INNER_L3_IPV4,
1245 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1246 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1247 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1248 RTE_PTYPE_INNER_L4_UDP,
1249 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1250 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1251 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1252 RTE_PTYPE_INNER_L4_UDP,
1253 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1254 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1255 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1256 RTE_PTYPE_INNER_L4_SCTP,
1257 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1258 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1259 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1260 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1261 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1262 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1263 RTE_PTYPE_INNER_L4_UDP,
1264 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1265 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1266 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1267 RTE_PTYPE_INNER_L4_SCTP,
1268 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1269 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1270 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1271 RTE_PTYPE_INNER_L3_IPV4,
1272 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1273 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1274 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1275 RTE_PTYPE_INNER_L4_SCTP,
1276 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1277 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1278 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1279 RTE_PTYPE_INNER_L4_SCTP,
1280 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1281 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1282 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1283 RTE_PTYPE_INNER_L4_TCP,
1284 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1285 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1286 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1287 RTE_PTYPE_INNER_L4_UDP,
1289 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1290 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1292 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1293 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1294 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1295 RTE_PTYPE_INNER_L3_IPV4,
1296 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1297 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1298 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1299 RTE_PTYPE_INNER_L3_IPV4_EXT,
1300 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1301 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1302 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1303 RTE_PTYPE_INNER_L3_IPV6,
1304 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1305 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1306 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1307 RTE_PTYPE_INNER_L3_IPV4,
1308 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1309 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1310 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1311 RTE_PTYPE_INNER_L3_IPV6_EXT,
1312 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1313 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1314 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1315 RTE_PTYPE_INNER_L3_IPV4,
1316 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1317 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1318 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1319 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1320 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1321 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1322 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1323 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1324 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1325 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1326 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1327 RTE_PTYPE_INNER_L3_IPV4,
1328 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1329 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1330 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1331 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1332 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1333 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1334 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1335 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1336 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1337 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1338 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1339 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1340 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1341 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1342 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1343 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1344 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1345 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1346 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1347 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1348 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1349 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1350 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1351 RTE_PTYPE_INNER_L3_IPV4,
1352 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1353 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1354 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1355 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1356 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1357 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1358 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1359 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1360 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1361 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1362 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1363 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1364 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1365 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1366 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1367 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1368 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1369 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1370 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1371 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1372 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1373 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1374 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1375 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1376 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1377 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1378 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1379 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1382 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
1383 static inline uint32_t
1384 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1387 if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1388 return RTE_PTYPE_UNKNOWN;
1390 pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1392 /* For tunnel packet */
1393 if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1394 /* Remove the tunnel bit to save the space. */
1395 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1396 return ptype_table_tn[pkt_info];
1400 * For x550, if it's not tunnel,
1401 * tunnel type bit should be set to 0.
1402 * Reuse 82599's mask.
1404 pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1406 return ptype_table[pkt_info];
1409 static inline uint64_t
1410 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1412 static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1413 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1414 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1415 PKT_RX_RSS_HASH, 0, 0, 0,
1416 0, 0, 0, PKT_RX_FDIR,
1418 #ifdef RTE_LIBRTE_IEEE1588
1419 static uint64_t ip_pkt_etqf_map[8] = {
1420 0, 0, 0, PKT_RX_IEEE1588_PTP,
1424 if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1425 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1426 ip_rss_types_map[pkt_info & 0XF];
1428 return ip_rss_types_map[pkt_info & 0XF];
1430 return ip_rss_types_map[pkt_info & 0XF];
1434 static inline uint64_t
1435 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1440 * Check if VLAN present only.
1441 * Do not check whether L3/L4 rx checksum done by NIC or not,
1442 * That can be found from rte_eth_rxmode.hw_ip_checksum flag
1444 pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? vlan_flags : 0;
1446 #ifdef RTE_LIBRTE_IEEE1588
1447 if (rx_status & IXGBE_RXD_STAT_TMST)
1448 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1453 static inline uint64_t
1454 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1459 * Bit 31: IPE, IPv4 checksum error
1460 * Bit 30: L4I, L4I integrity error
1462 static uint64_t error_to_pkt_flags_map[4] = {
1463 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1464 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1465 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1466 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1468 pkt_flags = error_to_pkt_flags_map[(rx_status >>
1469 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1471 if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1472 (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1473 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1476 if (rx_status & IXGBE_RXD_STAT_SECP) {
1477 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1478 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1479 pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1486 * LOOK_AHEAD defines how many desc statuses to check beyond the
1487 * current descriptor.
1488 * It must be a pound define for optimal performance.
1489 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1490 * function only works with LOOK_AHEAD=8.
1492 #define LOOK_AHEAD 8
1493 #if (LOOK_AHEAD != 8)
1494 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1497 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1499 volatile union ixgbe_adv_rx_desc *rxdp;
1500 struct ixgbe_rx_entry *rxep;
1501 struct rte_mbuf *mb;
1505 uint32_t s[LOOK_AHEAD];
1506 uint32_t pkt_info[LOOK_AHEAD];
1507 int i, j, nb_rx = 0;
1509 uint64_t vlan_flags = rxq->vlan_flags;
1511 /* get references to current descriptor and S/W ring entry */
1512 rxdp = &rxq->rx_ring[rxq->rx_tail];
1513 rxep = &rxq->sw_ring[rxq->rx_tail];
1515 status = rxdp->wb.upper.status_error;
1516 /* check to make sure there is at least 1 packet to receive */
1517 if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1521 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1522 * reference packets that are ready to be received.
1524 for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1525 i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1526 /* Read desc statuses backwards to avoid race condition */
1527 for (j = 0; j < LOOK_AHEAD; j++)
1528 s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1532 /* Compute how many status bits were set */
1533 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1534 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1537 for (j = 0; j < nb_dd; j++)
1538 pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1543 /* Translate descriptor info to mbuf format */
1544 for (j = 0; j < nb_dd; ++j) {
1546 pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1548 mb->data_len = pkt_len;
1549 mb->pkt_len = pkt_len;
1550 mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1552 /* convert descriptor fields to rte mbuf flags */
1553 pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1555 pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1556 pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1557 ((uint16_t)pkt_info[j]);
1558 mb->ol_flags = pkt_flags;
1560 ixgbe_rxd_pkt_info_to_pkt_type
1561 (pkt_info[j], rxq->pkt_type_mask);
1563 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1564 mb->hash.rss = rte_le_to_cpu_32(
1565 rxdp[j].wb.lower.hi_dword.rss);
1566 else if (pkt_flags & PKT_RX_FDIR) {
1567 mb->hash.fdir.hash = rte_le_to_cpu_16(
1568 rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1569 IXGBE_ATR_HASH_MASK;
1570 mb->hash.fdir.id = rte_le_to_cpu_16(
1571 rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1575 /* Move mbuf pointers from the S/W ring to the stage */
1576 for (j = 0; j < LOOK_AHEAD; ++j) {
1577 rxq->rx_stage[i + j] = rxep[j].mbuf;
1580 /* stop if all requested packets could not be received */
1581 if (nb_dd != LOOK_AHEAD)
1585 /* clear software ring entries so we can cleanup correctly */
1586 for (i = 0; i < nb_rx; ++i) {
1587 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1595 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1597 volatile union ixgbe_adv_rx_desc *rxdp;
1598 struct ixgbe_rx_entry *rxep;
1599 struct rte_mbuf *mb;
1604 /* allocate buffers in bulk directly into the S/W ring */
1605 alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1606 rxep = &rxq->sw_ring[alloc_idx];
1607 diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1608 rxq->rx_free_thresh);
1609 if (unlikely(diag != 0))
1612 rxdp = &rxq->rx_ring[alloc_idx];
1613 for (i = 0; i < rxq->rx_free_thresh; ++i) {
1614 /* populate the static rte mbuf fields */
1617 mb->port = rxq->port_id;
1620 rte_mbuf_refcnt_set(mb, 1);
1621 mb->data_off = RTE_PKTMBUF_HEADROOM;
1623 /* populate the descriptors */
1624 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1625 rxdp[i].read.hdr_addr = 0;
1626 rxdp[i].read.pkt_addr = dma_addr;
1629 /* update state of internal queue structure */
1630 rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1631 if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1632 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1638 static inline uint16_t
1639 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1642 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1645 /* how many packets are ready to return? */
1646 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1648 /* copy mbuf pointers to the application's packet list */
1649 for (i = 0; i < nb_pkts; ++i)
1650 rx_pkts[i] = stage[i];
1652 /* update internal queue state */
1653 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1654 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1659 static inline uint16_t
1660 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1663 struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1666 /* Any previously recv'd pkts will be returned from the Rx stage */
1667 if (rxq->rx_nb_avail)
1668 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1670 /* Scan the H/W ring for packets to receive */
1671 nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1673 /* update internal queue state */
1674 rxq->rx_next_avail = 0;
1675 rxq->rx_nb_avail = nb_rx;
1676 rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1678 /* if required, allocate new buffers to replenish descriptors */
1679 if (rxq->rx_tail > rxq->rx_free_trigger) {
1680 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1682 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1685 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1686 "queue_id=%u", (unsigned) rxq->port_id,
1687 (unsigned) rxq->queue_id);
1689 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1690 rxq->rx_free_thresh;
1693 * Need to rewind any previous receives if we cannot
1694 * allocate new buffers to replenish the old ones.
1696 rxq->rx_nb_avail = 0;
1697 rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1698 for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1699 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1704 /* update tail pointer */
1706 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1710 if (rxq->rx_tail >= rxq->nb_rx_desc)
1713 /* received any packets this loop? */
1714 if (rxq->rx_nb_avail)
1715 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1720 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1722 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1727 if (unlikely(nb_pkts == 0))
1730 if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1731 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1733 /* request is relatively large, chunk it up */
1738 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1739 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1740 nb_rx = (uint16_t)(nb_rx + ret);
1741 nb_pkts = (uint16_t)(nb_pkts - ret);
1750 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1753 struct ixgbe_rx_queue *rxq;
1754 volatile union ixgbe_adv_rx_desc *rx_ring;
1755 volatile union ixgbe_adv_rx_desc *rxdp;
1756 struct ixgbe_rx_entry *sw_ring;
1757 struct ixgbe_rx_entry *rxe;
1758 struct rte_mbuf *rxm;
1759 struct rte_mbuf *nmb;
1760 union ixgbe_adv_rx_desc rxd;
1769 uint64_t vlan_flags;
1774 rx_id = rxq->rx_tail;
1775 rx_ring = rxq->rx_ring;
1776 sw_ring = rxq->sw_ring;
1777 vlan_flags = rxq->vlan_flags;
1778 while (nb_rx < nb_pkts) {
1780 * The order of operations here is important as the DD status
1781 * bit must not be read after any other descriptor fields.
1782 * rx_ring and rxdp are pointing to volatile data so the order
1783 * of accesses cannot be reordered by the compiler. If they were
1784 * not volatile, they could be reordered which could lead to
1785 * using invalid descriptor fields when read from rxd.
1787 rxdp = &rx_ring[rx_id];
1788 staterr = rxdp->wb.upper.status_error;
1789 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1796 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1797 * is likely to be invalid and to be dropped by the various
1798 * validation checks performed by the network stack.
1800 * Allocate a new mbuf to replenish the RX ring descriptor.
1801 * If the allocation fails:
1802 * - arrange for that RX descriptor to be the first one
1803 * being parsed the next time the receive function is
1804 * invoked [on the same queue].
1806 * - Stop parsing the RX ring and return immediately.
1808 * This policy do not drop the packet received in the RX
1809 * descriptor for which the allocation of a new mbuf failed.
1810 * Thus, it allows that packet to be later retrieved if
1811 * mbuf have been freed in the mean time.
1812 * As a side effect, holding RX descriptors instead of
1813 * systematically giving them back to the NIC may lead to
1814 * RX ring exhaustion situations.
1815 * However, the NIC can gracefully prevent such situations
1816 * to happen by sending specific "back-pressure" flow control
1817 * frames to its peer(s).
1819 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1820 "ext_err_stat=0x%08x pkt_len=%u",
1821 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1822 (unsigned) rx_id, (unsigned) staterr,
1823 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1825 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1827 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1828 "queue_id=%u", (unsigned) rxq->port_id,
1829 (unsigned) rxq->queue_id);
1830 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1835 rxe = &sw_ring[rx_id];
1837 if (rx_id == rxq->nb_rx_desc)
1840 /* Prefetch next mbuf while processing current one. */
1841 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1844 * When next RX descriptor is on a cache-line boundary,
1845 * prefetch the next 4 RX descriptors and the next 8 pointers
1848 if ((rx_id & 0x3) == 0) {
1849 rte_ixgbe_prefetch(&rx_ring[rx_id]);
1850 rte_ixgbe_prefetch(&sw_ring[rx_id]);
1856 rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1857 rxdp->read.hdr_addr = 0;
1858 rxdp->read.pkt_addr = dma_addr;
1861 * Initialize the returned mbuf.
1862 * 1) setup generic mbuf fields:
1863 * - number of segments,
1866 * - RX port identifier.
1867 * 2) integrate hardware offload data, if any:
1868 * - RSS flag & hash,
1869 * - IP checksum flag,
1870 * - VLAN TCI, if any,
1873 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1875 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1876 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1879 rxm->pkt_len = pkt_len;
1880 rxm->data_len = pkt_len;
1881 rxm->port = rxq->port_id;
1883 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1884 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1885 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1887 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1888 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1889 pkt_flags = pkt_flags |
1890 ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1891 rxm->ol_flags = pkt_flags;
1893 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1894 rxq->pkt_type_mask);
1896 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1897 rxm->hash.rss = rte_le_to_cpu_32(
1898 rxd.wb.lower.hi_dword.rss);
1899 else if (pkt_flags & PKT_RX_FDIR) {
1900 rxm->hash.fdir.hash = rte_le_to_cpu_16(
1901 rxd.wb.lower.hi_dword.csum_ip.csum) &
1902 IXGBE_ATR_HASH_MASK;
1903 rxm->hash.fdir.id = rte_le_to_cpu_16(
1904 rxd.wb.lower.hi_dword.csum_ip.ip_id);
1907 * Store the mbuf address into the next entry of the array
1908 * of returned packets.
1910 rx_pkts[nb_rx++] = rxm;
1912 rxq->rx_tail = rx_id;
1915 * If the number of free RX descriptors is greater than the RX free
1916 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1918 * Update the RDT with the value of the last processed RX descriptor
1919 * minus 1, to guarantee that the RDT register is never equal to the
1920 * RDH register, which creates a "full" ring situation from the
1921 * hardware point of view...
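* Worked example with illustrative values: with rx_free_thresh = 32,
* the tail register below is updated only after more than 32
* descriptors have been processed and held, and it is then written as
* rx_id - 1 (wrapping to nb_rx_desc - 1 when rx_id == 0) so that RDT
* never equals RDH.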
1923 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1924 if (nb_hold > rxq->rx_free_thresh) {
1925 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1926 "nb_hold=%u nb_rx=%u",
1927 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1928 (unsigned) rx_id, (unsigned) nb_hold,
1930 rx_id = (uint16_t) ((rx_id == 0) ?
1931 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1932 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1935 rxq->nb_rx_hold = nb_hold;
1940 * Detect an RSC descriptor.
1942 static inline uint32_t
1943 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1945 return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1946 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1950 * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1952 * Fill the following info in the HEAD buffer of the Rx cluster:
1953 * - RX port identifier
1954 * - hardware offload data, if any:
1956 * - IP checksum flag
1957 * - VLAN TCI, if any
1959 * @head HEAD of the packet cluster
1960 * @desc HW descriptor to get data from
1961 * @rxq Pointer to the Rx queue
1964 ixgbe_fill_cluster_head_buf(
1965 struct rte_mbuf *head,
1966 union ixgbe_adv_rx_desc *desc,
1967 struct ixgbe_rx_queue *rxq,
1973 head->port = rxq->port_id;
1975 /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1976 * set in the pkt_flags field.
1978 head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1979 pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1980 pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1981 pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1982 pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1983 head->ol_flags = pkt_flags;
1985 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1987 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1988 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1989 else if (pkt_flags & PKT_RX_FDIR) {
1990 head->hash.fdir.hash =
1991 rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1992 & IXGBE_ATR_HASH_MASK;
1993 head->hash.fdir.id =
1994 rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1999 * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2001 * @rx_queue Rx queue handle
2002 * @rx_pkts table of received packets
2003 * @nb_pkts size of rx_pkts table
2004 * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2006 * Handles the Rx HW ring completions when the RSC feature is configured. Uses an
2007 * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
2009 * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2010 * 1) When non-EOP RSC completion arrives:
2011 * a) Update the HEAD of the current RSC aggregation cluster with the new
2012 * segment's data length.
2013 * b) Set the "next" pointer of the current segment to point to the segment
2014 * at the NEXTP index.
2015 * c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2016 * in the sw_rsc_ring.
2017 * 2) When EOP arrives we just update the cluster's total length and offload
2018 * flags and deliver the cluster up to the upper layers. In our case - put it
2019 * in the rx_pkts table.
2021 * Returns the number of received packets/clusters (according to the "bulk
2022 * receive" interface).
2024 static inline uint16_t
2025 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2028 struct ixgbe_rx_queue *rxq = rx_queue;
2029 volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2030 struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2031 struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2032 uint16_t rx_id = rxq->rx_tail;
2034 uint16_t nb_hold = rxq->nb_rx_hold;
2035 uint16_t prev_id = rxq->rx_tail;
2037 while (nb_rx < nb_pkts) {
2039 struct ixgbe_rx_entry *rxe;
2040 struct ixgbe_scattered_rx_entry *sc_entry;
2041 struct ixgbe_scattered_rx_entry *next_sc_entry;
2042 struct ixgbe_rx_entry *next_rxe = NULL;
2043 struct rte_mbuf *first_seg;
2044 struct rte_mbuf *rxm;
2045 struct rte_mbuf *nmb;
2046 union ixgbe_adv_rx_desc rxd;
2049 volatile union ixgbe_adv_rx_desc *rxdp;
2054 * The code in this whole file uses the volatile pointer to
2055 * ensure the read ordering of the status and the rest of the
2056 * descriptor fields (on the compiler level only!!!). This is so
2057 * UGLY - why not just use a compiler barrier instead? DPDK
2058 * even has rte_compiler_barrier() for that.
2060 * But most importantly this is just wrong because this doesn't
2061 * ensure memory ordering in a general case at all. For
2062 * instance, DPDK is supposed to work on Power CPUs where a
2063 * compiler barrier may just not be enough!
2065 * I tried to write only this function properly to have a
2066 * starting point (as a part of an LRO/RSC series) but the
2067 * compiler cursed at me when I tried to cast away the
2068 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2069 * keeping it the way it is for now.
2071 * The code in this file is broken in so many other places and
2072 * will just not work on a big-endian CPU anyway, therefore the
2073 * lines below will have to be revisited together with the rest
2077 * - Get rid of "volatile" crap and let the compiler do its
2079 * - Use the proper memory barrier (rte_rmb()) to ensure the
2080 * memory ordering below.
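* A minimal sketch of the suggested alternative (not what the code
* below does) would read the DD bit first and then issue rte_rmb()
* before touching the remaining descriptor fields:
*
*   staterr = rte_le_to_cpu_32(rx_ring[rx_id].wb.upper.status_error);
*   if (!(staterr & IXGBE_RXDADV_STAT_DD))
*       break;
*   rte_rmb();
*   rxd = rx_ring[rx_id];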
2082 rxdp = &rx_ring[rx_id];
2083 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2085 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2090 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2091 "staterr=0x%x data_len=%u",
2092 rxq->port_id, rxq->queue_id, rx_id, staterr,
2093 rte_le_to_cpu_16(rxd.wb.upper.length));
2096 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2098 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2099 "port_id=%u queue_id=%u",
2100 rxq->port_id, rxq->queue_id);
2102 rte_eth_devices[rxq->port_id].data->
2103 rx_mbuf_alloc_failed++;
2106 } else if (nb_hold > rxq->rx_free_thresh) {
2107 uint16_t next_rdt = rxq->rx_free_trigger;
2109 if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2111 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2113 nb_hold -= rxq->rx_free_thresh;
2115 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2116 "port_id=%u queue_id=%u",
2117 rxq->port_id, rxq->queue_id);
2119 rte_eth_devices[rxq->port_id].data->
2120 rx_mbuf_alloc_failed++;
2126 rxe = &sw_ring[rx_id];
2127 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2129 next_id = rx_id + 1;
2130 if (next_id == rxq->nb_rx_desc)
2133 /* Prefetch next mbuf while processing current one. */
2134 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2137 * When next RX descriptor is on a cache-line boundary,
2138 * prefetch the next 4 RX descriptors and the next 4 pointers
2141 if ((next_id & 0x3) == 0) {
2142 rte_ixgbe_prefetch(&rx_ring[next_id]);
2143 rte_ixgbe_prefetch(&sw_ring[next_id]);
2150 rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2152 * Update RX descriptor with the physical address of the
2153 * new data buffer of the newly allocated mbuf.
2157 rxm->data_off = RTE_PKTMBUF_HEADROOM;
2158 rxdp->read.hdr_addr = 0;
2159 rxdp->read.pkt_addr = dma;
2164 * Set data length & data buffer address of mbuf.
2166 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2167 rxm->data_len = data_len;
2172 * Get next descriptor index:
2173 * - For RSC it's in the NEXTP field.
2174 * - For a scattered packet - it's just a following
2177 if (ixgbe_rsc_count(&rxd))
2179 (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2180 IXGBE_RXDADV_NEXTP_SHIFT;
2184 next_sc_entry = &sw_sc_ring[nextp_id];
2185 next_rxe = &sw_ring[nextp_id];
2186 rte_ixgbe_prefetch(next_rxe);
2189 sc_entry = &sw_sc_ring[rx_id];
2190 first_seg = sc_entry->fbuf;
2191 sc_entry->fbuf = NULL;
2194 * If this is the first buffer of the received packet,
2195 * set the pointer to the first mbuf of the packet and
2196 * initialize its context.
2197 * Otherwise, update the total length and the number of segments
2198 * of the current scattered packet, and update the pointer to
2199 * the last mbuf of the current packet.
2201 if (first_seg == NULL) {
2203 first_seg->pkt_len = data_len;
2204 first_seg->nb_segs = 1;
2206 first_seg->pkt_len += data_len;
2207 first_seg->nb_segs++;
2214 * If this is not the last buffer of the received packet, update
2215 * the pointer to the first mbuf at the NEXTP entry in the
2216 * sw_sc_ring and continue to parse the RX ring.
2218 if (!eop && next_rxe) {
2219 rxm->next = next_rxe->mbuf;
2220 next_sc_entry->fbuf = first_seg;
2224 /* Initialize the first mbuf of the returned packet */
2225 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2228 * Deal with the case when HW CRC stripping is disabled.
2229 * That can't happen when LRO is enabled, but still could
2230 * happen for scattered RX mode.
2232 first_seg->pkt_len -= rxq->crc_len;
2233 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2234 struct rte_mbuf *lp;
2236 for (lp = first_seg; lp->next != rxm; lp = lp->next)
2239 first_seg->nb_segs--;
2240 lp->data_len -= rxq->crc_len - rxm->data_len;
2242 rte_pktmbuf_free_seg(rxm);
2244 rxm->data_len -= rxq->crc_len;
2246 /* Prefetch data of first segment, if configured to do so. */
2247 rte_packet_prefetch((char *)first_seg->buf_addr +
2248 first_seg->data_off);
2251 * Store the mbuf address into the next entry of the array
2252 * of returned packets.
2254 rx_pkts[nb_rx++] = first_seg;
2258 * Record index of the next RX descriptor to probe.
2260 rxq->rx_tail = rx_id;
2263 * If the number of free RX descriptors is greater than the RX free
2264 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2266 * Update the RDT with the value of the last processed RX descriptor
2267 * minus 1, to guarantee that the RDT register is never equal to the
2268 * RDH register, which creates a "full" ring situation from the
2269 * hardware point of view...
2271 if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2272 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2273 "nb_hold=%u nb_rx=%u",
2274 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2277 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2281 rxq->nb_rx_hold = nb_hold;
2286 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2289 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2293 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2296 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2299 /*********************************************************************
2301 * Queue management functions
2303 **********************************************************************/
2305 static void __attribute__((cold))
2306 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2310 if (txq->sw_ring != NULL) {
2311 for (i = 0; i < txq->nb_tx_desc; i++) {
2312 if (txq->sw_ring[i].mbuf != NULL) {
2313 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2314 txq->sw_ring[i].mbuf = NULL;
2320 static void __attribute__((cold))
2321 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2324 txq->sw_ring != NULL)
2325 rte_free(txq->sw_ring);
2328 static void __attribute__((cold))
2329 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2331 if (txq != NULL && txq->ops != NULL) {
2332 txq->ops->release_mbufs(txq);
2333 txq->ops->free_swring(txq);
2338 void __attribute__((cold))
2339 ixgbe_dev_tx_queue_release(void *txq)
2341 ixgbe_tx_queue_release(txq);
2344 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2345 static void __attribute__((cold))
2346 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2348 static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2349 struct ixgbe_tx_entry *txe = txq->sw_ring;
2352 /* Zero out HW ring memory */
2353 for (i = 0; i < txq->nb_tx_desc; i++) {
2354 txq->tx_ring[i] = zeroed_desc;
2357 /* Initialize SW ring entries */
2358 prev = (uint16_t) (txq->nb_tx_desc - 1);
2359 for (i = 0; i < txq->nb_tx_desc; i++) {
2360 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2362 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2365 txe[prev].next_id = i;
2369 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2370 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2373 txq->nb_tx_used = 0;
2375 * Always allow 1 descriptor to be un-allocated to avoid
2376 * a H/W race condition
2378 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2379 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2381 memset((void *)&txq->ctx_cache, 0,
2382 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2385 static const struct ixgbe_txq_ops def_txq_ops = {
2386 .release_mbufs = ixgbe_tx_queue_release_mbufs,
2387 .free_swring = ixgbe_tx_free_swring,
2388 .reset = ixgbe_reset_tx_queue,
2391 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2392 * the queue parameters. Used in tx_queue_setup by the primary process and then
2393 * in dev_init by a secondary process when attaching to an existing ethdev.
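* For example, a queue whose txq_flags cover IXGBE_SIMPLE_FLAGS, with
* no DEV_TX_OFFLOAD_SECURITY and tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST,
* gets the simple path below (and the vector path when tx_rs_thresh is
* also <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ and the vector setup succeeds);
* any other combination falls back to the full-featured ixgbe_xmit_pkts().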
2395 void __attribute__((cold))
2396 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2398 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2399 if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
2400 (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) &&
2401 !(dev->data->dev_conf.txmode.offloads
2402 & DEV_TX_OFFLOAD_SECURITY)) {
2403 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2404 dev->tx_pkt_prepare = NULL;
2405 #ifdef RTE_IXGBE_INC_VECTOR
2406 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2407 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2408 ixgbe_txq_vec_setup(txq) == 0)) {
2409 PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2410 dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2413 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2415 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2417 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2418 (unsigned long)txq->txq_flags,
2419 (unsigned long)IXGBE_SIMPLE_FLAGS);
2421 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2422 (unsigned long)txq->tx_rs_thresh,
2423 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2424 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2425 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2429 int __attribute__((cold))
2430 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2433 unsigned int socket_id,
2434 const struct rte_eth_txconf *tx_conf)
2436 const struct rte_memzone *tz;
2437 struct ixgbe_tx_queue *txq;
2438 struct ixgbe_hw *hw;
2439 uint16_t tx_rs_thresh, tx_free_thresh;
2441 PMD_INIT_FUNC_TRACE();
2442 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2445 * Validate number of transmit descriptors.
2446 * It must not exceed the hardware maximum, and must be a multiple
2449 if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2450 (nb_desc > IXGBE_MAX_RING_DESC) ||
2451 (nb_desc < IXGBE_MIN_RING_DESC)) {
2456 * The following two parameters control the setting of the RS bit on
2457 * transmit descriptors.
2458 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2459 * descriptors have been used.
2460 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2461 * descriptors are used or if the number of descriptors required
2462 * to transmit a packet is greater than the number of free TX
2464 * The following constraints must be satisfied:
2465 * tx_rs_thresh must be greater than 0.
2466 * tx_rs_thresh must be less than the size of the ring minus 2.
2467 * tx_rs_thresh must be less than or equal to tx_free_thresh.
2468 * tx_rs_thresh must be a divisor of the ring size.
2469 * tx_free_thresh must be greater than 0.
2470 * tx_free_thresh must be less than the size of the ring minus 3.
2471 * One descriptor in the TX ring is used as a sentinel to avoid a
2472 * H/W race condition, hence the maximum threshold constraints.
2473 * When set to zero use default values.
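* Worked example with illustrative values: nb_desc = 512 with
* tx_rs_thresh = 32 and tx_free_thresh = 64 satisfies all of the
* constraints above: 32 > 0, 32 < 510, 32 <= 64, 512 % 32 == 0
* and 64 < 509.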
2475 tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2476 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2477 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2478 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2479 if (tx_rs_thresh >= (nb_desc - 2)) {
2480 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2481 "of TX descriptors minus 2. (tx_rs_thresh=%u "
2482 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2483 (int)dev->data->port_id, (int)queue_idx);
2486 if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2487 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2488 "(tx_rs_thresh=%u port=%d queue=%d)",
2489 DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2490 (int)dev->data->port_id, (int)queue_idx);
2493 if (tx_free_thresh >= (nb_desc - 3)) {
2494 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2495 "tx_free_thresh must be less than the number of "
2496 "TX descriptors minus 3. (tx_free_thresh=%u "
2497 "port=%d queue=%d)",
2498 (unsigned int)tx_free_thresh,
2499 (int)dev->data->port_id, (int)queue_idx);
2502 if (tx_rs_thresh > tx_free_thresh) {
2503 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2504 "tx_free_thresh. (tx_free_thresh=%u "
2505 "tx_rs_thresh=%u port=%d queue=%d)",
2506 (unsigned int)tx_free_thresh,
2507 (unsigned int)tx_rs_thresh,
2508 (int)dev->data->port_id,
2512 if ((nb_desc % tx_rs_thresh) != 0) {
2513 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2514 "number of TX descriptors. (tx_rs_thresh=%u "
2515 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2516 (int)dev->data->port_id, (int)queue_idx);
2521 * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2522 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2523 * by the NIC and all descriptors are written back after the NIC
2524 * accumulates WTHRESH descriptors.
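* For example, a queue configured with tx_rs_thresh = 32 must also be
* configured with wthresh = 0; otherwise the check below rejects the
* configuration.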
2526 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2527 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2528 "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2529 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2530 (int)dev->data->port_id, (int)queue_idx);
2534 /* Free memory prior to re-allocation if needed... */
2535 if (dev->data->tx_queues[queue_idx] != NULL) {
2536 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2537 dev->data->tx_queues[queue_idx] = NULL;
2540 /* First allocate the tx queue data structure */
2541 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2542 RTE_CACHE_LINE_SIZE, socket_id);
2547 * Allocate TX ring hardware descriptors. A memzone large enough to
2548 * handle the maximum ring size is allocated in order to allow for
2549 * resizing in later calls to the queue setup function.
2551 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2552 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2553 IXGBE_ALIGN, socket_id);
2555 ixgbe_tx_queue_release(txq);
2559 txq->nb_tx_desc = nb_desc;
2560 txq->tx_rs_thresh = tx_rs_thresh;
2561 txq->tx_free_thresh = tx_free_thresh;
2562 txq->pthresh = tx_conf->tx_thresh.pthresh;
2563 txq->hthresh = tx_conf->tx_thresh.hthresh;
2564 txq->wthresh = tx_conf->tx_thresh.wthresh;
2565 txq->queue_id = queue_idx;
2566 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2567 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2568 txq->port_id = dev->data->port_id;
2569 txq->txq_flags = tx_conf->txq_flags;
2570 txq->ops = &def_txq_ops;
2571 txq->tx_deferred_start = tx_conf->tx_deferred_start;
2572 txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2573 DEV_TX_OFFLOAD_SECURITY);
2576 * Set VFTDT for the virtual function if a VF is detected
2578 if (hw->mac.type == ixgbe_mac_82599_vf ||
2579 hw->mac.type == ixgbe_mac_X540_vf ||
2580 hw->mac.type == ixgbe_mac_X550_vf ||
2581 hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2582 hw->mac.type == ixgbe_mac_X550EM_a_vf)
2583 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2585 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2587 txq->tx_ring_phys_addr = tz->phys_addr;
2588 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2590 /* Allocate software ring */
2591 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2592 sizeof(struct ixgbe_tx_entry) * nb_desc,
2593 RTE_CACHE_LINE_SIZE, socket_id);
2594 if (txq->sw_ring == NULL) {
2595 ixgbe_tx_queue_release(txq);
2598 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2599 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2601 /* set up vector or scalar TX function as appropriate */
2602 ixgbe_set_tx_function(dev, txq);
2604 txq->ops->reset(txq);
2606 dev->data->tx_queues[queue_idx] = txq;
2613 * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2615 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2616 * in the sw_rsc_ring is not set to NULL but rather points to the next
2617 * mbuf of this RSC aggregation (that has not been completed yet and still
2618 * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2619 * will just free the first "nb_segs" segments of the cluster explicitly by
2620 * calling rte_pktmbuf_free_seg() on each of them.
2622 * @m scattered cluster head
2624 static void __attribute__((cold))
2625 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2627 uint8_t i, nb_segs = m->nb_segs;
2628 struct rte_mbuf *next_seg;
2630 for (i = 0; i < nb_segs; i++) {
2632 rte_pktmbuf_free_seg(m);
2637 static void __attribute__((cold))
2638 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2642 #ifdef RTE_IXGBE_INC_VECTOR
2643 /* SSE Vector driver has a different way of releasing mbufs. */
2644 if (rxq->rx_using_sse) {
2645 ixgbe_rx_queue_release_mbufs_vec(rxq);
2650 if (rxq->sw_ring != NULL) {
2651 for (i = 0; i < rxq->nb_rx_desc; i++) {
2652 if (rxq->sw_ring[i].mbuf != NULL) {
2653 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2654 rxq->sw_ring[i].mbuf = NULL;
2657 if (rxq->rx_nb_avail) {
2658 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2659 struct rte_mbuf *mb;
2661 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2662 rte_pktmbuf_free_seg(mb);
2664 rxq->rx_nb_avail = 0;
2668 if (rxq->sw_sc_ring)
2669 for (i = 0; i < rxq->nb_rx_desc; i++)
2670 if (rxq->sw_sc_ring[i].fbuf) {
2671 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2672 rxq->sw_sc_ring[i].fbuf = NULL;
2676 static void __attribute__((cold))
2677 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2680 ixgbe_rx_queue_release_mbufs(rxq);
2681 rte_free(rxq->sw_ring);
2682 rte_free(rxq->sw_sc_ring);
2687 void __attribute__((cold))
2688 ixgbe_dev_rx_queue_release(void *rxq)
2690 ixgbe_rx_queue_release(rxq);
2694 * Check if Rx Burst Bulk Alloc function can be used.
2696 * 0: the preconditions are satisfied and the bulk allocation function
2698 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2699 * function must be used.
2701 static inline int __attribute__((cold))
2702 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2707 * Make sure the following pre-conditions are satisfied:
2708 * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2709 * rxq->rx_free_thresh < rxq->nb_rx_desc
2710 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2711 * Scattered packets are not supported. This should be checked
2712 * outside of this function.
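* Worked example with illustrative values: a queue with
* nb_rx_desc = 512 and rx_free_thresh = 64 passes all three checks
* below (assuming the usual RTE_PMD_IXGBE_RX_MAX_BURST of 32):
* 64 >= 32, 64 < 512 and 512 % 64 == 0.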
2714 if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2715 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2716 "rxq->rx_free_thresh=%d, "
2717 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2718 rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2720 } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2721 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2722 "rxq->rx_free_thresh=%d, "
2723 "rxq->nb_rx_desc=%d",
2724 rxq->rx_free_thresh, rxq->nb_rx_desc);
2726 } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2727 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2728 "rxq->nb_rx_desc=%d, "
2729 "rxq->rx_free_thresh=%d",
2730 rxq->nb_rx_desc, rxq->rx_free_thresh);
2737 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2738 static void __attribute__((cold))
2739 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2741 static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2743 uint16_t len = rxq->nb_rx_desc;
2746 * By default, the Rx queue setup function allocates enough memory for
2747 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
2748 * extra memory at the end of the descriptor ring to be zero'd out.
2750 if (adapter->rx_bulk_alloc_allowed)
2751 /* zero out extra memory */
2752 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2755 * Zero out HW ring memory. Zero out extra memory at the end of
2756 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2757 * reads extra memory as zeros.
2759 for (i = 0; i < len; i++) {
2760 rxq->rx_ring[i] = zeroed_desc;
2764 * initialize extra software ring entries. Space for these extra
2765 * entries is always allocated
2767 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2768 for (i = rxq->nb_rx_desc; i < len; ++i) {
2769 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2772 rxq->rx_nb_avail = 0;
2773 rxq->rx_next_avail = 0;
2774 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2776 rxq->nb_rx_hold = 0;
2777 rxq->pkt_first_seg = NULL;
2778 rxq->pkt_last_seg = NULL;
2780 #ifdef RTE_IXGBE_INC_VECTOR
2781 rxq->rxrearm_start = 0;
2782 rxq->rxrearm_nb = 0;
2786 int __attribute__((cold))
2787 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2790 unsigned int socket_id,
2791 const struct rte_eth_rxconf *rx_conf,
2792 struct rte_mempool *mp)
2794 const struct rte_memzone *rz;
2795 struct ixgbe_rx_queue *rxq;
2796 struct ixgbe_hw *hw;
2798 struct ixgbe_adapter *adapter =
2799 (struct ixgbe_adapter *)dev->data->dev_private;
2801 PMD_INIT_FUNC_TRACE();
2802 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2805 * Validate number of receive descriptors.
2806 * It must not exceed the hardware maximum, and must be a multiple
2809 if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2810 (nb_desc > IXGBE_MAX_RING_DESC) ||
2811 (nb_desc < IXGBE_MIN_RING_DESC)) {
2815 /* Free memory prior to re-allocation if needed... */
2816 if (dev->data->rx_queues[queue_idx] != NULL) {
2817 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2818 dev->data->rx_queues[queue_idx] = NULL;
2821 /* First allocate the rx queue data structure */
2822 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2823 RTE_CACHE_LINE_SIZE, socket_id);
2827 rxq->nb_rx_desc = nb_desc;
2828 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2829 rxq->queue_id = queue_idx;
2830 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2831 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2832 rxq->port_id = dev->data->port_id;
2833 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2835 rxq->drop_en = rx_conf->rx_drop_en;
2836 rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2839 * The packet type in the RX descriptor is different for different NICs.
2840 * Some bits are used for x550 but reserved for other NICs.
2841 * So set different masks for different NICs.
2843 if (hw->mac.type == ixgbe_mac_X550 ||
2844 hw->mac.type == ixgbe_mac_X550EM_x ||
2845 hw->mac.type == ixgbe_mac_X550EM_a ||
2846 hw->mac.type == ixgbe_mac_X550_vf ||
2847 hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2848 hw->mac.type == ixgbe_mac_X550EM_a_vf)
2849 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2851 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2854 * Allocate RX ring hardware descriptors. A memzone large enough to
2855 * handle the maximum ring size is allocated in order to allow for
2856 * resizing in later calls to the queue setup function.
2858 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2859 RX_RING_SZ, IXGBE_ALIGN, socket_id);
2861 ixgbe_rx_queue_release(rxq);
2866 * Zero init all the descriptors in the ring.
2868 memset(rz->addr, 0, RX_RING_SZ);
2871 * Set up VFRDT for a virtual function if a VF is detected
2873 if (hw->mac.type == ixgbe_mac_82599_vf ||
2874 hw->mac.type == ixgbe_mac_X540_vf ||
2875 hw->mac.type == ixgbe_mac_X550_vf ||
2876 hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2877 hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2879 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2881 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2884 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2886 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2889 rxq->rx_ring_phys_addr = rz->phys_addr;
2890 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2893 * Certain constraints must be met in order to use the bulk buffer
2894 * allocation Rx burst function. If any of the Rx queues doesn't meet them,
2895 * the feature is disabled for the whole port.
2897 if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2898 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2899 "preconditions - canceling the feature for "
2900 "the whole port[%d]",
2901 rxq->queue_id, rxq->port_id);
2902 adapter->rx_bulk_alloc_allowed = false;
2906 * Allocate software ring. Allow for space at the end of the
2907 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2908 * function does not access an invalid memory region.
2911 if (adapter->rx_bulk_alloc_allowed)
2912 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2914 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2915 sizeof(struct ixgbe_rx_entry) * len,
2916 RTE_CACHE_LINE_SIZE, socket_id);
2917 if (!rxq->sw_ring) {
2918 ixgbe_rx_queue_release(rxq);
2923 * Always allocate even if it's not going to be needed in order to
2924 * simplify the code.
2926 * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2927 * be requested in ixgbe_dev_rx_init(), which is called later from
2931 rte_zmalloc_socket("rxq->sw_sc_ring",
2932 sizeof(struct ixgbe_scattered_rx_entry) * len,
2933 RTE_CACHE_LINE_SIZE, socket_id);
2934 if (!rxq->sw_sc_ring) {
2935 ixgbe_rx_queue_release(rxq);
2939 PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2940 "dma_addr=0x%"PRIx64,
2941 rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2942 rxq->rx_ring_phys_addr);
2944 if (!rte_is_power_of_2(nb_desc)) {
2945 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2946 "preconditions - canceling the feature for "
2947 "the whole port[%d]",
2948 rxq->queue_id, rxq->port_id);
2949 adapter->rx_vec_allowed = false;
2951 ixgbe_rxq_vec_setup(rxq);
2953 dev->data->rx_queues[queue_idx] = rxq;
2955 ixgbe_reset_rx_queue(adapter, rxq);
2961 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2963 #define IXGBE_RXQ_SCAN_INTERVAL 4
2964 volatile union ixgbe_adv_rx_desc *rxdp;
2965 struct ixgbe_rx_queue *rxq;
2968 rxq = dev->data->rx_queues[rx_queue_id];
2969 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2971 while ((desc < rxq->nb_rx_desc) &&
2972 (rxdp->wb.upper.status_error &
2973 rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2974 desc += IXGBE_RXQ_SCAN_INTERVAL;
2975 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2976 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2977 rxdp = &(rxq->rx_ring[rxq->rx_tail +
2978 desc - rxq->nb_rx_desc]);
2985 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2987 volatile union ixgbe_adv_rx_desc *rxdp;
2988 struct ixgbe_rx_queue *rxq = rx_queue;
2991 if (unlikely(offset >= rxq->nb_rx_desc))
2993 desc = rxq->rx_tail + offset;
2994 if (desc >= rxq->nb_rx_desc)
2995 desc -= rxq->nb_rx_desc;
2997 rxdp = &rxq->rx_ring[desc];
2998 return !!(rxdp->wb.upper.status_error &
2999 rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3003 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3005 struct ixgbe_rx_queue *rxq = rx_queue;
3006 volatile uint32_t *status;
3007 uint32_t nb_hold, desc;
3009 if (unlikely(offset >= rxq->nb_rx_desc))
3012 #ifdef RTE_IXGBE_INC_VECTOR
3013 if (rxq->rx_using_sse)
3014 nb_hold = rxq->rxrearm_nb;
3017 nb_hold = rxq->nb_rx_hold;
3018 if (offset >= rxq->nb_rx_desc - nb_hold)
3019 return RTE_ETH_RX_DESC_UNAVAIL;
3021 desc = rxq->rx_tail + offset;
3022 if (desc >= rxq->nb_rx_desc)
3023 desc -= rxq->nb_rx_desc;
3025 status = &rxq->rx_ring[desc].wb.upper.status_error;
3026 if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3027 return RTE_ETH_RX_DESC_DONE;
3029 return RTE_ETH_RX_DESC_AVAIL;
3033 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3035 struct ixgbe_tx_queue *txq = tx_queue;
3036 volatile uint32_t *status;
3039 if (unlikely(offset >= txq->nb_tx_desc))
3042 desc = txq->tx_tail + offset;
3043 /* go to next desc that has the RS bit */
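/* e.g. with tx_rs_thresh = 32, a descriptor index of 40 is rounded up
 * to 64 before its DD status is checked. */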
3044 desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3046 if (desc >= txq->nb_tx_desc) {
3047 desc -= txq->nb_tx_desc;
3048 if (desc >= txq->nb_tx_desc)
3049 desc -= txq->nb_tx_desc;
3052 status = &txq->tx_ring[desc].wb.status;
3053 if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3054 return RTE_ETH_TX_DESC_DONE;
3056 return RTE_ETH_TX_DESC_FULL;
3059 void __attribute__((cold))
3060 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3063 struct ixgbe_adapter *adapter =
3064 (struct ixgbe_adapter *)dev->data->dev_private;
3066 PMD_INIT_FUNC_TRACE();
3068 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3069 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3072 txq->ops->release_mbufs(txq);
3073 txq->ops->reset(txq);
3077 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3078 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3081 ixgbe_rx_queue_release_mbufs(rxq);
3082 ixgbe_reset_rx_queue(adapter, rxq);
3088 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3092 PMD_INIT_FUNC_TRACE();
3094 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3095 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3096 dev->data->rx_queues[i] = NULL;
3098 dev->data->nb_rx_queues = 0;
3100 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3101 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3102 dev->data->tx_queues[i] = NULL;
3104 dev->data->nb_tx_queues = 0;
3107 /*********************************************************************
3109 * Device RX/TX init functions
3111 **********************************************************************/
3114 * Receive Side Scaling (RSS)
3115 * See section 7.1.2.8 in the following document:
3116 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3119 * The source and destination IP addresses of the IP header and the source
3120 * and destination ports of TCP/UDP headers, if any, of received packets are
3121 * hashed against a configurable random key to compute a 32-bit RSS hash result.
3122 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3123 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
3124 * RSS output index which is used as the RX queue index where to store the
3126 * The following output is supplied in the RX write-back descriptor:
3127 * - 32-bit result of the Microsoft RSS hash function,
3128 * - 4-bit RSS type field.
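* For example, a packet whose 32-bit RSS hash is 0x00000985 selects
* RETA entry 0x05 (the 7 least significant bits), and the queue index
* stored in that entry becomes the packet's RX queue.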
3132 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3133 * Used as the default key.
3135 static uint8_t rss_intel_key[40] = {
3136 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3137 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3138 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3139 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3140 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3144 ixgbe_rss_disable(struct rte_eth_dev *dev)
3146 struct ixgbe_hw *hw;
3150 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3151 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3152 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3153 mrqc &= ~IXGBE_MRQC_RSSEN;
3154 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3158 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3168 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3169 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3171 hash_key = rss_conf->rss_key;
3172 if (hash_key != NULL) {
3173 /* Fill in RSS hash key */
3174 for (i = 0; i < 10; i++) {
3175 rss_key = hash_key[(i * 4)];
3176 rss_key |= hash_key[(i * 4) + 1] << 8;
3177 rss_key |= hash_key[(i * 4) + 2] << 16;
3178 rss_key |= hash_key[(i * 4) + 3] << 24;
3179 IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3183 /* Set configured hashing protocols in MRQC register */
3184 rss_hf = rss_conf->rss_hf;
3185 mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3186 if (rss_hf & ETH_RSS_IPV4)
3187 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3188 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3189 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3190 if (rss_hf & ETH_RSS_IPV6)
3191 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3192 if (rss_hf & ETH_RSS_IPV6_EX)
3193 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3194 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3195 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3196 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3197 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3198 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3199 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3200 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3201 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3202 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3203 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3204 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3208 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3209 struct rte_eth_rss_conf *rss_conf)
3211 struct ixgbe_hw *hw;
3216 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3218 if (!ixgbe_rss_update_sp(hw->mac.type)) {
3219 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3223 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3226 * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3227 * "RSS enabling cannot be done dynamically while it must be
3228 * preceded by a software reset"
3229 * Before changing anything, first check that the update RSS operation
3230 * does not attempt to disable RSS, if RSS was enabled at
3231 * initialization time, or does not attempt to enable RSS, if RSS was
3232 * disabled at initialization time.
3234 rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3235 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3236 if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3237 if (rss_hf != 0) /* Enable RSS */
3239 return 0; /* Nothing to do */
3242 if (rss_hf == 0) /* Disable RSS */
3244 ixgbe_hw_rss_hash_set(hw, rss_conf);
3249 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3250 struct rte_eth_rss_conf *rss_conf)
3252 struct ixgbe_hw *hw;
3261 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3262 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3263 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3264 hash_key = rss_conf->rss_key;
3265 if (hash_key != NULL) {
3266 /* Return RSS hash key */
3267 for (i = 0; i < 10; i++) {
3268 rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3269 hash_key[(i * 4)] = rss_key & 0x000000FF;
3270 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3271 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3272 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3276 /* Get RSS functions configured in MRQC register */
3277 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3278 if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3279 rss_conf->rss_hf = 0;
3283 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3284 rss_hf |= ETH_RSS_IPV4;
3285 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3286 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3287 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3288 rss_hf |= ETH_RSS_IPV6;
3289 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3290 rss_hf |= ETH_RSS_IPV6_EX;
3291 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3292 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3293 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3294 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3295 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3296 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3297 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3298 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3299 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3300 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3301 rss_conf->rss_hf = rss_hf;
3306 ixgbe_rss_configure(struct rte_eth_dev *dev)
3308 struct rte_eth_rss_conf rss_conf;
3309 struct ixgbe_hw *hw;
3313 uint16_t sp_reta_size;
3316 PMD_INIT_FUNC_TRACE();
3317 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3319 sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3322 * Fill in redirection table
3323 * The byte-swap is needed because NIC registers are in
3324 * little-endian order.
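* For example, with 4 RX queues the loop below writes the repeating
* pattern 0,1,2,3 into the table, packing one 8-bit entry per byte so
* that four entries are written per 32-bit RETA register.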
3327 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3328 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3330 if (j == dev->data->nb_rx_queues)
3332 reta = (reta << 8) | j;
3334 IXGBE_WRITE_REG(hw, reta_reg,
3339 * Configure the RSS key and the RSS protocols used to compute
3340 * the RSS hash of input packets.
3342 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3343 if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3344 ixgbe_rss_disable(dev);
3347 if (rss_conf.rss_key == NULL)
3348 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3349 ixgbe_hw_rss_hash_set(hw, &rss_conf);
3352 #define NUM_VFTA_REGISTERS 128
3353 #define NIC_RX_BUFFER_SIZE 0x200
3354 #define X550_RX_BUFFER_SIZE 0x180
3357 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3359 struct rte_eth_vmdq_dcb_conf *cfg;
3360 struct ixgbe_hw *hw;
3361 enum rte_eth_nb_pools num_pools;
3362 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3364 uint8_t nb_tcs; /* number of traffic classes */
3367 PMD_INIT_FUNC_TRACE();
3368 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3369 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3370 num_pools = cfg->nb_queue_pools;
3371 /* Check we have a valid number of pools */
3372 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3373 ixgbe_rss_disable(dev);
3376 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3377 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3381 * split rx buffer up into sections, each for 1 traffic class
3383 switch (hw->mac.type) {
3384 case ixgbe_mac_X550:
3385 case ixgbe_mac_X550EM_x:
3386 case ixgbe_mac_X550EM_a:
3387 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3390 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3393 for (i = 0; i < nb_tcs; i++) {
3394 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3396 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3397 /* clear 10 bits. */
3398 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3399 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3401 /* zero alloc all unused TCs */
3402 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3403 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3405 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3406 /* clear 10 bits. */
3407 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3410 /* MRQC: enable vmdq and dcb */
3411 mrqc = (num_pools == ETH_16_POOLS) ?
3412 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3413 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3415 /* PFVTCTL: turn on virtualisation and set the default pool */
3416 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3417 if (cfg->enable_default_pool) {
3418 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3420 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3423 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3425 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3427 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3429 * mapping is done with 3 bits per priority,
3430 * so shift by i*3 each time
3432 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3434 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3436 /* RTRPCS: DCB related */
3437 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3439 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3440 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3441 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3442 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3444 /* VFTA - enable all vlan filters */
3445 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3446 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3449 /* VFRE: pool enabling for receive - 16 or 32 */
3450 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3451 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3454 * MPSAR - allow pools to read specific mac addresses
3455 * In this case, all pools should be able to read from mac addr 0
3457 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3458 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3460 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3461 for (i = 0; i < cfg->nb_pool_maps; i++) {
3462 /* set vlan id in VF register and set the valid bit */
3463 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3464 (cfg->pool_map[i].vlan_id & 0xFFF)));
3466 * Put the allowed pools in VFB reg. As we only have 16 or 32
3467 * pools, we only need to use the first half of the register
3470 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3475 * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3476 * @dev: pointer to eth_dev structure
3477 * @dcb_config: pointer to ixgbe_dcb_config structure
3480 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3481 struct ixgbe_dcb_config *dcb_config)
3484 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3486 PMD_INIT_FUNC_TRACE();
3487 if (hw->mac.type != ixgbe_mac_82598EB) {
3488 /* Disable the Tx desc arbiter so that MTQC can be changed */
3489 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3490 reg |= IXGBE_RTTDCS_ARBDIS;
3491 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3493 /* Enable DCB for Tx with 8 TCs */
3494 if (dcb_config->num_tcs.pg_tcs == 8) {
3495 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3497 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3499 if (dcb_config->vt_mode)
3500 reg |= IXGBE_MTQC_VT_ENA;
3501 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3503 /* Enable the Tx desc arbiter */
3504 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3505 reg &= ~IXGBE_RTTDCS_ARBDIS;
3506 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3508 /* Enable Security TX Buffer IFG for DCB */
3509 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3510 reg |= IXGBE_SECTX_DCB;
3511 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3516 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3517 * @dev: pointer to rte_eth_dev structure
3518 * @dcb_config: pointer to ixgbe_dcb_config structure
3521 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3522 struct ixgbe_dcb_config *dcb_config)
3524 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3525 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3526 struct ixgbe_hw *hw =
3527 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3529 PMD_INIT_FUNC_TRACE();
3530 if (hw->mac.type != ixgbe_mac_82598EB)
3531 /*PF VF Transmit Enable*/
3532 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3533 vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3535 /*Configure general DCB TX parameters*/
3536 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3540 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3541 struct ixgbe_dcb_config *dcb_config)
3543 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3544 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3545 struct ixgbe_dcb_tc_config *tc;
3548 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3549 if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3550 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3551 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3553 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3554 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3557 /* Initialize User Priority to Traffic Class mapping */
3558 for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3559 tc = &dcb_config->tc_config[j];
3560 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3563 /* User Priority to Traffic Class mapping */
3564 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3565 j = vmdq_rx_conf->dcb_tc[i];
3566 tc = &dcb_config->tc_config[j];
3567 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3573 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3574 struct ixgbe_dcb_config *dcb_config)
3576 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3577 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3578 struct ixgbe_dcb_tc_config *tc;
3581 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3582 if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3583 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3584 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3586 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3587 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3590 /* Initialize User Priority to Traffic Class mapping */
3591 for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3592 tc = &dcb_config->tc_config[j];
3593 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3596 /* User Priority to Traffic Class mapping */
3597 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3598 j = vmdq_tx_conf->dcb_tc[i];
3599 tc = &dcb_config->tc_config[j];
3600 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3606 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3607 struct ixgbe_dcb_config *dcb_config)
3609 struct rte_eth_dcb_rx_conf *rx_conf =
3610 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3611 struct ixgbe_dcb_tc_config *tc;
3614 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3615 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3617 /* Initialize User Priority to Traffic Class mapping */
3618 for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3619 tc = &dcb_config->tc_config[j];
3620 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3623 /* User Priority to Traffic Class mapping */
3624 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3625 j = rx_conf->dcb_tc[i];
3626 tc = &dcb_config->tc_config[j];
3627 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3633 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3634 struct ixgbe_dcb_config *dcb_config)
3636 struct rte_eth_dcb_tx_conf *tx_conf =
3637 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3638 struct ixgbe_dcb_tc_config *tc;
3641 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3642 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3644 /* Initialize User Priority to Traffic Class mapping */
3645 for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3646 tc = &dcb_config->tc_config[j];
3647 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3650 /* User Priority to Traffic Class mapping */
3651 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3652 j = tx_conf->dcb_tc[i];
3653 tc = &dcb_config->tc_config[j];
3654 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3660 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3661 * @dev: pointer to eth_dev structure
3662 * @dcb_config: pointer to ixgbe_dcb_config structure
3665 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3666 struct ixgbe_dcb_config *dcb_config)
3672 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3674 PMD_INIT_FUNC_TRACE();
3676 * Disable the arbiter before changing parameters
3677 * (always enable recycle mode; WSP)
3679 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3680 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3682 if (hw->mac.type != ixgbe_mac_82598EB) {
3683 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3684 if (dcb_config->num_tcs.pg_tcs == 4) {
3685 if (dcb_config->vt_mode)
3686 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3687 IXGBE_MRQC_VMDQRT4TCEN;
3689 /* no matter whether the mode is DCB or DCB_RSS, just
3690 * set the MRQE to RSSXTCEN. RSS is controlled
3693 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3694 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3695 IXGBE_MRQC_RTRSS4TCEN;
3698 if (dcb_config->num_tcs.pg_tcs == 8) {
3699 if (dcb_config->vt_mode)
3700 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3701 IXGBE_MRQC_VMDQRT8TCEN;
3703 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3704 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3705 IXGBE_MRQC_RTRSS8TCEN;
3709 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3711 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3712 /* Disable drop for all queues in VMDQ mode*/
3713 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3714 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3716 (q << IXGBE_QDE_IDX_SHIFT)));
3718 /* Enable drop for all queues in SRIOV mode */
3719 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3720 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3722 (q << IXGBE_QDE_IDX_SHIFT) |
3727 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3728 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3729 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3730 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3732 /* VFTA - enable all vlan filters */
3733 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3734 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3738 * Configure Rx packet plane (recycle mode; WSP) and
3741 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3742 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3746 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3747 uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3749 switch (hw->mac.type) {
3750 case ixgbe_mac_82598EB:
3751 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3753 case ixgbe_mac_82599EB:
3754 case ixgbe_mac_X540:
3755 case ixgbe_mac_X550:
3756 case ixgbe_mac_X550EM_x:
3757 case ixgbe_mac_X550EM_a:
3758 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3767 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3768 uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3770 switch (hw->mac.type) {
3771 case ixgbe_mac_82598EB:
3772 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3773 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3775 case ixgbe_mac_82599EB:
3776 case ixgbe_mac_X540:
3777 case ixgbe_mac_X550:
3778 case ixgbe_mac_X550EM_x:
3779 case ixgbe_mac_X550EM_a:
3780 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3781 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3788 #define DCB_RX_CONFIG 1
3789 #define DCB_TX_CONFIG 1
3790 #define DCB_TX_PB 1024
3792 * ixgbe_dcb_hw_configure - Enable DCB and configure
3793 * general DCB in VT mode and non-VT mode parameters
3794 * @dev: pointer to rte_eth_dev structure
3795 * @dcb_config: pointer to ixgbe_dcb_config structure
3798 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3799 struct ixgbe_dcb_config *dcb_config)
3802 uint8_t i, pfc_en, nb_tcs;
3803 uint16_t pbsize, rx_buffer_size;
3804 uint8_t config_dcb_rx = 0;
3805 uint8_t config_dcb_tx = 0;
3806 uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3807 uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3808 uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3809 uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3810 uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3811 struct ixgbe_dcb_tc_config *tc;
3812 uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3813 struct ixgbe_hw *hw =
3814 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3815 struct ixgbe_bw_conf *bw_conf =
3816 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3818 switch (dev->data->dev_conf.rxmode.mq_mode) {
3819 case ETH_MQ_RX_VMDQ_DCB:
3820 dcb_config->vt_mode = true;
3821 if (hw->mac.type != ixgbe_mac_82598EB) {
3822 config_dcb_rx = DCB_RX_CONFIG;
3824 * Get DCB and VT Rx configuration parameters from rte_eth_conf.
3827 ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3828 /*Configure general VMDQ and DCB RX parameters*/
3829 ixgbe_vmdq_dcb_configure(dev);
3833 case ETH_MQ_RX_DCB_RSS:
3834 dcb_config->vt_mode = false;
3835 config_dcb_rx = DCB_RX_CONFIG;
3836 /* Get DCB RX configuration parameters from rte_eth_conf */
3837 ixgbe_dcb_rx_config(dev, dcb_config);
3838 /*Configure general DCB RX parameters*/
3839 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3842 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3845 switch (dev->data->dev_conf.txmode.mq_mode) {
3846 case ETH_MQ_TX_VMDQ_DCB:
3847 dcb_config->vt_mode = true;
3848 config_dcb_tx = DCB_TX_CONFIG;
3849 /* Get DCB and VT TX configuration parameters from rte_eth_conf
3852 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3853 /*Configure general VMDQ and DCB TX parameters*/
3854 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3858 dcb_config->vt_mode = false;
3859 config_dcb_tx = DCB_TX_CONFIG;
3860 /*get DCB TX configuration parameters from rte_eth_conf*/
3861 ixgbe_dcb_tx_config(dev, dcb_config);
3862 /*Configure general DCB TX parameters*/
3863 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3866 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3870 nb_tcs = dcb_config->num_tcs.pfc_tcs;
3872 ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3873 if (nb_tcs == ETH_4_TCS) {
3874 /* Avoid un-configured priority mapping to TC0 */
3875 uint8_t j = 4;
3876 uint8_t mask = 0xFF;
3878 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3879 mask = (uint8_t)(mask & (~(1 << map[i])));
3880 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3881 if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3882 map[j++] = i;
3883 mask >>= 1;
3885 /* Re-configure 4 TCs BW */
3886 for (i = 0; i < nb_tcs; i++) {
3887 tc = &dcb_config->tc_config[i];
3888 if (bw_conf->tc_num != nb_tcs)
3889 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3890 (uint8_t)(100 / nb_tcs);
3891 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3892 (uint8_t)(100 / nb_tcs);
3894 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3895 tc = &dcb_config->tc_config[i];
3896 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3897 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3900 /* Re-configure 8 TCs BW */
3901 for (i = 0; i < nb_tcs; i++) {
3902 tc = &dcb_config->tc_config[i];
3903 if (bw_conf->tc_num != nb_tcs)
3904 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3905 (uint8_t)(100 / nb_tcs + (i & 1));
3906 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3907 (uint8_t)(100 / nb_tcs + (i & 1));
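/*
 * Editor's worked example (not part of the driver source): with
 * nb_tcs == 8 the expression 100 / nb_tcs + (i & 1) gives 12% to the
 * even-numbered TCs and 13% to the odd-numbered ones, i.e.
 * 4 * 12 + 4 * 13 = 100, so the bandwidth group percentages still sum
 * to 100 despite the integer division. In the 4-TC branch above each
 * TC simply gets 100 / 4 = 25%.
 */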
3911 switch (hw->mac.type) {
3912 case ixgbe_mac_X550:
3913 case ixgbe_mac_X550EM_x:
3914 case ixgbe_mac_X550EM_a:
3915 rx_buffer_size = X550_RX_BUFFER_SIZE;
3918 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3922 if (config_dcb_rx) {
3923 /* Set RX buffer size */
3924 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3925 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3927 for (i = 0; i < nb_tcs; i++) {
3928 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3930 /* zero alloc all unused TCs */
3931 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3932 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3935 if (config_dcb_tx) {
3936 /* Only support an equally distributed
3937 * Tx packet buffer strategy.
3939 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3940 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3942 for (i = 0; i < nb_tcs; i++) {
3943 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3944 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3946 /* Clear unused TCs, if any, to zero buffer size*/
3947 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3948 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3949 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
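/*
 * Editor's note on the Tx packet buffer sizing above (interpretation,
 * kept symbolic because the register constants are defined elsewhere):
 * the total Tx packet buffer IXGBE_TXPBSIZE_MAX is split evenly across
 * the nb_tcs traffic classes, and txpbthresh converts that per-TC share
 * into DCB_TX_PB (1 KB) units and subtracts IXGBE_TXPKT_SIZE_MAX,
 * presumably so the almost-full threshold always leaves room for one
 * maximum-sized Tx packet in each TC's buffer.
 */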
3953 /*Calculates traffic class credits*/
3954 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3955 IXGBE_DCB_TX_CONFIG);
3956 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3957 IXGBE_DCB_RX_CONFIG);
3959 if (config_dcb_rx) {
3960 /* Unpack CEE standard containers */
3961 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3962 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3963 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3964 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3965 /* Configure PG(ETS) RX */
3966 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3969 if (config_dcb_tx) {
3970 /* Unpack CEE standard containers */
3971 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3972 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3973 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3974 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3975 /* Configure PG(ETS) TX */
3976 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3979 /*Configure queue statistics registers*/
3980 ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3982 /* Check if the PFC is supported */
3983 if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3984 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3985 for (i = 0; i < nb_tcs; i++) {
3987 * With 8 TCs, for example, the default high_water is 48 (3/4 of the
3988 * per-TC buffer) and the default low_water is 16 (1/4).
3990 hw->fc.high_water[i] = (pbsize * 3) / 4;
3991 hw->fc.low_water[i] = pbsize / 4;
3992 /* Enable pfc for this TC */
3993 tc = &dcb_config->tc_config[i];
3994 tc->pfc = ixgbe_dcb_pfc_enabled;
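/*
 * Editor's arithmetic for the watermarks above: pbsize is the per-TC
 * share of the Rx packet buffer (in KB); PFC pause is asserted when a
 * TC fills roughly 3/4 of that share and released when it drains below
 * 1/4. For example, with 8 TCs and a 512 KB buffer, pbsize = 64, so
 * high_water = 48 and low_water = 16, which matches the figures quoted
 * in the comment above (the exact buffer size depends on the MAC type).
 */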
3996 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3997 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3998 pfc_en &= 0x0F;
3999 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4006 * ixgbe_configure_dcb - Configure DCB Hardware
4007 * @dev: pointer to rte_eth_dev
4009 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4011 struct ixgbe_dcb_config *dcb_cfg =
4012 IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4013 struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4015 PMD_INIT_FUNC_TRACE();
4017 /* Check that the configured mq_mode is supported for DCB */
4018 if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4019 (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4020 (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4023 if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4026 /** Configure DCB hardware **/
4027 ixgbe_dcb_hw_configure(dev, dcb_cfg);
4031 * VMDq is only supported on 10 GbE NICs.
4034 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4036 struct rte_eth_vmdq_rx_conf *cfg;
4037 struct ixgbe_hw *hw;
4038 enum rte_eth_nb_pools num_pools;
4039 uint32_t mrqc, vt_ctl, vlanctrl;
4043 PMD_INIT_FUNC_TRACE();
4044 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4045 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4046 num_pools = cfg->nb_queue_pools;
4048 ixgbe_rss_disable(dev);
4050 /* MRQC: enable vmdq */
4051 mrqc = IXGBE_MRQC_VMDQEN;
4052 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4054 /* PFVTCTL: turn on virtualisation and set the default pool */
4055 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4056 if (cfg->enable_default_pool)
4057 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4059 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4061 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4063 for (i = 0; i < (int)num_pools; i++) {
4064 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4065 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4068 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4069 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4070 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4071 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4073 /* VFTA - enable all vlan filters */
4074 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4075 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4077 /* VFRE: pool enabling for receive - 64 */
4078 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4079 if (num_pools == ETH_64_POOLS)
4080 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4083 * MPSAR - allow pools to read specific mac addresses
4084 * In this case, all pools should be able to read from mac addr 0
4086 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4087 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4089 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4090 for (i = 0; i < cfg->nb_pool_maps; i++) {
4091 /* set vlan id in VF register and set the valid bit */
4092 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4093 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4095 * Put the allowed pools in VFB reg. As we only have 16 or 64
4096 * pools, we only need to use the first half of the register, i.e. bits 0-31.
4099 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4100 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4101 (cfg->pool_map[i].pools & UINT32_MAX));
4103 IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4104 ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
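/*
 * Editor's note on the VLAN filter programming above: each VLVF entry
 * holds one VLAN ID (plus the VIEN valid bit) and its pair of VLVFB
 * registers forms a 64-bit bitmap of the VMDq pools allowed to receive
 * that VLAN; the code writes whichever 32-bit half of the bitmap is
 * actually populated.
 */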
4108 /* PFDMA Tx General Switch Control: enable VMDq loopback if requested */
4109 if (cfg->enable_loop_back) {
4110 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4111 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4112 IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4115 IXGBE_WRITE_FLUSH(hw);
4119 * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4120 * @hw: pointer to hardware structure
4123 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4128 PMD_INIT_FUNC_TRACE();
4129 /*PF VF Transmit Enable*/
4130 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4131 IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4133 /* Disable the Tx desc arbiter so that MTQC can be changed */
4134 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4135 reg |= IXGBE_RTTDCS_ARBDIS;
4136 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4138 reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4139 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4141 /* Disable drop for all queues */
4142 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4143 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4144 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4146 /* Enable the Tx desc arbiter */
4147 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4148 reg &= ~IXGBE_RTTDCS_ARBDIS;
4149 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4151 IXGBE_WRITE_FLUSH(hw);
4154 static int __attribute__((cold))
4155 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4157 struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4161 /* Initialize software ring entries */
4162 for (i = 0; i < rxq->nb_rx_desc; i++) {
4163 volatile union ixgbe_adv_rx_desc *rxd;
4164 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4167 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4168 (unsigned) rxq->queue_id);
4172 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4173 mbuf->port = rxq->port_id;
4176 rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4177 rxd = &rxq->rx_ring[i];
4178 rxd->read.hdr_addr = 0;
4179 rxd->read.pkt_addr = dma_addr;
4187 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4189 struct ixgbe_hw *hw;
4192 ixgbe_rss_configure(dev);
4194 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4196 /* MRQC: enable VF RSS */
4197 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4198 mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4199 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4201 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4205 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4209 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4213 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
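/*
 * Editor's note: the switch above maps the active SR-IOV pool count to
 * the MRQC mode -- 64 pools select VMDQRSS64EN (two RSS queues per pool
 * on a 128-queue device) and 32 pools select VMDQRSS32EN (four RSS
 * queues per pool); any other pool count is rejected since the hardware
 * cannot combine RSS with that VMDq layout.
 */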
4219 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4221 struct ixgbe_hw *hw =
4222 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4224 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4226 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4231 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4232 IXGBE_MRQC_VMDQRT4TCEN);
4236 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4237 IXGBE_MRQC_VMDQRT8TCEN);
4241 "invalid pool number in IOV mode");
4248 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4250 struct ixgbe_hw *hw =
4251 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4253 if (hw->mac.type == ixgbe_mac_82598EB)
4256 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4258 * SRIOV inactive scheme
4259 * any DCB/RSS w/o VMDq multi-queue setting
4261 switch (dev->data->dev_conf.rxmode.mq_mode) {
4263 case ETH_MQ_RX_DCB_RSS:
4264 case ETH_MQ_RX_VMDQ_RSS:
4265 ixgbe_rss_configure(dev);
4268 case ETH_MQ_RX_VMDQ_DCB:
4269 ixgbe_vmdq_dcb_configure(dev);
4272 case ETH_MQ_RX_VMDQ_ONLY:
4273 ixgbe_vmdq_rx_hw_configure(dev);
4276 case ETH_MQ_RX_NONE:
4278 /* if mq_mode is none, disable rss mode.*/
4279 ixgbe_rss_disable(dev);
4283 /* SRIOV active scheme
4284 * Support RSS together with SRIOV.
4286 switch (dev->data->dev_conf.rxmode.mq_mode) {
4288 case ETH_MQ_RX_VMDQ_RSS:
4289 ixgbe_config_vf_rss(dev);
4291 case ETH_MQ_RX_VMDQ_DCB:
4293 /* In SRIOV, the configuration is the same as VMDq case */
4294 ixgbe_vmdq_dcb_configure(dev);
4296 /* DCB/RSS together with SRIOV is not supported */
4297 case ETH_MQ_RX_VMDQ_DCB_RSS:
4298 case ETH_MQ_RX_DCB_RSS:
4300 "Could not support DCB/RSS with VMDq & SRIOV");
4303 ixgbe_config_vf_default(dev);
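/*
 * Editor's summary of the Rx multi-queue dispatch above (derived from
 * the two switch statements, not an authoritative table):
 *
 *   SR-IOV inactive: RSS/DCB_RSS/VMDQ_RSS -> ixgbe_rss_configure()
 *                    VMDQ_DCB             -> ixgbe_vmdq_dcb_configure()
 *                    VMDQ_ONLY            -> ixgbe_vmdq_rx_hw_configure()
 *                    NONE/other           -> ixgbe_rss_disable()
 *   SR-IOV active:   RSS/VMDQ_RSS         -> ixgbe_config_vf_rss()
 *                    DCB/VMDQ_DCB         -> ixgbe_vmdq_dcb_configure()
 *                    DCB_RSS/VMDQ_DCB_RSS -> rejected (unsupported)
 *                    other                -> ixgbe_config_vf_default()
 *
 * A hypothetical application-side configuration that would land in the
 * VMDQ_ONLY branch (sketch only, using the ethdev API of this DPDK
 * generation; port_id, nb_rxq and nb_txq are placeholders):
 *
 *   struct rte_eth_conf conf = { 0 };
 *   conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
 *   conf.rx_adv_conf.vmdq_rx_conf.nb_queue_pools = ETH_64_POOLS;
 *   rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */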
4312 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4314 struct ixgbe_hw *hw =
4315 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4319 if (hw->mac.type == ixgbe_mac_82598EB)
4322 /* disable arbiter before setting MTQC */
4323 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4324 rttdcs |= IXGBE_RTTDCS_ARBDIS;
4325 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4327 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4329 * SRIOV inactive scheme
4330 * any DCB w/o VMDq multi-queue setting
4332 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4333 ixgbe_vmdq_tx_hw_configure(hw);
4335 mtqc = IXGBE_MTQC_64Q_1PB;
4336 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4339 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4342 * SRIOV active scheme
4343 * FIXME if support DCB together with VMDq & SRIOV
4346 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4349 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4352 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4356 mtqc = IXGBE_MTQC_64Q_1PB;
4357 PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4359 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4362 /* re-enable arbiter */
4363 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4364 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
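/*
 * Editor's note: in the SR-IOV branch above MTQC is derived from the
 * number of active pools -- 64 pools select VT_ENA | 64VF, 32 pools
 * VT_ENA | 32VF, 16 pools additionally enable RT_ENA, and an invalid
 * pool count falls back to 64Q_1PB with an error log. The RTTDCS
 * arbiter is disabled around the update because, per the comment above,
 * MTQC may only be changed while the Tx descriptor arbiter is off.
 */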
4370 * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4372 * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4373 * spec rev. 3.0 chapter 8.2.3.8.13.
4375 * @pool Memory pool of the Rx queue
4377 static inline uint32_t
4378 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4380 struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4382 /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4385 (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4388 return IXGBE_RSCCTL_MAXDESC_16;
4389 else if (maxdesc >= 8)
4390 return IXGBE_RSCCTL_MAXDESC_8;
4391 else if (maxdesc >= 4)
4392 return IXGBE_RSCCTL_MAXDESC_4;
4394 return IXGBE_RSCCTL_MAXDESC_1;
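/*
 * Editor's note (illustrative; part of the computation above is
 * elided): the constraint is that MAXDESC * SRRCTL.BSIZEPKT must not
 * exceed 64 KB - 1, so the function derives how many Rx buffers a
 * maximum-sized coalesced packet may span from the mempool's data room
 * and returns the largest encodable value (16 / 8 / 4 / 1) that does
 * not exceed it. For a common 2 KB mbuf data room this works out to
 * IXGBE_RSCCTL_MAXDESC_16.
 */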
4398 * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4401 * (Taken from FreeBSD tree)
4402 * (yes this is all very magic and confusing :)
4405 * @entry the register array entry
4406 * @vector the MSIX vector for this queue
4410 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4412 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4415 vector |= IXGBE_IVAR_ALLOC_VAL;
4417 switch (hw->mac.type) {
4419 case ixgbe_mac_82598EB:
4421 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4423 entry += (type * 64);
4424 index = (entry >> 2) & 0x1F;
4425 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4426 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4427 ivar |= (vector << (8 * (entry & 0x3)));
4428 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4431 case ixgbe_mac_82599EB:
4432 case ixgbe_mac_X540:
4433 if (type == -1) { /* MISC IVAR */
4434 index = (entry & 1) * 8;
4435 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4436 ivar &= ~(0xFF << index);
4437 ivar |= (vector << index);
4438 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4439 } else { /* RX/TX IVARS */
4440 index = (16 * (entry & 1)) + (8 * type);
4441 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4442 ivar &= ~(0xFF << index);
4443 ivar |= (vector << index);
4444 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
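/*
 * Editor's worked example for the 82599/X540 Rx/Tx IVAR encoding above
 * (illustrative only): each IVAR register covers two queues with four
 * 8-bit vector fields, laid out as rx(even), tx(even), rx(odd),
 * tx(odd). For Rx queue 5 (entry = 5, type = 0) mapped to MSI-X
 * vector 3, the register is IVAR(5 >> 1) = IVAR(2), the field offset is
 * 16 * (5 & 1) + 8 * 0 = 16 bits, and the byte written there is
 * 3 | IXGBE_IVAR_ALLOC_VAL.
 */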
4454 void __attribute__((cold))
4455 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4457 uint16_t i, rx_using_sse;
4458 struct ixgbe_adapter *adapter =
4459 (struct ixgbe_adapter *)dev->data->dev_private;
4462 * In order to allow Vector Rx there are a few configuration
4463 * conditions to be met and Rx Bulk Allocation should be allowed.
4465 if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4466 !adapter->rx_bulk_alloc_allowed) {
4467 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4468 "preconditions or RTE_IXGBE_INC_VECTOR is "
4470 dev->data->port_id);
4472 adapter->rx_vec_allowed = false;
4476 * Initialize the appropriate LRO callback.
4478 * If all queues satisfy the bulk allocation preconditions
4479 * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4480 * Otherwise use a single allocation version.
4482 if (dev->data->lro) {
4483 if (adapter->rx_bulk_alloc_allowed) {
4484 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4485 "allocation version");
4486 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4488 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4489 "allocation version");
4490 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4492 } else if (dev->data->scattered_rx) {
4494 * Set the non-LRO scattered callback: there are Vector and
4495 * single allocation versions.
4497 if (adapter->rx_vec_allowed) {
4498 PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4499 "callback (port=%d).",
4500 dev->data->port_id);
4502 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4503 } else if (adapter->rx_bulk_alloc_allowed) {
4504 PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4505 "allocation callback (port=%d).",
4506 dev->data->port_id);
4507 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4509 PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4510 "single allocation) "
4511 "Scattered Rx callback "
4513 dev->data->port_id);
4515 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4518 * Below we set "simple" callbacks according to port/queues parameters.
4519 * If parameters allow we are going to choose between the following
4520 * callbacks:
4521 *    - Vector
4522 *    - Bulk Allocation
4523 *    - Single buffer allocation (the simplest one)
4525 } else if (adapter->rx_vec_allowed) {
4526 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4527 "burst size is no less than %d (port=%d).",
4528 RTE_IXGBE_DESCS_PER_LOOP,
4529 dev->data->port_id);
4531 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4532 } else if (adapter->rx_bulk_alloc_allowed) {
4533 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4534 "satisfied. Rx Burst Bulk Alloc function "
4535 "will be used on port=%d.",
4536 dev->data->port_id);
4538 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4540 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4541 "satisfied, or Scattered Rx is requested "
4543 dev->data->port_id);
4545 dev->rx_pkt_burst = ixgbe_recv_pkts;
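/*
 * Editor's summary of the Rx burst callback selection above
 * (paraphrased from the if/else chain):
 *
 *   LRO enabled:   lro_bulk_alloc if bulk allocation is allowed,
 *                  otherwise lro_single_alloc
 *   scattered Rx:  scattered_pkts_vec if vector Rx is allowed,
 *                  else lro_bulk_alloc, else lro_single_alloc
 *   otherwise:     recv_pkts_vec if vector Rx is allowed,
 *                  else recv_pkts_bulk_alloc, else plain recv_pkts
 */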
4548 /* Propagate information about RX function choice through all queues. */
4551 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4552 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4554 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4555 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4557 rxq->rx_using_sse = rx_using_sse;
4558 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4559 DEV_RX_OFFLOAD_SECURITY);
4564 * ixgbe_set_rsc - configure RSC related port HW registers
4566 * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4567 * of 82599 Spec (x540 configuration is virtually the same).
4571 * Returns 0 in case of success or a non-zero error code
4574 ixgbe_set_rsc(struct rte_eth_dev *dev)
4576 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4577 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4578 struct rte_eth_dev_info dev_info = { 0 };
4579 bool rsc_capable = false;
4585 dev->dev_ops->dev_infos_get(dev, &dev_info);
4586 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4589 if (!rsc_capable && rx_conf->enable_lro) {
4590 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4595 /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4597 if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4599 * According to chapter 4.6.7.2.1 of the Spec Rev.
4600 * 3.0, RSC configuration requires HW CRC stripping to be
4601 * enabled. If the user requested both HW CRC stripping off
4602 * and RSC on, return an error.
4604 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4609 /* RFCTL configuration */
4610 rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4611 if ((rsc_capable) && (rx_conf->enable_lro))
4613 * Since NFS packet coalescing is not supported, clear
4614 * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is enabled.
4617 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4618 IXGBE_RFCTL_NFSR_DIS);
4620 rfctl |= IXGBE_RFCTL_RSC_DIS;
4621 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4623 /* If LRO hasn't been requested - we are done here. */
4624 if (!rx_conf->enable_lro)
4627 /* Set RDRXCTL.RSCACKC bit */
4628 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4629 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4630 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4632 /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4633 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4634 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4636 IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4638 IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4640 IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4642 IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4645 * ixgbe PMD doesn't support header-split at the moment.
4647 * Following the 4.6.7.2.1 chapter of the 82599/x540
4648 * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4649 * should be configured even if header split is not
4650 * enabled. We will configure it to 128 bytes, following the
4651 * recommendation in the spec.
4653 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4654 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4655 IXGBE_SRRCTL_BSIZEHDR_MASK;
4658 * TODO: Consider setting the Receive Descriptor Minimum
4659 * Threshold Size for an RSC case. This is not an obviously
4660 * beneficial option, but one worth considering...
4663 rscctl |= IXGBE_RSCCTL_RSCEN;
4664 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4665 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4668 * RSC: Set ITR interval corresponding to 2K ints/s.
4670 * Full-sized RSC aggregations for a 10Gb/s link will
4671 * arrive at about 20K aggregation/s rate.
4673 * 2K ints/s rate will cause only 10% of the
4674 * aggregations to be closed due to the interrupt timer
4675 * expiration for a streaming at wire-speed case.
4677 * For a sparse streaming case this setting will yield
4678 * at most 500us latency for a single RSC aggregation.
4680 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4681 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
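/*
 * Editor's arithmetic for the comment above: a 500 us ITR interval
 * allows at most 1 s / 500 us = 2000 interrupts per second. Against
 * roughly 20K full-sized aggregations per second on a 10 Gb/s link,
 * only about 2K / 20K = 10% of aggregations are cut short by the timer,
 * while a sparse stream still sees no more than ~500 us of added
 * latency per aggregation.
 */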
4683 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4684 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4685 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4686 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4689 * RSC requires the mapping of the queue to the
4692 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4697 PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4703 * Initializes Receive Unit.
4705 int __attribute__((cold))
4706 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4708 struct ixgbe_hw *hw;
4709 struct ixgbe_rx_queue *rxq;
4720 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4723 PMD_INIT_FUNC_TRACE();
4724 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4727 * Make sure receives are disabled while setting
4728 * up the RX context (registers, descriptor rings, etc.).
4730 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4731 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4733 /* Enable receipt of broadcast frames */
4734 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4735 fctrl |= IXGBE_FCTRL_BAM;
4736 fctrl |= IXGBE_FCTRL_DPF;
4737 fctrl |= IXGBE_FCTRL_PMCF;
4738 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4741 * Configure CRC stripping, if any.
4743 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4744 if (rx_conf->hw_strip_crc)
4745 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4747 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4750 * Configure jumbo frame support, if any.
4752 if (rx_conf->jumbo_frame == 1) {
4753 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4754 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4755 maxfrs &= 0x0000FFFF;
4756 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4757 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4759 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4762 * If loopback mode is configured for 82599, set LPBK bit.
4764 if (hw->mac.type == ixgbe_mac_82599EB &&
4765 dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4766 hlreg0 |= IXGBE_HLREG0_LPBK;
4768 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4770 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4772 /* Setup RX queues */
4773 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4774 rxq = dev->data->rx_queues[i];
4777 * Reset crc_len in case it was changed after queue setup by a
4778 * call to configure.
4780 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4782 /* Setup the Base and Length of the Rx Descriptor Rings */
4783 bus_addr = rxq->rx_ring_phys_addr;
4784 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4785 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4786 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4787 (uint32_t)(bus_addr >> 32));
4788 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4789 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4790 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4791 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4793 /* Configure the SRRCTL register */
4794 #ifdef RTE_HEADER_SPLIT_ENABLE
4796 * Configure Header Split
4798 if (rx_conf->header_split) {
4799 if (hw->mac.type == ixgbe_mac_82599EB) {
4800 /* Must setup the PSRTYPE register */
4803 psrtype = IXGBE_PSRTYPE_TCPHDR |
4804 IXGBE_PSRTYPE_UDPHDR |
4805 IXGBE_PSRTYPE_IPV4HDR |
4806 IXGBE_PSRTYPE_IPV6HDR;
4807 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4809 srrctl = ((rx_conf->split_hdr_size <<
4810 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4811 IXGBE_SRRCTL_BSIZEHDR_MASK);
4812 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4815 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4817 /* Set if packets are dropped when no descriptors available */
4818 if (rxq->drop_en)
4819 srrctl |= IXGBE_SRRCTL_DROP_EN;
4822 * Configure the RX buffer size in the BSIZEPACKET field of
4823 * the SRRCTL register of the queue.
4824 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
4827 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4828 RTE_PKTMBUF_HEADROOM);
4829 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4830 IXGBE_SRRCTL_BSIZEPKT_MASK);
4832 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4834 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4835 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4837 /* Add dual VLAN tag length to account for QinQ frames */
4838 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4839 2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4840 dev->data->scattered_rx = 1;
4843 if (rx_conf->enable_scatter)
4844 dev->data->scattered_rx = 1;
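/*
 * Editor's note on the BSIZEPACKET/scatter decision above (example
 * values are illustrative): buf_size starts as the usable mbuf data
 * room (roughly 2 KB for a default mbuf pool), which SRRCTL rounds
 * down to 1 KB granularity. If the configured max_rx_pkt_len plus two
 * VLAN tags no longer fits into that rounded-down buffer, or scatter
 * is explicitly requested, the driver switches to the scattered Rx
 * path so a single packet may span several descriptors.
 */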
4847 * Device configured with multiple RX queues.
4849 ixgbe_dev_mq_rx_configure(dev);
4852 * Setup the Checksum Register.
4853 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4854 * Enable IP/L4 checksum computation by hardware if requested to do so.
4856 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4857 rxcsum |= IXGBE_RXCSUM_PCSD;
4858 if (rx_conf->hw_ip_checksum)
4859 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4861 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4863 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4865 if (hw->mac.type == ixgbe_mac_82599EB ||
4866 hw->mac.type == ixgbe_mac_X540) {
4867 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4868 if (rx_conf->hw_strip_crc)
4869 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4871 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4872 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4873 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4876 rc = ixgbe_set_rsc(dev);
4880 ixgbe_set_rx_function(dev);
4886 * Initializes Transmit Unit.
4888 void __attribute__((cold))
4889 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4891 struct ixgbe_hw *hw;
4892 struct ixgbe_tx_queue *txq;
4898 PMD_INIT_FUNC_TRACE();
4899 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4901 /* Enable TX CRC (checksum offload requirement) and hw padding (TSO requirement). */
4904 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4905 hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4906 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4908 /* Setup the Base and Length of the Tx Descriptor Rings */
4909 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4910 txq = dev->data->tx_queues[i];
4912 bus_addr = txq->tx_ring_phys_addr;
4913 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4914 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4915 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4916 (uint32_t)(bus_addr >> 32));
4917 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4918 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4919 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4920 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4921 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4924 * Disable Tx Head Writeback RO bit, since this hoses
4925 * bookkeeping if things aren't delivered in order.
4927 switch (hw->mac.type) {
4928 case ixgbe_mac_82598EB:
4929 txctrl = IXGBE_READ_REG(hw,
4930 IXGBE_DCA_TXCTRL(txq->reg_idx));
4931 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4932 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4936 case ixgbe_mac_82599EB:
4937 case ixgbe_mac_X540:
4938 case ixgbe_mac_X550:
4939 case ixgbe_mac_X550EM_x:
4940 case ixgbe_mac_X550EM_a:
4942 txctrl = IXGBE_READ_REG(hw,
4943 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4944 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4945 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4951 /* Device configured with multiple TX queues. */
4952 ixgbe_dev_mq_tx_configure(dev);
4956 * Set up link for 82599 loopback mode Tx->Rx.
4958 static inline void __attribute__((cold))
4959 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4961 PMD_INIT_FUNC_TRACE();
4963 if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4964 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4966 PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4975 IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4976 ixgbe_reset_pipeline_82599(hw);
4978 hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4984 * Start Transmit and Receive Units.
4986 int __attribute__((cold))
4987 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4989 struct ixgbe_hw *hw;
4990 struct ixgbe_tx_queue *txq;
4991 struct ixgbe_rx_queue *rxq;
4998 PMD_INIT_FUNC_TRACE();
4999 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5001 for (i = 0; i < dev->data->nb_tx_queues; i++) {
5002 txq = dev->data->tx_queues[i];
5003 /* Setup Transmit Threshold Registers */
5004 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5005 txdctl |= txq->pthresh & 0x7F;
5006 txdctl |= ((txq->hthresh & 0x7F) << 8);
5007 txdctl |= ((txq->wthresh & 0x7F) << 16);
5008 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5011 if (hw->mac.type != ixgbe_mac_82598EB) {
5012 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5013 dmatxctl |= IXGBE_DMATXCTL_TE;
5014 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5017 for (i = 0; i < dev->data->nb_tx_queues; i++) {
5018 txq = dev->data->tx_queues[i];
5019 if (!txq->tx_deferred_start) {
5020 ret = ixgbe_dev_tx_queue_start(dev, i);
5026 for (i = 0; i < dev->data->nb_rx_queues; i++) {
5027 rxq = dev->data->rx_queues[i];
5028 if (!rxq->rx_deferred_start) {
5029 ret = ixgbe_dev_rx_queue_start(dev, i);
5035 /* Enable Receive engine */
5036 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5037 if (hw->mac.type == ixgbe_mac_82598EB)
5038 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5039 rxctrl |= IXGBE_RXCTRL_RXEN;
5040 hw->mac.ops.enable_rx_dma(hw, rxctrl);
5042 /* If loopback mode is enabled for 82599, set up the link accordingly */
5043 if (hw->mac.type == ixgbe_mac_82599EB &&
5044 dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
5045 ixgbe_setup_loopback_link_82599(hw);
5047 if ((dev->data->dev_conf.rxmode.offloads &
5048 DEV_RX_OFFLOAD_SECURITY) ||
5049 (dev->data->dev_conf.txmode.offloads &
5050 DEV_TX_OFFLOAD_SECURITY)) {
5051 ret = ixgbe_crypto_enable_ipsec(dev);
5054 "ixgbe_crypto_enable_ipsec fails with %d.",
5064 * Start Receive Units for specified queue.
5066 int __attribute__((cold))
5067 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5069 struct ixgbe_hw *hw;
5070 struct ixgbe_rx_queue *rxq;
5074 PMD_INIT_FUNC_TRACE();
5075 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5077 if (rx_queue_id < dev->data->nb_rx_queues) {
5078 rxq = dev->data->rx_queues[rx_queue_id];
5080 /* Allocate buffers for descriptor rings */
5081 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5082 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5086 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5087 rxdctl |= IXGBE_RXDCTL_ENABLE;
5088 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5090 /* Wait until RX Enable ready */
5091 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5094 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5095 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5097 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
5100 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5101 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5102 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5110 * Stop Receive Units for specified queue.
5112 int __attribute__((cold))
5113 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5115 struct ixgbe_hw *hw;
5116 struct ixgbe_adapter *adapter =
5117 (struct ixgbe_adapter *)dev->data->dev_private;
5118 struct ixgbe_rx_queue *rxq;
5122 PMD_INIT_FUNC_TRACE();
5123 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5125 if (rx_queue_id < dev->data->nb_rx_queues) {
5126 rxq = dev->data->rx_queues[rx_queue_id];
5128 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5129 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5130 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5132 /* Wait until RX Enable bit clear */
5133 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5136 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5137 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5139 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5142 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5144 ixgbe_rx_queue_release_mbufs(rxq);
5145 ixgbe_reset_rx_queue(adapter, rxq);
5146 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5155 * Start Transmit Units for specified queue.
5157 int __attribute__((cold))
5158 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5160 struct ixgbe_hw *hw;
5161 struct ixgbe_tx_queue *txq;
5165 PMD_INIT_FUNC_TRACE();
5166 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5168 if (tx_queue_id < dev->data->nb_tx_queues) {
5169 txq = dev->data->tx_queues[tx_queue_id];
5170 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5171 txdctl |= IXGBE_TXDCTL_ENABLE;
5172 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5174 /* Wait until TX Enable ready */
5175 if (hw->mac.type == ixgbe_mac_82599EB) {
5176 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5179 txdctl = IXGBE_READ_REG(hw,
5180 IXGBE_TXDCTL(txq->reg_idx));
5181 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5183 PMD_INIT_LOG(ERR, "Could not enable "
5184 "Tx Queue %d", tx_queue_id);
5187 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5188 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5189 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5197 * Stop Transmit Units for specified queue.
5199 int __attribute__((cold))
5200 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5202 struct ixgbe_hw *hw;
5203 struct ixgbe_tx_queue *txq;
5205 uint32_t txtdh, txtdt;
5208 PMD_INIT_FUNC_TRACE();
5209 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5211 if (tx_queue_id >= dev->data->nb_tx_queues)
5214 txq = dev->data->tx_queues[tx_queue_id];
5216 /* Wait until TX queue is empty */
5217 if (hw->mac.type == ixgbe_mac_82599EB) {
5218 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5220 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5221 txtdh = IXGBE_READ_REG(hw,
5222 IXGBE_TDH(txq->reg_idx));
5223 txtdt = IXGBE_READ_REG(hw,
5224 IXGBE_TDT(txq->reg_idx));
5225 } while (--poll_ms && (txtdh != txtdt));
5227 PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5228 "when stopping.", tx_queue_id);
5231 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5232 txdctl &= ~IXGBE_TXDCTL_ENABLE;
5233 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5235 /* Wait until TX Enable bit clear */
5236 if (hw->mac.type == ixgbe_mac_82599EB) {
5237 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5240 txdctl = IXGBE_READ_REG(hw,
5241 IXGBE_TXDCTL(txq->reg_idx));
5242 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5244 PMD_INIT_LOG(ERR, "Could not disable "
5245 "Tx Queue %d", tx_queue_id);
5248 if (txq->ops != NULL) {
5249 txq->ops->release_mbufs(txq);
5250 txq->ops->reset(txq);
5252 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5258 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5259 struct rte_eth_rxq_info *qinfo)
5261 struct ixgbe_rx_queue *rxq;
5263 rxq = dev->data->rx_queues[queue_id];
5265 qinfo->mp = rxq->mb_pool;
5266 qinfo->scattered_rx = dev->data->scattered_rx;
5267 qinfo->nb_desc = rxq->nb_rx_desc;
5269 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5270 qinfo->conf.rx_drop_en = rxq->drop_en;
5271 qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5275 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5276 struct rte_eth_txq_info *qinfo)
5278 struct ixgbe_tx_queue *txq;
5280 txq = dev->data->tx_queues[queue_id];
5282 qinfo->nb_desc = txq->nb_tx_desc;
5284 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5285 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5286 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5288 qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5289 qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5290 qinfo->conf.txq_flags = txq->txq_flags;
5291 qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5295 * [VF] Initializes Receive Unit.
5297 int __attribute__((cold))
5298 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5300 struct ixgbe_hw *hw;
5301 struct ixgbe_rx_queue *rxq;
5303 uint32_t srrctl, psrtype = 0;
5308 PMD_INIT_FUNC_TRACE();
5309 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5311 if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5312 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5313 "it should be a power of 2");
5317 if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5318 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5319 "it should be equal to or less than %d",
5320 hw->mac.max_rx_queues);
5325 * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5326 * disables VF receipt of packets if the PF MTU is > 1500.
5327 * This is done to deal with an 82599 limitation that requires
5328 * the PF and all VFs to share the same MTU.
5329 * The PF driver then re-enables VF receipt of packets when
5330 * the VF driver issues an IXGBE_VF_SET_LPE request.
5331 * In the meantime, the VF device cannot be used, even if the VF driver
5332 * and the Guest VM network stack are ready to accept packets with a
5333 * size up to the PF MTU.
5334 * As a work-around to this PF behaviour, force the call to
5335 * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5336 * VF packet reception works in all cases.
5338 ixgbevf_rlpml_set_vf(hw,
5339 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5341 /* Setup RX queues */
5342 for (i = 0; i < dev->data->nb_rx_queues; i++) {
5343 rxq = dev->data->rx_queues[i];
5345 /* Allocate buffers for descriptor rings */
5346 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5350 /* Setup the Base and Length of the Rx Descriptor Rings */
5351 bus_addr = rxq->rx_ring_phys_addr;
5353 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5354 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5355 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5356 (uint32_t)(bus_addr >> 32));
5357 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5358 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5359 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5360 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5363 /* Configure the SRRCTL register */
5364 #ifdef RTE_HEADER_SPLIT_ENABLE
5366 * Configure Header Split
5368 if (dev->data->dev_conf.rxmode.header_split) {
5369 srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5370 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5371 IXGBE_SRRCTL_BSIZEHDR_MASK);
5372 srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5375 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5377 /* Set if packets are dropped when no descriptors available */
5378 if (rxq->drop_en)
5379 srrctl |= IXGBE_SRRCTL_DROP_EN;
5382 * Configure the RX buffer size in the BSIZEPACKET field of
5383 * the SRRCTL register of the queue.
5384 * The value is in 1 KB resolution. Valid values can be from 1 KB to 16 KB.
5387 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5388 RTE_PKTMBUF_HEADROOM);
5389 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5390 IXGBE_SRRCTL_BSIZEPKT_MASK);
5393 * VF modification to write virtual function SRRCTL register
5395 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5397 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5398 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5400 if (dev->data->dev_conf.rxmode.enable_scatter ||
5401 /* Add dual VLAN tag length to account for QinQ frames */
5402 (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5403 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5404 if (!dev->data->scattered_rx)
5405 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5406 dev->data->scattered_rx = 1;
5410 #ifdef RTE_HEADER_SPLIT_ENABLE
5411 if (dev->data->dev_conf.rxmode.header_split)
5412 /* Must setup the PSRTYPE register */
5413 psrtype = IXGBE_PSRTYPE_TCPHDR |
5414 IXGBE_PSRTYPE_UDPHDR |
5415 IXGBE_PSRTYPE_IPV4HDR |
5416 IXGBE_PSRTYPE_IPV6HDR;
5419 /* Set RQPL for VF RSS according to max Rx queue */
5420 psrtype |= (dev->data->nb_rx_queues >> 1) <<
5421 IXGBE_PSRTYPE_RQPL_SHIFT;
5422 IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5424 ixgbe_set_rx_function(dev);
5430 * [VF] Initializes Transmit Unit.
5432 void __attribute__((cold))
5433 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5435 struct ixgbe_hw *hw;
5436 struct ixgbe_tx_queue *txq;
5441 PMD_INIT_FUNC_TRACE();
5442 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5444 /* Setup the Base and Length of the Tx Descriptor Rings */
5445 for (i = 0; i < dev->data->nb_tx_queues; i++) {
5446 txq = dev->data->tx_queues[i];
5447 bus_addr = txq->tx_ring_phys_addr;
5448 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5449 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5450 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5451 (uint32_t)(bus_addr >> 32));
5452 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5453 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5454 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5455 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5456 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5459 * Disable Tx Head Writeback RO bit, since this hoses
5460 * bookkeeping if things aren't delivered in order.
5462 txctrl = IXGBE_READ_REG(hw,
5463 IXGBE_VFDCA_TXCTRL(i));
5464 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5465 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5471 * [VF] Start Transmit and Receive Units.
5473 void __attribute__((cold))
5474 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5476 struct ixgbe_hw *hw;
5477 struct ixgbe_tx_queue *txq;
5478 struct ixgbe_rx_queue *rxq;
5484 PMD_INIT_FUNC_TRACE();
5485 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5487 for (i = 0; i < dev->data->nb_tx_queues; i++) {
5488 txq = dev->data->tx_queues[i];
5489 /* Setup Transmit Threshold Registers */
5490 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5491 txdctl |= txq->pthresh & 0x7F;
5492 txdctl |= ((txq->hthresh & 0x7F) << 8);
5493 txdctl |= ((txq->wthresh & 0x7F) << 16);
5494 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5497 for (i = 0; i < dev->data->nb_tx_queues; i++) {
5499 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5500 txdctl |= IXGBE_TXDCTL_ENABLE;
5501 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5504 /* Wait until TX Enable ready */
5507 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5508 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5510 PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5512 for (i = 0; i < dev->data->nb_rx_queues; i++) {
5514 rxq = dev->data->rx_queues[i];
5516 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5517 rxdctl |= IXGBE_RXDCTL_ENABLE;
5518 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5520 /* Wait until RX Enable ready */
5524 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5525 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5527 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5529 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5534 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5535 int __attribute__((weak))
5536 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5541 uint16_t __attribute__((weak))
5542 ixgbe_recv_pkts_vec(
5543 void __rte_unused *rx_queue,
5544 struct rte_mbuf __rte_unused **rx_pkts,
5545 uint16_t __rte_unused nb_pkts)
5550 uint16_t __attribute__((weak))
5551 ixgbe_recv_scattered_pkts_vec(
5552 void __rte_unused *rx_queue,
5553 struct rte_mbuf __rte_unused **rx_pkts,
5554 uint16_t __rte_unused nb_pkts)
5559 int __attribute__((weak))
5560 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)