1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation.
3 * Copyright 2014 6WIND S.A.
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
35 #include <rte_ether.h>
36 #include <ethdev_driver.h>
37 #include <rte_security_driver.h>
38 #include <rte_prefetch.h>
42 #include <rte_string_fns.h>
43 #include <rte_errno.h>
48 #include "ixgbe_logs.h"
49 #include "base/ixgbe_api.h"
50 #include "base/ixgbe_vf.h"
51 #include "ixgbe_ethdev.h"
52 #include "base/ixgbe_dcb.h"
53 #include "base/ixgbe_common.h"
54 #include "ixgbe_rxtx.h"
/*
 * TX offload capability mask and prefetch helper macros.
 * NOTE(review): the embedded line numbering has gaps (58, 60, 63-71, 75,
 * 78-79, 81-82, 84, 86, 88, 90-91 missing) — the #else/#endif arms and
 * several mask members (IP/L4 cksum, VLAN, TSO flags) are not visible in
 * this listing; treat this as an excerpt, not compilable code.
 */
56 #ifdef RTE_LIBRTE_IEEE1588
57 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
/* Fallback when IEEE1588 support is compiled out (the #else is not visible here). */
59 #define IXGBE_TX_IEEE1588_TMST 0
61 /* Bit Mask to indicate what bits required for building TX context */
62 #define IXGBE_TX_OFFLOAD_MASK ( \
72 PKT_TX_OUTER_IP_CKSUM | \
73 PKT_TX_SEC_OFFLOAD | \
74 IXGBE_TX_IEEE1588_TMST)
/* Everything in PKT_TX_OFFLOAD_MASK that this PMD cannot handle. */
76 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
77 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
80 #define RTE_PMD_USE_PREFETCH
83 #ifdef RTE_PMD_USE_PREFETCH
85 * Prefetch a cache line into all cache levels.
87 #define rte_ixgbe_prefetch(p) rte_prefetch0(p)
/* No-op variant when prefetching is disabled (the #else is not visible here). */
89 #define rte_ixgbe_prefetch(p) do {} while (0)
92 /*********************************************************************
96 **********************************************************************/
99 * Check for descriptors with their DD bit set and free mbufs.
100 * Return the total number of buffers freed.
/*
 * Bulk-free mbufs for descriptors whose DD (Descriptor Done) bit is set.
 * On success, advances tx_next_dd and nb_tx_free by tx_rs_thresh and
 * returns tx_rs_thresh; returns early (0 — TODO confirm, the return line
 * is missing from this listing) when the threshold descriptor is not done.
 * NOTE(review): listing is incomplete (lines 104, 106-107, 113-115, 124-125,
 * 127-128, 133-139, 141, 147 missing) — local declarations of status/i/nb_free
 * and several branch bodies are not visible.
 */
102 static __rte_always_inline int
103 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
105 struct ixgbe_tx_entry *txep;
108 struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
110 /* check DD bit on threshold descriptor */
111 status = txq->tx_ring[txq->tx_next_dd].wb.status;
112 if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
116 * first buffer to free from S/W ring is at index
117 * tx_next_dd - (tx_rs_thresh-1)
119 txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
121 for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
122 /* free buffers one at a time */
123 m = rte_pktmbuf_prefree_seg(txep->mbuf);
126 if (unlikely(m == NULL))
/* Flush the batch when it is full, or when the mempool changes mid-run. */
129 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
130 (nb_free > 0 && m->pool != free[0]->pool)) {
131 rte_mempool_put_bulk(free[0]->pool,
132 (void **)free, nb_free);
/* Final flush of any remaining batched mbufs. */
140 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
142 /* buffers were freed, update counters */
143 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
144 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
/* Wrap tx_next_dd; works because tx_rs_thresh divides the ring size. */
145 if (txq->tx_next_dd >= txq->nb_tx_desc)
146 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
148 return txq->tx_rs_thresh;
151 /* Populate 4 descriptors with data from 4 mbufs */
/*
 * NOTE(review): the function's storage-class/return-type line (152) and the
 * declarations of pkt_len/i are missing from this listing; in upstream DPDK
 * this is a static inline void helper — TODO confirm against full source.
 */
153 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
155 uint64_t buf_dma_addr;
159 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
160 buf_dma_addr = rte_mbuf_data_iova(*pkts);
161 pkt_len = (*pkts)->data_len;
163 /* write data to descriptor */
164 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
166 txdp->read.cmd_type_len =
167 rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
169 txdp->read.olinfo_status =
170 rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
/* Warm the cache line holding the mbuf's pool pointer for later freeing. */
172 rte_prefetch0(&(*pkts)->pool);
176 /* Populate 1 descriptor with data from 1 mbuf */
/*
 * Single-descriptor variant of tx4(), used for the leftover packets after
 * the 4-at-a-time main loop in ixgbe_tx_fill_hw_ring().
 * NOTE(review): storage-class/return-type line and pkt_len declaration are
 * missing from this listing.
 */
178 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
180 uint64_t buf_dma_addr;
183 buf_dma_addr = rte_mbuf_data_iova(*pkts);
184 pkt_len = (*pkts)->data_len;
186 /* write data to descriptor */
187 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
188 txdp->read.cmd_type_len =
189 rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190 txdp->read.olinfo_status =
191 rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
192 rte_prefetch0(&(*pkts)->pool);
196 * Fill H/W descriptor ring with mbuf data.
197 * Copy mbuf pointers to the S/W ring.
/*
 * Processes nb_pkts in chunks of 4 via tx4(), then the remainder one at a
 * time via tx1(). Assumes the caller has already verified there is room
 * from tx_tail to the end of the ring for nb_pkts descriptors.
 * NOTE(review): signature line 201, opening brace, and the declarations of
 * i/j are missing from this listing.
 */
200 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
203 volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
204 struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
205 const int N_PER_LOOP = 4;
206 const int N_PER_LOOP_MASK = N_PER_LOOP-1;
207 int mainpart, leftover;
211 * Process most of the packets in chunks of N pkts. Any
212 * leftover packets will get processed one at a time.
214 mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
215 leftover = (nb_pkts & ((uint32_t) N_PER_LOOP_MASK));
216 for (i = 0; i < mainpart; i += N_PER_LOOP) {
217 /* Copy N mbuf pointers to the S/W ring */
218 for (j = 0; j < N_PER_LOOP; ++j) {
219 (txep + i + j)->mbuf = *(pkts + i + j);
221 tx4(txdp + i, pkts + i);
224 if (unlikely(leftover > 0)) {
225 for (i = 0; i < leftover; ++i) {
226 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
227 tx1(txdp + mainpart + i, pkts + mainpart + i);
/*
 * Simple TX burst: transmit up to nb_pkts single-segment packets with no
 * per-packet offload context. Reclaims descriptors when nb_tx_free drops
 * below tx_free_thresh, clamps nb_pkts to what is available, fills the
 * ring (handling wrap-around), sets RS bits at tx_rs_thresh boundaries,
 * and writes the new tail to TDT. Returns the number of packets queued.
 * NOTE(review): listing is incomplete — the nb_pkts parameter line,
 * declarations of n, the wrap-around handling between lines 279-284, and
 * the return statement are among the missing lines.
 */
232 static inline uint16_t
233 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
236 struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
237 volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
241 * Begin scanning the H/W ring for done descriptors when the
242 * number of available descriptors drops below tx_free_thresh. For
243 * each done descriptor, free the associated buffer.
245 if (txq->nb_tx_free < txq->tx_free_thresh)
246 ixgbe_tx_free_bufs(txq);
248 /* Only use descriptors that are available */
249 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
250 if (unlikely(nb_pkts == 0))
253 /* Use exactly nb_pkts descriptors */
254 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
257 * At this point, we know there are enough descriptors in the
258 * ring to transmit all the packets. This assumes that each
259 * mbuf contains a single segment, and that no new offloads
260 * are expected, which would require a new context descriptor.
264 * See if we're going to wrap-around. If so, handle the top
265 * of the descriptor ring first, then do the bottom. If not,
266 * the processing looks just like the "bottom" part anyway...
268 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
269 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
270 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
273 * We know that the last descriptor in the ring will need to
274 * have its RS bit set because tx_rs_thresh has to be
275 * a divisor of the ring size
277 tx_r[txq->tx_next_rs].read.cmd_type_len |=
278 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
279 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
284 /* Fill H/W descriptor ring with mbuf data */
285 ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
286 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
289 * Determine if RS bit should be set
290 * This is what we actually want:
291 * if ((txq->tx_tail - 1) >= txq->tx_next_rs)
292 * but instead of subtracting 1 and doing >=, we can just do
293 * greater than without subtracting.
295 if (txq->tx_tail > txq->tx_next_rs) {
296 tx_r[txq->tx_next_rs].read.cmd_type_len |=
297 rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
298 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
300 if (txq->tx_next_rs >= txq->nb_tx_desc)
301 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
305 * Check for wrap-around. This would only happen if we used
306 * up to the last descriptor in the ring, no more, no less.
308 if (txq->tx_tail >= txq->nb_tx_desc)
311 /* update tail pointer */
313 IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
/*
 * Public simple TX entry point: forwards to tx_xmit_pkts() directly for
 * small bursts, otherwise loops in chunks of RTE_PMD_IXGBE_TX_MAX_BURST.
 * NOTE(review): the loop construct, the early-exit on short ret, and the
 * final return are on lines missing from this listing.
 */
319 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
324 /* Try to transmit at least chunks of TX_MAX_BURST pkts */
325 if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
326 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
328 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
333 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
334 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
335 nb_tx = (uint16_t)(nb_tx + ret);
336 nb_pkts = (uint16_t)(nb_pkts - ret);
/*
 * Vectorized TX entry point: splits the burst into tx_rs_thresh-sized
 * chunks and hands each to ixgbe_xmit_fixed_burst_vec().
 * NOTE(review): only a fragment is visible here — the surrounding loop,
 * nb_tx/num/ret declarations, and the return are on missing lines.
 */
345 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
349 struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
354 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
355 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
/*
 * Build a TX advanced context descriptor for the requested offloads
 * (VLAN insertion, TSO, L3/L4 checksum, outer-IP checksum, inline IPsec)
 * and cache the (flags, offload) pair in txq->ctx_cache[ctx_idx] so later
 * packets with identical offloads can reuse the context.
 * tx_offload_mask records which offload fields are relevant for the chosen
 * descriptor type; only masked-in bits participate in cache comparison.
 * NOTE(review): listing is incomplete — the 'break' statements in the L4
 * switch, the #endif lines, and several statement left-hand sides (e.g.
 * the seqnum_seed/mss_l4len_idx assignments around lines 461-467) are on
 * missing lines.
 */
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
370 __rte_unused uint64_t *mdata)
372 uint32_t type_tucmd_mlhl;
373 uint32_t mss_l4len_idx = 0;
375 uint32_t vlan_macip_lens;
376 union ixgbe_tx_offload tx_offload_mask;
377 uint32_t seqnum_seed = 0;
379 ctx_idx = txq->ctx_curr;
380 tx_offload_mask.data[0] = 0;
381 tx_offload_mask.data[1] = 0;
384 /* Specify which HW CTX to upload. */
385 mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
387 if (ol_flags & PKT_TX_VLAN_PKT) {
388 tx_offload_mask.vlan_tci |= ~0;
391 /* check if TCP segmentation required for this packet */
392 if (ol_flags & PKT_TX_TCP_SEG) {
393 /* implies IP cksum in IPv4 */
394 if (ol_flags & PKT_TX_IP_CKSUM)
395 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
396 IXGBE_ADVTXD_TUCMD_L4T_TCP |
397 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
399 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
400 IXGBE_ADVTXD_TUCMD_L4T_TCP |
401 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
403 tx_offload_mask.l2_len |= ~0;
404 tx_offload_mask.l3_len |= ~0;
405 tx_offload_mask.l4_len |= ~0;
406 tx_offload_mask.tso_segsz |= ~0;
407 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
408 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
409 } else { /* no TSO, check if hardware checksum is needed */
410 if (ol_flags & PKT_TX_IP_CKSUM) {
411 type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
412 tx_offload_mask.l2_len |= ~0;
413 tx_offload_mask.l3_len |= ~0;
416 switch (ol_flags & PKT_TX_L4_MASK) {
417 case PKT_TX_UDP_CKSUM:
418 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
419 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
420 mss_l4len_idx |= sizeof(struct rte_udp_hdr)
421 << IXGBE_ADVTXD_L4LEN_SHIFT;
422 tx_offload_mask.l2_len |= ~0;
423 tx_offload_mask.l3_len |= ~0;
425 case PKT_TX_TCP_CKSUM:
426 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
427 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428 mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
429 << IXGBE_ADVTXD_L4LEN_SHIFT;
430 tx_offload_mask.l2_len |= ~0;
431 tx_offload_mask.l3_len |= ~0;
433 case PKT_TX_SCTP_CKSUM:
434 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
435 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
436 mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
437 << IXGBE_ADVTXD_L4LEN_SHIFT;
438 tx_offload_mask.l2_len |= ~0;
439 tx_offload_mask.l3_len |= ~0;
/* Default case: no L4 checksum offload requested. */
442 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
443 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
448 if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
449 tx_offload_mask.outer_l2_len |= ~0;
450 tx_offload_mask.outer_l3_len |= ~0;
451 tx_offload_mask.l2_len |= ~0;
452 seqnum_seed |= tx_offload.outer_l3_len
453 << IXGBE_ADVTXD_OUTER_IPLEN;
454 seqnum_seed |= tx_offload.l2_len
455 << IXGBE_ADVTXD_TUNNEL_LEN;
457 #ifdef RTE_LIB_SECURITY
458 if (ol_flags & PKT_TX_SEC_OFFLOAD) {
459 union ixgbe_crypto_tx_desc_md *md =
460 (union ixgbe_crypto_tx_desc_md *)mdata;
462 (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
463 type_tucmd_mlhl |= md->enc ?
464 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
465 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
467 (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
468 tx_offload_mask.sa_idx |= ~0;
469 tx_offload_mask.sec_pad_len |= ~0;
/* Record this context in the cache for reuse by what_advctx_update(). */
473 txq->ctx_cache[ctx_idx].flags = ol_flags;
474 txq->ctx_cache[ctx_idx].tx_offload.data[0] =
475 tx_offload_mask.data[0] & tx_offload.data[0];
476 txq->ctx_cache[ctx_idx].tx_offload.data[1] =
477 tx_offload_mask.data[1] & tx_offload.data[1];
478 txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
480 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
481 vlan_macip_lens = tx_offload.l3_len;
/* For tunneled packets the MACLEN field carries the outer L2 length. */
482 if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
483 vlan_macip_lens |= (tx_offload.outer_l2_len <<
484 IXGBE_ADVTXD_MACLEN_SHIFT);
486 vlan_macip_lens |= (tx_offload.l2_len <<
487 IXGBE_ADVTXD_MACLEN_SHIFT);
488 vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
489 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
490 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
491 ctx_txd->seqnum_seed = seqnum_seed;
495 * Check which hardware context can be used. Use the existing match
496 * or create a new context descriptor.
/*
 * Returns the index of a cached context matching (flags, tx_offload), or
 * IXGBE_CTX_NUM to signal that a new context descriptor must be built.
 * NOTE(review): the two comparison blocks below look identical, but line
 * 513 is missing from this listing — in upstream DPDK it toggles
 * txq->ctx_curr between the two checks, so the second block really tests
 * the *other* cached context. Do not "fix" the apparent duplication
 * without consulting the full source.
 */
498 static inline uint32_t
499 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
500 union ixgbe_tx_offload tx_offload)
502 /* If match with the current used context */
503 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
504 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
505 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
506 & tx_offload.data[0])) &&
507 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
508 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
509 & tx_offload.data[1]))))
510 return txq->ctx_curr;
512 /* What if match with the next context */
514 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
515 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
516 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
517 & tx_offload.data[0])) &&
518 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
519 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
520 & tx_offload.data[1]))))
521 return txq->ctx_curr;
523 /* Mismatch, use the previous context */
524 return IXGBE_CTX_NUM;
/*
 * Translate mbuf checksum/TSO offload flags into ADVTXD POPTS bits
 * (IXSM for IP checksum insertion, TXSM for L4 checksum insertion).
 * NOTE(review): the declaration/initialization of tmp and the return
 * statement are on lines missing from this listing.
 */
527 static inline uint32_t
528 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
532 if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
533 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
534 if (ol_flags & PKT_TX_IP_CKSUM)
535 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
/* TSO implies L4 checksum insertion as well. */
536 if (ol_flags & PKT_TX_TCP_SEG)
537 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
/*
 * Translate mbuf offload flags into ADVTXD command-type bits: VLAN insert
 * (VLE), TSO enable (TSE), outer-IP checksum, and MACsec.
 * NOTE(review): the return statement is on a line missing from this listing.
 */
541 static inline uint32_t
542 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
544 uint32_t cmdtype = 0;
546 if (ol_flags & PKT_TX_VLAN_PKT)
547 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
548 if (ol_flags & PKT_TX_TCP_SEG)
549 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
550 if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
551 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
552 if (ol_flags & PKT_TX_MACSEC)
553 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
557 /* Default RS bit threshold values */
/* Overridable defaults; the matching #endif lines are not visible here. */
558 #ifndef DEFAULT_TX_RS_THRESH
559 #define DEFAULT_TX_RS_THRESH 32
561 #ifndef DEFAULT_TX_FREE_THRESH
562 #define DEFAULT_TX_FREE_THRESH 32
565 /* Reset transmit descriptors after they have been used */
/*
 * Reclaim up to tx_rs_thresh descriptors from the full-featured TX path.
 * Checks the DD bit on the descriptor carrying the RS bit for the batch;
 * if done, clears its status and credits nb_tx_free. Returns 0 on success
 * and non-zero when nothing could be cleaned (return lines are missing
 * from this listing — TODO confirm exact values against full source).
 */
567 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
569 struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
570 volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
571 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
572 uint16_t nb_tx_desc = txq->nb_tx_desc;
573 uint16_t desc_to_clean_to;
574 uint16_t nb_tx_to_clean;
577 /* Determine the last descriptor needing to be cleaned */
578 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
579 if (desc_to_clean_to >= nb_tx_desc)
580 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
582 /* Check to make sure the last descriptor to clean is done */
583 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
584 status = txr[desc_to_clean_to].wb.status;
585 if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
587 "TX descriptor %4u is not done"
588 "(port=%d queue=%d)",
590 txq->port_id, txq->queue_id);
591 /* Failed to clean any descriptors, better luck next time */
595 /* Figure out how many descriptors will be cleaned */
/* Account for ring wrap when the clean range crosses index 0. */
596 if (last_desc_cleaned > desc_to_clean_to)
597 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
600 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
604 "Cleaning %4u TX descriptors: %4u to %4u "
605 "(port=%d queue=%d)",
606 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
607 txq->port_id, txq->queue_id);
610 * The last descriptor to clean is done, so that means all the
611 * descriptors from the last descriptor that was cleaned
612 * up to the last descriptor with the RS bit set
613 * are done. Only reset the threshold descriptor.
615 txr[desc_to_clean_to].wb.status = 0;
617 /* Update the txq to reflect the last descriptor that was cleaned */
618 txq->last_desc_cleaned = desc_to_clean_to;
619 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
/*
 * Full-featured TX burst: supports multi-segment mbufs, VLAN, checksum,
 * TSO, outer-IP checksum, IEEE1588 timestamping, and inline IPsec.
 * Per packet: compute required descriptors (segments + optional context
 * descriptor), reclaim ring space via ixgbe_xmit_cleanup() as needed,
 * emit the context descriptor when offloads change, then one data
 * descriptor per segment; finally set EOP/RS bits and write the tail
 * register. Returns the number of packets actually queued.
 * NOTE(review): this listing is heavily sampled — function return type,
 * many local declarations (tx_id, tx_last, nb_used, ol_flags, tx_ol_req,
 * ctx, new_ctx, pkt_len, slen, use_ipsec, nb_tx), loop closing braces,
 * the do/while segment loop header, and several #endif lines are missing.
 */
626 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
629 struct ixgbe_tx_queue *txq;
630 struct ixgbe_tx_entry *sw_ring;
631 struct ixgbe_tx_entry *txe, *txn;
632 volatile union ixgbe_adv_tx_desc *txr;
633 volatile union ixgbe_adv_tx_desc *txd, *txp;
634 struct rte_mbuf *tx_pkt;
635 struct rte_mbuf *m_seg;
636 uint64_t buf_dma_addr;
637 uint32_t olinfo_status;
638 uint32_t cmd_type_len;
649 union ixgbe_tx_offload tx_offload;
650 #ifdef RTE_LIB_SECURITY
654 tx_offload.data[0] = 0;
655 tx_offload.data[1] = 0;
657 sw_ring = txq->sw_ring;
659 tx_id = txq->tx_tail;
660 txe = &sw_ring[tx_id];
663 /* Determine if the descriptor ring needs to be cleaned. */
664 if (txq->nb_tx_free < txq->tx_free_thresh)
665 ixgbe_xmit_cleanup(txq);
667 rte_prefetch0(&txe->mbuf->pool);
670 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
673 pkt_len = tx_pkt->pkt_len;
676 * Determine how many (if any) context descriptors
677 * are needed for offload functionality.
679 ol_flags = tx_pkt->ol_flags;
680 #ifdef RTE_LIB_SECURITY
681 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
684 /* If hardware offload required */
685 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
687 tx_offload.l2_len = tx_pkt->l2_len;
688 tx_offload.l3_len = tx_pkt->l3_len;
689 tx_offload.l4_len = tx_pkt->l4_len;
690 tx_offload.vlan_tci = tx_pkt->vlan_tci;
691 tx_offload.tso_segsz = tx_pkt->tso_segsz;
692 tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
693 tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
694 #ifdef RTE_LIB_SECURITY
696 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
697 (union ixgbe_crypto_tx_desc_md *)
698 rte_security_dynfield(tx_pkt);
699 tx_offload.sa_idx = ipsec_mdata->sa_idx;
700 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
704 /* If new context need be built or reuse the exist ctx. */
705 ctx = what_advctx_update(txq, tx_ol_req,
707 /* Only allocate context descriptor if required*/
708 new_ctx = (ctx == IXGBE_CTX_NUM);
713 * Keep track of how many descriptors are used this loop
714 * This will always be the number of segments + the number of
715 * Context descriptors required to transmit the packet
717 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
720 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
721 /* set RS on the previous packet in the burst */
722 txp->read.cmd_type_len |=
723 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
726 * The number of descriptors that must be allocated for a
727 * packet is the number of segments of that packet, plus 1
728 * Context Descriptor for the hardware offload, if any.
729 * Determine the last TX descriptor to allocate in the TX ring
730 * for the packet, starting from the current position (tx_id)
733 tx_last = (uint16_t) (tx_id + nb_used - 1);
736 if (tx_last >= txq->nb_tx_desc)
737 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
739 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
740 " tx_first=%u tx_last=%u",
741 (unsigned) txq->port_id,
742 (unsigned) txq->queue_id,
748 * Make sure there are enough TX descriptors available to
749 * transmit the entire packet.
750 * nb_used better be less than or equal to txq->tx_rs_thresh
752 if (nb_used > txq->nb_tx_free) {
754 "Not enough free TX descriptors "
755 "nb_used=%4u nb_free=%4u "
756 "(port=%d queue=%d)",
757 nb_used, txq->nb_tx_free,
758 txq->port_id, txq->queue_id);
760 if (ixgbe_xmit_cleanup(txq) != 0) {
761 /* Could not clean any descriptors */
767 /* nb_used better be <= txq->tx_rs_thresh */
768 if (unlikely(nb_used > txq->tx_rs_thresh)) {
770 "The number of descriptors needed to "
771 "transmit the packet exceeds the "
772 "RS bit threshold. This will impact "
774 "nb_used=%4u nb_free=%4u "
776 "(port=%d queue=%d)",
777 nb_used, txq->nb_tx_free,
779 txq->port_id, txq->queue_id);
781 * Loop here until there are enough TX
782 * descriptors or until the ring cannot be
785 while (nb_used > txq->nb_tx_free) {
786 if (ixgbe_xmit_cleanup(txq) != 0) {
788 * Could not clean any
800 * By now there are enough free TX descriptors to transmit
805 * Set common flags of all TX Data Descriptors.
807 * The following bits must be set in all Data Descriptors:
808 * - IXGBE_ADVTXD_DTYP_DATA
809 * - IXGBE_ADVTXD_DCMD_DEXT
811 * The following bits must be set in the first Data Descriptor
812 * and are ignored in the other ones:
813 * - IXGBE_ADVTXD_DCMD_IFCS
814 * - IXGBE_ADVTXD_MAC_1588
815 * - IXGBE_ADVTXD_DCMD_VLE
817 * The following bits must only be set in the last Data
819 * - IXGBE_TXD_CMD_EOP
821 * The following bits can be set in any Data Descriptor, but
822 * are only set in the last Data Descriptor:
825 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
826 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
828 #ifdef RTE_LIBRTE_IEEE1588
829 if (ol_flags & PKT_TX_IEEE1588_TMST)
830 cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
836 if (ol_flags & PKT_TX_TCP_SEG) {
837 /* when TSO is on, paylen in descriptor is the
838 * not the packet len but the tcp payload len */
839 pkt_len -= (tx_offload.l2_len +
840 tx_offload.l3_len + tx_offload.l4_len);
844 * Setup the TX Advanced Context Descriptor if required
847 volatile struct ixgbe_adv_tx_context_desc *
850 ctx_txd = (volatile struct
851 ixgbe_adv_tx_context_desc *)
854 txn = &sw_ring[txe->next_id];
855 rte_prefetch0(&txn->mbuf->pool);
/* Recycle the mbuf previously parked in this S/W ring slot. */
857 if (txe->mbuf != NULL) {
858 rte_pktmbuf_free_seg(txe->mbuf);
862 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
864 rte_security_dynfield(tx_pkt));
866 txe->last_id = tx_last;
867 tx_id = txe->next_id;
872 * Setup the TX Advanced Data Descriptor,
873 * This path will go through
874 * whatever new/reuse the context descriptor
876 cmd_type_len |= tx_desc_ol_flags_to_cmdtype(ol_flags);
877 olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
878 olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
881 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
882 #ifdef RTE_LIB_SECURITY
884 olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
/* Per-segment loop body (do/while header is on a missing line). */
890 txn = &sw_ring[txe->next_id];
891 rte_prefetch0(&txn->mbuf->pool);
893 if (txe->mbuf != NULL)
894 rte_pktmbuf_free_seg(txe->mbuf);
898 * Set up Transmit Data Descriptor.
900 slen = m_seg->data_len;
901 buf_dma_addr = rte_mbuf_data_iova(m_seg);
902 txd->read.buffer_addr =
903 rte_cpu_to_le_64(buf_dma_addr);
904 txd->read.cmd_type_len =
905 rte_cpu_to_le_32(cmd_type_len | slen);
906 txd->read.olinfo_status =
907 rte_cpu_to_le_32(olinfo_status);
908 txe->last_id = tx_last;
909 tx_id = txe->next_id;
912 } while (m_seg != NULL);
915 * The last packet data descriptor needs End Of Packet (EOP)
917 cmd_type_len |= IXGBE_TXD_CMD_EOP;
918 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
919 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
921 /* Set RS bit only on threshold packets' last descriptor */
922 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
924 "Setting RS bit on TXD id="
925 "%4u (port=%d queue=%d)",
926 tx_last, txq->port_id, txq->queue_id);
928 cmd_type_len |= IXGBE_TXD_CMD_RS;
930 /* Update txq RS bit counters */
936 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
940 /* set RS on last packet in the burst */
942 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
947 * Set the Transmit Descriptor Tail (TDT)
949 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
950 (unsigned) txq->port_id, (unsigned) txq->queue_id,
951 (unsigned) tx_id, (unsigned) nb_tx);
952 IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
953 txq->tx_tail = tx_id;
958 /*********************************************************************
962 **********************************************************************/
/*
 * TX prepare callback: validates each mbuf in the burst (segment count
 * vs. the 40 - WTHRESH hardware limit, unsupported offload flags, minimum
 * packet length) and fixes up checksums via rte_net_intel_cksum_prepare().
 * NOTE(review): loop braces, the error paths that set rte_errno, the
 * declarations of i/m/ol_flags/ret, and the final return are on lines
 * missing from this listing.
 */
964 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
969 struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
971 for (i = 0; i < nb_pkts; i++) {
973 ol_flags = m->ol_flags;
976 * Check if packet meets requirements for number of segments
978 * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
982 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
/* Reject packets requesting offloads this PMD does not support. */
987 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
992 /* check the size of packet */
993 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
998 #ifdef RTE_ETHDEV_DEBUG_TX
999 ret = rte_validate_tx_offload(m);
1005 ret = rte_net_intel_cksum_prepare(m);
1015 /*********************************************************************
1019 **********************************************************************/
/*
 * Hardware packet-type codes used as indices into the ptype lookup tables
 * below: plain L2/L3/L4 combinations, then NVGRE- and VXLAN-tunneled
 * variants (note the NVGRE codes reuse the plain values — they index a
 * separate table).
 */
1021 #define IXGBE_PACKET_TYPE_ETHER 0X00
1022 #define IXGBE_PACKET_TYPE_IPV4 0X01
1023 #define IXGBE_PACKET_TYPE_IPV4_TCP 0X11
1024 #define IXGBE_PACKET_TYPE_IPV4_UDP 0X21
1025 #define IXGBE_PACKET_TYPE_IPV4_SCTP 0X41
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT 0X03
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP 0X13
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP 0X23
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP 0X43
1030 #define IXGBE_PACKET_TYPE_IPV6 0X04
1031 #define IXGBE_PACKET_TYPE_IPV6_TCP 0X14
1032 #define IXGBE_PACKET_TYPE_IPV6_UDP 0X24
1033 #define IXGBE_PACKET_TYPE_IPV6_SCTP 0X44
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT 0X0C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP 0X1C
1036 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP 0X2C
1037 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP 0X4C
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6 0X05
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP 0X15
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP 0X25
1041 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP 0X45
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6 0X07
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP 0X17
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP 0X27
1045 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP 0X47
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
1049 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP 0X4D
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT 0X0F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP 0X1F
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP 0X2F
1053 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP 0X4F
1055 #define IXGBE_PACKET_TYPE_NVGRE 0X00
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4 0X01
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP 0X11
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP 0X21
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP 0X41
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT 0X03
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP 0X13
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP 0X23
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP 0X43
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6 0X04
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP 0X14
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP 0X24
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP 0X44
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT 0X0C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP 0X1C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP 0X2C
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP 0X4C
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6 0X05
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP 0X15
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP 0X25
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT 0X0D
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1077 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1079 #define IXGBE_PACKET_TYPE_VXLAN 0X80
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4 0X81
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP 0x91
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP 0xA1
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP 0xC1
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT 0x83
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP 0X93
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP 0XA3
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP 0XC3
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6 0X84
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP 0X94
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP 0XA4
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP 0XC4
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT 0X8C
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP 0X9C
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP 0XAC
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP 0XCC
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6 0X85
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP 0X95
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP 0XA5
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT 0X8D
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1101 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1104 * Use 2 different table for normal packet and tunnel packet
1105 * to save the space.
/*
 * Lookup table mapping hardware packet-type codes (non-tunnel) to
 * RTE_PTYPE_* software packet types.
 * NOTE(review): the declaration line 1107 (presumably the const uint32_t
 * element type — TODO confirm) and several entries/closing lines are
 * missing from this listing.
 */
1108 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1109 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1110 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1112 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1113 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1114 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1115 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1116 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1117 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1118 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1119 RTE_PTYPE_L3_IPV4_EXT,
1120 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1121 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1122 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1123 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1124 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1125 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1126 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1128 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1129 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1130 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1131 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1132 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1133 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1134 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1135 RTE_PTYPE_L3_IPV6_EXT,
1136 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1137 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1138 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1139 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1140 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1141 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1142 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1143 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144 RTE_PTYPE_INNER_L3_IPV6,
1145 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1146 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1148 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1149 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1151 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1152 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1153 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1154 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1155 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156 RTE_PTYPE_INNER_L3_IPV6,
1157 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1158 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1160 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1161 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1163 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1164 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1165 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1166 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1167 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168 RTE_PTYPE_INNER_L3_IPV6_EXT,
1169 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1172 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1173 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1175 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1176 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1177 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1178 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1179 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180 RTE_PTYPE_INNER_L3_IPV6_EXT,
1181 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1182 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1184 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1185 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1186 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1187 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1188 RTE_PTYPE_L2_ETHER |
1189 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1190 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
/* Tunnel packet-type lookup table: maps the hardware packet-type index
 * (with the tunnel bit already masked off, see
 * ixgbe_rxd_pkt_info_to_pkt_type()) to the mbuf packet_type value for
 * NVGRE- and VXLAN-encapsulated frames.
 */
ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
	/* NVGRE (Ethernet-over-GRE) tunnelled packet types */
	[IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
	[IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
	/* NOTE(review): the *_IPV4_IPV6* entries (IPv6-in-IPv4 inside the
	 * tunnel) report only RTE_PTYPE_INNER_L3_IPV4 and no inner L4 —
	 * presumably the HW cannot classify past the second IP header;
	 * confirm against the 82599/x550 datasheet.
	 */
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
		RTE_PTYPE_INNER_L4_TCP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
		RTE_PTYPE_INNER_L4_TCP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
		RTE_PTYPE_INNER_L4_TCP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
		RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
		RTE_PTYPE_INNER_L4_UDP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
		RTE_PTYPE_INNER_L4_UDP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
		RTE_PTYPE_INNER_L4_SCTP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
		RTE_PTYPE_INNER_L4_UDP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
		RTE_PTYPE_INNER_L4_SCTP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
		RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
		RTE_PTYPE_INNER_L4_SCTP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
		RTE_PTYPE_INNER_L4_SCTP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
		RTE_PTYPE_INNER_L4_TCP,
	[IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
		RTE_PTYPE_INNER_L4_UDP,

	/* VXLAN (Ethernet-over-UDP) tunnelled packet types: outer L4 is
	 * always UDP here, hence the extra RTE_PTYPE_L4_UDP bit.
	 */
	[IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4_EXT,
	[IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV6,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV6_EXT,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
		RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
		RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
		RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
	[IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
		RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
		RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1373 ixgbe_monitor_callback(const uint64_t value,
1374 const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
1376 const uint64_t m = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1378 * we expect the DD bit to be set to 1 if this descriptor was already
1381 return (value & m) == m ? -1 : 0;
1385 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1387 volatile union ixgbe_adv_rx_desc *rxdp;
1388 struct ixgbe_rx_queue *rxq = rx_queue;
1391 desc = rxq->rx_tail;
1392 rxdp = &rxq->rx_ring[desc];
1393 /* watch for changes in status bit */
1394 pmc->addr = &rxdp->wb.upper.status_error;
1396 /* comparison callback */
1397 pmc->fn = ixgbe_monitor_callback;
1399 /* the registers are 32-bit */
1400 pmc->size = sizeof(uint32_t);
1405 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
1406 static inline uint32_t
1407 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1410 if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1411 return RTE_PTYPE_UNKNOWN;
1413 pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1415 /* For tunnel packet */
1416 if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1417 /* Remove the tunnel bit to save the space. */
1418 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1419 return ptype_table_tn[pkt_info];
1423 * For x550, if it's not tunnel,
1424 * tunnel type bit should be set to 0.
1425 * Reuse 82599's mask.
1427 pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1429 return ptype_table[pkt_info];
1432 static inline uint64_t
1433 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1435 static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1436 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1437 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1438 PKT_RX_RSS_HASH, 0, 0, 0,
1439 0, 0, 0, PKT_RX_FDIR,
1441 #ifdef RTE_LIBRTE_IEEE1588
1442 static uint64_t ip_pkt_etqf_map[8] = {
1443 0, 0, 0, PKT_RX_IEEE1588_PTP,
1447 if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1448 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1449 ip_rss_types_map[pkt_info & 0XF];
1451 return ip_rss_types_map[pkt_info & 0XF];
1453 return ip_rss_types_map[pkt_info & 0XF];
1457 static inline uint64_t
1458 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1463 * Check if VLAN present only.
1464 * Do not check whether L3/L4 rx checksum done by NIC or not,
1465 * That can be found from rte_eth_rxmode.offloads flag
1467 pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ? vlan_flags : 0;
1469 #ifdef RTE_LIBRTE_IEEE1588
1470 if (rx_status & IXGBE_RXD_STAT_TMST)
1471 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1476 static inline uint64_t
1477 rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
1478 uint8_t rx_udp_csum_zero_err)
1483 * Bit 31: IPE, IPv4 checksum error
1484 * Bit 30: L4I, L4I integrity error
1486 static uint64_t error_to_pkt_flags_map[4] = {
1487 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1488 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1489 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1490 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1492 pkt_flags = error_to_pkt_flags_map[(rx_status >>
1493 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1495 /* Mask out the bad UDP checksum error if the hardware has UDP zero
1496 * checksum error issue, so that the software application will then
1497 * have to recompute the checksum itself if needed.
1499 if ((rx_status & IXGBE_RXDADV_ERR_TCPE) &&
1500 (pkt_info & IXGBE_RXDADV_PKTTYPE_UDP) &&
1501 rx_udp_csum_zero_err)
1502 pkt_flags &= ~PKT_RX_L4_CKSUM_BAD;
1504 if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1505 (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1506 pkt_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
1509 #ifdef RTE_LIB_SECURITY
1510 if (rx_status & IXGBE_RXD_STAT_SECP) {
1511 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1512 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1513 pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
/*
 * LOOK_AHEAD defines how many desc statuses to check beyond the
 * current descriptor.
 * It must be a pound define for optimal performance.
 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
 * function only works with LOOK_AHEAD=8.
 */
#define LOOK_AHEAD 8
#if (LOOK_AHEAD != 8)
#error "PMD IXGBE: LOOK_AHEAD must be 8\n"
/*
 * Scan the hardware Rx ring in LOOK_AHEAD-sized batches for descriptors
 * whose DD bit is set, translate each completed descriptor into its mbuf
 * (length, VLAN, offload flags, packet type, RSS/FDIR hash) and move the
 * mbuf pointers into rxq->rx_stage for ixgbe_rx_fill_from_stage().
 */
ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *rxep;
	struct rte_mbuf *mb;
	uint32_t s[LOOK_AHEAD];
	uint32_t pkt_info[LOOK_AHEAD];
	int i, j, nb_rx = 0;
	uint64_t vlan_flags = rxq->vlan_flags;

	/* get references to current descriptor and S/W ring entry */
	rxdp = &rxq->rx_ring[rxq->rx_tail];
	rxep = &rxq->sw_ring[rxq->rx_tail];

	status = rxdp->wb.upper.status_error;
	/* check to make sure there is at least 1 packet to receive */
	if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))

	/*
	 * Scan LOOK_AHEAD descriptors at a time to determine which
	 * descriptors reference packets that are ready to be received.
	 */
	for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
	     i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
		/* Read desc statuses backwards to avoid race condition */
		for (j = 0; j < LOOK_AHEAD; j++)
			s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);

		/* Compute how many status bits were set */
		for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
				(s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)

		for (j = 0; j < nb_dd; j++)
			pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.

		/* Translate descriptor info to mbuf format */
		for (j = 0; j < nb_dd; ++j) {
			pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
			mb->data_len = pkt_len;
			mb->pkt_len = pkt_len;
			mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);

			/* convert descriptor fields to rte mbuf flags */
			pkt_flags = rx_desc_status_to_pkt_flags(s[j],
			pkt_flags |= rx_desc_error_to_pkt_flags(s[j],
					(uint16_t)pkt_info[j],
					rxq->rx_udp_csum_zero_err);
			pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
					((uint16_t)pkt_info[j]);
			mb->ol_flags = pkt_flags;
			ixgbe_rxd_pkt_info_to_pkt_type
				(pkt_info[j], rxq->pkt_type_mask);

			/* RSS hash and FDIR id share the same dword */
			if (likely(pkt_flags & PKT_RX_RSS_HASH))
				mb->hash.rss = rte_le_to_cpu_32(
					rxdp[j].wb.lower.hi_dword.rss);
			else if (pkt_flags & PKT_RX_FDIR) {
				mb->hash.fdir.hash = rte_le_to_cpu_16(
					rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
					IXGBE_ATR_HASH_MASK;
				mb->hash.fdir.id = rte_le_to_cpu_16(
					rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);

		/* Move mbuf pointers from the S/W ring to the stage */
		for (j = 0; j < LOOK_AHEAD; ++j) {
			rxq->rx_stage[i + j] = rxep[j].mbuf;

		/* stop if all requested packets could not be received */
		if (nb_dd != LOOK_AHEAD)

	/* clear software ring entries so we can cleanup correctly */
	for (i = 0; i < nb_rx; ++i) {
		rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
/*
 * Bulk-allocate rx_free_thresh mbufs from the queue's mempool, place them
 * in the S/W ring starting at the slot after rx_free_trigger, program the
 * matching HW descriptors with their DMA addresses and advance
 * rx_free_trigger (wrapping at the ring end).
 */
ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *rxep;
	struct rte_mbuf *mb;

	/* allocate buffers in bulk directly into the S/W ring */
	alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
	rxep = &rxq->sw_ring[alloc_idx];
	diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
				    rxq->rx_free_thresh);
	/* mempool exhausted: leave the ring untouched */
	if (unlikely(diag != 0))

	rxdp = &rxq->rx_ring[alloc_idx];
	for (i = 0; i < rxq->rx_free_thresh; ++i) {
		/* populate the static rte mbuf fields */
		mb->port = rxq->port_id;
		rte_mbuf_refcnt_set(mb, 1);
		mb->data_off = RTE_PKTMBUF_HEADROOM;

		/* populate the descriptors */
		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
		rxdp[i].read.hdr_addr = 0;
		rxdp[i].read.pkt_addr = dma_addr;

	/* update state of internal queue structure */
	rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
	if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
		rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1675 static inline uint16_t
1676 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1679 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1682 /* how many packets are ready to return? */
1683 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1685 /* copy mbuf pointers to the application's packet list */
1686 for (i = 0; i < nb_pkts; ++i)
1687 rx_pkts[i] = stage[i];
1689 /* update internal queue state */
1690 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1691 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
/*
 * Burst receive using the staged-mbuf path: return previously staged
 * packets first; otherwise scan the HW ring, stage completed mbufs,
 * replenish descriptors once past the free trigger, and bump the RDT
 * register. On refill failure the scan is rewound so no packet is lost.
 */
static inline uint16_t
rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
	struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;

	/* Any previously recv'd pkts will be returned from the Rx stage */
	if (rxq->rx_nb_avail)
		return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);

	/* Scan the H/W ring for packets to receive */
	nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);

	/* update internal queue state */
	rxq->rx_next_avail = 0;
	rxq->rx_nb_avail = nb_rx;
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);

	/* if required, allocate new buffers to replenish descriptors */
	if (rxq->rx_tail > rxq->rx_free_trigger) {
		uint16_t cur_free_trigger = rxq->rx_free_trigger;

		if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
			/* account the whole refill threshold as failed */
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);

			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
				rxq->rx_free_thresh;

			/*
			 * Need to rewind any previous receives if we cannot
			 * allocate new buffers to replenish the old ones.
			 */
			rxq->rx_nb_avail = 0;
			rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
			for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
				rxq->sw_ring[j].mbuf = rxq->rx_stage[i];

		/* update tail pointer */
		IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,

	/* wrap the software tail index at the end of the ring */
	if (rxq->rx_tail >= rxq->nb_rx_desc)

	/* received any packets this loop? */
	if (rxq->rx_nb_avail)
		return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1757 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1759 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1764 if (unlikely(nb_pkts == 0))
1767 if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1768 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1770 /* request is relatively large, chunk it up */
1775 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1776 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1777 nb_rx = (uint16_t)(nb_rx + ret);
1778 nb_pkts = (uint16_t)(nb_pkts - ret);
/*
 * Scalar (non-scattered) Rx burst handler: walk the descriptor ring one
 * entry at a time, swap each completed descriptor's mbuf for a freshly
 * allocated one, fill in mbuf metadata from the descriptor write-back,
 * and update the RDT register once enough descriptors are held back.
 */
ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
	struct ixgbe_rx_queue *rxq;
	volatile union ixgbe_adv_rx_desc *rx_ring;
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *sw_ring;
	struct ixgbe_rx_entry *rxe;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	union ixgbe_adv_rx_desc rxd;
	uint64_t vlan_flags;

	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	sw_ring = rxq->sw_ring;
	vlan_flags = rxq->vlan_flags;
	while (nb_rx < nb_pkts) {
		/*
		 * The order of operations here is important as the DD status
		 * bit must not be read after any other descriptor fields.
		 * rx_ring and rxdp are pointing to volatile data so the order
		 * of accesses cannot be reordered by the compiler. If they were
		 * not volatile, they could be reordered which could lead to
		 * using invalid descriptor fields when read from rxd.
		 */
		rxdp = &rx_ring[rx_id];
		staterr = rxdp->wb.upper.status_error;
		if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))

		/*
		 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
		 * is likely to be invalid and to be dropped by the various
		 * validation checks performed by the network stack.
		 *
		 * Allocate a new mbuf to replenish the RX ring descriptor.
		 * If the allocation fails:
		 *    - arrange for that RX descriptor to be the first one
		 *      being parsed the next time the receive function is
		 *      invoked [on the same queue].
		 *
		 *    - Stop parsing the RX ring and return immediately.
		 *
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbuf have been freed in the mean time.
		 * As a side effect, holding RX descriptors instead of
		 * systematically giving them back to the NIC may lead to
		 * RX ring exhaustion situations.
		 * However, the NIC can gracefully prevent such situations
		 * to happen by sending specific "back-pressure" flow control
		 * frames to its peer(s).
		 */
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
			   "ext_err_stat=0x%08x pkt_len=%u",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) staterr,
			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);
			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;

		rxe = &sw_ring[rx_id];
		/* wrap the ring index */
		if (rx_id == rxq->nb_rx_desc)

		/* Prefetch next mbuf while processing current one. */
		rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_ixgbe_prefetch(&rx_ring[rx_id]);
			rte_ixgbe_prefetch(&sw_ring[rx_id]);

		/* hand the new buffer to the hardware */
			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
		rxdp->read.hdr_addr = 0;
		rxdp->read.pkt_addr = dma_addr;

		/*
		 * Initialize the returned mbuf.
		 * 1) setup generic mbuf fields:
		 *    - number of segments,
		 *    - data length,
		 *    - RX port identifier.
		 * 2) integrate hardware offload data, if any:
		 *    - RSS flag & hash,
		 *    - IP checksum flag,
		 *    - VLAN TCI, if any,
		 *    - error flags.
		 */
		pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
		rxm->pkt_len = pkt_len;
		rxm->data_len = pkt_len;
		rxm->port = rxq->port_id;

		pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
		/* Only valid if PKT_RX_VLAN set in pkt_flags */
		rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);

		pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
		pkt_flags = pkt_flags |
			rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
						   rxq->rx_udp_csum_zero_err);
		pkt_flags = pkt_flags |
			ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
		rxm->ol_flags = pkt_flags;
			ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
						       rxq->pkt_type_mask);

		/* RSS hash and FDIR id share the same write-back dword */
		if (likely(pkt_flags & PKT_RX_RSS_HASH))
			rxm->hash.rss = rte_le_to_cpu_32(
						rxd.wb.lower.hi_dword.rss);
		else if (pkt_flags & PKT_RX_FDIR) {
			rxm->hash.fdir.hash = rte_le_to_cpu_16(
					rxd.wb.lower.hi_dword.csum_ip.csum) &
					IXGBE_ATR_HASH_MASK;
			rxm->hash.fdir.id = rte_le_to_cpu_16(
					rxd.wb.lower.hi_dword.csum_ip.ip_id);

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rx++] = rxm;
	rxq->rx_tail = rx_id;

	/*
	 * If the number of free RX descriptors is greater than the RX free
	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
	 * register.
	 * Update the RDT with the value of the last processed RX descriptor
	 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
	 * hardware point of view...
	 */
	nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
			   "nb_hold=%u nb_rx=%u",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) nb_hold,
		rx_id = (uint16_t) ((rx_id == 0) ?
				     (rxq->nb_rx_desc - 1) : (rx_id - 1));
		IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
	rxq->nb_rx_hold = nb_hold;
1979 * Detect an RSC descriptor.
1981 static inline uint32_t
1982 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1984 return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1985 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1989 * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1991 * Fill the following info in the HEAD buffer of the Rx cluster:
1992 * - RX port identifier
1993 * - hardware offload data, if any:
1995 * - IP checksum flag
1996 * - VLAN TCI, if any
1998 * @head HEAD of the packet cluster
1999 * @desc HW descriptor to get data from
2000 * @rxq Pointer to the Rx queue
2003 ixgbe_fill_cluster_head_buf(
2004 struct rte_mbuf *head,
2005 union ixgbe_adv_rx_desc *desc,
2006 struct ixgbe_rx_queue *rxq,
2012 head->port = rxq->port_id;
2014 /* The vlan_tci field is only valid when PKT_RX_VLAN is
2015 * set in the pkt_flags field.
2017 head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
2018 pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
2019 pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
2020 pkt_flags |= rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
2021 rxq->rx_udp_csum_zero_err);
2022 pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
2023 head->ol_flags = pkt_flags;
2025 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2027 if (likely(pkt_flags & PKT_RX_RSS_HASH))
2028 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2029 else if (pkt_flags & PKT_RX_FDIR) {
2030 head->hash.fdir.hash =
2031 rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2032 & IXGBE_ATR_HASH_MASK;
2033 head->hash.fdir.id =
2034 rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
 * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2041 * @rx_queue Rx queue handle
2042 * @rx_pkts table of received packets
2043 * @nb_pkts size of rx_pkts table
2044 * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
2046 * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2047 * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
2049 * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2050 * 1) When non-EOP RSC completion arrives:
2051 * a) Update the HEAD of the current RSC aggregation cluster with the new
2052 * segment's data length.
2053 * b) Set the "next" pointer of the current segment to point to the segment
2054 * at the NEXTP index.
2055 * c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2056 * in the sw_rsc_ring.
2057 * 2) When EOP arrives we just update the cluster's total length and offload
2058 * flags and deliver the cluster up to the upper layers. In our case - put it
2059 * in the rx_pkts table.
2061 * Returns the number of received packets/clusters (according to the "bulk
2062 * receive" interface).
2064 static inline uint16_t
2065 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2068 struct ixgbe_rx_queue *rxq = rx_queue;
2069 volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2070 struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2071 struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2072 uint16_t rx_id = rxq->rx_tail;
2074 uint16_t nb_hold = rxq->nb_rx_hold;
/* Index of the last descriptor handed back to HW; written to RDT below. */
2075 uint16_t prev_id = rxq->rx_tail;
/*
 * Main receive loop: stop once nb_pkts clusters have been delivered or
 * the next descriptor has not been written back yet (DD bit clear).
 */
2077 while (nb_rx < nb_pkts) {
2079 struct ixgbe_rx_entry *rxe;
2080 struct ixgbe_scattered_rx_entry *sc_entry;
2081 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2082 struct ixgbe_rx_entry *next_rxe = NULL;
2083 struct rte_mbuf *first_seg;
2084 struct rte_mbuf *rxm;
2085 struct rte_mbuf *nmb = NULL;
2086 union ixgbe_adv_rx_desc rxd;
2089 volatile union ixgbe_adv_rx_desc *rxdp;
2094 * The code in this whole file uses the volatile pointer to
2095 * ensure the read ordering of the status and the rest of the
2096 * descriptor fields (on the compiler level only!!!). This is so
2097 * UGLY - why not to just use the compiler barrier instead? DPDK
2098 * even has the rte_compiler_barrier() for that.
2100 * But most importantly this is just wrong because this doesn't
2101 * ensure memory ordering in a general case at all. For
2102 * instance, DPDK is supposed to work on Power CPUs where
2103 * compiler barrier may just not be enough!
2105 * I tried to write only this function properly to have a
2106 * starting point (as a part of an LRO/RSC series) but the
2107 * compiler cursed at me when I tried to cast away the
2108 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2109 * keeping it the way it is for now.
2111 * The code in this file is broken in so many other places and
2112 * will just not work on a big endian CPU anyway therefore the
2113 * lines below will have to be revisited together with the rest
2117 * - Get rid of "volatile" and let the compiler do its job.
2118 * - Use the proper memory barrier (rte_rmb()) to ensure the
2119 * memory ordering below.
2121 rxdp = &rx_ring[rx_id];
2122 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
/* Descriptor not yet written back by HW - nothing more to receive. */
2124 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2129 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2130 "staterr=0x%x data_len=%u",
2131 rxq->port_id, rxq->queue_id, rx_id, staterr,
2132 rte_le_to_cpu_16(rxd.wb.upper.length));
/*
 * Buffer refill strategy depends on the bulk_alloc argument (see
 * the two wrappers below): single-alloc replaces the consumed
 * buffer with a fresh mbuf immediately; bulk-alloc refills in
 * bursts of rx_free_thresh via ixgbe_rx_alloc_bufs().
 */
2135 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2137 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2138 "port_id=%u queue_id=%u",
2139 rxq->port_id, rxq->queue_id);
2141 rte_eth_devices[rxq->port_id].data->
2142 rx_mbuf_alloc_failed++;
2145 } else if (nb_hold > rxq->rx_free_thresh) {
2146 uint16_t next_rdt = rxq->rx_free_trigger;
2148 if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2150 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2153 nb_hold -= rxq->rx_free_thresh;
2155 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2156 "port_id=%u queue_id=%u",
2157 rxq->port_id, rxq->queue_id);
2159 rte_eth_devices[rxq->port_id].data->
2160 rx_mbuf_alloc_failed++;
2166 rxe = &sw_ring[rx_id];
2167 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2169 next_id = rx_id + 1;
/* Wrap the ring index. */
2170 if (next_id == rxq->nb_rx_desc)
2173 /* Prefetch next mbuf while processing current one. */
2174 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2177 * When next RX descriptor is on a cache-line boundary,
2178 * prefetch the next 4 RX descriptors and the next 4 pointers
2181 if ((next_id & 0x3) == 0) {
2182 rte_ixgbe_prefetch(&rx_ring[next_id]);
2183 rte_ixgbe_prefetch(&sw_ring[next_id]);
2190 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2192 * Update RX descriptor with the physical address of the
2193 * new data buffer of the new allocated mbuf.
2197 rxm->data_off = RTE_PKTMBUF_HEADROOM;
2198 rxdp->read.hdr_addr = 0;
2199 rxdp->read.pkt_addr = dma;
2204 * Set data length & data buffer address of mbuf.
2206 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2207 rxm->data_len = data_len;
2212 * Get next descriptor index:
2213 * - For RSC it's in the NEXTP field.
2214 * - For a scattered packet - it's just a following
2217 if (ixgbe_rsc_count(&rxd))
2219 (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2220 IXGBE_RXDADV_NEXTP_SHIFT;
2224 next_sc_entry = &sw_sc_ring[nextp_id];
2225 next_rxe = &sw_ring[nextp_id];
2226 rte_ixgbe_prefetch(next_rxe);
2229 sc_entry = &sw_sc_ring[rx_id];
2230 first_seg = sc_entry->fbuf;
2231 sc_entry->fbuf = NULL;
2234 * If this is the first buffer of the received packet,
2235 * set the pointer to the first mbuf of the packet and
2236 * initialize its context.
2237 * Otherwise, update the total length and the number of segments
2238 * of the current scattered packet, and update the pointer to
2239 * the last mbuf of the current packet.
2241 if (first_seg == NULL) {
2243 first_seg->pkt_len = data_len;
2244 first_seg->nb_segs = 1;
2246 first_seg->pkt_len += data_len;
2247 first_seg->nb_segs++;
2254 * If this is not the last buffer of the received packet, update
2255 * the pointer to the first mbuf at the NEXTP entry in the
2256 * sw_sc_ring and continue to parse the RX ring.
2258 if (!eop && next_rxe) {
2259 rxm->next = next_rxe->mbuf;
2260 next_sc_entry->fbuf = first_seg;
2264 /* Initialize the first mbuf of the returned packet */
2265 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2268 * Deal with the case, when HW CRC strip is disabled.
2269 * That can't happen when LRO is enabled, but still could
2270 * happen for scattered RX mode.
2272 first_seg->pkt_len -= rxq->crc_len;
/*
 * CRC bytes may span into the last segment: when that segment
 * holds only (part of) the CRC, unlink and free it and trim the
 * remaining CRC bytes from the previous segment instead.
 */
2273 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2274 struct rte_mbuf *lp;
2276 for (lp = first_seg; lp->next != rxm; lp = lp->next)
2279 first_seg->nb_segs--;
2280 lp->data_len -= rxq->crc_len - rxm->data_len;
2282 rte_pktmbuf_free_seg(rxm);
2284 rxm->data_len -= rxq->crc_len;
2286 /* Prefetch data of first segment, if configured to do so. */
2287 rte_packet_prefetch((char *)first_seg->buf_addr +
2288 first_seg->data_off);
2291 * Store the mbuf address into the next entry of the array
2292 * of returned packets.
2294 rx_pkts[nb_rx++] = first_seg;
2298 * Record index of the next RX descriptor to probe.
2300 rxq->rx_tail = rx_id;
2303 * If the number of free RX descriptors is greater than the RX free
2304 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2306 * Update the RDT with the value of the last processed RX descriptor
2307 * minus 1, to guarantee that the RDT register is never equal to the
2308 * RDH register, which creates a "full" ring situation from the
2309 * hardware point of view...
2311 if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2312 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2313 "nb_hold=%u nb_rx=%u",
2314 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2317 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2321 rxq->nb_rx_hold = nb_hold;
/*
 * LRO receive wrapper using per-descriptor ("single") mbuf allocation:
 * forwards to ixgbe_recv_pkts_lro() with bulk_alloc == false.
 */
2326 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2329 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
/*
 * LRO receive wrapper using bulk mbuf allocation:
 * forwards to ixgbe_recv_pkts_lro() with bulk_alloc == true.
 */
2333 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2336 return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2339 /*********************************************************************
2341 * Queue management functions
2343 **********************************************************************/
/* Free every mbuf still referenced by the Tx SW ring and clear the slots. */
2345 static void __rte_cold
2346 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2350 if (txq->sw_ring != NULL) {
2351 for (i = 0; i < txq->nb_tx_desc; i++) {
2352 if (txq->sw_ring[i].mbuf != NULL) {
/* Free one segment per SW-ring entry; each entry owns one mbuf. */
2353 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2354 txq->sw_ring[i].mbuf = NULL;
/*
 * Free done Tx mbufs on the full-featured (scalar, offload-capable) path.
 * Walks the SW ring from the entry after tx_tail, freeing completed
 * segments until free_cnt packets were released or no more descriptors
 * can be cleaned. Returns the number of freed packets.
 */
2361 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2363 struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2364 uint16_t i, tx_last, tx_id;
2365 uint16_t nb_tx_free_last;
2366 uint16_t nb_tx_to_clean;
2369 /* Start free mbuf from the next of tx_tail */
2370 tx_last = txq->tx_tail;
2371 tx_id = swr_ring[tx_last].next_id;
/* Nothing freeable and the HW reports nothing done: bail out. */
2373 if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2376 nb_tx_to_clean = txq->nb_tx_free;
2377 nb_tx_free_last = txq->nb_tx_free;
/* free_cnt == 0 means "free as much as possible". */
2379 free_cnt = txq->nb_tx_desc;
2381 /* Loop through swr_ring to count the amount of
2382 * freeable mbufs and packets.
2384 for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2385 for (i = 0; i < nb_tx_to_clean &&
2386 pkt_cnt < free_cnt &&
2387 tx_id != tx_last; i++) {
2388 if (swr_ring[tx_id].mbuf != NULL) {
2389 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2390 swr_ring[tx_id].mbuf = NULL;
2393 * last segment in the packet,
2394 * increment packet count
2396 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2399 tx_id = swr_ring[tx_id].next_id;
2402 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2403 txq->nb_tx_free || tx_id == tx_last)
/* Not enough freed yet: ask HW for another cleanup round. */
2406 if (pkt_cnt < free_cnt) {
2407 if (ixgbe_xmit_cleanup(txq))
2410 nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2411 nb_tx_free_last = txq->nb_tx_free;
2415 return (int)pkt_cnt;
/*
 * Free done Tx mbufs on the simple (no-offload) path: repeatedly invoke
 * ixgbe_tx_free_bufs(), which releases tx_rs_thresh buffers per call,
 * until up to free_cnt (rounded down to a multiple of tx_rs_thresh)
 * buffers have been freed.
 */
2419 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2424 if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2425 free_cnt = txq->nb_tx_desc;
2427 cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2429 for (i = 0; i < cnt; i += n) {
/* Fewer than tx_rs_thresh descriptors in flight: nothing to free. */
2430 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2433 n = ixgbe_tx_free_bufs(txq);
/* Done-cleanup callback for the vector Tx path (both parameters unused). */
2443 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2444 uint32_t free_cnt __rte_unused)
/*
 * ethdev tx_done_cleanup entry point: dispatch to the vector, simple or
 * full cleanup routine using the same conditions by which
 * ixgbe_set_tx_function() selects the Tx burst function.
 */
2450 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2452 struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2453 if (txq->offloads == 0 &&
2454 #ifdef RTE_LIB_SECURITY
2455 !(txq->using_ipsec) &&
2457 txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2458 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2459 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2460 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2461 txq->sw_ring_v != NULL)) {
2462 return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2464 return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2468 return ixgbe_tx_done_cleanup_full(txq, free_cnt);
/* Release the Tx SW ring memory (ops->free_swring callback). */
2471 static void __rte_cold
2472 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2475 txq->sw_ring != NULL)
2476 rte_free(txq->sw_ring);
/*
 * Fully tear down a Tx queue: free in-flight mbufs and the SW ring via
 * the queue ops, then release the descriptor-ring memzone.
 */
2479 static void __rte_cold
2480 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2482 if (txq != NULL && txq->ops != NULL) {
2483 txq->ops->release_mbufs(txq);
2484 txq->ops->free_swring(txq);
2485 rte_memzone_free(txq->mz);
/* ethdev tx_queue_release callback: release queue qid of this port. */
2491 ixgbe_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
2493 ixgbe_tx_queue_release(dev->data->tx_queues[qid]);
2496 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2497 static void __rte_cold
2498 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2500 static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2501 struct ixgbe_tx_entry *txe = txq->sw_ring;
2504 /* Zero out HW ring memory */
2505 for (i = 0; i < txq->nb_tx_desc; i++) {
2506 txq->tx_ring[i] = zeroed_desc;
2509 /* Initialize SW ring entries */
2510 prev = (uint16_t) (txq->nb_tx_desc - 1);
2511 for (i = 0; i < txq->nb_tx_desc; i++) {
2512 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
/* Mark every descriptor "done" so the first cleanup pass succeeds. */
2514 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
/* Link entries into a circular list via next_id/prev. */
2517 txe[prev].next_id = i;
2521 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2522 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2525 txq->nb_tx_used = 0;
2527 * Always allow 1 descriptor to be un-allocated to avoid
2528 * a H/W race condition
2530 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2531 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
/* Invalidate the cached Tx context descriptors. */
2533 memset((void *)&txq->ctx_cache, 0,
2534 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
/* Default Tx queue ops used by scalar queues (see ixgbe_tx_queue_release). */
2537 static const struct ixgbe_txq_ops def_txq_ops = {
2538 .release_mbufs = ixgbe_tx_queue_release_mbufs,
2539 .free_swring = ixgbe_tx_free_swring,
2540 .reset = ixgbe_reset_tx_queue,
2543 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2544 * the queue parameters. Used in tx_queue_setup by primary process and then
2545 * in dev_init by secondary process when attaching to an existing ethdev.
2548 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2550 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2551 if ((txq->offloads == 0) &&
2552 #ifdef RTE_LIB_SECURITY
2553 !(txq->using_ipsec) &&
2555 (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2556 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
/* Simple/vector paths need no prepare step. */
2557 dev->tx_pkt_prepare = NULL;
/* Vector Tx only if thresholds fit, SIMD128+ is available, and the
 * per-queue vector setup succeeds (secondary processes skip setup).
 */
2558 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2559 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2560 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2561 ixgbe_txq_vec_setup(txq) == 0)) {
2562 PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2563 dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2565 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2567 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2569 " - offloads = 0x%" PRIx64,
2572 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2573 (unsigned long)txq->tx_rs_thresh,
2574 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2575 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2576 dev->tx_pkt_prepare = ixgbe_prep_pkts;
/* NOTE(review): presumably returns the per-queue Tx offload capability
 * mask reported through dev_info — confirm against the function body.
 */
2581 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
/*
 * Build the per-port Tx offload capability mask: a common baseline plus
 * MAC-type-specific additions (MACsec on 82599/X540, outer IPv4 checksum
 * on the X550 family, inline security when a security context exists).
 */
2589 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2591 uint64_t tx_offload_capa;
2592 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2595 RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
2596 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
2597 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
2598 RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
2599 RTE_ETH_TX_OFFLOAD_SCTP_CKSUM |
2600 RTE_ETH_TX_OFFLOAD_TCP_TSO |
2601 RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
2603 if (hw->mac.type == ixgbe_mac_82599EB ||
2604 hw->mac.type == ixgbe_mac_X540)
2605 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_MACSEC_INSERT;
2607 if (hw->mac.type == ixgbe_mac_X550 ||
2608 hw->mac.type == ixgbe_mac_X550EM_x ||
2609 hw->mac.type == ixgbe_mac_X550EM_a)
2610 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2612 #ifdef RTE_LIB_SECURITY
2613 if (dev->security_ctx)
2614 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_SECURITY;
2616 return tx_offload_capa;
/*
 * ethdev tx_queue_setup callback: validate descriptor count and the
 * tx_rs_thresh/tx_free_thresh constraints, allocate the queue structure,
 * the HW descriptor memzone and the SW ring, then select the Tx burst
 * function and reset the queue to its default state.
 */
2620 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2623 unsigned int socket_id,
2624 const struct rte_eth_txconf *tx_conf)
2626 const struct rte_memzone *tz;
2627 struct ixgbe_tx_queue *txq;
2628 struct ixgbe_hw *hw;
2629 uint16_t tx_rs_thresh, tx_free_thresh;
2632 PMD_INIT_FUNC_TRACE();
2633 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
/* Effective offloads = per-queue request plus port-level configuration. */
2635 offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2638 * Validate number of transmit descriptors.
2639 * It must not exceed hardware maximum, and must be multiple
2642 if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2643 (nb_desc > IXGBE_MAX_RING_DESC) ||
2644 (nb_desc < IXGBE_MIN_RING_DESC)) {
2649 * The following two parameters control the setting of the RS bit on
2650 * transmit descriptors.
2651 * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2652 * descriptors have been used.
2653 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2654 * descriptors are used or if the number of descriptors required
2655 * to transmit a packet is greater than the number of free TX
2657 * The following constraints must be satisfied:
2658 * tx_rs_thresh must be greater than 0.
2659 * tx_rs_thresh must be less than the size of the ring minus 2.
2660 * tx_rs_thresh must be less than or equal to tx_free_thresh.
2661 * tx_rs_thresh must be a divisor of the ring size.
2662 * tx_free_thresh must be greater than 0.
2663 * tx_free_thresh must be less than the size of the ring minus 3.
2664 * tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2665 * One descriptor in the TX ring is used as a sentinel to avoid a
2666 * H/W race condition, hence the maximum threshold constraints.
2667 * When set to zero use default values.
2669 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2670 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2671 /* force tx_rs_thresh to adapt an aggressive tx_free_thresh */
2672 tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2673 nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2674 if (tx_conf->tx_rs_thresh > 0)
2675 tx_rs_thresh = tx_conf->tx_rs_thresh;
2676 if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2677 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2678 "exceed nb_desc. (tx_rs_thresh=%u "
2679 "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2680 (unsigned int)tx_rs_thresh,
2681 (unsigned int)tx_free_thresh,
2682 (unsigned int)nb_desc,
2683 (int)dev->data->port_id,
2687 if (tx_rs_thresh >= (nb_desc - 2)) {
2688 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2689 "of TX descriptors minus 2. (tx_rs_thresh=%u "
2690 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2691 (int)dev->data->port_id, (int)queue_idx);
2694 if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2695 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2696 "(tx_rs_thresh=%u port=%d queue=%d)",
2697 DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2698 (int)dev->data->port_id, (int)queue_idx);
2701 if (tx_free_thresh >= (nb_desc - 3)) {
/* NOTE(review): the log text below starts with "tx_rs_thresh must be
 * less than the" but this check (and the rest of the message) is about
 * tx_free_thresh — the leading clause looks like a copy/paste leftover.
 */
2702 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2703 "tx_free_thresh must be less than the number of "
2704 "TX descriptors minus 3. (tx_free_thresh=%u "
2705 "port=%d queue=%d)",
2706 (unsigned int)tx_free_thresh,
2707 (int)dev->data->port_id, (int)queue_idx);
2710 if (tx_rs_thresh > tx_free_thresh) {
2711 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2712 "tx_free_thresh. (tx_free_thresh=%u "
2713 "tx_rs_thresh=%u port=%d queue=%d)",
2714 (unsigned int)tx_free_thresh,
2715 (unsigned int)tx_rs_thresh,
2716 (int)dev->data->port_id,
2720 if ((nb_desc % tx_rs_thresh) != 0) {
2721 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2722 "number of TX descriptors. (tx_rs_thresh=%u "
2723 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2724 (int)dev->data->port_id, (int)queue_idx);
2729 * If rs_bit_thresh is greater than 1, then TX WTHRESH should be
2730 * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2731 * by the NIC and all descriptors are written back after the NIC
2732 * accumulates WTHRESH descriptors.
2734 if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2735 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2736 "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2737 "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2738 (int)dev->data->port_id, (int)queue_idx);
2742 /* Free memory prior to re-allocation if needed... */
2743 if (dev->data->tx_queues[queue_idx] != NULL) {
2744 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2745 dev->data->tx_queues[queue_idx] = NULL;
2748 /* First allocate the tx queue data structure */
2749 txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2750 RTE_CACHE_LINE_SIZE, socket_id);
2755 * Allocate TX ring hardware descriptors. A memzone large enough to
2756 * handle the maximum ring size is allocated in order to allow for
2757 * resizing in later calls to the queue setup function.
2759 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2760 sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2761 IXGBE_ALIGN, socket_id);
2763 ixgbe_tx_queue_release(txq);
2768 txq->nb_tx_desc = nb_desc;
2769 txq->tx_rs_thresh = tx_rs_thresh;
2770 txq->tx_free_thresh = tx_free_thresh;
2771 txq->pthresh = tx_conf->tx_thresh.pthresh;
2772 txq->hthresh = tx_conf->tx_thresh.hthresh;
2773 txq->wthresh = tx_conf->tx_thresh.wthresh;
2774 txq->queue_id = queue_idx;
/* With SR-IOV active, queues are offset into this function's pool. */
2775 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2776 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2777 txq->port_id = dev->data->port_id;
2778 txq->offloads = offloads;
2779 txq->ops = &def_txq_ops;
2780 txq->tx_deferred_start = tx_conf->tx_deferred_start;
2781 #ifdef RTE_LIB_SECURITY
2782 txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2783 RTE_ETH_TX_OFFLOAD_SECURITY);
2787 * Modification to set VFTDT for virtual function if vf is detected
2789 if (hw->mac.type == ixgbe_mac_82599_vf ||
2790 hw->mac.type == ixgbe_mac_X540_vf ||
2791 hw->mac.type == ixgbe_mac_X550_vf ||
2792 hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2793 hw->mac.type == ixgbe_mac_X550EM_a_vf)
2794 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2796 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2798 txq->tx_ring_phys_addr = tz->iova;
2799 txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2801 /* Allocate software ring */
2802 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2803 sizeof(struct ixgbe_tx_entry) * nb_desc,
2804 RTE_CACHE_LINE_SIZE, socket_id);
2805 if (txq->sw_ring == NULL) {
2806 ixgbe_tx_queue_release(txq);
2809 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2810 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2812 /* set up vector or scalar TX function as appropriate */
2813 ixgbe_set_tx_function(dev, txq);
2815 txq->ops->reset(txq);
2817 dev->data->tx_queues[queue_idx] = txq;
2824 * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2826 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2827 * in the sw_rsc_ring is not set to NULL but rather points to the next
2828 * mbuf of this RSC aggregation (that has not been completed yet and still
2829 * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2830 * will just free first "nb_segs" segments of the cluster explicitly by calling
2831 * an rte_pktmbuf_free_seg().
2833 * @m scattered cluster head
2835 static void __rte_cold
2836 ixgbe_free_sc_cluster(struct rte_mbuf *m)
/* nb_segs is cached up front: freeing a segment invalidates the head. */
2838 uint16_t i, nb_segs = m->nb_segs;
2839 struct rte_mbuf *next_seg;
2841 for (i = 0; i < nb_segs; i++) {
2843 rte_pktmbuf_free_seg(m);
/*
 * Free every mbuf still owned by an Rx queue: the SW ring entries, any
 * packets staged for the bulk-alloc burst path, and partially assembled
 * scattered/RSC clusters in the sw_sc_ring.
 */
2848 static void __rte_cold
2849 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2853 /* SSE Vector driver has a different way of releasing mbufs. */
2854 if (rxq->rx_using_sse) {
2855 ixgbe_rx_queue_release_mbufs_vec(rxq);
2859 if (rxq->sw_ring != NULL) {
2860 for (i = 0; i < rxq->nb_rx_desc; i++) {
2861 if (rxq->sw_ring[i].mbuf != NULL) {
2862 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2863 rxq->sw_ring[i].mbuf = NULL;
/* Drop packets already staged by the bulk-alloc burst path. */
2866 if (rxq->rx_nb_avail) {
2867 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2868 struct rte_mbuf *mb;
2870 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2871 rte_pktmbuf_free_seg(mb);
2873 rxq->rx_nb_avail = 0;
/* Free incomplete scattered/RSC clusters segment by segment. */
2877 if (rxq->sw_sc_ring)
2878 for (i = 0; i < rxq->nb_rx_desc; i++)
2879 if (rxq->sw_sc_ring[i].fbuf) {
2880 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2881 rxq->sw_sc_ring[i].fbuf = NULL;
/*
 * Fully tear down an Rx queue: free in-flight mbufs, both SW rings and
 * the descriptor-ring memzone.
 */
2885 static void __rte_cold
2886 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2889 ixgbe_rx_queue_release_mbufs(rxq)
2890 rte_free(rxq->sw_ring);
2891 rte_free(rxq->sw_sc_ring);
2892 rte_memzone_free(rxq->mz);
/* ethdev rx_queue_release callback: release queue qid of this port. */
2898 ixgbe_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
2900 ixgbe_rx_queue_release(dev->data->rx_queues[qid]);
2904 * Check if Rx Burst Bulk Alloc function can be used.
2906 * 0: the preconditions are satisfied and the bulk allocation function
2908 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2909 * function must be used.
2911 static inline int __rte_cold
2912 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2917 * Make sure the following pre-conditions are satisfied:
2918 * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2919 * rxq->rx_free_thresh < rxq->nb_rx_desc
2920 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2921 * Scattered packets are not supported. This should be checked
2922 * outside of this function.
2924 if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2925 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2926 "rxq->rx_free_thresh=%d, "
2927 "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2928 rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2930 } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2931 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2932 "rxq->rx_free_thresh=%d, "
2933 "rxq->nb_rx_desc=%d",
2934 rxq->rx_free_thresh, rxq->nb_rx_desc);
2936 } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2937 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2938 "rxq->nb_rx_desc=%d, "
2939 "rxq->rx_free_thresh=%d",
2940 rxq->nb_rx_desc, rxq->rx_free_thresh);
2947 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2948 static void __rte_cold
2949 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2951 static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2953 uint16_t len = rxq->nb_rx_desc;
2956 * By default, the Rx queue setup function allocates enough memory for
2957 * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires
2958 * extra memory at the end of the descriptor ring to be zero'd out.
2960 if (adapter->rx_bulk_alloc_allowed)
2961 /* zero out extra memory */
2962 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2965 * Zero out HW ring memory. Zero out extra memory at the end of
2966 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2967 * reads extra memory as zeros.
2969 for (i = 0; i < len; i++) {
2970 rxq->rx_ring[i] = zeroed_desc;
2974 * initialize extra software ring entries. Space for these extra
2975 * entries is always allocated
2977 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
/* Extra look-ahead slots all point at the shared fake mbuf. */
2978 for (i = rxq->nb_rx_desc; i < len; ++i) {
2979 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2982 rxq->rx_nb_avail = 0;
2983 rxq->rx_next_avail = 0;
2984 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2986 rxq->nb_rx_hold = 0;
/* Drop any half-assembled scattered packet from a previous run. */
2988 if (rxq->pkt_first_seg != NULL)
2989 rte_pktmbuf_free(rxq->pkt_first_seg);
2991 rxq->pkt_first_seg = NULL;
2992 rxq->pkt_last_seg = NULL;
2994 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2995 rxq->rxrearm_start = 0;
2996 rxq->rxrearm_nb = 0;
/* Return whether the device is a VF variant, judged by its MAC type. */
3001 ixgbe_is_vf(struct rte_eth_dev *dev)
3003 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3005 switch (hw->mac.type) {
3006 case ixgbe_mac_82599_vf:
3007 case ixgbe_mac_X540_vf:
3008 case ixgbe_mac_X550_vf:
3009 case ixgbe_mac_X550EM_x_vf:
3010 case ixgbe_mac_X550EM_a_vf:
/*
 * Per-queue Rx offload capabilities: VLAN strip is per-queue on every
 * MAC except the 82598 (where it is port-wide, see
 * ixgbe_get_rx_port_offloads()).
 */
3018 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
3020 uint64_t offloads = 0;
3021 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3023 if (hw->mac.type != ixgbe_mac_82598EB)
3024 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
/*
 * Build the per-port Rx offload capability mask: a common baseline plus
 * MAC-type- and mode-specific additions (VLAN strip on 82598, VLAN
 * extend on PF only, LRO on non-SR-IOV 82599/X540/X550, MACsec strip on
 * 82599/X540, outer IPv4 checksum on the X550 family, inline security
 * when a security context exists).
 */
3030 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
3033 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3035 offloads = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
3036 RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
3037 RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
3038 RTE_ETH_RX_OFFLOAD_KEEP_CRC |
3039 RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
3040 RTE_ETH_RX_OFFLOAD_SCATTER |
3041 RTE_ETH_RX_OFFLOAD_RSS_HASH;
3043 if (hw->mac.type == ixgbe_mac_82598EB)
3044 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3046 if (ixgbe_is_vf(dev) == 0)
3047 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_EXTEND;
3050 * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
3053 if ((hw->mac.type == ixgbe_mac_82599EB ||
3054 hw->mac.type == ixgbe_mac_X540 ||
3055 hw->mac.type == ixgbe_mac_X550) &&
3056 !RTE_ETH_DEV_SRIOV(dev).active)
3057 offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3059 if (hw->mac.type == ixgbe_mac_82599EB ||
3060 hw->mac.type == ixgbe_mac_X540)
3061 offloads |= RTE_ETH_RX_OFFLOAD_MACSEC_STRIP;
3063 if (hw->mac.type == ixgbe_mac_X550 ||
3064 hw->mac.type == ixgbe_mac_X550EM_x ||
3065 hw->mac.type == ixgbe_mac_X550EM_a)
3066 offloads |= RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3068 #ifdef RTE_LIB_SECURITY
3069 if (dev->security_ctx)
3070 offloads |= RTE_ETH_RX_OFFLOAD_SECURITY;
/*
 * ethdev rx_queue_setup callback: validate descriptor count, allocate
 * the queue structure, the HW descriptor memzone and both SW rings
 * (regular and scattered-cluster), program register addresses, check
 * bulk-alloc and vector-Rx preconditions, then reset the queue.
 */
3077 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3080 unsigned int socket_id,
3081 const struct rte_eth_rxconf *rx_conf,
3082 struct rte_mempool *mp)
3084 const struct rte_memzone *rz;
3085 struct ixgbe_rx_queue *rxq;
3086 struct ixgbe_hw *hw;
3088 struct ixgbe_adapter *adapter = dev->data->dev_private;
3091 PMD_INIT_FUNC_TRACE();
3092 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
/* Effective offloads = per-queue request plus port-level configuration. */
3094 offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3097 * Validate number of receive descriptors.
3098 * It must not exceed hardware maximum, and must be multiple
3101 if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3102 (nb_desc > IXGBE_MAX_RING_DESC) ||
3103 (nb_desc < IXGBE_MIN_RING_DESC)) {
3107 /* Free memory prior to re-allocation if needed... */
3108 if (dev->data->rx_queues[queue_idx] != NULL) {
3109 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3110 dev->data->rx_queues[queue_idx] = NULL;
3113 /* First allocate the rx queue data structure */
3114 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3115 RTE_CACHE_LINE_SIZE, socket_id);
3119 rxq->nb_rx_desc = nb_desc;
3120 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3121 rxq->queue_id = queue_idx;
/* With SR-IOV active, queues are offset into this function's pool. */
3122 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3123 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3124 rxq->port_id = dev->data->port_id;
/* With KEEP_CRC, the 4 CRC bytes must be trimmed in software. */
3125 if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
3126 rxq->crc_len = RTE_ETHER_CRC_LEN;
3129 rxq->drop_en = rx_conf->rx_drop_en;
3130 rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3131 rxq->offloads = offloads;
3134 * The packet type in RX descriptor is different for different NICs.
3135 * Some bits are used for x550 but reserved for other NICS.
3136 * So set different masks for different NICs.
3138 if (hw->mac.type == ixgbe_mac_X550 ||
3139 hw->mac.type == ixgbe_mac_X550EM_x ||
3140 hw->mac.type == ixgbe_mac_X550EM_a ||
3141 hw->mac.type == ixgbe_mac_X550_vf ||
3142 hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3143 hw->mac.type == ixgbe_mac_X550EM_a_vf)
3144 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3146 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3149 * 82599 errata, UDP frames with a 0 checksum can be marked as checksum
3152 if (hw->mac.type == ixgbe_mac_82599EB)
3153 rxq->rx_udp_csum_zero_err = 1;
3156 * Allocate RX ring hardware descriptors. A memzone large enough to
3157 * handle the maximum ring size is allocated in order to allow for
3158 * resizing in later calls to the queue setup function.
3160 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3161 RX_RING_SZ, IXGBE_ALIGN, socket_id);
3163 ixgbe_rx_queue_release(rxq);
3169 * Zero init all the descriptors in the ring.
3171 memset(rz->addr, 0, RX_RING_SZ);
3174 * Modified to setup VFRDT for Virtual Function
3176 if (hw->mac.type == ixgbe_mac_82599_vf ||
3177 hw->mac.type == ixgbe_mac_X540_vf ||
3178 hw->mac.type == ixgbe_mac_X550_vf ||
3179 hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3180 hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3182 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3184 IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3187 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3189 IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3192 rxq->rx_ring_phys_addr = rz->iova;
3193 rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3196 * Certain constraints must be met in order to use the bulk buffer
3197 * allocation Rx burst function. If any of Rx queues doesn't meet them
3198 * the feature should be disabled for the whole port.
3200 if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3201 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3202 "preconditions - canceling the feature for "
3203 "the whole port[%d]",
3204 rxq->queue_id, rxq->port_id);
3205 adapter->rx_bulk_alloc_allowed = false;
3209 * Allocate software ring. Allow for space at the end of the
3210 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3211 * function does not access an invalid memory region.
3214 if (adapter->rx_bulk_alloc_allowed)
3215 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3217 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3218 sizeof(struct ixgbe_rx_entry) * len,
3219 RTE_CACHE_LINE_SIZE, socket_id);
3220 if (!rxq->sw_ring) {
3221 ixgbe_rx_queue_release(rxq);
3226 * Always allocate even if it's not going to be needed in order to
3227 * simplify the code.
3229 * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3230 * be requested in ixgbe_dev_rx_init(), which is called later from
3234 rte_zmalloc_socket("rxq->sw_sc_ring",
3235 sizeof(struct ixgbe_scattered_rx_entry) * len,
3236 RTE_CACHE_LINE_SIZE, socket_id);
3237 if (!rxq->sw_sc_ring) {
3238 ixgbe_rx_queue_release(rxq);
3242 PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3243 "dma_addr=0x%"PRIx64,
3244 rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3245 rxq->rx_ring_phys_addr);
/* Vector Rx requires a power-of-two ring size. */
3247 if (!rte_is_power_of_2(nb_desc)) {
3248 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3249 "preconditions - canceling the feature for "
3250 "the whole port[%d]",
3251 rxq->queue_id, rxq->port_id);
3252 adapter->rx_vec_allowed = false;
3254 ixgbe_rxq_vec_setup(rxq);
3256 dev->data->rx_queues[queue_idx] = rxq;
3258 ixgbe_reset_rx_queue(adapter, rxq);
/*
 * ethdev rx_queue_count callback: estimate how many descriptors are done
 * by probing the DD bit every IXGBE_RXQ_SCAN_INTERVAL descriptors, so
 * the result is an approximation rounded to that interval.
 */
3264 ixgbe_dev_rx_queue_count(void *rx_queue)
3266 #define IXGBE_RXQ_SCAN_INTERVAL 4
3267 volatile union ixgbe_adv_rx_desc *rxdp;
3268 struct ixgbe_rx_queue *rxq;
3272 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3274 while ((desc < rxq->nb_rx_desc) &&
3275 (rxdp->wb.upper.status_error &
3276 rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3277 desc += IXGBE_RXQ_SCAN_INTERVAL;
3278 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
/* Wrap the probe pointer around the end of the ring. */
3279 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3280 rxdp = &(rxq->rx_ring[rxq->rx_tail +
3281 desc - rxq->nb_rx_desc]);
/*
 * ethdev rx_descriptor_status callback: report whether the descriptor at
 * rx_tail + offset is DONE (written back), AVAIL, or UNAVAIL (held/not
 * yet rearmed).
 */
3288 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3290 struct ixgbe_rx_queue *rxq = rx_queue;
3291 volatile uint32_t *status;
3292 uint32_t nb_hold, desc;
3294 if (unlikely(offset >= rxq->nb_rx_desc))
/* Vector path tracks held descriptors in rxrearm_nb instead. */
3297 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3298 if (rxq->rx_using_sse)
3299 nb_hold = rxq->rxrearm_nb;
3302 nb_hold = rxq->nb_rx_hold;
3303 if (offset >= rxq->nb_rx_desc - nb_hold)
3304 return RTE_ETH_RX_DESC_UNAVAIL;
3306 desc = rxq->rx_tail + offset;
3307 if (desc >= rxq->nb_rx_desc)
3308 desc -= rxq->nb_rx_desc;
3310 status = &rxq->rx_ring[desc].wb.upper.status_error;
3311 if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3312 return RTE_ETH_RX_DESC_DONE;
3314 return RTE_ETH_RX_DESC_AVAIL;
/*
 * Report the state of the TX descriptor 'offset' entries past tx_tail.
 * Hardware only writes back status on descriptors carrying the RS bit,
 * so the position is first rounded up to the next tx_rs_thresh boundary.
 */
3318 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3320 struct ixgbe_tx_queue *txq = tx_queue;
3321 volatile uint32_t *status;
3324 if (unlikely(offset >= txq->nb_tx_desc))
3327 desc = txq->tx_tail + offset;
3328 /* go to next desc that has the RS bit */
3329 desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
/*
 * The rounding can overshoot by nearly one threshold, so the index may
 * need to wrap the ring twice before it is back in range.
 */
3331 if (desc >= txq->nb_tx_desc) {
3332 desc -= txq->nb_tx_desc;
3333 if (desc >= txq->nb_tx_desc)
3334 desc -= txq->nb_tx_desc;
3337 status = &txq->tx_ring[desc].wb.status;
3338 if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3339 return RTE_ETH_TX_DESC_DONE;
3341 return RTE_ETH_TX_DESC_FULL;
3345 * Set up link loopback for X540/X550 mode Tx->Rx.
/*
 * Read-modify-write the PHY autoneg control register and the MACC
 * register: when enabling, follow the datasheet 15.2.1 loopback
 * procedure and force link up (MACC.FLU); when disabling, clear the
 * same bits.  NOTE(review): the if/else lines selecting between the
 * two branches are not visible in this chunk — verify in full source.
 */
3347 static inline void __rte_cold
3348 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3351 PMD_INIT_FUNC_TRACE();
3353 u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
/* Fetch current PHY autoneg control and MAC control values. */
3355 hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3356 IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3357 macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3360 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3361 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3362 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3363 macc |= IXGBE_MACC_FLU;
/* Disable path: undo the loopback bits. */
3365 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3366 macc &= ~IXGBE_MACC_FLU;
/* Write the modified values back to the PHY and MAC. */
3369 hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3370 IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3372 IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
/*
 * Release all mbufs held by every TX/RX queue and reset the queues to
 * their initial state; if loopback was configured on an X540/X550-class
 * MAC, tear the forced-link-up loopback setup back down.
 */
3376 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3379 struct ixgbe_adapter *adapter = dev->data->dev_private;
3380 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3382 PMD_INIT_FUNC_TRACE();
/* TX queues: free queued mbufs, then reset ring state via queue ops. */
3384 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3385 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3388 txq->ops->release_mbufs(txq);
3389 txq->ops->reset(txq);
/* RX queues: same treatment with the RX-specific helpers. */
3393 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3394 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3397 ixgbe_rx_queue_release_mbufs(rxq);
3398 ixgbe_reset_rx_queue(adapter, rxq);
3401 /* If loopback mode was enabled, reconfigure the link accordingly */
3402 if (dev->data->dev_conf.lpbk_mode != 0) {
3403 if (hw->mac.type == ixgbe_mac_X540 ||
3404 hw->mac.type == ixgbe_mac_X550 ||
3405 hw->mac.type == ixgbe_mac_X550EM_x ||
3406 hw->mac.type == ixgbe_mac_X550EM_a)
3407 ixgbe_setup_loopback_link_x540_x550(hw, false);
/*
 * Free every configured RX and TX queue of the port and zero the
 * per-direction queue counts so the device reports no queues.
 */
3412 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3416 PMD_INIT_FUNC_TRACE();
3418 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3419 ixgbe_dev_rx_queue_release(dev, i);
3420 dev->data->rx_queues[i] = NULL;
3422 dev->data->nb_rx_queues = 0;
3424 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3425 ixgbe_dev_tx_queue_release(dev, i);
3426 dev->data->tx_queues[i] = NULL;
3428 dev->data->nb_tx_queues = 0;
3431 /*********************************************************************
3433 * Device RX/TX init functions
3435 **********************************************************************/
3438 * Receive Side Scaling (RSS)
3439 * See section 7.1.2.8 in the following document:
3440 * "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3443 * The source and destination IP addresses of the IP header and the source
3444 * and destination ports of TCP/UDP headers, if any, of received packets are
3445 * hashed against a configurable random key to compute a 32-bit RSS hash result.
3446 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3447 * 128-entry redirection table (RETA). Each entry of the RETA provides a 4-bit
3448 * RSS output index which is used as the RX queue index where to store the
3450 * The following output is supplied in the RX write-back descriptor:
3451 * - 32-bit result of the Microsoft RSS hash function,
3452 * - 4-bit RSS type field.
3456 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3457 * Used as the default key.
/* 40-byte (10 x 32-bit) default RSS hash key, loaded into RSSRK[0..9]. */
3459 static uint8_t rss_intel_key[40] = {
3460 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3461 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3462 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3463 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3464 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
/*
 * Disable RSS by clearing the RSSEN bit in the MAC-type-specific
 * MRQC register (read-modify-write; other MRQC fields are preserved).
 */
3468 ixgbe_rss_disable(struct rte_eth_dev *dev)
3470 struct ixgbe_hw *hw;
3474 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3475 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3476 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3477 mrqc &= ~IXGBE_MRQC_RSSEN;
3478 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
/*
 * Program the RSS hash key (if supplied) into the ten 32-bit RSSRK
 * registers, then enable RSS in MRQC with the field-selection bits
 * that correspond to the requested rss_hf protocol set.
 */
3482 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3492 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3493 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3495 hash_key = rss_conf->rss_key;
3496 if (hash_key != NULL) {
3497 /* Fill in RSS hash key */
/* Pack 4 key bytes per register, little-endian within the word. */
3498 for (i = 0; i < 10; i++) {
3499 rss_key = hash_key[(i * 4)];
3500 rss_key |= hash_key[(i * 4) + 1] << 8;
3501 rss_key |= hash_key[(i * 4) + 2] << 16;
3502 rss_key |= hash_key[(i * 4) + 3] << 24;
3503 IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3507 /* Set configured hashing protocols in MRQC register */
3508 rss_hf = rss_conf->rss_hf;
3509 mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
/* One MRQC field bit per supported hash input type. */
3510 if (rss_hf & RTE_ETH_RSS_IPV4)
3511 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3512 if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
3513 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3514 if (rss_hf & RTE_ETH_RSS_IPV6)
3515 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3516 if (rss_hf & RTE_ETH_RSS_IPV6_EX)
3517 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3518 if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
3519 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3520 if (rss_hf & RTE_ETH_RSS_IPV6_TCP_EX)
3521 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3522 if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
3523 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3524 if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
3525 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3526 if (rss_hf & RTE_ETH_RSS_IPV6_UDP_EX)
3527 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3528 IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
/*
 * Update the RSS key/protocols at runtime.  Per datasheet 7.1.2.8 RSS
 * cannot be toggled dynamically, so the request is rejected if it would
 * enable RSS while disabled (or vice versa) relative to init time.
 * NOTE(review): the error-return lines of the rejection paths are not
 * visible in this chunk — confirm against the full source.
 */
3532 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3533 struct rte_eth_rss_conf *rss_conf)
3535 struct ixgbe_hw *hw;
3540 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
/* Some MAC types do not support runtime RSS reconfiguration at all. */
3542 if (!ixgbe_rss_update_sp(hw->mac.type)) {
3543 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3547 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3550 * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3551 * "RSS enabling cannot be done dynamically while it must be
3552 * preceded by a software reset"
3553 * Before changing anything, first check that the update RSS operation
3554 * does not attempt to disable RSS, if RSS was enabled at
3555 * initialization time, or does not attempt to enable RSS, if RSS was
3556 * disabled at initialization time.
3558 rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3559 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3560 if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3561 if (rss_hf != 0) /* Enable RSS */
3563 return 0; /* Nothing to do */
3566 if (rss_hf == 0) /* Disable RSS */
3568 ixgbe_hw_rss_hash_set(hw, rss_conf);
/*
 * Read back the current RSS configuration: copy the 40-byte hash key
 * out of the RSSRK registers (if the caller provided a buffer) and
 * translate the MRQC field bits into an rss_hf protocol mask
 * (0 when RSS is disabled).
 */
3573 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3574 struct rte_eth_rss_conf *rss_conf)
3576 struct ixgbe_hw *hw;
3585 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3586 mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3587 rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3588 hash_key = rss_conf->rss_key;
3589 if (hash_key != NULL) {
3590 /* Return RSS hash key */
/* Unpack each 32-bit register into 4 key bytes (low byte first). */
3591 for (i = 0; i < 10; i++) {
3592 rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3593 hash_key[(i * 4)] = rss_key & 0x000000FF;
3594 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3595 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3596 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3600 /* Get RSS functions configured in MRQC register */
3601 mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3602 if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3603 rss_conf->rss_hf = 0;
/* Inverse of the mapping used by ixgbe_hw_rss_hash_set(). */
3607 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3608 rss_hf |= RTE_ETH_RSS_IPV4;
3609 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3610 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
3611 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3612 rss_hf |= RTE_ETH_RSS_IPV6;
3613 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3614 rss_hf |= RTE_ETH_RSS_IPV6_EX;
3615 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3616 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
3617 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3618 rss_hf |= RTE_ETH_RSS_IPV6_TCP_EX;
3619 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3620 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
3621 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3622 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
3623 if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3624 rss_hf |= RTE_ETH_RSS_IPV6_UDP_EX;
3625 rss_conf->rss_hf = rss_hf;
/*
 * Full RSS bring-up for the port: populate the redirection table with
 * a round-robin queue mapping (unless the application already updated
 * RETA), then program the key and protocol set — or disable RSS when
 * no supported hash protocol is requested.
 */
3630 ixgbe_rss_configure(struct rte_eth_dev *dev)
3632 struct rte_eth_rss_conf rss_conf;
3633 struct ixgbe_adapter *adapter;
3634 struct ixgbe_hw *hw;
3638 uint16_t sp_reta_size;
3641 PMD_INIT_FUNC_TRACE();
3642 adapter = dev->data->dev_private;
3643 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3645 sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3648 * Fill in redirection table
3649 * The byte-swap is needed because NIC registers are in
3650 * little-endian order.
/* Skip if the application already programmed RETA via the ethdev API. */
3652 if (adapter->rss_reta_updated == 0) {
/* j cycles over the RX queues: entry i of RETA gets queue i mod nb_rx_queues. */
3654 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3655 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3657 if (j == dev->data->nb_rx_queues)
3659 reta = (reta << 8) | j;
3661 IXGBE_WRITE_REG(hw, reta_reg,
3667 * Configure the RSS key and the RSS protocols used to compute
3668 * the RSS hash of input packets.
3670 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3671 if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3672 ixgbe_rss_disable(dev);
3675 if (rss_conf.rss_key == NULL)
3676 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3677 ixgbe_hw_rss_hash_set(hw, &rss_conf);
3680 #define NUM_VFTA_REGISTERS 128
/* RX packet buffer sizes (in KB units as used by RXPBSIZE). */
3681 #define NIC_RX_BUFFER_SIZE 0x200
3682 #define X550_RX_BUFFER_SIZE 0x180
/*
 * Configure combined VMDq+DCB receive mode: partition the RX packet
 * buffer per traffic class, enable VMDq+DCB in MRQC, turn on
 * virtualisation (PFVTCTL), map user priorities to TCs, and set up
 * VLAN filtering plus per-pool VLAN membership from the vmdq_dcb_conf.
 */
3685 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3687 struct rte_eth_vmdq_dcb_conf *cfg;
3688 struct ixgbe_hw *hw;
3689 enum rte_eth_nb_pools num_pools;
3690 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3692 uint8_t nb_tcs; /* number of traffic classes */
3695 PMD_INIT_FUNC_TRACE();
3696 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3697 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3698 num_pools = cfg->nb_queue_pools;
3699 /* Check we have a valid number of pools */
3700 if (num_pools != RTE_ETH_16_POOLS && num_pools != RTE_ETH_32_POOLS) {
3701 ixgbe_rss_disable(dev);
3704 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3705 nb_tcs = (uint8_t)(RTE_ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3709 * split rx buffer up into sections, each for 1 traffic class
/* X550-family parts have a smaller RX packet buffer than 82599/X540. */
3711 switch (hw->mac.type) {
3712 case ixgbe_mac_X550:
3713 case ixgbe_mac_X550EM_x:
3714 case ixgbe_mac_X550EM_a:
3715 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3718 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
/* Program an equal RXPBSIZE share for each active TC. */
3721 for (i = 0; i < nb_tcs; i++) {
3722 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3724 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3725 /* clear 10 bits. */
3726 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3727 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3729 /* zero alloc all unused TCs */
3730 for (i = nb_tcs; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3731 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3733 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3734 /* clear 10 bits. */
3735 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3738 /* MRQC: enable vmdq and dcb */
3739 mrqc = (num_pools == RTE_ETH_16_POOLS) ?
3740 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3741 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3743 /* PFVTCTL: turn on virtualisation and set the default pool */
3744 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3745 if (cfg->enable_default_pool) {
3746 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3748 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3751 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3753 /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3755 for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++)
3757 * mapping is done with 3 bits per priority,
3758 * so shift by i*3 each time
3760 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3762 IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3764 /* RTRPCS: DCB related */
3765 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3767 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3768 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3769 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3770 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3772 /* VFTA - enable all vlan filters */
3773 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3774 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3777 /* VFRE: pool enabling for receive - 16 or 32 */
3778 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3779 num_pools == RTE_ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3782 * MPSAR - allow pools to read specific mac addresses
3783 * In this case, all pools should be able to read from mac addr 0
3785 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3786 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3788 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3789 for (i = 0; i < cfg->nb_pool_maps; i++) {
3790 /* set vlan id in VF register and set the valid bit */
3791 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3792 (cfg->pool_map[i].vlan_id & 0xFFF)));
3794 * Put the allowed pools in VFB reg. As we only have 16 or 32
3795 * pools, we only need to use the first half of the register
3798 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3803 * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3804 * @dev: pointer to eth_dev structure
3805 * @dcb_config: pointer to ixgbe_dcb_config structure
/*
 * Program MTQC for 8TC/8TQ or 4TC/4TQ DCB TX mode (plus VT mode when
 * requested).  The TX descriptor arbiter must be disabled around the
 * MTQC update; 82598EB skips the MTQC path entirely.
 */
3808 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3809 struct ixgbe_dcb_config *dcb_config)
3812 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3814 PMD_INIT_FUNC_TRACE();
3815 if (hw->mac.type != ixgbe_mac_82598EB) {
3816 /* Disable the Tx desc arbiter so that MTQC can be changed */
3817 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3818 reg |= IXGBE_RTTDCS_ARBDIS;
3819 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3821 /* Enable DCB for Tx with 8 TCs */
3822 if (dcb_config->num_tcs.pg_tcs == 8) {
3823 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3825 reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3827 if (dcb_config->vt_mode)
3828 reg |= IXGBE_MTQC_VT_ENA;
3829 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3831 /* Enable the Tx desc arbiter */
3832 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3833 reg &= ~IXGBE_RTTDCS_ARBDIS;
3834 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3836 /* Enable Security TX Buffer IFG for DCB */
3837 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3838 reg |= IXGBE_SECTX_DCB;
3839 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3844 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3845 * @dev: pointer to rte_eth_dev structure
3846 * @dcb_config: pointer to ixgbe_dcb_config structure
/*
 * Enable TX for all VMDq pools (VFTE bitmap sized by 16 vs 32 pools;
 * not applicable on 82598EB), then apply the generic DCB TX setup.
 */
3849 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3850 struct ixgbe_dcb_config *dcb_config)
3852 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3853 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3854 struct ixgbe_hw *hw =
3855 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3857 PMD_INIT_FUNC_TRACE();
3858 if (hw->mac.type != ixgbe_mac_82598EB)
3859 /*PF VF Transmit Enable*/
3860 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3861 vmdq_tx_conf->nb_queue_pools == RTE_ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3863 /*Configure general DCB TX parameters*/
3864 ixgbe_dcb_tx_hw_config(dev, dcb_config);
/*
 * Translate the application's VMDq+DCB RX configuration into the
 * driver's ixgbe_dcb_config: derive the TC count from the pool count
 * (16 pools -> 8 TCs, otherwise 4 TCs) and rebuild each TC's
 * user-priority bitmap from the dcb_tc[] mapping.
 */
3868 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3869 struct ixgbe_dcb_config *dcb_config)
3871 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3872 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3873 struct ixgbe_dcb_tc_config *tc;
3876 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3877 if (vmdq_rx_conf->nb_queue_pools == RTE_ETH_16_POOLS) {
3878 dcb_config->num_tcs.pg_tcs = RTE_ETH_8_TCS;
3879 dcb_config->num_tcs.pfc_tcs = RTE_ETH_8_TCS;
3881 dcb_config->num_tcs.pg_tcs = RTE_ETH_4_TCS;
3882 dcb_config->num_tcs.pfc_tcs = RTE_ETH_4_TCS;
3885 /* Initialize User Priority to Traffic Class mapping */
3886 for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3887 tc = &dcb_config->tc_config[j];
3888 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3891 /* User Priority to Traffic Class mapping */
3892 for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3893 j = vmdq_rx_conf->dcb_tc[i];
3894 tc = &dcb_config->tc_config[j];
3895 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
/*
 * TX-side twin of ixgbe_vmdq_dcb_rx_config(): derive TC counts from
 * the VMDq+DCB TX pool count and rebuild the TX-path user-priority
 * bitmaps from the configured dcb_tc[] mapping.
 */
3901 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3902 struct ixgbe_dcb_config *dcb_config)
3904 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3905 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3906 struct ixgbe_dcb_tc_config *tc;
3909 /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3910 if (vmdq_tx_conf->nb_queue_pools == RTE_ETH_16_POOLS) {
3911 dcb_config->num_tcs.pg_tcs = RTE_ETH_8_TCS;
3912 dcb_config->num_tcs.pfc_tcs = RTE_ETH_8_TCS;
3914 dcb_config->num_tcs.pg_tcs = RTE_ETH_4_TCS;
3915 dcb_config->num_tcs.pfc_tcs = RTE_ETH_4_TCS;
3918 /* Initialize User Priority to Traffic Class mapping */
3919 for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3920 tc = &dcb_config->tc_config[j];
3921 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3924 /* User Priority to Traffic Class mapping */
3925 for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3926 j = vmdq_tx_conf->dcb_tc[i];
3927 tc = &dcb_config->tc_config[j];
3928 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
/*
 * Copy the plain (non-VMDq) DCB RX configuration into dcb_config:
 * take the TC count directly from dcb_rx_conf and rebuild the RX-path
 * user-priority-to-TC bitmaps.
 */
3934 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3935 struct ixgbe_dcb_config *dcb_config)
3937 struct rte_eth_dcb_rx_conf *rx_conf =
3938 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3939 struct ixgbe_dcb_tc_config *tc;
3942 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3943 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3945 /* Initialize User Priority to Traffic Class mapping */
3946 for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3947 tc = &dcb_config->tc_config[j];
3948 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3951 /* User Priority to Traffic Class mapping */
3952 for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3953 j = rx_conf->dcb_tc[i];
3954 tc = &dcb_config->tc_config[j];
3955 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
/*
 * Copy the plain (non-VMDq) DCB TX configuration into dcb_config:
 * take the TC count from dcb_tx_conf and rebuild the TX-path
 * user-priority-to-TC bitmaps.
 */
3961 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3962 struct ixgbe_dcb_config *dcb_config)
3964 struct rte_eth_dcb_tx_conf *tx_conf =
3965 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3966 struct ixgbe_dcb_tc_config *tc;
3969 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3970 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3972 /* Initialize User Priority to Traffic Class mapping */
3973 for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3974 tc = &dcb_config->tc_config[j];
3975 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3978 /* User Priority to Traffic Class mapping */
3979 for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3980 j = tx_conf->dcb_tc[i];
3981 tc = &dcb_config->tc_config[j];
3982 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3988 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3989 * @dev: pointer to eth_dev structure
3990 * @dcb_config: pointer to ixgbe_dcb_config structure
/*
 * Program the RX-side DCB registers: select the MRQC mode matching the
 * TC count and VT mode, set per-queue drop policy based on SR-IOV
 * state, enable VLAN filtering/VFTA, and toggle the RX packet-plane
 * arbiter (disabled while parameters change, re-enabled at the end).
 */
3993 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3994 struct ixgbe_dcb_config *dcb_config)
4000 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4002 PMD_INIT_FUNC_TRACE();
4004 * Disable the arbiter before changing parameters
4005 * (always enable recycle mode; WSP)
4007 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
4008 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4010 if (hw->mac.type != ixgbe_mac_82598EB) {
4011 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
/* 4-TC mode: VMDq+DCB when vt_mode, otherwise DCB(+RSS) via RTRSS4TCEN. */
4012 if (dcb_config->num_tcs.pg_tcs == 4) {
4013 if (dcb_config->vt_mode)
4014 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4015 IXGBE_MRQC_VMDQRT4TCEN;
4017 /* no matter the mode is DCB or DCB_RSS, just
4018 * set the MRQE to RSSXTCEN. RSS is controlled
4021 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4022 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4023 IXGBE_MRQC_RTRSS4TCEN;
/* 8-TC mode: analogous selection with the 8-TC MRQE encodings. */
4026 if (dcb_config->num_tcs.pg_tcs == 8) {
4027 if (dcb_config->vt_mode)
4028 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4029 IXGBE_MRQC_VMDQRT8TCEN;
4031 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4032 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4033 IXGBE_MRQC_RTRSS8TCEN;
4037 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
/* Queue-drop enable (QDE): off without SR-IOV, on for every queue with it. */
4039 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4040 /* Disable drop for all queues in VMDQ mode*/
4041 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4042 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4044 (q << IXGBE_QDE_IDX_SHIFT)));
4046 /* Enable drop for all queues in SRIOV mode */
4047 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4048 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4050 (q << IXGBE_QDE_IDX_SHIFT) |
4055 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4056 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4057 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4058 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4060 /* VFTA - enable all vlan filters */
4061 for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4062 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4066 * Configure Rx packet plane (recycle mode; WSP) and
/* Re-enable the arbiter (ARBDIS bit left clear). */
4069 reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4070 IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
/*
 * Dispatch RX arbiter (ETS credit) programming to the MAC-generation
 * specific base-driver routine: 82598 has its own variant; 82599 and
 * all later MACs share the 82599 routine.
 */
4074 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4075 uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4077 switch (hw->mac.type) {
4078 case ixgbe_mac_82598EB:
4079 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4081 case ixgbe_mac_82599EB:
4082 case ixgbe_mac_X540:
4083 case ixgbe_mac_X550:
4084 case ixgbe_mac_X550EM_x:
4085 case ixgbe_mac_X550EM_a:
4086 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
/*
 * Dispatch TX arbiter programming (both descriptor-plane and
 * data-plane arbiters) to the MAC-generation specific base-driver
 * routines, mirroring ixgbe_dcb_hw_arbite_rx_config().
 */
4095 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4096 uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4098 switch (hw->mac.type) {
4099 case ixgbe_mac_82598EB:
4100 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4101 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4103 case ixgbe_mac_82599EB:
4104 case ixgbe_mac_X540:
4105 case ixgbe_mac_X550:
4106 case ixgbe_mac_X550EM_x:
4107 case ixgbe_mac_X550EM_a:
4108 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4109 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4116 #define DCB_RX_CONFIG 1
4117 #define DCB_TX_CONFIG 1
/* Granularity (bytes) used when deriving the TX packet-buffer threshold. */
4118 #define DCB_TX_PB 1024
4120 * ixgbe_dcb_hw_configure - Enable DCB and configure
4121 * general DCB in VT mode and non-VT mode parameters
4122 * @dev: pointer to rte_eth_dev structure
4123 * @dcb_config: pointer to ixgbe_dcb_config structure
/*
 * Top-level CEE DCB bring-up: select RX/TX DCB mode from the ethdev
 * mq_mode, normalize per-TC bandwidth shares, partition the RX/TX
 * packet buffers, compute and program ETS credits for both directions,
 * configure TC statistics, and finally set up PFC watermarks when the
 * application enabled PFC support.
 */
4126 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4127 struct ixgbe_dcb_config *dcb_config)
4130 uint8_t i, pfc_en, nb_tcs;
4131 uint16_t pbsize, rx_buffer_size;
4132 uint8_t config_dcb_rx = 0;
4133 uint8_t config_dcb_tx = 0;
4134 uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4135 uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4136 uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4137 uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4138 uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4139 struct ixgbe_dcb_tc_config *tc;
4140 uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4142 struct ixgbe_hw *hw =
4143 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4144 struct ixgbe_bw_conf *bw_conf =
4145 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
/* RX direction: VMDq+DCB vs plain DCB(/RSS) as requested by mq_mode. */
4147 switch (dev->data->dev_conf.rxmode.mq_mode) {
4148 case RTE_ETH_MQ_RX_VMDQ_DCB:
4149 dcb_config->vt_mode = true;
4150 if (hw->mac.type != ixgbe_mac_82598EB) {
4151 config_dcb_rx = DCB_RX_CONFIG;
4153 *get dcb and VT rx configuration parameters
4156 ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4157 /*Configure general VMDQ and DCB RX parameters*/
4158 ixgbe_vmdq_dcb_configure(dev);
4161 case RTE_ETH_MQ_RX_DCB:
4162 case RTE_ETH_MQ_RX_DCB_RSS:
4163 dcb_config->vt_mode = false;
4164 config_dcb_rx = DCB_RX_CONFIG;
4165 /* Get dcb TX configuration parameters from rte_eth_conf */
4166 ixgbe_dcb_rx_config(dev, dcb_config);
4167 /*Configure general DCB RX parameters*/
4168 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4171 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
/* TX direction: same structure for VMDq+DCB vs plain DCB. */
4174 switch (dev->data->dev_conf.txmode.mq_mode) {
4175 case RTE_ETH_MQ_TX_VMDQ_DCB:
4176 dcb_config->vt_mode = true;
4177 config_dcb_tx = DCB_TX_CONFIG;
4178 /* get DCB and VT TX configuration parameters
4181 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4182 /*Configure general VMDQ and DCB TX parameters*/
4183 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4186 case RTE_ETH_MQ_TX_DCB:
4187 dcb_config->vt_mode = false;
4188 config_dcb_tx = DCB_TX_CONFIG;
4189 /*get DCB TX configuration parameters from rte_eth_conf*/
4190 ixgbe_dcb_tx_config(dev, dcb_config);
4191 /*Configure general DCB TX parameters*/
4192 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4195 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4199 nb_tcs = dcb_config->num_tcs.pfc_tcs;
4201 ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4202 if (nb_tcs == RTE_ETH_4_TCS) {
4203 /* Avoid un-configured priority mapping to TC0 */
4205 uint8_t mask = 0xFF;
/* mask keeps a bit per TC not claimed by the first 4 priorities. */
4207 for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4208 mask = (uint8_t)(mask & (~(1 << map[i])));
4209 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4210 if ((mask & 0x1) && j < RTE_ETH_DCB_NUM_USER_PRIORITIES)
4214 /* Re-configure 4 TCs BW */
/* Equal split (100/nb_tcs) unless the app set BW via rte_pmd_ixgbe. */
4215 for (i = 0; i < nb_tcs; i++) {
4216 tc = &dcb_config->tc_config[i];
4217 if (bw_conf->tc_num != nb_tcs)
4218 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4219 (uint8_t)(100 / nb_tcs);
4220 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4221 (uint8_t)(100 / nb_tcs);
4223 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4224 tc = &dcb_config->tc_config[i];
4225 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4226 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4229 /* Re-configure 8 TCs BW */
/* 100/8 = 12 leaves 4% over; "+ (i & 1)" spreads the remainder. */
4230 for (i = 0; i < nb_tcs; i++) {
4231 tc = &dcb_config->tc_config[i];
4232 if (bw_conf->tc_num != nb_tcs)
4233 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4234 (uint8_t)(100 / nb_tcs + (i & 1));
4235 tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4236 (uint8_t)(100 / nb_tcs + (i & 1));
4240 switch (hw->mac.type) {
4241 case ixgbe_mac_X550:
4242 case ixgbe_mac_X550EM_x:
4243 case ixgbe_mac_X550EM_a:
4244 rx_buffer_size = X550_RX_BUFFER_SIZE;
4247 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4251 if (config_dcb_rx) {
4252 /* Set RX buffer size */
4253 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4254 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4256 for (i = 0; i < nb_tcs; i++) {
4257 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4259 /* zero alloc all unused TCs */
4260 for (; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++)
4261 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4263 if (config_dcb_tx) {
4264 /* Only support an equally distributed
4265 * Tx packet buffer strategy.
4267 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4268 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4270 for (i = 0; i < nb_tcs; i++) {
4271 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4272 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4274 /* Clear unused TCs, if any, to zero buffer size*/
4275 for (; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
4276 IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4277 IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4281 /*Calculates traffic class credits*/
4282 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4283 IXGBE_DCB_TX_CONFIG);
4284 ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4285 IXGBE_DCB_RX_CONFIG);
4287 if (config_dcb_rx) {
4288 /* Unpack CEE standard containers */
4289 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4290 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4291 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4292 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4293 /* Configure PG(ETS) RX */
4294 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4297 if (config_dcb_tx) {
4298 /* Unpack CEE standard containers */
4299 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4300 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4301 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4302 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4303 /* Configure PG(ETS) TX */
4304 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4307 /*Configure queue statistics registers*/
4308 ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4310 /* Check if the PFC is supported */
4311 if (dev->data->dev_conf.dcb_capability_en & RTE_ETH_DCB_PFC_SUPPORT) {
4312 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4313 for (i = 0; i < nb_tcs; i++) {
4315 * If the TC count is 8,and the default high_water is 48,
4316 * the low_water is 16 as default.
/* Watermarks: high = 3/4 of the per-TC buffer, low = 1/4. */
4318 hw->fc.high_water[i] = (pbsize * 3) / 4;
4319 hw->fc.low_water[i] = pbsize / 4;
4320 /* Enable pfc for this TC */
4321 tc = &dcb_config->tc_config[i];
4322 tc->pfc = ixgbe_dcb_pfc_enabled;
4324 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4325 if (dcb_config->num_tcs.pfc_tcs == RTE_ETH_4_TCS)
4327 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4334 * ixgbe_configure_dcb - Configure DCB Hardware
4335 * @dev: pointer to rte_eth_dev
/*
 * Entry point: validate that the RX mq_mode is a DCB variant and that
 * the queue count fits DCB, then delegate to ixgbe_dcb_hw_configure().
 */
4337 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4339 struct ixgbe_dcb_config *dcb_cfg =
4340 IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4341 struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4343 PMD_INIT_FUNC_TRACE();
4345 /* check support mq_mode for DCB */
4346 if (dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_VMDQ_DCB &&
4347 dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_DCB &&
4348 dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_DCB_RSS)
4351 if (dev->data->nb_rx_queues > RTE_ETH_DCB_NUM_QUEUES)
4354 /** Configure DCB hardware **/
4355 ixgbe_dcb_hw_configure(dev, dcb_cfg)
4359 * VMDq only support for 10 GbE NIC.
/*
 * Configure pure VMDq (no DCB) receive mode: disable RSS, enable VMDq
 * in MRQC, turn on virtualisation with the configured default pool,
 * apply per-pool RX mode (VMOLR), enable VLAN filtering and per-pool
 * VLAN/pool membership filters, and optionally enable TX->RX VMDq
 * loopback.  Up to 64 pools, so both halves of VFRE/VLVFB are used.
 */
4362 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4364 struct rte_eth_vmdq_rx_conf *cfg;
4365 struct ixgbe_hw *hw;
4366 enum rte_eth_nb_pools num_pools;
4367 uint32_t mrqc, vt_ctl, vlanctrl;
4371 PMD_INIT_FUNC_TRACE();
4372 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4373 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4374 num_pools = cfg->nb_queue_pools;
4376 ixgbe_rss_disable(dev);
4378 /* MRQC: enable vmdq */
4379 mrqc = IXGBE_MRQC_VMDQEN;
4380 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4382 /* PFVTCTL: turn on virtualisation and set the default pool */
4383 vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4384 if (cfg->enable_default_pool)
4385 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4387 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4389 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
/* Translate the application rx_mode flags into each pool's VMOLR. */
4391 for (i = 0; i < (int)num_pools; i++) {
4392 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4393 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4396 /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4397 vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4398 vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4399 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4401 /* VFTA - enable all vlan filters */
4402 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4403 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4405 /* VFRE: pool enabling for receive - 64 */
4406 IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4407 if (num_pools == RTE_ETH_64_POOLS)
4408 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4411 * MPSAR - allow pools to read specific mac addresses
4412 * In this case, all pools should be able to read from mac addr 0
4414 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4415 IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4417 /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4418 for (i = 0; i < cfg->nb_pool_maps; i++) {
4419 /* set vlan id in VF register and set the valid bit */
4420 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4421 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4423 * Put the allowed pools in VFB reg. As we only have 16 or 64
4424 * pools, we only need to use the first half of the register
/* Pools bitmap is 64-bit: write low word, and high word when non-zero. */
4427 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4428 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4429 (cfg->pool_map[i].pools & UINT32_MAX));
4431 IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4432 ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4436 /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4437 if (cfg->enable_loop_back) {
4438 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4439 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4440 IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4443 IXGBE_WRITE_FLUSH(hw);
4447  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
4448  * @hw: pointer to hardware structure
/*
 * Program the Tx side for VMDq: enable transmit for all VF pools,
 * switch MTQC to virtualized 64-VF mode (with the Tx arbiter disabled
 * around the MTQC change, as required), and disable queue-drop for all
 * queues.
 */
4451 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4456 PMD_INIT_FUNC_TRACE();
4457 /*PF VF Transmit Enable*/
4458 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4459 IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4461 /* Disable the Tx desc arbiter so that MTQC can be changed */
4462 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4463 reg |= IXGBE_RTTDCS_ARBDIS;
4464 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
/* Virtualization on, 64 pools. */
4466 reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4467 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4469 /* Disable drop for all queues */
4470 for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4471 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4472 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4474 /* Enable the Tx desc arbiter */
4475 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4476 reg &= ~IXGBE_RTTDCS_ARBDIS;
4477 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4479 IXGBE_WRITE_FLUSH(hw);
/*
 * Populate the software ring of an Rx queue with freshly allocated
 * mbufs and write their DMA addresses into the hardware descriptors.
 * Logs an error if mbuf allocation fails.
 * NOTE(review): the error-return path and the tail of the loop are
 * elided from this view — confirm against the full file.
 */
4482 static int __rte_cold
4483 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4485 struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4489 /* Initialize software ring entries */
4490 for (i = 0; i < rxq->nb_rx_desc; i++) {
4491 volatile union ixgbe_adv_rx_desc *rxd;
4492 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4495 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4496 (unsigned) rxq->queue_id);
/* Reset the mbuf's data offset and tag it with the receiving port. */
4500 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4501 mbuf->port = rxq->port_id;
4504 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4505 rxd = &rxq->rx_ring[i];
4506 rxd->read.hdr_addr = 0;
4507 rxd->read.pkt_addr = dma_addr;
/*
 * Configure RSS for SR-IOV mode: program the RSS key/table via
 * ixgbe_rss_configure(), then set the MRQC mode bits to the
 * VMDq+RSS variant matching the number of active pools (64 or 32).
 */
4515 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4517 struct ixgbe_hw *hw;
4520 ixgbe_rss_configure(dev);
4522 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4524 /* MRQC: enable VF RSS */
4525 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4526 mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4527 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4528 case RTE_ETH_64_POOLS:
4529 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4532 case RTE_ETH_32_POOLS:
4533 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
/* Any other pool count is invalid for VMDq+RSS. */
4537 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4541 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
/*
 * Default Rx multi-queue programming for SR-IOV when no RSS/DCB was
 * requested: select the plain VMDq MRQC mode matching the active pool
 * count (64 / 32 / 16 pools).
 * NOTE(review): the MRQC value for the 64-pool case is on an elided
 * line — confirm against the full file.
 */
4547 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4549 struct ixgbe_hw *hw =
4550 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4552 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4553 case RTE_ETH_64_POOLS:
4554 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4558 case RTE_ETH_32_POOLS:
4559 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4560 IXGBE_MRQC_VMDQRT4TCEN);
4563 case RTE_ETH_16_POOLS:
4564 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4565 IXGBE_MRQC_VMDQRT8TCEN);
4569 "invalid pool number in IOV mode");
/*
 * Top-level Rx multi-queue dispatcher. For non-SR-IOV operation it
 * selects between RSS, VMDq+DCB, VMDq-only and no-RSS setups based on
 * rxmode.mq_mode; for SR-IOV it selects the VF-aware variants.
 * 82598EB is skipped entirely (no VMDq/DCB handling needed here).
 * NOTE(review): break statements between cases are elided from this
 * view — the case groupings below follow the visible labels only.
 */
4576 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4578 struct ixgbe_hw *hw =
4579 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4581 if (hw->mac.type == ixgbe_mac_82598EB)
4584 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4586  * SRIOV inactive scheme
4587  * any DCB/RSS w/o VMDq multi-queue setting
4589 switch (dev->data->dev_conf.rxmode.mq_mode) {
4590 case RTE_ETH_MQ_RX_RSS:
4591 case RTE_ETH_MQ_RX_DCB_RSS:
4592 case RTE_ETH_MQ_RX_VMDQ_RSS:
4593 ixgbe_rss_configure(dev);
4596 case RTE_ETH_MQ_RX_VMDQ_DCB:
4597 ixgbe_vmdq_dcb_configure(dev);
4600 case RTE_ETH_MQ_RX_VMDQ_ONLY:
4601 ixgbe_vmdq_rx_hw_configure(dev);
4604 case RTE_ETH_MQ_RX_NONE:
4606 /* if mq_mode is none, disable rss mode.*/
4607 ixgbe_rss_disable(dev);
4611 /* SRIOV active scheme
4612  * Support RSS together with SRIOV.
4614 switch (dev->data->dev_conf.rxmode.mq_mode) {
4615 case RTE_ETH_MQ_RX_RSS:
4616 case RTE_ETH_MQ_RX_VMDQ_RSS:
4617 ixgbe_config_vf_rss(dev);
4619 case RTE_ETH_MQ_RX_VMDQ_DCB:
4620 case RTE_ETH_MQ_RX_DCB:
4621 /* In SRIOV, the configuration is the same as VMDq case */
4622 ixgbe_vmdq_dcb_configure(dev);
4624 /* DCB/RSS together with SRIOV is not supported */
4625 case RTE_ETH_MQ_RX_VMDQ_DCB_RSS:
4626 case RTE_ETH_MQ_RX_DCB_RSS:
4628 "Could not support DCB/RSS with VMDq & SRIOV");
/* Default SR-IOV case: plain VMDq programming. */
4631 ixgbe_config_vf_default(dev);
/*
 * Top-level Tx multi-queue dispatcher. Disables the Tx descriptor
 * arbiter (required before any MTQC change), programs MTQC for the
 * non-SR-IOV case (VMDq-only or single 64-queue packet buffer) or for
 * the SR-IOV pool count (64/32/16 VFs), then re-enables the arbiter.
 * 82598EB is skipped (MTQC programming does not apply).
 */
4640 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4642 struct ixgbe_hw *hw =
4643 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4647 if (hw->mac.type == ixgbe_mac_82598EB)
4650 /* disable arbiter before setting MTQC */
4651 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4652 rttdcs |= IXGBE_RTTDCS_ARBDIS;
4653 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4655 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4657  * SRIOV inactive scheme
4658  * any DCB w/o VMDq multi-queue setting
4660 if (dev->data->dev_conf.txmode.mq_mode == RTE_ETH_MQ_TX_VMDQ_ONLY)
4661 ixgbe_vmdq_tx_hw_configure(hw);
/* Non-VMDq: single packet buffer with all 64 queues. */
4663 mtqc = IXGBE_MTQC_64Q_1PB;
4664 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4667 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4670  * SRIOV active scheme
4671  * FIXME if support DCB together with VMDq & SRIOV
4673 case RTE_ETH_64_POOLS:
4674 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4676 case RTE_ETH_32_POOLS:
4677 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4679 case RTE_ETH_16_POOLS:
4680 mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
/* Fallback: invalid pool count — log and use the 64Q/1PB default. */
4684 mtqc = IXGBE_MTQC_64Q_1PB;
4685 PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4687 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4690 /* re-enable arbiter */
4691 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4692 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4698  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4700  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4701  * spec rev. 3.0 chapter 8.2.3.8.13.
4703  * @pool Memory pool of the Rx queue
/*
 * Maps the pool's per-mbuf data room to the largest MAXDESC step
 * (16/8/4/1) such that MAXDESC * buffer size stays within the spec
 * limit. NOTE(review): the declaration of `maxdesc` and one comparison
 * are on elided lines — confirm against the full file.
 */
4705 static inline uint32_t
4706 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4708 struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4710 /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4712 RTE_IPV4_MAX_PKT_LEN /
4713 (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4716 return IXGBE_RSCCTL_MAXDESC_16;
4717 else if (maxdesc >= 8)
4718 return IXGBE_RSCCTL_MAXDESC_8;
4719 else if (maxdesc >= 4)
4720 return IXGBE_RSCCTL_MAXDESC_4;
4722 return IXGBE_RSCCTL_MAXDESC_1;
4726  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4729  * (Taken from FreeBSD tree)
4730  * (yes this is all very magic and confusing :)
4733  * @entry the register array entry
4734  * @vector the MSIX vector for this queue
/*
 * Routes an Rx/Tx queue (or the "other causes" source, type == -1) to
 * an MSI-X vector by read-modify-writing the appropriate byte of the
 * IVAR register. Register layout differs between 82598 and
 * 82599/X540, hence the per-MAC switch.
 */
4738 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4740 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
/* Mark the vector entry as valid before programming it. */
4743 vector |= IXGBE_IVAR_ALLOC_VAL;
4745 switch (hw->mac.type) {
4747 case ixgbe_mac_82598EB:
4749 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
/* 82598: Rx entries in the low half, Tx offset by 64. */
4751 entry += (type * 64);
4752 index = (entry >> 2) & 0x1F;
4753 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4754 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4755 ivar |= (vector << (8 * (entry & 0x3)));
4756 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4759 case ixgbe_mac_82599EB:
4760 case ixgbe_mac_X540:
4761 if (type == -1) { /* MISC IVAR */
4762 index = (entry & 1) * 8;
4763 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4764 ivar &= ~(0xFF << index);
4765 ivar |= (vector << index);
4766 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4767 } else { /* RX/TX IVARS */
4768 index = (16 * (entry & 1)) + (8 * type);
4769 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4770 ivar &= ~(0xFF << index);
4771 ivar |= (vector << index);
4772 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
/*
 * Select the Rx burst callback for the port based on runtime
 * conditions: vector-Rx eligibility (device config check, bulk-alloc
 * allowed, SIMD width >= 128), LRO state, and scattered-Rx state.
 * Precedence: LRO callbacks first, then scattered callbacks, then the
 * "simple" vector / bulk-alloc / regular callbacks. The chosen mode is
 * finally propagated to every Rx queue (rx_using_sse / using_ipsec).
 * NOTE(review): several lines (blank lines, braces, part of the
 * rx_using_sse assignment) are elided from this view.
 */
4783 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4785 uint16_t i, rx_using_sse;
4786 struct ixgbe_adapter *adapter = dev->data->dev_private;
4789  * In order to allow Vector Rx there are a few configuration
4790  * conditions to be met and Rx Bulk Allocation should be allowed.
4792 if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4793 !adapter->rx_bulk_alloc_allowed ||
4794 rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4795 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4797 dev->data->port_id);
4799 adapter->rx_vec_allowed = false;
4803  * Initialize the appropriate LRO callback.
4805  * If all queues satisfy the bulk allocation preconditions
4806  * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4807  * Otherwise use a single allocation version.
4809 if (dev->data->lro) {
4810 if (adapter->rx_bulk_alloc_allowed) {
4811 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4812 "allocation version");
4813 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4815 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4816 "allocation version");
4817 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4819 } else if (dev->data->scattered_rx) {
4821  * Set the non-LRO scattered callback: there are Vector and
4822  * single allocation versions.
4824 if (adapter->rx_vec_allowed) {
4825 PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4826 "callback (port=%d).",
4827 dev->data->port_id);
4829 dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4830 } else if (adapter->rx_bulk_alloc_allowed) {
4831 PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4832 "allocation callback (port=%d).",
4833 dev->data->port_id);
/* Scattered + bulk-alloc reuses the LRO bulk-alloc burst function. */
4834 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4836 PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4837 "single allocation) "
4838 "Scattered Rx callback "
4840 dev->data->port_id);
4842 dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4845  * Below we set "simple" callbacks according to port/queues parameters.
4846  * If parameters allow we are going to choose between the following
4850  * - Single buffer allocation (the simplest one)
4852 } else if (adapter->rx_vec_allowed) {
4853 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4854 "burst size no less than %d (port=%d).",
4855 RTE_IXGBE_DESCS_PER_LOOP,
4856 dev->data->port_id);
4858 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4859 } else if (adapter->rx_bulk_alloc_allowed) {
4860 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4861 "satisfied. Rx Burst Bulk Alloc function "
4862 "will be used on port=%d.",
4863 dev->data->port_id);
4865 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4867 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4868 "satisfied, or Scattered Rx is requested "
4870 dev->data->port_id);
4872 dev->rx_pkt_burst = ixgbe_recv_pkts;
4875 /* Propagate information about RX function choice through all queues. */
4878 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4879 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4881 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4882 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4884 rxq->rx_using_sse = rx_using_sse;
4885 #ifdef RTE_LIB_SECURITY
/* Record per-queue whether inline IPsec Rx offload is enabled. */
4886 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4887 RTE_ETH_RX_OFFLOAD_SECURITY);
4893  * ixgbe_set_rsc - configure RSC related port HW registers
4895  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4896  * of 82599 Spec (x540 configuration is virtually the same).
4900  * Returns 0 in case of success or a non-zero error code
/*
 * Validates LRO requests against capability and the KEEP_CRC
 * constraint, programs the global RFCTL/RDRXCTL bits, then performs
 * per-queue RSC setup (SRRCTL header size, RSCCTL enable + MAXDESC,
 * PSRTYPE, EITR interval) and maps each queue to an interrupt vector.
 */
4903 ixgbe_set_rsc(struct rte_eth_dev *dev)
4905 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4906 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4907 struct rte_eth_dev_info dev_info = { 0 };
4908 bool rsc_capable = false;
/* Probe device capabilities to learn whether RSC/LRO is supported. */
4914 dev->dev_ops->dev_infos_get(dev, &dev_info);
4915 if (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO)
4918 if (!rsc_capable && (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)) {
4919 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4924 /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4926 if ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) &&
4927 (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)) {
4929  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4930  * 3.0 RSC configuration requires HW CRC stripping being
4931  * enabled. If user requested both HW CRC stripping off
4932  * and RSC on - return an error.
4934 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4939 /* RFCTL configuration */
4940 rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4941 if ((rsc_capable) && (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO))
4942 rfctl &= ~IXGBE_RFCTL_RSC_DIS;
4944 rfctl |= IXGBE_RFCTL_RSC_DIS;
4945 /* disable NFS filtering */
4946 rfctl |= IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS;
4947 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4949 /* If LRO hasn't been requested - we are done here. */
4950 if (!(rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO))
4953 /* Set RDRXCTL.RSCACKC bit */
4954 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4955 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4956 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4958 /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4959 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4960 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4962 IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4964 IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4966 IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4968 IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4971  * ixgbe PMD doesn't support header-split at the moment.
4973  * Following the 4.6.7.2.1 chapter of the 82599/x540
4974  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4975  * should be configured even if header split is not
4976  * enabled. We will configure it 128 bytes following the
4977  * recommendation in the spec.
4979 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4980 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4981 IXGBE_SRRCTL_BSIZEHDR_MASK;
4984  * TODO: Consider setting the Receive Descriptor Minimum
4985  * Threshold Size for an RSC case. This is not an obviously
4986  * beneficiary option but the one worth considering...
4989 rscctl |= IXGBE_RSCCTL_RSCEN;
4990 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4991 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4994  * RSC: Set ITR interval corresponding to 2K ints/s.
4996  * Full-sized RSC aggregations for a 10Gb/s link will
4997  * arrive at about 20K aggregation/s rate.
4999  * 2K inst/s rate will make only 10% of the
5000  * aggregations to be closed due to the interrupt timer
5001  * expiration for a streaming at wire-speed case.
5003  * For a sparse streaming case this setting will yield
5004  * at most 500us latency for a single RSC aggregation.
5006 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
5007 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
5008 eitr |= IXGBE_EITR_CNT_WDIS;
/* Write back all per-queue registers modified above. */
5010 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5011 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
5012 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
5013 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
5016  * RSC requires the mapping of the queue to the
5019 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
5024 PMD_INIT_LOG(DEBUG, "enabling LRO mode");
5030  * Initializes Receive Unit.
/*
 * Full Rx path initialization: disables Rx during setup, programs
 * broadcast/CRC/jumbo/loopback controls, configures every Rx queue's
 * descriptor ring and SRRCTL buffer sizing, decides scattered-Rx,
 * applies multi-queue and checksum settings, configures RSC, and
 * finally selects the Rx burst function.
 * NOTE(review): some lines (declarations, braces, returns) are elided
 * from this view.
 */
5033 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
5035 struct ixgbe_hw *hw;
5036 struct ixgbe_rx_queue *rxq;
5047 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5048 uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
5051 PMD_INIT_FUNC_TRACE();
5052 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5055  * Make sure receives are disabled while setting
5056  * up the RX context (registers, descriptor rings, etc.).
5058 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5059 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5061 /* Enable receipt of broadcasted frames */
5062 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5063 fctrl |= IXGBE_FCTRL_BAM;
5064 fctrl |= IXGBE_FCTRL_DPF;
5065 fctrl |= IXGBE_FCTRL_PMCF;
5066 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5069  * Configure CRC stripping, if any.
5071 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5072 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5073 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5075 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5078  * Configure jumbo frame support, if any.
5080 if (dev->data->mtu > RTE_ETHER_MTU) {
5081 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
/* MAXFRS keeps its low 16 bits; frame size lives in the high 16. */
5082 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5083 maxfrs &= 0x0000FFFF;
5084 maxfrs |= (frame_size << 16);
5085 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5087 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5090  * If loopback mode is configured, set LPBK bit.
5092 if (dev->data->dev_conf.lpbk_mode != 0) {
5093 rc = ixgbe_check_supported_loopback_mode(dev);
5095 PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5098 hlreg0 |= IXGBE_HLREG0_LPBK;
5100 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5103 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5106  * Assume no header split and no VLAN strip support
5107  * on any Rx queue first .
5109 rx_conf->offloads &= ~RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5110 /* Setup RX queues */
5111 for (i = 0; i < dev->data->nb_rx_queues; i++) {
5112 rxq = dev->data->rx_queues[i];
5115  * Reset crc_len in case it was changed after queue setup by a
5116  * call to configure.
5118 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5119 rxq->crc_len = RTE_ETHER_CRC_LEN;
5123 /* Setup the Base and Length of the Rx Descriptor Rings */
5124 bus_addr = rxq->rx_ring_phys_addr;
5125 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5126 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5127 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5128 (uint32_t)(bus_addr >> 32));
5129 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5130 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5131 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5132 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5134 /* Configure the SRRCTL register */
5135 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5137 /* Set if packets are dropped when no descriptors available */
5139 srrctl |= IXGBE_SRRCTL_DROP_EN;
5142  * Configure the RX buffer size in the BSIZEPACKET field of
5143  * the SRRCTL register of the queue.
5144  * The value is in 1 KB resolution. Valid values can be from
5147 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5148 RTE_PKTMBUF_HEADROOM);
5149 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5150 IXGBE_SRRCTL_BSIZEPKT_MASK);
5152 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
/* Re-derive the effective (1 KB-granular) buffer size actually set. */
5154 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5155 IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5157 /* It adds dual VLAN length for supporting dual VLAN */
5158 if (frame_size + 2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5159 dev->data->scattered_rx = 1;
5160 if (rxq->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
5161 rx_conf->offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5164 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
5165 dev->data->scattered_rx = 1;
5168  * Device configured with multiple RX queues.
5170 ixgbe_dev_mq_rx_configure(dev);
5173  * Setup the Checksum Register.
5174  * Disable Full-Packet Checksum which is mutually exclusive with RSS.
5175  * Enable IP/L4 checksum computation by hardware if requested to do so.
5177 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5178 rxcsum |= IXGBE_RXCSUM_PCSD;
5179 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM)
5180 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5182 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5184 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5186 if (hw->mac.type == ixgbe_mac_82599EB ||
5187 hw->mac.type == ixgbe_mac_X540) {
5188 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5189 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5190 rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5192 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5193 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5194 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5197 rc = ixgbe_set_rsc(dev);
5201 ixgbe_set_rx_function(dev);
5207  * Initializes Transmit Unit.
/*
 * Tx path initialization: enables Tx CRC and hardware padding in
 * HLREG0, programs every Tx queue's descriptor ring base/length and
 * head/tail pointers, disables the DCA head-writeback relaxed-ordering
 * bit (register location differs per MAC type), then applies
 * multi-queue Tx configuration.
 */
5210 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5212 struct ixgbe_hw *hw;
5213 struct ixgbe_tx_queue *txq;
5219 PMD_INIT_FUNC_TRACE();
5220 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5222 /* Enable TX CRC (checksum offload requirement) and hw padding
5225 hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5226 hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5227 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5229 /* Setup the Base and Length of the Tx Descriptor Rings */
5230 for (i = 0; i < dev->data->nb_tx_queues; i++) {
5231 txq = dev->data->tx_queues[i];
5233 bus_addr = txq->tx_ring_phys_addr;
5234 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5235 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5236 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5237 (uint32_t)(bus_addr >> 32));
5238 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5239 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5240 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5241 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5242 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5245  * Disable Tx Head Writeback RO bit, since this hoses
5246  * bookkeeping if things aren't delivered in order.
5248 switch (hw->mac.type) {
5249 case ixgbe_mac_82598EB:
5250 txctrl = IXGBE_READ_REG(hw,
5251 IXGBE_DCA_TXCTRL(txq->reg_idx));
5252 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5253 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
/* 82599 and newer use the relocated DCA_TXCTRL register. */
5257 case ixgbe_mac_82599EB:
5258 case ixgbe_mac_X540:
5259 case ixgbe_mac_X550:
5260 case ixgbe_mac_X550EM_x:
5261 case ixgbe_mac_X550EM_a:
5263 txctrl = IXGBE_READ_REG(hw,
5264 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5265 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5266 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5272 /* Device configured with multiple TX queues. */
5273 ixgbe_dev_mq_tx_configure(dev);
5277  * Check if requested loopback mode is supported
/*
 * Tx->Rx loopback is only supported on 82599, X540 and the X550
 * family. NOTE(review): the success/failure return statements are on
 * elided lines — confirm against the full file.
 */
5280 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5282 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5284 if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5285 if (hw->mac.type == ixgbe_mac_82599EB ||
5286 hw->mac.type == ixgbe_mac_X540 ||
5287 hw->mac.type == ixgbe_mac_X550 ||
5288 hw->mac.type == ixgbe_mac_X550EM_x ||
5289 hw->mac.type == ixgbe_mac_X550EM_a)
5296  * Set up link for 82599 loopback mode Tx->Rx.
/*
 * Forces link-up for 82599 Tx->Rx loopback. If LESM firmware is
 * active, the MAC CSR semaphore must be acquired first (and released
 * afterwards); failure to acquire it aborts loopback setup.
 */
5298 static inline void __rte_cold
5299 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5301 PMD_INIT_FUNC_TRACE();
5303 if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5304 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5306 PMD_INIT_LOG(ERR, "Could not enable loopback mode");
/* Force 10G link with no auto-negotiation, then reset the pipeline. */
5315 IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5316 ixgbe_reset_pipeline_82599(hw);
5318 hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5324  * Start Transmit and Receive Units.
/*
 * Bring up the datapath: program per-queue Tx thresholds, enable DMA
 * Tx (non-82598), start all non-deferred Tx then Rx queues, enable the
 * Rx engine, set up loopback link if requested, and enable inline
 * IPsec when the security offload is configured.
 */
5327 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5329 struct ixgbe_hw *hw;
5330 struct ixgbe_tx_queue *txq;
5331 struct ixgbe_rx_queue *rxq;
5338 PMD_INIT_FUNC_TRACE();
5339 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5341 for (i = 0; i < dev->data->nb_tx_queues; i++) {
5342 txq = dev->data->tx_queues[i];
5343 /* Setup Transmit Threshold Registers */
5344 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5345 txdctl |= txq->pthresh & 0x7F;
5346 txdctl |= ((txq->hthresh & 0x7F) << 8);
5347 txdctl |= ((txq->wthresh & 0x7F) << 16);
5348 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
/* DMATXCTL exists on 82599 and later only. */
5351 if (hw->mac.type != ixgbe_mac_82598EB) {
5352 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5353 dmatxctl |= IXGBE_DMATXCTL_TE;
5354 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5357 for (i = 0; i < dev->data->nb_tx_queues; i++) {
5358 txq = dev->data->tx_queues[i];
5359 if (!txq->tx_deferred_start) {
5360 ret = ixgbe_dev_tx_queue_start(dev, i);
5366 for (i = 0; i < dev->data->nb_rx_queues; i++) {
5367 rxq = dev->data->rx_queues[i];
5368 if (!rxq->rx_deferred_start) {
5369 ret = ixgbe_dev_rx_queue_start(dev, i);
5375 /* Enable Receive engine */
5376 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5377 if (hw->mac.type == ixgbe_mac_82598EB)
5378 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5379 rxctrl |= IXGBE_RXCTRL_RXEN;
5380 hw->mac.ops.enable_rx_dma(hw, rxctrl);
5382 /* If loopback mode is enabled, set up the link accordingly */
5383 if (dev->data->dev_conf.lpbk_mode != 0) {
5384 if (hw->mac.type == ixgbe_mac_82599EB)
5385 ixgbe_setup_loopback_link_82599(hw);
5386 else if (hw->mac.type == ixgbe_mac_X540 ||
5387 hw->mac.type == ixgbe_mac_X550 ||
5388 hw->mac.type == ixgbe_mac_X550EM_x ||
5389 hw->mac.type == ixgbe_mac_X550EM_a)
5390 ixgbe_setup_loopback_link_x540_x550(hw, true);
5393 #ifdef RTE_LIB_SECURITY
5394 if ((dev->data->dev_conf.rxmode.offloads &
5395 RTE_ETH_RX_OFFLOAD_SECURITY) ||
5396 (dev->data->dev_conf.txmode.offloads &
5397 RTE_ETH_TX_OFFLOAD_SECURITY)) {
5398 ret = ixgbe_crypto_enable_ipsec(dev);
5401 "ixgbe_crypto_enable_ipsec fails with %d.",
5412  * Start Receive Units for specified queue.
/*
 * Allocates mbufs for the queue's descriptor ring, sets
 * RXDCTL.ENABLE, polls until the hardware acknowledges the enable bit,
 * programs head/tail pointers, and marks the queue started in ethdev
 * state.
 */
5415 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5417 struct ixgbe_hw *hw;
5418 struct ixgbe_rx_queue *rxq;
5422 PMD_INIT_FUNC_TRACE();
5423 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5425 rxq = dev->data->rx_queues[rx_queue_id];
5427 /* Allocate buffers for descriptor rings */
5428 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5429 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5433 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5434 rxdctl |= IXGBE_RXDCTL_ENABLE;
5435 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5437 /* Wait until RX Enable ready */
5438 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5441 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5442 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5444 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
/* Tail = nb_rx_desc - 1: all descriptors available to hardware. */
5446 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5447 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5448 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5454  * Stop Receive Units for specified queue.
/*
 * Clears RXDCTL.ENABLE, polls for the bit to drop, waits for in-flight
 * DMA to settle, then frees the queue's mbufs, resets the ring state,
 * and marks the queue stopped in ethdev state.
 */
5457 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5459 struct ixgbe_hw *hw;
5460 struct ixgbe_adapter *adapter = dev->data->dev_private;
5461 struct ixgbe_rx_queue *rxq;
5465 PMD_INIT_FUNC_TRACE();
5466 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5468 rxq = dev->data->rx_queues[rx_queue_id];
5470 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5471 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5472 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5474 /* Wait until RX Enable bit clear */
5475 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5478 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5479 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5481 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
/* Brief settle delay before touching the software ring. */
5483 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5485 ixgbe_rx_queue_release_mbufs(rxq);
5486 ixgbe_reset_rx_queue(adapter, rxq);
5487 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5494  * Start Transmit Units for specified queue.
/*
 * Resets the queue's head pointer, sets TXDCTL.ENABLE (polling for
 * hardware acknowledgement on 82599 only), resets the tail pointer,
 * and marks the queue started in ethdev state.
 */
5497 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5499 struct ixgbe_hw *hw;
5500 struct ixgbe_tx_queue *txq;
5504 PMD_INIT_FUNC_TRACE();
5505 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5507 txq = dev->data->tx_queues[tx_queue_id];
5508 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5509 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5510 txdctl |= IXGBE_TXDCTL_ENABLE;
5511 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5513 /* Wait until TX Enable ready */
5514 if (hw->mac.type == ixgbe_mac_82599EB) {
5515 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5518 txdctl = IXGBE_READ_REG(hw,
5519 IXGBE_TXDCTL(txq->reg_idx));
5520 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5522 PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5526 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5527 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5533  * Stop Transmit Units for specified queue.
/*
 * On 82599, first polls until the queue drains (TDH == TDT), logging
 * if it never empties. Then clears TXDCTL.ENABLE (again polling for
 * the clear on 82599), releases the queue's mbufs, resets the ring via
 * the queue ops, and marks the queue stopped in ethdev state.
 */
5536 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5538 struct ixgbe_hw *hw;
5539 struct ixgbe_tx_queue *txq;
5541 uint32_t txtdh, txtdt;
5544 PMD_INIT_FUNC_TRACE();
5545 hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5547 txq = dev->data->tx_queues[tx_queue_id];
5549 /* Wait until TX queue is empty */
5550 if (hw->mac.type == ixgbe_mac_82599EB) {
5551 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5553 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5554 txtdh = IXGBE_READ_REG(hw,
5555 IXGBE_TDH(txq->reg_idx));
5556 txtdt = IXGBE_READ_REG(hw,
5557 IXGBE_TDT(txq->reg_idx));
5558 } while (--poll_ms && (txtdh != txtdt));
5561 "Tx Queue %d is not empty when stopping.",
5565 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5566 txdctl &= ~IXGBE_TXDCTL_ENABLE;
5567 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5569 /* Wait until TX Enable bit clear */
5570 if (hw->mac.type == ixgbe_mac_82599EB) {
5571 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5574 txdctl = IXGBE_READ_REG(hw,
5575 IXGBE_TXDCTL(txq->reg_idx));
5576 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5578 PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
/* Release queue resources through the queue's ops vtable, if set. */
5582 if (txq->ops != NULL) {
5583 txq->ops->release_mbufs(txq);
5584 txq->ops->reset(txq);
5586 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
/*
 * ethdev rxq_info_get callback: copy an Rx queue's runtime parameters
 * (mempool, scattered state, descriptor count, thresholds, drop/defer
 * flags, offloads) into the caller-provided rte_eth_rxq_info.
 */
5592 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5593 struct rte_eth_rxq_info *qinfo)
5595 struct ixgbe_rx_queue *rxq;
5597 rxq = dev->data->rx_queues[queue_id];
5599 qinfo->mp = rxq->mb_pool;
5600 qinfo->scattered_rx = dev->data->scattered_rx;
5601 qinfo->nb_desc = rxq->nb_rx_desc;
5603 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5604 qinfo->conf.rx_drop_en = rxq->drop_en;
5605 qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5606 qinfo->conf.offloads = rxq->offloads;
/*
 * ethdev txq_info_get callback: copy a Tx queue's runtime parameters
 * (descriptor count, prefetch/host/writeback thresholds, free/RS
 * thresholds, offloads, deferred-start flag) into the caller-provided
 * rte_eth_txq_info.
 */
5610 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5611 struct rte_eth_txq_info *qinfo)
5613 struct ixgbe_tx_queue *txq;
5615 txq = dev->data->tx_queues[queue_id];
5617 qinfo->nb_desc = txq->nb_tx_desc;
5619 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5620 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5621 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5623 qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5624 qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5625 qinfo->conf.offloads = txq->offloads;
5626 qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
/*
 * [VF] Initializes Receive Unit.
 *
 * Programs the VF Rx descriptor rings (base/length/head/tail), the
 * per-queue SRRCTL buffer sizing, and decides whether scattered Rx is
 * required for the configured MTU.  Returns 0 on success, -1 on an
 * invalid queue count, or the error from mbuf ring allocation.
 */
int __rte_cold
ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
{
	struct ixgbe_hw     *hw;
	struct ixgbe_rx_queue *rxq;
	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
	uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
	uint64_t bus_addr;
	uint32_t srrctl, psrtype = 0;
	uint16_t buf_size;
	uint16_t i;
	int ret;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* VF RSS redirection only works with a power-of-2 queue count. */
	if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
			"it should be power of 2");
		return -1;
	}

	if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
			"it should be equal to or less than %d",
			hw->mac.max_rx_queues);
		return -1;
	}

	/*
	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
	 * disables the VF receipt of packets if the PF MTU is > 1500.
	 * This is done to deal with 82599 limitations that imposes
	 * the PF and all VFs to share the same MTU.
	 * Then, the PF driver enables again the VF receipt of packet when
	 * the VF driver issues a IXGBE_VF_SET_LPE request.
	 * In the meantime, the VF device cannot be used, even if the VF driver
	 * and the Guest VM network stack are ready to accept packets with a
	 * size up to the PF MTU.
	 * As a work-around to this PF behaviour, force the call to
	 * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
	 * VF packets received can work in all cases.
	 */
	if (ixgbevf_rlpml_set_vf(hw, frame_size) != 0)
		PMD_INIT_LOG(ERR, "Set max packet length to %d failed.",
			     frame_size);

	/*
	 * Assume no header split and no VLAN strip support
	 * on any Rx queue first .
	 */
	rxmode->offloads &= ~RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
	/* Setup RX queues */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];

		/* Allocate buffers for descriptor rings */
		ret = ixgbe_alloc_rx_queue_mbufs(rxq);
		if (ret)
			return ret;

		/* Setup the Base and Length of the Rx Descriptor Rings */
		bus_addr = rxq->rx_ring_phys_addr;

		IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
				(uint32_t)(bus_addr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
				rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
		/* Tail == head == 0: ring starts empty until RDT is bumped. */
		IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);

		/* Configure the SRRCTL register */
		srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;

		/* Set if packets are dropped when no descriptors available */
		if (rxq->drop_en)
			srrctl |= IXGBE_SRRCTL_DROP_EN;

		/*
		 * Configure the RX buffer size in the BSIZEPACKET field of
		 * the SRRCTL register of the queue.
		 * The value is in 1 KB resolution. Valid values can be from
		 * 1 KB to 16 KB.
		 */
		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
			RTE_PKTMBUF_HEADROOM);
		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
			   IXGBE_SRRCTL_BSIZEPKT_MASK);

		/*
		 * VF modification to write virtual function SRRCTL register
		 */
		IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);

		/* Read back the buffer size the hardware actually uses. */
		buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
				       IXGBE_SRRCTL_BSIZEPKT_SHIFT);

		if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_SCATTER ||
		    /* It adds dual VLAN length for supporting dual VLAN */
		    (frame_size + 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
			if (!dev->data->scattered_rx)
				PMD_INIT_LOG(DEBUG, "forcing scatter mode");
			dev->data->scattered_rx = 1;
		}

		/* Any queue requesting VLAN strip turns it on device-wide. */
		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
			rxmode->offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
	}

	/* Set RQPL for VF RSS according to max Rx queue */
	psrtype |= (dev->data->nb_rx_queues >> 1) <<
		IXGBE_PSRTYPE_RQPL_SHIFT;
	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);

	/* Pick the Rx burst function matching the final configuration. */
	ixgbe_set_rx_function(dev);

	return 0;
}
/*
 * [VF] Initializes Transmit Unit.
 *
 * Programs each VF Tx descriptor ring (base/length/head/tail) and
 * disables the Tx head write-back relaxed-ordering bit.
 */
void __rte_cold
ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
{
	struct ixgbe_hw     *hw;
	struct ixgbe_tx_queue *txq;
	uint64_t bus_addr;
	uint32_t txctrl;
	uint16_t i;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Setup the Base and Length of the Tx Descriptor Rings */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		bus_addr = txq->tx_ring_phys_addr;
		IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
				(uint32_t)(bus_addr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
				txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
		/* Setup the HW Tx Head and TX Tail descriptor pointers */
		IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);

		/*
		 * Disable Tx Head Writeback RO bit, since this hoses
		 * bookkeeping if things aren't delivered in order.
		 */
		txctrl = IXGBE_READ_REG(hw,
				IXGBE_VFDCA_TXCTRL(i));
		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
		IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
				txctrl);
	}
}
/*
 * [VF] Start Transmit and Receive Units.
 *
 * Applies the per-queue Tx thresholds, then sets the ENABLE bit in each
 * TXDCTL/RXDCTL register and polls (up to ~10 ms per queue) until the
 * hardware acknowledges it.  Failure to enable is logged but not fatal.
 */
void __rte_cold
ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
{
	struct ixgbe_hw     *hw;
	struct ixgbe_tx_queue *txq;
	struct ixgbe_rx_queue *rxq;
	uint32_t txdctl;
	uint32_t rxdctl;
	uint16_t i;
	int poll_ms;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		/* Setup Transmit Threshold Registers */
		txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
		txdctl |= txq->pthresh & 0x7F;
		txdctl |= ((txq->hthresh & 0x7F) << 8);
		txdctl |= ((txq->wthresh & 0x7F) << 16);
		IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {

		txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
		txdctl |= IXGBE_TXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);

		poll_ms = 10;
		/* Wait until TX Enable ready */
		do {
			rte_delay_ms(1);
			txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
		} while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
		if (!poll_ms)
			PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
	}
	for (i = 0; i < dev->data->nb_rx_queues; i++) {

		rxq = dev->data->rx_queues[i];

		rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
		rxdctl |= IXGBE_RXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);

		/* Wait until RX Enable ready */
		poll_ms = 10;
		do {
			rte_delay_ms(1);
			rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
		} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
		if (!poll_ms)
			PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
		/*
		 * Ensure all prior descriptor writes are globally visible
		 * before the tail bump hands the ring to hardware.
		 */
		rte_wmb();
		IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
	}
}
5860 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5861 const struct rte_flow_action_rss *in)
5863 if (in->key_len > RTE_DIM(out->key) ||
5864 in->queue_num > RTE_DIM(out->queue))
5866 out->conf = (struct rte_flow_action_rss){
5870 .key_len = in->key_len,
5871 .queue_num = in->queue_num,
5872 .key = memcpy(out->key, in->key, in->key_len),
5873 .queue = memcpy(out->queue, in->queue,
5874 sizeof(*in->queue) * in->queue_num),
5880 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5881 const struct rte_flow_action_rss *with)
5883 return (comp->func == with->func &&
5884 comp->level == with->level &&
5885 comp->types == with->types &&
5886 comp->key_len == with->key_len &&
5887 comp->queue_num == with->queue_num &&
5888 !memcmp(comp->key, with->key, with->key_len) &&
5889 !memcmp(comp->queue, with->queue,
5890 sizeof(*with->queue) * with->queue_num));
/*
 * Apply or remove an rte_flow RSS filter.
 *
 * @dev:  port whose RSS configuration is changed.
 * @conf: RSS action (key, hash types, queue list) to program or remove.
 * @add:  true to install the filter, false to remove it.
 *
 * Returns 0 on success, -EINVAL when removing a non-matching filter,
 * when a filter is already installed, or when the private copy fails.
 */
int
ixgbe_config_rss_filter(struct rte_eth_dev *dev,
		struct ixgbe_rte_flow_rss_conf *conf, bool add)
{
	struct ixgbe_hw *hw;
	uint32_t reta;
	uint16_t i;
	uint16_t j;
	uint16_t sp_reta_size;
	uint32_t reta_reg;
	struct rte_eth_rss_conf rss_conf = {
		.rss_key = conf->conf.key_len ?
			(void *)(uintptr_t)conf->conf.key : NULL,
		.rss_key_len = conf->conf.key_len,
		.rss_hf = conf->conf.types,
	};
	struct ixgbe_filter_info *filter_info =
		IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	sp_reta_size = ixgbe_reta_size_get(hw->mac.type);

	if (!add) {
		/* Removal only succeeds for the currently-installed filter. */
		if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
					  &conf->conf)) {
			ixgbe_rss_disable(dev);
			memset(&filter_info->rss_info, 0,
				sizeof(struct ixgbe_rte_flow_rss_conf));
			return 0;
		}
		return -EINVAL;
	}

	/* Only one RSS flow filter may be installed at a time. */
	if (filter_info->rss_info.conf.queue_num)
		return -EINVAL;
	/* Fill in redirection table
	 * The byte-swap is needed because NIC registers are in
	 * little-endian order.
	 */
	reta = 0;
	for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
		reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);

		/* Wrap around the queue list to fill the whole table. */
		if (j == conf->conf.queue_num)
			j = 0;
		reta = (reta << 8) | conf->conf.queue[j];
		/* Flush four accumulated 8-bit entries per 32-bit register. */
		if ((i & 3) == 3)
			IXGBE_WRITE_REG(hw, reta_reg,
					rte_bswap32(reta));
	}

	/* Configure the RSS key and the RSS protocols used to compute
	 * the RSS hash of input packets.
	 */
	if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
		ixgbe_rss_disable(dev);
		return 0;
	}
	if (rss_conf.rss_key == NULL)
		rss_conf.rss_key = rss_intel_key; /* Default hash key */
	ixgbe_hw_rss_hash_set(hw, &rss_conf);

	/* Remember the installed filter for later matching/removal. */
	if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
		return -EINVAL;

	return 0;
}
/* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
#if defined(RTE_ARCH_PPC_64)
/* Vector Rx is never selected on PPC64: the condition check fails. */
int
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
{
	return -1;
}

/* Vector Rx stub: receives nothing. */
uint16_t
ixgbe_recv_pkts_vec(
	void __rte_unused *rx_queue,
	struct rte_mbuf __rte_unused **rx_pkts,
	uint16_t __rte_unused nb_pkts)
{
	return 0;
}

/* Vector scattered-Rx stub: receives nothing. */
uint16_t
ixgbe_recv_scattered_pkts_vec(
	void __rte_unused *rx_queue,
	struct rte_mbuf __rte_unused **rx_pkts,
	uint16_t __rte_unused nb_pkts)
{
	return 0;
}

/* Vector Rx queue setup stub: always reports failure. */
int
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
{
	return -1;
}

/* Vector Tx stub: transmits nothing. */
uint16_t
ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
			   struct rte_mbuf __rte_unused **tx_pkts,
			   uint16_t __rte_unused nb_pkts)
{
	return 0;
}

/* Vector Tx queue setup stub: always reports failure. */
int
ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
{
	return -1;
}
6010 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)