1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2015-2020
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
20 #include <rte_debug.h>
21 #include <rte_ethdev.h>
22 #include <rte_ethdev_driver.h>
23 #include <rte_memzone.h>
24 #include <rte_atomic.h>
25 #include <rte_mempool.h>
26 #include <rte_malloc.h>
28 #include <rte_ether.h>
29 #include <rte_prefetch.h>
33 #include <rte_string_fns.h>
34 #include <rte_errno.h>
38 #include "txgbe_logs.h"
39 #include "base/txgbe.h"
40 #include "txgbe_ethdev.h"
41 #include "txgbe_rxtx.h"
43 /* Bit mask to indicate which bits are required for building the TX context */
44 static const u64 TXGBE_TX_OFFLOAD_MASK = (PKT_TX_IP_CKSUM |
53 PKT_TX_OUTER_IP_CKSUM);
55 #define TXGBE_TX_OFFLOAD_NOTSUP_MASK \
56 (PKT_TX_OFFLOAD_MASK ^ TXGBE_TX_OFFLOAD_MASK)
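/*
 * For illustration: PKT_TX_OFFLOAD_MASK covers every Tx offload flag the
 * mbuf library defines, so the XOR above leaves exactly the flags this PMD
 * does not support. txgbe_prep_pkts() rejects any packet whose ol_flags
 * intersect this mask.
 */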
59 * Prefetch a cache line into all cache levels.
61 #define rte_txgbe_prefetch(p) rte_prefetch0(p)
64 txgbe_is_vf(struct rte_eth_dev *dev)
66 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
68 switch (hw->mac.type) {
69 case txgbe_mac_raptor_vf:
76 /*********************************************************************
80 **********************************************************************/
83 * Check for descriptors with their DD bit set and free mbufs.
84 * Return the total number of buffers freed.
86 static __rte_always_inline int
87 txgbe_tx_free_bufs(struct txgbe_tx_queue *txq)
89 struct txgbe_tx_entry *txep;
92 struct rte_mbuf *m, *free[RTE_TXGBE_TX_MAX_FREE_BUF_SZ];
94 /* check DD bit on threshold descriptor */
95 status = txq->tx_ring[txq->tx_next_dd].dw3;
96 if (!(status & rte_cpu_to_le_32(TXGBE_TXD_DD))) {
97 if (txq->nb_tx_free >> 1 < txq->tx_free_thresh)
98 txgbe_set32_masked(txq->tdc_reg_addr,
99 TXGBE_TXCFG_FLUSH, TXGBE_TXCFG_FLUSH);
104 * first buffer to free from S/W ring is at index
105 * tx_next_dd - (tx_free_thresh-1)
107 txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_free_thresh - 1)];
108 for (i = 0; i < txq->tx_free_thresh; ++i, ++txep) {
109 /* free buffers one at a time */
110 m = rte_pktmbuf_prefree_seg(txep->mbuf);
113 if (unlikely(m == NULL))
116 if (nb_free >= RTE_TXGBE_TX_MAX_FREE_BUF_SZ ||
117 (nb_free > 0 && m->pool != free[0]->pool)) {
118 rte_mempool_put_bulk(free[0]->pool,
119 (void **)free, nb_free);
127 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
129 /* buffers were freed, update counters */
130 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_free_thresh);
131 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_free_thresh);
132 if (txq->tx_next_dd >= txq->nb_tx_desc)
133 txq->tx_next_dd = (uint16_t)(txq->tx_free_thresh - 1);
135 return txq->tx_free_thresh;
138 /* Populate 4 descriptors with data from 4 mbufs */
140 tx4(volatile struct txgbe_tx_desc *txdp, struct rte_mbuf **pkts)
142 uint64_t buf_dma_addr;
146 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
147 buf_dma_addr = rte_mbuf_data_iova(*pkts);
148 pkt_len = (*pkts)->data_len;
150 /* write data to descriptor */
151 txdp->qw0 = rte_cpu_to_le_64(buf_dma_addr);
152 txdp->dw2 = cpu_to_le32(TXGBE_TXD_FLAGS |
153 TXGBE_TXD_DATLEN(pkt_len));
154 txdp->dw3 = cpu_to_le32(TXGBE_TXD_PAYLEN(pkt_len));
156 rte_prefetch0(&(*pkts)->pool);
160 /* Populate 1 descriptor with data from 1 mbuf */
162 tx1(volatile struct txgbe_tx_desc *txdp, struct rte_mbuf **pkts)
164 uint64_t buf_dma_addr;
167 buf_dma_addr = rte_mbuf_data_iova(*pkts);
168 pkt_len = (*pkts)->data_len;
170 /* write data to descriptor */
171 txdp->qw0 = cpu_to_le64(buf_dma_addr);
172 txdp->dw2 = cpu_to_le32(TXGBE_TXD_FLAGS |
173 TXGBE_TXD_DATLEN(pkt_len));
174 txdp->dw3 = cpu_to_le32(TXGBE_TXD_PAYLEN(pkt_len));
176 rte_prefetch0(&(*pkts)->pool);
180 * Fill H/W descriptor ring with mbuf data.
181 * Copy mbuf pointers to the S/W ring.
184 txgbe_tx_fill_hw_ring(struct txgbe_tx_queue *txq, struct rte_mbuf **pkts,
187 volatile struct txgbe_tx_desc *txdp = &txq->tx_ring[txq->tx_tail];
188 struct txgbe_tx_entry *txep = &txq->sw_ring[txq->tx_tail];
189 const int N_PER_LOOP = 4;
190 const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
191 int mainpart, leftover;
195 * Process most of the packets in chunks of N pkts. Any
196 * leftover packets will get processed one at a time.
198 mainpart = (nb_pkts & ((uint32_t)~N_PER_LOOP_MASK));
199 leftover = (nb_pkts & ((uint32_t)N_PER_LOOP_MASK));
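/*
 * Worked example: with nb_pkts = 7 and N_PER_LOOP = 4, mainpart = 7 & ~3 = 4
 * and leftover = 7 & 3 = 3, so one tx4() call handles the first four packets
 * and tx1() handles the remaining three.
 */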
200 for (i = 0; i < mainpart; i += N_PER_LOOP) {
201 /* Copy N mbuf pointers to the S/W ring */
202 for (j = 0; j < N_PER_LOOP; ++j)
203 (txep + i + j)->mbuf = *(pkts + i + j);
204 tx4(txdp + i, pkts + i);
207 if (unlikely(leftover > 0)) {
208 for (i = 0; i < leftover; ++i) {
209 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
210 tx1(txdp + mainpart + i, pkts + mainpart + i);
215 static inline uint16_t
216 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
219 struct txgbe_tx_queue *txq = (struct txgbe_tx_queue *)tx_queue;
223 * Begin scanning the H/W ring for done descriptors when the
224 * number of available descriptors drops below tx_free_thresh. For
225 * each done descriptor, free the associated buffer.
227 if (txq->nb_tx_free < txq->tx_free_thresh)
228 txgbe_tx_free_bufs(txq);
230 /* Only use descriptors that are available */
231 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
232 if (unlikely(nb_pkts == 0))
235 /* Use exactly nb_pkts descriptors */
236 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
239 * At this point, we know there are enough descriptors in the
240 * ring to transmit all the packets. This assumes that each
241 * mbuf contains a single segment, and that no new offloads
242 * are expected, which would require a new context descriptor.
246 * See if we're going to wrap-around. If so, handle the top
247 * of the descriptor ring first, then do the bottom. If not,
248 * the processing looks just like the "bottom" part anyway...
250 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
251 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
252 txgbe_tx_fill_hw_ring(txq, tx_pkts, n);
256 /* Fill H/W descriptor ring with mbuf data */
257 txgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
258 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
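/*
 * Worked example: with nb_tx_desc = 512, tx_tail = 510 and nb_pkts = 5, the
 * first fill writes n = 2 descriptors at the top of the ring, tx_tail wraps
 * to 0, and the second fill writes the remaining 3 descriptors, leaving
 * tx_tail = 3.
 */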
261 * Check for wrap-around. This would only happen if we used
262 * up to the last descriptor in the ring, no more, no less.
264 if (txq->tx_tail >= txq->nb_tx_desc)
267 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
268 (uint16_t)txq->port_id, (uint16_t)txq->queue_id,
269 (uint16_t)txq->tx_tail, (uint16_t)nb_pkts);
271 /* update tail pointer */
273 txgbe_set32_relaxed(txq->tdt_reg_addr, txq->tx_tail);
279 txgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
284 /* If the burst fits in TX_MAX_BURST packets, transmit it in one call */
285 if (likely(nb_pkts <= RTE_PMD_TXGBE_TX_MAX_BURST))
286 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
288 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
293 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_TXGBE_TX_MAX_BURST);
294 ret = tx_xmit_pkts(tx_queue, &tx_pkts[nb_tx], n);
295 nb_tx = (uint16_t)(nb_tx + ret);
296 nb_pkts = (uint16_t)(nb_pkts - ret);
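/*
 * For illustration, assuming the default RTE_PMD_TXGBE_TX_MAX_BURST of 32:
 * a request for 100 packets is split into bursts of 32, 32, 32 and 4, and
 * the loop stops early if a burst is only partially transmitted.
 */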
305 txgbe_set_xmit_ctx(struct txgbe_tx_queue *txq,
306 volatile struct txgbe_tx_ctx_desc *ctx_txd,
307 uint64_t ol_flags, union txgbe_tx_offload tx_offload)
309 union txgbe_tx_offload tx_offload_mask;
310 uint32_t type_tucmd_mlhl;
311 uint32_t mss_l4len_idx;
313 uint32_t vlan_macip_lens;
314 uint32_t tunnel_seed;
316 ctx_idx = txq->ctx_curr;
317 tx_offload_mask.data[0] = 0;
318 tx_offload_mask.data[1] = 0;
320 /* Specify which HW CTX to upload. */
321 mss_l4len_idx = TXGBE_TXD_IDX(ctx_idx);
322 type_tucmd_mlhl = TXGBE_TXD_CTXT;
324 tx_offload_mask.ptid |= ~0;
325 type_tucmd_mlhl |= TXGBE_TXD_PTID(tx_offload.ptid);
327 /* check if TCP segmentation required for this packet */
328 if (ol_flags & PKT_TX_TCP_SEG) {
329 tx_offload_mask.l2_len |= ~0;
330 tx_offload_mask.l3_len |= ~0;
331 tx_offload_mask.l4_len |= ~0;
332 tx_offload_mask.tso_segsz |= ~0;
333 mss_l4len_idx |= TXGBE_TXD_MSS(tx_offload.tso_segsz);
334 mss_l4len_idx |= TXGBE_TXD_L4LEN(tx_offload.l4_len);
335 } else { /* no TSO, check if hardware checksum is needed */
336 if (ol_flags & PKT_TX_IP_CKSUM) {
337 tx_offload_mask.l2_len |= ~0;
338 tx_offload_mask.l3_len |= ~0;
341 switch (ol_flags & PKT_TX_L4_MASK) {
342 case PKT_TX_UDP_CKSUM:
344 TXGBE_TXD_L4LEN(sizeof(struct rte_udp_hdr));
345 tx_offload_mask.l2_len |= ~0;
346 tx_offload_mask.l3_len |= ~0;
348 case PKT_TX_TCP_CKSUM:
350 TXGBE_TXD_L4LEN(sizeof(struct rte_tcp_hdr));
351 tx_offload_mask.l2_len |= ~0;
352 tx_offload_mask.l3_len |= ~0;
354 case PKT_TX_SCTP_CKSUM:
356 TXGBE_TXD_L4LEN(sizeof(struct rte_sctp_hdr));
357 tx_offload_mask.l2_len |= ~0;
358 tx_offload_mask.l3_len |= ~0;
365 vlan_macip_lens = TXGBE_TXD_IPLEN(tx_offload.l3_len >> 1);
367 if (ol_flags & PKT_TX_TUNNEL_MASK) {
368 tx_offload_mask.outer_tun_len |= ~0;
369 tx_offload_mask.outer_l2_len |= ~0;
370 tx_offload_mask.outer_l3_len |= ~0;
371 tx_offload_mask.l2_len |= ~0;
372 tunnel_seed = TXGBE_TXD_ETUNLEN(tx_offload.outer_tun_len >> 1);
373 tunnel_seed |= TXGBE_TXD_EIPLEN(tx_offload.outer_l3_len >> 2);
375 switch (ol_flags & PKT_TX_TUNNEL_MASK) {
376 case PKT_TX_TUNNEL_IPIP:
377 /* for non-UDP/GRE tunneling, set to 0b */
379 case PKT_TX_TUNNEL_VXLAN:
380 case PKT_TX_TUNNEL_GENEVE:
381 tunnel_seed |= TXGBE_TXD_ETYPE_UDP;
383 case PKT_TX_TUNNEL_GRE:
384 tunnel_seed |= TXGBE_TXD_ETYPE_GRE;
387 PMD_TX_LOG(ERR, "Tunnel type not supported");
390 vlan_macip_lens |= TXGBE_TXD_MACLEN(tx_offload.outer_l2_len);
393 vlan_macip_lens |= TXGBE_TXD_MACLEN(tx_offload.l2_len);
396 if (ol_flags & PKT_TX_VLAN_PKT) {
397 tx_offload_mask.vlan_tci |= ~0;
398 vlan_macip_lens |= TXGBE_TXD_VLAN(tx_offload.vlan_tci);
401 txq->ctx_cache[ctx_idx].flags = ol_flags;
402 txq->ctx_cache[ctx_idx].tx_offload.data[0] =
403 tx_offload_mask.data[0] & tx_offload.data[0];
404 txq->ctx_cache[ctx_idx].tx_offload.data[1] =
405 tx_offload_mask.data[1] & tx_offload.data[1];
406 txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
408 ctx_txd->dw0 = rte_cpu_to_le_32(vlan_macip_lens);
409 ctx_txd->dw1 = rte_cpu_to_le_32(tunnel_seed);
410 ctx_txd->dw2 = rte_cpu_to_le_32(type_tucmd_mlhl);
411 ctx_txd->dw3 = rte_cpu_to_le_32(mss_l4len_idx);
415 * Check which hardware context can be used. Use the existing match
416 * or create a new context descriptor.
418 static inline uint32_t
419 what_ctx_update(struct txgbe_tx_queue *txq, uint64_t flags,
420 union txgbe_tx_offload tx_offload)
422 /* If it matches the currently used context */
423 if (likely(txq->ctx_cache[txq->ctx_curr].flags == flags &&
424 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
425 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
426 & tx_offload.data[0])) &&
427 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
428 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
429 & tx_offload.data[1]))))
430 return txq->ctx_curr;
432 /* Check whether it matches the next context */
434 if (likely(txq->ctx_cache[txq->ctx_curr].flags == flags &&
435 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
436 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
437 & tx_offload.data[0])) &&
438 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
439 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
440 & tx_offload.data[1]))))
441 return txq->ctx_curr;
443 /* Mismatch: no cached context matches, a new one must be built */
444 return TXGBE_CTX_NUM;
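/*
 * For illustration: the queue caches TXGBE_CTX_NUM context descriptors.
 * Returning TXGBE_CTX_NUM (an out-of-range index) tells the caller that no
 * cached entry matches and a new context descriptor must be built and
 * uploaded for this packet's offload parameters.
 */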
447 static inline uint32_t
448 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
452 if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM) {
454 tmp |= TXGBE_TXD_L4CS;
456 if (ol_flags & PKT_TX_IP_CKSUM) {
458 tmp |= TXGBE_TXD_IPCS;
460 if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
462 tmp |= TXGBE_TXD_EIPCS;
464 if (ol_flags & PKT_TX_TCP_SEG) {
466 /* implies IPv4 cksum */
467 if (ol_flags & PKT_TX_IPV4)
468 tmp |= TXGBE_TXD_IPCS;
469 tmp |= TXGBE_TXD_L4CS;
471 if (ol_flags & PKT_TX_VLAN_PKT)
477 static inline uint32_t
478 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
480 uint32_t cmdtype = 0;
482 if (ol_flags & PKT_TX_VLAN_PKT)
483 cmdtype |= TXGBE_TXD_VLE;
484 if (ol_flags & PKT_TX_TCP_SEG)
485 cmdtype |= TXGBE_TXD_TSE;
486 if (ol_flags & PKT_TX_MACSEC)
487 cmdtype |= TXGBE_TXD_LINKSEC;
491 static inline uint8_t
492 tx_desc_ol_flags_to_ptid(uint64_t oflags, uint32_t ptype)
497 return txgbe_encode_ptype(ptype);
499 /* Only support flags in TXGBE_TX_OFFLOAD_MASK */
500 tun = !!(oflags & PKT_TX_TUNNEL_MASK);
503 ptype = RTE_PTYPE_L2_ETHER;
504 if (oflags & PKT_TX_VLAN)
505 ptype |= RTE_PTYPE_L2_ETHER_VLAN;
508 if (oflags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IP_CKSUM))
509 ptype |= RTE_PTYPE_L3_IPV4;
510 else if (oflags & (PKT_TX_OUTER_IPV6))
511 ptype |= RTE_PTYPE_L3_IPV6;
513 if (oflags & (PKT_TX_IPV4 | PKT_TX_IP_CKSUM))
514 ptype |= (tun ? RTE_PTYPE_INNER_L3_IPV4 : RTE_PTYPE_L3_IPV4);
515 else if (oflags & (PKT_TX_IPV6))
516 ptype |= (tun ? RTE_PTYPE_INNER_L3_IPV6 : RTE_PTYPE_L3_IPV6);
519 switch (oflags & (PKT_TX_L4_MASK)) {
520 case PKT_TX_TCP_CKSUM:
521 ptype |= (tun ? RTE_PTYPE_INNER_L4_TCP : RTE_PTYPE_L4_TCP);
523 case PKT_TX_UDP_CKSUM:
524 ptype |= (tun ? RTE_PTYPE_INNER_L4_UDP : RTE_PTYPE_L4_UDP);
526 case PKT_TX_SCTP_CKSUM:
527 ptype |= (tun ? RTE_PTYPE_INNER_L4_SCTP : RTE_PTYPE_L4_SCTP);
531 if (oflags & PKT_TX_TCP_SEG)
532 ptype |= (tun ? RTE_PTYPE_INNER_L4_TCP : RTE_PTYPE_L4_TCP);
535 switch (oflags & PKT_TX_TUNNEL_MASK) {
536 case PKT_TX_TUNNEL_VXLAN:
537 ptype |= RTE_PTYPE_L2_ETHER |
539 RTE_PTYPE_TUNNEL_VXLAN;
540 ptype |= RTE_PTYPE_INNER_L2_ETHER;
542 case PKT_TX_TUNNEL_GRE:
543 ptype |= RTE_PTYPE_L2_ETHER |
545 RTE_PTYPE_TUNNEL_GRE;
546 ptype |= RTE_PTYPE_INNER_L2_ETHER;
548 case PKT_TX_TUNNEL_GENEVE:
549 ptype |= RTE_PTYPE_L2_ETHER |
551 RTE_PTYPE_TUNNEL_GENEVE;
552 ptype |= RTE_PTYPE_INNER_L2_ETHER;
554 case PKT_TX_TUNNEL_VXLAN_GPE:
555 ptype |= RTE_PTYPE_L2_ETHER |
557 RTE_PTYPE_TUNNEL_VXLAN_GPE;
558 ptype |= RTE_PTYPE_INNER_L2_ETHER;
560 case PKT_TX_TUNNEL_IPIP:
561 case PKT_TX_TUNNEL_IP:
562 ptype |= RTE_PTYPE_L2_ETHER |
568 return txgbe_encode_ptype(ptype);
571 #ifndef DEFAULT_TX_FREE_THRESH
572 #define DEFAULT_TX_FREE_THRESH 32
575 /* Reset transmit descriptors after they have been used */
577 txgbe_xmit_cleanup(struct txgbe_tx_queue *txq)
579 struct txgbe_tx_entry *sw_ring = txq->sw_ring;
580 volatile struct txgbe_tx_desc *txr = txq->tx_ring;
581 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
582 uint16_t nb_tx_desc = txq->nb_tx_desc;
583 uint16_t desc_to_clean_to;
584 uint16_t nb_tx_to_clean;
587 /* Determine the last descriptor needing to be cleaned */
588 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_free_thresh);
589 if (desc_to_clean_to >= nb_tx_desc)
590 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
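/*
 * Worked example: with nb_tx_desc = 512, tx_free_thresh = 32 and
 * last_desc_cleaned = 500, desc_to_clean_to is 532 before the check above
 * and 20 after it, i.e. the cleanup target wraps past the end of the ring.
 */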
592 /* Check to make sure the last descriptor to clean is done */
593 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
594 status = txr[desc_to_clean_to].dw3;
595 if (!(status & rte_cpu_to_le_32(TXGBE_TXD_DD))) {
596 PMD_TX_FREE_LOG(DEBUG,
597 "TX descriptor %4u is not done "
598 "(port=%d queue=%d)",
600 txq->port_id, txq->queue_id);
601 if (txq->nb_tx_free >> 1 < txq->tx_free_thresh)
602 txgbe_set32_masked(txq->tdc_reg_addr,
603 TXGBE_TXCFG_FLUSH, TXGBE_TXCFG_FLUSH);
604 /* Failed to clean any descriptors, better luck next time */
608 /* Figure out how many descriptors will be cleaned */
609 if (last_desc_cleaned > desc_to_clean_to)
610 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
613 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
616 PMD_TX_FREE_LOG(DEBUG,
617 "Cleaning %4u TX descriptors: %4u to %4u "
618 "(port=%d queue=%d)",
619 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
620 txq->port_id, txq->queue_id);
623 * The last descriptor to clean is done, so that means all the
624 * descriptors from the last descriptor that was cleaned
625 * up to the last descriptor with the RS bit set
626 * are done. Only reset the threshold descriptor.
628 txr[desc_to_clean_to].dw3 = 0;
630 /* Update the txq to reflect the last descriptor that was cleaned */
631 txq->last_desc_cleaned = desc_to_clean_to;
632 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
638 static inline uint8_t
639 txgbe_get_tun_len(struct rte_mbuf *mbuf)
641 struct txgbe_genevehdr genevehdr;
642 const struct txgbe_genevehdr *gh;
645 switch (mbuf->ol_flags & PKT_TX_TUNNEL_MASK) {
646 case PKT_TX_TUNNEL_IPIP:
649 case PKT_TX_TUNNEL_VXLAN:
650 case PKT_TX_TUNNEL_VXLAN_GPE:
651 tun_len = sizeof(struct txgbe_udphdr)
652 + sizeof(struct txgbe_vxlanhdr);
654 case PKT_TX_TUNNEL_GRE:
655 tun_len = sizeof(struct txgbe_nvgrehdr);
657 case PKT_TX_TUNNEL_GENEVE:
658 gh = rte_pktmbuf_read(mbuf,
659 mbuf->outer_l2_len + mbuf->outer_l3_len,
660 sizeof(genevehdr), &genevehdr);
661 tun_len = sizeof(struct txgbe_udphdr)
662 + sizeof(struct txgbe_genevehdr)
663 + (gh->opt_len << 2);
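/*
 * For illustration: the GENEVE opt_len field counts options in 4-byte
 * words, so (gh->opt_len << 2) converts it to bytes; e.g. opt_len = 2 adds
 * 8 bytes of options on top of the UDP and GENEVE base headers.
 */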
673 txgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
676 struct txgbe_tx_queue *txq;
677 struct txgbe_tx_entry *sw_ring;
678 struct txgbe_tx_entry *txe, *txn;
679 volatile struct txgbe_tx_desc *txr;
680 volatile struct txgbe_tx_desc *txd;
681 struct rte_mbuf *tx_pkt;
682 struct rte_mbuf *m_seg;
683 uint64_t buf_dma_addr;
684 uint32_t olinfo_status;
685 uint32_t cmd_type_len;
696 union txgbe_tx_offload tx_offload;
698 tx_offload.data[0] = 0;
699 tx_offload.data[1] = 0;
701 sw_ring = txq->sw_ring;
703 tx_id = txq->tx_tail;
704 txe = &sw_ring[tx_id];
706 /* Determine if the descriptor ring needs to be cleaned. */
707 if (txq->nb_tx_free < txq->tx_free_thresh)
708 txgbe_xmit_cleanup(txq);
710 rte_prefetch0(&txe->mbuf->pool);
713 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
716 pkt_len = tx_pkt->pkt_len;
719 * Determine how many (if any) context descriptors
720 * are needed for offload functionality.
722 ol_flags = tx_pkt->ol_flags;
724 /* If hardware offload required */
725 tx_ol_req = ol_flags & TXGBE_TX_OFFLOAD_MASK;
727 tx_offload.ptid = tx_desc_ol_flags_to_ptid(tx_ol_req,
728 tx_pkt->packet_type);
729 tx_offload.l2_len = tx_pkt->l2_len;
730 tx_offload.l3_len = tx_pkt->l3_len;
731 tx_offload.l4_len = tx_pkt->l4_len;
732 tx_offload.vlan_tci = tx_pkt->vlan_tci;
733 tx_offload.tso_segsz = tx_pkt->tso_segsz;
734 tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
735 tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
736 tx_offload.outer_tun_len = txgbe_get_tun_len(tx_pkt);
738 /* Check if a new context must be built or an existing one can be reused */
739 ctx = what_ctx_update(txq, tx_ol_req, tx_offload);
740 /* Only allocate context descriptor if required */
741 new_ctx = (ctx == TXGBE_CTX_NUM);
746 * Keep track of how many descriptors are used this loop.
747 * This will always be the number of segments + the number of
748 * context descriptors required to transmit the packet.
750 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
753 * The number of descriptors that must be allocated for a
754 * packet is the number of segments of that packet, plus 1
755 * Context Descriptor for the hardware offload, if any.
756 * Determine the last TX descriptor to allocate in the TX ring
757 * for the packet, starting from the current position (tx_id)
760 tx_last = (uint16_t)(tx_id + nb_used - 1);
763 if (tx_last >= txq->nb_tx_desc)
764 tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
766 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
767 " tx_first=%u tx_last=%u",
768 (uint16_t)txq->port_id,
769 (uint16_t)txq->queue_id,
775 * Make sure there are enough TX descriptors available to
776 * transmit the entire packet.
777 * nb_used better be less than or equal to txq->tx_free_thresh
779 if (nb_used > txq->nb_tx_free) {
780 PMD_TX_FREE_LOG(DEBUG,
781 "Not enough free TX descriptors "
782 "nb_used=%4u nb_free=%4u "
783 "(port=%d queue=%d)",
784 nb_used, txq->nb_tx_free,
785 txq->port_id, txq->queue_id);
787 if (txgbe_xmit_cleanup(txq) != 0) {
788 /* Could not clean any descriptors */
794 /* nb_used better be <= txq->tx_free_thresh */
795 if (unlikely(nb_used > txq->tx_free_thresh)) {
796 PMD_TX_FREE_LOG(DEBUG,
797 "The number of descriptors needed to "
798 "transmit the packet exceeds the "
799 "RS bit threshold. This will impact "
801 "nb_used=%4u nb_free=%4u "
802 "tx_free_thresh=%4u. "
803 "(port=%d queue=%d)",
804 nb_used, txq->nb_tx_free,
806 txq->port_id, txq->queue_id);
808 * Loop here until there are enough TX
809 * descriptors or until the ring cannot be
812 while (nb_used > txq->nb_tx_free) {
813 if (txgbe_xmit_cleanup(txq) != 0) {
815 * Could not clean any
827 * By now there are enough free TX descriptors to transmit
832 * Set common flags of all TX Data Descriptors.
834 * The following bits must be set in all Data Descriptors:
835 * - TXGBE_TXD_DTYP_DATA
836 * - TXGBE_TXD_DCMD_DEXT
838 * The following bits must be set in the first Data Descriptor
839 * and are ignored in the other ones:
840 * - TXGBE_TXD_DCMD_IFCS
841 * - TXGBE_TXD_MAC_1588
842 * - TXGBE_TXD_DCMD_VLE
844 * The following bits must only be set in the last Data
846 * - TXGBE_TXD_CMD_EOP
848 * The following bits can be set in any Data Descriptor, but
849 * are only set in the last Data Descriptor:
852 cmd_type_len = TXGBE_TXD_FCS;
856 if (ol_flags & PKT_TX_TCP_SEG) {
857 /* when TSO is on, the paylen in the descriptor is
858 * not the packet length but the TCP payload length
860 pkt_len -= (tx_offload.l2_len +
861 tx_offload.l3_len + tx_offload.l4_len);
863 (tx_pkt->ol_flags & PKT_TX_TUNNEL_MASK)
864 ? tx_offload.outer_l2_len +
865 tx_offload.outer_l3_len : 0;
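/*
 * Worked example: for a non-tunnelled TSO packet with pkt_len = 9014,
 * l2_len = 14, l3_len = 20 and l4_len = 20, the PAYLEN written to the
 * descriptor becomes 9014 - 54 = 8960, i.e. the TCP payload only.
 */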
869 * Setup the TX Advanced Context Descriptor if required
872 volatile struct txgbe_tx_ctx_desc *ctx_txd;
874 ctx_txd = (volatile struct txgbe_tx_ctx_desc *)
877 txn = &sw_ring[txe->next_id];
878 rte_prefetch0(&txn->mbuf->pool);
880 if (txe->mbuf != NULL) {
881 rte_pktmbuf_free_seg(txe->mbuf);
885 txgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
888 txe->last_id = tx_last;
889 tx_id = txe->next_id;
894 * Setup the TX Advanced Data Descriptor.
895 * This path is taken whether a new context descriptor
896 * was built or an existing one is reused.
898 cmd_type_len |= tx_desc_ol_flags_to_cmdtype(ol_flags);
900 tx_desc_cksum_flags_to_olinfo(ol_flags);
901 olinfo_status |= TXGBE_TXD_IDX(ctx);
904 olinfo_status |= TXGBE_TXD_PAYLEN(pkt_len);
909 txn = &sw_ring[txe->next_id];
910 rte_prefetch0(&txn->mbuf->pool);
912 if (txe->mbuf != NULL)
913 rte_pktmbuf_free_seg(txe->mbuf);
917 * Set up Transmit Data Descriptor.
919 slen = m_seg->data_len;
920 buf_dma_addr = rte_mbuf_data_iova(m_seg);
921 txd->qw0 = rte_cpu_to_le_64(buf_dma_addr);
922 txd->dw2 = rte_cpu_to_le_32(cmd_type_len | slen);
923 txd->dw3 = rte_cpu_to_le_32(olinfo_status);
924 txe->last_id = tx_last;
925 tx_id = txe->next_id;
928 } while (m_seg != NULL);
931 * The last packet data descriptor needs End Of Packet (EOP)
933 cmd_type_len |= TXGBE_TXD_EOP;
934 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
936 txd->dw2 |= rte_cpu_to_le_32(cmd_type_len);
944 * Set the Transmit Descriptor Tail (TDT)
946 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
947 (uint16_t)txq->port_id, (uint16_t)txq->queue_id,
948 (uint16_t)tx_id, (uint16_t)nb_tx);
949 txgbe_set32_relaxed(txq->tdt_reg_addr, tx_id);
950 txq->tx_tail = tx_id;
955 /*********************************************************************
959 **********************************************************************/
961 txgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
966 struct txgbe_tx_queue *txq = (struct txgbe_tx_queue *)tx_queue;
968 for (i = 0; i < nb_pkts; i++) {
970 ol_flags = m->ol_flags;
973 * Check if packet meets requirements for number of segments
975 * NOTE: for txgbe it's always (40 - WTHRESH) for both TSO and
979 if (m->nb_segs > TXGBE_TX_MAX_SEG - txq->wthresh) {
984 if (ol_flags & TXGBE_TX_OFFLOAD_NOTSUP_MASK) {
985 rte_errno = ENOTSUP;
989 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
990 ret = rte_validate_tx_offload(m);
996 ret = rte_net_intel_cksum_prepare(m);
1006 /*********************************************************************
1010 **********************************************************************/
1011 /* @note: fix txgbe_dev_supported_ptypes_get() if anything changes here. */
1012 static inline uint32_t
1013 txgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptid_mask)
1015 uint16_t ptid = TXGBE_RXD_PTID(pkt_info);
1019 return txgbe_decode_ptype(ptid);
1022 static inline uint64_t
1023 txgbe_rxd_pkt_info_to_pkt_flags(uint32_t pkt_info)
1025 static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1026 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1027 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1028 PKT_RX_RSS_HASH, 0, 0, 0,
1029 0, 0, 0, PKT_RX_FDIR,
1032 return ip_rss_types_map[TXGBE_RXD_RSSTYPE(pkt_info)];
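/*
 * For illustration: the table above maps the 4-bit RSS type reported in the
 * descriptor to an mbuf flag: index 0 (no RSS) and reserved types yield no
 * flag, the supported hash types yield PKT_RX_RSS_HASH, and the last entry
 * reports a flow-director match (PKT_RX_FDIR).
 */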
1035 static inline uint64_t
1036 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1041 * Check only whether a VLAN is present.
1042 * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1043 * that can be found from the rte_eth_rxmode.offloads flag.
1045 pkt_flags = (rx_status & TXGBE_RXD_STAT_VLAN &&
1046 vlan_flags & PKT_RX_VLAN_STRIPPED)
1052 static inline uint64_t
1053 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1055 uint64_t pkt_flags = 0;
1057 /* checksum offload can't be disabled */
1058 if (rx_status & TXGBE_RXD_STAT_IPCS) {
1059 pkt_flags |= (rx_status & TXGBE_RXD_ERR_IPCS
1060 ? PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD);
1063 if (rx_status & TXGBE_RXD_STAT_L4CS) {
1064 pkt_flags |= (rx_status & TXGBE_RXD_ERR_L4CS
1065 ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD);
1068 if (rx_status & TXGBE_RXD_STAT_EIPCS &&
1069 rx_status & TXGBE_RXD_ERR_EIPCS) {
1070 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1077 * LOOK_AHEAD defines how many desc statuses to check beyond the
1078 * current descriptor.
1079 * It must be a pound define for optimal performance.
1080 * Do not change the value of LOOK_AHEAD, as the txgbe_rx_scan_hw_ring
1081 * function only works with LOOK_AHEAD=8.
1083 #define LOOK_AHEAD 8
1084 #if (LOOK_AHEAD != 8)
1085 #error "PMD TXGBE: LOOK_AHEAD must be 8\n"
1088 txgbe_rx_scan_hw_ring(struct txgbe_rx_queue *rxq)
1090 volatile struct txgbe_rx_desc *rxdp;
1091 struct txgbe_rx_entry *rxep;
1092 struct rte_mbuf *mb;
1096 uint32_t s[LOOK_AHEAD];
1097 uint32_t pkt_info[LOOK_AHEAD];
1098 int i, j, nb_rx = 0;
1101 /* get references to current descriptor and S/W ring entry */
1102 rxdp = &rxq->rx_ring[rxq->rx_tail];
1103 rxep = &rxq->sw_ring[rxq->rx_tail];
1105 status = rxdp->qw1.lo.status;
1106 /* check to make sure there is at least 1 packet to receive */
1107 if (!(status & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD)))
1111 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1112 * reference packets that are ready to be received.
1114 for (i = 0; i < RTE_PMD_TXGBE_RX_MAX_BURST;
1115 i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1116 /* Read desc statuses backwards to avoid race condition */
1117 for (j = 0; j < LOOK_AHEAD; j++)
1118 s[j] = rte_le_to_cpu_32(rxdp[j].qw1.lo.status);
1122 /* Compute how many status bits were set */
1123 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1124 (s[nb_dd] & TXGBE_RXD_STAT_DD); nb_dd++)
1127 for (j = 0; j < nb_dd; j++)
1128 pkt_info[j] = rte_le_to_cpu_32(rxdp[j].qw0.dw0);
1132 /* Translate descriptor info to mbuf format */
1133 for (j = 0; j < nb_dd; ++j) {
1135 pkt_len = rte_le_to_cpu_16(rxdp[j].qw1.hi.len) -
1137 mb->data_len = pkt_len;
1138 mb->pkt_len = pkt_len;
1139 mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].qw1.hi.tag);
1141 /* convert descriptor fields to rte mbuf flags */
1142 pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1144 pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1146 txgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1147 mb->ol_flags = pkt_flags;
1149 txgbe_rxd_pkt_info_to_pkt_type(pkt_info[j],
1150 rxq->pkt_type_mask);
1152 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1154 rte_le_to_cpu_32(rxdp[j].qw0.dw1);
1155 else if (pkt_flags & PKT_RX_FDIR) {
1156 mb->hash.fdir.hash =
1157 rte_le_to_cpu_16(rxdp[j].qw0.hi.csum) &
1158 TXGBE_ATR_HASH_MASK;
1160 rte_le_to_cpu_16(rxdp[j].qw0.hi.ipid);
1164 /* Move mbuf pointers from the S/W ring to the stage */
1165 for (j = 0; j < LOOK_AHEAD; ++j)
1166 rxq->rx_stage[i + j] = rxep[j].mbuf;
1168 /* stop if all requested packets could not be received */
1169 if (nb_dd != LOOK_AHEAD)
1173 /* clear software ring entries so we can cleanup correctly */
1174 for (i = 0; i < nb_rx; ++i)
1175 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1181 txgbe_rx_alloc_bufs(struct txgbe_rx_queue *rxq, bool reset_mbuf)
1183 volatile struct txgbe_rx_desc *rxdp;
1184 struct txgbe_rx_entry *rxep;
1185 struct rte_mbuf *mb;
1190 /* allocate buffers in bulk directly into the S/W ring */
1191 alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1192 rxep = &rxq->sw_ring[alloc_idx];
1193 diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1194 rxq->rx_free_thresh);
1195 if (unlikely(diag != 0))
1198 rxdp = &rxq->rx_ring[alloc_idx];
1199 for (i = 0; i < rxq->rx_free_thresh; ++i) {
1200 /* populate the static rte mbuf fields */
1203 mb->port = rxq->port_id;
1205 rte_mbuf_refcnt_set(mb, 1);
1206 mb->data_off = RTE_PKTMBUF_HEADROOM;
1208 /* populate the descriptors */
1209 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1210 TXGBE_RXD_HDRADDR(&rxdp[i], 0);
1211 TXGBE_RXD_PKTADDR(&rxdp[i], dma_addr);
1214 /* update state of internal queue structure */
1215 rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1216 if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1217 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
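/*
 * For illustration: with nb_rx_desc = 512 and rx_free_thresh = 32 the
 * trigger advances 31 -> 63 -> ... -> 511 and then wraps back to 31, so
 * buffers are always replenished in blocks of rx_free_thresh descriptors.
 */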
1223 static inline uint16_t
1224 txgbe_rx_fill_from_stage(struct txgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1227 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1230 /* how many packets are ready to return? */
1231 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1233 /* copy mbuf pointers to the application's packet list */
1234 for (i = 0; i < nb_pkts; ++i)
1235 rx_pkts[i] = stage[i];
1237 /* update internal queue state */
1238 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1239 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1244 static inline uint16_t
1245 txgbe_rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1248 struct txgbe_rx_queue *rxq = (struct txgbe_rx_queue *)rx_queue;
1249 struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
1252 /* Any previously recv'd pkts will be returned from the Rx stage */
1253 if (rxq->rx_nb_avail)
1254 return txgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1256 /* Scan the H/W ring for packets to receive */
1257 nb_rx = (uint16_t)txgbe_rx_scan_hw_ring(rxq);
1259 /* update internal queue state */
1260 rxq->rx_next_avail = 0;
1261 rxq->rx_nb_avail = nb_rx;
1262 rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1264 /* if required, allocate new buffers to replenish descriptors */
1265 if (rxq->rx_tail > rxq->rx_free_trigger) {
1266 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1268 if (txgbe_rx_alloc_bufs(rxq, true) != 0) {
1271 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1272 "queue_id=%u", (uint16_t)rxq->port_id,
1273 (uint16_t)rxq->queue_id);
1275 dev->data->rx_mbuf_alloc_failed +=
1276 rxq->rx_free_thresh;
1279 * Need to rewind any previous receives if we cannot
1280 * allocate new buffers to replenish the old ones.
1282 rxq->rx_nb_avail = 0;
1283 rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1284 for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1285 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1290 /* update tail pointer */
1292 txgbe_set32_relaxed(rxq->rdt_reg_addr, cur_free_trigger);
1295 if (rxq->rx_tail >= rxq->nb_rx_desc)
1298 /* received any packets this loop? */
1299 if (rxq->rx_nb_avail)
1300 return txgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1305 /* split requests into chunks of size RTE_PMD_TXGBE_RX_MAX_BURST */
1307 txgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1312 if (unlikely(nb_pkts == 0))
1315 if (likely(nb_pkts <= RTE_PMD_TXGBE_RX_MAX_BURST))
1316 return txgbe_rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1318 /* request is relatively large, chunk it up */
1323 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_TXGBE_RX_MAX_BURST);
1324 ret = txgbe_rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1325 nb_rx = (uint16_t)(nb_rx + ret);
1326 nb_pkts = (uint16_t)(nb_pkts - ret);
1335 txgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1338 struct txgbe_rx_queue *rxq;
1339 volatile struct txgbe_rx_desc *rx_ring;
1340 volatile struct txgbe_rx_desc *rxdp;
1341 struct txgbe_rx_entry *sw_ring;
1342 struct txgbe_rx_entry *rxe;
1343 struct rte_mbuf *rxm;
1344 struct rte_mbuf *nmb;
1345 struct txgbe_rx_desc rxd;
1358 rx_id = rxq->rx_tail;
1359 rx_ring = rxq->rx_ring;
1360 sw_ring = rxq->sw_ring;
1361 struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
1362 while (nb_rx < nb_pkts) {
1364 * The order of operations here is important as the DD status
1365 * bit must not be read after any other descriptor fields.
1366 * rx_ring and rxdp are pointing to volatile data so the order
1367 * of accesses cannot be reordered by the compiler. If they were
1368 * not volatile, they could be reordered which could lead to
1369 * using invalid descriptor fields when read from rxd.
1371 rxdp = &rx_ring[rx_id];
1372 staterr = rxdp->qw1.lo.status;
1373 if (!(staterr & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD)))
1380 * If the TXGBE_RXD_STAT_EOP flag is not set, the RX packet
1381 * is likely to be invalid and to be dropped by the various
1382 * validation checks performed by the network stack.
1384 * Allocate a new mbuf to replenish the RX ring descriptor.
1385 * If the allocation fails:
1386 * - arrange for that RX descriptor to be the first one
1387 * being parsed the next time the receive function is
1388 * invoked [on the same queue].
1390 * - Stop parsing the RX ring and return immediately.
1392 * This policy does not drop the packet received in the RX
1393 * descriptor for which the allocation of a new mbuf failed.
1394 * Thus, it allows that packet to be later retrieved if
1395 * mbufs have been freed in the meantime.
1396 * As a side effect, holding RX descriptors instead of
1397 * systematically giving them back to the NIC may lead to
1398 * RX ring exhaustion situations.
1399 * However, the NIC can gracefully prevent such situations
1400 * to happen by sending specific "back-pressure" flow control
1401 * frames to its peer(s).
1403 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1404 "ext_err_stat=0x%08x pkt_len=%u",
1405 (uint16_t)rxq->port_id, (uint16_t)rxq->queue_id,
1406 (uint16_t)rx_id, (uint32_t)staterr,
1407 (uint16_t)rte_le_to_cpu_16(rxd.qw1.hi.len));
1409 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1411 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1412 "queue_id=%u", (uint16_t)rxq->port_id,
1413 (uint16_t)rxq->queue_id);
1414 dev->data->rx_mbuf_alloc_failed++;
1419 rxe = &sw_ring[rx_id];
1421 if (rx_id == rxq->nb_rx_desc)
1424 /* Prefetch next mbuf while processing current one. */
1425 rte_txgbe_prefetch(sw_ring[rx_id].mbuf);
1428 * When next RX descriptor is on a cache-line boundary,
1429 * prefetch the next 4 RX descriptors and the next 8 pointers
1432 if ((rx_id & 0x3) == 0) {
1433 rte_txgbe_prefetch(&rx_ring[rx_id]);
1434 rte_txgbe_prefetch(&sw_ring[rx_id]);
1439 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1440 TXGBE_RXD_HDRADDR(rxdp, 0);
1441 TXGBE_RXD_PKTADDR(rxdp, dma_addr);
1444 * Initialize the returned mbuf.
1445 * 1) setup generic mbuf fields:
1446 * - number of segments,
1449 * - RX port identifier.
1450 * 2) integrate hardware offload data, if any:
1451 * - RSS flag & hash,
1452 * - IP checksum flag,
1453 * - VLAN TCI, if any,
1456 pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.qw1.hi.len) -
1458 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1459 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1462 rxm->pkt_len = pkt_len;
1463 rxm->data_len = pkt_len;
1464 rxm->port = rxq->port_id;
1466 pkt_info = rte_le_to_cpu_32(rxd.qw0.dw0);
1467 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1468 rxm->vlan_tci = rte_le_to_cpu_16(rxd.qw1.hi.tag);
1470 pkt_flags = rx_desc_status_to_pkt_flags(staterr,
1472 pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1473 pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1474 rxm->ol_flags = pkt_flags;
1475 rxm->packet_type = txgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1476 rxq->pkt_type_mask);
1478 if (likely(pkt_flags & PKT_RX_RSS_HASH)) {
1479 rxm->hash.rss = rte_le_to_cpu_32(rxd.qw0.dw1);
1480 } else if (pkt_flags & PKT_RX_FDIR) {
1481 rxm->hash.fdir.hash =
1482 rte_le_to_cpu_16(rxd.qw0.hi.csum) &
1483 TXGBE_ATR_HASH_MASK;
1484 rxm->hash.fdir.id = rte_le_to_cpu_16(rxd.qw0.hi.ipid);
1487 * Store the mbuf address into the next entry of the array
1488 * of returned packets.
1490 rx_pkts[nb_rx++] = rxm;
1492 rxq->rx_tail = rx_id;
1495 * If the number of free RX descriptors is greater than the RX free
1496 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1498 * Update the RDT with the value of the last processed RX descriptor
1499 * minus 1, to guarantee that the RDT register is never equal to the
1500 * RDH register, which creates a "full" ring situation from the
1501 * hardware point of view...
1503 nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
1504 if (nb_hold > rxq->rx_free_thresh) {
1505 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1506 "nb_hold=%u nb_rx=%u",
1507 (uint16_t)rxq->port_id, (uint16_t)rxq->queue_id,
1508 (uint16_t)rx_id, (uint16_t)nb_hold,
1510 rx_id = (uint16_t)((rx_id == 0) ?
1511 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1512 txgbe_set32(rxq->rdt_reg_addr, rx_id);
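/*
 * For illustration: if the last processed descriptor index is 0, the value
 * written is nb_rx_desc - 1 (e.g. 511 on a 512-entry ring), so the RDT
 * register never catches up with the RDH register.
 */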
1515 rxq->nb_rx_hold = nb_hold;
1520 * txgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1522 * Fill the following info in the HEAD buffer of the Rx cluster:
1523 * - RX port identifier
1524 * - hardware offload data, if any:
1526 * - IP checksum flag
1527 * - VLAN TCI, if any
1529 * @head HEAD of the packet cluster
1530 * @desc HW descriptor to get data from
1531 * @rxq Pointer to the Rx queue
1534 txgbe_fill_cluster_head_buf(struct rte_mbuf *head, struct txgbe_rx_desc *desc,
1535 struct txgbe_rx_queue *rxq, uint32_t staterr)
1540 head->port = rxq->port_id;
1542 /* The vlan_tci field is only valid when PKT_RX_VLAN is
1543 * set in the pkt_flags field.
1545 head->vlan_tci = rte_le_to_cpu_16(desc->qw1.hi.tag);
1546 pkt_info = rte_le_to_cpu_32(desc->qw0.dw0);
1547 pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1548 pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1549 pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1550 head->ol_flags = pkt_flags;
1551 head->packet_type = txgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1552 rxq->pkt_type_mask);
1554 if (likely(pkt_flags & PKT_RX_RSS_HASH)) {
1555 head->hash.rss = rte_le_to_cpu_32(desc->qw0.dw1);
1556 } else if (pkt_flags & PKT_RX_FDIR) {
1557 head->hash.fdir.hash = rte_le_to_cpu_16(desc->qw0.hi.csum)
1558 & TXGBE_ATR_HASH_MASK;
1559 head->hash.fdir.id = rte_le_to_cpu_16(desc->qw0.hi.ipid);
1564 * txgbe_recv_pkts_lro - receive handler for the LRO case.
1566 * @rx_queue Rx queue handle
1567 * @rx_pkts table of received packets
1568 * @nb_pkts size of rx_pkts table
1569 * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1571 * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1572 * additional ring of txgbe_rsc_entry's that will hold the relevant RSC info.
1574 * We use the same logic as in Linux and in FreeBSD txgbe drivers:
1575 * 1) When non-EOP RSC completion arrives:
1576 * a) Update the HEAD of the current RSC aggregation cluster with the new
1577 * segment's data length.
1578 * b) Set the "next" pointer of the current segment to point to the segment
1579 * at the NEXTP index.
1580 * c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1581 * in the sw_rsc_ring.
1582 * 2) When EOP arrives we just update the cluster's total length and offload
1583 * flags and deliver the cluster up to the upper layers. In our case - put it
1584 * in the rx_pkts table.
1586 * Returns the number of received packets/clusters (according to the "bulk
1587 * receive" interface).
1589 static inline uint16_t
1590 txgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1593 struct txgbe_rx_queue *rxq = rx_queue;
1594 struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
1595 volatile struct txgbe_rx_desc *rx_ring = rxq->rx_ring;
1596 struct txgbe_rx_entry *sw_ring = rxq->sw_ring;
1597 struct txgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1598 uint16_t rx_id = rxq->rx_tail;
1600 uint16_t nb_hold = rxq->nb_rx_hold;
1601 uint16_t prev_id = rxq->rx_tail;
1603 while (nb_rx < nb_pkts) {
1605 struct txgbe_rx_entry *rxe;
1606 struct txgbe_scattered_rx_entry *sc_entry;
1607 struct txgbe_scattered_rx_entry *next_sc_entry = NULL;
1608 struct txgbe_rx_entry *next_rxe = NULL;
1609 struct rte_mbuf *first_seg;
1610 struct rte_mbuf *rxm;
1611 struct rte_mbuf *nmb = NULL;
1612 struct txgbe_rx_desc rxd;
1615 volatile struct txgbe_rx_desc *rxdp;
1620 * The code in this whole file uses the volatile pointer to
1621 * ensure the read ordering of the status and the rest of the
1622 * descriptor fields (on the compiler level only!!!). This is so
1623 * UGLY - why not to just use the compiler barrier instead? DPDK
1624 * even has the rte_compiler_barrier() for that.
1626 * But most importantly this is just wrong because this doesn't
1627 * ensure memory ordering in a general case at all. For
1628 * instance, DPDK is supposed to work on Power CPUs where
1629 * compiler barrier may just not be enough!
1631 * I tried to write only this function properly to have a
1632 * starting point (as a part of an LRO/RSC series) but the
1633 * compiler cursed at me when I tried to cast away the
1634 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1635 * keeping it the way it is for now.
1637 * The code in this file is broken in so many other places and
1638 * will just not work on a big endian CPU anyway therefore the
1639 * lines below will have to be revisited together with the rest
1643 * - Get rid of "volatile" and let the compiler do its job.
1644 * - Use the proper memory barrier (rte_rmb()) to ensure the
1645 * memory ordering below.
1647 rxdp = &rx_ring[rx_id];
1648 staterr = rte_le_to_cpu_32(rxdp->qw1.lo.status);
1650 if (!(staterr & TXGBE_RXD_STAT_DD))
1655 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1656 "staterr=0x%x data_len=%u",
1657 rxq->port_id, rxq->queue_id, rx_id, staterr,
1658 rte_le_to_cpu_16(rxd.qw1.hi.len));
1661 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1663 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1664 "port_id=%u queue_id=%u",
1665 rxq->port_id, rxq->queue_id);
1667 dev->data->rx_mbuf_alloc_failed++;
1670 } else if (nb_hold > rxq->rx_free_thresh) {
1671 uint16_t next_rdt = rxq->rx_free_trigger;
1673 if (!txgbe_rx_alloc_bufs(rxq, false)) {
1675 txgbe_set32_relaxed(rxq->rdt_reg_addr,
1677 nb_hold -= rxq->rx_free_thresh;
1679 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1680 "port_id=%u queue_id=%u",
1681 rxq->port_id, rxq->queue_id);
1683 dev->data->rx_mbuf_alloc_failed++;
1689 rxe = &sw_ring[rx_id];
1690 eop = staterr & TXGBE_RXD_STAT_EOP;
1692 next_id = rx_id + 1;
1693 if (next_id == rxq->nb_rx_desc)
1696 /* Prefetch next mbuf while processing current one. */
1697 rte_txgbe_prefetch(sw_ring[next_id].mbuf);
1700 * When next RX descriptor is on a cache-line boundary,
1701 * prefetch the next 4 RX descriptors and the next 4 pointers
1704 if ((next_id & 0x3) == 0) {
1705 rte_txgbe_prefetch(&rx_ring[next_id]);
1706 rte_txgbe_prefetch(&sw_ring[next_id]);
1713 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1715 * Update the RX descriptor with the physical address of the
1716 * data buffer of the newly allocated mbuf.
1720 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1721 TXGBE_RXD_HDRADDR(rxdp, 0);
1722 TXGBE_RXD_PKTADDR(rxdp, dma);
1728 * Set data length & data buffer address of mbuf.
1730 data_len = rte_le_to_cpu_16(rxd.qw1.hi.len);
1731 rxm->data_len = data_len;
1736 * Get next descriptor index:
1737 * - For RSC it's in the NEXTP field.
1738 * - For a scattered packet - it's just a following
1741 if (TXGBE_RXD_RSCCNT(rxd.qw0.dw0))
1742 nextp_id = TXGBE_RXD_NEXTP(staterr);
1746 next_sc_entry = &sw_sc_ring[nextp_id];
1747 next_rxe = &sw_ring[nextp_id];
1748 rte_txgbe_prefetch(next_rxe);
1751 sc_entry = &sw_sc_ring[rx_id];
1752 first_seg = sc_entry->fbuf;
1753 sc_entry->fbuf = NULL;
1756 * If this is the first buffer of the received packet,
1757 * set the pointer to the first mbuf of the packet and
1758 * initialize its context.
1759 * Otherwise, update the total length and the number of segments
1760 * of the current scattered packet, and update the pointer to
1761 * the last mbuf of the current packet.
1763 if (first_seg == NULL) {
1765 first_seg->pkt_len = data_len;
1766 first_seg->nb_segs = 1;
1768 first_seg->pkt_len += data_len;
1769 first_seg->nb_segs++;
1776 * If this is not the last buffer of the received packet, update
1777 * the pointer to the first mbuf at the NEXTP entry in the
1778 * sw_sc_ring and continue to parse the RX ring.
1780 if (!eop && next_rxe) {
1781 rxm->next = next_rxe->mbuf;
1782 next_sc_entry->fbuf = first_seg;
1786 /* Initialize the first mbuf of the returned packet */
1787 txgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
1790 * Deal with the case when HW CRC stripping is disabled.
1791 * That can't happen when LRO is enabled, but still could
1792 * happen for scattered RX mode.
1794 first_seg->pkt_len -= rxq->crc_len;
1795 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1796 struct rte_mbuf *lp;
1798 for (lp = first_seg; lp->next != rxm; lp = lp->next)
1801 first_seg->nb_segs--;
1802 lp->data_len -= rxq->crc_len - rxm->data_len;
1804 rte_pktmbuf_free_seg(rxm);
1806 rxm->data_len -= rxq->crc_len;
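/*
 * Worked example: with a 4-byte CRC and a last segment carrying only
 * 2 bytes, the first branch frees that segment entirely and shortens the
 * previous segment by the remaining 2 bytes; otherwise the last segment is
 * simply shortened by crc_len, as above.
 */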
1809 /* Prefetch data of first segment, if configured to do so. */
1810 rte_packet_prefetch((char *)first_seg->buf_addr +
1811 first_seg->data_off);
1814 * Store the mbuf address into the next entry of the array
1815 * of returned packets.
1817 rx_pkts[nb_rx++] = first_seg;
1821 * Record index of the next RX descriptor to probe.
1823 rxq->rx_tail = rx_id;
1826 * If the number of free RX descriptors is greater than the RX free
1827 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1829 * Update the RDT with the value of the last processed RX descriptor
1830 * minus 1, to guarantee that the RDT register is never equal to the
1831 * RDH register, which creates a "full" ring situation from the
1832 * hardware point of view...
1834 if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1835 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1836 "nb_hold=%u nb_rx=%u",
1837 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1840 txgbe_set32_relaxed(rxq->rdt_reg_addr, prev_id);
1844 rxq->nb_rx_hold = nb_hold;
1849 txgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1852 return txgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
1856 txgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1859 return txgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
1863 txgbe_get_rx_queue_offloads(struct rte_eth_dev *dev __rte_unused)
1865 return DEV_RX_OFFLOAD_VLAN_STRIP;
1869 txgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
1872 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
1873 struct rte_eth_dev_sriov *sriov = &RTE_ETH_DEV_SRIOV(dev);
1875 offloads = DEV_RX_OFFLOAD_IPV4_CKSUM |
1876 DEV_RX_OFFLOAD_UDP_CKSUM |
1877 DEV_RX_OFFLOAD_TCP_CKSUM |
1878 DEV_RX_OFFLOAD_KEEP_CRC |
1879 DEV_RX_OFFLOAD_JUMBO_FRAME |
1880 DEV_RX_OFFLOAD_VLAN_FILTER |
1881 DEV_RX_OFFLOAD_RSS_HASH |
1882 DEV_RX_OFFLOAD_SCATTER;
1884 if (!txgbe_is_vf(dev))
1885 offloads |= (DEV_RX_OFFLOAD_VLAN_FILTER |
1886 DEV_RX_OFFLOAD_QINQ_STRIP |
1887 DEV_RX_OFFLOAD_VLAN_EXTEND);
1890 * RSC is only supported by PF devices in a non-SR-IOV
1893 if (hw->mac.type == txgbe_mac_raptor && !sriov->active)
1894 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
1896 if (hw->mac.type == txgbe_mac_raptor)
1897 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
1899 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
1904 static void __rte_cold
1905 txgbe_tx_queue_release_mbufs(struct txgbe_tx_queue *txq)
1909 if (txq->sw_ring != NULL) {
1910 for (i = 0; i < txq->nb_tx_desc; i++) {
1911 if (txq->sw_ring[i].mbuf != NULL) {
1912 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1913 txq->sw_ring[i].mbuf = NULL;
1919 static void __rte_cold
1920 txgbe_tx_free_swring(struct txgbe_tx_queue *txq)
1923 txq->sw_ring != NULL)
1924 rte_free(txq->sw_ring);
1927 static void __rte_cold
1928 txgbe_tx_queue_release(struct txgbe_tx_queue *txq)
1930 if (txq != NULL && txq->ops != NULL) {
1931 txq->ops->release_mbufs(txq);
1932 txq->ops->free_swring(txq);
1938 txgbe_dev_tx_queue_release(void *txq)
1940 txgbe_tx_queue_release(txq);
1943 /* (Re)set dynamic txgbe_tx_queue fields to defaults */
1944 static void __rte_cold
1945 txgbe_reset_tx_queue(struct txgbe_tx_queue *txq)
1947 static const struct txgbe_tx_desc zeroed_desc = {0};
1948 struct txgbe_tx_entry *txe = txq->sw_ring;
1951 /* Zero out HW ring memory */
1952 for (i = 0; i < txq->nb_tx_desc; i++)
1953 txq->tx_ring[i] = zeroed_desc;
1955 /* Initialize SW ring entries */
1956 prev = (uint16_t)(txq->nb_tx_desc - 1);
1957 for (i = 0; i < txq->nb_tx_desc; i++) {
1958 volatile struct txgbe_tx_desc *txd = &txq->tx_ring[i];
1960 txd->dw3 = rte_cpu_to_le_32(TXGBE_TXD_DD);
1963 txe[prev].next_id = i;
1967 txq->tx_next_dd = (uint16_t)(txq->tx_free_thresh - 1);
1971 * Always allow 1 descriptor to be un-allocated to avoid
1972 * a H/W race condition
1974 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
1975 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
1977 memset((void *)&txq->ctx_cache, 0,
1978 TXGBE_CTX_NUM * sizeof(struct txgbe_ctx_info));
1981 static const struct txgbe_txq_ops def_txq_ops = {
1982 .release_mbufs = txgbe_tx_queue_release_mbufs,
1983 .free_swring = txgbe_tx_free_swring,
1984 .reset = txgbe_reset_tx_queue,
1987 /* Takes an ethdev and a queue and sets up the tx function to be used based on
1988 * the queue parameters. Used in tx_queue_setup by primary process and then
1989 * in dev_init by secondary process when attaching to an existing ethdev.
1992 txgbe_set_tx_function(struct rte_eth_dev *dev, struct txgbe_tx_queue *txq)
1994 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
1995 if (txq->offloads == 0 &&
1996 txq->tx_free_thresh >= RTE_PMD_TXGBE_TX_MAX_BURST) {
1997 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
1998 dev->tx_pkt_burst = txgbe_xmit_pkts_simple;
1999 dev->tx_pkt_prepare = NULL;
2001 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2003 " - offloads = 0x%" PRIx64,
2006 " - tx_free_thresh = %lu [RTE_PMD_TXGBE_TX_MAX_BURST=%lu]",
2007 (unsigned long)txq->tx_free_thresh,
2008 (unsigned long)RTE_PMD_TXGBE_TX_MAX_BURST);
2009 dev->tx_pkt_burst = txgbe_xmit_pkts;
2010 dev->tx_pkt_prepare = txgbe_prep_pkts;
2015 txgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2023 txgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2025 uint64_t tx_offload_capa;
2028 DEV_TX_OFFLOAD_VLAN_INSERT |
2029 DEV_TX_OFFLOAD_IPV4_CKSUM |
2030 DEV_TX_OFFLOAD_UDP_CKSUM |
2031 DEV_TX_OFFLOAD_TCP_CKSUM |
2032 DEV_TX_OFFLOAD_SCTP_CKSUM |
2033 DEV_TX_OFFLOAD_TCP_TSO |
2034 DEV_TX_OFFLOAD_UDP_TSO |
2035 DEV_TX_OFFLOAD_UDP_TNL_TSO |
2036 DEV_TX_OFFLOAD_IP_TNL_TSO |
2037 DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
2038 DEV_TX_OFFLOAD_GRE_TNL_TSO |
2039 DEV_TX_OFFLOAD_IPIP_TNL_TSO |
2040 DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
2041 DEV_TX_OFFLOAD_MULTI_SEGS;
2043 if (!txgbe_is_vf(dev))
2044 tx_offload_capa |= DEV_TX_OFFLOAD_QINQ_INSERT;
2046 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2048 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2050 return tx_offload_capa;
2054 txgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2057 unsigned int socket_id,
2058 const struct rte_eth_txconf *tx_conf)
2060 const struct rte_memzone *tz;
2061 struct txgbe_tx_queue *txq;
2062 struct txgbe_hw *hw;
2063 uint16_t tx_free_thresh;
2066 PMD_INIT_FUNC_TRACE();
2067 hw = TXGBE_DEV_HW(dev);
2069 offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2072 * Validate number of transmit descriptors.
2073 * It must not exceed hardware maximum, and must be multiple
2076 if (nb_desc % TXGBE_TXD_ALIGN != 0 ||
2077 nb_desc > TXGBE_RING_DESC_MAX ||
2078 nb_desc < TXGBE_RING_DESC_MIN) {
2083 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2084 * descriptors are used or if the number of descriptors required
2085 * to transmit a packet is greater than the number of free TX
2087 * One descriptor in the TX ring is used as a sentinel to avoid a
2088 * H/W race condition, hence the maximum threshold constraints.
2089 * When set to zero use default values.
2091 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2092 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
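/*
 * For illustration: with nb_desc = 512 and the default tx_free_thresh of
 * 32, the checks below pass since 32 < 512 - 3 and 512 % 32 == 0; a value
 * such as 30 would be rejected because it does not divide the ring size
 * evenly.
 */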
2093 if (tx_free_thresh >= (nb_desc - 3)) {
2094 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the number of "
2095 "TX descriptors minus 3. (tx_free_thresh=%u "
2096 "port=%d queue=%d)",
2097 (unsigned int)tx_free_thresh,
2098 (int)dev->data->port_id, (int)queue_idx);
2102 if ((nb_desc % tx_free_thresh) != 0) {
2103 PMD_INIT_LOG(ERR, "tx_free_thresh must be a divisor of the "
2104 "number of TX descriptors. (tx_free_thresh=%u "
2105 "port=%d queue=%d)", (unsigned int)tx_free_thresh,
2106 (int)dev->data->port_id, (int)queue_idx);
2110 /* Free memory prior to re-allocation if needed... */
2111 if (dev->data->tx_queues[queue_idx] != NULL) {
2112 txgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2113 dev->data->tx_queues[queue_idx] = NULL;
2116 /* First allocate the tx queue data structure */
2117 txq = rte_zmalloc_socket("ethdev TX queue",
2118 sizeof(struct txgbe_tx_queue),
2119 RTE_CACHE_LINE_SIZE, socket_id);
2124 * Allocate TX ring hardware descriptors. A memzone large enough to
2125 * handle the maximum ring size is allocated in order to allow for
2126 * resizing in later calls to the queue setup function.
2128 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2129 sizeof(struct txgbe_tx_desc) * TXGBE_RING_DESC_MAX,
2130 TXGBE_ALIGN, socket_id);
2132 txgbe_tx_queue_release(txq);
2136 txq->nb_tx_desc = nb_desc;
2137 txq->tx_free_thresh = tx_free_thresh;
2138 txq->pthresh = tx_conf->tx_thresh.pthresh;
2139 txq->hthresh = tx_conf->tx_thresh.hthresh;
2140 txq->wthresh = tx_conf->tx_thresh.wthresh;
2141 txq->queue_id = queue_idx;
2142 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2143 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2144 txq->port_id = dev->data->port_id;
2145 txq->offloads = offloads;
2146 txq->ops = &def_txq_ops;
2147 txq->tx_deferred_start = tx_conf->tx_deferred_start;
2149 /* Set the tail and config register addresses, using the raw queue
2150 * index if a virtual function is detected.
2152 if (hw->mac.type == txgbe_mac_raptor_vf) {
2153 txq->tdt_reg_addr = TXGBE_REG_ADDR(hw, TXGBE_TXWP(queue_idx));
2154 txq->tdc_reg_addr = TXGBE_REG_ADDR(hw, TXGBE_TXCFG(queue_idx));
2156 txq->tdt_reg_addr = TXGBE_REG_ADDR(hw,
2157 TXGBE_TXWP(txq->reg_idx));
2158 txq->tdc_reg_addr = TXGBE_REG_ADDR(hw,
2159 TXGBE_TXCFG(txq->reg_idx));
2162 txq->tx_ring_phys_addr = TMZ_PADDR(tz);
2163 txq->tx_ring = (struct txgbe_tx_desc *)TMZ_VADDR(tz);
2165 /* Allocate software ring */
2166 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2167 sizeof(struct txgbe_tx_entry) * nb_desc,
2168 RTE_CACHE_LINE_SIZE, socket_id);
2169 if (txq->sw_ring == NULL) {
2170 txgbe_tx_queue_release(txq);
2173 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
2174 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2176 /* set up scalar TX function as appropriate */
2177 txgbe_set_tx_function(dev, txq);
2179 txq->ops->reset(txq);
2181 dev->data->tx_queues[queue_idx] = txq;
2187 * txgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2189 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2190 * in the sw_rsc_ring is not set to NULL but rather points to the next
2191 * mbuf of this RSC aggregation (that has not been completed yet and still
2192 * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2193 * will just free first "nb_segs" segments of the cluster explicitly by calling
2194 * an rte_pktmbuf_free_seg().
2196 * @m scattered cluster head
2198 static void __rte_cold
2199 txgbe_free_sc_cluster(struct rte_mbuf *m)
2201 uint16_t i, nb_segs = m->nb_segs;
2202 struct rte_mbuf *next_seg;
2204 for (i = 0; i < nb_segs; i++) {
2206 rte_pktmbuf_free_seg(m);
2211 static void __rte_cold
2212 txgbe_rx_queue_release_mbufs(struct txgbe_rx_queue *rxq)
2216 if (rxq->sw_ring != NULL) {
2217 for (i = 0; i < rxq->nb_rx_desc; i++) {
2218 if (rxq->sw_ring[i].mbuf != NULL) {
2219 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2220 rxq->sw_ring[i].mbuf = NULL;
2223 if (rxq->rx_nb_avail) {
2224 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2225 struct rte_mbuf *mb;
2227 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2228 rte_pktmbuf_free_seg(mb);
2230 rxq->rx_nb_avail = 0;
2234 if (rxq->sw_sc_ring)
2235 for (i = 0; i < rxq->nb_rx_desc; i++)
2236 if (rxq->sw_sc_ring[i].fbuf) {
2237 txgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2238 rxq->sw_sc_ring[i].fbuf = NULL;
2242 static void __rte_cold
2243 txgbe_rx_queue_release(struct txgbe_rx_queue *rxq)
2246 txgbe_rx_queue_release_mbufs(rxq);
2247 rte_free(rxq->sw_ring);
2248 rte_free(rxq->sw_sc_ring);
2254 txgbe_dev_rx_queue_release(void *rxq)
2256 txgbe_rx_queue_release(rxq);
2260 * Check whether the Rx Burst Bulk Alloc function can be used.
2261 * Return
2262 * 0: the preconditions are satisfied and the bulk allocation function
2263 * can be used.
2264 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2265 * function must be used.
2267 static inline int __rte_cold
2268 check_rx_burst_bulk_alloc_preconditions(struct txgbe_rx_queue *rxq)
2273 * Make sure the following pre-conditions are satisfied:
2274 * rxq->rx_free_thresh >= RTE_PMD_TXGBE_RX_MAX_BURST
2275 * rxq->rx_free_thresh < rxq->nb_rx_desc
2276 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2277 * Scattered packets are not supported. This should be checked
2278 * outside of this function.
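/*
 * Worked example (values are illustrative assumptions, not defaults of this
 * driver): with nb_rx_desc = 512 and rx_free_thresh = 64, and assuming
 * RTE_PMD_TXGBE_RX_MAX_BURST is 32, all three conditions hold
 * (64 >= 32, 64 < 512, 512 % 64 == 0), so the bulk allocation path is allowed.
 */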
2280 if (!(rxq->rx_free_thresh >= RTE_PMD_TXGBE_RX_MAX_BURST)) {
2281 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2282 "rxq->rx_free_thresh=%d, "
2283 "RTE_PMD_TXGBE_RX_MAX_BURST=%d",
2284 rxq->rx_free_thresh, RTE_PMD_TXGBE_RX_MAX_BURST);
2286 } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2287 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2288 "rxq->rx_free_thresh=%d, "
2289 "rxq->nb_rx_desc=%d",
2290 rxq->rx_free_thresh, rxq->nb_rx_desc);
2292 } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2293 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2294 "rxq->nb_rx_desc=%d, "
2295 "rxq->rx_free_thresh=%d",
2296 rxq->nb_rx_desc, rxq->rx_free_thresh);
2303 /* Reset dynamic txgbe_rx_queue fields back to defaults */
2304 static void __rte_cold
2305 txgbe_reset_rx_queue(struct txgbe_adapter *adapter, struct txgbe_rx_queue *rxq)
2307 static const struct txgbe_rx_desc zeroed_desc = {
2308 {{0}, {0} }, {{0}, {0} } };
2310 uint16_t len = rxq->nb_rx_desc;
2313 * By default, the Rx queue setup function allocates enough memory for
2314 * TXGBE_RING_DESC_MAX. The Rx Burst bulk allocation function requires
2315 * extra memory at the end of the descriptor ring to be zero'd out.
2317 if (adapter->rx_bulk_alloc_allowed)
2318 /* zero out extra memory */
2319 len += RTE_PMD_TXGBE_RX_MAX_BURST;
2322 * Zero out HW ring memory. Zero out extra memory at the end of
2323 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2324 * reads extra memory as zeros.
2326 for (i = 0; i < len; i++)
2327 rxq->rx_ring[i] = zeroed_desc;
2330 * initialize extra software ring entries. Space for these extra
2331 * entries is always allocated
2333 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2334 for (i = rxq->nb_rx_desc; i < len; ++i)
2335 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
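/*
 * The zeroed descriptors beyond the ring end read back with DD == 0 and the
 * fake_mbuf entries give the look-ahead code a valid pointer to touch, so the
 * bulk-alloc receive path can scan up to RTE_PMD_TXGBE_RX_MAX_BURST entries
 * past the ring without extra range checks.
 */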
2337 rxq->rx_nb_avail = 0;
2338 rxq->rx_next_avail = 0;
2339 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2341 rxq->nb_rx_hold = 0;
2342 rxq->pkt_first_seg = NULL;
2343 rxq->pkt_last_seg = NULL;
2347 txgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2348 uint16_t queue_idx,
2349 uint16_t nb_desc,
2350 unsigned int socket_id,
2351 const struct rte_eth_rxconf *rx_conf,
2352 struct rte_mempool *mp)
2354 const struct rte_memzone *rz;
2355 struct txgbe_rx_queue *rxq;
2356 struct txgbe_hw *hw;
2358 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
2361 PMD_INIT_FUNC_TRACE();
2362 hw = TXGBE_DEV_HW(dev);
2364 offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
2367 * Validate the number of receive descriptors.
2368 * It must not exceed the hardware maximum and must be a multiple
2369 * of TXGBE_RXD_ALIGN.
2371 if (nb_desc % TXGBE_RXD_ALIGN != 0 ||
2372 nb_desc > TXGBE_RING_DESC_MAX ||
2373 nb_desc < TXGBE_RING_DESC_MIN) {
2377 /* Free memory prior to re-allocation if needed... */
2378 if (dev->data->rx_queues[queue_idx] != NULL) {
2379 txgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2380 dev->data->rx_queues[queue_idx] = NULL;
2383 /* First allocate the rx queue data structure */
2384 rxq = rte_zmalloc_socket("ethdev RX queue",
2385 sizeof(struct txgbe_rx_queue),
2386 RTE_CACHE_LINE_SIZE, socket_id);
2390 rxq->nb_rx_desc = nb_desc;
2391 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2392 rxq->queue_id = queue_idx;
2393 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2394 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2395 rxq->port_id = dev->data->port_id;
2396 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2397 rxq->crc_len = RTE_ETHER_CRC_LEN;
2398 else
2399 rxq->crc_len = 0;
2400 rxq->drop_en = rx_conf->rx_drop_en;
2401 rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2402 rxq->offloads = offloads;
2405 * The packet type in RX descriptor is different for different NICs.
2406 * So set different masks for different NICs.
2408 rxq->pkt_type_mask = TXGBE_PTID_MASK;
2411 * Allocate RX ring hardware descriptors. A memzone large enough to
2412 * handle the maximum ring size is allocated in order to allow for
2413 * resizing in later calls to the queue setup function.
2415 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2416 RX_RING_SZ, TXGBE_ALIGN, socket_id);
2418 txgbe_rx_queue_release(rxq);
2423 * Zero init all the descriptors in the ring.
2425 memset(rz->addr, 0, RX_RING_SZ);
2428 * Set up the Rx tail (RDT) and head (RDH) register addresses; use the VF registers when running as a Virtual Function.
2430 if (hw->mac.type == txgbe_mac_raptor_vf) {
2431 rxq->rdt_reg_addr =
2432 TXGBE_REG_ADDR(hw, TXGBE_RXWP(queue_idx));
2433 rxq->rdh_reg_addr =
2434 TXGBE_REG_ADDR(hw, TXGBE_RXRP(queue_idx));
2435 } else {
2436 rxq->rdt_reg_addr =
2437 TXGBE_REG_ADDR(hw, TXGBE_RXWP(rxq->reg_idx));
2438 rxq->rdh_reg_addr =
2439 TXGBE_REG_ADDR(hw, TXGBE_RXRP(rxq->reg_idx));
2442 rxq->rx_ring_phys_addr = TMZ_PADDR(rz);
2443 rxq->rx_ring = (struct txgbe_rx_desc *)TMZ_VADDR(rz);
2446 * Certain constraints must be met in order to use the bulk buffer
2447 * allocation Rx burst function. If any Rx queue doesn't meet them,
2448 * the feature must be disabled for the whole port.
2450 if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2451 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2452 "preconditions - canceling the feature for "
2453 "the whole port[%d]",
2454 rxq->queue_id, rxq->port_id);
2455 adapter->rx_bulk_alloc_allowed = false;
2459 * Allocate software ring. Allow for space at the end of the
2460 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2461 * function does not access an invalid memory region.
2463 len = nb_desc;
2464 if (adapter->rx_bulk_alloc_allowed)
2465 len += RTE_PMD_TXGBE_RX_MAX_BURST;
2467 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2468 sizeof(struct txgbe_rx_entry) * len,
2469 RTE_CACHE_LINE_SIZE, socket_id);
2470 if (!rxq->sw_ring) {
2471 txgbe_rx_queue_release(rxq);
2476 * Always allocate even if it's not going to be needed in order to
2477 * simplify the code.
2479 * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2480 * be requested in txgbe_dev_rx_init(), which is called later from
2481 * the dev_start() flow.
2483 rxq->sw_sc_ring =
2484 rte_zmalloc_socket("rxq->sw_sc_ring",
2485 sizeof(struct txgbe_scattered_rx_entry) * len,
2486 RTE_CACHE_LINE_SIZE, socket_id);
2487 if (!rxq->sw_sc_ring) {
2488 txgbe_rx_queue_release(rxq);
2492 PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2493 "dma_addr=0x%" PRIx64,
2494 rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2495 rxq->rx_ring_phys_addr);
2497 dev->data->rx_queues[queue_idx] = rxq;
2499 txgbe_reset_rx_queue(adapter, rxq);
2505 txgbe_dev_clear_queues(struct rte_eth_dev *dev)
2508 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
2510 PMD_INIT_FUNC_TRACE();
2512 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2513 struct txgbe_tx_queue *txq = dev->data->tx_queues[i];
2516 txq->ops->release_mbufs(txq);
2517 txq->ops->reset(txq);
2521 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2522 struct txgbe_rx_queue *rxq = dev->data->rx_queues[i];
2525 txgbe_rx_queue_release_mbufs(rxq);
2526 txgbe_reset_rx_queue(adapter, rxq);
2532 txgbe_dev_free_queues(struct rte_eth_dev *dev)
2536 PMD_INIT_FUNC_TRACE();
2538 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2539 txgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2540 dev->data->rx_queues[i] = NULL;
2542 dev->data->nb_rx_queues = 0;
2544 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2545 txgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2546 dev->data->tx_queues[i] = NULL;
2548 dev->data->nb_tx_queues = 0;
2552 * Receive Side Scaling (RSS)
2555 * The source and destination IP addresses of the IP header and the source
2556 * and destination ports of TCP/UDP headers, if any, of received packets are
2557 * hashed against a configurable random key to compute a 32-bit RSS hash result.
2558 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2559 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
2560 * RSS output index which is used as the RX queue index where to store the
2561 * received packets.
2562 * The following output is supplied in the RX write-back descriptor:
2563 * - 32-bit result of the Microsoft RSS hash function,
2564 * - 4-bit RSS type field.
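/*
 * Illustrative sketch of the lookup described above (an informal restatement,
 * not a quote of the hardware spec):
 *   reta_idx = rss_hash & 0x7F;   7 LSBs select one of the 128 RETA entries
 *   rx_queue = reta[reta_idx];    the entry holds the destination queue index
 */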
2568 * Used as the default key.
2570 static uint8_t rss_intel_key[40] = {
2571 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2572 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2573 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2574 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2575 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2579 txgbe_rss_disable(struct rte_eth_dev *dev)
2581 struct txgbe_hw *hw;
2583 hw = TXGBE_DEV_HW(dev);
2585 wr32m(hw, TXGBE_RACTL, TXGBE_RACTL_RSSENA, 0);
2589 txgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2590 struct rte_eth_rss_conf *rss_conf)
2592 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2599 if (!txgbe_rss_update_sp(hw->mac.type)) {
2600 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2605 hash_key = rss_conf->rss_key;
2607 /* Fill in RSS hash key */
2608 for (i = 0; i < 10; i++) {
2609 rss_key = LS32(hash_key[(i * 4) + 0], 0, 0xFF);
2610 rss_key |= LS32(hash_key[(i * 4) + 1], 8, 0xFF);
2611 rss_key |= LS32(hash_key[(i * 4) + 2], 16, 0xFF);
2612 rss_key |= LS32(hash_key[(i * 4) + 3], 24, 0xFF);
2613 wr32a(hw, TXGBE_REG_RSSKEY, i, rss_key);
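/*
 * Each 32-bit RSSKEY register holds four consecutive key bytes, least
 * significant byte first: register i is hash_key[4i] | hash_key[4i+1] << 8 |
 * hash_key[4i+2] << 16 | hash_key[4i+3] << 24, so ten registers cover the
 * 40-byte key.
 */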
2617 /* Set configured hashing protocols */
2618 rss_hf = rss_conf->rss_hf & TXGBE_RSS_OFFLOAD_ALL;
2619 mrqc = rd32(hw, TXGBE_RACTL);
2620 mrqc &= ~TXGBE_RACTL_RSSMASK;
2621 if (rss_hf & ETH_RSS_IPV4)
2622 mrqc |= TXGBE_RACTL_RSSIPV4;
2623 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2624 mrqc |= TXGBE_RACTL_RSSIPV4TCP;
2625 if (rss_hf & ETH_RSS_IPV6 ||
2626 rss_hf & ETH_RSS_IPV6_EX)
2627 mrqc |= TXGBE_RACTL_RSSIPV6;
2628 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP ||
2629 rss_hf & ETH_RSS_IPV6_TCP_EX)
2630 mrqc |= TXGBE_RACTL_RSSIPV6TCP;
2631 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2632 mrqc |= TXGBE_RACTL_RSSIPV4UDP;
2633 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP ||
2634 rss_hf & ETH_RSS_IPV6_UDP_EX)
2635 mrqc |= TXGBE_RACTL_RSSIPV6UDP;
2637 if (rss_hf)
2638 mrqc |= TXGBE_RACTL_RSSENA;
2639 else
2640 mrqc &= ~TXGBE_RACTL_RSSENA;
2642 wr32(hw, TXGBE_RACTL, mrqc);
2648 txgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2649 struct rte_eth_rss_conf *rss_conf)
2651 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2658 hash_key = rss_conf->rss_key;
2660 /* Return RSS hash key */
2661 for (i = 0; i < 10; i++) {
2662 rss_key = rd32a(hw, TXGBE_REG_RSSKEY, i);
2663 hash_key[(i * 4) + 0] = RS32(rss_key, 0, 0xFF);
2664 hash_key[(i * 4) + 1] = RS32(rss_key, 8, 0xFF);
2665 hash_key[(i * 4) + 2] = RS32(rss_key, 16, 0xFF);
2666 hash_key[(i * 4) + 3] = RS32(rss_key, 24, 0xFF);
2671 mrqc = rd32(hw, TXGBE_RACTL);
2672 if (mrqc & TXGBE_RACTL_RSSIPV4)
2673 rss_hf |= ETH_RSS_IPV4;
2674 if (mrqc & TXGBE_RACTL_RSSIPV4TCP)
2675 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2676 if (mrqc & TXGBE_RACTL_RSSIPV6)
2677 rss_hf |= ETH_RSS_IPV6 |
2678 ETH_RSS_IPV6_EX;
2679 if (mrqc & TXGBE_RACTL_RSSIPV6TCP)
2680 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP |
2681 ETH_RSS_IPV6_TCP_EX;
2682 if (mrqc & TXGBE_RACTL_RSSIPV4UDP)
2683 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2684 if (mrqc & TXGBE_RACTL_RSSIPV6UDP)
2685 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP |
2686 ETH_RSS_IPV6_UDP_EX;
2687 if (!(mrqc & TXGBE_RACTL_RSSENA))
2688 rss_hf = 0;
2690 rss_hf &= TXGBE_RSS_OFFLOAD_ALL;
2692 rss_conf->rss_hf = rss_hf;
2697 txgbe_rss_configure(struct rte_eth_dev *dev)
2699 struct rte_eth_rss_conf rss_conf;
2700 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
2701 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2706 PMD_INIT_FUNC_TRACE();
2709 * Fill in redirection table
2710 * The byte-swap is needed because NIC registers are in
2711 * little-endian order.
2713 if (adapter->rss_reta_updated == 0) {
2714 reta = 0;
2715 for (i = 0, j = 0; i < ETH_RSS_RETA_SIZE_128; i++, j++) {
2716 if (j == dev->data->nb_rx_queues)
2717 j = 0;
2718 reta = (reta >> 8) | LS32(j, 24, 0xFF);
2719 if ((i & 3) == 3)
2720 wr32a(hw, TXGBE_REG_RSSTBL, i >> 2, reta);
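/*
 * The redirection table is written 32 bits at a time: each iteration shifts
 * one queue index into the top byte of "reta", and every fourth iteration
 * flushes the four accumulated one-byte entries to RSSTBL register i / 4.
 * Queue indices wrap round-robin over nb_rx_queues.
 */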
2724 * Configure the RSS key and the RSS protocols used to compute
2725 * the RSS hash of input packets.
2727 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2728 if (rss_conf.rss_key == NULL)
2729 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2730 txgbe_dev_rss_hash_update(dev, &rss_conf);
2733 #define NUM_VFTA_REGISTERS 128
2736 * VMDq is only supported on 10 GbE NICs.
2739 txgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2741 struct rte_eth_vmdq_rx_conf *cfg;
2742 struct txgbe_hw *hw;
2743 enum rte_eth_nb_pools num_pools;
2744 uint32_t mrqc, vt_ctl, vlanctrl;
2748 PMD_INIT_FUNC_TRACE();
2749 hw = TXGBE_DEV_HW(dev);
2750 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2751 num_pools = cfg->nb_queue_pools;
2753 txgbe_rss_disable(dev);
2756 mrqc = TXGBE_PORTCTL_NUMVT_64;
2757 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK, mrqc);
2759 /* turn on virtualisation and set the default pool */
2760 vt_ctl = TXGBE_POOLCTL_RPLEN;
2761 if (cfg->enable_default_pool)
2762 vt_ctl |= TXGBE_POOLCTL_DEFPL(cfg->default_pool);
2763 else
2764 vt_ctl |= TXGBE_POOLCTL_DEFDSA;
2766 wr32(hw, TXGBE_POOLCTL, vt_ctl);
2768 for (i = 0; i < (int)num_pools; i++) {
2769 vmolr = txgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
2770 wr32(hw, TXGBE_POOLETHCTL(i), vmolr);
2773 /* enable vlan filtering and allow all vlan tags through */
2774 vlanctrl = rd32(hw, TXGBE_VLANCTL);
2775 vlanctrl |= TXGBE_VLANCTL_VFE; /* enable vlan filters */
2776 wr32(hw, TXGBE_VLANCTL, vlanctrl);
2778 /* enable all vlan filters */
2779 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
2780 wr32(hw, TXGBE_VLANTBL(i), UINT32_MAX);
2782 /* pool enabling for receive - 64 */
2783 wr32(hw, TXGBE_POOLRXENA(0), UINT32_MAX);
2784 if (num_pools == ETH_64_POOLS)
2785 wr32(hw, TXGBE_POOLRXENA(1), UINT32_MAX);
2788 * allow pools to read specific mac addresses
2789 * In this case, all pools should be able to read from mac addr 0
2791 wr32(hw, TXGBE_ETHADDRIDX, 0);
2792 wr32(hw, TXGBE_ETHADDRASSL, 0xFFFFFFFF);
2793 wr32(hw, TXGBE_ETHADDRASSH, 0xFFFFFFFF);
2795 /* set up filters for vlan tags as configured */
2796 for (i = 0; i < cfg->nb_pool_maps; i++) {
2797 /* set vlan id in VF register and set the valid bit */
2798 wr32(hw, TXGBE_PSRVLANIDX, i);
2799 wr32(hw, TXGBE_PSRVLAN, (TXGBE_PSRVLAN_EA |
2800 TXGBE_PSRVLAN_VID(cfg->pool_map[i].vlan_id)));
2802 * Put the allowed pools in VFB reg. As we only have 16 or 64
2803 * pools, we only need to use the first half of the register
2806 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
2807 wr32(hw, TXGBE_PSRVLANPLM(0),
2808 (cfg->pool_map[i].pools & UINT32_MAX));
2809 else
2810 wr32(hw, TXGBE_PSRVLANPLM(1),
2811 ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
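/*
 * The 64-bit pool bitmap is split across two 32-bit registers:
 * PSRVLANPLM(0) carries pools 0-31 and PSRVLANPLM(1) pools 32-63, so only
 * the half that actually contains the configured pools is written here.
 */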
2814 /* Tx General Switch Control Enables VMDQ loopback */
2815 if (cfg->enable_loop_back) {
2816 wr32(hw, TXGBE_PSRCTL, TXGBE_PSRCTL_LBENA);
2817 for (i = 0; i < 64; i++)
2818 wr32m(hw, TXGBE_POOLETHCTL(i),
2819 TXGBE_POOLETHCTL_LLB, TXGBE_POOLETHCTL_LLB);
2826 * txgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
2827 * @hw: pointer to hardware structure
2830 txgbe_vmdq_tx_hw_configure(struct txgbe_hw *hw)
2835 PMD_INIT_FUNC_TRACE();
2836 /*PF VF Transmit Enable*/
2837 wr32(hw, TXGBE_POOLTXENA(0), UINT32_MAX);
2838 wr32(hw, TXGBE_POOLTXENA(1), UINT32_MAX);
2840 /* Disable the Tx desc arbiter */
2841 reg = rd32(hw, TXGBE_ARBTXCTL);
2842 reg |= TXGBE_ARBTXCTL_DIA;
2843 wr32(hw, TXGBE_ARBTXCTL, reg);
2845 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK,
2846 TXGBE_PORTCTL_NUMVT_64);
2848 /* Disable drop for all queues */
2849 for (q = 0; q < 128; q++) {
2850 u32 val = 1 << (q % 32);
2851 wr32m(hw, TXGBE_QPRXDROP(q / 32), val, val);
2854 /* Enable the Tx desc arbiter */
2855 reg = rd32(hw, TXGBE_ARBTXCTL);
2856 reg &= ~TXGBE_ARBTXCTL_DIA;
2857 wr32(hw, TXGBE_ARBTXCTL, reg);
2862 static int __rte_cold
2863 txgbe_alloc_rx_queue_mbufs(struct txgbe_rx_queue *rxq)
2865 struct txgbe_rx_entry *rxe = rxq->sw_ring;
2869 /* Initialize software ring entries */
2870 for (i = 0; i < rxq->nb_rx_desc; i++) {
2871 volatile struct txgbe_rx_desc *rxd;
2872 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2874 if (mbuf == NULL) {
2875 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
2876 (unsigned int)rxq->queue_id);
2877 return -ENOMEM;
2878 }
2880 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
2881 mbuf->port = rxq->port_id;
2883 dma_addr =
2884 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2885 rxd = &rxq->rx_ring[i];
2886 TXGBE_RXD_HDRADDR(rxd, 0);
2887 TXGBE_RXD_PKTADDR(rxd, dma_addr);
2895 txgbe_config_vf_rss(struct rte_eth_dev *dev)
2897 struct txgbe_hw *hw;
2900 txgbe_rss_configure(dev);
2902 hw = TXGBE_DEV_HW(dev);
2905 mrqc = rd32(hw, TXGBE_PORTCTL);
2906 mrqc &= ~(TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK);
2907 switch (RTE_ETH_DEV_SRIOV(dev).active) {
2908 case ETH_64_POOLS:
2909 mrqc |= TXGBE_PORTCTL_NUMVT_64;
2910 break;
2912 case ETH_32_POOLS:
2913 mrqc |= TXGBE_PORTCTL_NUMVT_32;
2914 break;
2916 default:
2917 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
2918 return -EINVAL;
2921 wr32(hw, TXGBE_PORTCTL, mrqc);
2927 txgbe_config_vf_default(struct rte_eth_dev *dev)
2929 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2932 mrqc = rd32(hw, TXGBE_PORTCTL);
2933 mrqc &= ~(TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK);
2934 switch (RTE_ETH_DEV_SRIOV(dev).active) {
2935 case ETH_64_POOLS:
2936 mrqc |= TXGBE_PORTCTL_NUMVT_64;
2937 break;
2939 case ETH_32_POOLS:
2940 mrqc |= TXGBE_PORTCTL_NUMVT_32;
2941 break;
2943 case ETH_16_POOLS:
2944 mrqc |= TXGBE_PORTCTL_NUMVT_16;
2945 break;
2946 default:
2947 PMD_INIT_LOG(ERR,
2948 "invalid pool number in IOV mode");
2952 wr32(hw, TXGBE_PORTCTL, mrqc);
2958 txgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
2960 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
2962 * SRIOV inactive scheme
2963 * any RSS w/o VMDq multi-queue setting
2965 switch (dev->data->dev_conf.rxmode.mq_mode) {
2966 case ETH_MQ_RX_RSS:
2967 case ETH_MQ_RX_VMDQ_RSS:
2968 txgbe_rss_configure(dev);
2969 break;
2971 case ETH_MQ_RX_VMDQ_ONLY:
2972 txgbe_vmdq_rx_hw_configure(dev);
2973 break;
2975 case ETH_MQ_RX_NONE:
2976 default:
2977 /* if mq_mode is none, disable RSS mode. */
2978 txgbe_rss_disable(dev);
2982 /* SRIOV active scheme
2983 * Support RSS together with SRIOV.
2985 switch (dev->data->dev_conf.rxmode.mq_mode) {
2987 case ETH_MQ_RX_VMDQ_RSS:
2988 txgbe_config_vf_rss(dev);
2991 txgbe_config_vf_default(dev);
3000 txgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3002 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3006 /* disable arbiter */
3007 rttdcs = rd32(hw, TXGBE_ARBTXCTL);
3008 rttdcs |= TXGBE_ARBTXCTL_DIA;
3009 wr32(hw, TXGBE_ARBTXCTL, rttdcs);
3011 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3013 * SRIOV inactive scheme
3014 * any DCB w/o VMDq multi-queue setting
3016 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3017 txgbe_vmdq_tx_hw_configure(hw);
3019 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK, 0);
3021 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3023 * SRIOV active scheme
3024 * FIXME if support DCB together with VMDq & SRIOV
3026 case ETH_64_POOLS:
3027 mtqc = TXGBE_PORTCTL_NUMVT_64;
3028 break;
3029 case ETH_32_POOLS:
3030 mtqc = TXGBE_PORTCTL_NUMVT_32;
3031 break;
3032 case ETH_16_POOLS:
3033 mtqc = TXGBE_PORTCTL_NUMVT_16;
3034 break;
3036 default:
3037 PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3039 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK, mtqc);
3042 /* re-enable arbiter */
3043 rttdcs &= ~TXGBE_ARBTXCTL_DIA;
3044 wr32(hw, TXGBE_ARBTXCTL, rttdcs);
3050 * txgbe_get_rscctl_maxdesc
3052 * @pool Memory pool of the Rx queue
3054 static inline uint32_t
3055 txgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3057 struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
3059 uint16_t maxdesc =
3060 RTE_IPV4_MAX_PKT_LEN /
3061 (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
3063 if (maxdesc >= 16)
3064 return TXGBE_RXCFG_RSCMAX_16;
3065 else if (maxdesc >= 8)
3066 return TXGBE_RXCFG_RSCMAX_8;
3067 else if (maxdesc >= 4)
3068 return TXGBE_RXCFG_RSCMAX_4;
3070 return TXGBE_RXCFG_RSCMAX_1;
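/*
 * Worked example (mbuf sizes are illustrative assumptions): with a
 * 2176-byte data room and the default 128-byte headroom, each buffer holds
 * 2048 bytes, so maxdesc = 65535 / 2048 = 31 and the function returns
 * TXGBE_RXCFG_RSCMAX_16.
 */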
3074 * txgbe_set_rsc - configure RSC related port HW registers
3076 * Configures the port's RSC related registers.
3080 * Returns 0 in case of success or a non-zero error code
3083 txgbe_set_rsc(struct rte_eth_dev *dev)
3085 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
3086 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3087 struct rte_eth_dev_info dev_info = { 0 };
3088 bool rsc_capable = false;
3094 dev->dev_ops->dev_infos_get(dev, &dev_info);
3095 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
3096 rsc_capable = true;
3098 if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
3099 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
3104 /* RSC global configuration */
3106 if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
3107 (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
3108 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
3113 rfctl = rd32(hw, TXGBE_PSRCTL);
3114 if (rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
3115 rfctl &= ~TXGBE_PSRCTL_RSCDIA;
3116 else
3117 rfctl |= TXGBE_PSRCTL_RSCDIA;
3118 wr32(hw, TXGBE_PSRCTL, rfctl);
3120 /* If LRO hasn't been requested - we are done here. */
3121 if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
3124 /* Set PSRCTL.RSCACK bit */
3125 rdrxctl = rd32(hw, TXGBE_PSRCTL);
3126 rdrxctl |= TXGBE_PSRCTL_RSCACK;
3127 wr32(hw, TXGBE_PSRCTL, rdrxctl);
3129 /* Per-queue RSC configuration */
3130 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3131 struct txgbe_rx_queue *rxq = dev->data->rx_queues[i];
3132 uint32_t srrctl =
3133 rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
3134 uint32_t psrtype =
3135 rd32(hw, TXGBE_POOLRSS(rxq->reg_idx));
3136 uint32_t eitr =
3137 rd32(hw, TXGBE_ITR(rxq->reg_idx));
3140 * txgbe PMD doesn't support header-split at the moment.
3142 srrctl &= ~TXGBE_RXCFG_HDRLEN_MASK;
3143 srrctl |= TXGBE_RXCFG_HDRLEN(128);
3146 * TODO: Consider setting the Receive Descriptor Minimum
3147 * Threshold Size for an RSC case. This is not an obviously
3148 * beneficial option, but one worth considering...
3151 srrctl |= TXGBE_RXCFG_RSCENA;
3152 srrctl &= ~TXGBE_RXCFG_RSCMAX_MASK;
3153 srrctl |= txgbe_get_rscctl_maxdesc(rxq->mb_pool);
3154 psrtype |= TXGBE_POOLRSS_L4HDR;
3157 * RSC: Set ITR interval corresponding to 2K ints/s.
3159 * Full-sized RSC aggregations for a 10Gb/s link will
3160 * arrive at about a 20K aggregations/s rate.
3162 * A 2K ints/s rate will cause only 10% of the
3163 * aggregations to be closed due to the interrupt timer
3164 * expiration for a streaming at wire-speed case.
3166 * For a sparse streaming case this setting will yield
3167 * at most 500us latency for a single RSC aggregation.
3169 eitr &= ~TXGBE_ITR_IVAL_MASK;
3170 eitr |= TXGBE_ITR_IVAL_10G(TXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
3171 eitr |= TXGBE_ITR_WRDSA;
3173 wr32(hw, TXGBE_RXCFG(rxq->reg_idx), srrctl);
3174 wr32(hw, TXGBE_POOLRSS(rxq->reg_idx), psrtype);
3175 wr32(hw, TXGBE_ITR(rxq->reg_idx), eitr);
3178 * RSC requires the mapping of the queue to the
3179 * interrupt vector.
3181 txgbe_set_ivar_map(hw, 0, rxq->reg_idx, i);
3186 PMD_INIT_LOG(DEBUG, "enabling LRO mode");
3192 txgbe_set_rx_function(struct rte_eth_dev *dev)
3194 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
3197 * Initialize the appropriate LRO callback.
3199 * If all queues satisfy the bulk allocation preconditions
3200 * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use
3201 * bulk allocation. Otherwise use a single allocation version.
3203 if (dev->data->lro) {
3204 if (adapter->rx_bulk_alloc_allowed) {
3205 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3206 "allocation version");
3207 dev->rx_pkt_burst = txgbe_recv_pkts_lro_bulk_alloc;
3209 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3210 "allocation version");
3211 dev->rx_pkt_burst = txgbe_recv_pkts_lro_single_alloc;
3213 } else if (dev->data->scattered_rx) {
3215 * Set the non-LRO scattered callback: there are bulk and
3216 * single allocation versions.
3218 if (adapter->rx_bulk_alloc_allowed) {
3219 PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
3220 "allocation callback (port=%d).",
3221 dev->data->port_id);
3222 dev->rx_pkt_burst = txgbe_recv_pkts_lro_bulk_alloc;
3224 PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
3225 "single allocation) "
3226 "Scattered Rx callback "
3228 dev->data->port_id);
3230 dev->rx_pkt_burst = txgbe_recv_pkts_lro_single_alloc;
3233 * Below we set "simple" callbacks according to port/queues parameters.
3234 * If parameters allow we are going to choose between the following
3235 * options:
3236 * - Bulk Allocation
3237 * - Single buffer allocation (the simplest one)
3239 } else if (adapter->rx_bulk_alloc_allowed) {
3240 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
3241 "satisfied. Rx Burst Bulk Alloc function "
3242 "will be used on port=%d.",
3243 dev->data->port_id);
3245 dev->rx_pkt_burst = txgbe_recv_pkts_bulk_alloc;
3247 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
3248 "satisfied, or Scattered Rx is requested "
3250 dev->data->port_id);
3252 dev->rx_pkt_burst = txgbe_recv_pkts;
3257 * Initializes Receive Unit.
3260 txgbe_dev_rx_init(struct rte_eth_dev *dev)
3262 struct txgbe_hw *hw;
3263 struct txgbe_rx_queue *rxq;
3272 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
3275 PMD_INIT_FUNC_TRACE();
3276 hw = TXGBE_DEV_HW(dev);
3279 * Make sure receives are disabled while setting
3280 * up the RX context (registers, descriptor rings, etc.).
3282 wr32m(hw, TXGBE_MACRXCFG, TXGBE_MACRXCFG_ENA, 0);
3283 wr32m(hw, TXGBE_PBRXCTL, TXGBE_PBRXCTL_ENA, 0);
3285 /* Enable receipt of broadcasted frames */
3286 fctrl = rd32(hw, TXGBE_PSRCTL);
3287 fctrl |= TXGBE_PSRCTL_BCA;
3288 wr32(hw, TXGBE_PSRCTL, fctrl);
3291 * Configure CRC stripping, if any.
3293 hlreg0 = rd32(hw, TXGBE_SECRXCTL);
3294 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3295 hlreg0 &= ~TXGBE_SECRXCTL_CRCSTRIP;
3296 else
3297 hlreg0 |= TXGBE_SECRXCTL_CRCSTRIP;
3298 wr32(hw, TXGBE_SECRXCTL, hlreg0);
3301 * Configure jumbo frame support, if any.
3303 if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
3304 wr32m(hw, TXGBE_FRMSZ, TXGBE_FRMSZ_MAX_MASK,
3305 TXGBE_FRMSZ_MAX(rx_conf->max_rx_pkt_len));
3306 } else {
3307 wr32m(hw, TXGBE_FRMSZ, TXGBE_FRMSZ_MAX_MASK,
3308 TXGBE_FRMSZ_MAX(TXGBE_FRAME_SIZE_DFT));
3312 * If loopback mode is configured, set LPBK bit.
3314 hlreg0 = rd32(hw, TXGBE_PSRCTL);
3315 if (hw->mac.type == txgbe_mac_raptor &&
3316 dev->data->dev_conf.lpbk_mode)
3317 hlreg0 |= TXGBE_PSRCTL_LBENA;
3318 else
3319 hlreg0 &= ~TXGBE_PSRCTL_LBENA;
3321 wr32(hw, TXGBE_PSRCTL, hlreg0);
3324 * Assume no header split and no VLAN strip support
3325 * on any Rx queue first .
3327 rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
3329 /* Setup RX queues */
3330 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3331 rxq = dev->data->rx_queues[i];
3334 * Reset crc_len in case it was changed after queue setup by a
3335 * call to configure.
3337 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3338 rxq->crc_len = RTE_ETHER_CRC_LEN;
3339 else
3340 rxq->crc_len = 0;
3342 /* Setup the Base and Length of the Rx Descriptor Rings */
3343 bus_addr = rxq->rx_ring_phys_addr;
3344 wr32(hw, TXGBE_RXBAL(rxq->reg_idx),
3345 (uint32_t)(bus_addr & BIT_MASK32));
3346 wr32(hw, TXGBE_RXBAH(rxq->reg_idx),
3347 (uint32_t)(bus_addr >> 32));
3348 wr32(hw, TXGBE_RXRP(rxq->reg_idx), 0);
3349 wr32(hw, TXGBE_RXWP(rxq->reg_idx), 0);
3351 srrctl = TXGBE_RXCFG_RNGLEN(rxq->nb_rx_desc);
3353 /* Set if packets are dropped when no descriptors available */
3354 if (rxq->drop_en)
3355 srrctl |= TXGBE_RXCFG_DROP;
3358 * Configure the RX buffer size in the PKTLEN field of
3359 * the RXCFG register of the queue.
3360 * The value is in 1 KB resolution. Valid values can be from
3363 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
3364 RTE_PKTMBUF_HEADROOM);
3365 buf_size = ROUND_UP(buf_size, 0x1 << 10);
3366 srrctl |= TXGBE_RXCFG_PKTLEN(buf_size);
3368 wr32(hw, TXGBE_RXCFG(rxq->reg_idx), srrctl);
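/*
 * Worked example (mempool size is an illustrative assumption): a 2176-byte
 * data room leaves 2048 bytes after the 128-byte headroom; ROUND_UP() keeps
 * that at 2048, so a 2 KB receive buffer is advertised in the PKTLEN field.
 */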
3370 /* It adds dual VLAN length for supporting dual VLAN */
3371 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
3372 2 * TXGBE_VLAN_TAG_SIZE > buf_size)
3373 dev->data->scattered_rx = 1;
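/*
 * Example of the check above (numbers are assumptions): with
 * max_rx_pkt_len = 9000, a 4-byte VLAN tag and 2 KB receive buffers,
 * 9000 + 8 > 2048, so a frame can span several buffers and scattered Rx
 * is enabled for the port.
 */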
3374 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
3375 rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
3378 if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
3379 dev->data->scattered_rx = 1;
3382 * Device configured with multiple RX queues.
3384 txgbe_dev_mq_rx_configure(dev);
3387 * Setup the Checksum Register.
3388 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
3389 * Enable IP/L4 checksum computation by hardware if requested to do so.
3391 rxcsum = rd32(hw, TXGBE_PSRCTL);
3392 rxcsum |= TXGBE_PSRCTL_PCSD;
3393 if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
3394 rxcsum |= TXGBE_PSRCTL_L4CSUM;
3395 else
3396 rxcsum &= ~TXGBE_PSRCTL_L4CSUM;
3398 wr32(hw, TXGBE_PSRCTL, rxcsum);
3400 if (hw->mac.type == txgbe_mac_raptor) {
3401 rdrxctl = rd32(hw, TXGBE_SECRXCTL);
3402 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3403 rdrxctl &= ~TXGBE_SECRXCTL_CRCSTRIP;
3404 else
3405 rdrxctl |= TXGBE_SECRXCTL_CRCSTRIP;
3406 wr32(hw, TXGBE_SECRXCTL, rdrxctl);
3409 rc = txgbe_set_rsc(dev);
3413 txgbe_set_rx_function(dev);
3419 * Initializes Transmit Unit.
3422 txgbe_dev_tx_init(struct rte_eth_dev *dev)
3424 struct txgbe_hw *hw;
3425 struct txgbe_tx_queue *txq;
3429 PMD_INIT_FUNC_TRACE();
3430 hw = TXGBE_DEV_HW(dev);
3432 /* Setup the Base and Length of the Tx Descriptor Rings */
3433 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3434 txq = dev->data->tx_queues[i];
3436 bus_addr = txq->tx_ring_phys_addr;
3437 wr32(hw, TXGBE_TXBAL(txq->reg_idx),
3438 (uint32_t)(bus_addr & BIT_MASK32));
3439 wr32(hw, TXGBE_TXBAH(txq->reg_idx),
3440 (uint32_t)(bus_addr >> 32));
3441 wr32m(hw, TXGBE_TXCFG(txq->reg_idx), TXGBE_TXCFG_BUFLEN_MASK,
3442 TXGBE_TXCFG_BUFLEN(txq->nb_tx_desc));
3443 /* Setup the HW Tx Head and TX Tail descriptor pointers */
3444 wr32(hw, TXGBE_TXRP(txq->reg_idx), 0);
3445 wr32(hw, TXGBE_TXWP(txq->reg_idx), 0);
3448 /* Device configured with multiple TX queues. */
3449 txgbe_dev_mq_tx_configure(dev);
3453 * Set up link loopback mode Tx->Rx.
3455 static inline void __rte_cold
3456 txgbe_setup_loopback_link_raptor(struct txgbe_hw *hw)
3458 PMD_INIT_FUNC_TRACE();
3460 wr32m(hw, TXGBE_MACRXCFG, TXGBE_MACRXCFG_LB, TXGBE_MACRXCFG_LB);
3466 * Start Transmit and Receive Units.
3469 txgbe_dev_rxtx_start(struct rte_eth_dev *dev)
3471 struct txgbe_hw *hw;
3472 struct txgbe_tx_queue *txq;
3473 struct txgbe_rx_queue *rxq;
3479 PMD_INIT_FUNC_TRACE();
3480 hw = TXGBE_DEV_HW(dev);
3482 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3483 txq = dev->data->tx_queues[i];
3484 /* Setup Transmit Threshold Registers */
3485 wr32m(hw, TXGBE_TXCFG(txq->reg_idx),
3486 TXGBE_TXCFG_HTHRESH_MASK |
3487 TXGBE_TXCFG_WTHRESH_MASK,
3488 TXGBE_TXCFG_HTHRESH(txq->hthresh) |
3489 TXGBE_TXCFG_WTHRESH(txq->wthresh));
3492 dmatxctl = rd32(hw, TXGBE_DMATXCTRL);
3493 dmatxctl |= TXGBE_DMATXCTRL_ENA;
3494 wr32(hw, TXGBE_DMATXCTRL, dmatxctl);
3496 for (i = 0; i < dev->data->nb_tx_queues; i++) {
3497 txq = dev->data->tx_queues[i];
3498 if (!txq->tx_deferred_start) {
3499 ret = txgbe_dev_tx_queue_start(dev, i);
3505 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3506 rxq = dev->data->rx_queues[i];
3507 if (!rxq->rx_deferred_start) {
3508 ret = txgbe_dev_rx_queue_start(dev, i);
3514 /* Enable Receive engine */
3515 rxctrl = rd32(hw, TXGBE_PBRXCTL);
3516 rxctrl |= TXGBE_PBRXCTL_ENA;
3517 hw->mac.enable_rx_dma(hw, rxctrl);
3519 /* If loopback mode is enabled, set up the link accordingly */
3520 if (hw->mac.type == txgbe_mac_raptor &&
3521 dev->data->dev_conf.lpbk_mode)
3522 txgbe_setup_loopback_link_raptor(hw);
3528 txgbe_dev_save_rx_queue(struct txgbe_hw *hw, uint16_t rx_queue_id)
3530 u32 *reg = &hw->q_rx_regs[rx_queue_id * 8];
3531 *(reg++) = rd32(hw, TXGBE_RXBAL(rx_queue_id));
3532 *(reg++) = rd32(hw, TXGBE_RXBAH(rx_queue_id));
3533 *(reg++) = rd32(hw, TXGBE_RXCFG(rx_queue_id));
3537 txgbe_dev_store_rx_queue(struct txgbe_hw *hw, uint16_t rx_queue_id)
3539 u32 *reg = &hw->q_rx_regs[rx_queue_id * 8];
3540 wr32(hw, TXGBE_RXBAL(rx_queue_id), *(reg++));
3541 wr32(hw, TXGBE_RXBAH(rx_queue_id), *(reg++));
3542 wr32(hw, TXGBE_RXCFG(rx_queue_id), *(reg++) & ~TXGBE_RXCFG_ENA);
3546 txgbe_dev_save_tx_queue(struct txgbe_hw *hw, uint16_t tx_queue_id)
3548 u32 *reg = &hw->q_tx_regs[tx_queue_id * 8];
3549 *(reg++) = rd32(hw, TXGBE_TXBAL(tx_queue_id));
3550 *(reg++) = rd32(hw, TXGBE_TXBAH(tx_queue_id));
3551 *(reg++) = rd32(hw, TXGBE_TXCFG(tx_queue_id));
3555 txgbe_dev_store_tx_queue(struct txgbe_hw *hw, uint16_t tx_queue_id)
3557 u32 *reg = &hw->q_tx_regs[tx_queue_id * 8];
3558 wr32(hw, TXGBE_TXBAL(tx_queue_id), *(reg++));
3559 wr32(hw, TXGBE_TXBAH(tx_queue_id), *(reg++));
3560 wr32(hw, TXGBE_TXCFG(tx_queue_id), *(reg++) & ~TXGBE_TXCFG_ENA);
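/*
 * The save/store helpers above snapshot and restore the per-queue base
 * address and config registers around a queue stop; eight 32-bit slots are
 * reserved per queue in hw->q_rx_regs / hw->q_tx_regs even though only three
 * registers are currently saved, and the queue is restored with its enable
 * bit cleared so it can be re-enabled explicitly.
 */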
3564 * Start Receive Units for specified queue.
3567 txgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3569 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3570 struct txgbe_rx_queue *rxq;
3574 PMD_INIT_FUNC_TRACE();
3576 rxq = dev->data->rx_queues[rx_queue_id];
3578 /* Allocate buffers for descriptor rings */
3579 if (txgbe_alloc_rx_queue_mbufs(rxq) != 0) {
3580 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
3584 rxdctl = rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
3585 rxdctl |= TXGBE_RXCFG_ENA;
3586 wr32(hw, TXGBE_RXCFG(rxq->reg_idx), rxdctl);
3588 /* Wait until RX Enable ready */
3589 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
3590 do {
3591 rte_delay_ms(1);
3592 rxdctl = rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
3593 } while (--poll_ms && !(rxdctl & TXGBE_RXCFG_ENA));
3594 if (!poll_ms)
3595 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
3597 wr32(hw, TXGBE_RXRP(rxq->reg_idx), 0);
3598 wr32(hw, TXGBE_RXWP(rxq->reg_idx), rxq->nb_rx_desc - 1);
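/*
 * With the head (read pointer) at 0, writing nb_rx_desc - 1 to the tail
 * hands all but one descriptor to the hardware; holding one descriptor back
 * is the usual way to keep a full ring distinguishable from an empty one.
 */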
3599 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
3605 * Stop Receive Units for specified queue.
3608 txgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3610 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3611 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
3612 struct txgbe_rx_queue *rxq;
3616 PMD_INIT_FUNC_TRACE();
3618 rxq = dev->data->rx_queues[rx_queue_id];
3620 txgbe_dev_save_rx_queue(hw, rxq->reg_idx);
3621 wr32m(hw, TXGBE_RXCFG(rxq->reg_idx), TXGBE_RXCFG_ENA, 0);
3623 /* Wait until RX Enable bit clear */
3624 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
3625 do {
3626 rte_delay_ms(1);
3627 rxdctl = rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
3628 } while (--poll_ms && (rxdctl & TXGBE_RXCFG_ENA));
3629 if (!poll_ms)
3630 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
3632 rte_delay_us(RTE_TXGBE_WAIT_100_US);
3633 txgbe_dev_store_rx_queue(hw, rxq->reg_idx);
3635 txgbe_rx_queue_release_mbufs(rxq);
3636 txgbe_reset_rx_queue(adapter, rxq);
3637 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
3643 * Start Transmit Units for specified queue.
3646 txgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
3648 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3649 struct txgbe_tx_queue *txq;
3653 PMD_INIT_FUNC_TRACE();
3655 txq = dev->data->tx_queues[tx_queue_id];
3656 wr32m(hw, TXGBE_TXCFG(txq->reg_idx), TXGBE_TXCFG_ENA, TXGBE_TXCFG_ENA);
3658 /* Wait until TX Enable ready */
3659 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
3660 do {
3661 rte_delay_ms(1);
3662 txdctl = rd32(hw, TXGBE_TXCFG(txq->reg_idx));
3663 } while (--poll_ms && !(txdctl & TXGBE_TXCFG_ENA));
3664 if (!poll_ms)
3665 PMD_INIT_LOG(ERR, "Could not enable "
3666 "Tx Queue %d", tx_queue_id);
3669 wr32(hw, TXGBE_TXWP(txq->reg_idx), txq->tx_tail);
3670 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
3676 * Stop Transmit Units for specified queue.
3679 txgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
3681 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3682 struct txgbe_tx_queue *txq;
3684 uint32_t txtdh, txtdt;
3687 PMD_INIT_FUNC_TRACE();
3689 txq = dev->data->tx_queues[tx_queue_id];
3691 /* Wait until TX queue is empty */
3692 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
3694 rte_delay_us(RTE_TXGBE_WAIT_100_US);
3695 txtdh = rd32(hw, TXGBE_TXRP(txq->reg_idx));
3696 txtdt = rd32(hw, TXGBE_TXWP(txq->reg_idx));
3697 } while (--poll_ms && (txtdh != txtdt));
3700 "Tx Queue %d is not empty when stopping.",
3703 txgbe_dev_save_tx_queue(hw, txq->reg_idx);
3704 wr32m(hw, TXGBE_TXCFG(txq->reg_idx), TXGBE_TXCFG_ENA, 0);
3706 /* Wait until TX Enable bit clear */
3707 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
3708 do {
3709 rte_delay_ms(1);
3710 txdctl = rd32(hw, TXGBE_TXCFG(txq->reg_idx));
3711 } while (--poll_ms && (txdctl & TXGBE_TXCFG_ENA));
3712 if (!poll_ms)
3713 PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
3716 rte_delay_us(RTE_TXGBE_WAIT_100_US);
3717 txgbe_dev_store_tx_queue(hw, txq->reg_idx);
3719 if (txq->ops != NULL) {
3720 txq->ops->release_mbufs(txq);
3721 txq->ops->reset(txq);
3723 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
3729 txgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
3730 struct rte_eth_rxq_info *qinfo)
3732 struct txgbe_rx_queue *rxq;
3734 rxq = dev->data->rx_queues[queue_id];
3736 qinfo->mp = rxq->mb_pool;
3737 qinfo->scattered_rx = dev->data->scattered_rx;
3738 qinfo->nb_desc = rxq->nb_rx_desc;
3740 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
3741 qinfo->conf.rx_drop_en = rxq->drop_en;
3742 qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
3743 qinfo->conf.offloads = rxq->offloads;
3747 txgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
3748 struct rte_eth_txq_info *qinfo)
3750 struct txgbe_tx_queue *txq;
3752 txq = dev->data->tx_queues[queue_id];
3754 qinfo->nb_desc = txq->nb_tx_desc;
3756 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
3757 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
3758 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
3760 qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
3761 qinfo->conf.offloads = txq->offloads;
3762 qinfo->conf.tx_deferred_start = txq->tx_deferred_start;