1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2015-2020
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
20 #include <rte_debug.h>
21 #include <rte_ethdev.h>
22 #include <rte_ethdev_driver.h>
23 #include <rte_memzone.h>
24 #include <rte_atomic.h>
25 #include <rte_mempool.h>
26 #include <rte_malloc.h>
28 #include <rte_ether.h>
29 #include <rte_prefetch.h>
33 #include <rte_string_fns.h>
34 #include <rte_errno.h>
38 #include "txgbe_logs.h"
39 #include "base/txgbe.h"
40 #include "txgbe_ethdev.h"
41 #include "txgbe_rxtx.h"
43 #ifdef RTE_LIBRTE_IEEE1588
44 #define TXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
46 #define TXGBE_TX_IEEE1588_TMST 0
49 /* Bit mask to indicate which bits are required for building the TX context */
50 static const u64 TXGBE_TX_OFFLOAD_MASK = (PKT_TX_IP_CKSUM |
59 PKT_TX_OUTER_IP_CKSUM |
60 TXGBE_TX_IEEE1588_TMST);
62 #define TXGBE_TX_OFFLOAD_NOTSUP_MASK \
63 (PKT_TX_OFFLOAD_MASK ^ TXGBE_TX_OFFLOAD_MASK)
66 * Prefetch a cache line into all cache levels.
68 #define rte_txgbe_prefetch(p) rte_prefetch0(p)
71 txgbe_is_vf(struct rte_eth_dev *dev)
73 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
75 switch (hw->mac.type) {
76 case txgbe_mac_raptor_vf:
83 /*********************************************************************
87 **********************************************************************/
90 * Check for descriptors with their DD bit set and free mbufs.
91 * Return the total number of buffers freed.
93 static __rte_always_inline int
94 txgbe_tx_free_bufs(struct txgbe_tx_queue *txq)
96 struct txgbe_tx_entry *txep;
99 struct rte_mbuf *m, *free[RTE_TXGBE_TX_MAX_FREE_BUF_SZ];
101 /* check DD bit on threshold descriptor */
102 status = txq->tx_ring[txq->tx_next_dd].dw3;
103 if (!(status & rte_cpu_to_le_32(TXGBE_TXD_DD))) {
104 if (txq->nb_tx_free >> 1 < txq->tx_free_thresh)
105 txgbe_set32_masked(txq->tdc_reg_addr,
106 TXGBE_TXCFG_FLUSH, TXGBE_TXCFG_FLUSH);
111 * first buffer to free from S/W ring is at index
112 * tx_next_dd - (tx_free_thresh-1)
114 txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_free_thresh - 1)];
115 for (i = 0; i < txq->tx_free_thresh; ++i, ++txep) {
116 /* free buffers one at a time */
117 m = rte_pktmbuf_prefree_seg(txep->mbuf);
120 if (unlikely(m == NULL))
123 if (nb_free >= RTE_TXGBE_TX_MAX_FREE_BUF_SZ ||
124 (nb_free > 0 && m->pool != free[0]->pool)) {
125 rte_mempool_put_bulk(free[0]->pool,
126 (void **)free, nb_free);
134 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
136 /* buffers were freed, update counters */
137 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_free_thresh);
138 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_free_thresh);
139 if (txq->tx_next_dd >= txq->nb_tx_desc)
140 txq->tx_next_dd = (uint16_t)(txq->tx_free_thresh - 1);
142 return txq->tx_free_thresh;
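/*
 * Illustrative sketch (not part of the driver, kept under #if 0): the
 * batching pattern used above -- collect freed mbufs that share a mempool
 * and return them with a single rte_mempool_put_bulk() call, flushing the
 * scratch array whenever the pool changes or the array fills up.  The
 * function name and array size below are assumptions made for the example.
 */
#if 0
static void
example_bulk_free(struct rte_mbuf **mbufs, unsigned int n)
{
	struct rte_mbuf *stash[64];
	unsigned int i, nb_stash = 0;

	for (i = 0; i < n; i++) {
		struct rte_mbuf *m = rte_pktmbuf_prefree_seg(mbufs[i]);

		if (m == NULL)
			continue;
		if (nb_stash == RTE_DIM(stash) ||
		    (nb_stash > 0 && m->pool != stash[0]->pool)) {
			rte_mempool_put_bulk(stash[0]->pool,
					     (void **)stash, nb_stash);
			nb_stash = 0;
		}
		stash[nb_stash++] = m;
	}
	if (nb_stash > 0)
		rte_mempool_put_bulk(stash[0]->pool, (void **)stash, nb_stash);
}
#endif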
145 /* Populate 4 descriptors with data from 4 mbufs */
147 tx4(volatile struct txgbe_tx_desc *txdp, struct rte_mbuf **pkts)
149 uint64_t buf_dma_addr;
153 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
154 buf_dma_addr = rte_mbuf_data_iova(*pkts);
155 pkt_len = (*pkts)->data_len;
157 /* write data to descriptor */
158 txdp->qw0 = rte_cpu_to_le_64(buf_dma_addr);
159 txdp->dw2 = cpu_to_le32(TXGBE_TXD_FLAGS |
160 TXGBE_TXD_DATLEN(pkt_len));
161 txdp->dw3 = cpu_to_le32(TXGBE_TXD_PAYLEN(pkt_len));
163 rte_prefetch0(&(*pkts)->pool);
167 /* Populate 1 descriptor with data from 1 mbuf */
169 tx1(volatile struct txgbe_tx_desc *txdp, struct rte_mbuf **pkts)
171 uint64_t buf_dma_addr;
174 buf_dma_addr = rte_mbuf_data_iova(*pkts);
175 pkt_len = (*pkts)->data_len;
177 /* write data to descriptor */
178 txdp->qw0 = cpu_to_le64(buf_dma_addr);
179 txdp->dw2 = cpu_to_le32(TXGBE_TXD_FLAGS |
180 TXGBE_TXD_DATLEN(pkt_len));
181 txdp->dw3 = cpu_to_le32(TXGBE_TXD_PAYLEN(pkt_len));
183 rte_prefetch0(&(*pkts)->pool);
187 * Fill H/W descriptor ring with mbuf data.
188 * Copy mbuf pointers to the S/W ring.
191 txgbe_tx_fill_hw_ring(struct txgbe_tx_queue *txq, struct rte_mbuf **pkts,
194 volatile struct txgbe_tx_desc *txdp = &txq->tx_ring[txq->tx_tail];
195 struct txgbe_tx_entry *txep = &txq->sw_ring[txq->tx_tail];
196 const int N_PER_LOOP = 4;
197 const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
198 int mainpart, leftover;
202 * Process most of the packets in chunks of N pkts. Any
203 * leftover packets will get processed one at a time.
205 mainpart = (nb_pkts & ((uint32_t)~N_PER_LOOP_MASK));
206 leftover = (nb_pkts & ((uint32_t)N_PER_LOOP_MASK));
207 for (i = 0; i < mainpart; i += N_PER_LOOP) {
208 /* Copy N mbuf pointers to the S/W ring */
209 for (j = 0; j < N_PER_LOOP; ++j)
210 (txep + i + j)->mbuf = *(pkts + i + j);
211 tx4(txdp + i, pkts + i);
214 if (unlikely(leftover > 0)) {
215 for (i = 0; i < leftover; ++i) {
216 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
217 tx1(txdp + mainpart + i, pkts + mainpart + i);
222 static inline uint16_t
223 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
226 struct txgbe_tx_queue *txq = (struct txgbe_tx_queue *)tx_queue;
230 * Begin scanning the H/W ring for done descriptors when the
231 * number of available descriptors drops below tx_free_thresh. For
232 * each done descriptor, free the associated buffer.
234 if (txq->nb_tx_free < txq->tx_free_thresh)
235 txgbe_tx_free_bufs(txq);
237 /* Only use descriptors that are available */
238 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
239 if (unlikely(nb_pkts == 0))
242 /* Use exactly nb_pkts descriptors */
243 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
246 * At this point, we know there are enough descriptors in the
247 * ring to transmit all the packets. This assumes that each
248 * mbuf contains a single segment, and that no new offloads
249 * are expected, which would require a new context descriptor.
253 * See if we're going to wrap-around. If so, handle the top
254 * of the descriptor ring first, then do the bottom. If not,
255 * the processing looks just like the "bottom" part anyway...
257 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
258 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
259 txgbe_tx_fill_hw_ring(txq, tx_pkts, n);
263 /* Fill H/W descriptor ring with mbuf data */
264 txgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
265 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
268 * Check for wrap-around. This would only happen if we used
269 * up to the last descriptor in the ring, no more, no less.
271 if (txq->tx_tail >= txq->nb_tx_desc)
274 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
275 (uint16_t)txq->port_id, (uint16_t)txq->queue_id,
276 (uint16_t)txq->tx_tail, (uint16_t)nb_pkts);
278 /* update tail pointer */
280 txgbe_set32_relaxed(txq->tdt_reg_addr, txq->tx_tail);
286 txgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
291 /* Try to transmit the whole burst at once if it fits in TX_MAX_BURST pkts */
292 if (likely(nb_pkts <= RTE_PMD_TXGBE_TX_MAX_BURST))
293 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
295 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
300 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_TXGBE_TX_MAX_BURST);
301 ret = tx_xmit_pkts(tx_queue, &tx_pkts[nb_tx], n);
302 nb_tx = (uint16_t)(nb_tx + ret);
303 nb_pkts = (uint16_t)(nb_pkts - ret);
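/*
 * Illustrative sketch (not part of the driver, kept under #if 0): from the
 * application side the same "send in chunks, retry the remainder" behaviour
 * rests on the public rte_eth_tx_burst() API.  port_id, queue_id and the
 * retry policy below are example choices, not anything defined by txgbe.
 */
#if 0
static uint16_t
example_tx_drain(uint16_t port_id, uint16_t queue_id,
		 struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t sent = 0;

	while (sent < nb_pkts) {
		uint16_t n = rte_eth_tx_burst(port_id, queue_id,
					      &pkts[sent], nb_pkts - sent);
		if (n == 0)
			break;	/* ring is full; caller may retry or drop */
		sent += n;
	}
	return sent;
}
#endif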
312 txgbe_set_xmit_ctx(struct txgbe_tx_queue *txq,
313 volatile struct txgbe_tx_ctx_desc *ctx_txd,
314 uint64_t ol_flags, union txgbe_tx_offload tx_offload)
316 union txgbe_tx_offload tx_offload_mask;
317 uint32_t type_tucmd_mlhl;
318 uint32_t mss_l4len_idx;
320 uint32_t vlan_macip_lens;
321 uint32_t tunnel_seed;
323 ctx_idx = txq->ctx_curr;
324 tx_offload_mask.data[0] = 0;
325 tx_offload_mask.data[1] = 0;
327 /* Specify which HW CTX to upload. */
328 mss_l4len_idx = TXGBE_TXD_IDX(ctx_idx);
329 type_tucmd_mlhl = TXGBE_TXD_CTXT;
331 tx_offload_mask.ptid |= ~0;
332 type_tucmd_mlhl |= TXGBE_TXD_PTID(tx_offload.ptid);
334 /* check if TCP segmentation is required for this packet */
335 if (ol_flags & PKT_TX_TCP_SEG) {
336 tx_offload_mask.l2_len |= ~0;
337 tx_offload_mask.l3_len |= ~0;
338 tx_offload_mask.l4_len |= ~0;
339 tx_offload_mask.tso_segsz |= ~0;
340 mss_l4len_idx |= TXGBE_TXD_MSS(tx_offload.tso_segsz);
341 mss_l4len_idx |= TXGBE_TXD_L4LEN(tx_offload.l4_len);
342 } else { /* no TSO, check if hardware checksum is needed */
343 if (ol_flags & PKT_TX_IP_CKSUM) {
344 tx_offload_mask.l2_len |= ~0;
345 tx_offload_mask.l3_len |= ~0;
348 switch (ol_flags & PKT_TX_L4_MASK) {
349 case PKT_TX_UDP_CKSUM:
351 TXGBE_TXD_L4LEN(sizeof(struct rte_udp_hdr));
352 tx_offload_mask.l2_len |= ~0;
353 tx_offload_mask.l3_len |= ~0;
355 case PKT_TX_TCP_CKSUM:
357 TXGBE_TXD_L4LEN(sizeof(struct rte_tcp_hdr));
358 tx_offload_mask.l2_len |= ~0;
359 tx_offload_mask.l3_len |= ~0;
361 case PKT_TX_SCTP_CKSUM:
363 TXGBE_TXD_L4LEN(sizeof(struct rte_sctp_hdr));
364 tx_offload_mask.l2_len |= ~0;
365 tx_offload_mask.l3_len |= ~0;
372 vlan_macip_lens = TXGBE_TXD_IPLEN(tx_offload.l3_len >> 1);
374 if (ol_flags & PKT_TX_TUNNEL_MASK) {
375 tx_offload_mask.outer_tun_len |= ~0;
376 tx_offload_mask.outer_l2_len |= ~0;
377 tx_offload_mask.outer_l3_len |= ~0;
378 tx_offload_mask.l2_len |= ~0;
379 tunnel_seed = TXGBE_TXD_ETUNLEN(tx_offload.outer_tun_len >> 1);
380 tunnel_seed |= TXGBE_TXD_EIPLEN(tx_offload.outer_l3_len >> 2);
382 switch (ol_flags & PKT_TX_TUNNEL_MASK) {
383 case PKT_TX_TUNNEL_IPIP:
384 /* for non UDP / GRE tunneling, set to 0b */
386 case PKT_TX_TUNNEL_VXLAN:
387 case PKT_TX_TUNNEL_GENEVE:
388 tunnel_seed |= TXGBE_TXD_ETYPE_UDP;
390 case PKT_TX_TUNNEL_GRE:
391 tunnel_seed |= TXGBE_TXD_ETYPE_GRE;
394 PMD_TX_LOG(ERR, "Tunnel type not supported");
397 vlan_macip_lens |= TXGBE_TXD_MACLEN(tx_offload.outer_l2_len);
400 vlan_macip_lens |= TXGBE_TXD_MACLEN(tx_offload.l2_len);
403 if (ol_flags & PKT_TX_VLAN_PKT) {
404 tx_offload_mask.vlan_tci |= ~0;
405 vlan_macip_lens |= TXGBE_TXD_VLAN(tx_offload.vlan_tci);
408 txq->ctx_cache[ctx_idx].flags = ol_flags;
409 txq->ctx_cache[ctx_idx].tx_offload.data[0] =
410 tx_offload_mask.data[0] & tx_offload.data[0];
411 txq->ctx_cache[ctx_idx].tx_offload.data[1] =
412 tx_offload_mask.data[1] & tx_offload.data[1];
413 txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
415 ctx_txd->dw0 = rte_cpu_to_le_32(vlan_macip_lens);
416 ctx_txd->dw1 = rte_cpu_to_le_32(tunnel_seed);
417 ctx_txd->dw2 = rte_cpu_to_le_32(type_tucmd_mlhl);
418 ctx_txd->dw3 = rte_cpu_to_le_32(mss_l4len_idx);
422 * Check which hardware context can be used. Use the existing match
423 * or create a new context descriptor.
425 static inline uint32_t
426 what_ctx_update(struct txgbe_tx_queue *txq, uint64_t flags,
427 union txgbe_tx_offload tx_offload)
429 /* Check whether it matches the currently used context */
430 if (likely(txq->ctx_cache[txq->ctx_curr].flags == flags &&
431 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
432 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
433 & tx_offload.data[0])) &&
434 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
435 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
436 & tx_offload.data[1]))))
437 return txq->ctx_curr;
439 /* Otherwise, check whether it matches the next context */
441 if (likely(txq->ctx_cache[txq->ctx_curr].flags == flags &&
442 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
443 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
444 & tx_offload.data[0])) &&
445 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
446 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
447 & tx_offload.data[1]))))
448 return txq->ctx_curr;
450 /* Mismatch: a new context descriptor must be built */
451 return TXGBE_CTX_NUM;
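/*
 * Illustrative sketch (not part of the driver, kept under #if 0): the cache
 * lookup above is a masked compare -- a cached slot matches when its flags
 * are identical and every offload field the slot cares about (per its
 * tx_offload_mask) is equal.  The helper name below is an assumption made
 * for the example only.
 */
#if 0
static inline int
example_ctx_match(const struct txgbe_ctx_info *slot,
		  uint64_t flags, union txgbe_tx_offload ofl)
{
	return slot->flags == flags &&
	       slot->tx_offload.data[0] ==
			(slot->tx_offload_mask.data[0] & ofl.data[0]) &&
	       slot->tx_offload.data[1] ==
			(slot->tx_offload_mask.data[1] & ofl.data[1]);
}
#endif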
454 static inline uint32_t
455 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
459 if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM) {
461 tmp |= TXGBE_TXD_L4CS;
463 if (ol_flags & PKT_TX_IP_CKSUM) {
465 tmp |= TXGBE_TXD_IPCS;
467 if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
469 tmp |= TXGBE_TXD_EIPCS;
471 if (ol_flags & PKT_TX_TCP_SEG) {
473 /* implies IPv4 cksum */
474 if (ol_flags & PKT_TX_IPV4)
475 tmp |= TXGBE_TXD_IPCS;
476 tmp |= TXGBE_TXD_L4CS;
478 if (ol_flags & PKT_TX_VLAN_PKT)
484 static inline uint32_t
485 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
487 uint32_t cmdtype = 0;
489 if (ol_flags & PKT_TX_VLAN_PKT)
490 cmdtype |= TXGBE_TXD_VLE;
491 if (ol_flags & PKT_TX_TCP_SEG)
492 cmdtype |= TXGBE_TXD_TSE;
493 if (ol_flags & PKT_TX_MACSEC)
494 cmdtype |= TXGBE_TXD_LINKSEC;
498 static inline uint8_t
499 tx_desc_ol_flags_to_ptid(uint64_t oflags, uint32_t ptype)
504 return txgbe_encode_ptype(ptype);
506 /* Only support flags in TXGBE_TX_OFFLOAD_MASK */
507 tun = !!(oflags & PKT_TX_TUNNEL_MASK);
510 ptype = RTE_PTYPE_L2_ETHER;
511 if (oflags & PKT_TX_VLAN)
512 ptype |= RTE_PTYPE_L2_ETHER_VLAN;
515 if (oflags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IP_CKSUM))
516 ptype |= RTE_PTYPE_L3_IPV4;
517 else if (oflags & (PKT_TX_OUTER_IPV6))
518 ptype |= RTE_PTYPE_L3_IPV6;
520 if (oflags & (PKT_TX_IPV4 | PKT_TX_IP_CKSUM))
521 ptype |= (tun ? RTE_PTYPE_INNER_L3_IPV4 : RTE_PTYPE_L3_IPV4);
522 else if (oflags & (PKT_TX_IPV6))
523 ptype |= (tun ? RTE_PTYPE_INNER_L3_IPV6 : RTE_PTYPE_L3_IPV6);
526 switch (oflags & (PKT_TX_L4_MASK)) {
527 case PKT_TX_TCP_CKSUM:
528 ptype |= (tun ? RTE_PTYPE_INNER_L4_TCP : RTE_PTYPE_L4_TCP);
530 case PKT_TX_UDP_CKSUM:
531 ptype |= (tun ? RTE_PTYPE_INNER_L4_UDP : RTE_PTYPE_L4_UDP);
533 case PKT_TX_SCTP_CKSUM:
534 ptype |= (tun ? RTE_PTYPE_INNER_L4_SCTP : RTE_PTYPE_L4_SCTP);
538 if (oflags & PKT_TX_TCP_SEG)
539 ptype |= (tun ? RTE_PTYPE_INNER_L4_TCP : RTE_PTYPE_L4_TCP);
542 switch (oflags & PKT_TX_TUNNEL_MASK) {
543 case PKT_TX_TUNNEL_VXLAN:
544 ptype |= RTE_PTYPE_L2_ETHER |
546 RTE_PTYPE_TUNNEL_VXLAN;
547 ptype |= RTE_PTYPE_INNER_L2_ETHER;
549 case PKT_TX_TUNNEL_GRE:
550 ptype |= RTE_PTYPE_L2_ETHER |
552 RTE_PTYPE_TUNNEL_GRE;
553 ptype |= RTE_PTYPE_INNER_L2_ETHER;
555 case PKT_TX_TUNNEL_GENEVE:
556 ptype |= RTE_PTYPE_L2_ETHER |
558 RTE_PTYPE_TUNNEL_GENEVE;
559 ptype |= RTE_PTYPE_INNER_L2_ETHER;
561 case PKT_TX_TUNNEL_VXLAN_GPE:
562 ptype |= RTE_PTYPE_L2_ETHER |
564 RTE_PTYPE_TUNNEL_VXLAN_GPE;
565 ptype |= RTE_PTYPE_INNER_L2_ETHER;
567 case PKT_TX_TUNNEL_IPIP:
568 case PKT_TX_TUNNEL_IP:
569 ptype |= RTE_PTYPE_L2_ETHER |
575 return txgbe_encode_ptype(ptype);
578 #ifndef DEFAULT_TX_FREE_THRESH
579 #define DEFAULT_TX_FREE_THRESH 32
582 /* Reset transmit descriptors after they have been used */
584 txgbe_xmit_cleanup(struct txgbe_tx_queue *txq)
586 struct txgbe_tx_entry *sw_ring = txq->sw_ring;
587 volatile struct txgbe_tx_desc *txr = txq->tx_ring;
588 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
589 uint16_t nb_tx_desc = txq->nb_tx_desc;
590 uint16_t desc_to_clean_to;
591 uint16_t nb_tx_to_clean;
594 /* Determine the last descriptor needing to be cleaned */
595 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_free_thresh);
596 if (desc_to_clean_to >= nb_tx_desc)
597 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
599 /* Check to make sure the last descriptor to clean is done */
600 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
601 status = txr[desc_to_clean_to].dw3;
602 if (!(status & rte_cpu_to_le_32(TXGBE_TXD_DD))) {
603 PMD_TX_FREE_LOG(DEBUG,
604 "TX descriptor %4u is not done"
605 "(port=%d queue=%d)",
607 txq->port_id, txq->queue_id);
608 if (txq->nb_tx_free >> 1 < txq->tx_free_thresh)
609 txgbe_set32_masked(txq->tdc_reg_addr,
610 TXGBE_TXCFG_FLUSH, TXGBE_TXCFG_FLUSH);
611 /* Failed to clean any descriptors, better luck next time */
615 /* Figure out how many descriptors will be cleaned */
616 if (last_desc_cleaned > desc_to_clean_to)
617 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
620 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
623 PMD_TX_FREE_LOG(DEBUG,
624 "Cleaning %4u TX descriptors: %4u to %4u "
625 "(port=%d queue=%d)",
626 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
627 txq->port_id, txq->queue_id);
630 * The last descriptor to clean is done, so that means all the
631 * descriptors from the last descriptor that was cleaned
632 * up to the last descriptor with the RS bit set
633 * are done. Only reset the threshold descriptor.
635 txr[desc_to_clean_to].dw3 = 0;
637 /* Update the txq to reflect the last descriptor that was cleaned */
638 txq->last_desc_cleaned = desc_to_clean_to;
639 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
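/*
 * Worked example for the arithmetic above: with nb_tx_desc = 512,
 * tx_free_thresh = 32 and last_desc_cleaned = 500,
 *   desc_to_clean_to = (500 + 32) - 512 = 20
 * and, since last_desc_cleaned > desc_to_clean_to,
 *   nb_tx_to_clean   = (512 - 500) + 20 = 32
 * so the clean-up window always covers exactly tx_free_thresh descriptors,
 * modulo the ring size (assuming sw_ring[20].last_id stays 20 here).
 */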
645 static inline uint8_t
646 txgbe_get_tun_len(struct rte_mbuf *mbuf)
648 struct txgbe_genevehdr genevehdr;
649 const struct txgbe_genevehdr *gh;
652 switch (mbuf->ol_flags & PKT_TX_TUNNEL_MASK) {
653 case PKT_TX_TUNNEL_IPIP:
656 case PKT_TX_TUNNEL_VXLAN:
657 case PKT_TX_TUNNEL_VXLAN_GPE:
658 tun_len = sizeof(struct txgbe_udphdr)
659 + sizeof(struct txgbe_vxlanhdr);
661 case PKT_TX_TUNNEL_GRE:
662 tun_len = sizeof(struct txgbe_nvgrehdr);
664 case PKT_TX_TUNNEL_GENEVE:
665 gh = rte_pktmbuf_read(mbuf,
666 mbuf->outer_l2_len + mbuf->outer_l3_len,
667 sizeof(genevehdr), &genevehdr);
668 tun_len = sizeof(struct txgbe_udphdr)
669 + sizeof(struct txgbe_genevehdr)
670 + (gh->opt_len << 2);
680 txgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
683 struct txgbe_tx_queue *txq;
684 struct txgbe_tx_entry *sw_ring;
685 struct txgbe_tx_entry *txe, *txn;
686 volatile struct txgbe_tx_desc *txr;
687 volatile struct txgbe_tx_desc *txd;
688 struct rte_mbuf *tx_pkt;
689 struct rte_mbuf *m_seg;
690 uint64_t buf_dma_addr;
691 uint32_t olinfo_status;
692 uint32_t cmd_type_len;
703 union txgbe_tx_offload tx_offload;
705 tx_offload.data[0] = 0;
706 tx_offload.data[1] = 0;
708 sw_ring = txq->sw_ring;
710 tx_id = txq->tx_tail;
711 txe = &sw_ring[tx_id];
713 /* Determine if the descriptor ring needs to be cleaned. */
714 if (txq->nb_tx_free < txq->tx_free_thresh)
715 txgbe_xmit_cleanup(txq);
717 rte_prefetch0(&txe->mbuf->pool);
720 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
723 pkt_len = tx_pkt->pkt_len;
726 * Determine how many (if any) context descriptors
727 * are needed for offload functionality.
729 ol_flags = tx_pkt->ol_flags;
731 /* If hardware offload required */
732 tx_ol_req = ol_flags & TXGBE_TX_OFFLOAD_MASK;
734 tx_offload.ptid = tx_desc_ol_flags_to_ptid(tx_ol_req,
735 tx_pkt->packet_type);
736 tx_offload.l2_len = tx_pkt->l2_len;
737 tx_offload.l3_len = tx_pkt->l3_len;
738 tx_offload.l4_len = tx_pkt->l4_len;
739 tx_offload.vlan_tci = tx_pkt->vlan_tci;
740 tx_offload.tso_segsz = tx_pkt->tso_segsz;
741 tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
742 tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
743 tx_offload.outer_tun_len = txgbe_get_tun_len(tx_pkt);
745 /* Decide whether a new context needs to be built or an existing one reused */
746 ctx = what_ctx_update(txq, tx_ol_req, tx_offload);
747 /* Only allocate context descriptor if required */
748 new_ctx = (ctx == TXGBE_CTX_NUM);
753 * Keep track of how many descriptors are used in this loop.
754 * This will always be the number of segments + the number of
755 * Context descriptors required to transmit the packet
757 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
760 * The number of descriptors that must be allocated for a
761 * packet is the number of segments of that packet, plus 1
762 * Context Descriptor for the hardware offload, if any.
763 * Determine the last TX descriptor to allocate in the TX ring
764 * for the packet, starting from the current position (tx_id)
767 tx_last = (uint16_t)(tx_id + nb_used - 1);
770 if (tx_last >= txq->nb_tx_desc)
771 tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
773 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
774 " tx_first=%u tx_last=%u",
775 (uint16_t)txq->port_id,
776 (uint16_t)txq->queue_id,
782 * Make sure there are enough TX descriptors available to
783 * transmit the entire packet.
784 * nb_used better be less than or equal to txq->tx_free_thresh
786 if (nb_used > txq->nb_tx_free) {
787 PMD_TX_FREE_LOG(DEBUG,
788 "Not enough free TX descriptors "
789 "nb_used=%4u nb_free=%4u "
790 "(port=%d queue=%d)",
791 nb_used, txq->nb_tx_free,
792 txq->port_id, txq->queue_id);
794 if (txgbe_xmit_cleanup(txq) != 0) {
795 /* Could not clean any descriptors */
801 /* nb_used better be <= txq->tx_free_thresh */
802 if (unlikely(nb_used > txq->tx_free_thresh)) {
803 PMD_TX_FREE_LOG(DEBUG,
804 "The number of descriptors needed to "
805 "transmit the packet exceeds the "
806 "RS bit threshold. This will impact "
808 "nb_used=%4u nb_free=%4u "
809 "tx_free_thresh=%4u. "
810 "(port=%d queue=%d)",
811 nb_used, txq->nb_tx_free,
813 txq->port_id, txq->queue_id);
815 * Loop here until there are enough TX
816 * descriptors or until the ring cannot be
819 while (nb_used > txq->nb_tx_free) {
820 if (txgbe_xmit_cleanup(txq) != 0) {
822 * Could not clean any
834 * By now there are enough free TX descriptors to transmit
839 * Set common flags of all TX Data Descriptors.
841 * The following bits must be set in all Data Descriptors:
842 * - TXGBE_TXD_DTYP_DATA
843 * - TXGBE_TXD_DCMD_DEXT
845 * The following bits must be set in the first Data Descriptor
846 * and are ignored in the other ones:
847 * - TXGBE_TXD_DCMD_IFCS
848 * - TXGBE_TXD_MAC_1588
849 * - TXGBE_TXD_DCMD_VLE
851 * The following bits must only be set in the last Data
853 * - TXGBE_TXD_CMD_EOP
855 * The following bits can be set in any Data Descriptor, but
856 * are only set in the last Data Descriptor:
859 cmd_type_len = TXGBE_TXD_FCS;
861 #ifdef RTE_LIBRTE_IEEE1588
862 if (ol_flags & PKT_TX_IEEE1588_TMST)
863 cmd_type_len |= TXGBE_TXD_1588;
868 if (ol_flags & PKT_TX_TCP_SEG) {
869 /* when TSO is on, the paylen in the descriptor is
870 * not the packet len but the tcp payload len
872 pkt_len -= (tx_offload.l2_len +
873 tx_offload.l3_len + tx_offload.l4_len);
875 (tx_pkt->ol_flags & PKT_TX_TUNNEL_MASK)
876 ? tx_offload.outer_l2_len +
877 tx_offload.outer_l3_len : 0;
881 * Setup the TX Advanced Context Descriptor if required
884 volatile struct txgbe_tx_ctx_desc *ctx_txd;
886 ctx_txd = (volatile struct txgbe_tx_ctx_desc *)
889 txn = &sw_ring[txe->next_id];
890 rte_prefetch0(&txn->mbuf->pool);
892 if (txe->mbuf != NULL) {
893 rte_pktmbuf_free_seg(txe->mbuf);
897 txgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
900 txe->last_id = tx_last;
901 tx_id = txe->next_id;
906 * Set up the TX Advanced Data Descriptor.
907 * This path is taken whether a new context
908 * descriptor was built or an existing one is reused
910 cmd_type_len |= tx_desc_ol_flags_to_cmdtype(ol_flags);
912 tx_desc_cksum_flags_to_olinfo(ol_flags);
913 olinfo_status |= TXGBE_TXD_IDX(ctx);
916 olinfo_status |= TXGBE_TXD_PAYLEN(pkt_len);
921 txn = &sw_ring[txe->next_id];
922 rte_prefetch0(&txn->mbuf->pool);
924 if (txe->mbuf != NULL)
925 rte_pktmbuf_free_seg(txe->mbuf);
929 * Set up Transmit Data Descriptor.
931 slen = m_seg->data_len;
932 buf_dma_addr = rte_mbuf_data_iova(m_seg);
933 txd->qw0 = rte_cpu_to_le_64(buf_dma_addr);
934 txd->dw2 = rte_cpu_to_le_32(cmd_type_len | slen);
935 txd->dw3 = rte_cpu_to_le_32(olinfo_status);
936 txe->last_id = tx_last;
937 tx_id = txe->next_id;
940 } while (m_seg != NULL);
943 * The last packet data descriptor needs End Of Packet (EOP)
945 cmd_type_len |= TXGBE_TXD_EOP;
946 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
948 txd->dw2 |= rte_cpu_to_le_32(cmd_type_len);
956 * Set the Transmit Descriptor Tail (TDT)
958 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
959 (uint16_t)txq->port_id, (uint16_t)txq->queue_id,
960 (uint16_t)tx_id, (uint16_t)nb_tx);
961 txgbe_set32_relaxed(txq->tdt_reg_addr, tx_id);
962 txq->tx_tail = tx_id;
967 /*********************************************************************
971 **********************************************************************/
973 txgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
978 struct txgbe_tx_queue *txq = (struct txgbe_tx_queue *)tx_queue;
980 for (i = 0; i < nb_pkts; i++) {
982 ol_flags = m->ol_flags;
985 * Check if packet meets requirements for number of segments
987 * NOTE: for txgbe it's always (40 - WTHRESH) for both TSO and
991 if (m->nb_segs > TXGBE_TX_MAX_SEG - txq->wthresh) {
996 if (ol_flags & TXGBE_TX_OFFLOAD_NOTSUP_MASK) {
997 rte_errno = ENOTSUP;
1001 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1002 ret = rte_validate_tx_offload(m);
1008 ret = rte_net_intel_cksum_prepare(m);
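/*
 * Illustrative sketch (not part of the driver, kept under #if 0):
 * applications reach the checks above through rte_eth_tx_prepare(),
 * typically right before rte_eth_tx_burst().  port_id and queue_id are
 * example parameters.
 */
#if 0
static uint16_t
example_prepare_and_send(uint16_t port_id, uint16_t queue_id,
			 struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id,
					      pkts, nb_pkts);

	if (nb_prep != nb_pkts) {
		/* pkts[nb_prep] was rejected; rte_errno says why */
	}
	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}
#endif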
1018 /*********************************************************************
1022 **********************************************************************/
1023 /* @note: update txgbe_dev_supported_ptypes_get() if anything changes here. */
1024 static inline uint32_t
1025 txgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptid_mask)
1027 uint16_t ptid = TXGBE_RXD_PTID(pkt_info);
1031 return txgbe_decode_ptype(ptid);
1034 static inline uint64_t
1035 txgbe_rxd_pkt_info_to_pkt_flags(uint32_t pkt_info)
1037 static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1038 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1039 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1040 PKT_RX_RSS_HASH, 0, 0, 0,
1041 0, 0, 0, PKT_RX_FDIR,
1043 #ifdef RTE_LIBRTE_IEEE1588
1044 static uint64_t ip_pkt_etqf_map[8] = {
1045 0, 0, 0, PKT_RX_IEEE1588_PTP,
1048 int etfid = txgbe_etflt_id(TXGBE_RXD_PTID(pkt_info));
1049 if (likely(-1 != etfid))
1050 return ip_pkt_etqf_map[etfid] |
1051 ip_rss_types_map[TXGBE_RXD_RSSTYPE(pkt_info)];
1053 return ip_rss_types_map[TXGBE_RXD_RSSTYPE(pkt_info)];
1055 return ip_rss_types_map[TXGBE_RXD_RSSTYPE(pkt_info)];
1059 static inline uint64_t
1060 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1065 * Check only whether a VLAN is present.
1066 * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1067 * that can be found from the rte_eth_rxmode.offloads flag
1069 pkt_flags = (rx_status & TXGBE_RXD_STAT_VLAN &&
1070 vlan_flags & PKT_RX_VLAN_STRIPPED)
1073 #ifdef RTE_LIBRTE_IEEE1588
1074 if (rx_status & TXGBE_RXD_STAT_1588)
1075 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1080 static inline uint64_t
1081 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1083 uint64_t pkt_flags = 0;
1085 /* checksum offload can't be disabled */
1086 if (rx_status & TXGBE_RXD_STAT_IPCS) {
1087 pkt_flags |= (rx_status & TXGBE_RXD_ERR_IPCS
1088 ? PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD);
1091 if (rx_status & TXGBE_RXD_STAT_L4CS) {
1092 pkt_flags |= (rx_status & TXGBE_RXD_ERR_L4CS
1093 ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD);
1096 if (rx_status & TXGBE_RXD_STAT_EIPCS &&
1097 rx_status & TXGBE_RXD_ERR_EIPCS) {
1098 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
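/*
 * Illustrative sketch (not part of the driver, kept under #if 0): how an
 * application would consume the flags set above.  Both "good" and "bad"
 * checksum bits are reported, so an unchecked packet can be told apart
 * from a failed check.
 */
#if 0
static int
example_l4_csum_ok(const struct rte_mbuf *m)
{
	uint64_t csum = m->ol_flags & PKT_RX_L4_CKSUM_MASK;

	return csum == PKT_RX_L4_CKSUM_GOOD ||
	       csum == PKT_RX_L4_CKSUM_UNKNOWN;
}
#endif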
1105 * LOOK_AHEAD defines how many desc statuses to check beyond the
1106 * current descriptor.
1107 * It must be a pound define for optimal performance.
1108 * Do not change the value of LOOK_AHEAD, as the txgbe_rx_scan_hw_ring
1109 * function only works with LOOK_AHEAD=8.
1111 #define LOOK_AHEAD 8
1112 #if (LOOK_AHEAD != 8)
1113 #error "PMD TXGBE: LOOK_AHEAD must be 8\n"
1116 txgbe_rx_scan_hw_ring(struct txgbe_rx_queue *rxq)
1118 volatile struct txgbe_rx_desc *rxdp;
1119 struct txgbe_rx_entry *rxep;
1120 struct rte_mbuf *mb;
1124 uint32_t s[LOOK_AHEAD];
1125 uint32_t pkt_info[LOOK_AHEAD];
1126 int i, j, nb_rx = 0;
1129 /* get references to current descriptor and S/W ring entry */
1130 rxdp = &rxq->rx_ring[rxq->rx_tail];
1131 rxep = &rxq->sw_ring[rxq->rx_tail];
1133 status = rxdp->qw1.lo.status;
1134 /* check to make sure there is at least 1 packet to receive */
1135 if (!(status & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD)))
1139 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1140 * reference packets that are ready to be received.
1142 for (i = 0; i < RTE_PMD_TXGBE_RX_MAX_BURST;
1143 i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1144 /* Read desc statuses backwards to avoid race condition */
1145 for (j = 0; j < LOOK_AHEAD; j++)
1146 s[j] = rte_le_to_cpu_32(rxdp[j].qw1.lo.status);
1150 /* Compute how many status bits were set */
1151 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1152 (s[nb_dd] & TXGBE_RXD_STAT_DD); nb_dd++)
1155 for (j = 0; j < nb_dd; j++)
1156 pkt_info[j] = rte_le_to_cpu_32(rxdp[j].qw0.dw0);
1160 /* Translate descriptor info to mbuf format */
1161 for (j = 0; j < nb_dd; ++j) {
1163 pkt_len = rte_le_to_cpu_16(rxdp[j].qw1.hi.len) -
1165 mb->data_len = pkt_len;
1166 mb->pkt_len = pkt_len;
1167 mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].qw1.hi.tag);
1169 /* convert descriptor fields to rte mbuf flags */
1170 pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1172 pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1174 txgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1175 mb->ol_flags = pkt_flags;
1177 txgbe_rxd_pkt_info_to_pkt_type(pkt_info[j],
1178 rxq->pkt_type_mask);
1180 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1182 rte_le_to_cpu_32(rxdp[j].qw0.dw1);
1183 else if (pkt_flags & PKT_RX_FDIR) {
1184 mb->hash.fdir.hash =
1185 rte_le_to_cpu_16(rxdp[j].qw0.hi.csum) &
1186 TXGBE_ATR_HASH_MASK;
1188 rte_le_to_cpu_16(rxdp[j].qw0.hi.ipid);
1192 /* Move mbuf pointers from the S/W ring to the stage */
1193 for (j = 0; j < LOOK_AHEAD; ++j)
1194 rxq->rx_stage[i + j] = rxep[j].mbuf;
1196 /* stop if not all LOOK_AHEAD descriptors were done (nothing more is ready) */
1197 if (nb_dd != LOOK_AHEAD)
1201 /* clear software ring entries so we can cleanup correctly */
1202 for (i = 0; i < nb_rx; ++i)
1203 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1209 txgbe_rx_alloc_bufs(struct txgbe_rx_queue *rxq, bool reset_mbuf)
1211 volatile struct txgbe_rx_desc *rxdp;
1212 struct txgbe_rx_entry *rxep;
1213 struct rte_mbuf *mb;
1218 /* allocate buffers in bulk directly into the S/W ring */
1219 alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1220 rxep = &rxq->sw_ring[alloc_idx];
1221 diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1222 rxq->rx_free_thresh);
1223 if (unlikely(diag != 0))
1226 rxdp = &rxq->rx_ring[alloc_idx];
1227 for (i = 0; i < rxq->rx_free_thresh; ++i) {
1228 /* populate the static rte mbuf fields */
1231 mb->port = rxq->port_id;
1233 rte_mbuf_refcnt_set(mb, 1);
1234 mb->data_off = RTE_PKTMBUF_HEADROOM;
1236 /* populate the descriptors */
1237 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1238 TXGBE_RXD_HDRADDR(&rxdp[i], 0);
1239 TXGBE_RXD_PKTADDR(&rxdp[i], dma_addr);
1242 /* update state of internal queue structure */
1243 rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1244 if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1245 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1251 static inline uint16_t
1252 txgbe_rx_fill_from_stage(struct txgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1255 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1258 /* how many packets are ready to return? */
1259 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1261 /* copy mbuf pointers to the application's packet list */
1262 for (i = 0; i < nb_pkts; ++i)
1263 rx_pkts[i] = stage[i];
1265 /* update internal queue state */
1266 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1267 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1272 static inline uint16_t
1273 txgbe_rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1276 struct txgbe_rx_queue *rxq = (struct txgbe_rx_queue *)rx_queue;
1277 struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
1280 /* Any previously recv'd pkts will be returned from the Rx stage */
1281 if (rxq->rx_nb_avail)
1282 return txgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1284 /* Scan the H/W ring for packets to receive */
1285 nb_rx = (uint16_t)txgbe_rx_scan_hw_ring(rxq);
1287 /* update internal queue state */
1288 rxq->rx_next_avail = 0;
1289 rxq->rx_nb_avail = nb_rx;
1290 rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1292 /* if required, allocate new buffers to replenish descriptors */
1293 if (rxq->rx_tail > rxq->rx_free_trigger) {
1294 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1296 if (txgbe_rx_alloc_bufs(rxq, true) != 0) {
1299 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1300 "queue_id=%u", (uint16_t)rxq->port_id,
1301 (uint16_t)rxq->queue_id);
1303 dev->data->rx_mbuf_alloc_failed +=
1304 rxq->rx_free_thresh;
1307 * Need to rewind any previous receives if we cannot
1308 * allocate new buffers to replenish the old ones.
1310 rxq->rx_nb_avail = 0;
1311 rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1312 for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1313 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1318 /* update tail pointer */
1320 txgbe_set32_relaxed(rxq->rdt_reg_addr, cur_free_trigger);
1323 if (rxq->rx_tail >= rxq->nb_rx_desc)
1326 /* received any packets this loop? */
1327 if (rxq->rx_nb_avail)
1328 return txgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1333 /* split requests into chunks of size RTE_PMD_TXGBE_RX_MAX_BURST */
1335 txgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1340 if (unlikely(nb_pkts == 0))
1343 if (likely(nb_pkts <= RTE_PMD_TXGBE_RX_MAX_BURST))
1344 return txgbe_rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1346 /* request is relatively large, chunk it up */
1351 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_TXGBE_RX_MAX_BURST);
1352 ret = txgbe_rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1353 nb_rx = (uint16_t)(nb_rx + ret);
1354 nb_pkts = (uint16_t)(nb_pkts - ret);
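/*
 * Illustrative sketch (not part of the driver, kept under #if 0): the
 * bulk-alloc receive path above is what a plain polling loop ends up
 * calling through rte_eth_rx_burst().  The burst size, port_id and
 * queue_id are example values.
 */
#if 0
static void
example_rx_poll(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t i, nb;

	nb = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts));
	for (i = 0; i < nb; i++)
		rte_pktmbuf_free(pkts[i]);	/* application work goes here */
}
#endif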
1363 txgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1366 struct txgbe_rx_queue *rxq;
1367 volatile struct txgbe_rx_desc *rx_ring;
1368 volatile struct txgbe_rx_desc *rxdp;
1369 struct txgbe_rx_entry *sw_ring;
1370 struct txgbe_rx_entry *rxe;
1371 struct rte_mbuf *rxm;
1372 struct rte_mbuf *nmb;
1373 struct txgbe_rx_desc rxd;
1386 rx_id = rxq->rx_tail;
1387 rx_ring = rxq->rx_ring;
1388 sw_ring = rxq->sw_ring;
1389 struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
1390 while (nb_rx < nb_pkts) {
1392 * The order of operations here is important as the DD status
1393 * bit must not be read after any other descriptor fields.
1394 * rx_ring and rxdp are pointing to volatile data so the order
1395 * of accesses cannot be reordered by the compiler. If they were
1396 * not volatile, they could be reordered which could lead to
1397 * using invalid descriptor fields when read from rxd.
1399 rxdp = &rx_ring[rx_id];
1400 staterr = rxdp->qw1.lo.status;
1401 if (!(staterr & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD)))
1408 * If the TXGBE_RXD_STAT_EOP flag is not set, the RX packet
1409 * is likely to be invalid and to be dropped by the various
1410 * validation checks performed by the network stack.
1412 * Allocate a new mbuf to replenish the RX ring descriptor.
1413 * If the allocation fails:
1414 * - arrange for that RX descriptor to be the first one
1415 * being parsed the next time the receive function is
1416 * invoked [on the same queue].
1418 * - Stop parsing the RX ring and return immediately.
1420 * This policy does not drop the packet received in the RX
1421 * descriptor for which the allocation of a new mbuf failed.
1422 * Thus, it allows that packet to be retrieved later if
1423 * mbufs have been freed in the meantime.
1424 * As a side effect, holding RX descriptors instead of
1425 * systematically giving them back to the NIC may lead to
1426 * RX ring exhaustion situations.
1427 * However, the NIC can gracefully prevent such situations
1428 * from happening by sending specific "back-pressure" flow control
1429 * frames to its peer(s).
1431 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1432 "ext_err_stat=0x%08x pkt_len=%u",
1433 (uint16_t)rxq->port_id, (uint16_t)rxq->queue_id,
1434 (uint16_t)rx_id, (uint32_t)staterr,
1435 (uint16_t)rte_le_to_cpu_16(rxd.qw1.hi.len));
1437 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1439 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1440 "queue_id=%u", (uint16_t)rxq->port_id,
1441 (uint16_t)rxq->queue_id);
1442 dev->data->rx_mbuf_alloc_failed++;
1447 rxe = &sw_ring[rx_id];
1449 if (rx_id == rxq->nb_rx_desc)
1452 /* Prefetch next mbuf while processing current one. */
1453 rte_txgbe_prefetch(sw_ring[rx_id].mbuf);
1456 * When next RX descriptor is on a cache-line boundary,
1457 * prefetch the next 4 RX descriptors and the next 8 pointers
1460 if ((rx_id & 0x3) == 0) {
1461 rte_txgbe_prefetch(&rx_ring[rx_id]);
1462 rte_txgbe_prefetch(&sw_ring[rx_id]);
1467 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1468 TXGBE_RXD_HDRADDR(rxdp, 0);
1469 TXGBE_RXD_PKTADDR(rxdp, dma_addr);
1472 * Initialize the returned mbuf.
1473 * 1) setup generic mbuf fields:
1474 * - number of segments,
1477 * - RX port identifier.
1478 * 2) integrate hardware offload data, if any:
1479 * - RSS flag & hash,
1480 * - IP checksum flag,
1481 * - VLAN TCI, if any,
1484 pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.qw1.hi.len) -
1486 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1487 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1490 rxm->pkt_len = pkt_len;
1491 rxm->data_len = pkt_len;
1492 rxm->port = rxq->port_id;
1494 pkt_info = rte_le_to_cpu_32(rxd.qw0.dw0);
1495 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1496 rxm->vlan_tci = rte_le_to_cpu_16(rxd.qw1.hi.tag);
1498 pkt_flags = rx_desc_status_to_pkt_flags(staterr,
1500 pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1501 pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1502 rxm->ol_flags = pkt_flags;
1503 rxm->packet_type = txgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1504 rxq->pkt_type_mask);
1506 if (likely(pkt_flags & PKT_RX_RSS_HASH)) {
1507 rxm->hash.rss = rte_le_to_cpu_32(rxd.qw0.dw1);
1508 } else if (pkt_flags & PKT_RX_FDIR) {
1509 rxm->hash.fdir.hash =
1510 rte_le_to_cpu_16(rxd.qw0.hi.csum) &
1511 TXGBE_ATR_HASH_MASK;
1512 rxm->hash.fdir.id = rte_le_to_cpu_16(rxd.qw0.hi.ipid);
1515 * Store the mbuf address into the next entry of the array
1516 * of returned packets.
1518 rx_pkts[nb_rx++] = rxm;
1520 rxq->rx_tail = rx_id;
1523 * If the number of free RX descriptors is greater than the RX free
1524 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1526 * Update the RDT with the value of the last processed RX descriptor
1527 * minus 1, to guarantee that the RDT register is never equal to the
1528 * RDH register, which creates a "full" ring situation from the
1529 * hardware point of view...
1531 nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
1532 if (nb_hold > rxq->rx_free_thresh) {
1533 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1534 "nb_hold=%u nb_rx=%u",
1535 (uint16_t)rxq->port_id, (uint16_t)rxq->queue_id,
1536 (uint16_t)rx_id, (uint16_t)nb_hold,
1538 rx_id = (uint16_t)((rx_id == 0) ?
1539 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1540 txgbe_set32(rxq->rdt_reg_addr, rx_id);
1543 rxq->nb_rx_hold = nb_hold;
1548 * txgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1550 * Fill the following info in the HEAD buffer of the Rx cluster:
1551 * - RX port identifier
1552 * - hardware offload data, if any:
1554 * - IP checksum flag
1555 * - VLAN TCI, if any
1557 * @head HEAD of the packet cluster
1558 * @desc HW descriptor to get data from
1559 * @rxq Pointer to the Rx queue
1562 txgbe_fill_cluster_head_buf(struct rte_mbuf *head, struct txgbe_rx_desc *desc,
1563 struct txgbe_rx_queue *rxq, uint32_t staterr)
1568 head->port = rxq->port_id;
1570 /* The vlan_tci field is only valid when PKT_RX_VLAN is
1571 * set in the pkt_flags field.
1573 head->vlan_tci = rte_le_to_cpu_16(desc->qw1.hi.tag);
1574 pkt_info = rte_le_to_cpu_32(desc->qw0.dw0);
1575 pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1576 pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1577 pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1578 head->ol_flags = pkt_flags;
1579 head->packet_type = txgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1580 rxq->pkt_type_mask);
1582 if (likely(pkt_flags & PKT_RX_RSS_HASH)) {
1583 head->hash.rss = rte_le_to_cpu_32(desc->qw0.dw1);
1584 } else if (pkt_flags & PKT_RX_FDIR) {
1585 head->hash.fdir.hash = rte_le_to_cpu_16(desc->qw0.hi.csum)
1586 & TXGBE_ATR_HASH_MASK;
1587 head->hash.fdir.id = rte_le_to_cpu_16(desc->qw0.hi.ipid);
1592 * txgbe_recv_pkts_lro - receive handler for the LRO case.
1594 * @rx_queue Rx queue handle
1595 * @rx_pkts table of received packets
1596 * @nb_pkts size of rx_pkts table
1597 * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1599 * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1600 * additional ring (sw_sc_ring) of txgbe_scattered_rx_entry's that holds the relevant RSC info.
1602 * We use the same logic as in Linux and in FreeBSD txgbe drivers:
1603 * 1) When non-EOP RSC completion arrives:
1604 * a) Update the HEAD of the current RSC aggregation cluster with the new
1605 * segment's data length.
1606 * b) Set the "next" pointer of the current segment to point to the segment
1607 * at the NEXTP index.
1608 * c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1609 * in the sw_rsc_ring.
1610 * 2) When EOP arrives we just update the cluster's total length and offload
1611 * flags and deliver the cluster up to the upper layers. In our case - put it
1612 * in the rx_pkts table.
1614 * Returns the number of received packets/clusters (according to the "bulk
1615 * receive" interface).
1617 static inline uint16_t
1618 txgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1621 struct txgbe_rx_queue *rxq = rx_queue;
1622 struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
1623 volatile struct txgbe_rx_desc *rx_ring = rxq->rx_ring;
1624 struct txgbe_rx_entry *sw_ring = rxq->sw_ring;
1625 struct txgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1626 uint16_t rx_id = rxq->rx_tail;
1628 uint16_t nb_hold = rxq->nb_rx_hold;
1629 uint16_t prev_id = rxq->rx_tail;
1631 while (nb_rx < nb_pkts) {
1633 struct txgbe_rx_entry *rxe;
1634 struct txgbe_scattered_rx_entry *sc_entry;
1635 struct txgbe_scattered_rx_entry *next_sc_entry = NULL;
1636 struct txgbe_rx_entry *next_rxe = NULL;
1637 struct rte_mbuf *first_seg;
1638 struct rte_mbuf *rxm;
1639 struct rte_mbuf *nmb = NULL;
1640 struct txgbe_rx_desc rxd;
1643 volatile struct txgbe_rx_desc *rxdp;
1648 * The code in this whole file uses the volatile pointer to
1649 * ensure the read ordering of the status and the rest of the
1650 * descriptor fields (on the compiler level only!!!). This is so
1651 * UGLY - why not to just use the compiler barrier instead? DPDK
1652 * even has the rte_compiler_barrier() for that.
1654 * But most importantly this is just wrong because this doesn't
1655 * ensure memory ordering in a general case at all. For
1656 * instance, DPDK is supposed to work on Power CPUs where
1657 * compiler barrier may just not be enough!
1659 * I tried to write only this function properly to have a
1660 * starting point (as a part of an LRO/RSC series) but the
1661 * compiler cursed at me when I tried to cast away the
1662 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1663 * keeping it the way it is for now.
1665 * The code in this file is broken in so many other places and
1666 * will just not work on a big endian CPU anyway therefore the
1667 * lines below will have to be revisited together with the rest
1671 * - Get rid of "volatile" and let the compiler do its job.
1672 * - Use the proper memory barrier (rte_rmb()) to ensure the
1673 * memory ordering below.
1675 rxdp = &rx_ring[rx_id];
1676 staterr = rte_le_to_cpu_32(rxdp->qw1.lo.status);
1678 if (!(staterr & TXGBE_RXD_STAT_DD))
1683 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1684 "staterr=0x%x data_len=%u",
1685 rxq->port_id, rxq->queue_id, rx_id, staterr,
1686 rte_le_to_cpu_16(rxd.qw1.hi.len));
1689 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1691 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1692 "port_id=%u queue_id=%u",
1693 rxq->port_id, rxq->queue_id);
1695 dev->data->rx_mbuf_alloc_failed++;
1698 } else if (nb_hold > rxq->rx_free_thresh) {
1699 uint16_t next_rdt = rxq->rx_free_trigger;
1701 if (!txgbe_rx_alloc_bufs(rxq, false)) {
1703 txgbe_set32_relaxed(rxq->rdt_reg_addr,
1705 nb_hold -= rxq->rx_free_thresh;
1707 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1708 "port_id=%u queue_id=%u",
1709 rxq->port_id, rxq->queue_id);
1711 dev->data->rx_mbuf_alloc_failed++;
1717 rxe = &sw_ring[rx_id];
1718 eop = staterr & TXGBE_RXD_STAT_EOP;
1720 next_id = rx_id + 1;
1721 if (next_id == rxq->nb_rx_desc)
1724 /* Prefetch next mbuf while processing current one. */
1725 rte_txgbe_prefetch(sw_ring[next_id].mbuf);
1728 * When next RX descriptor is on a cache-line boundary,
1729 * prefetch the next 4 RX descriptors and the next 4 pointers
1732 if ((next_id & 0x3) == 0) {
1733 rte_txgbe_prefetch(&rx_ring[next_id]);
1734 rte_txgbe_prefetch(&sw_ring[next_id]);
1741 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1743 * Update RX descriptor with the physical address of the
1744 * new data buffer of the new allocated mbuf.
1748 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1749 TXGBE_RXD_HDRADDR(rxdp, 0);
1750 TXGBE_RXD_PKTADDR(rxdp, dma);
1756 * Set data length & data buffer address of mbuf.
1758 data_len = rte_le_to_cpu_16(rxd.qw1.hi.len);
1759 rxm->data_len = data_len;
1764 * Get next descriptor index:
1765 * - For RSC it's in the NEXTP field.
1766 * - For a scattered packet - it's just a following
1769 if (TXGBE_RXD_RSCCNT(rxd.qw0.dw0))
1770 nextp_id = TXGBE_RXD_NEXTP(staterr);
1774 next_sc_entry = &sw_sc_ring[nextp_id];
1775 next_rxe = &sw_ring[nextp_id];
1776 rte_txgbe_prefetch(next_rxe);
1779 sc_entry = &sw_sc_ring[rx_id];
1780 first_seg = sc_entry->fbuf;
1781 sc_entry->fbuf = NULL;
1784 * If this is the first buffer of the received packet,
1785 * set the pointer to the first mbuf of the packet and
1786 * initialize its context.
1787 * Otherwise, update the total length and the number of segments
1788 * of the current scattered packet, and update the pointer to
1789 * the last mbuf of the current packet.
1791 if (first_seg == NULL) {
1793 first_seg->pkt_len = data_len;
1794 first_seg->nb_segs = 1;
1796 first_seg->pkt_len += data_len;
1797 first_seg->nb_segs++;
1804 * If this is not the last buffer of the received packet, update
1805 * the pointer to the first mbuf at the NEXTP entry in the
1806 * sw_sc_ring and continue to parse the RX ring.
1808 if (!eop && next_rxe) {
1809 rxm->next = next_rxe->mbuf;
1810 next_sc_entry->fbuf = first_seg;
1814 /* Initialize the first mbuf of the returned packet */
1815 txgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
1818 * Deal with the case when HW CRC strip is disabled.
1819 * That can't happen when LRO is enabled, but still could
1820 * happen for scattered RX mode.
1822 first_seg->pkt_len -= rxq->crc_len;
1823 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1824 struct rte_mbuf *lp;
1826 for (lp = first_seg; lp->next != rxm; lp = lp->next)
1829 first_seg->nb_segs--;
1830 lp->data_len -= rxq->crc_len - rxm->data_len;
1832 rte_pktmbuf_free_seg(rxm);
1834 rxm->data_len -= rxq->crc_len;
1837 /* Prefetch data of first segment, if configured to do so. */
1838 rte_packet_prefetch((char *)first_seg->buf_addr +
1839 first_seg->data_off);
1842 * Store the mbuf address into the next entry of the array
1843 * of returned packets.
1845 rx_pkts[nb_rx++] = first_seg;
1849 * Record index of the next RX descriptor to probe.
1851 rxq->rx_tail = rx_id;
1854 * If the number of free RX descriptors is greater than the RX free
1855 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1857 * Update the RDT with the value of the last processed RX descriptor
1858 * minus 1, to guarantee that the RDT register is never equal to the
1859 * RDH register, which creates a "full" ring situation from the
1860 * hardware point of view...
1862 if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1863 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1864 "nb_hold=%u nb_rx=%u",
1865 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1868 txgbe_set32_relaxed(rxq->rdt_reg_addr, prev_id);
1872 rxq->nb_rx_hold = nb_hold;
1877 txgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1880 return txgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
1884 txgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1887 return txgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
1891 txgbe_get_rx_queue_offloads(struct rte_eth_dev *dev __rte_unused)
1893 return DEV_RX_OFFLOAD_VLAN_STRIP;
1897 txgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
1900 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
1901 struct rte_eth_dev_sriov *sriov = &RTE_ETH_DEV_SRIOV(dev);
1903 offloads = DEV_RX_OFFLOAD_IPV4_CKSUM |
1904 DEV_RX_OFFLOAD_UDP_CKSUM |
1905 DEV_RX_OFFLOAD_TCP_CKSUM |
1906 DEV_RX_OFFLOAD_KEEP_CRC |
1907 DEV_RX_OFFLOAD_JUMBO_FRAME |
1908 DEV_RX_OFFLOAD_VLAN_FILTER |
1909 DEV_RX_OFFLOAD_RSS_HASH |
1910 DEV_RX_OFFLOAD_SCATTER;
1912 if (!txgbe_is_vf(dev))
1913 offloads |= (DEV_RX_OFFLOAD_VLAN_FILTER |
1914 DEV_RX_OFFLOAD_QINQ_STRIP |
1915 DEV_RX_OFFLOAD_VLAN_EXTEND);
1918 * RSC is only supported by PF devices in a non-SR-IOV
1921 if (hw->mac.type == txgbe_mac_raptor && !sriov->active)
1922 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
1924 if (hw->mac.type == txgbe_mac_raptor)
1925 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
1927 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
1932 static void __rte_cold
1933 txgbe_tx_queue_release_mbufs(struct txgbe_tx_queue *txq)
1937 if (txq->sw_ring != NULL) {
1938 for (i = 0; i < txq->nb_tx_desc; i++) {
1939 if (txq->sw_ring[i].mbuf != NULL) {
1940 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1941 txq->sw_ring[i].mbuf = NULL;
1947 static void __rte_cold
1948 txgbe_tx_free_swring(struct txgbe_tx_queue *txq)
1951 txq->sw_ring != NULL)
1952 rte_free(txq->sw_ring);
1955 static void __rte_cold
1956 txgbe_tx_queue_release(struct txgbe_tx_queue *txq)
1958 if (txq != NULL && txq->ops != NULL) {
1959 txq->ops->release_mbufs(txq);
1960 txq->ops->free_swring(txq);
1966 txgbe_dev_tx_queue_release(void *txq)
1968 txgbe_tx_queue_release(txq);
1971 /* (Re)set dynamic txgbe_tx_queue fields to defaults */
1972 static void __rte_cold
1973 txgbe_reset_tx_queue(struct txgbe_tx_queue *txq)
1975 static const struct txgbe_tx_desc zeroed_desc = {0};
1976 struct txgbe_tx_entry *txe = txq->sw_ring;
1979 /* Zero out HW ring memory */
1980 for (i = 0; i < txq->nb_tx_desc; i++)
1981 txq->tx_ring[i] = zeroed_desc;
1983 /* Initialize SW ring entries */
1984 prev = (uint16_t)(txq->nb_tx_desc - 1);
1985 for (i = 0; i < txq->nb_tx_desc; i++) {
1986 volatile struct txgbe_tx_desc *txd = &txq->tx_ring[i];
1988 txd->dw3 = rte_cpu_to_le_32(TXGBE_TXD_DD);
1991 txe[prev].next_id = i;
1995 txq->tx_next_dd = (uint16_t)(txq->tx_free_thresh - 1);
1999 * Always allow 1 descriptor to be un-allocated to avoid
2000 * a H/W race condition
2002 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2003 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2005 memset((void *)&txq->ctx_cache, 0,
2006 TXGBE_CTX_NUM * sizeof(struct txgbe_ctx_info));
2009 static const struct txgbe_txq_ops def_txq_ops = {
2010 .release_mbufs = txgbe_tx_queue_release_mbufs,
2011 .free_swring = txgbe_tx_free_swring,
2012 .reset = txgbe_reset_tx_queue,
2015 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2016 * the queue parameters. Used in tx_queue_setup by primary process and then
2017 * in dev_init by secondary process when attaching to an existing ethdev.
2020 txgbe_set_tx_function(struct rte_eth_dev *dev, struct txgbe_tx_queue *txq)
2022 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2023 if (txq->offloads == 0 &&
2024 txq->tx_free_thresh >= RTE_PMD_TXGBE_TX_MAX_BURST) {
2025 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2026 dev->tx_pkt_burst = txgbe_xmit_pkts_simple;
2027 dev->tx_pkt_prepare = NULL;
2029 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2031 " - offloads = 0x%" PRIx64,
2034 " - tx_free_thresh = %lu [RTE_PMD_TXGBE_TX_MAX_BURST=%lu]",
2035 (unsigned long)txq->tx_free_thresh,
2036 (unsigned long)RTE_PMD_TXGBE_TX_MAX_BURST);
2037 dev->tx_pkt_burst = txgbe_xmit_pkts;
2038 dev->tx_pkt_prepare = txgbe_prep_pkts;
2043 txgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2051 txgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2053 uint64_t tx_offload_capa;
2056 DEV_TX_OFFLOAD_VLAN_INSERT |
2057 DEV_TX_OFFLOAD_IPV4_CKSUM |
2058 DEV_TX_OFFLOAD_UDP_CKSUM |
2059 DEV_TX_OFFLOAD_TCP_CKSUM |
2060 DEV_TX_OFFLOAD_SCTP_CKSUM |
2061 DEV_TX_OFFLOAD_TCP_TSO |
2062 DEV_TX_OFFLOAD_UDP_TSO |
2063 DEV_TX_OFFLOAD_UDP_TNL_TSO |
2064 DEV_TX_OFFLOAD_IP_TNL_TSO |
2065 DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
2066 DEV_TX_OFFLOAD_GRE_TNL_TSO |
2067 DEV_TX_OFFLOAD_IPIP_TNL_TSO |
2068 DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
2069 DEV_TX_OFFLOAD_MULTI_SEGS;
2071 if (!txgbe_is_vf(dev))
2072 tx_offload_capa |= DEV_TX_OFFLOAD_QINQ_INSERT;
2074 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2076 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2078 return tx_offload_capa;
2082 txgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2085 unsigned int socket_id,
2086 const struct rte_eth_txconf *tx_conf)
2088 const struct rte_memzone *tz;
2089 struct txgbe_tx_queue *txq;
2090 struct txgbe_hw *hw;
2091 uint16_t tx_free_thresh;
2094 PMD_INIT_FUNC_TRACE();
2095 hw = TXGBE_DEV_HW(dev);
2097 offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2100 * Validate number of transmit descriptors.
2101 * It must not exceed hardware maximum, and must be multiple
2104 if (nb_desc % TXGBE_TXD_ALIGN != 0 ||
2105 nb_desc > TXGBE_RING_DESC_MAX ||
2106 nb_desc < TXGBE_RING_DESC_MIN) {
2111 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2112 * descriptors are used or if the number of descriptors required
2113 * to transmit a packet is greater than the number of free TX
2115 * One descriptor in the TX ring is used as a sentinel to avoid a
2116 * H/W race condition, hence the maximum threshold constraints.
2117 * When set to zero use default values.
2119 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2120 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2121 if (tx_free_thresh >= (nb_desc - 3)) {
2122 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the number of "
2123 "TX descriptors minus 3. (tx_free_thresh=%u "
2124 "port=%d queue=%d)",
2125 (unsigned int)tx_free_thresh,
2126 (int)dev->data->port_id, (int)queue_idx);
2130 if ((nb_desc % tx_free_thresh) != 0) {
2131 PMD_INIT_LOG(ERR, "tx_free_thresh must be a divisor of the "
2132 "number of TX descriptors. (tx_free_thresh=%u "
2133 "port=%d queue=%d)", (unsigned int)tx_free_thresh,
2134 (int)dev->data->port_id, (int)queue_idx);
2138 /* Free memory prior to re-allocation if needed... */
2139 if (dev->data->tx_queues[queue_idx] != NULL) {
2140 txgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2141 dev->data->tx_queues[queue_idx] = NULL;
2144 /* First allocate the tx queue data structure */
2145 txq = rte_zmalloc_socket("ethdev TX queue",
2146 sizeof(struct txgbe_tx_queue),
2147 RTE_CACHE_LINE_SIZE, socket_id);
2152 * Allocate TX ring hardware descriptors. A memzone large enough to
2153 * handle the maximum ring size is allocated in order to allow for
2154 * resizing in later calls to the queue setup function.
2156 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2157 sizeof(struct txgbe_tx_desc) * TXGBE_RING_DESC_MAX,
2158 TXGBE_ALIGN, socket_id);
2160 txgbe_tx_queue_release(txq);
2164 txq->nb_tx_desc = nb_desc;
2165 txq->tx_free_thresh = tx_free_thresh;
2166 txq->pthresh = tx_conf->tx_thresh.pthresh;
2167 txq->hthresh = tx_conf->tx_thresh.hthresh;
2168 txq->wthresh = tx_conf->tx_thresh.wthresh;
2169 txq->queue_id = queue_idx;
2170 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2171 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2172 txq->port_id = dev->data->port_id;
2173 txq->offloads = offloads;
2174 txq->ops = &def_txq_ops;
2175 txq->tx_deferred_start = tx_conf->tx_deferred_start;
2177 /* Set the tail and config register addresses using the VF layout
2178 * if a virtual function is detected.
2180 if (hw->mac.type == txgbe_mac_raptor_vf) {
2181 txq->tdt_reg_addr = TXGBE_REG_ADDR(hw, TXGBE_TXWP(queue_idx));
2182 txq->tdc_reg_addr = TXGBE_REG_ADDR(hw, TXGBE_TXCFG(queue_idx));
2184 txq->tdt_reg_addr = TXGBE_REG_ADDR(hw,
2185 TXGBE_TXWP(txq->reg_idx));
2186 txq->tdc_reg_addr = TXGBE_REG_ADDR(hw,
2187 TXGBE_TXCFG(txq->reg_idx));
2190 txq->tx_ring_phys_addr = TMZ_PADDR(tz);
2191 txq->tx_ring = (struct txgbe_tx_desc *)TMZ_VADDR(tz);
2193 /* Allocate software ring */
2194 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2195 sizeof(struct txgbe_tx_entry) * nb_desc,
2196 RTE_CACHE_LINE_SIZE, socket_id);
2197 if (txq->sw_ring == NULL) {
2198 txgbe_tx_queue_release(txq);
2201 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
2202 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2204 /* set up scalar TX function as appropriate */
2205 txgbe_set_tx_function(dev, txq);
2207 txq->ops->reset(txq);
2209 dev->data->tx_queues[queue_idx] = txq;
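/*
 * Illustrative sketch (not part of the driver, kept under #if 0): a typical
 * caller of the setup routine above.  nb_desc = 512 and tx_free_thresh = 32
 * are example values chosen to satisfy the constraints checked earlier
 * (assuming 512 is a multiple of TXGBE_TXD_ALIGN; the threshold is below
 * nb_desc - 3 and a divisor of nb_desc).  Leaving offloads at 0 also lets
 * txgbe_set_tx_function() pick the simple Tx path, assuming
 * RTE_PMD_TXGBE_TX_MAX_BURST is not larger than 32.
 */
#if 0
static int
example_setup_txq(uint16_t port_id, uint16_t queue_id)
{
	struct rte_eth_txconf txconf = {
		.tx_free_thresh = 32,
		.offloads = 0,
	};

	return rte_eth_tx_queue_setup(port_id, queue_id, 512,
				      rte_eth_dev_socket_id(port_id),
				      &txconf);
}
#endif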
2215 * txgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2217 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2218 * in the sw_rsc_ring is not set to NULL but rather points to the next
2219 * mbuf of this RSC aggregation (that has not been completed yet and still
2220 * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2221 * just free the first "nb_segs" segments of the cluster explicitly by calling
2222 * rte_pktmbuf_free_seg() on each.
2224 * @m scattered cluster head
2226 static void __rte_cold
2227 txgbe_free_sc_cluster(struct rte_mbuf *m)
2229 uint16_t i, nb_segs = m->nb_segs;
2230 struct rte_mbuf *next_seg;
2232 for (i = 0; i < nb_segs; i++) {
2234 rte_pktmbuf_free_seg(m);
2239 static void __rte_cold
2240 txgbe_rx_queue_release_mbufs(struct txgbe_rx_queue *rxq)
2244 if (rxq->sw_ring != NULL) {
2245 for (i = 0; i < rxq->nb_rx_desc; i++) {
2246 if (rxq->sw_ring[i].mbuf != NULL) {
2247 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2248 rxq->sw_ring[i].mbuf = NULL;
2251 if (rxq->rx_nb_avail) {
2252 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2253 struct rte_mbuf *mb;
2255 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2256 rte_pktmbuf_free_seg(mb);
2258 rxq->rx_nb_avail = 0;
2262 if (rxq->sw_sc_ring)
2263 for (i = 0; i < rxq->nb_rx_desc; i++)
2264 if (rxq->sw_sc_ring[i].fbuf) {
2265 txgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2266 rxq->sw_sc_ring[i].fbuf = NULL;
2270 static void __rte_cold
2271 txgbe_rx_queue_release(struct txgbe_rx_queue *rxq)
2274 txgbe_rx_queue_release_mbufs(rxq);
2275 rte_free(rxq->sw_ring);
2276 rte_free(rxq->sw_sc_ring);
2282 txgbe_dev_rx_queue_release(void *rxq)
2284 txgbe_rx_queue_release(rxq);
2288 * Check if Rx Burst Bulk Alloc function can be used.
2290 * 0: the preconditions are satisfied and the bulk allocation function can be used.
2292 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2293 * function must be used.
2295 static inline int __rte_cold
2296 check_rx_burst_bulk_alloc_preconditions(struct txgbe_rx_queue *rxq)
2301 * Make sure the following pre-conditions are satisfied:
2302 * rxq->rx_free_thresh >= RTE_PMD_TXGBE_RX_MAX_BURST
2303 * rxq->rx_free_thresh < rxq->nb_rx_desc
2304 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2305 * Scattered packets are not supported. This should be checked
2306 * outside of this function.
2308 if (!(rxq->rx_free_thresh >= RTE_PMD_TXGBE_RX_MAX_BURST)) {
2309 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2310 "rxq->rx_free_thresh=%d, "
2311 "RTE_PMD_TXGBE_RX_MAX_BURST=%d",
2312 rxq->rx_free_thresh, RTE_PMD_TXGBE_RX_MAX_BURST);
2314 } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2315 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2316 "rxq->rx_free_thresh=%d, "
2317 "rxq->nb_rx_desc=%d",
2318 rxq->rx_free_thresh, rxq->nb_rx_desc);
2320 } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2321 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2322 "rxq->nb_rx_desc=%d, "
2323 "rxq->rx_free_thresh=%d",
2324 rxq->nb_rx_desc, rxq->rx_free_thresh);
2331 /* Reset dynamic txgbe_rx_queue fields back to defaults */
2332 static void __rte_cold
2333 txgbe_reset_rx_queue(struct txgbe_adapter *adapter, struct txgbe_rx_queue *rxq)
2335 static const struct txgbe_rx_desc zeroed_desc = {
2336 {{0}, {0} }, {{0}, {0} } };
2338 uint16_t len = rxq->nb_rx_desc;
2341 * By default, the Rx queue setup function allocates enough memory for
2342 * TXGBE_RING_DESC_MAX. The Rx Burst bulk allocation function requires
2343 * extra memory at the end of the descriptor ring to be zero'd out.
2345 if (adapter->rx_bulk_alloc_allowed)
2346 /* zero out extra memory */
2347 len += RTE_PMD_TXGBE_RX_MAX_BURST;
2350 * Zero out HW ring memory. Zero out extra memory at the end of
2351 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2352 * reads extra memory as zeros.
2354 for (i = 0; i < len; i++)
2355 rxq->rx_ring[i] = zeroed_desc;
2358 * Initialize extra software ring entries. Space for these extra
2359 * entries is always allocated.
2361 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2362 for (i = rxq->nb_rx_desc; i < len; ++i)
2363 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2365 rxq->rx_nb_avail = 0;
2366 rxq->rx_next_avail = 0;
2367 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2369 rxq->nb_rx_hold = 0;
2370 rxq->pkt_first_seg = NULL;
2371 rxq->pkt_last_seg = NULL;
2375 txgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2378 unsigned int socket_id,
2379 const struct rte_eth_rxconf *rx_conf,
2380 struct rte_mempool *mp)
2382 const struct rte_memzone *rz;
2383 struct txgbe_rx_queue *rxq;
2384 struct txgbe_hw *hw;
2386 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
2389 PMD_INIT_FUNC_TRACE();
2390 hw = TXGBE_DEV_HW(dev);
2392 offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
2395 * Validate number of receive descriptors.
2396 * It must not exceed the hardware maximum and must be a multiple of TXGBE_RXD_ALIGN.
2399 if (nb_desc % TXGBE_RXD_ALIGN != 0 ||
2400 nb_desc > TXGBE_RING_DESC_MAX ||
2401 nb_desc < TXGBE_RING_DESC_MIN) {
2405 /* Free memory prior to re-allocation if needed... */
2406 if (dev->data->rx_queues[queue_idx] != NULL) {
2407 txgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2408 dev->data->rx_queues[queue_idx] = NULL;
2411 /* First allocate the rx queue data structure */
2412 rxq = rte_zmalloc_socket("ethdev RX queue",
2413 sizeof(struct txgbe_rx_queue),
2414 RTE_CACHE_LINE_SIZE, socket_id);
2418 rxq->nb_rx_desc = nb_desc;
2419 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2420 rxq->queue_id = queue_idx;
2421 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2422 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2423 rxq->port_id = dev->data->port_id;
2424 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2425 rxq->crc_len = RTE_ETHER_CRC_LEN;
2428 rxq->drop_en = rx_conf->rx_drop_en;
2429 rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2430 rxq->offloads = offloads;
2433 * The packet type field in the RX descriptor differs between NICs,
2434 * so set the packet type mask accordingly.
2436 rxq->pkt_type_mask = TXGBE_PTID_MASK;
2439 * Allocate RX ring hardware descriptors. A memzone large enough to
2440 * handle the maximum ring size is allocated in order to allow for
2441 * resizing in later calls to the queue setup function.
2443 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2444 RX_RING_SZ, TXGBE_ALIGN, socket_id);
2446 txgbe_rx_queue_release(rxq);
2451 * Zero init all the descriptors in the ring.
2453 memset(rz->addr, 0, RX_RING_SZ);
2456 * Set up VFRDT for the Virtual Function, if a VF is detected
2458 if (hw->mac.type == txgbe_mac_raptor_vf) {
2460 TXGBE_REG_ADDR(hw, TXGBE_RXWP(queue_idx));
2462 TXGBE_REG_ADDR(hw, TXGBE_RXRP(queue_idx));
2465 TXGBE_REG_ADDR(hw, TXGBE_RXWP(rxq->reg_idx));
2467 TXGBE_REG_ADDR(hw, TXGBE_RXRP(rxq->reg_idx));
2470 rxq->rx_ring_phys_addr = TMZ_PADDR(rz);
2471 rxq->rx_ring = (struct txgbe_rx_desc *)TMZ_VADDR(rz);
2474 * Certain constraints must be met in order to use the bulk buffer
2475 * allocation Rx burst function. If any of Rx queues doesn't meet them
2476 * the feature should be disabled for the whole port.
2478 if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2479 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2480 "preconditions - canceling the feature for "
2481 "the whole port[%d]",
2482 rxq->queue_id, rxq->port_id);
2483 adapter->rx_bulk_alloc_allowed = false;
2487 * Allocate software ring. Allow for space at the end of the
2488 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2489 * function does not access an invalid memory region.
2492 if (adapter->rx_bulk_alloc_allowed)
2493 len += RTE_PMD_TXGBE_RX_MAX_BURST;
2495 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2496 sizeof(struct txgbe_rx_entry) * len,
2497 RTE_CACHE_LINE_SIZE, socket_id);
2498 if (!rxq->sw_ring) {
2499 txgbe_rx_queue_release(rxq);
2504 * Always allocate even if it's not going to be needed in order to
2505 * simplify the code.
2507 * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2508 * be requested in txgbe_dev_rx_init(), which is called later from
2512 rte_zmalloc_socket("rxq->sw_sc_ring",
2513 sizeof(struct txgbe_scattered_rx_entry) * len,
2514 RTE_CACHE_LINE_SIZE, socket_id);
2515 if (!rxq->sw_sc_ring) {
2516 txgbe_rx_queue_release(rxq);
2520 PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2521 "dma_addr=0x%" PRIx64,
2522 rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2523 rxq->rx_ring_phys_addr);
2525 dev->data->rx_queues[queue_idx] = rxq;
2527 txgbe_reset_rx_queue(adapter, rxq);
2533 txgbe_dev_clear_queues(struct rte_eth_dev *dev)
2536 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
2538 PMD_INIT_FUNC_TRACE();
2540 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2541 struct txgbe_tx_queue *txq = dev->data->tx_queues[i];
2544 txq->ops->release_mbufs(txq);
2545 txq->ops->reset(txq);
2549 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2550 struct txgbe_rx_queue *rxq = dev->data->rx_queues[i];
2553 txgbe_rx_queue_release_mbufs(rxq);
2554 txgbe_reset_rx_queue(adapter, rxq);
2560 txgbe_dev_free_queues(struct rte_eth_dev *dev)
2564 PMD_INIT_FUNC_TRACE();
2566 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2567 txgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2568 dev->data->rx_queues[i] = NULL;
2570 dev->data->nb_rx_queues = 0;
2572 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2573 txgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2574 dev->data->tx_queues[i] = NULL;
2576 dev->data->nb_tx_queues = 0;
2580 * Receive Side Scaling (RSS)
2583 * The source and destination IP addresses of the IP header and the source
2584 * and destination ports of TCP/UDP headers, if any, of received packets are
2585 * hashed against a configurable random key to compute a 32-bit RSS hash result.
2586 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2587 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
2588 * RSS output index which is used as the RX queue index in which to store the received packet.
2590 * The following output is supplied in the RX write-back descriptor:
2591 * - 32-bit result of the Microsoft RSS hash function,
2592 * - 4-bit RSS type field.
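 *
 * For example, with 4 Rx queues and the default round-robin RETA programmed
 * by txgbe_rss_configure(), a packet whose hash is 0x1234ABCD is steered to
 * queue RETA[0xCD & 0x7F] = RETA[77] = 77 % 4 = 1 (illustrative values).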
2596 * Used as the default key.
2598 static uint8_t rss_intel_key[40] = {
2599 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
2600 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
2601 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2602 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
2603 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
2607 txgbe_rss_disable(struct rte_eth_dev *dev)
2609 struct txgbe_hw *hw;
2611 hw = TXGBE_DEV_HW(dev);
2613 wr32m(hw, TXGBE_RACTL, TXGBE_RACTL_RSSENA, 0);
2617 txgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2618 struct rte_eth_rss_conf *rss_conf)
2620 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2627 if (!txgbe_rss_update_sp(hw->mac.type)) {
2628 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2633 hash_key = rss_conf->rss_key;
2635 /* Fill in RSS hash key */
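/* The 40-byte key is written 4 bytes at a time into ten 32-bit RSSKEY
 * registers; LS32() places each byte at its little-endian position.
 */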
2636 for (i = 0; i < 10; i++) {
2637 rss_key = LS32(hash_key[(i * 4) + 0], 0, 0xFF);
2638 rss_key |= LS32(hash_key[(i * 4) + 1], 8, 0xFF);
2639 rss_key |= LS32(hash_key[(i * 4) + 2], 16, 0xFF);
2640 rss_key |= LS32(hash_key[(i * 4) + 3], 24, 0xFF);
2641 wr32a(hw, TXGBE_REG_RSSKEY, i, rss_key);
2645 /* Set configured hashing protocols */
2646 rss_hf = rss_conf->rss_hf & TXGBE_RSS_OFFLOAD_ALL;
2647 mrqc = rd32(hw, TXGBE_RACTL);
2648 mrqc &= ~TXGBE_RACTL_RSSMASK;
2649 if (rss_hf & ETH_RSS_IPV4)
2650 mrqc |= TXGBE_RACTL_RSSIPV4;
2651 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2652 mrqc |= TXGBE_RACTL_RSSIPV4TCP;
2653 if (rss_hf & ETH_RSS_IPV6 ||
2654 rss_hf & ETH_RSS_IPV6_EX)
2655 mrqc |= TXGBE_RACTL_RSSIPV6;
2656 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP ||
2657 rss_hf & ETH_RSS_IPV6_TCP_EX)
2658 mrqc |= TXGBE_RACTL_RSSIPV6TCP;
2659 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2660 mrqc |= TXGBE_RACTL_RSSIPV4UDP;
2661 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP ||
2662 rss_hf & ETH_RSS_IPV6_UDP_EX)
2663 mrqc |= TXGBE_RACTL_RSSIPV6UDP;
2666 mrqc |= TXGBE_RACTL_RSSENA;
2668 mrqc &= ~TXGBE_RACTL_RSSENA;
2670 wr32(hw, TXGBE_RACTL, mrqc);
2676 txgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2677 struct rte_eth_rss_conf *rss_conf)
2679 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2686 hash_key = rss_conf->rss_key;
2688 /* Return RSS hash key */
2689 for (i = 0; i < 10; i++) {
2690 rss_key = rd32a(hw, TXGBE_REG_RSSKEY, i);
2691 hash_key[(i * 4) + 0] = RS32(rss_key, 0, 0xFF);
2692 hash_key[(i * 4) + 1] = RS32(rss_key, 8, 0xFF);
2693 hash_key[(i * 4) + 2] = RS32(rss_key, 16, 0xFF);
2694 hash_key[(i * 4) + 3] = RS32(rss_key, 24, 0xFF);
2699 mrqc = rd32(hw, TXGBE_RACTL);
2700 if (mrqc & TXGBE_RACTL_RSSIPV4)
2701 rss_hf |= ETH_RSS_IPV4;
2702 if (mrqc & TXGBE_RACTL_RSSIPV4TCP)
2703 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2704 if (mrqc & TXGBE_RACTL_RSSIPV6)
2705 rss_hf |= ETH_RSS_IPV6 |
2707 if (mrqc & TXGBE_RACTL_RSSIPV6TCP)
2708 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP |
2709 ETH_RSS_IPV6_TCP_EX;
2710 if (mrqc & TXGBE_RACTL_RSSIPV4UDP)
2711 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2712 if (mrqc & TXGBE_RACTL_RSSIPV6UDP)
2713 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP |
2714 ETH_RSS_IPV6_UDP_EX;
2715 if (!(mrqc & TXGBE_RACTL_RSSENA))
2718 rss_hf &= TXGBE_RSS_OFFLOAD_ALL;
2720 rss_conf->rss_hf = rss_hf;
2725 txgbe_rss_configure(struct rte_eth_dev *dev)
2727 struct rte_eth_rss_conf rss_conf;
2728 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
2729 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2734 PMD_INIT_FUNC_TRACE();
2737 * Fill in redirection table
2738 * The byte-swap is needed because NIC registers are in
2739 * little-endian order.
2741 if (adapter->rss_reta_updated == 0) {
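/* Four one-byte RETA entries are packed per 32-bit RSSTBL register
 * (index i >> 2); j cycles the queue index over all enabled Rx queues.
 */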
2743 for (i = 0, j = 0; i < ETH_RSS_RETA_SIZE_128; i++, j++) {
2744 if (j == dev->data->nb_rx_queues)
2746 reta = (reta >> 8) | LS32(j, 24, 0xFF);
2748 wr32a(hw, TXGBE_REG_RSSTBL, i >> 2, reta);
2752 * Configure the RSS key and the RSS protocols used to compute
2753 * the RSS hash of input packets.
2755 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2756 if (rss_conf.rss_key == NULL)
2757 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2758 txgbe_dev_rss_hash_update(dev, &rss_conf);
2761 #define NUM_VFTA_REGISTERS 128
2762 #define NIC_RX_BUFFER_SIZE 0x200
2765 txgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
2767 struct rte_eth_vmdq_dcb_conf *cfg;
2768 struct txgbe_hw *hw;
2769 enum rte_eth_nb_pools num_pools;
2770 uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
2772 uint8_t nb_tcs; /* number of traffic classes */
2775 PMD_INIT_FUNC_TRACE();
2776 hw = TXGBE_DEV_HW(dev);
2777 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2778 num_pools = cfg->nb_queue_pools;
2779 /* Check we have a valid number of pools */
2780 if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
2781 txgbe_rss_disable(dev);
2784 /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
2785 nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
2788 * split rx buffer up into sections, each for 1 traffic class
2790 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
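/* e.g. 512 / 8 = 64 per TC with 8 traffic classes, 512 / 4 = 128 with 4 */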
2791 for (i = 0; i < nb_tcs; i++) {
2792 uint32_t rxpbsize = rd32(hw, TXGBE_PBRXSIZE(i));
2794 rxpbsize &= (~(0x3FF << 10));
2795 /* clear 10 bits. */
2796 rxpbsize |= (pbsize << 10); /* set value */
2797 wr32(hw, TXGBE_PBRXSIZE(i), rxpbsize);
2799 /* zero alloc all unused TCs */
2800 for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2801 uint32_t rxpbsize = rd32(hw, TXGBE_PBRXSIZE(i));
2803 rxpbsize &= (~(0x3FF << 10));
2804 /* clear 10 bits. */
2805 wr32(hw, TXGBE_PBRXSIZE(i), rxpbsize);
2808 if (num_pools == ETH_16_POOLS) {
2809 mrqc = TXGBE_PORTCTL_NUMTC_8;
2810 mrqc |= TXGBE_PORTCTL_NUMVT_16;
2812 mrqc = TXGBE_PORTCTL_NUMTC_4;
2813 mrqc |= TXGBE_PORTCTL_NUMVT_32;
2815 wr32m(hw, TXGBE_PORTCTL,
2816 TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK, mrqc);
2818 vt_ctl = TXGBE_POOLCTL_RPLEN;
2819 if (cfg->enable_default_pool)
2820 vt_ctl |= TXGBE_POOLCTL_DEFPL(cfg->default_pool);
2822 vt_ctl |= TXGBE_POOLCTL_DEFDSA;
2824 wr32(hw, TXGBE_POOLCTL, vt_ctl);
2827 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
2829 * mapping is done with 3 bits per priority,
2830 * so shift by i*3 each time
2832 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
2834 wr32(hw, TXGBE_RPUP2TC, queue_mapping);
2836 wr32(hw, TXGBE_ARBRXCTL, TXGBE_ARBRXCTL_RRM);
2838 /* enable vlan filtering and allow all vlan tags through */
2839 vlanctrl = rd32(hw, TXGBE_VLANCTL);
2840 vlanctrl |= TXGBE_VLANCTL_VFE; /* enable vlan filters */
2841 wr32(hw, TXGBE_VLANCTL, vlanctrl);
2843 /* enable all vlan filters */
2844 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
2845 wr32(hw, TXGBE_VLANTBL(i), 0xFFFFFFFF);
2847 wr32(hw, TXGBE_POOLRXENA(0),
2848 num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2850 wr32(hw, TXGBE_ETHADDRIDX, 0);
2851 wr32(hw, TXGBE_ETHADDRASSL, 0xFFFFFFFF);
2852 wr32(hw, TXGBE_ETHADDRASSH, 0xFFFFFFFF);
2854 /* set up filters for vlan tags as configured */
2855 for (i = 0; i < cfg->nb_pool_maps; i++) {
2856 /* set vlan id in VF register and set the valid bit */
2857 wr32(hw, TXGBE_PSRVLANIDX, i);
2858 wr32(hw, TXGBE_PSRVLAN, (TXGBE_PSRVLAN_EA |
2859 (cfg->pool_map[i].vlan_id & 0xFFF)));
2861 wr32(hw, TXGBE_PSRVLANPLM(0), cfg->pool_map[i].pools);
2866 * txgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
2867 * @dev: pointer to eth_dev structure
2868 * @dcb_config: pointer to txgbe_dcb_config structure
2871 txgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
2872 struct txgbe_dcb_config *dcb_config)
2875 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2877 PMD_INIT_FUNC_TRACE();
2879 /* Disable the Tx desc arbiter */
2880 reg = rd32(hw, TXGBE_ARBTXCTL);
2881 reg |= TXGBE_ARBTXCTL_DIA;
2882 wr32(hw, TXGBE_ARBTXCTL, reg);
2884 /* Enable DCB for Tx with 8 TCs */
2885 reg = rd32(hw, TXGBE_PORTCTL);
2886 reg &= ~TXGBE_PORTCTL_NUMTC_MASK;
2887 reg |= TXGBE_PORTCTL_DCB;
2888 if (dcb_config->num_tcs.pg_tcs == 8)
2889 reg |= TXGBE_PORTCTL_NUMTC_8;
2891 reg |= TXGBE_PORTCTL_NUMTC_4;
2893 wr32(hw, TXGBE_PORTCTL, reg);
2895 /* Enable the Tx desc arbiter */
2896 reg = rd32(hw, TXGBE_ARBTXCTL);
2897 reg &= ~TXGBE_ARBTXCTL_DIA;
2898 wr32(hw, TXGBE_ARBTXCTL, reg);
2902 * txgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
2903 * @dev: pointer to rte_eth_dev structure
2904 * @dcb_config: pointer to txgbe_dcb_config structure
2907 txgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
2908 struct txgbe_dcb_config *dcb_config)
2910 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2911 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2912 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2914 PMD_INIT_FUNC_TRACE();
2915 /*PF VF Transmit Enable*/
2916 wr32(hw, TXGBE_POOLTXENA(0),
2917 vmdq_tx_conf->nb_queue_pools ==
2918 ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2920 /*Configure general DCB TX parameters*/
2921 txgbe_dcb_tx_hw_config(dev, dcb_config);
2925 txgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
2926 struct txgbe_dcb_config *dcb_config)
2928 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
2929 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
2930 struct txgbe_dcb_tc_config *tc;
2933 /* convert rte_eth_conf.rx_adv_conf to struct txgbe_dcb_config */
2934 if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
2935 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2936 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2938 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2939 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2942 /* Initialize User Priority to Traffic Class mapping */
2943 for (j = 0; j < TXGBE_DCB_TC_MAX; j++) {
2944 tc = &dcb_config->tc_config[j];
2945 tc->path[TXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
2948 /* User Priority to Traffic Class mapping */
2949 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2950 j = vmdq_rx_conf->dcb_tc[i];
2951 tc = &dcb_config->tc_config[j];
2952 tc->path[TXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
2958 txgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
2959 struct txgbe_dcb_config *dcb_config)
2961 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2962 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2963 struct txgbe_dcb_tc_config *tc;
2966 /* convert rte_eth_conf.rx_adv_conf to struct txgbe_dcb_config */
2967 if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
2968 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
2969 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
2971 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
2972 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
2975 /* Initialize User Priority to Traffic Class mapping */
2976 for (j = 0; j < TXGBE_DCB_TC_MAX; j++) {
2977 tc = &dcb_config->tc_config[j];
2978 tc->path[TXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
2981 /* User Priority to Traffic Class mapping */
2982 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
2983 j = vmdq_tx_conf->dcb_tc[i];
2984 tc = &dcb_config->tc_config[j];
2985 tc->path[TXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
2991 txgbe_dcb_rx_config(struct rte_eth_dev *dev,
2992 struct txgbe_dcb_config *dcb_config)
2994 struct rte_eth_dcb_rx_conf *rx_conf =
2995 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
2996 struct txgbe_dcb_tc_config *tc;
2999 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3000 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3002 /* Initialize User Priority to Traffic Class mapping */
3003 for (j = 0; j < TXGBE_DCB_TC_MAX; j++) {
3004 tc = &dcb_config->tc_config[j];
3005 tc->path[TXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3008 /* User Priority to Traffic Class mapping */
3009 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3010 j = rx_conf->dcb_tc[i];
3011 tc = &dcb_config->tc_config[j];
3012 tc->path[TXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3018 txgbe_dcb_tx_config(struct rte_eth_dev *dev,
3019 struct txgbe_dcb_config *dcb_config)
3021 struct rte_eth_dcb_tx_conf *tx_conf =
3022 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3023 struct txgbe_dcb_tc_config *tc;
3026 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3027 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3029 /* Initialize User Priority to Traffic Class mapping */
3030 for (j = 0; j < TXGBE_DCB_TC_MAX; j++) {
3031 tc = &dcb_config->tc_config[j];
3032 tc->path[TXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3035 /* User Priority to Traffic Class mapping */
3036 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3037 j = tx_conf->dcb_tc[i];
3038 tc = &dcb_config->tc_config[j];
3039 tc->path[TXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3045 * txgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3046 * @dev: pointer to eth_dev structure
3047 * @dcb_config: pointer to txgbe_dcb_config structure
3050 txgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3051 struct txgbe_dcb_config *dcb_config)
3057 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3059 PMD_INIT_FUNC_TRACE();
3061 * Disable the arbiter before changing parameters
3062 * (always enable recycle mode; WSP)
3064 reg = TXGBE_ARBRXCTL_RRM | TXGBE_ARBRXCTL_WSP | TXGBE_ARBRXCTL_DIA;
3065 wr32(hw, TXGBE_ARBRXCTL, reg);
3067 reg = rd32(hw, TXGBE_PORTCTL);
3068 reg &= ~(TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK);
3069 if (dcb_config->num_tcs.pg_tcs == 4) {
3070 reg |= TXGBE_PORTCTL_NUMTC_4;
3071 if (dcb_config->vt_mode)
3072 reg |= TXGBE_PORTCTL_NUMVT_32;
3074 wr32(hw, TXGBE_POOLCTL, 0);
3077 if (dcb_config->num_tcs.pg_tcs == 8) {
3078 reg |= TXGBE_PORTCTL_NUMTC_8;
3079 if (dcb_config->vt_mode)
3080 reg |= TXGBE_PORTCTL_NUMVT_16;
3082 wr32(hw, TXGBE_POOLCTL, 0);
3085 wr32(hw, TXGBE_PORTCTL, reg);
3087 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3088 /* Disable drop for all queues in VMDQ mode*/
3089 for (q = 0; q < TXGBE_MAX_RX_QUEUE_NUM; q++) {
3090 u32 val = 1 << (q % 32);
3091 wr32m(hw, TXGBE_QPRXDROP(q / 32), val, val);
3094 /* Enable drop for all queues in SRIOV mode */
3095 for (q = 0; q < TXGBE_MAX_RX_QUEUE_NUM; q++) {
3096 u32 val = 1 << (q % 32);
3097 wr32m(hw, TXGBE_QPRXDROP(q / 32), val, val);
3101 /* VLNCTL: enable vlan filtering and allow all vlan tags through */
3102 vlanctrl = rd32(hw, TXGBE_VLANCTL);
3103 vlanctrl |= TXGBE_VLANCTL_VFE; /* enable vlan filters */
3104 wr32(hw, TXGBE_VLANCTL, vlanctrl);
3106 /* VLANTBL - enable all vlan filters */
3107 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3108 wr32(hw, TXGBE_VLANTBL(i), 0xFFFFFFFF);
3111 * Configure Rx packet plane (recycle mode; WSP) and
3114 reg = TXGBE_ARBRXCTL_RRM | TXGBE_ARBRXCTL_WSP;
3115 wr32(hw, TXGBE_ARBRXCTL, reg);
3119 txgbe_dcb_hw_arbite_rx_config(struct txgbe_hw *hw, uint16_t *refill,
3120 uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3122 txgbe_dcb_config_rx_arbiter_raptor(hw, refill, max, bwg_id,
3127 txgbe_dcb_hw_arbite_tx_config(struct txgbe_hw *hw, uint16_t *refill,
3128 uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3130 switch (hw->mac.type) {
3131 case txgbe_mac_raptor:
3132 txgbe_dcb_config_tx_desc_arbiter_raptor(hw, refill,
3134 txgbe_dcb_config_tx_data_arbiter_raptor(hw, refill,
3135 max, bwg_id, tsa, map);
3142 #define DCB_RX_CONFIG 1
3143 #define DCB_TX_CONFIG 1
3144 #define DCB_TX_PB 1024
3146 * txgbe_dcb_hw_configure - Enable DCB and configure
3147 * general DCB in VT mode and non-VT mode parameters
3148 * @dev: pointer to rte_eth_dev structure
3149 * @dcb_config: pointer to txgbe_dcb_config structure
3152 txgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3153 struct txgbe_dcb_config *dcb_config)
3156 uint8_t i, pfc_en, nb_tcs;
3157 uint16_t pbsize, rx_buffer_size;
3158 uint8_t config_dcb_rx = 0;
3159 uint8_t config_dcb_tx = 0;
3160 uint8_t tsa[TXGBE_DCB_TC_MAX] = {0};
3161 uint8_t bwgid[TXGBE_DCB_TC_MAX] = {0};
3162 uint16_t refill[TXGBE_DCB_TC_MAX] = {0};
3163 uint16_t max[TXGBE_DCB_TC_MAX] = {0};
3164 uint8_t map[TXGBE_DCB_TC_MAX] = {0};
3165 struct txgbe_dcb_tc_config *tc;
3166 uint32_t max_frame = dev->data->mtu +
3167 RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
3168 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3169 struct txgbe_bw_conf *bw_conf = TXGBE_DEV_BW_CONF(dev);
3171 switch (dev->data->dev_conf.rxmode.mq_mode) {
3172 case ETH_MQ_RX_VMDQ_DCB:
3173 dcb_config->vt_mode = true;
3174 config_dcb_rx = DCB_RX_CONFIG;
3176 * get dcb and VT rx configuration parameters
3179 txgbe_vmdq_dcb_rx_config(dev, dcb_config);
3180 /*Configure general VMDQ and DCB RX parameters*/
3181 txgbe_vmdq_dcb_configure(dev);
3184 case ETH_MQ_RX_DCB_RSS:
3185 dcb_config->vt_mode = false;
3186 config_dcb_rx = DCB_RX_CONFIG;
3187 /* Get DCB RX configuration parameters from rte_eth_conf */
3188 txgbe_dcb_rx_config(dev, dcb_config);
3189 /*Configure general DCB RX parameters*/
3190 txgbe_dcb_rx_hw_config(dev, dcb_config);
3193 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3196 switch (dev->data->dev_conf.txmode.mq_mode) {
3197 case ETH_MQ_TX_VMDQ_DCB:
3198 dcb_config->vt_mode = true;
3199 config_dcb_tx = DCB_TX_CONFIG;
3200 /* get DCB and VT TX configuration parameters
3203 txgbe_dcb_vt_tx_config(dev, dcb_config);
3204 /* Configure general VMDQ and DCB TX parameters */
3205 txgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3209 dcb_config->vt_mode = false;
3210 config_dcb_tx = DCB_TX_CONFIG;
3211 /* get DCB TX configuration parameters from rte_eth_conf */
3212 txgbe_dcb_tx_config(dev, dcb_config);
3213 /* Configure general DCB TX parameters */
3214 txgbe_dcb_tx_hw_config(dev, dcb_config);
3217 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3221 nb_tcs = dcb_config->num_tcs.pfc_tcs;
3223 txgbe_dcb_unpack_map_cee(dcb_config, TXGBE_DCB_RX_CONFIG, map);
3224 if (nb_tcs == ETH_4_TCS) {
3225 /* Avoid un-configured priority mapping to TC0 */
3227 uint8_t mask = 0xFF;
3229 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3230 mask = (uint8_t)(mask & (~(1 << map[i])));
3231 for (i = 0; mask && (i < TXGBE_DCB_TC_MAX); i++) {
3232 if ((mask & 0x1) && j < ETH_DCB_NUM_USER_PRIORITIES)
3236 /* Re-configure 4 TCs BW */
3237 for (i = 0; i < nb_tcs; i++) {
3238 tc = &dcb_config->tc_config[i];
3239 if (bw_conf->tc_num != nb_tcs)
3240 tc->path[TXGBE_DCB_TX_CONFIG].bwg_percent =
3241 (uint8_t)(100 / nb_tcs);
3242 tc->path[TXGBE_DCB_RX_CONFIG].bwg_percent =
3243 (uint8_t)(100 / nb_tcs);
3245 for (; i < TXGBE_DCB_TC_MAX; i++) {
3246 tc = &dcb_config->tc_config[i];
3247 tc->path[TXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3248 tc->path[TXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3251 /* Re-configure 8 TCs BW */
3252 for (i = 0; i < nb_tcs; i++) {
3253 tc = &dcb_config->tc_config[i];
3254 if (bw_conf->tc_num != nb_tcs)
3255 tc->path[TXGBE_DCB_TX_CONFIG].bwg_percent =
3256 (uint8_t)(100 / nb_tcs + (i & 1));
3257 tc->path[TXGBE_DCB_RX_CONFIG].bwg_percent =
3258 (uint8_t)(100 / nb_tcs + (i & 1));
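/* With 8 TCs this assigns 12% or 13% per TC (4 * 12 + 4 * 13 = 100). */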
3262 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3264 if (config_dcb_rx) {
3265 /* Set RX buffer size */
3266 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3267 uint32_t rxpbsize = pbsize << 10;
3269 for (i = 0; i < nb_tcs; i++)
3270 wr32(hw, TXGBE_PBRXSIZE(i), rxpbsize);
3272 /* zero alloc all unused TCs */
3273 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3274 wr32(hw, TXGBE_PBRXSIZE(i), 0);
3276 if (config_dcb_tx) {
3277 /* Only support an equally distributed
3278 * Tx packet buffer strategy.
3280 uint32_t txpktsize = TXGBE_PBTXSIZE_MAX / nb_tcs;
3281 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) -
3282 TXGBE_TXPKT_SIZE_MAX;
3284 for (i = 0; i < nb_tcs; i++) {
3285 wr32(hw, TXGBE_PBTXSIZE(i), txpktsize);
3286 wr32(hw, TXGBE_PBTXDMATH(i), txpbthresh);
3288 /* Clear unused TCs, if any, to zero buffer size*/
3289 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3290 wr32(hw, TXGBE_PBTXSIZE(i), 0);
3291 wr32(hw, TXGBE_PBTXDMATH(i), 0);
3295 /*Calculates traffic class credits*/
3296 txgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3297 TXGBE_DCB_TX_CONFIG);
3298 txgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3299 TXGBE_DCB_RX_CONFIG);
3301 if (config_dcb_rx) {
3302 /* Unpack CEE standard containers */
3303 txgbe_dcb_unpack_refill_cee(dcb_config,
3304 TXGBE_DCB_RX_CONFIG, refill);
3305 txgbe_dcb_unpack_max_cee(dcb_config, max);
3306 txgbe_dcb_unpack_bwgid_cee(dcb_config,
3307 TXGBE_DCB_RX_CONFIG, bwgid);
3308 txgbe_dcb_unpack_tsa_cee(dcb_config,
3309 TXGBE_DCB_RX_CONFIG, tsa);
3310 /* Configure PG(ETS) RX */
3311 txgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3314 if (config_dcb_tx) {
3315 /* Unpack CEE standard containers */
3316 txgbe_dcb_unpack_refill_cee(dcb_config,
3317 TXGBE_DCB_TX_CONFIG, refill);
3318 txgbe_dcb_unpack_max_cee(dcb_config, max);
3319 txgbe_dcb_unpack_bwgid_cee(dcb_config,
3320 TXGBE_DCB_TX_CONFIG, bwgid);
3321 txgbe_dcb_unpack_tsa_cee(dcb_config,
3322 TXGBE_DCB_TX_CONFIG, tsa);
3323 /* Configure PG(ETS) TX */
3324 txgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3327 /* Configure queue statistics registers */
3328 txgbe_dcb_config_tc_stats_raptor(hw, dcb_config);
3330 /* Check if the PFC is supported */
3331 if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3332 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3333 for (i = 0; i < nb_tcs; i++) {
3334 /* With the default buffer split across 8 TCs, pbsize is 64,
3335 * so the default high_water is 48 (3/4 of pbsize)
3336 * and the default low_water is 16 (1/4 of pbsize).
3338 hw->fc.high_water[i] = (pbsize * 3) / 4;
3339 hw->fc.low_water[i] = pbsize / 4;
3340 /* Enable pfc for this TC */
3341 tc = &dcb_config->tc_config[i];
3342 tc->pfc = txgbe_dcb_pfc_enabled;
3344 txgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3345 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3347 ret = txgbe_dcb_config_pfc(hw, pfc_en, map);
3353 void txgbe_configure_pb(struct rte_eth_dev *dev)
3355 struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
3356 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3359 int tc = dev_conf->rx_adv_conf.dcb_rx_conf.nb_tcs;
3361 /* Reserve 256KB(/512KB) rx buffer for fdir */
3364 hw->mac.setup_pba(hw, tc, hdrm, PBA_STRATEGY_EQUAL);
3367 void txgbe_configure_port(struct rte_eth_dev *dev)
3369 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3371 uint16_t tpids[8] = {RTE_ETHER_TYPE_VLAN, RTE_ETHER_TYPE_QINQ,
3376 PMD_INIT_FUNC_TRACE();
3378 /* default outer vlan tpid */
3379 wr32(hw, TXGBE_EXTAG,
3380 TXGBE_EXTAG_ETAG(RTE_ETHER_TYPE_ETAG) |
3381 TXGBE_EXTAG_VLAN(RTE_ETHER_TYPE_QINQ));
3383 /* default inner vlan tpid */
3384 wr32m(hw, TXGBE_VLANCTL,
3385 TXGBE_VLANCTL_TPID_MASK,
3386 TXGBE_VLANCTL_TPID(RTE_ETHER_TYPE_VLAN));
3387 wr32m(hw, TXGBE_DMATXCTRL,
3388 TXGBE_DMATXCTRL_TPID_MASK,
3389 TXGBE_DMATXCTRL_TPID(RTE_ETHER_TYPE_VLAN));
3391 /* default vlan tpid filters */
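/* Two TPIDs are packed per TAGTPID register: even entries fill the
 * LSB half, odd entries the MSB half.
 */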
3392 for (i = 0; i < 8; i++) {
3393 wr32m(hw, TXGBE_TAGTPID(i / 2),
3394 (i % 2 ? TXGBE_TAGTPID_MSB_MASK
3395 : TXGBE_TAGTPID_LSB_MASK),
3396 (i % 2 ? TXGBE_TAGTPID_MSB(tpids[i])
3397 : TXGBE_TAGTPID_LSB(tpids[i])));
3400 /* default vxlan port */
3401 wr32(hw, TXGBE_VXLANPORT, 4789);
3405 * txgbe_configure_dcb - Configure DCB Hardware
3406 * @dev: pointer to rte_eth_dev
3408 void txgbe_configure_dcb(struct rte_eth_dev *dev)
3410 struct txgbe_dcb_config *dcb_cfg = TXGBE_DEV_DCB_CONFIG(dev);
3411 struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
3413 PMD_INIT_FUNC_TRACE();
3415 /* check support mq_mode for DCB */
3416 if (dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB &&
3417 dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB &&
3418 dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS)
3421 if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3424 /** Configure DCB hardware **/
3425 txgbe_dcb_hw_configure(dev, dcb_cfg);
3429 * VMDq is only supported on 10 GbE NICs.
3432 txgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3434 struct rte_eth_vmdq_rx_conf *cfg;
3435 struct txgbe_hw *hw;
3436 enum rte_eth_nb_pools num_pools;
3437 uint32_t mrqc, vt_ctl, vlanctrl;
3441 PMD_INIT_FUNC_TRACE();
3442 hw = TXGBE_DEV_HW(dev);
3443 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3444 num_pools = cfg->nb_queue_pools;
3446 txgbe_rss_disable(dev);
3449 mrqc = TXGBE_PORTCTL_NUMVT_64;
3450 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK, mrqc);
3452 /* turn on virtualisation and set the default pool */
3453 vt_ctl = TXGBE_POOLCTL_RPLEN;
3454 if (cfg->enable_default_pool)
3455 vt_ctl |= TXGBE_POOLCTL_DEFPL(cfg->default_pool);
3457 vt_ctl |= TXGBE_POOLCTL_DEFDSA;
3459 wr32(hw, TXGBE_POOLCTL, vt_ctl);
3461 for (i = 0; i < (int)num_pools; i++) {
3462 vmolr = txgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3463 wr32(hw, TXGBE_POOLETHCTL(i), vmolr);
3466 /* enable vlan filtering and allow all vlan tags through */
3467 vlanctrl = rd32(hw, TXGBE_VLANCTL);
3468 vlanctrl |= TXGBE_VLANCTL_VFE; /* enable vlan filters */
3469 wr32(hw, TXGBE_VLANCTL, vlanctrl);
3471 /* enable all vlan filters */
3472 for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3473 wr32(hw, TXGBE_VLANTBL(i), UINT32_MAX);
3475 /* pool enabling for receive - 64 */
3476 wr32(hw, TXGBE_POOLRXENA(0), UINT32_MAX);
3477 if (num_pools == ETH_64_POOLS)
3478 wr32(hw, TXGBE_POOLRXENA(1), UINT32_MAX);
3481 * allow pools to read specific mac addresses
3482 * In this case, all pools should be able to read from mac addr 0
3484 wr32(hw, TXGBE_ETHADDRIDX, 0);
3485 wr32(hw, TXGBE_ETHADDRASSL, 0xFFFFFFFF);
3486 wr32(hw, TXGBE_ETHADDRASSH, 0xFFFFFFFF);
3488 /* set up filters for vlan tags as configured */
3489 for (i = 0; i < cfg->nb_pool_maps; i++) {
3490 /* set vlan id in VF register and set the valid bit */
3491 wr32(hw, TXGBE_PSRVLANIDX, i);
3492 wr32(hw, TXGBE_PSRVLAN, (TXGBE_PSRVLAN_EA |
3493 TXGBE_PSRVLAN_VID(cfg->pool_map[i].vlan_id)));
3495 * Put the allowed pools in VFB reg. As we only have 16 or 64
3496 * pools, we only need to use the first half of the register
3499 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3500 wr32(hw, TXGBE_PSRVLANPLM(0),
3501 (cfg->pool_map[i].pools & UINT32_MAX));
3503 wr32(hw, TXGBE_PSRVLANPLM(1),
3504 ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3507 /* Tx General Switch Control Enables VMDQ loopback */
3508 if (cfg->enable_loop_back) {
3509 wr32(hw, TXGBE_PSRCTL, TXGBE_PSRCTL_LBENA);
3510 for (i = 0; i < 64; i++)
3511 wr32m(hw, TXGBE_POOLETHCTL(i),
3512 TXGBE_POOLETHCTL_LLB, TXGBE_POOLETHCTL_LLB);
3519 * txgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3520 * @hw: pointer to hardware structure
3523 txgbe_vmdq_tx_hw_configure(struct txgbe_hw *hw)
3528 PMD_INIT_FUNC_TRACE();
3529 /*PF VF Transmit Enable*/
3530 wr32(hw, TXGBE_POOLTXENA(0), UINT32_MAX);
3531 wr32(hw, TXGBE_POOLTXENA(1), UINT32_MAX);
3533 /* Disable the Tx desc arbiter */
3534 reg = rd32(hw, TXGBE_ARBTXCTL);
3535 reg |= TXGBE_ARBTXCTL_DIA;
3536 wr32(hw, TXGBE_ARBTXCTL, reg);
3538 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK,
3539 TXGBE_PORTCTL_NUMVT_64);
3541 /* Disable drop for all queues */
3542 for (q = 0; q < 128; q++) {
3543 u32 val = 1 << (q % 32);
3544 wr32m(hw, TXGBE_QPRXDROP(q / 32), val, val);
3547 /* Enable the Tx desc arbiter */
3548 reg = rd32(hw, TXGBE_ARBTXCTL);
3549 reg &= ~TXGBE_ARBTXCTL_DIA;
3550 wr32(hw, TXGBE_ARBTXCTL, reg);
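/* Allocate one mbuf per Rx descriptor and program its DMA address into
 * the corresponding descriptor of the ring.
 */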
3555 static int __rte_cold
3556 txgbe_alloc_rx_queue_mbufs(struct txgbe_rx_queue *rxq)
3558 struct txgbe_rx_entry *rxe = rxq->sw_ring;
3562 /* Initialize software ring entries */
3563 for (i = 0; i < rxq->nb_rx_desc; i++) {
3564 volatile struct txgbe_rx_desc *rxd;
3565 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
3568 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3569 (unsigned int)rxq->queue_id);
3573 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3574 mbuf->port = rxq->port_id;
3577 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
3578 rxd = &rxq->rx_ring[i];
3579 TXGBE_RXD_HDRADDR(rxd, 0);
3580 TXGBE_RXD_PKTADDR(rxd, dma_addr);
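/* Configure RSS for the PF while SR-IOV is active: program the RSS key and
 * redirection table, then set PORTCTL.NUMVT to match the active pool count.
 */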
3588 txgbe_config_vf_rss(struct rte_eth_dev *dev)
3590 struct txgbe_hw *hw;
3593 txgbe_rss_configure(dev);
3595 hw = TXGBE_DEV_HW(dev);
3598 mrqc = rd32(hw, TXGBE_PORTCTL);
3599 mrqc &= ~(TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK);
3600 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3602 mrqc |= TXGBE_PORTCTL_NUMVT_64;
3606 mrqc |= TXGBE_PORTCTL_NUMVT_32;
3610 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3614 wr32(hw, TXGBE_PORTCTL, mrqc);
3620 txgbe_config_vf_default(struct rte_eth_dev *dev)
3622 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3625 mrqc = rd32(hw, TXGBE_PORTCTL);
3626 mrqc &= ~(TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK);
3627 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3629 mrqc |= TXGBE_PORTCTL_NUMVT_64;
3633 mrqc |= TXGBE_PORTCTL_NUMVT_32;
3637 mrqc |= TXGBE_PORTCTL_NUMVT_16;
3641 "invalid pool number in IOV mode");
3645 wr32(hw, TXGBE_PORTCTL, mrqc);
3651 txgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3653 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3655 * SRIOV inactive scheme
3656 * any DCB/RSS w/o VMDq multi-queue setting
3658 switch (dev->data->dev_conf.rxmode.mq_mode) {
3660 case ETH_MQ_RX_DCB_RSS:
3661 case ETH_MQ_RX_VMDQ_RSS:
3662 txgbe_rss_configure(dev);
3665 case ETH_MQ_RX_VMDQ_DCB:
3666 txgbe_vmdq_dcb_configure(dev);
3669 case ETH_MQ_RX_VMDQ_ONLY:
3670 txgbe_vmdq_rx_hw_configure(dev);
3673 case ETH_MQ_RX_NONE:
3675 /* if mq_mode is none, disable rss mode.*/
3676 txgbe_rss_disable(dev);
3680 /* SRIOV active scheme
3681 * Support RSS together with SRIOV.
3683 switch (dev->data->dev_conf.rxmode.mq_mode) {
3685 case ETH_MQ_RX_VMDQ_RSS:
3686 txgbe_config_vf_rss(dev);
3688 case ETH_MQ_RX_VMDQ_DCB:
3690 /* In SRIOV, the configuration is the same as VMDq case */
3691 txgbe_vmdq_dcb_configure(dev);
3693 /* DCB/RSS together with SRIOV is not supported */
3694 case ETH_MQ_RX_VMDQ_DCB_RSS:
3695 case ETH_MQ_RX_DCB_RSS:
3697 "Could not support DCB/RSS with VMDq & SRIOV");
3700 txgbe_config_vf_default(dev);
3709 txgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3711 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3715 /* disable arbiter */
3716 rttdcs = rd32(hw, TXGBE_ARBTXCTL);
3717 rttdcs |= TXGBE_ARBTXCTL_DIA;
3718 wr32(hw, TXGBE_ARBTXCTL, rttdcs);
3720 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3722 * SRIOV inactive scheme
3723 * any DCB w/o VMDq multi-queue setting
3725 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3726 txgbe_vmdq_tx_hw_configure(hw);
3728 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK, 0);
3730 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3732 * SRIOV active scheme
3733 * FIXME if support DCB together with VMDq & SRIOV
3736 mtqc = TXGBE_PORTCTL_NUMVT_64;
3739 mtqc = TXGBE_PORTCTL_NUMVT_32;
3742 mtqc = TXGBE_PORTCTL_NUMVT_16;
3746 PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3748 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK, mtqc);
3751 /* re-enable arbiter */
3752 rttdcs &= ~TXGBE_ARBTXCTL_DIA;
3753 wr32(hw, TXGBE_ARBTXCTL, rttdcs);
3759 * txgbe_get_rscctl_maxdesc
3761 * @pool Memory pool of the Rx queue
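 *
 * Returns the RSCMAX encoding (1, 4, 8 or 16 descriptors) derived from how
 * many mbufs of @pool are needed to hold a maximum-size (64 KB) IP packet.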
3763 static inline uint32_t
3764 txgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3766 struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
3769 RTE_IPV4_MAX_PKT_LEN /
3770 (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
3773 return TXGBE_RXCFG_RSCMAX_16;
3774 else if (maxdesc >= 8)
3775 return TXGBE_RXCFG_RSCMAX_8;
3776 else if (maxdesc >= 4)
3777 return TXGBE_RXCFG_RSCMAX_4;
3779 return TXGBE_RXCFG_RSCMAX_1;
3783 * txgbe_set_rsc - configure RSC related port HW registers
3785 * Configures the port's RSC related registers.
3789 * Returns 0 in case of success or a non-zero error code
3792 txgbe_set_rsc(struct rte_eth_dev *dev)
3794 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
3795 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3796 struct rte_eth_dev_info dev_info = { 0 };
3797 bool rsc_capable = false;
3803 dev->dev_ops->dev_infos_get(dev, &dev_info);
3804 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
3807 if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
3808 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
3813 /* RSC global configuration */
3815 if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
3816 (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
3817 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
3822 rfctl = rd32(hw, TXGBE_PSRCTL);
3823 if (rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
3824 rfctl &= ~TXGBE_PSRCTL_RSCDIA;
3826 rfctl |= TXGBE_PSRCTL_RSCDIA;
3827 wr32(hw, TXGBE_PSRCTL, rfctl);
3829 /* If LRO hasn't been requested - we are done here. */
3830 if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
3833 /* Set PSRCTL.RSCACK bit */
3834 rdrxctl = rd32(hw, TXGBE_PSRCTL);
3835 rdrxctl |= TXGBE_PSRCTL_RSCACK;
3836 wr32(hw, TXGBE_PSRCTL, rdrxctl);
3838 /* Per-queue RSC configuration */
3839 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3840 struct txgbe_rx_queue *rxq = dev->data->rx_queues[i];
3842 rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
3844 rd32(hw, TXGBE_POOLRSS(rxq->reg_idx));
3846 rd32(hw, TXGBE_ITR(rxq->reg_idx));
3849 * txgbe PMD doesn't support header-split at the moment.
3851 srrctl &= ~TXGBE_RXCFG_HDRLEN_MASK;
3852 srrctl |= TXGBE_RXCFG_HDRLEN(128);
3855 * TODO: Consider setting the Receive Descriptor Minimum
3856 * Threshold Size for an RSC case. This is not an obviously
3857 * beneficial option but one worth considering...
3860 srrctl |= TXGBE_RXCFG_RSCENA;
3861 srrctl &= ~TXGBE_RXCFG_RSCMAX_MASK;
3862 srrctl |= txgbe_get_rscctl_maxdesc(rxq->mb_pool);
3863 psrtype |= TXGBE_POOLRSS_L4HDR;
3866 * RSC: Set ITR interval corresponding to 2K ints/s.
3868 * Full-sized RSC aggregations for a 10Gb/s link will
3869 * arrive at about 20K aggregation/s rate.
3871 * A 2K ints/s rate means only about 10% of the
3872 * aggregations will be closed due to the interrupt timer
3873 * expiration when streaming at wire speed.
3875 * For a sparse streaming case this setting will yield
3876 * at most 500us latency for a single RSC aggregation.
3878 eitr &= ~TXGBE_ITR_IVAL_MASK;
3879 eitr |= TXGBE_ITR_IVAL_10G(TXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
3880 eitr |= TXGBE_ITR_WRDSA;
3882 wr32(hw, TXGBE_RXCFG(rxq->reg_idx), srrctl);
3883 wr32(hw, TXGBE_POOLRSS(rxq->reg_idx), psrtype);
3884 wr32(hw, TXGBE_ITR(rxq->reg_idx), eitr);
3887 * RSC requires the mapping of the queue to the
3890 txgbe_set_ivar_map(hw, 0, rxq->reg_idx, i);
3895 PMD_INIT_LOG(DEBUG, "enabling LRO mode");
3901 txgbe_set_rx_function(struct rte_eth_dev *dev)
3903 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
3906 * Initialize the appropriate LRO callback.
3908 * If all queues satisfy the bulk allocation preconditions
3909 * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use
3910 * bulk allocation. Otherwise use a single allocation version.
3912 if (dev->data->lro) {
3913 if (adapter->rx_bulk_alloc_allowed) {
3914 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3915 "allocation version");
3916 dev->rx_pkt_burst = txgbe_recv_pkts_lro_bulk_alloc;
3918 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3919 "allocation version");
3920 dev->rx_pkt_burst = txgbe_recv_pkts_lro_single_alloc;
3922 } else if (dev->data->scattered_rx) {
3924 * Set the non-LRO scattered callback: there are bulk and
3925 * single allocation versions.
3927 if (adapter->rx_bulk_alloc_allowed) {
3928 PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
3929 "allocation callback (port=%d).",
3930 dev->data->port_id);
3931 dev->rx_pkt_burst = txgbe_recv_pkts_lro_bulk_alloc;
3933 PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
3934 "single allocation) "
3935 "Scattered Rx callback "
3937 dev->data->port_id);
3939 dev->rx_pkt_burst = txgbe_recv_pkts_lro_single_alloc;
3942 * Below we set "simple" callbacks according to port/queues parameters.
3943 * If parameters allow we are going to choose between bulk allocation and
3946 * single buffer allocation (the simplest one).
3948 } else if (adapter->rx_bulk_alloc_allowed) {
3949 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
3950 "satisfied. Rx Burst Bulk Alloc function "
3951 "will be used on port=%d.",
3952 dev->data->port_id);
3954 dev->rx_pkt_burst = txgbe_recv_pkts_bulk_alloc;
3956 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
3957 "satisfied, or Scattered Rx is requested "
3959 dev->data->port_id);
3961 dev->rx_pkt_burst = txgbe_recv_pkts;
3966 * Initializes Receive Unit.
3969 txgbe_dev_rx_init(struct rte_eth_dev *dev)
3971 struct txgbe_hw *hw;
3972 struct txgbe_rx_queue *rxq;
3981 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
3984 PMD_INIT_FUNC_TRACE();
3985 hw = TXGBE_DEV_HW(dev);
3988 * Make sure receives are disabled while setting
3989 * up the RX context (registers, descriptor rings, etc.).
3991 wr32m(hw, TXGBE_MACRXCFG, TXGBE_MACRXCFG_ENA, 0);
3992 wr32m(hw, TXGBE_PBRXCTL, TXGBE_PBRXCTL_ENA, 0);
3994 /* Enable receipt of broadcasted frames */
3995 fctrl = rd32(hw, TXGBE_PSRCTL);
3996 fctrl |= TXGBE_PSRCTL_BCA;
3997 wr32(hw, TXGBE_PSRCTL, fctrl);
4000 * Configure CRC stripping, if any.
4002 hlreg0 = rd32(hw, TXGBE_SECRXCTL);
4003 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4004 hlreg0 &= ~TXGBE_SECRXCTL_CRCSTRIP;
4006 hlreg0 |= TXGBE_SECRXCTL_CRCSTRIP;
4007 wr32(hw, TXGBE_SECRXCTL, hlreg0);
4010 * Configure jumbo frame support, if any.
4012 if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
4013 wr32m(hw, TXGBE_FRMSZ, TXGBE_FRMSZ_MAX_MASK,
4014 TXGBE_FRMSZ_MAX(rx_conf->max_rx_pkt_len));
4016 wr32m(hw, TXGBE_FRMSZ, TXGBE_FRMSZ_MAX_MASK,
4017 TXGBE_FRMSZ_MAX(TXGBE_FRAME_SIZE_DFT));
4021 * If loopback mode is configured, set LPBK bit.
4023 hlreg0 = rd32(hw, TXGBE_PSRCTL);
4024 if (hw->mac.type == txgbe_mac_raptor &&
4025 dev->data->dev_conf.lpbk_mode)
4026 hlreg0 |= TXGBE_PSRCTL_LBENA;
4028 hlreg0 &= ~TXGBE_PSRCTL_LBENA;
4030 wr32(hw, TXGBE_PSRCTL, hlreg0);
4033 * Assume no header split and no VLAN strip support
4034 * on any Rx queue first.
4036 rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
4038 /* Setup RX queues */
4039 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4040 rxq = dev->data->rx_queues[i];
4043 * Reset crc_len in case it was changed after queue setup by a
4044 * call to configure.
4046 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4047 rxq->crc_len = RTE_ETHER_CRC_LEN;
4051 /* Setup the Base and Length of the Rx Descriptor Rings */
4052 bus_addr = rxq->rx_ring_phys_addr;
4053 wr32(hw, TXGBE_RXBAL(rxq->reg_idx),
4054 (uint32_t)(bus_addr & BIT_MASK32));
4055 wr32(hw, TXGBE_RXBAH(rxq->reg_idx),
4056 (uint32_t)(bus_addr >> 32));
4057 wr32(hw, TXGBE_RXRP(rxq->reg_idx), 0);
4058 wr32(hw, TXGBE_RXWP(rxq->reg_idx), 0);
4060 srrctl = TXGBE_RXCFG_RNGLEN(rxq->nb_rx_desc);
4062 /* Set if packets are dropped when no descriptors available */
4064 srrctl |= TXGBE_RXCFG_DROP;
4067 * Configure the RX buffer size in the PKTLEN field of
4068 * the RXCFG register of the queue.
4069 * The value is in 1 KB resolution. Valid values can be from
4072 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4073 RTE_PKTMBUF_HEADROOM);
4074 buf_size = ROUND_UP(buf_size, 0x1 << 10);
4075 srrctl |= TXGBE_RXCFG_PKTLEN(buf_size);
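/* e.g. with a typical 2176-byte data room and the default 128-byte headroom,
 * buf_size is 2048 and is already a 1 KB multiple (illustrative values).
 */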
4077 wr32(hw, TXGBE_RXCFG(rxq->reg_idx), srrctl);
4079 /* Add the length of two VLAN tags to account for double VLAN (QinQ) frames */
4080 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4081 2 * TXGBE_VLAN_TAG_SIZE > buf_size)
4082 dev->data->scattered_rx = 1;
4083 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
4084 rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
4087 if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
4088 dev->data->scattered_rx = 1;
4091 * Device configured with multiple RX queues.
4093 txgbe_dev_mq_rx_configure(dev);
4096 * Setup the Checksum Register.
4097 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4098 * Enable IP/L4 checksum computation by hardware if requested to do so.
4100 rxcsum = rd32(hw, TXGBE_PSRCTL);
4101 rxcsum |= TXGBE_PSRCTL_PCSD;
4102 if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
4103 rxcsum |= TXGBE_PSRCTL_L4CSUM;
4105 rxcsum &= ~TXGBE_PSRCTL_L4CSUM;
4107 wr32(hw, TXGBE_PSRCTL, rxcsum);
4109 if (hw->mac.type == txgbe_mac_raptor) {
4110 rdrxctl = rd32(hw, TXGBE_SECRXCTL);
4111 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4112 rdrxctl &= ~TXGBE_SECRXCTL_CRCSTRIP;
4114 rdrxctl |= TXGBE_SECRXCTL_CRCSTRIP;
4115 wr32(hw, TXGBE_SECRXCTL, rdrxctl);
4118 rc = txgbe_set_rsc(dev);
4122 txgbe_set_rx_function(dev);
4128 * Initializes Transmit Unit.
4131 txgbe_dev_tx_init(struct rte_eth_dev *dev)
4133 struct txgbe_hw *hw;
4134 struct txgbe_tx_queue *txq;
4138 PMD_INIT_FUNC_TRACE();
4139 hw = TXGBE_DEV_HW(dev);
4141 /* Setup the Base and Length of the Tx Descriptor Rings */
4142 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4143 txq = dev->data->tx_queues[i];
4145 bus_addr = txq->tx_ring_phys_addr;
4146 wr32(hw, TXGBE_TXBAL(txq->reg_idx),
4147 (uint32_t)(bus_addr & BIT_MASK32));
4148 wr32(hw, TXGBE_TXBAH(txq->reg_idx),
4149 (uint32_t)(bus_addr >> 32));
4150 wr32m(hw, TXGBE_TXCFG(txq->reg_idx), TXGBE_TXCFG_BUFLEN_MASK,
4151 TXGBE_TXCFG_BUFLEN(txq->nb_tx_desc));
4152 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4153 wr32(hw, TXGBE_TXRP(txq->reg_idx), 0);
4154 wr32(hw, TXGBE_TXWP(txq->reg_idx), 0);
4157 /* Device configured with multiple TX queues. */
4158 txgbe_dev_mq_tx_configure(dev);
4162 * Set up link loopback mode Tx->Rx.
4164 static inline void __rte_cold
4165 txgbe_setup_loopback_link_raptor(struct txgbe_hw *hw)
4167 PMD_INIT_FUNC_TRACE();
4169 wr32m(hw, TXGBE_MACRXCFG, TXGBE_MACRXCFG_LB, TXGBE_MACRXCFG_LB);
4175 * Start Transmit and Receive Units.
4178 txgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4180 struct txgbe_hw *hw;
4181 struct txgbe_tx_queue *txq;
4182 struct txgbe_rx_queue *rxq;
4188 PMD_INIT_FUNC_TRACE();
4189 hw = TXGBE_DEV_HW(dev);
4191 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4192 txq = dev->data->tx_queues[i];
4193 /* Setup Transmit Threshold Registers */
4194 wr32m(hw, TXGBE_TXCFG(txq->reg_idx),
4195 TXGBE_TXCFG_HTHRESH_MASK |
4196 TXGBE_TXCFG_WTHRESH_MASK,
4197 TXGBE_TXCFG_HTHRESH(txq->hthresh) |
4198 TXGBE_TXCFG_WTHRESH(txq->wthresh));
4201 dmatxctl = rd32(hw, TXGBE_DMATXCTRL);
4202 dmatxctl |= TXGBE_DMATXCTRL_ENA;
4203 wr32(hw, TXGBE_DMATXCTRL, dmatxctl);
4205 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4206 txq = dev->data->tx_queues[i];
4207 if (!txq->tx_deferred_start) {
4208 ret = txgbe_dev_tx_queue_start(dev, i);
4214 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4215 rxq = dev->data->rx_queues[i];
4216 if (!rxq->rx_deferred_start) {
4217 ret = txgbe_dev_rx_queue_start(dev, i);
4223 /* Enable Receive engine */
4224 rxctrl = rd32(hw, TXGBE_PBRXCTL);
4225 rxctrl |= TXGBE_PBRXCTL_ENA;
4226 hw->mac.enable_rx_dma(hw, rxctrl);
4228 /* If loopback mode is enabled, set up the link accordingly */
4229 if (hw->mac.type == txgbe_mac_raptor &&
4230 dev->data->dev_conf.lpbk_mode)
4231 txgbe_setup_loopback_link_raptor(hw);
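/* Save and restore a queue's BAL/BAH/CFG registers across a stop/start
 * cycle; on restore the queue enable bit is left cleared.
 */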
4237 txgbe_dev_save_rx_queue(struct txgbe_hw *hw, uint16_t rx_queue_id)
4239 u32 *reg = &hw->q_rx_regs[rx_queue_id * 8];
4240 *(reg++) = rd32(hw, TXGBE_RXBAL(rx_queue_id));
4241 *(reg++) = rd32(hw, TXGBE_RXBAH(rx_queue_id));
4242 *(reg++) = rd32(hw, TXGBE_RXCFG(rx_queue_id));
4246 txgbe_dev_store_rx_queue(struct txgbe_hw *hw, uint16_t rx_queue_id)
4248 u32 *reg = &hw->q_rx_regs[rx_queue_id * 8];
4249 wr32(hw, TXGBE_RXBAL(rx_queue_id), *(reg++));
4250 wr32(hw, TXGBE_RXBAH(rx_queue_id), *(reg++));
4251 wr32(hw, TXGBE_RXCFG(rx_queue_id), *(reg++) & ~TXGBE_RXCFG_ENA);
4255 txgbe_dev_save_tx_queue(struct txgbe_hw *hw, uint16_t tx_queue_id)
4257 u32 *reg = &hw->q_tx_regs[tx_queue_id * 8];
4258 *(reg++) = rd32(hw, TXGBE_TXBAL(tx_queue_id));
4259 *(reg++) = rd32(hw, TXGBE_TXBAH(tx_queue_id));
4260 *(reg++) = rd32(hw, TXGBE_TXCFG(tx_queue_id));
4264 txgbe_dev_store_tx_queue(struct txgbe_hw *hw, uint16_t tx_queue_id)
4266 u32 *reg = &hw->q_tx_regs[tx_queue_id * 8];
4267 wr32(hw, TXGBE_TXBAL(tx_queue_id), *(reg++));
4268 wr32(hw, TXGBE_TXBAH(tx_queue_id), *(reg++));
4269 wr32(hw, TXGBE_TXCFG(tx_queue_id), *(reg++) & ~TXGBE_TXCFG_ENA);
4273 * Start Receive Units for specified queue.
4276 txgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4278 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
4279 struct txgbe_rx_queue *rxq;
4283 PMD_INIT_FUNC_TRACE();
4285 rxq = dev->data->rx_queues[rx_queue_id];
4287 /* Allocate buffers for descriptor rings */
4288 if (txgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4289 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4293 rxdctl = rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
4294 rxdctl |= TXGBE_RXCFG_ENA;
4295 wr32(hw, TXGBE_RXCFG(rxq->reg_idx), rxdctl);
4297 /* Wait until RX Enable ready */
4298 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4301 rxdctl = rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
4302 } while (--poll_ms && !(rxdctl & TXGBE_RXCFG_ENA));
4304 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
4306 wr32(hw, TXGBE_RXRP(rxq->reg_idx), 0);
4307 wr32(hw, TXGBE_RXWP(rxq->reg_idx), rxq->nb_rx_desc - 1);
4308 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4314 * Stop Receive Units for specified queue.
4317 txgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4319 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
4320 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
4321 struct txgbe_rx_queue *rxq;
4325 PMD_INIT_FUNC_TRACE();
4327 rxq = dev->data->rx_queues[rx_queue_id];
4329 txgbe_dev_save_rx_queue(hw, rxq->reg_idx);
4330 wr32m(hw, TXGBE_RXCFG(rxq->reg_idx), TXGBE_RXCFG_ENA, 0);
4332 /* Wait until RX Enable bit clear */
4333 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4336 rxdctl = rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
4337 } while (--poll_ms && (rxdctl & TXGBE_RXCFG_ENA));
4339 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
4341 rte_delay_us(RTE_TXGBE_WAIT_100_US);
4342 txgbe_dev_store_rx_queue(hw, rxq->reg_idx);
4344 txgbe_rx_queue_release_mbufs(rxq);
4345 txgbe_reset_rx_queue(adapter, rxq);
4346 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4352 * Start Transmit Units for specified queue.
4355 txgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4357 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
4358 struct txgbe_tx_queue *txq;
4362 PMD_INIT_FUNC_TRACE();
4364 txq = dev->data->tx_queues[tx_queue_id];
4365 wr32m(hw, TXGBE_TXCFG(txq->reg_idx), TXGBE_TXCFG_ENA, TXGBE_TXCFG_ENA);
4367 /* Wait until TX Enable ready */
4368 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4371 txdctl = rd32(hw, TXGBE_TXCFG(txq->reg_idx));
4372 } while (--poll_ms && !(txdctl & TXGBE_TXCFG_ENA));
4374 PMD_INIT_LOG(ERR, "Could not enable "
4375 "Tx Queue %d", tx_queue_id);
4378 wr32(hw, TXGBE_TXWP(txq->reg_idx), txq->tx_tail);
4379 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4385 * Stop Transmit Units for specified queue.
4388 txgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4390 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
4391 struct txgbe_tx_queue *txq;
4393 uint32_t txtdh, txtdt;
4396 PMD_INIT_FUNC_TRACE();
4398 txq = dev->data->tx_queues[tx_queue_id];
4400 /* Wait until TX queue is empty */
4401 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4403 rte_delay_us(RTE_TXGBE_WAIT_100_US);
4404 txtdh = rd32(hw, TXGBE_TXRP(txq->reg_idx));
4405 txtdt = rd32(hw, TXGBE_TXWP(txq->reg_idx));
4406 } while (--poll_ms && (txtdh != txtdt));
4409 "Tx Queue %d is not empty when stopping.",
4412 txgbe_dev_save_tx_queue(hw, txq->reg_idx);
4413 wr32m(hw, TXGBE_TXCFG(txq->reg_idx), TXGBE_TXCFG_ENA, 0);
4415 /* Wait until TX Enable bit clear */
4416 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4419 txdctl = rd32(hw, TXGBE_TXCFG(txq->reg_idx));
4420 } while (--poll_ms && (txdctl & TXGBE_TXCFG_ENA));
4422 PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
4425 rte_delay_us(RTE_TXGBE_WAIT_100_US);
4426 txgbe_dev_store_tx_queue(hw, txq->reg_idx);
4428 if (txq->ops != NULL) {
4429 txq->ops->release_mbufs(txq);
4430 txq->ops->reset(txq);
4432 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
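/* Report per-queue configuration through the ethdev rxq/txq info callbacks. */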
4438 txgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4439 struct rte_eth_rxq_info *qinfo)
4441 struct txgbe_rx_queue *rxq;
4443 rxq = dev->data->rx_queues[queue_id];
4445 qinfo->mp = rxq->mb_pool;
4446 qinfo->scattered_rx = dev->data->scattered_rx;
4447 qinfo->nb_desc = rxq->nb_rx_desc;
4449 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4450 qinfo->conf.rx_drop_en = rxq->drop_en;
4451 qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4452 qinfo->conf.offloads = rxq->offloads;
4456 txgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4457 struct rte_eth_txq_info *qinfo)
4459 struct txgbe_tx_queue *txq;
4461 txq = dev->data->tx_queues[queue_id];
4463 qinfo->nb_desc = txq->nb_tx_desc;
4465 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4466 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4467 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4469 qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4470 qinfo->conf.offloads = txq->offloads;
4471 qinfo->conf.tx_deferred_start = txq->tx_deferred_start;