1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2015-2020
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
20 #include <rte_debug.h>
21 #include <rte_ethdev.h>
22 #include <rte_ethdev_driver.h>
23 #include <rte_memzone.h>
24 #include <rte_atomic.h>
25 #include <rte_mempool.h>
26 #include <rte_malloc.h>
28 #include <rte_ether.h>
29 #include <rte_prefetch.h>
33 #include <rte_string_fns.h>
34 #include <rte_errno.h>
38 #include "txgbe_logs.h"
39 #include "base/txgbe.h"
40 #include "txgbe_ethdev.h"
41 #include "txgbe_rxtx.h"
43 #ifdef RTE_LIBRTE_IEEE1588
44 #define TXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
46 #define TXGBE_TX_IEEE1588_TMST 0
49 /* Bit Mask to indicate what bits required for building TX context */
50 static const u64 TXGBE_TX_OFFLOAD_MASK = (PKT_TX_IP_CKSUM |
59 PKT_TX_OUTER_IP_CKSUM |
60 TXGBE_TX_IEEE1588_TMST);
/* Complement of the supported mask: any ol_flags bit set here is an
 * offload this PMD cannot handle and is rejected in txgbe_prep_pkts().
 */
62 #define TXGBE_TX_OFFLOAD_NOTSUP_MASK \
63 (PKT_TX_OFFLOAD_MASK ^ TXGBE_TX_OFFLOAD_MASK)
66 * Prefetch a cache line into all cache levels.
/* Thin wrapper so the prefetch strategy can be swapped in one place. */
68 #define rte_txgbe_prefetch(p) rte_prefetch0(p)
/* Report whether this port is a Virtual Function, decided solely by
 * hw->mac.type (raptor_vf => VF). Return convention presumably
 * non-zero for VF, 0 otherwise — TODO confirm against full body.
 */
71 txgbe_is_vf(struct rte_eth_dev *dev)
73 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
75 switch (hw->mac.type) {
76 case txgbe_mac_raptor_vf:
83 /*********************************************************************
87 **********************************************************************/
90 * Check for descriptors with their DD bit set and free mbufs.
91 * Return the total number of buffers freed.
93 static __rte_always_inline int
94 txgbe_tx_free_bufs(struct txgbe_tx_queue *txq)
96 struct txgbe_tx_entry *txep;
99 struct rte_mbuf *m, *free[RTE_TXGBE_TX_MAX_FREE_BUF_SZ];
101 /* check DD bit on threshold descriptor */
102 status = txq->tx_ring[txq->tx_next_dd].dw3;
103 if (!(status & rte_cpu_to_le_32(TXGBE_TXD_DD))) {
/* Descriptor not written back yet: if the queue is getting full
 * (free count below 2*threshold), kick the HW to flush write-backs.
 */
104 if (txq->nb_tx_free >> 1 < txq->tx_free_thresh)
105 txgbe_set32_masked(txq->tdc_reg_addr,
106 TXGBE_TXCFG_FLUSH, TXGBE_TXCFG_FLUSH);
111 * first buffer to free from S/W ring is at index
112 * tx_next_dd - (tx_free_thresh-1)
114 txep = &txq->sw_ring[txq->tx_next_dd - (txq->tx_free_thresh - 1)];
115 for (i = 0; i < txq->tx_free_thresh; ++i, ++txep) {
116 /* free buffers one at a time */
117 m = rte_pktmbuf_prefree_seg(txep->mbuf);
120 if (unlikely(m == NULL))
/* Batch same-pool mbufs; flush the batch when it fills up or
 * when the pool changes (put_bulk requires a single pool).
 */
123 if (nb_free >= RTE_TXGBE_TX_MAX_FREE_BUF_SZ ||
124 (nb_free > 0 && m->pool != free[0]->pool)) {
125 rte_mempool_put_bulk(free[0]->pool,
126 (void **)free, nb_free);
/* Return any remaining batched mbufs to their pool. */
134 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
136 /* buffers were freed, update counters */
137 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_free_thresh);
138 txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_free_thresh);
/* Wrap the next-DD index at the end of the ring. */
139 if (txq->tx_next_dd >= txq->nb_tx_desc)
140 txq->tx_next_dd = (uint16_t)(txq->tx_free_thresh - 1)
142 return txq->tx_free_thresh;
145 /* Populate 4 descriptors with data from 4 mbufs */
147 tx4(volatile struct txgbe_tx_desc *txdp, struct rte_mbuf **pkts)
149 uint64_t buf_dma_addr;
153 for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
154 buf_dma_addr = rte_mbuf_data_iova(*pkts);
155 pkt_len = (*pkts)->data_len;
157 /* write data to descriptor */
158 txdp->qw0 = rte_cpu_to_le_64(buf_dma_addr);
159 txdp->dw2 = cpu_to_le32(TXGBE_TXD_FLAGS |
160 TXGBE_TXD_DATLEN(pkt_len));
161 txdp->dw3 = cpu_to_le32(TXGBE_TXD_PAYLEN(pkt_len));
/* Warm the cache line holding the pool pointer — it is read
 * again on the free path in txgbe_tx_free_bufs().
 */
163 rte_prefetch0(&(*pkts)->pool);
167 /* Populate 1 descriptor with data from 1 mbuf */
169 tx1(volatile struct txgbe_tx_desc *txdp, struct rte_mbuf **pkts)
171 uint64_t buf_dma_addr;
174 buf_dma_addr = rte_mbuf_data_iova(*pkts);
175 pkt_len = (*pkts)->data_len;
177 /* write data to descriptor */
178 txdp->qw0 = cpu_to_le64(buf_dma_addr);
179 txdp->dw2 = cpu_to_le32(TXGBE_TXD_FLAGS |
180 TXGBE_TXD_DATLEN(pkt_len));
181 txdp->dw3 = cpu_to_le32(TXGBE_TXD_PAYLEN(pkt_len));
/* Same pool-pointer prefetch as tx4(); consumed on the free path. */
183 rte_prefetch0(&(*pkts)->pool);
187 * Fill H/W descriptor ring with mbuf data.
188 * Copy mbuf pointers to the S/W ring.
191 txgbe_tx_fill_hw_ring(struct txgbe_tx_queue *txq, struct rte_mbuf **pkts,
194 volatile struct txgbe_tx_desc *txdp = &txq->tx_ring[txq->tx_tail];
195 struct txgbe_tx_entry *txep = &txq->sw_ring[txq->tx_tail];
/* Unroll factor: descriptors are written four at a time via tx4(). */
196 const int N_PER_LOOP = 4;
197 const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
198 int mainpart, leftover;
202 * Process most of the packets in chunks of N pkts. Any
203 * leftover packets will get processed one at a time.
205 mainpart = (nb_pkts & ((uint32_t)~N_PER_LOOP_MASK));
206 leftover = (nb_pkts & ((uint32_t)N_PER_LOOP_MASK));
207 for (i = 0; i < mainpart; i += N_PER_LOOP) {
208 /* Copy N mbuf pointers to the S/W ring */
209 for (j = 0; j < N_PER_LOOP; ++j)
210 (txep + i + j)->mbuf = *(pkts + i + j);
211 tx4(txdp + i, pkts + i);
/* Tail of nb_pkts not divisible by 4: one descriptor at a time. */
214 if (unlikely(leftover > 0)) {
215 for (i = 0; i < leftover; ++i) {
216 (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
217 tx1(txdp + mainpart + i, pkts + mainpart + i);
/* Simple (single-segment, no per-packet offload context) transmit body.
 * Caller guarantees nb_pkts <= RTE_PMD_TXGBE_TX_MAX_BURST; returns the
 * number of packets actually queued (may be < nb_pkts when the ring is
 * short on free descriptors).
 */
222 static inline uint16_t
223 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
226 struct txgbe_tx_queue *txq = (struct txgbe_tx_queue *)tx_queue;
230 * Begin scanning the H/W ring for done descriptors when the
231 * number of available descriptors drops below tx_free_thresh. For
232 * each done descriptor, free the associated buffer.
234 if (txq->nb_tx_free < txq->tx_free_thresh)
235 txgbe_tx_free_bufs(txq);
237 /* Only use descriptors that are available */
238 nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
239 if (unlikely(nb_pkts == 0))
242 /* Use exactly nb_pkts descriptors */
243 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
246 * At this point, we know there are enough descriptors in the
247 * ring to transmit all the packets. This assumes that each
248 * mbuf contains a single segment, and that no new offloads
249 * are expected, which would require a new context descriptor.
253 * See if we're going to wrap-around. If so, handle the top
254 * of the descriptor ring first, then do the bottom. If not,
255 * the processing looks just like the "bottom" part anyway...
257 if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
258 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
259 txgbe_tx_fill_hw_ring(txq, tx_pkts, n);
263 /* Fill H/W descriptor ring with mbuf data */
264 txgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
265 txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
268 * Check for wrap-around. This would only happen if we used
269 * up to the last descriptor in the ring, no more, no less.
271 if (txq->tx_tail >= txq->nb_tx_desc)
274 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
275 (uint16_t)txq->port_id, (uint16_t)txq->queue_id,
276 (uint16_t)txq->tx_tail, (uint16_t)nb_pkts);
278 /* update tail pointer */
/* Relaxed write: a stronger barrier presumably precedes this on the
 * elided line above — TODO confirm ordering vs. descriptor writes.
 */
280 txgbe_set32_relaxed(txq->tdt_reg_addr, txq->tx_tail);
/* Public simple Tx entry point: splits oversized bursts into
 * TX_MAX_BURST-sized chunks and stops early when a chunk is only
 * partially transmitted (ring full).
 */
286 txgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
291 /* Try to transmit at least chunks of TX_MAX_BURST pkts */
292 if (likely(nb_pkts <= RTE_PMD_TXGBE_TX_MAX_BURST))
293 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
295 /* transmit more than the max burst, in chunks of TX_MAX_BURST */
300 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_TXGBE_TX_MAX_BURST);
301 ret = tx_xmit_pkts(tx_queue, &tx_pkts[nb_tx], n);
302 nb_tx = (uint16_t)(nb_tx + ret);
303 nb_pkts = (uint16_t)(nb_pkts - ret);
/* Build and write a Tx context descriptor for the requested offloads
 * (TSO, L3/L4 checksum, tunneling, VLAN insertion), and cache the
 * <flags, offload> pair in txq->ctx_cache[ctx_idx] so later packets
 * with identical offloads can reuse the slot (see what_ctx_update()).
 * tx_offload_mask accumulates exactly the fields that matter for this
 * flag combination; only masked-in fields participate in cache matching.
 */
312 txgbe_set_xmit_ctx(struct txgbe_tx_queue *txq,
313 volatile struct txgbe_tx_ctx_desc *ctx_txd,
314 uint64_t ol_flags, union txgbe_tx_offload tx_offload)
316 union txgbe_tx_offload tx_offload_mask;
317 uint32_t type_tucmd_mlhl;
318 uint32_t mss_l4len_idx;
320 uint32_t vlan_macip_lens;
321 uint32_t tunnel_seed;
323 ctx_idx = txq->ctx_curr;
324 tx_offload_mask.data[0] = 0;
325 tx_offload_mask.data[1] = 0;
327 /* Specify which HW CTX to upload. */
328 mss_l4len_idx = TXGBE_TXD_IDX(ctx_idx);
329 type_tucmd_mlhl = TXGBE_TXD_CTXT;
/* Packet type id always participates in the context. */
331 tx_offload_mask.ptid |= ~0;
332 type_tucmd_mlhl |= TXGBE_TXD_PTID(tx_offload.ptid);
334 /* check if TCP segmentation required for this packet */
335 if (ol_flags & PKT_TX_TCP_SEG) {
336 tx_offload_mask.l2_len |= ~0;
337 tx_offload_mask.l3_len |= ~0;
338 tx_offload_mask.l4_len |= ~0;
339 tx_offload_mask.tso_segsz |= ~0;
340 mss_l4len_idx |= TXGBE_TXD_MSS(tx_offload.tso_segsz);
341 mss_l4len_idx |= TXGBE_TXD_L4LEN(tx_offload.l4_len);
342 } else { /* no TSO, check if hardware checksum is needed */
343 if (ol_flags & PKT_TX_IP_CKSUM) {
344 tx_offload_mask.l2_len |= ~0;
345 tx_offload_mask.l3_len |= ~0;
/* L4 checksum: fixed header length per protocol, not taken from
 * the mbuf's l4_len.
 */
348 switch (ol_flags & PKT_TX_L4_MASK) {
349 case PKT_TX_UDP_CKSUM:
351 TXGBE_TXD_L4LEN(sizeof(struct rte_udp_hdr));
352 tx_offload_mask.l2_len |= ~0;
353 tx_offload_mask.l3_len |= ~0;
355 case PKT_TX_TCP_CKSUM:
357 TXGBE_TXD_L4LEN(sizeof(struct rte_tcp_hdr));
358 tx_offload_mask.l2_len |= ~0;
359 tx_offload_mask.l3_len |= ~0;
361 case PKT_TX_SCTP_CKSUM:
363 TXGBE_TXD_L4LEN(sizeof(struct rte_sctp_hdr));
364 tx_offload_mask.l2_len |= ~0;
365 tx_offload_mask.l3_len |= ~0;
/* IP header length is encoded in 2-byte units (>> 1). */
372 vlan_macip_lens = TXGBE_TXD_IPLEN(tx_offload.l3_len >> 1);
374 if (ol_flags & PKT_TX_TUNNEL_MASK) {
375 tx_offload_mask.outer_tun_len |= ~0;
376 tx_offload_mask.outer_l2_len |= ~0;
377 tx_offload_mask.outer_l3_len |= ~0;
378 tx_offload_mask.l2_len |= ~0;
/* Tunnel hdr len in 2-byte units; outer IP len in 4-byte units. */
379 tunnel_seed = TXGBE_TXD_ETUNLEN(tx_offload.outer_tun_len >> 1);
380 tunnel_seed |= TXGBE_TXD_EIPLEN(tx_offload.outer_l3_len >> 2);
382 switch (ol_flags & PKT_TX_TUNNEL_MASK) {
383 case PKT_TX_TUNNEL_IPIP:
384 /* for non UDP / GRE tunneling, set to 0b */
386 case PKT_TX_TUNNEL_VXLAN:
387 case PKT_TX_TUNNEL_GENEVE:
388 tunnel_seed |= TXGBE_TXD_ETYPE_UDP;
390 case PKT_TX_TUNNEL_GRE:
391 tunnel_seed |= TXGBE_TXD_ETYPE_GRE;
394 PMD_TX_LOG(ERR, "Tunnel type not supported");
/* Tunneled: MACLEN describes the outer L2 header. */
397 vlan_macip_lens |= TXGBE_TXD_MACLEN(tx_offload.outer_l2_len);
400 vlan_macip_lens |= TXGBE_TXD_MACLEN(tx_offload.l2_len);
403 if (ol_flags & PKT_TX_VLAN_PKT) {
404 tx_offload_mask.vlan_tci |= ~0;
405 vlan_macip_lens |= TXGBE_TXD_VLAN(tx_offload.vlan_tci);
/* Cache only the masked-in offload fields for future matching. */
408 txq->ctx_cache[ctx_idx].flags = ol_flags;
409 txq->ctx_cache[ctx_idx].tx_offload.data[0] =
410 tx_offload_mask.data[0] & tx_offload.data[0];
411 txq->ctx_cache[ctx_idx].tx_offload.data[1] =
412 tx_offload_mask.data[1] & tx_offload.data[1];
413 txq->ctx_cache[ctx_idx].tx_offload_mask = tx_offload_mask;
/* Commit the four little-endian dwords of the context descriptor. */
415 ctx_txd->dw0 = rte_cpu_to_le_32(vlan_macip_lens);
416 ctx_txd->dw1 = rte_cpu_to_le_32(tunnel_seed);
417 ctx_txd->dw2 = rte_cpu_to_le_32(type_tucmd_mlhl);
418 ctx_txd->dw3 = rte_cpu_to_le_32(mss_l4len_idx);
422 * Check which hardware context can be used. Use the existing match
423 * or create a new context descriptor.
425 static inline uint32_t
426 what_ctx_update(struct txgbe_tx_queue *txq, uint64_t flags,
427 union txgbe_tx_offload tx_offload)
429 /* If match with the current used context */
430 if (likely(txq->ctx_cache[txq->ctx_curr].flags == flags &&
431 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
432 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
433 & tx_offload.data[0])) &&
434 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
435 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
436 & tx_offload.data[1]))))
437 return txq->ctx_curr;
439 /* What if match with the next context */
/* NOTE(review): this second probe reads the SAME ctx_curr slot as the
 * first; presumably ctx_curr is toggled to the alternate slot between
 * the two checks (e.g. ctx_curr ^= 1) — verify, otherwise the check
 * is a dead duplicate and the second HW context is never reused.
 */
441 if (likely(txq->ctx_cache[txq->ctx_curr].flags == flags &&
442 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
443 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
444 & tx_offload.data[0])) &&
445 (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
446 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
447 & tx_offload.data[1]))))
448 return txq->ctx_curr;
450 /* Mismatch, use the previous context */
/* TXGBE_CTX_NUM signals the caller that a new context descriptor
 * must be built (see new_ctx in txgbe_xmit_pkts()).
 */
451 return TXGBE_CTX_NUM;
/* Translate mbuf checksum/TSO/VLAN ol_flags into the olinfo_status
 * bits of a Tx data descriptor (L4CS/IPCS/EIPCS enables).
 */
454 static inline uint32_t
455 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
459 if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM) {
461 tmp |= TXGBE_TXD_L4CS;
463 if (ol_flags & PKT_TX_IP_CKSUM) {
465 tmp |= TXGBE_TXD_IPCS;
467 if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
469 tmp |= TXGBE_TXD_EIPCS;
471 if (ol_flags & PKT_TX_TCP_SEG) {
473 /* implies IPv4 cksum */
474 if (ol_flags & PKT_TX_IPV4)
475 tmp |= TXGBE_TXD_IPCS;
/* TSO always requires L4 checksum insertion. */
476 tmp |= TXGBE_TXD_L4CS;
478 if (ol_flags & PKT_TX_VLAN_PKT)
/* Translate mbuf ol_flags into the cmd_type_len bits of a Tx data
 * descriptor: VLAN insert, TSO enable, and MACsec/LinkSec.
 */
484 static inline uint32_t
485 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
487 uint32_t cmdtype = 0;
489 if (ol_flags & PKT_TX_VLAN_PKT)
490 cmdtype |= TXGBE_TXD_VLE;
491 if (ol_flags & PKT_TX_TCP_SEG)
492 cmdtype |= TXGBE_TXD_TSE;
493 if (ol_flags & PKT_TX_MACSEC)
494 cmdtype |= TXGBE_TXD_LINKSEC;
/* Derive the hardware packet-type id (ptid) for the context descriptor.
 * When no offload flags are set, the mbuf's own packet_type is encoded
 * directly; otherwise the ptype is reconstructed from the offload
 * flags (L2/L3/L4 and tunnel), choosing INNER_* variants when the
 * packet is tunneled.
 */
498 static inline uint8_t
499 tx_desc_ol_flags_to_ptid(uint64_t oflags, uint32_t ptype)
504 return txgbe_encode_ptype(ptype);
506 /* Only support flags in TXGBE_TX_OFFLOAD_MASK */
507 tun = !!(oflags & PKT_TX_TUNNEL_MASK);
/* L2 level */
510 ptype = RTE_PTYPE_L2_ETHER;
511 if (oflags & PKT_TX_VLAN)
512 ptype |= RTE_PTYPE_L2_ETHER_VLAN;
/* L3 level: outer headers first, then inner (or sole) IP header. */
515 if (oflags & (PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IP_CKSUM))
516 ptype |= RTE_PTYPE_L3_IPV4;
517 else if (oflags & (PKT_TX_OUTER_IPV6))
518 ptype |= RTE_PTYPE_L3_IPV6;
520 if (oflags & (PKT_TX_IPV4 | PKT_TX_IP_CKSUM))
521 ptype |= (tun ? RTE_PTYPE_INNER_L3_IPV4 : RTE_PTYPE_L3_IPV4);
522 else if (oflags & (PKT_TX_IPV6))
523 ptype |= (tun ? RTE_PTYPE_INNER_L3_IPV6 : RTE_PTYPE_L3_IPV6);
/* L4 level from checksum request; TSO forces TCP below. */
526 switch (oflags & (PKT_TX_L4_MASK)) {
527 case PKT_TX_TCP_CKSUM:
528 ptype |= (tun ? RTE_PTYPE_INNER_L4_TCP : RTE_PTYPE_L4_TCP);
530 case PKT_TX_UDP_CKSUM:
531 ptype |= (tun ? RTE_PTYPE_INNER_L4_UDP : RTE_PTYPE_L4_UDP);
533 case PKT_TX_SCTP_CKSUM:
534 ptype |= (tun ? RTE_PTYPE_INNER_L4_SCTP : RTE_PTYPE_L4_SCTP);
538 if (oflags & PKT_TX_TCP_SEG)
539 ptype |= (tun ? RTE_PTYPE_INNER_L4_TCP : RTE_PTYPE_L4_TCP);
/* Tunnel encapsulation type. */
542 switch (oflags & PKT_TX_TUNNEL_MASK) {
543 case PKT_TX_TUNNEL_VXLAN:
544 ptype |= RTE_PTYPE_L2_ETHER |
546 RTE_PTYPE_TUNNEL_VXLAN;
547 ptype |= RTE_PTYPE_INNER_L2_ETHER;
549 case PKT_TX_TUNNEL_GRE:
550 ptype |= RTE_PTYPE_L2_ETHER |
552 RTE_PTYPE_TUNNEL_GRE;
553 ptype |= RTE_PTYPE_INNER_L2_ETHER;
555 case PKT_TX_TUNNEL_GENEVE:
556 ptype |= RTE_PTYPE_L2_ETHER |
558 RTE_PTYPE_TUNNEL_GENEVE;
559 ptype |= RTE_PTYPE_INNER_L2_ETHER;
561 case PKT_TX_TUNNEL_VXLAN_GPE:
562 ptype |= RTE_PTYPE_L2_ETHER |
564 RTE_PTYPE_TUNNEL_VXLAN_GPE;
565 ptype |= RTE_PTYPE_INNER_L2_ETHER;
567 case PKT_TX_TUNNEL_IPIP:
568 case PKT_TX_TUNNEL_IP:
569 ptype |= RTE_PTYPE_L2_ETHER |
575 return txgbe_encode_ptype(ptype);
/* Fallback Tx free threshold when none is configured by the app. */
578 #ifndef DEFAULT_TX_FREE_THRESH
579 #define DEFAULT_TX_FREE_THRESH 32
582 /* Reset transmit descriptors after they have been used */
584 txgbe_xmit_cleanup(struct txgbe_tx_queue *txq)
586 struct txgbe_tx_entry *sw_ring = txq->sw_ring;
587 volatile struct txgbe_tx_desc *txr = txq->tx_ring;
588 uint16_t last_desc_cleaned = txq->last_desc_cleaned;
589 uint16_t nb_tx_desc = txq->nb_tx_desc;
590 uint16_t desc_to_clean_to;
591 uint16_t nb_tx_to_clean;
594 /* Determine the last descriptor needing to be cleaned */
595 desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_free_thresh);
596 if (desc_to_clean_to >= nb_tx_desc)
597 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
599 /* Check to make sure the last descriptor to clean is done */
/* Follow last_id so we test the final descriptor of a multi-desc
 * packet, not one in the middle of it.
 */
600 desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
601 status = txr[desc_to_clean_to].dw3;
602 if (!(status & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD))) {
603 PMD_TX_FREE_LOG(DEBUG,
604 "TX descriptor %4u is not done"
605 "(port=%d queue=%d)",
607 txq->port_id, txq->queue_id);
/* Queue is getting full: ask HW to flush descriptor write-backs. */
608 if (txq->nb_tx_free >> 1 < txq->tx_free_thresh)
609 txgbe_set32_masked(txq->tdc_reg_addr,
610 TXGBE_TXCFG_FLUSH, TXGBE_TXCFG_FLUSH);
611 /* Failed to clean any descriptors, better luck next time */
615 /* Figure out how many descriptors will be cleaned */
616 if (last_desc_cleaned > desc_to_clean_to)
617 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
620 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
623 PMD_TX_FREE_LOG(DEBUG,
624 "Cleaning %4u TX descriptors: %4u to %4u "
625 "(port=%d queue=%d)",
626 nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
627 txq->port_id, txq->queue_id);
630 * The last descriptor to clean is done, so that means all the
631 * descriptors from the last descriptor that was cleaned
632 * up to the last descriptor with the RS bit set
633 * are done. Only reset the threshold descriptor.
635 txr[desc_to_clean_to].dw3 = 0;
637 /* Update the txq to reflect the last descriptor that was cleaned */
638 txq->last_desc_cleaned = desc_to_clean_to;
639 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
/* Compute the encapsulation header length (bytes after the outer L3
 * header) for the tunnel type flagged in mbuf->ol_flags; used to fill
 * outer_tun_len in the Tx context descriptor.
 */
645 static inline uint8_t
646 txgbe_get_tun_len(struct rte_mbuf *mbuf)
648 struct txgbe_genevehdr genevehdr;
649 const struct txgbe_genevehdr *gh;
652 switch (mbuf->ol_flags & PKT_TX_TUNNEL_MASK) {
653 case PKT_TX_TUNNEL_IPIP:
656 case PKT_TX_TUNNEL_VXLAN:
657 case PKT_TX_TUNNEL_VXLAN_GPE:
658 tun_len = sizeof(struct txgbe_udphdr)
659 + sizeof(struct txgbe_vxlanhdr);
661 case PKT_TX_TUNNEL_GRE:
662 tun_len = sizeof(struct txgbe_nvgrehdr);
664 case PKT_TX_TUNNEL_GENEVE:
/* GENEVE options are variable length: read opt_len from the
 * header in the mbuf. NOTE(review): rte_pktmbuf_read() can
 * return NULL when the requested span exceeds pkt_len; gh is
 * dereferenced without a check — confirm callers guarantee a
 * complete outer header.
 */
665 gh = rte_pktmbuf_read(mbuf,
666 mbuf->outer_l2_len + mbuf->outer_l3_len,
667 sizeof(genevehdr), &genevehdr);
668 tun_len = sizeof(struct txgbe_udphdr)
669 + sizeof(struct txgbe_genevehdr)
670 + (gh->opt_len << 2);
/* Full-featured Tx burst: handles multi-segment mbufs and per-packet
 * offloads (TSO, checksum, VLAN, tunneling) by emitting an optional
 * context descriptor followed by one data descriptor per segment.
 * Returns the number of packets actually transmitted.
 */
680 txgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
683 struct txgbe_tx_queue *txq;
684 struct txgbe_tx_entry *sw_ring;
685 struct txgbe_tx_entry *txe, *txn;
686 volatile struct txgbe_tx_desc *txr;
687 volatile struct txgbe_tx_desc *txd;
688 struct rte_mbuf *tx_pkt;
689 struct rte_mbuf *m_seg;
690 uint64_t buf_dma_addr;
691 uint32_t olinfo_status;
692 uint32_t cmd_type_len;
703 union txgbe_tx_offload tx_offload;
705 tx_offload.data[0] = 0;
706 tx_offload.data[1] = 0;
708 sw_ring = txq->sw_ring;
710 tx_id = txq->tx_tail;
711 txe = &sw_ring[tx_id];
713 /* Determine if the descriptor ring needs to be cleaned. */
714 if (txq->nb_tx_free < txq->tx_free_thresh)
715 txgbe_xmit_cleanup(txq);
717 rte_prefetch0(&txe->mbuf->pool);
/* TX loop: one iteration per packet. */
720 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
723 pkt_len = tx_pkt->pkt_len;
726 * Determine how many (if any) context descriptors
727 * are needed for offload functionality.
729 ol_flags = tx_pkt->ol_flags;
731 /* If hardware offload required */
732 tx_ol_req = ol_flags & TXGBE_TX_OFFLOAD_MASK;
/* Gather all offload metadata the context descriptor needs. */
734 tx_offload.ptid = tx_desc_ol_flags_to_ptid(tx_ol_req,
735 tx_pkt->packet_type);
736 tx_offload.l2_len = tx_pkt->l2_len;
737 tx_offload.l3_len = tx_pkt->l3_len;
738 tx_offload.l4_len = tx_pkt->l4_len;
739 tx_offload.vlan_tci = tx_pkt->vlan_tci;
740 tx_offload.tso_segsz = tx_pkt->tso_segsz;
741 tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
742 tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
743 tx_offload.outer_tun_len = txgbe_get_tun_len(tx_pkt);
745 /* If new context need be built or reuse the exist ctx*/
746 ctx = what_ctx_update(txq, tx_ol_req, tx_offload);
747 /* Only allocate context descriptor if required */
748 new_ctx = (ctx == TXGBE_CTX_NUM);
753 * Keep track of how many descriptors are used this loop
754 * This will always be the number of segments + the number of
755 * Context descriptors required to transmit the packet
757 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
760 * The number of descriptors that must be allocated for a
761 * packet is the number of segments of that packet, plus 1
762 * Context Descriptor for the hardware offload, if any.
763 * Determine the last TX descriptor to allocate in the TX ring
764 * for the packet, starting from the current position (tx_id)
767 tx_last = (uint16_t)(tx_id + nb_used - 1);
/* Circular ring */
770 if (tx_last >= txq->nb_tx_desc)
771 tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
773 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
774 " tx_first=%u tx_last=%u",
775 (uint16_t)txq->port_id,
776 (uint16_t)txq->queue_id,
782 * Make sure there are enough TX descriptors available to
783 * transmit the entire packet.
784 * nb_used better be less than or equal to txq->tx_free_thresh
786 if (nb_used > txq->nb_tx_free) {
787 PMD_TX_FREE_LOG(DEBUG,
788 "Not enough free TX descriptors "
789 "nb_used=%4u nb_free=%4u "
790 "(port=%d queue=%d)",
791 nb_used, txq->nb_tx_free,
792 txq->port_id, txq->queue_id);
794 if (txgbe_xmit_cleanup(txq) != 0) {
795 /* Could not clean any descriptors */
801 /* nb_used better be <= txq->tx_free_thresh */
802 if (unlikely(nb_used > txq->tx_free_thresh)) {
803 PMD_TX_FREE_LOG(DEBUG,
804 "The number of descriptors needed to "
805 "transmit the packet exceeds the "
806 "RS bit threshold. This will impact "
808 "nb_used=%4u nb_free=%4u "
809 "tx_free_thresh=%4u. "
810 "(port=%d queue=%d)",
811 nb_used, txq->nb_tx_free,
813 txq->port_id, txq->queue_id);
815 * Loop here until there are enough TX
816 * descriptors or until the ring cannot be
819 while (nb_used > txq->nb_tx_free) {
820 if (txgbe_xmit_cleanup(txq) != 0) {
822 * Could not clean any
834 * By now there are enough free TX descriptors to transmit
839 * Set common flags of all TX Data Descriptors.
841 * The following bits must be set in all Data Descriptors:
842 * - TXGBE_TXD_DTYP_DATA
843 * - TXGBE_TXD_DCMD_DEXT
845 * The following bits must be set in the first Data Descriptor
846 * and are ignored in the other ones:
847 * - TXGBE_TXD_DCMD_IFCS
848 * - TXGBE_TXD_MAC_1588
849 * - TXGBE_TXD_DCMD_VLE
851 * The following bits must only be set in the last Data
853 * - TXGBE_TXD_CMD_EOP
855 * The following bits can be set in any Data Descriptor, but
856 * are only set in the last Data Descriptor:
859 cmd_type_len = TXGBE_TXD_FCS;
861 #ifdef RTE_LIBRTE_IEEE1588
862 if (ol_flags & PKT_TX_IEEE1588_TMST)
863 cmd_type_len |= TXGBE_TXD_1588;
868 if (ol_flags & PKT_TX_TCP_SEG) {
869 /* when TSO is on, paylen in descriptor is the
870 * not the packet len but the tcp payload len
872 pkt_len -= (tx_offload.l2_len +
873 tx_offload.l3_len + tx_offload.l4_len);
/* For tunneled TSO, also subtract the outer headers. */
875 (tx_pkt->ol_flags & PKT_TX_TUNNEL_MASK)
876 ? tx_offload.outer_l2_len +
877 tx_offload.outer_l3_len : 0;
881 * Setup the TX Advanced Context Descriptor if required
884 volatile struct txgbe_tx_ctx_desc *ctx_txd;
886 ctx_txd = (volatile struct txgbe_tx_ctx_desc *)
889 txn = &sw_ring[txe->next_id];
890 rte_prefetch0(&txn->mbuf->pool);
/* Free any mbuf still attached to the slot we are reusing. */
892 if (txe->mbuf != NULL) {
893 rte_pktmbuf_free_seg(txe->mbuf);
897 txgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
900 txe->last_id = tx_last;
901 tx_id = txe->next_id;
906 * Setup the TX Advanced Data Descriptor,
907 * This path will go through
908 * whatever new/reuse the context descriptor
910 cmd_type_len |= tx_desc_ol_flags_to_cmdtype(ol_flags);
912 tx_desc_cksum_flags_to_olinfo(ol_flags);
913 olinfo_status |= TXGBE_TXD_IDX(ctx);
916 olinfo_status |= TXGBE_TXD_PAYLEN(pkt_len);
/* Per-segment loop: one data descriptor per mbuf segment. */
921 txn = &sw_ring[txe->next_id];
922 rte_prefetch0(&txn->mbuf->pool);
924 if (txe->mbuf != NULL)
925 rte_pktmbuf_free_seg(txe->mbuf);
929 * Set up Transmit Data Descriptor.
931 slen = m_seg->data_len;
932 buf_dma_addr = rte_mbuf_data_iova(m_seg);
933 txd->qw0 = rte_cpu_to_le_64(buf_dma_addr);
934 txd->dw2 = rte_cpu_to_le_32(cmd_type_len | slen);
935 txd->dw3 = rte_cpu_to_le_32(olinfo_status);
936 txe->last_id = tx_last;
937 tx_id = txe->next_id;
940 } while (m_seg != NULL);
943 * The last packet data descriptor needs End Of Packet (EOP)
945 cmd_type_len |= TXGBE_TXD_EOP;
946 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
948 txd->dw2 |= rte_cpu_to_le_32(cmd_type_len);
956 * Set the Transmit Descriptor Tail (TDT)
958 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
959 (uint16_t)txq->port_id, (uint16_t)txq->queue_id,
960 (uint16_t)tx_id, (uint16_t)nb_tx);
/* Notify the NIC of the new tail; relaxed store presumably preceded
 * by a write barrier on an elided line — TODO confirm ordering.
 */
961 txgbe_set32_relaxed(txq->tdt_reg_addr, tx_id);
962 txq->tx_tail = tx_id;
967 /*********************************************************************
971 **********************************************************************/
/* rte_eth_tx_prepare() callback: validate segment count against the
 * HW limit and reject unsupported offload flags before transmission.
 * Returns the index of the first bad packet (== nb_pkts if all OK).
 */
973 txgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
978 struct txgbe_tx_queue *txq = (struct txgbe_tx_queue *)tx_queue;
980 for (i = 0; i < nb_pkts; i++) {
982 ol_flags = m->ol_flags;
985 * Check if packet meets requirements for number of segments
987 * NOTE: for txgbe it's always (40 - WTHRESH) for both TSO and
991 if (m->nb_segs > TXGBE_TX_MAX_SEG - txq->wthresh) {
996 if (ol_flags & TXGBE_TX_OFFLOAD_NOTSUP_MASK) {
/* NOTE(review): rte_errno is conventionally assigned a POSITIVE
 * errno value; "-ENOTSUP" here looks inverted — verify against
 * the other error paths in this function.
 */
997 rte_errno = -ENOTSUP;
1001 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1002 ret = rte_validate_tx_offload(m);
/* Compute pseudo-header checksums etc. required by the NIC. */
1008 ret = rte_net_intel_cksum_prepare(m);
1018 /*********************************************************************
1022 **********************************************************************/
1023 /* @note: fix txgbe_dev_supported_ptypes_get() if any change here. */
/* Decode the HW packet-type id from the Rx descriptor's pkt_info
 * field into an RTE_PTYPE_* value (masked by the queue's ptid_mask).
 */
1024 static inline uint32_t
1025 txgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptid_mask)
1027 uint16_t ptid = TXGBE_RXD_PTID(pkt_info);
1031 return txgbe_decode_ptype(ptid);
/* Map the Rx descriptor's RSS-type (and, with IEEE1588, ethertype
 * filter id) to mbuf ol_flags: RSS hash valid, FDIR match, PTP.
 */
1034 static inline uint64_t
1035 txgbe_rxd_pkt_info_to_pkt_flags(uint32_t pkt_info)
/* Index = RSS type nibble; non-zero entries mark hash-bearing types,
 * entry 15 marks a flow-director match.
 */
1037 static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1038 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1039 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1040 PKT_RX_RSS_HASH, 0, 0, 0,
1041 0, 0, 0, PKT_RX_FDIR,
1043 #ifdef RTE_LIBRTE_IEEE1588
1044 static uint64_t ip_pkt_etqf_map[8] = {
1045 0, 0, 0, PKT_RX_IEEE1588_PTP,
1048 int etfid = txgbe_etflt_id(TXGBE_RXD_PTID(pkt_info));
1049 if (likely(-1 != etfid))
1050 return ip_pkt_etqf_map[etfid] |
1051 ip_rss_types_map[TXGBE_RXD_RSSTYPE(pkt_info)];
1053 return ip_rss_types_map[TXGBE_RXD_RSSTYPE(pkt_info)];
1055 return ip_rss_types_map[TXGBE_RXD_RSSTYPE(pkt_info)];
/* Derive VLAN-stripped (and, with IEEE1588, timestamp) ol_flags from
 * the Rx descriptor status word.
 */
1059 static inline uint64_t
1060 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1065 * Check if VLAN present only.
1066 * Do not check whether L3/L4 rx checksum done by NIC or not,
1067 * That can be found from rte_eth_rxmode.offloads flag
1069 pkt_flags = (rx_status & TXGBE_RXD_STAT_VLAN &&
1070 vlan_flags & PKT_RX_VLAN_STRIPPED)
1073 #ifdef RTE_LIBRTE_IEEE1588
1074 if (rx_status & TXGBE_RXD_STAT_1588)
1075 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
/* Translate the descriptor's checksum status/error bits into
 * PKT_RX_*_CKSUM_GOOD/BAD ol_flags for IP, L4 and outer IP.
 */
1080 static inline uint64_t
1081 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1083 uint64_t pkt_flags = 0;
1085 /* checksum offload can't be disabled */
1086 if (rx_status & TXGBE_RXD_STAT_IPCS) {
1087 pkt_flags |= (rx_status & TXGBE_RXD_ERR_IPCS
1088 ? PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD);
1091 if (rx_status & TXGBE_RXD_STAT_L4CS) {
1092 pkt_flags |= (rx_status & TXGBE_RXD_ERR_L4CS
1093 ? PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD);
/* Outer (encapsulating) IP header checksum error. */
1096 if (rx_status & TXGBE_RXD_STAT_EIPCS &&
1097 rx_status & TXGBE_RXD_ERR_EIPCS) {
1098 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1105 * LOOK_AHEAD defines how many desc statuses to check beyond the
1106 * current descriptor.
1107 * It must be a pound define for optimal performance.
1108 * Do not change the value of LOOK_AHEAD, as the txgbe_rx_scan_hw_ring
1109 * function only works with LOOK_AHEAD=8.
1111 #define LOOK_AHEAD 8
1112 #if (LOOK_AHEAD != 8)
1113 #error "PMD TXGBE: LOOK_AHEAD must be 8\n"
/* Scan up to RX_MAX_BURST descriptors in LOOK_AHEAD-sized batches,
 * convert the completed ones into fully-populated mbufs, stage them
 * in rxq->rx_stage, and return the number of packets made available.
 */
1116 txgbe_rx_scan_hw_ring(struct txgbe_rx_queue *rxq)
1118 volatile struct txgbe_rx_desc *rxdp;
1119 struct txgbe_rx_entry *rxep;
1120 struct rte_mbuf *mb;
1124 uint32_t s[LOOK_AHEAD];
1125 uint32_t pkt_info[LOOK_AHEAD];
1126 int i, j, nb_rx = 0;
1129 /* get references to current descriptor and S/W ring entry */
1130 rxdp = &rxq->rx_ring[rxq->rx_tail];
1131 rxep = &rxq->sw_ring[rxq->rx_tail];
1133 status = rxdp->qw1.lo.status;
1134 /* check to make sure there is at least 1 packet to receive */
1135 if (!(status & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD)))
1139 * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1140 * reference packets that are ready to be received.
1142 for (i = 0; i < RTE_PMD_TXGBE_RX_MAX_BURST;
1143 i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1144 /* Read desc statuses backwards to avoid race condition */
1145 for (j = 0; j < LOOK_AHEAD; j++)
1146 s[j] = rte_le_to_cpu_32(rxdp[j].qw1.lo.status);
1150 /* Compute how many status bits were set */
1151 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1152 (s[nb_dd] & TXGBE_RXD_STAT_DD); nb_dd++)
/* Snapshot pkt_info only for the descriptors known complete. */
1155 for (j = 0; j < nb_dd; j++)
1156 pkt_info[j] = rte_le_to_cpu_32(rxdp[j].qw0.dw0);
1160 /* Translate descriptor info to mbuf format */
1161 for (j = 0; j < nb_dd; ++j) {
1163 pkt_len = rte_le_to_cpu_16(rxdp[j].qw1.hi.len) -
1165 mb->data_len = pkt_len;
1166 mb->pkt_len = pkt_len;
1167 mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].qw1.hi.tag);
1169 /* convert descriptor fields to rte mbuf flags */
1170 pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1172 pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1174 txgbe_rxd_pkt_info_to_pkt_flags(pkt_info[j]);
1175 mb->ol_flags = pkt_flags;
1177 txgbe_rxd_pkt_info_to_pkt_type(pkt_info[j],
1178 rxq->pkt_type_mask);
/* Fill the hash union: RSS hash, or FDIR hash+id on a match. */
1180 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1182 rte_le_to_cpu_32(rxdp[j].qw0.dw1);
1183 else if (pkt_flags & PKT_RX_FDIR) {
1184 mb->hash.fdir.hash =
1185 rte_le_to_cpu_16(rxdp[j].qw0.hi.csum) &
1186 TXGBE_ATR_HASH_MASK;
1188 rte_le_to_cpu_16(rxdp[j].qw0.hi.ipid);
1192 /* Move mbuf pointers from the S/W ring to the stage */
1193 for (j = 0; j < LOOK_AHEAD; ++j)
1194 rxq->rx_stage[i + j] = rxep[j].mbuf;
1196 /* stop if all requested packets could not be received */
1197 if (nb_dd != LOOK_AHEAD)
1201 /* clear software ring entries so we can cleanup correctly */
1202 for (i = 0; i < nb_rx; ++i)
1203 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
/* Bulk-replenish rx_free_thresh descriptors with fresh mbufs from the
 * queue's mempool, starting at the current free trigger. Returns
 * non-zero (mempool failure) without touching the ring on failure.
 */
1209 txgbe_rx_alloc_bufs(struct txgbe_rx_queue *rxq, bool reset_mbuf)
1211 volatile struct txgbe_rx_desc *rxdp;
1212 struct txgbe_rx_entry *rxep;
1213 struct rte_mbuf *mb;
1218 /* allocate buffers in bulk directly into the S/W ring */
1219 alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1220 rxep = &rxq->sw_ring[alloc_idx];
1221 diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1222 rxq->rx_free_thresh);
1223 if (unlikely(diag != 0))
1226 rxdp = &rxq->rx_ring[alloc_idx];
1227 for (i = 0; i < rxq->rx_free_thresh; ++i) {
1228 /* populate the static rte mbuf fields */
1230 mb->port = rxq->port_id;
1233 rte_mbuf_refcnt_set(mb, 1);
1234 mb->data_off = RTE_PKTMBUF_HEADROOM;
1236 /* populate the descriptors */
1237 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1238 TXGBE_RXD_HDRADDR(&rxdp[i], 0);
1239 TXGBE_RXD_PKTADDR(&rxdp[i], dma_addr);
1242 /* update state of internal queue structure */
1243 rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
/* Wrap the trigger index at the end of the ring. */
1244 if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1245 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
/* Hand out previously-staged mbufs (from txgbe_rx_scan_hw_ring) to the
 * application array; returns how many were copied out.
 */
1251 static inline uint16_t
1252 txgbe_rx_fill_from_stage(struct txgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1255 struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1258 /* how many packets are ready to return? */
1259 nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1261 /* copy mbuf pointers to the application's packet list */
1262 for (i = 0; i < nb_pkts; ++i)
1263 rx_pkts[i] = stage[i];
1265 /* update internal queue state */
1266 rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1267 rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
/* Bulk-alloc Rx body (burst capped at RX_MAX_BURST by the caller):
 * drain the stage first, then scan the HW ring, replenish buffers at
 * the free trigger, and roll back the scan if replenishment fails.
 */
1272 static inline uint16_t
1273 txgbe_rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1276 struct txgbe_rx_queue *rxq = (struct txgbe_rx_queue *)rx_queue;
1277 struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
1280 /* Any previously recv'd pkts will be returned from the Rx stage */
1281 if (rxq->rx_nb_avail)
1282 return txgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1284 /* Scan the H/W ring for packets to receive */
1285 nb_rx = (uint16_t)txgbe_rx_scan_hw_ring(rxq);
1287 /* update internal queue state */
1288 rxq->rx_next_avail = 0;
1289 rxq->rx_nb_avail = nb_rx;
1290 rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1292 /* if required, allocate new buffers to replenish descriptors */
1293 if (rxq->rx_tail > rxq->rx_free_trigger) {
1294 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1296 if (txgbe_rx_alloc_bufs(rxq, true) != 0) {
1299 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1300 "queue_id=%u", (uint16_t)rxq->port_id,
1301 (uint16_t)rxq->queue_id);
1303 dev->data->rx_mbuf_alloc_failed +=
1304 rxq->rx_free_thresh;
1307 * Need to rewind any previous receives if we cannot
1308 * allocate new buffers to replenish the old ones.
1310 rxq->rx_nb_avail = 0;
1311 rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1312 for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1313 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1318 /* update tail pointer */
/* cur_free_trigger is the pre-replenish trigger: tell HW about the
 * descriptors that were just refilled.
 */
1320 txgbe_set32_relaxed(rxq->rdt_reg_addr, cur_free_trigger);
/* Wrap the software tail at the end of the ring. */
1323 if (rxq->rx_tail >= rxq->nb_rx_desc)
1326 /* received any packets this loop? */
1327 if (rxq->rx_nb_avail)
1328 return txgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
/*
 * Public bulk-alloc Rx burst entry point.
 * Small requests go straight to txgbe_rx_recv_pkts(); larger ones are
 * split into RTE_PMD_TXGBE_RX_MAX_BURST-sized chunks (the worker's limit).
 * NOTE(review): the chunking loop's termination/early-exit lines are not
 * visible in this extract — presumably it stops when a chunk returns fewer
 * packets than requested.
 */
1333 /* split requests into chunks of size RTE_PMD_TXGBE_RX_MAX_BURST */
1335 txgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1340 if (unlikely(nb_pkts == 0))
1343 if (likely(nb_pkts <= RTE_PMD_TXGBE_RX_MAX_BURST))
1344 return txgbe_rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1346 /* request is relatively large, chunk it up */
1351 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_TXGBE_RX_MAX_BURST);
1352 ret = txgbe_rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1353 nb_rx = (uint16_t)(nb_rx + ret);
1354 nb_pkts = (uint16_t)(nb_pkts - ret);
/*
 * Default scalar Rx burst handler (single-segment packets).
 *
 * For each completed descriptor (DD bit set): allocate a replacement mbuf,
 * swap it into sw_ring, rewrite the descriptor's buffer addresses, and fill
 * the received mbuf's length/offload/hash fields from the descriptor copy.
 * Descriptors are "held" until rx_free_thresh of them accumulate, at which
 * point RDT is advanced to one behind the last processed entry.
 */
1363 txgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1366 struct txgbe_rx_queue *rxq;
1367 volatile struct txgbe_rx_desc *rx_ring;
1368 volatile struct txgbe_rx_desc *rxdp;
1369 struct txgbe_rx_entry *sw_ring;
1370 struct txgbe_rx_entry *rxe;
1371 struct rte_mbuf *rxm;
1372 struct rte_mbuf *nmb;
1373 struct txgbe_rx_desc rxd;
1386 rx_id = rxq->rx_tail;
1387 rx_ring = rxq->rx_ring;
1388 sw_ring = rxq->sw_ring;
1389 struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
1390 while (nb_rx < nb_pkts) {
1392 * The order of operations here is important as the DD status
1393 * bit must not be read after any other descriptor fields.
1394 * rx_ring and rxdp are pointing to volatile data so the order
1395 * of accesses cannot be reordered by the compiler. If they were
1396 * not volatile, they could be reordered which could lead to
1397 * using invalid descriptor fields when read from rxd.
1399 rxdp = &rx_ring[rx_id];
1400 staterr = rxdp->qw1.lo.status;
1401 if (!(staterr & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD)))
1408 * If the TXGBE_RXD_STAT_EOP flag is not set, the RX packet
1409 * is likely to be invalid and to be dropped by the various
1410 * validation checks performed by the network stack.
1412 * Allocate a new mbuf to replenish the RX ring descriptor.
1413 * If the allocation fails:
1414 * - arrange for that RX descriptor to be the first one
1415 * being parsed the next time the receive function is
1416 * invoked [on the same queue].
1418 * - Stop parsing the RX ring and return immediately.
1420 * This policy does not drop the packet received in the RX
1421 * descriptor for which the allocation of a new mbuf failed.
1422 * Thus, it allows that packet to be later retrieved if
1423 * mbufs have been freed in the meantime.
1424 * As a side effect, holding RX descriptors instead of
1425 * systematically giving them back to the NIC may lead to
1426 * RX ring exhaustion situations.
1427 * However, the NIC can gracefully prevent such situations
1428 * to happen by sending specific "back-pressure" flow control
1429 * frames to its peer(s).
1431 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1432 "ext_err_stat=0x%08x pkt_len=%u",
1433 (uint16_t)rxq->port_id, (uint16_t)rxq->queue_id,
1434 (uint16_t)rx_id, (uint32_t)staterr,
1435 (uint16_t)rte_le_to_cpu_16(rxd.qw1.hi.len));
1437 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1439 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1440 "queue_id=%u", (uint16_t)rxq->port_id,
1441 (uint16_t)rxq->queue_id);
1442 dev->data->rx_mbuf_alloc_failed++;
1447 rxe = &sw_ring[rx_id];
1449 if (rx_id == rxq->nb_rx_desc)
1452 /* Prefetch next mbuf while processing current one. */
1453 rte_txgbe_prefetch(sw_ring[rx_id].mbuf);
1456 * When next RX descriptor is on a cache-line boundary,
1457 * prefetch the next 4 RX descriptors and the next 8 pointers
1460 if ((rx_id & 0x3) == 0) {
1461 rte_txgbe_prefetch(&rx_ring[rx_id]);
1462 rte_txgbe_prefetch(&sw_ring[rx_id]);
1467 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1468 TXGBE_RXD_HDRADDR(rxdp, 0);
1469 TXGBE_RXD_PKTADDR(rxdp, dma_addr);
1472 * Initialize the returned mbuf.
1473 * 1) setup generic mbuf fields:
1474 * - number of segments,
1477 * - RX port identifier.
1478 * 2) integrate hardware offload data, if any:
1479 * - RSS flag & hash,
1480 * - IP checksum flag,
1481 * - VLAN TCI, if any,
1484 pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.qw1.hi.len) -
1486 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1487 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1490 rxm->pkt_len = pkt_len;
1491 rxm->data_len = pkt_len;
1492 rxm->port = rxq->port_id;
1494 pkt_info = rte_le_to_cpu_32(rxd.qw0.dw0);
1495 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1496 rxm->vlan_tci = rte_le_to_cpu_16(rxd.qw1.hi.tag);
1498 pkt_flags = rx_desc_status_to_pkt_flags(staterr,
1500 pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1501 pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1502 rxm->ol_flags = pkt_flags;
1503 rxm->packet_type = txgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1504 rxq->pkt_type_mask);
1506 if (likely(pkt_flags & PKT_RX_RSS_HASH)) {
1507 rxm->hash.rss = rte_le_to_cpu_32(rxd.qw0.dw1);
1508 } else if (pkt_flags & PKT_RX_FDIR) {
1509 rxm->hash.fdir.hash =
1510 rte_le_to_cpu_16(rxd.qw0.hi.csum) &
1511 TXGBE_ATR_HASH_MASK;
1512 rxm->hash.fdir.id = rte_le_to_cpu_16(rxd.qw0.hi.ipid);
1515 * Store the mbuf address into the next entry of the array
1516 * of returned packets.
1518 rx_pkts[nb_rx++] = rxm;
1520 rxq->rx_tail = rx_id;
1523 * If the number of free RX descriptors is greater than the RX free
1524 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1526 * Update the RDT with the value of the last processed RX descriptor
1527 * minus 1, to guarantee that the RDT register is never equal to the
1528 * RDH register, which creates a "full" ring situation from the
1529 * hardware point of view...
1531 nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
1532 if (nb_hold > rxq->rx_free_thresh) {
1533 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1534 "nb_hold=%u nb_rx=%u",
1535 (uint16_t)rxq->port_id, (uint16_t)rxq->queue_id,
1536 (uint16_t)rx_id, (uint16_t)nb_hold,
1538 rx_id = (uint16_t)((rx_id == 0) ?
1539 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1540 txgbe_set32(rxq->rdt_reg_addr, rx_id);
1543 rxq->nb_rx_hold = nb_hold;
1548 * txgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1550 * Fill the following info in the HEAD buffer of the Rx cluster:
1551 * - RX port identifier
1552 * - hardware offload data, if any:
1554 * - IP checksum flag
1555 * - VLAN TCI, if any
1557 * @head HEAD of the packet cluster
1558 * @desc HW descriptor to get data from
1559 * @rxq Pointer to the Rx queue
 * @staterr status/error word of the last descriptor of the cluster
1562 txgbe_fill_cluster_head_buf(struct rte_mbuf *head, struct txgbe_rx_desc *desc,
1563 struct txgbe_rx_queue *rxq, uint32_t staterr)
1568 head->port = rxq->port_id;
1570 /* The vlan_tci field is only valid when PKT_RX_VLAN is
1571 * set in the pkt_flags field.
1573 head->vlan_tci = rte_le_to_cpu_16(desc->qw1.hi.tag);
1574 pkt_info = rte_le_to_cpu_32(desc->qw0.dw0);
1575 pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1576 pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1577 pkt_flags |= txgbe_rxd_pkt_info_to_pkt_flags(pkt_info);
1578 head->ol_flags = pkt_flags;
1579 head->packet_type = txgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1580 rxq->pkt_type_mask);
1582 if (likely(pkt_flags & PKT_RX_RSS_HASH)) {
1583 head->hash.rss = rte_le_to_cpu_32(desc->qw0.dw1);
1584 } else if (pkt_flags & PKT_RX_FDIR) {
1585 head->hash.fdir.hash = rte_le_to_cpu_16(desc->qw0.hi.csum)
1586 & TXGBE_ATR_HASH_MASK;
1587 head->hash.fdir.id = rte_le_to_cpu_16(desc->qw0.hi.ipid);
1592 * txgbe_recv_pkts_lro - receive handler for the LRO case.
1594 * @rx_queue Rx queue handle
1595 * @rx_pkts table of received packets
1596 * @nb_pkts size of rx_pkts table
1597 * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1599 * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1600 * additional ring of txgbe_rsc_entry's that will hold the relevant RSC info.
1602 * We use the same logic as in Linux and in FreeBSD txgbe drivers:
1603 * 1) When non-EOP RSC completion arrives:
1604 * a) Update the HEAD of the current RSC aggregation cluster with the new
1605 * segment's data length.
1606 * b) Set the "next" pointer of the current segment to point to the segment
1607 * at the NEXTP index.
1608 * c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1609 * in the sw_rsc_ring.
1610 * 2) When EOP arrives we just update the cluster's total length and offload
1611 * flags and deliver the cluster up to the upper layers. In our case - put it
1612 * in the rx_pkts table.
1614 * Returns the number of received packets/clusters (according to the "bulk
1615 * receive" interface).
/*
 * Rx burst worker for LRO/RSC and scattered packets (see the block comment
 * above for the aggregation algorithm). @bulk_alloc selects between per-
 * descriptor mbuf allocation and bulk replenish via txgbe_rx_alloc_bufs().
 */
1617 static inline uint16_t
1618 txgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1621 struct txgbe_rx_queue *rxq = rx_queue;
1622 struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
1623 volatile struct txgbe_rx_desc *rx_ring = rxq->rx_ring;
1624 struct txgbe_rx_entry *sw_ring = rxq->sw_ring;
1625 struct txgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1626 uint16_t rx_id = rxq->rx_tail;
1628 uint16_t nb_hold = rxq->nb_rx_hold;
1629 uint16_t prev_id = rxq->rx_tail;
1631 while (nb_rx < nb_pkts) {
1633 struct txgbe_rx_entry *rxe;
1634 struct txgbe_scattered_rx_entry *sc_entry;
1635 struct txgbe_scattered_rx_entry *next_sc_entry = NULL;
1636 struct txgbe_rx_entry *next_rxe = NULL;
1637 struct rte_mbuf *first_seg;
1638 struct rte_mbuf *rxm;
1639 struct rte_mbuf *nmb = NULL;
1640 struct txgbe_rx_desc rxd;
1643 volatile struct txgbe_rx_desc *rxdp;
1648 * The code in this whole file uses the volatile pointer to
1649 * ensure the read ordering of the status and the rest of the
1650 * descriptor fields (on the compiler level only!!!). This is so
1651 * UGLY - why not to just use the compiler barrier instead? DPDK
1652 * even has the rte_compiler_barrier() for that.
1654 * But most importantly this is just wrong because this doesn't
1655 * ensure memory ordering in a general case at all. For
1656 * instance, DPDK is supposed to work on Power CPUs where
1657 * compiler barrier may just not be enough!
1659 * I tried to write only this function properly to have a
1660 * starting point (as a part of an LRO/RSC series) but the
1661 * compiler cursed at me when I tried to cast away the
1662 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
1663 * keeping it the way it is for now.
1665 * The code in this file is broken in so many other places and
1666 * will just not work on a big endian CPU anyway therefore the
1667 * lines below will have to be revisited together with the rest
1671 * - Get rid of "volatile" and let the compiler do its job.
1672 * - Use the proper memory barrier (rte_rmb()) to ensure the
1673 * memory ordering below.
1675 rxdp = &rx_ring[rx_id];
1676 staterr = rte_le_to_cpu_32(rxdp->qw1.lo.status);
1678 if (!(staterr & TXGBE_RXD_STAT_DD))
1683 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1684 "staterr=0x%x data_len=%u",
1685 rxq->port_id, rxq->queue_id, rx_id, staterr,
1686 rte_le_to_cpu_16(rxd.qw1.hi.len));
1689 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1691 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
1692 "port_id=%u queue_id=%u",
1693 rxq->port_id, rxq->queue_id);
1695 dev->data->rx_mbuf_alloc_failed++;
1698 } else if (nb_hold > rxq->rx_free_thresh) {
1699 uint16_t next_rdt = rxq->rx_free_trigger;
1701 if (!txgbe_rx_alloc_bufs(rxq, false)) {
1703 txgbe_set32_relaxed(rxq->rdt_reg_addr,
1705 nb_hold -= rxq->rx_free_thresh;
1707 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
1708 "port_id=%u queue_id=%u",
1709 rxq->port_id, rxq->queue_id);
1711 dev->data->rx_mbuf_alloc_failed++;
1717 rxe = &sw_ring[rx_id];
1718 eop = staterr & TXGBE_RXD_STAT_EOP;
1720 next_id = rx_id + 1;
1721 if (next_id == rxq->nb_rx_desc)
1724 /* Prefetch next mbuf while processing current one. */
1725 rte_txgbe_prefetch(sw_ring[next_id].mbuf);
1728 * When next RX descriptor is on a cache-line boundary,
1729 * prefetch the next 4 RX descriptors and the next 4 pointers
1732 if ((next_id & 0x3) == 0) {
1733 rte_txgbe_prefetch(&rx_ring[next_id]);
1734 rte_txgbe_prefetch(&sw_ring[next_id]);
1741 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1743 * Update RX descriptor with the physical address of the
1744 * new data buffer of the new allocated mbuf.
1748 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1749 TXGBE_RXD_HDRADDR(rxdp, 0);
1750 TXGBE_RXD_PKTADDR(rxdp, dma);
1756 * Set data length & data buffer address of mbuf.
1758 data_len = rte_le_to_cpu_16(rxd.qw1.hi.len);
1759 rxm->data_len = data_len;
1764 * Get next descriptor index:
1765 * - For RSC it's in the NEXTP field.
1766 * - For a scattered packet - it's just a following
1769 if (TXGBE_RXD_RSCCNT(rxd.qw0.dw0))
1770 nextp_id = TXGBE_RXD_NEXTP(staterr);
1774 next_sc_entry = &sw_sc_ring[nextp_id];
1775 next_rxe = &sw_ring[nextp_id];
1776 rte_txgbe_prefetch(next_rxe);
1779 sc_entry = &sw_sc_ring[rx_id];
1780 first_seg = sc_entry->fbuf;
1781 sc_entry->fbuf = NULL;
1784 * If this is the first buffer of the received packet,
1785 * set the pointer to the first mbuf of the packet and
1786 * initialize its context.
1787 * Otherwise, update the total length and the number of segments
1788 * of the current scattered packet, and update the pointer to
1789 * the last mbuf of the current packet.
1791 if (first_seg == NULL) {
1793 first_seg->pkt_len = data_len;
1794 first_seg->nb_segs = 1;
1796 first_seg->pkt_len += data_len;
1797 first_seg->nb_segs++;
1804 * If this is not the last buffer of the received packet, update
1805 * the pointer to the first mbuf at the NEXTP entry in the
1806 * sw_sc_ring and continue to parse the RX ring.
1808 if (!eop && next_rxe) {
1809 rxm->next = next_rxe->mbuf;
1810 next_sc_entry->fbuf = first_seg;
1814 /* Initialize the first mbuf of the returned packet */
1815 txgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
1818 * Deal with the case, when HW CRC strip is disabled.
1819 * That can't happen when LRO is enabled, but still could
1820 * happen for scattered RX mode.
1822 first_seg->pkt_len -= rxq->crc_len;
1823 if (unlikely(rxm->data_len <= rxq->crc_len)) {
1824 struct rte_mbuf *lp;
1826 for (lp = first_seg; lp->next != rxm; lp = lp->next)
1829 first_seg->nb_segs--;
1830 lp->data_len -= rxq->crc_len - rxm->data_len;
1832 rte_pktmbuf_free_seg(rxm);
1834 rxm->data_len -= rxq->crc_len;
1837 /* Prefetch data of first segment, if configured to do so. */
1838 rte_packet_prefetch((char *)first_seg->buf_addr +
1839 first_seg->data_off);
1842 * Store the mbuf address into the next entry of the array
1843 * of returned packets.
1845 rx_pkts[nb_rx++] = first_seg;
1849 * Record index of the next RX descriptor to probe.
1851 rxq->rx_tail = rx_id;
1854 * If the number of free RX descriptors is greater than the RX free
1855 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1857 * Update the RDT with the value of the last processed RX descriptor
1858 * minus 1, to guarantee that the RDT register is never equal to the
1859 * RDH register, which creates a "full" ring situation from the
1860 * hardware point of view...
1862 if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
1863 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1864 "nb_hold=%u nb_rx=%u",
1865 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
1868 txgbe_set32_relaxed(rxq->rdt_reg_addr, prev_id);
1872 rxq->nb_rx_hold = nb_hold;
/* LRO Rx burst wrapper: per-descriptor (single) mbuf allocation. */
1877 txgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1880 return txgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
/* LRO Rx burst wrapper: bulk mbuf allocation for ring refill. */
1884 txgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1887 return txgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
/* Per-queue Rx offload capabilities: only VLAN stripping is queue-scoped. */
1891 txgbe_get_rx_queue_offloads(struct rte_eth_dev *dev __rte_unused)
1893 return DEV_RX_OFFLOAD_VLAN_STRIP;
/*
 * Port-wide Rx offload capabilities, refined by device flavor:
 * PF-only offloads are added when not a VF, LRO only on raptor PF
 * without SR-IOV active, MACsec strip only on raptor.
 */
1897 txgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
1900 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
1901 struct rte_eth_dev_sriov *sriov = &RTE_ETH_DEV_SRIOV(dev);
1903 offloads = DEV_RX_OFFLOAD_IPV4_CKSUM |
1904 DEV_RX_OFFLOAD_UDP_CKSUM |
1905 DEV_RX_OFFLOAD_TCP_CKSUM |
1906 DEV_RX_OFFLOAD_KEEP_CRC |
1907 DEV_RX_OFFLOAD_JUMBO_FRAME |
1908 DEV_RX_OFFLOAD_VLAN_FILTER |
1909 DEV_RX_OFFLOAD_RSS_HASH |
1910 DEV_RX_OFFLOAD_SCATTER;
/* NOTE(review): VLAN_FILTER appears both above and in the !VF branch
 * below; the duplication is harmless (bitwise OR) but redundant. */
1912 if (!txgbe_is_vf(dev))
1913 offloads |= (DEV_RX_OFFLOAD_VLAN_FILTER |
1914 DEV_RX_OFFLOAD_QINQ_STRIP |
1915 DEV_RX_OFFLOAD_VLAN_EXTEND);
1918 * RSC is only supported by PF devices in a non-SR-IOV
1921 if (hw->mac.type == txgbe_mac_raptor && !sriov->active)
1922 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
1924 if (hw->mac.type == txgbe_mac_raptor)
1925 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
1927 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
/* Free every mbuf still referenced by the Tx software ring and clear the
 * pointers, so the ring can be reset or the queue torn down safely. */
1932 static void __rte_cold
1933 txgbe_tx_queue_release_mbufs(struct txgbe_tx_queue *txq)
1937 if (txq->sw_ring != NULL) {
1938 for (i = 0; i < txq->nb_tx_desc; i++) {
1939 if (txq->sw_ring[i].mbuf != NULL) {
1940 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1941 txq->sw_ring[i].mbuf = NULL;
/* Release the Tx software ring array itself (not the mbufs it held). */
1947 static void __rte_cold
1948 txgbe_tx_free_swring(struct txgbe_tx_queue *txq)
1951 txq->sw_ring != NULL)
1952 rte_free(txq->sw_ring)/* NOTE(review): guarded by a txq NULL-check whose
 first half is not visible in this extract */;
/* Full Tx queue teardown via the queue's ops table: free mbufs, then the
 * software ring (the queue struct itself is freed in elided lines). */
1955 static void __rte_cold
1956 txgbe_tx_queue_release(struct txgbe_tx_queue *txq)
1958 if (txq != NULL && txq->ops != NULL) {
1959 txq->ops->release_mbufs(txq);
1960 txq->ops->free_swring(txq);
/* ethdev tx_queue_release callback: thin wrapper over the internal release. */
1966 txgbe_dev_tx_queue_release(void *txq)
1968 txgbe_tx_queue_release(txq);
1971 /* (Re)set dynamic txgbe_tx_queue fields to defaults */
1972 static void __rte_cold
1973 txgbe_reset_tx_queue(struct txgbe_tx_queue *txq)
1975 static const struct txgbe_tx_desc zeroed_desc = {0};
1976 struct txgbe_tx_entry *txe = txq->sw_ring;
1979 /* Zero out HW ring memory */
1980 for (i = 0; i < txq->nb_tx_desc; i++)
1981 txq->tx_ring[i] = zeroed_desc;
1983 /* Initialize SW ring entries */
1984 prev = (uint16_t)(txq->nb_tx_desc - 1);
1985 for (i = 0; i < txq->nb_tx_desc; i++) {
1986 volatile struct txgbe_tx_desc *txd = &txq->tx_ring[i];
/* Pre-set DD so the free path treats untouched descriptors as done. */
1988 txd->dw3 = rte_cpu_to_le_32(TXGBE_TXD_DD);
1991 txe[prev].next_id = i;
1995 txq->tx_next_dd = (uint16_t)(txq->tx_free_thresh - 1);
1999 * Always allow 1 descriptor to be un-allocated to avoid
2000 * a H/W race condition
2002 txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2003 txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
/* Invalidate the Tx context cache so stale offload contexts are not reused. */
2005 memset((void *)&txq->ctx_cache, 0,
2006 TXGBE_CTX_NUM * sizeof(struct txgbe_ctx_info));
/* Default Tx queue ops vtable used by txgbe_dev_tx_queue_setup(). */
2009 static const struct txgbe_txq_ops def_txq_ops = {
2010 .release_mbufs = txgbe_tx_queue_release_mbufs,
2011 .free_swring = txgbe_tx_free_swring,
2012 .reset = txgbe_reset_tx_queue,
2015 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2016 * the queue parameters. Used in tx_queue_setup by primary process and then
2017 * in dev_init by secondary process when attaching to an existing ethdev.
 * Simple path (no tx_pkt_prepare) requires zero offloads and a large enough
 * tx_free_thresh; otherwise the full-featured path with prep is installed.
2020 txgbe_set_tx_function(struct rte_eth_dev *dev, struct txgbe_tx_queue *txq)
2022 /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2023 if (txq->offloads == 0 &&
2024 txq->tx_free_thresh >= RTE_PMD_TXGBE_TX_MAX_BURST) {
2025 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2026 dev->tx_pkt_burst = txgbe_xmit_pkts_simple;
2027 dev->tx_pkt_prepare = NULL;
2029 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2031 " - offloads = 0x%" PRIx64,
2034 " - tx_free_thresh = %lu [RTE_PMD_TXGBE_TX_MAX_BURST=%lu]",
2035 (unsigned long)txq->tx_free_thresh,
2036 (unsigned long)RTE_PMD_TXGBE_TX_MAX_BURST);
2037 dev->tx_pkt_burst = txgbe_xmit_pkts;
2038 dev->tx_pkt_prepare = txgbe_prep_pkts;
/* Per-queue Tx offload capabilities (body elided in this extract —
 * presumably returns 0, i.e. no queue-scoped Tx offloads; confirm). */
2043 txgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
/* Port-wide Tx offload capabilities; QinQ insert is PF-only. */
2051 txgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2053 uint64_t tx_offload_capa;
2056 DEV_TX_OFFLOAD_VLAN_INSERT |
2057 DEV_TX_OFFLOAD_IPV4_CKSUM |
2058 DEV_TX_OFFLOAD_UDP_CKSUM |
2059 DEV_TX_OFFLOAD_TCP_CKSUM |
2060 DEV_TX_OFFLOAD_SCTP_CKSUM |
2061 DEV_TX_OFFLOAD_TCP_TSO |
2062 DEV_TX_OFFLOAD_UDP_TSO |
2063 DEV_TX_OFFLOAD_UDP_TNL_TSO |
2064 DEV_TX_OFFLOAD_IP_TNL_TSO |
2065 DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
2066 DEV_TX_OFFLOAD_GRE_TNL_TSO |
2067 DEV_TX_OFFLOAD_IPIP_TNL_TSO |
2068 DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
2069 DEV_TX_OFFLOAD_MULTI_SEGS;
2071 if (!txgbe_is_vf(dev))
2072 tx_offload_capa |= DEV_TX_OFFLOAD_QINQ_INSERT;
2074 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2076 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2078 return tx_offload_capa;
/*
 * ethdev tx_queue_setup callback.
 *
 * Validates descriptor count and tx_free_thresh, frees any previous queue at
 * this index, allocates the queue struct, the max-size descriptor memzone
 * (sized for TXGBE_RING_DESC_MAX to allow later resizing) and the software
 * ring, wires up the tail/flush register addresses (VF vs PF), installs the
 * Tx burst function and resets the queue state.
 */
2082 txgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2085 unsigned int socket_id,
2086 const struct rte_eth_txconf *tx_conf)
2088 const struct rte_memzone *tz;
2089 struct txgbe_tx_queue *txq;
2090 struct txgbe_hw *hw;
2091 uint16_t tx_free_thresh;
2094 PMD_INIT_FUNC_TRACE();
2095 hw = TXGBE_DEV_HW(dev);
2097 offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2100 * Validate number of transmit descriptors.
2101 * It must not exceed hardware maximum, and must be multiple
2104 if (nb_desc % TXGBE_TXD_ALIGN != 0 ||
2105 nb_desc > TXGBE_RING_DESC_MAX ||
2106 nb_desc < TXGBE_RING_DESC_MIN) {
2111 * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2112 * descriptors are used or if the number of descriptors required
2113 * to transmit a packet is greater than the number of free TX
2115 * One descriptor in the TX ring is used as a sentinel to avoid a
2116 * H/W race condition, hence the maximum threshold constraints.
2117 * When set to zero use default values.
2119 tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2120 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2121 if (tx_free_thresh >= (nb_desc - 3)) {
2122 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the number of "
2123 "TX descriptors minus 3. (tx_free_thresh=%u "
2124 "port=%d queue=%d)",
2125 (unsigned int)tx_free_thresh,
2126 (int)dev->data->port_id, (int)queue_idx);
2130 if ((nb_desc % tx_free_thresh) != 0) {
2131 PMD_INIT_LOG(ERR, "tx_free_thresh must be a divisor of the "
2132 "number of TX descriptors. (tx_free_thresh=%u "
2133 "port=%d queue=%d)", (unsigned int)tx_free_thresh,
2134 (int)dev->data->port_id, (int)queue_idx);
2138 /* Free memory prior to re-allocation if needed... */
2139 if (dev->data->tx_queues[queue_idx] != NULL) {
2140 txgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2141 dev->data->tx_queues[queue_idx] = NULL;
2144 /* First allocate the tx queue data structure */
2145 txq = rte_zmalloc_socket("ethdev TX queue",
2146 sizeof(struct txgbe_tx_queue),
2147 RTE_CACHE_LINE_SIZE, socket_id);
2152 * Allocate TX ring hardware descriptors. A memzone large enough to
2153 * handle the maximum ring size is allocated in order to allow for
2154 * resizing in later calls to the queue setup function.
2156 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2157 sizeof(struct txgbe_tx_desc) * TXGBE_RING_DESC_MAX,
2158 TXGBE_ALIGN, socket_id);
2160 txgbe_tx_queue_release(txq);
2164 txq->nb_tx_desc = nb_desc;
2165 txq->tx_free_thresh = tx_free_thresh;
2166 txq->pthresh = tx_conf->tx_thresh.pthresh;
2167 txq->hthresh = tx_conf->tx_thresh.hthresh;
2168 txq->wthresh = tx_conf->tx_thresh.wthresh;
2169 txq->queue_id = queue_idx;
2170 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2171 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2172 txq->port_id = dev->data->port_id;
2173 txq->offloads = offloads;
2174 txq->ops = &def_txq_ops;
2175 txq->tx_deferred_start = tx_conf->tx_deferred_start;
2177 /* Modification to set tail pointer for virtual function
2178 * if vf is detected.
2180 if (hw->mac.type == txgbe_mac_raptor_vf) {
2181 txq->tdt_reg_addr = TXGBE_REG_ADDR(hw, TXGBE_TXWP(queue_idx));
2182 txq->tdc_reg_addr = TXGBE_REG_ADDR(hw, TXGBE_TXCFG(queue_idx));
2184 txq->tdt_reg_addr = TXGBE_REG_ADDR(hw,
2185 TXGBE_TXWP(txq->reg_idx));
2186 txq->tdc_reg_addr = TXGBE_REG_ADDR(hw,
2187 TXGBE_TXCFG(txq->reg_idx));
2190 txq->tx_ring_phys_addr = TMZ_PADDR(tz);
2191 txq->tx_ring = (struct txgbe_tx_desc *)TMZ_VADDR(tz);
2193 /* Allocate software ring */
2194 txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2195 sizeof(struct txgbe_tx_entry) * nb_desc,
2196 RTE_CACHE_LINE_SIZE, socket_id);
2197 if (txq->sw_ring == NULL) {
2198 txgbe_tx_queue_release(txq);
2201 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
2202 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2204 /* set up scalar TX function as appropriate */
2205 txgbe_set_tx_function(dev, txq);
2207 txq->ops->reset(txq);
2209 dev->data->tx_queues[queue_idx] = txq;
2215 * txgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2217 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2218 * in the sw_rsc_ring is not set to NULL but rather points to the next
2219 * mbuf of this RSC aggregation (that has not been completed yet and still
2220 * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2221 * will just free first "nb_segs" segments of the cluster explicitly by calling
2222 * an rte_pktmbuf_free_seg().
2224 * @m scattered cluster head
2226 static void __rte_cold
2227 txgbe_free_sc_cluster(struct rte_mbuf *m)
2229 uint16_t i, nb_segs = m->nb_segs;
2230 struct rte_mbuf *next_seg;
2232 for (i = 0; i < nb_segs; i++) {
/* next_seg is saved before the free in elided lines; freeing first
 * would invalidate m->next. */
2234 rte_pktmbuf_free_seg(m);
/*
 * Free every mbuf held by an Rx queue: the software ring entries, any
 * packets still staged for the bulk-alloc path, and partially-assembled
 * scattered/RSC clusters in sw_sc_ring.
 */
2239 static void __rte_cold
2240 txgbe_rx_queue_release_mbufs(struct txgbe_rx_queue *rxq)
2244 if (rxq->sw_ring != NULL) {
2245 for (i = 0; i < rxq->nb_rx_desc; i++) {
2246 if (rxq->sw_ring[i].mbuf != NULL) {
2247 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2248 rxq->sw_ring[i].mbuf = NULL;
2251 if (rxq->rx_nb_avail) {
2252 for (i = 0; i < rxq->rx_nb_avail; ++i) {
2253 struct rte_mbuf *mb;
2255 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2256 rte_pktmbuf_free_seg(mb);
2258 rxq->rx_nb_avail = 0;
2262 if (rxq->sw_sc_ring)
2263 for (i = 0; i < rxq->nb_rx_desc; i++)
2264 if (rxq->sw_sc_ring[i].fbuf) {
2265 txgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2266 rxq->sw_sc_ring[i].fbuf = NULL;
/* Full Rx queue teardown: free held mbufs, then both software rings
 * (the queue struct itself is freed in elided lines). */
2270 static void __rte_cold
2271 txgbe_rx_queue_release(struct txgbe_rx_queue *rxq)
2274 txgbe_rx_queue_release_mbufs(rxq);
2275 rte_free(rxq->sw_ring);
2276 rte_free(rxq->sw_sc_ring);
/* ethdev rx_queue_release callback: thin wrapper over the internal release. */
2282 txgbe_dev_rx_queue_release(void *rxq)
2284 txgbe_rx_queue_release(rxq);
2288 * Check if Rx Burst Bulk Alloc function can be used.
2290 * 0: the preconditions are satisfied and the bulk allocation function
2292 * -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2293 * function must be used.
2295 static inline int __rte_cold
2296 check_rx_burst_bulk_alloc_preconditions(struct txgbe_rx_queue *rxq)
2301 * Make sure the following pre-conditions are satisfied:
2302 * rxq->rx_free_thresh >= RTE_PMD_TXGBE_RX_MAX_BURST
2303 * rxq->rx_free_thresh < rxq->nb_rx_desc
2304 * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2305 * Scattered packets are not supported. This should be checked
2306 * outside of this function.
2308 if (!(rxq->rx_free_thresh >= RTE_PMD_TXGBE_RX_MAX_BURST)) {
2309 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2310 "rxq->rx_free_thresh=%d, "
2311 "RTE_PMD_TXGBE_RX_MAX_BURST=%d",
2312 rxq->rx_free_thresh, RTE_PMD_TXGBE_RX_MAX_BURST);
2314 } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2315 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2316 "rxq->rx_free_thresh=%d, "
2317 "rxq->nb_rx_desc=%d",
2318 rxq->rx_free_thresh, rxq->nb_rx_desc);
2320 } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2321 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2322 "rxq->nb_rx_desc=%d, "
2323 "rxq->rx_free_thresh=%d",
2324 rxq->nb_rx_desc, rxq->rx_free_thresh);
2331 /* Reset dynamic txgbe_rx_queue fields back to defaults */
2332 static void __rte_cold
2333 txgbe_reset_rx_queue(struct txgbe_adapter *adapter, struct txgbe_rx_queue *rxq)
2335 static const struct txgbe_rx_desc zeroed_desc = {
2336 {{0}, {0} }, {{0}, {0} } };
2338 uint16_t len = rxq->nb_rx_desc;
2341 * By default, the Rx queue setup function allocates enough memory for
2342 * TXGBE_RING_DESC_MAX. The Rx Burst bulk allocation function requires
2343 * extra memory at the end of the descriptor ring to be zero'd out.
2345 if (adapter->rx_bulk_alloc_allowed)
2346 /* zero out extra memory */
2347 len += RTE_PMD_TXGBE_RX_MAX_BURST;
2350 * Zero out HW ring memory. Zero out extra memory at the end of
2351 * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2352 * reads extra memory as zeros.
2354 for (i = 0; i < len; i++)
2355 rxq->rx_ring[i] = zeroed_desc;
2358 * initialize extra software ring entries. Space for these extra
2359 * entries is always allocated
2361 memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2362 for (i = rxq->nb_rx_desc; i < len; ++i)
2363 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2365 rxq->rx_nb_avail = 0;
2366 rxq->rx_next_avail = 0;
2367 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2369 rxq->nb_rx_hold = 0;
2370 rxq->pkt_first_seg = NULL;
2371 rxq->pkt_last_seg = NULL;
/*
 * ethdev rx_queue_setup callback.
 *
 * Validates the descriptor count, frees any queue previously at this index,
 * allocates the queue struct, a max-size descriptor memzone, the software
 * ring (with look-ahead slack for the bulk-alloc path) and the scattered-Rx
 * ring, wires the RDT/RDH register addresses (VF vs PF), and resets the
 * queue's dynamic state. If this queue breaks the bulk-alloc preconditions
 * the feature is disabled port-wide.
 */
2375 txgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2378 unsigned int socket_id,
2379 const struct rte_eth_rxconf *rx_conf,
2380 struct rte_mempool *mp)
2382 const struct rte_memzone *rz;
2383 struct txgbe_rx_queue *rxq;
2384 struct txgbe_hw *hw;
2386 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
2389 PMD_INIT_FUNC_TRACE();
2390 hw = TXGBE_DEV_HW(dev);
2392 offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
2395 * Validate number of receive descriptors.
2396 * It must not exceed hardware maximum, and must be multiple
2399 if (nb_desc % TXGBE_RXD_ALIGN != 0 ||
2400 nb_desc > TXGBE_RING_DESC_MAX ||
2401 nb_desc < TXGBE_RING_DESC_MIN) {
2405 /* Free memory prior to re-allocation if needed... */
2406 if (dev->data->rx_queues[queue_idx] != NULL) {
2407 txgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2408 dev->data->rx_queues[queue_idx] = NULL;
2411 /* First allocate the rx queue data structure */
2412 rxq = rte_zmalloc_socket("ethdev RX queue",
2413 sizeof(struct txgbe_rx_queue),
2414 RTE_CACHE_LINE_SIZE, socket_id);
2418 rxq->nb_rx_desc = nb_desc;
2419 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2420 rxq->queue_id = queue_idx;
2421 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2422 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2423 rxq->port_id = dev->data->port_id;
2424 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2425 rxq->crc_len = RTE_ETHER_CRC_LEN;
2428 rxq->drop_en = rx_conf->rx_drop_en;
2429 rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2430 rxq->offloads = offloads;
2433 * The packet type in RX descriptor is different for different NICs.
2434 * So set different masks for different NICs.
2436 rxq->pkt_type_mask = TXGBE_PTID_MASK;
2439 * Allocate RX ring hardware descriptors. A memzone large enough to
2440 * handle the maximum ring size is allocated in order to allow for
2441 * resizing in later calls to the queue setup function.
2443 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2444 RX_RING_SZ, TXGBE_ALIGN, socket_id);
2446 txgbe_rx_queue_release(rxq);
2451 * Zero init all the descriptors in the ring.
2453 memset(rz->addr, 0, RX_RING_SZ);
2456 * Modified to setup VFRDT for Virtual Function
2458 if (hw->mac.type == txgbe_mac_raptor_vf) {
2460 TXGBE_REG_ADDR(hw, TXGBE_RXWP(queue_idx));
2462 TXGBE_REG_ADDR(hw, TXGBE_RXRP(queue_idx));
2465 TXGBE_REG_ADDR(hw, TXGBE_RXWP(rxq->reg_idx));
2467 TXGBE_REG_ADDR(hw, TXGBE_RXRP(rxq->reg_idx));
2470 rxq->rx_ring_phys_addr = TMZ_PADDR(rz);
2471 rxq->rx_ring = (struct txgbe_rx_desc *)TMZ_VADDR(rz);
2474 * Certain constraints must be met in order to use the bulk buffer
2475 * allocation Rx burst function. If any of Rx queues doesn't meet them
2476 * the feature should be disabled for the whole port.
2478 if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2479 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2480 "preconditions - canceling the feature for "
2481 "the whole port[%d]",
2482 rxq->queue_id, rxq->port_id);
2483 adapter->rx_bulk_alloc_allowed = false;
2487 * Allocate software ring. Allow for space at the end of the
2488 * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2489 * function does not access an invalid memory region.
2492 if (adapter->rx_bulk_alloc_allowed)
2493 len += RTE_PMD_TXGBE_RX_MAX_BURST;
2495 rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2496 sizeof(struct txgbe_rx_entry) * len,
2497 RTE_CACHE_LINE_SIZE, socket_id);
2498 if (!rxq->sw_ring) {
2499 txgbe_rx_queue_release(rxq);
2504 * Always allocate even if it's not going to be needed in order to
2505 * simplify the code.
2507 * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2508 * be requested in txgbe_dev_rx_init(), which is called later from
2512 rte_zmalloc_socket("rxq->sw_sc_ring",
2513 sizeof(struct txgbe_scattered_rx_entry) * len,
2514 RTE_CACHE_LINE_SIZE, socket_id);
2515 if (!rxq->sw_sc_ring) {
2516 txgbe_rx_queue_release(rxq);
2520 PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2521 "dma_addr=0x%" PRIx64,
2522 rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2523 rxq->rx_ring_phys_addr);
2525 dev->data->rx_queues[queue_idx] = rxq;
2527 txgbe_reset_rx_queue(adapter, rxq);
2533 txgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2535 #define TXGBE_RXQ_SCAN_INTERVAL 4
2536 volatile struct txgbe_rx_desc *rxdp;
2537 struct txgbe_rx_queue *rxq;
2540 rxq = dev->data->rx_queues[rx_queue_id];
2541 rxdp = &rxq->rx_ring[rxq->rx_tail];
2543 while ((desc < rxq->nb_rx_desc) &&
2544 (rxdp->qw1.lo.status &
2545 rte_cpu_to_le_32(TXGBE_RXD_STAT_DD))) {
2546 desc += TXGBE_RXQ_SCAN_INTERVAL;
2547 rxdp += TXGBE_RXQ_SCAN_INTERVAL;
2548 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2549 rxdp = &(rxq->rx_ring[rxq->rx_tail +
2550 desc - rxq->nb_rx_desc]);
2557 txgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
2559 struct txgbe_rx_queue *rxq = rx_queue;
2560 volatile uint32_t *status;
2561 uint32_t nb_hold, desc;
2563 if (unlikely(offset >= rxq->nb_rx_desc))
2566 nb_hold = rxq->nb_rx_hold;
2567 if (offset >= rxq->nb_rx_desc - nb_hold)
2568 return RTE_ETH_RX_DESC_UNAVAIL;
2570 desc = rxq->rx_tail + offset;
2571 if (desc >= rxq->nb_rx_desc)
2572 desc -= rxq->nb_rx_desc;
2574 status = &rxq->rx_ring[desc].qw1.lo.status;
2575 if (*status & rte_cpu_to_le_32(TXGBE_RXD_STAT_DD))
2576 return RTE_ETH_RX_DESC_DONE;
2578 return RTE_ETH_RX_DESC_AVAIL;
2582 txgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
2584 struct txgbe_tx_queue *txq = tx_queue;
2585 volatile uint32_t *status;
2588 if (unlikely(offset >= txq->nb_tx_desc))
2591 desc = txq->tx_tail + offset;
2592 if (desc >= txq->nb_tx_desc) {
2593 desc -= txq->nb_tx_desc;
2594 if (desc >= txq->nb_tx_desc)
2595 desc -= txq->nb_tx_desc;
2598 status = &txq->tx_ring[desc].dw3;
2599 if (*status & rte_cpu_to_le_32(TXGBE_TXD_DD))
2600 return RTE_ETH_TX_DESC_DONE;
2602 return RTE_ETH_TX_DESC_FULL;
2606 txgbe_dev_clear_queues(struct rte_eth_dev *dev)
2609 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
2611 PMD_INIT_FUNC_TRACE();
2613 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2614 struct txgbe_tx_queue *txq = dev->data->tx_queues[i];
2617 txq->ops->release_mbufs(txq);
2618 txq->ops->reset(txq);
2622 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2623 struct txgbe_rx_queue *rxq = dev->data->rx_queues[i];
2626 txgbe_rx_queue_release_mbufs(rxq);
2627 txgbe_reset_rx_queue(adapter, rxq);
2633 txgbe_dev_free_queues(struct rte_eth_dev *dev)
2637 PMD_INIT_FUNC_TRACE();
2639 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2640 txgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2641 dev->data->rx_queues[i] = NULL;
2643 dev->data->nb_rx_queues = 0;
2645 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2646 txgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2647 dev->data->tx_queues[i] = NULL;
2649 dev->data->nb_tx_queues = 0;
2653 * Receive Side Scaling (RSS)
2656 * The source and destination IP addresses of the IP header and the source
2657 * and destination ports of TCP/UDP headers, if any, of received packets are
2658 * hashed against a configurable random key to compute a 32-bit RSS hash result.
2659 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
2660 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
2661 * RSS output index which is used as the RX queue index where to store the
2663 * The following output is supplied in the RX write-back descriptor:
2664 * - 32-bit result of the Microsoft RSS hash function,
2665 * - 4-bit RSS type field.
2669 * Used as the default key.
/*
 * Default 40-byte RSS (Toeplitz) hash key, grouped four bytes per line
 * half to mirror the ten 32-bit RSSKEY registers it is written into.
 */
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA,  0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D,  0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB,  0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3,  0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B,  0xBE, 0xAC, 0x01, 0xFA,
};
2680 txgbe_rss_disable(struct rte_eth_dev *dev)
2682 struct txgbe_hw *hw;
2684 hw = TXGBE_DEV_HW(dev);
2686 wr32m(hw, TXGBE_RACTL, TXGBE_RACTL_RSSENA, 0);
2690 txgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
2691 struct rte_eth_rss_conf *rss_conf)
2693 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2700 if (!txgbe_rss_update_sp(hw->mac.type)) {
2701 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
2706 hash_key = rss_conf->rss_key;
2708 /* Fill in RSS hash key */
2709 for (i = 0; i < 10; i++) {
2710 rss_key = LS32(hash_key[(i * 4) + 0], 0, 0xFF);
2711 rss_key |= LS32(hash_key[(i * 4) + 1], 8, 0xFF);
2712 rss_key |= LS32(hash_key[(i * 4) + 2], 16, 0xFF);
2713 rss_key |= LS32(hash_key[(i * 4) + 3], 24, 0xFF);
2714 wr32a(hw, TXGBE_REG_RSSKEY, i, rss_key);
2718 /* Set configured hashing protocols */
2719 rss_hf = rss_conf->rss_hf & TXGBE_RSS_OFFLOAD_ALL;
2720 mrqc = rd32(hw, TXGBE_RACTL);
2721 mrqc &= ~TXGBE_RACTL_RSSMASK;
2722 if (rss_hf & ETH_RSS_IPV4)
2723 mrqc |= TXGBE_RACTL_RSSIPV4;
2724 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
2725 mrqc |= TXGBE_RACTL_RSSIPV4TCP;
2726 if (rss_hf & ETH_RSS_IPV6 ||
2727 rss_hf & ETH_RSS_IPV6_EX)
2728 mrqc |= TXGBE_RACTL_RSSIPV6;
2729 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP ||
2730 rss_hf & ETH_RSS_IPV6_TCP_EX)
2731 mrqc |= TXGBE_RACTL_RSSIPV6TCP;
2732 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
2733 mrqc |= TXGBE_RACTL_RSSIPV4UDP;
2734 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP ||
2735 rss_hf & ETH_RSS_IPV6_UDP_EX)
2736 mrqc |= TXGBE_RACTL_RSSIPV6UDP;
2739 mrqc |= TXGBE_RACTL_RSSENA;
2741 mrqc &= ~TXGBE_RACTL_RSSENA;
2743 wr32(hw, TXGBE_RACTL, mrqc);
2749 txgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
2750 struct rte_eth_rss_conf *rss_conf)
2752 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2759 hash_key = rss_conf->rss_key;
2761 /* Return RSS hash key */
2762 for (i = 0; i < 10; i++) {
2763 rss_key = rd32a(hw, TXGBE_REG_RSSKEY, i);
2764 hash_key[(i * 4) + 0] = RS32(rss_key, 0, 0xFF);
2765 hash_key[(i * 4) + 1] = RS32(rss_key, 8, 0xFF);
2766 hash_key[(i * 4) + 2] = RS32(rss_key, 16, 0xFF);
2767 hash_key[(i * 4) + 3] = RS32(rss_key, 24, 0xFF);
2772 mrqc = rd32(hw, TXGBE_RACTL);
2773 if (mrqc & TXGBE_RACTL_RSSIPV4)
2774 rss_hf |= ETH_RSS_IPV4;
2775 if (mrqc & TXGBE_RACTL_RSSIPV4TCP)
2776 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2777 if (mrqc & TXGBE_RACTL_RSSIPV6)
2778 rss_hf |= ETH_RSS_IPV6 |
2780 if (mrqc & TXGBE_RACTL_RSSIPV6TCP)
2781 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP |
2782 ETH_RSS_IPV6_TCP_EX;
2783 if (mrqc & TXGBE_RACTL_RSSIPV4UDP)
2784 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2785 if (mrqc & TXGBE_RACTL_RSSIPV6UDP)
2786 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP |
2787 ETH_RSS_IPV6_UDP_EX;
2788 if (!(mrqc & TXGBE_RACTL_RSSENA))
2791 rss_hf &= TXGBE_RSS_OFFLOAD_ALL;
2793 rss_conf->rss_hf = rss_hf;
2798 txgbe_rss_configure(struct rte_eth_dev *dev)
2800 struct rte_eth_rss_conf rss_conf;
2801 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
2802 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2807 PMD_INIT_FUNC_TRACE();
2810 * Fill in redirection table
2811 * The byte-swap is needed because NIC registers are in
2812 * little-endian order.
2814 if (adapter->rss_reta_updated == 0) {
2816 for (i = 0, j = 0; i < ETH_RSS_RETA_SIZE_128; i++, j++) {
2817 if (j == dev->data->nb_rx_queues)
2819 reta = (reta >> 8) | LS32(j, 24, 0xFF);
2821 wr32a(hw, TXGBE_REG_RSSTBL, i >> 2, reta);
2825 * Configure the RSS key and the RSS protocols used to compute
2826 * the RSS hash of input packets.
2828 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2829 if (rss_conf.rss_key == NULL)
2830 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2831 txgbe_dev_rss_hash_update(dev, &rss_conf);
2834 #define NUM_VFTA_REGISTERS 128
2835 #define NIC_RX_BUFFER_SIZE 0x200
/*
 * Program VMDq+DCB Rx: validate the pool count (16 or 32 pools), split
 * the Rx packet buffer evenly across traffic classes, select the pool /
 * TC counts in PORTCTL, map user priorities to TCs, and open the VLAN
 * filters for the configured pool map.
 */
txgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
	struct rte_eth_vmdq_dcb_conf *cfg;
	struct txgbe_hw *hw;
	enum rte_eth_nb_pools num_pools;
	uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
	uint8_t nb_tcs; /* number of traffic classes */

	PMD_INIT_FUNC_TRACE();
	hw = TXGBE_DEV_HW(dev);
	cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
	num_pools = cfg->nb_queue_pools;
	/* Check we have a valid number of pools; otherwise fall back to
	 * plain (disabled) RSS and bail out.
	 */
	if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
		txgbe_rss_disable(dev);
	/* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
	nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);

	/* split rx buffer up into sections, each for 1 traffic class */
	pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
	for (i = 0; i < nb_tcs; i++) {
		uint32_t rxpbsize = rd32(hw, TXGBE_PBRXSIZE(i));

		/* clear 10 size bits, then set the per-TC share */
		rxpbsize &= (~(0x3FF << 10));
		/* clear 10 bits. */
		rxpbsize |= (pbsize << 10); /* set value */
		wr32(hw, TXGBE_PBRXSIZE(i), rxpbsize);
	/* zero alloc all unused TCs */
	for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
		uint32_t rxpbsize = rd32(hw, TXGBE_PBRXSIZE(i));

		rxpbsize &= (~(0x3FF << 10));
		/* clear 10 bits. */
		wr32(hw, TXGBE_PBRXSIZE(i), rxpbsize);

	/* Pool count and TC count are linked: 16 pools -> 8 TCs. */
	if (num_pools == ETH_16_POOLS) {
		mrqc = TXGBE_PORTCTL_NUMTC_8;
		mrqc |= TXGBE_PORTCTL_NUMVT_16;
		mrqc = TXGBE_PORTCTL_NUMTC_4;
		mrqc |= TXGBE_PORTCTL_NUMVT_32;
	wr32m(hw, TXGBE_PORTCTL,
	      TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK, mrqc);

	/* Replication enable plus either a default pool or drop-on-miss. */
	vt_ctl = TXGBE_POOLCTL_RPLEN;
	if (cfg->enable_default_pool)
		vt_ctl |= TXGBE_POOLCTL_DEFPL(cfg->default_pool);
		vt_ctl |= TXGBE_POOLCTL_DEFDSA;

	wr32(hw, TXGBE_POOLCTL, vt_ctl);

	for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
		/*
		 * mapping is done with 3 bits per priority,
		 * so shift by i*3 each time
		 */
		queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));

	wr32(hw, TXGBE_RPUP2TC, queue_mapping);

	wr32(hw, TXGBE_ARBRXCTL, TXGBE_ARBRXCTL_RRM);

	/* enable vlan filtering and allow all vlan tags through */
	vlanctrl = rd32(hw, TXGBE_VLANCTL);
	vlanctrl |= TXGBE_VLANCTL_VFE; /* enable vlan filters */
	wr32(hw, TXGBE_VLANCTL, vlanctrl);

	/* enable all vlan filters */
	for (i = 0; i < NUM_VFTA_REGISTERS; i++)
		wr32(hw, TXGBE_VLANTBL(i), 0xFFFFFFFF);

	/* Rx enable: one bit per pool. */
	wr32(hw, TXGBE_POOLRXENA(0),
	     num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);

	/* Let every pool receive on MAC address slot 0. */
	wr32(hw, TXGBE_ETHADDRIDX, 0);
	wr32(hw, TXGBE_ETHADDRASSL, 0xFFFFFFFF);
	wr32(hw, TXGBE_ETHADDRASSH, 0xFFFFFFFF);

	/* set up filters for vlan tags as configured */
	for (i = 0; i < cfg->nb_pool_maps; i++) {
		/* set vlan id in VF register and set the valid bit */
		wr32(hw, TXGBE_PSRVLANIDX, i);
		wr32(hw, TXGBE_PSRVLAN, (TXGBE_PSRVLAN_EA |
			(cfg->pool_map[i].vlan_id & 0xFFF)));

		wr32(hw, TXGBE_PSRVLANPLM(0), cfg->pool_map[i].pools);
2939 * txgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
2940 * @dev: pointer to eth_dev structure
2941 * @dcb_config: pointer to txgbe_dcb_config structure
2944 txgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
2945 struct txgbe_dcb_config *dcb_config)
2948 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2950 PMD_INIT_FUNC_TRACE();
2952 /* Disable the Tx desc arbiter */
2953 reg = rd32(hw, TXGBE_ARBTXCTL);
2954 reg |= TXGBE_ARBTXCTL_DIA;
2955 wr32(hw, TXGBE_ARBTXCTL, reg);
2957 /* Enable DCB for Tx with 8 TCs */
2958 reg = rd32(hw, TXGBE_PORTCTL);
2959 reg &= TXGBE_PORTCTL_NUMTC_MASK;
2960 reg |= TXGBE_PORTCTL_DCB;
2961 if (dcb_config->num_tcs.pg_tcs == 8)
2962 reg |= TXGBE_PORTCTL_NUMTC_8;
2964 reg |= TXGBE_PORTCTL_NUMTC_4;
2966 wr32(hw, TXGBE_PORTCTL, reg);
2968 /* Enable the Tx desc arbiter */
2969 reg = rd32(hw, TXGBE_ARBTXCTL);
2970 reg &= ~TXGBE_ARBTXCTL_DIA;
2971 wr32(hw, TXGBE_ARBTXCTL, reg);
2975 * txgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
2976 * @dev: pointer to rte_eth_dev structure
2977 * @dcb_config: pointer to txgbe_dcb_config structure
2980 txgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
2981 struct txgbe_dcb_config *dcb_config)
2983 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
2984 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
2985 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
2987 PMD_INIT_FUNC_TRACE();
2988 /*PF VF Transmit Enable*/
2989 wr32(hw, TXGBE_POOLTXENA(0),
2990 vmdq_tx_conf->nb_queue_pools ==
2991 ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
2993 /*Configure general DCB TX parameters*/
2994 txgbe_dcb_tx_hw_config(dev, dcb_config);
2998 txgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
2999 struct txgbe_dcb_config *dcb_config)
3001 struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3002 &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3003 struct txgbe_dcb_tc_config *tc;
3006 /* convert rte_eth_conf.rx_adv_conf to struct txgbe_dcb_config */
3007 if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3008 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3009 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3011 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3012 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3015 /* Initialize User Priority to Traffic Class mapping */
3016 for (j = 0; j < TXGBE_DCB_TC_MAX; j++) {
3017 tc = &dcb_config->tc_config[j];
3018 tc->path[TXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3021 /* User Priority to Traffic Class mapping */
3022 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3023 j = vmdq_rx_conf->dcb_tc[i];
3024 tc = &dcb_config->tc_config[j];
3025 tc->path[TXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3031 txgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3032 struct txgbe_dcb_config *dcb_config)
3034 struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3035 &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3036 struct txgbe_dcb_tc_config *tc;
3039 /* convert rte_eth_conf.rx_adv_conf to struct txgbe_dcb_config */
3040 if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3041 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3042 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3044 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3045 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3048 /* Initialize User Priority to Traffic Class mapping */
3049 for (j = 0; j < TXGBE_DCB_TC_MAX; j++) {
3050 tc = &dcb_config->tc_config[j];
3051 tc->path[TXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3054 /* User Priority to Traffic Class mapping */
3055 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3056 j = vmdq_tx_conf->dcb_tc[i];
3057 tc = &dcb_config->tc_config[j];
3058 tc->path[TXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3064 txgbe_dcb_rx_config(struct rte_eth_dev *dev,
3065 struct txgbe_dcb_config *dcb_config)
3067 struct rte_eth_dcb_rx_conf *rx_conf =
3068 &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3069 struct txgbe_dcb_tc_config *tc;
3072 dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3073 dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3075 /* Initialize User Priority to Traffic Class mapping */
3076 for (j = 0; j < TXGBE_DCB_TC_MAX; j++) {
3077 tc = &dcb_config->tc_config[j];
3078 tc->path[TXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3081 /* User Priority to Traffic Class mapping */
3082 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3083 j = rx_conf->dcb_tc[i];
3084 tc = &dcb_config->tc_config[j];
3085 tc->path[TXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3091 txgbe_dcb_tx_config(struct rte_eth_dev *dev,
3092 struct txgbe_dcb_config *dcb_config)
3094 struct rte_eth_dcb_tx_conf *tx_conf =
3095 &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3096 struct txgbe_dcb_tc_config *tc;
3099 dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3100 dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3102 /* Initialize User Priority to Traffic Class mapping */
3103 for (j = 0; j < TXGBE_DCB_TC_MAX; j++) {
3104 tc = &dcb_config->tc_config[j];
3105 tc->path[TXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3108 /* User Priority to Traffic Class mapping */
3109 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3110 j = tx_conf->dcb_tc[i];
3111 tc = &dcb_config->tc_config[j];
3112 tc->path[TXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
/**
 * txgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
 * @dev: pointer to eth_dev structure
 * @dcb_config: pointer to txgbe_dcb_config structure
 *
 * Disables the Rx arbiter, programs PORTCTL with the TC/VT counts,
 * sets queue drop policy, opens the VLAN filters and finally re-enables
 * the Rx packet plane in recycle/WSP mode.
 */
txgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
		       struct txgbe_dcb_config *dcb_config)
	struct txgbe_hw *hw = TXGBE_DEV_HW(dev);

	PMD_INIT_FUNC_TRACE();
	/*
	 * Disable the arbiter before changing parameters
	 * (always enable recycle mode; WSP)
	 */
	reg = TXGBE_ARBRXCTL_RRM | TXGBE_ARBRXCTL_WSP | TXGBE_ARBRXCTL_DIA;
	wr32(hw, TXGBE_ARBRXCTL, reg);

	reg = rd32(hw, TXGBE_PORTCTL);
	reg &= ~(TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK);
	/* 4 TCs pair with 32 pools in VT mode; no VT -> clear POOLCTL. */
	if (dcb_config->num_tcs.pg_tcs == 4) {
		reg |= TXGBE_PORTCTL_NUMTC_4;
		if (dcb_config->vt_mode)
			reg |= TXGBE_PORTCTL_NUMVT_32;
			wr32(hw, TXGBE_POOLCTL, 0);
	/* 8 TCs pair with 16 pools in VT mode. */
	if (dcb_config->num_tcs.pg_tcs == 8) {
		reg |= TXGBE_PORTCTL_NUMTC_8;
		if (dcb_config->vt_mode)
			reg |= TXGBE_PORTCTL_NUMVT_16;
			wr32(hw, TXGBE_POOLCTL, 0);

	wr32(hw, TXGBE_PORTCTL, reg);

	/*
	 * NOTE(review): both branches below program QPRXDROP identically
	 * even though the comments say "disable" vs "enable" — confirm
	 * whether the non-SRIOV branch should clear the drop bits instead.
	 */
	if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
		/* Disable drop for all queues in VMDQ mode*/
		for (q = 0; q < TXGBE_MAX_RX_QUEUE_NUM; q++) {
			u32 val = 1 << (q % 32);
			wr32m(hw, TXGBE_QPRXDROP(q / 32), val, val);
		/* Enable drop for all queues in SRIOV mode */
		for (q = 0; q < TXGBE_MAX_RX_QUEUE_NUM; q++) {
			u32 val = 1 << (q % 32);
			wr32m(hw, TXGBE_QPRXDROP(q / 32), val, val);

	/* VLNCTL: enable vlan filtering and allow all vlan tags through */
	vlanctrl = rd32(hw, TXGBE_VLANCTL);
	vlanctrl |= TXGBE_VLANCTL_VFE; /* enable vlan filters */
	wr32(hw, TXGBE_VLANCTL, vlanctrl);

	/* VLANTBL - enable all vlan filters */
	for (i = 0; i < NUM_VFTA_REGISTERS; i++)
		wr32(hw, TXGBE_VLANTBL(i), 0xFFFFFFFF);

	/*
	 * Configure Rx packet plane (recycle mode; WSP) and
	 * re-enable the arbiter (DIA bit left clear).
	 */
	reg = TXGBE_ARBRXCTL_RRM | TXGBE_ARBRXCTL_WSP;
	wr32(hw, TXGBE_ARBRXCTL, reg);
static void
txgbe_dcb_hw_arbite_rx_config(struct txgbe_hw *hw, uint16_t *refill,
		uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
{
	/* Program the Rx arbiter with the unpacked CEE credit containers. */
	txgbe_dcb_config_rx_arbiter_raptor(hw, refill, max, bwg_id,
					   tsa, map);
}
3200 txgbe_dcb_hw_arbite_tx_config(struct txgbe_hw *hw, uint16_t *refill,
3201 uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3203 switch (hw->mac.type) {
3204 case txgbe_mac_raptor:
3205 txgbe_dcb_config_tx_desc_arbiter_raptor(hw, refill,
3207 txgbe_dcb_config_tx_data_arbiter_raptor(hw, refill,
3208 max, bwg_id, tsa, map);
3215 #define DCB_RX_CONFIG 1
3216 #define DCB_TX_CONFIG 1
3217 #define DCB_TX_PB 1024
/**
 * txgbe_dcb_hw_configure - Enable DCB and configure
 * general DCB in VT mode and non-VT mode parameters
 * @dev: pointer to rte_eth_dev structure
 * @dcb_config: pointer to txgbe_dcb_config structure
 *
 * Top-level DCB bring-up: derives Rx/Tx DCB settings from rte_eth_conf,
 * programs packet-buffer carving, computes CEE traffic-class credits,
 * configures both arbiters and, when requested, priority flow control.
 */
txgbe_dcb_hw_configure(struct rte_eth_dev *dev,
		       struct txgbe_dcb_config *dcb_config)
	uint8_t i, pfc_en, nb_tcs;
	uint16_t pbsize, rx_buffer_size;
	uint8_t config_dcb_rx = 0;
	uint8_t config_dcb_tx = 0;
	/* Per-TC CEE containers, unpacked from dcb_config further below. */
	uint8_t tsa[TXGBE_DCB_TC_MAX] = {0};
	uint8_t bwgid[TXGBE_DCB_TC_MAX] = {0};
	uint16_t refill[TXGBE_DCB_TC_MAX] = {0};
	uint16_t max[TXGBE_DCB_TC_MAX] = {0};
	uint8_t map[TXGBE_DCB_TC_MAX] = {0};
	struct txgbe_dcb_tc_config *tc;
	/* Worst-case frame size used for the credit calculation. */
	uint32_t max_frame = dev->data->mtu +
			RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
	struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
	struct txgbe_bw_conf *bw_conf = TXGBE_DEV_BW_CONF(dev);

	/* Pick the Rx flavour (VMDq+DCB vs plain DCB/DCB+RSS). */
	switch (dev->data->dev_conf.rxmode.mq_mode) {
	case ETH_MQ_RX_VMDQ_DCB:
		dcb_config->vt_mode = true;
		config_dcb_rx = DCB_RX_CONFIG;
		/*
		 * get dcb and VT rx configuration parameters
		 */
		txgbe_vmdq_dcb_rx_config(dev, dcb_config);
		/*Configure general VMDQ and DCB RX parameters*/
		txgbe_vmdq_dcb_configure(dev);
	case ETH_MQ_RX_DCB_RSS:
		dcb_config->vt_mode = false;
		config_dcb_rx = DCB_RX_CONFIG;
		/* Get dcb TX configuration parameters from rte_eth_conf */
		txgbe_dcb_rx_config(dev, dcb_config);
		/*Configure general DCB RX parameters*/
		txgbe_dcb_rx_hw_config(dev, dcb_config);
		PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
	/* Pick the Tx flavour (VMDq+DCB vs plain DCB). */
	switch (dev->data->dev_conf.txmode.mq_mode) {
	case ETH_MQ_TX_VMDQ_DCB:
		dcb_config->vt_mode = true;
		config_dcb_tx = DCB_TX_CONFIG;
		/* get DCB and VT TX configuration parameters
		 */
		txgbe_dcb_vt_tx_config(dev, dcb_config);
		/* Configure general VMDQ and DCB TX parameters */
		txgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
		dcb_config->vt_mode = false;
		config_dcb_tx = DCB_TX_CONFIG;
		/* get DCB TX configuration parameters from rte_eth_conf */
		txgbe_dcb_tx_config(dev, dcb_config);
		/* Configure general DCB TX parameters */
		txgbe_dcb_tx_hw_config(dev, dcb_config);
		PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");

	nb_tcs = dcb_config->num_tcs.pfc_tcs;

	txgbe_dcb_unpack_map_cee(dcb_config, TXGBE_DCB_RX_CONFIG, map);
	if (nb_tcs == ETH_4_TCS) {
		/* Avoid un-configured priority mapping to TC0 */
		uint8_t mask = 0xFF;

		/* Build a mask of TCs no priority maps to ... */
		for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
			mask = (uint8_t)(mask & (~(1 << map[i])));
		/* ... and reassign stray priorities onto them. */
		for (i = 0; mask && (i < TXGBE_DCB_TC_MAX); i++) {
			if ((mask & 0x1) && j < ETH_DCB_NUM_USER_PRIORITIES)

		/* Re-configure 4 TCs BW */
		for (i = 0; i < nb_tcs; i++) {
			tc = &dcb_config->tc_config[i];
			/* Only rewrite the split when the user supplied a
			 * different TC count than the bandwidth config.
			 */
			if (bw_conf->tc_num != nb_tcs)
				tc->path[TXGBE_DCB_TX_CONFIG].bwg_percent =
					(uint8_t)(100 / nb_tcs);
			tc->path[TXGBE_DCB_RX_CONFIG].bwg_percent =
				(uint8_t)(100 / nb_tcs);
		/* Unused TCs get no bandwidth at all. */
		for (; i < TXGBE_DCB_TC_MAX; i++) {
			tc = &dcb_config->tc_config[i];
			tc->path[TXGBE_DCB_TX_CONFIG].bwg_percent = 0;
			tc->path[TXGBE_DCB_RX_CONFIG].bwg_percent = 0;
		/* Re-configure 8 TCs BW */
		for (i = 0; i < nb_tcs; i++) {
			tc = &dcb_config->tc_config[i];
			if (bw_conf->tc_num != nb_tcs)
				tc->path[TXGBE_DCB_TX_CONFIG].bwg_percent =
					(uint8_t)(100 / nb_tcs + (i & 1));
			tc->path[TXGBE_DCB_RX_CONFIG].bwg_percent =
				(uint8_t)(100 / nb_tcs + (i & 1));

	rx_buffer_size = NIC_RX_BUFFER_SIZE;

	if (config_dcb_rx) {
		/* Set RX buffer size: equal share per TC. */
		pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
		uint32_t rxpbsize = pbsize << 10;

		for (i = 0; i < nb_tcs; i++)
			wr32(hw, TXGBE_PBRXSIZE(i), rxpbsize);

		/* zero alloc all unused TCs */
		for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
			wr32(hw, TXGBE_PBRXSIZE(i), 0);
	if (config_dcb_tx) {
		/* Only support an equally distributed
		 * Tx packet buffer strategy.
		 */
		uint32_t txpktsize = TXGBE_PBTXSIZE_MAX / nb_tcs;
		uint32_t txpbthresh = (txpktsize / DCB_TX_PB) -
					TXGBE_TXPKT_SIZE_MAX;

		for (i = 0; i < nb_tcs; i++) {
			wr32(hw, TXGBE_PBTXSIZE(i), txpktsize);
			wr32(hw, TXGBE_PBTXDMATH(i), txpbthresh);
		/* Clear unused TCs, if any, to zero buffer size*/
		for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
			wr32(hw, TXGBE_PBTXSIZE(i), 0);
			wr32(hw, TXGBE_PBTXDMATH(i), 0);

	/*Calculates traffic class credits*/
	txgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
					   TXGBE_DCB_TX_CONFIG);
	txgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
					   TXGBE_DCB_RX_CONFIG);

	if (config_dcb_rx) {
		/* Unpack CEE standard containers */
		txgbe_dcb_unpack_refill_cee(dcb_config,
			TXGBE_DCB_RX_CONFIG, refill);
		txgbe_dcb_unpack_max_cee(dcb_config, max);
		txgbe_dcb_unpack_bwgid_cee(dcb_config,
			TXGBE_DCB_RX_CONFIG, bwgid);
		txgbe_dcb_unpack_tsa_cee(dcb_config,
			TXGBE_DCB_RX_CONFIG, tsa);
		/* Configure PG(ETS) RX */
		txgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);

	if (config_dcb_tx) {
		/* Unpack CEE standard containers */
		txgbe_dcb_unpack_refill_cee(dcb_config,
			TXGBE_DCB_TX_CONFIG, refill);
		txgbe_dcb_unpack_max_cee(dcb_config, max);
		txgbe_dcb_unpack_bwgid_cee(dcb_config,
			TXGBE_DCB_TX_CONFIG, bwgid);
		txgbe_dcb_unpack_tsa_cee(dcb_config,
			TXGBE_DCB_TX_CONFIG, tsa);
		/* Configure PG(ETS) TX */
		txgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);

	/* Configure queue statistics registers */
	txgbe_dcb_config_tc_stats_raptor(hw, dcb_config);

	/* Check if the PFC is supported */
	if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
		pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
		for (i = 0; i < nb_tcs; i++) {
			/* If the TC count is 8,
			 * and the default high_water is 48,
			 * the low_water is 16 as default.
			 */
			hw->fc.high_water[i] = (pbsize * 3) / 4;
			hw->fc.low_water[i] = pbsize / 4;
			/* Enable pfc for this TC */
			tc = &dcb_config->tc_config[i];
			tc->pfc = txgbe_dcb_pfc_enabled;
		txgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
		/* With 4 TCs only the low nibble of priorities applies. */
		if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
		ret = txgbe_dcb_config_pfc(hw, pfc_en, map);
3426 void txgbe_configure_pb(struct rte_eth_dev *dev)
3428 struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
3429 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3432 int tc = dev_conf->rx_adv_conf.dcb_rx_conf.nb_tcs;
3434 /* Reserve 256KB(/512KB) rx buffer for fdir */
3437 hw->mac.setup_pba(hw, tc, hdrm, PBA_STRATEGY_EQUAL);
/*
 * One-time port-level defaults: outer/inner VLAN TPIDs, the eight-entry
 * TPID filter table and the default VXLAN UDP port.
 */
void txgbe_configure_port(struct rte_eth_dev *dev)
	struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
	/* TPID filter table; entries beyond the first two are initialized
	 * on lines not shown in this view.
	 */
	uint16_t tpids[8] = {RTE_ETHER_TYPE_VLAN, RTE_ETHER_TYPE_QINQ,

	PMD_INIT_FUNC_TRACE();

	/* default outer vlan tpid */
	wr32(hw, TXGBE_EXTAG,
	     TXGBE_EXTAG_ETAG(RTE_ETHER_TYPE_ETAG) |
	     TXGBE_EXTAG_VLAN(RTE_ETHER_TYPE_QINQ));

	/* default inner vlan tpid */
	wr32m(hw, TXGBE_VLANCTL,
	      TXGBE_VLANCTL_TPID_MASK,
	      TXGBE_VLANCTL_TPID(RTE_ETHER_TYPE_VLAN));
	wr32m(hw, TXGBE_DMATXCTRL,
	      TXGBE_DMATXCTRL_TPID_MASK,
	      TXGBE_DMATXCTRL_TPID(RTE_ETHER_TYPE_VLAN));

	/* default vlan tpid filters: two 16-bit TPIDs per register,
	 * even index -> LSB half, odd index -> MSB half.
	 */
	for (i = 0; i < 8; i++) {
		wr32m(hw, TXGBE_TAGTPID(i / 2),
		      (i % 2 ? TXGBE_TAGTPID_MSB_MASK
			     : TXGBE_TAGTPID_LSB_MASK),
		      (i % 2 ? TXGBE_TAGTPID_MSB(tpids[i])
			     : TXGBE_TAGTPID_LSB(tpids[i])));

	/* default vxlan port (IANA-assigned UDP port for VXLAN) */
	wr32(hw, TXGBE_VXLANPORT, 4789);
3478 * txgbe_configure_dcb - Configure DCB Hardware
3479 * @dev: pointer to rte_eth_dev
3481 void txgbe_configure_dcb(struct rte_eth_dev *dev)
3483 struct txgbe_dcb_config *dcb_cfg = TXGBE_DEV_DCB_CONFIG(dev);
3484 struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
3486 PMD_INIT_FUNC_TRACE();
3488 /* check support mq_mode for DCB */
3489 if (dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB &&
3490 dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB &&
3491 dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS)
3494 if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3497 /** Configure DCB hardware **/
3498 txgbe_dcb_hw_configure(dev, dcb_cfg);
/*
 * VMDq only support for 10 GbE NIC.
 *
 * Program pure VMDq Rx (no DCB): 64 pools, default-pool selection,
 * per-pool Rx mode, wide-open VLAN filters and optional Tx loopback.
 */
txgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
	struct rte_eth_vmdq_rx_conf *cfg;
	struct txgbe_hw *hw;
	enum rte_eth_nb_pools num_pools;
	uint32_t mrqc, vt_ctl, vlanctrl;

	PMD_INIT_FUNC_TRACE();
	hw = TXGBE_DEV_HW(dev);
	cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
	num_pools = cfg->nb_queue_pools;

	/* VMDq and RSS are mutually exclusive here. */
	txgbe_rss_disable(dev);

	/* Virtualization always uses the full 64-pool layout. */
	mrqc = TXGBE_PORTCTL_NUMVT_64;
	wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK, mrqc);

	/* turn on virtualisation and set the default pool */
	vt_ctl = TXGBE_POOLCTL_RPLEN;
	if (cfg->enable_default_pool)
		vt_ctl |= TXGBE_POOLCTL_DEFPL(cfg->default_pool);
		vt_ctl |= TXGBE_POOLCTL_DEFDSA;

	wr32(hw, TXGBE_POOLCTL, vt_ctl);

	/* Apply the user Rx mode (bcast/mcast/...) to every pool. */
	for (i = 0; i < (int)num_pools; i++) {
		vmolr = txgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
		wr32(hw, TXGBE_POOLETHCTL(i), vmolr);

	/* enable vlan filtering and allow all vlan tags through */
	vlanctrl = rd32(hw, TXGBE_VLANCTL);
	vlanctrl |= TXGBE_VLANCTL_VFE; /* enable vlan filters */
	wr32(hw, TXGBE_VLANCTL, vlanctrl);

	/* enable all vlan filters */
	for (i = 0; i < NUM_VFTA_REGISTERS; i++)
		wr32(hw, TXGBE_VLANTBL(i), UINT32_MAX);

	/* pool enabling for receive - 64 */
	wr32(hw, TXGBE_POOLRXENA(0), UINT32_MAX);
	if (num_pools == ETH_64_POOLS)
		wr32(hw, TXGBE_POOLRXENA(1), UINT32_MAX);

	/*
	 * allow pools to read specific mac addresses
	 * In this case, all pools should be able to read from mac addr 0
	 */
	wr32(hw, TXGBE_ETHADDRIDX, 0);
	wr32(hw, TXGBE_ETHADDRASSL, 0xFFFFFFFF);
	wr32(hw, TXGBE_ETHADDRASSH, 0xFFFFFFFF);

	/* set up filters for vlan tags as configured */
	for (i = 0; i < cfg->nb_pool_maps; i++) {
		/* set vlan id in VF register and set the valid bit */
		wr32(hw, TXGBE_PSRVLANIDX, i);
		wr32(hw, TXGBE_PSRVLAN, (TXGBE_PSRVLAN_EA |
			TXGBE_PSRVLAN_VID(cfg->pool_map[i].vlan_id)));
		/*
		 * Put the allowed pools in VFB reg. As we only have 16 or 64
		 * pools, we only need to use the first half of the register
		 * i.e. bits 0-31
		 */
		if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
			wr32(hw, TXGBE_PSRVLANPLM(0),
				(cfg->pool_map[i].pools & UINT32_MAX));
			wr32(hw, TXGBE_PSRVLANPLM(1),
				((cfg->pool_map[i].pools >> 32) & UINT32_MAX));

	/* Tx General Switch Control Enables VMDQ loopback */
	if (cfg->enable_loop_back) {
		wr32(hw, TXGBE_PSRCTL, TXGBE_PSRCTL_LBENA);
		/* Local loopback must also be enabled per pool. */
		for (i = 0; i < 64; i++)
			wr32m(hw, TXGBE_POOLETHCTL(i),
			      TXGBE_POOLETHCTL_LLB, TXGBE_POOLETHCTL_LLB);
3592 * txgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3593 * @hw: pointer to hardware structure
3596 txgbe_vmdq_tx_hw_configure(struct txgbe_hw *hw)
3601 PMD_INIT_FUNC_TRACE();
3602 /*PF VF Transmit Enable*/
3603 wr32(hw, TXGBE_POOLTXENA(0), UINT32_MAX);
3604 wr32(hw, TXGBE_POOLTXENA(1), UINT32_MAX);
3606 /* Disable the Tx desc arbiter */
3607 reg = rd32(hw, TXGBE_ARBTXCTL);
3608 reg |= TXGBE_ARBTXCTL_DIA;
3609 wr32(hw, TXGBE_ARBTXCTL, reg);
3611 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK,
3612 TXGBE_PORTCTL_NUMVT_64);
3614 /* Disable drop for all queues */
3615 for (q = 0; q < 128; q++) {
3616 u32 val = 1 << (q % 32);
3617 wr32m(hw, TXGBE_QPRXDROP(q / 32), val, val);
3620 /* Enable the Tx desc arbiter */
3621 reg = rd32(hw, TXGBE_ARBTXCTL);
3622 reg &= ~TXGBE_ARBTXCTL_DIA;
3623 wr32(hw, TXGBE_ARBTXCTL, reg);
3628 static int __rte_cold
3629 txgbe_alloc_rx_queue_mbufs(struct txgbe_rx_queue *rxq)
3631 struct txgbe_rx_entry *rxe = rxq->sw_ring;
3635 /* Initialize software ring entries */
3636 for (i = 0; i < rxq->nb_rx_desc; i++) {
3637 volatile struct txgbe_rx_desc *rxd;
3638 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
3641 PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
3642 (unsigned int)rxq->queue_id);
3646 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
3647 mbuf->port = rxq->port_id;
3650 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
3651 rxd = &rxq->rx_ring[i];
3652 TXGBE_RXD_HDRADDR(rxd, 0);
3653 TXGBE_RXD_PKTADDR(rxd, dma_addr);
/* Configure RSS together with SR-IOV: program the RSS tables, then set
 * the pool count (NUMVT) in PORTCTL according to how many VF pools are
 * active. Unsupported pool counts are logged as errors. */
3661 txgbe_config_vf_rss(struct rte_eth_dev *dev)
3663 struct txgbe_hw *hw;
3666 txgbe_rss_configure(dev);
3668 hw = TXGBE_DEV_HW(dev);
/* Clear both the traffic-class and virtualization-pool fields before
 * setting the new pool count. */
3671 mrqc = rd32(hw, TXGBE_PORTCTL);
3672 mrqc &= ~(TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK);
3673 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3675 mrqc |= TXGBE_PORTCTL_NUMVT_64;
3679 mrqc |= TXGBE_PORTCTL_NUMVT_32;
3683 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
3687 wr32(hw, TXGBE_PORTCTL, mrqc);
/* Default multi-queue configuration when SR-IOV is active but no
 * RSS/DCB mode was requested: only program the pool count (NUMVT) in
 * PORTCTL for 16/32/64 active pools. */
3693 txgbe_config_vf_default(struct rte_eth_dev *dev)
3695 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3698 mrqc = rd32(hw, TXGBE_PORTCTL);
3699 mrqc &= ~(TXGBE_PORTCTL_NUMTC_MASK | TXGBE_PORTCTL_NUMVT_MASK);
3700 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3702 mrqc |= TXGBE_PORTCTL_NUMVT_64;
3706 mrqc |= TXGBE_PORTCTL_NUMVT_32;
/* 16-pool case exists here but not in txgbe_config_vf_rss() above. */
3710 mrqc |= TXGBE_PORTCTL_NUMVT_16;
3714 "invalid pool number in IOV mode");
3718 wr32(hw, TXGBE_PORTCTL, mrqc);
/* Dispatch Rx multi-queue configuration based on the configured
 * rxmode.mq_mode, with separate decision trees for SR-IOV inactive
 * and SR-IOV active. */
3724 txgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
3726 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3728 * SRIOV inactive scheme
3729 * any DCB/RSS w/o VMDq multi-queue setting
3731 switch (dev->data->dev_conf.rxmode.mq_mode) {
/* Plain RSS and RSS-with-VMDq both reduce to the RSS setup here. */
3733 case ETH_MQ_RX_DCB_RSS:
3734 case ETH_MQ_RX_VMDQ_RSS:
3735 txgbe_rss_configure(dev);
3738 case ETH_MQ_RX_VMDQ_DCB:
3739 txgbe_vmdq_dcb_configure(dev);
3742 case ETH_MQ_RX_VMDQ_ONLY:
3743 txgbe_vmdq_rx_hw_configure(dev);
3746 case ETH_MQ_RX_NONE:
3748 /* if mq_mode is none, disable rss mode.*/
3749 txgbe_rss_disable(dev);
3753 /* SRIOV active scheme
3754 * Support RSS together with SRIOV.
3756 switch (dev->data->dev_conf.rxmode.mq_mode) {
3758 case ETH_MQ_RX_VMDQ_RSS:
3759 txgbe_config_vf_rss(dev);
3761 case ETH_MQ_RX_VMDQ_DCB:
3763 /* In SRIOV, the configuration is the same as VMDq case */
3764 txgbe_vmdq_dcb_configure(dev);
/* DCB combined with RSS is rejected when SR-IOV is on. */
3766 /* DCB/RSS together with SRIOV is not supported */
3767 case ETH_MQ_RX_VMDQ_DCB_RSS:
3768 case ETH_MQ_RX_DCB_RSS:
3770 "Could not support DCB/RSS with VMDq & SRIOV");
/* Anything else falls back to the pool-count-only default. */
3773 txgbe_config_vf_default(dev);
/* Configure Tx multi-queue mode. The Tx descriptor arbiter is disabled
 * around the whole operation, then the pool count (NUMVT) is chosen
 * from the SR-IOV state and written to PORTCTL. */
3782 txgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
3784 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3788 /* disable arbiter */
3789 rttdcs = rd32(hw, TXGBE_ARBTXCTL);
3790 rttdcs |= TXGBE_ARBTXCTL_DIA;
3791 wr32(hw, TXGBE_ARBTXCTL, rttdcs);
3793 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3795 * SRIOV inactive scheme
3796 * any DCB w/o VMDq multi-queue setting
3798 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
3799 txgbe_vmdq_tx_hw_configure(hw);
/* Non-VMDq: clear the virtualization pool field entirely. */
3801 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK, 0);
3803 switch (RTE_ETH_DEV_SRIOV(dev).active) {
3805 * SRIOV active scheme
3806 * FIXME if support DCB together with VMDq & SRIOV
3809 mtqc = TXGBE_PORTCTL_NUMVT_64;
3812 mtqc = TXGBE_PORTCTL_NUMVT_32;
3815 mtqc = TXGBE_PORTCTL_NUMVT_16;
3819 PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
3821 wr32m(hw, TXGBE_PORTCTL, TXGBE_PORTCTL_NUMVT_MASK, mtqc);
/* Restore the arbiter using the value read above with DIA cleared. */
3824 /* re-enable arbiter */
3825 rttdcs &= ~TXGBE_ARBTXCTL_DIA;
3826 wr32(hw, TXGBE_ARBTXCTL, rttdcs);
3832 * txgbe_get_rscctl_maxdesc
3834 * @pool Memory pool of the Rx queue
/* Returns the RXCFG RSCMAX encoding (16/8/4/1 descriptors) for the
 * largest RSC aggregation that fits: how many pool buffers are needed
 * to hold a maximum-size IPv4 packet, rounded down to the nearest
 * supported power-of-two bucket. */
3836 static inline uint32_t
3837 txgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
3839 struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
/* Buffers needed = max IPv4 packet length / usable room per mbuf. */
3842 RTE_IPV4_MAX_PKT_LEN /
3843 (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
3846 return TXGBE_RXCFG_RSCMAX_16;
3847 else if (maxdesc >= 8)
3848 return TXGBE_RXCFG_RSCMAX_8;
3849 else if (maxdesc >= 4)
3850 return TXGBE_RXCFG_RSCMAX_4;
3852 return TXGBE_RXCFG_RSCMAX_1;
3856 * txgbe_set_rsc - configure RSC related port HW registers
3858 * Configures the port's RSC related registers.
3862 * Returns 0 in case of success or a non-zero error code
3865 txgbe_set_rsc(struct rte_eth_dev *dev)
3867 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
3868 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
3869 struct rte_eth_dev_info dev_info = { 0 };
3870 bool rsc_capable = false;
/* Determine RSC capability from the device's advertised Rx offloads. */
3876 dev->dev_ops->dev_infos_get(dev, &dev_info);
3877 if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
/* LRO requested but hardware can't do it: hard error. */
3880 if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
3881 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
3886 /* RSC global configuration */
/* LRO and keeping the CRC are mutually exclusive on this hardware. */
3888 if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
3889 (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
3890 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
/* RSCDIA is a *disable* bit: clear it to enable coalescing, set it to
 * disable. */
3895 rfctl = rd32(hw, TXGBE_PSRCTL);
3896 if (rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
3897 rfctl &= ~TXGBE_PSRCTL_RSCDIA;
3899 rfctl |= TXGBE_PSRCTL_RSCDIA;
3900 wr32(hw, TXGBE_PSRCTL, rfctl);
3902 /* If LRO hasn't been requested - we are done here. */
3903 if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
3906 /* Set PSRCTL.RSCACK bit */
3907 rdrxctl = rd32(hw, TXGBE_PSRCTL);
3908 rdrxctl |= TXGBE_PSRCTL_RSCACK;
3909 wr32(hw, TXGBE_PSRCTL, rdrxctl);
3911 /* Per-queue RSC configuration */
3912 for (i = 0; i < dev->data->nb_rx_queues; i++) {
3913 struct txgbe_rx_queue *rxq = dev->data->rx_queues[i];
/* Read-modify-write of the three per-queue registers touched below:
 * RXCFG (srrctl), POOLRSS (psrtype) and ITR (eitr). */
3915 rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
3917 rd32(hw, TXGBE_POOLRSS(rxq->reg_idx));
3919 rd32(hw, TXGBE_ITR(rxq->reg_idx));
3922 * txgbe PMD doesn't support header-split at the moment.
3924 srrctl &= ~TXGBE_RXCFG_HDRLEN_MASK;
3925 srrctl |= TXGBE_RXCFG_HDRLEN(128);
3928 * TODO: Consider setting the Receive Descriptor Minimum
3929 * Threshold Size for an RSC case. This is not an obviously
3930 * beneficiary option but the one worth considering...
/* Enable RSC on the queue and program the max-descriptor bucket
 * derived from the queue's mempool buffer size. */
3933 srrctl |= TXGBE_RXCFG_RSCENA;
3934 srrctl &= ~TXGBE_RXCFG_RSCMAX_MASK;
3935 srrctl |= txgbe_get_rscctl_maxdesc(rxq->mb_pool);
3936 psrtype |= TXGBE_POOLRSS_L4HDR;
3939 * RSC: Set ITR interval corresponding to 2K ints/s.
3941 * Full-sized RSC aggregations for a 10Gb/s link will
3942 * arrive at about 20K aggregation/s rate.
3944 * 2K inst/s rate will make only 10% of the
3945 * aggregations to be closed due to the interrupt timer
3946 * expiration for a streaming at wire-speed case.
3948 * For a sparse streaming case this setting will yield
3949 * at most 500us latency for a single RSC aggregation.
3951 eitr &= ~TXGBE_ITR_IVAL_MASK;
3952 eitr |= TXGBE_ITR_IVAL_10G(TXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
3953 eitr |= TXGBE_ITR_WRDSA;
3955 wr32(hw, TXGBE_RXCFG(rxq->reg_idx), srrctl);
3956 wr32(hw, TXGBE_POOLRSS(rxq->reg_idx), psrtype);
3957 wr32(hw, TXGBE_ITR(rxq->reg_idx), eitr);
3960 * RSC requires the mapping of the queue to the
3963 txgbe_set_ivar_map(hw, 0, rxq->reg_idx, i);
3968 PMD_INIT_LOG(DEBUG, "enabling LRO mode");
/* Select the Rx burst callback for the port. Priority order: LRO,
 * then scattered Rx, then the "simple" paths; within each, the bulk
 * allocation variant is preferred when all queues allow it. */
3974 txgbe_set_rx_function(struct rte_eth_dev *dev)
3976 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
3979 * Initialize the appropriate LRO callback.
3981 * If all queues satisfy the bulk allocation preconditions
3982 * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use
3983 * bulk allocation. Otherwise use a single allocation version.
3985 if (dev->data->lro) {
3986 if (adapter->rx_bulk_alloc_allowed) {
3987 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
3988 "allocation version");
3989 dev->rx_pkt_burst = txgbe_recv_pkts_lro_bulk_alloc;
3991 PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
3992 "allocation version");
3993 dev->rx_pkt_burst = txgbe_recv_pkts_lro_single_alloc;
3995 } else if (dev->data->scattered_rx) {
3997 * Set the non-LRO scattered callback: there are bulk and
3998 * single allocation versions.
/* NOTE(review): the scattered path reuses the lro_* receive
 * functions; this mirrors the LRO branch above and appears
 * intentional (same multi-segment reassembly logic). */
4000 if (adapter->rx_bulk_alloc_allowed) {
4001 PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4002 "allocation callback (port=%d).",
4003 dev->data->port_id);
4004 dev->rx_pkt_burst = txgbe_recv_pkts_lro_bulk_alloc;
4006 PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4007 "single allocation) "
4008 "Scattered Rx callback "
4010 dev->data->port_id);
4012 dev->rx_pkt_burst = txgbe_recv_pkts_lro_single_alloc;
4015 * Below we set "simple" callbacks according to port/queues parameters.
4016 * If parameters allow we are going to choose between the following
4019 * - Single buffer allocation (the simplest one)
4021 } else if (adapter->rx_bulk_alloc_allowed) {
4022 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4023 "satisfied. Rx Burst Bulk Alloc function "
4024 "will be used on port=%d.",
4025 dev->data->port_id);
4027 dev->rx_pkt_burst = txgbe_recv_pkts_bulk_alloc;
4029 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4030 "satisfied, or Scattered Rx is requested "
4032 dev->data->port_id);
4034 dev->rx_pkt_burst = txgbe_recv_pkts;
4039 * Initializes Receive Unit.
/* Full Rx-unit bring-up: disables Rx while programming, sets global
 * options (broadcast, CRC strip, jumbo, loopback), programs each Rx
 * queue's rings and buffer sizes, then multi-queue mode, checksum
 * offloads and RSC, and finally selects the Rx burst function. */
4042 txgbe_dev_rx_init(struct rte_eth_dev *dev)
4044 struct txgbe_hw *hw;
4045 struct txgbe_rx_queue *rxq;
4054 struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4057 PMD_INIT_FUNC_TRACE();
4058 hw = TXGBE_DEV_HW(dev);
4061 * Make sure receives are disabled while setting
4062 * up the RX context (registers, descriptor rings, etc.).
4064 wr32m(hw, TXGBE_MACRXCFG, TXGBE_MACRXCFG_ENA, 0);
4065 wr32m(hw, TXGBE_PBRXCTL, TXGBE_PBRXCTL_ENA, 0);
4067 /* Enable receipt of broadcasted frames */
4068 fctrl = rd32(hw, TXGBE_PSRCTL);
4069 fctrl |= TXGBE_PSRCTL_BCA;
4070 wr32(hw, TXGBE_PSRCTL, fctrl);
4073 * Configure CRC stripping, if any.
/* KEEP_CRC requested -> clear the strip bit; otherwise strip in HW. */
4075 hlreg0 = rd32(hw, TXGBE_SECRXCTL);
4076 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4077 hlreg0 &= ~TXGBE_SECRXCTL_CRCSTRIP;
4079 hlreg0 |= TXGBE_SECRXCTL_CRCSTRIP;
4080 wr32(hw, TXGBE_SECRXCTL, hlreg0);
4083 * Configure jumbo frame support, if any.
/* Max frame size: user-requested for jumbo, default otherwise. */
4085 if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
4086 wr32m(hw, TXGBE_FRMSZ, TXGBE_FRMSZ_MAX_MASK,
4087 TXGBE_FRMSZ_MAX(rx_conf->max_rx_pkt_len));
4089 wr32m(hw, TXGBE_FRMSZ, TXGBE_FRMSZ_MAX_MASK,
4090 TXGBE_FRMSZ_MAX(TXGBE_FRAME_SIZE_DFT));
4094 * If loopback mode is configured, set LPBK bit.
/* MAC loopback is only supported on the raptor MAC. */
4096 hlreg0 = rd32(hw, TXGBE_PSRCTL);
4097 if (hw->mac.type == txgbe_mac_raptor &&
4098 dev->data->dev_conf.lpbk_mode)
4099 hlreg0 |= TXGBE_PSRCTL_LBENA;
4101 hlreg0 &= ~TXGBE_PSRCTL_LBENA;
4103 wr32(hw, TXGBE_PSRCTL, hlreg0);
4106 * Assume no header split and no VLAN strip support
4107 * on any Rx queue first .
/* Cleared here, re-set per queue below if any queue asked for it. */
4109 rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
4111 /* Setup RX queues */
4112 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4113 rxq = dev->data->rx_queues[i];
4116 * Reset crc_len in case it was changed after queue setup by a
4117 * call to configure.
4119 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4120 rxq->crc_len = RTE_ETHER_CRC_LEN;
4124 /* Setup the Base and Length of the Rx Descriptor Rings */
4125 bus_addr = rxq->rx_ring_phys_addr;
4126 wr32(hw, TXGBE_RXBAL(rxq->reg_idx),
4127 (uint32_t)(bus_addr & BIT_MASK32));
4128 wr32(hw, TXGBE_RXBAH(rxq->reg_idx),
4129 (uint32_t)(bus_addr >> 32));
/* Reset head/tail pointers to the start of the ring. */
4130 wr32(hw, TXGBE_RXRP(rxq->reg_idx), 0);
4131 wr32(hw, TXGBE_RXWP(rxq->reg_idx), 0);
4133 srrctl = TXGBE_RXCFG_RNGLEN(rxq->nb_rx_desc);
4135 /* Set if packets are dropped when no descriptors available */
4137 srrctl |= TXGBE_RXCFG_DROP;
4140 * Configure the RX buffer size in the PKTLEN field of
4141 * the RXCFG register of the queue.
4142 * The value is in 1 KB resolution. Valid values can be from
4145 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4146 RTE_PKTMBUF_HEADROOM);
/* Round up to the 1 KB granularity the PKTLEN field requires. */
4147 buf_size = ROUND_UP(buf_size, 0x1 << 10);
4148 srrctl |= TXGBE_RXCFG_PKTLEN(buf_size);
4150 wr32(hw, TXGBE_RXCFG(rxq->reg_idx), srrctl);
4152 /* It adds dual VLAN length for supporting dual VLAN */
/* If a max frame (plus QinQ tags) can exceed one buffer, the port
 * must run in scattered-Rx mode. */
4153 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4154 2 * TXGBE_VLAN_TAG_SIZE > buf_size)
4155 dev->data->scattered_rx = 1;
4156 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
4157 rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
4160 if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
4161 dev->data->scattered_rx = 1;
4164 * Device configured with multiple RX queues.
4166 txgbe_dev_mq_rx_configure(dev);
4169 * Setup the Checksum Register.
4170 * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4171 * Enable IP/L4 checksum computation by hardware if requested to do so.
4173 rxcsum = rd32(hw, TXGBE_PSRCTL);
4174 rxcsum |= TXGBE_PSRCTL_PCSD;
4175 if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
4176 rxcsum |= TXGBE_PSRCTL_L4CSUM;
4178 rxcsum &= ~TXGBE_PSRCTL_L4CSUM;
4180 wr32(hw, TXGBE_PSRCTL, rxcsum);
/* raptor MAC: mirror the CRC-strip setting into SECRXCTL as well. */
4182 if (hw->mac.type == txgbe_mac_raptor) {
4183 rdrxctl = rd32(hw, TXGBE_SECRXCTL);
4184 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4185 rdrxctl &= ~TXGBE_SECRXCTL_CRCSTRIP;
4187 rdrxctl |= TXGBE_SECRXCTL_CRCSTRIP;
4188 wr32(hw, TXGBE_SECRXCTL, rdrxctl);
4191 rc = txgbe_set_rsc(dev);
4195 txgbe_set_rx_function(dev);
4201 * Initializes Transmit Unit.
/* Program each Tx queue's descriptor ring base/length and reset its
 * head/tail pointers, then apply the Tx multi-queue configuration. */
4204 txgbe_dev_tx_init(struct rte_eth_dev *dev)
4206 struct txgbe_hw *hw;
4207 struct txgbe_tx_queue *txq;
4211 PMD_INIT_FUNC_TRACE();
4212 hw = TXGBE_DEV_HW(dev);
4214 /* Setup the Base and Length of the Tx Descriptor Rings */
4215 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4216 txq = dev->data->tx_queues[i];
/* 64-bit ring bus address split across the low/high registers. */
4218 bus_addr = txq->tx_ring_phys_addr;
4219 wr32(hw, TXGBE_TXBAL(txq->reg_idx),
4220 (uint32_t)(bus_addr & BIT_MASK32));
4221 wr32(hw, TXGBE_TXBAH(txq->reg_idx),
4222 (uint32_t)(bus_addr >> 32));
4223 wr32m(hw, TXGBE_TXCFG(txq->reg_idx), TXGBE_TXCFG_BUFLEN_MASK,
4224 TXGBE_TXCFG_BUFLEN(txq->nb_tx_desc));
4225 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4226 wr32(hw, TXGBE_TXRP(txq->reg_idx), 0);
4227 wr32(hw, TXGBE_TXWP(txq->reg_idx), 0);
4230 /* Device configured with multiple TX queues. */
4231 txgbe_dev_mq_tx_configure(dev);
4235 * Set up link loopback mode Tx->Rx.
/* raptor MAC only: sets the MAC loopback bit so transmitted frames are
 * looped back into the receive path. */
4237 static inline void __rte_cold
4238 txgbe_setup_loopback_link_raptor(struct txgbe_hw *hw)
4240 PMD_INIT_FUNC_TRACE();
4242 wr32m(hw, TXGBE_MACRXCFG, TXGBE_MACRXCFG_LB, TXGBE_MACRXCFG_LB);
4248 * Start Transmit and Receive Units.
/* Port start sequence: program per-queue Tx thresholds, enable the DMA
 * Tx engine, start all non-deferred Tx then Rx queues, enable the Rx
 * packet buffer, and finally set up loopback if configured. */
4251 txgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4253 struct txgbe_hw *hw;
4254 struct txgbe_tx_queue *txq;
4255 struct txgbe_rx_queue *rxq;
4261 PMD_INIT_FUNC_TRACE();
4262 hw = TXGBE_DEV_HW(dev);
4264 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4265 txq = dev->data->tx_queues[i];
4266 /* Setup Transmit Threshold Registers */
4267 wr32m(hw, TXGBE_TXCFG(txq->reg_idx),
4268 TXGBE_TXCFG_HTHRESH_MASK |
4269 TXGBE_TXCFG_WTHRESH_MASK,
4270 TXGBE_TXCFG_HTHRESH(txq->hthresh) |
4271 TXGBE_TXCFG_WTHRESH(txq->wthresh));
/* Globally enable the Tx DMA engine before starting queues. */
4274 dmatxctl = rd32(hw, TXGBE_DMATXCTRL);
4275 dmatxctl |= TXGBE_DMATXCTRL_ENA;
4276 wr32(hw, TXGBE_DMATXCTRL, dmatxctl);
/* Start every queue not marked for deferred (manual) start. */
4278 for (i = 0; i < dev->data->nb_tx_queues; i++) {
4279 txq = dev->data->tx_queues[i];
4280 if (!txq->tx_deferred_start) {
4281 ret = txgbe_dev_tx_queue_start(dev, i);
4287 for (i = 0; i < dev->data->nb_rx_queues; i++) {
4288 rxq = dev->data->rx_queues[i];
4289 if (!rxq->rx_deferred_start) {
4290 ret = txgbe_dev_rx_queue_start(dev, i);
4296 /* Enable Receive engine */
4297 rxctrl = rd32(hw, TXGBE_PBRXCTL);
4298 rxctrl |= TXGBE_PBRXCTL_ENA;
/* MAC-specific hook performs the actual Rx DMA enable. */
4299 hw->mac.enable_rx_dma(hw, rxctrl);
4301 /* If loopback mode is enabled, set up the link accordingly */
4302 if (hw->mac.type == txgbe_mac_raptor &&
4303 dev->data->dev_conf.lpbk_mode)
4304 txgbe_setup_loopback_link_raptor(hw);
/* Snapshot an Rx queue's base-address and config registers into
 * hw->q_rx_regs so they can be restored after the queue is stopped.
 * Each queue has a block of 8 saved words; three are used here. */
4310 txgbe_dev_save_rx_queue(struct txgbe_hw *hw, uint16_t rx_queue_id)
4312 u32 *reg = &hw->q_rx_regs[rx_queue_id * 8];
4313 *(reg++) = rd32(hw, TXGBE_RXBAL(rx_queue_id));
4314 *(reg++) = rd32(hw, TXGBE_RXBAH(rx_queue_id));
4315 *(reg++) = rd32(hw, TXGBE_RXCFG(rx_queue_id));
/* Restore the Rx queue registers saved by txgbe_dev_save_rx_queue().
 * RXCFG is written with the enable bit masked off so the queue stays
 * disabled until explicitly restarted. */
4319 txgbe_dev_store_rx_queue(struct txgbe_hw *hw, uint16_t rx_queue_id)
4321 u32 *reg = &hw->q_rx_regs[rx_queue_id * 8];
4322 wr32(hw, TXGBE_RXBAL(rx_queue_id), *(reg++));
4323 wr32(hw, TXGBE_RXBAH(rx_queue_id), *(reg++));
4324 wr32(hw, TXGBE_RXCFG(rx_queue_id), *(reg++) & ~TXGBE_RXCFG_ENA);
/* Tx counterpart of txgbe_dev_save_rx_queue(): snapshot the queue's
 * base-address and config registers into hw->q_tx_regs. */
4328 txgbe_dev_save_tx_queue(struct txgbe_hw *hw, uint16_t tx_queue_id)
4330 u32 *reg = &hw->q_tx_regs[tx_queue_id * 8];
4331 *(reg++) = rd32(hw, TXGBE_TXBAL(tx_queue_id));
4332 *(reg++) = rd32(hw, TXGBE_TXBAH(tx_queue_id));
4333 *(reg++) = rd32(hw, TXGBE_TXCFG(tx_queue_id));
/* Restore the Tx queue registers saved by txgbe_dev_save_tx_queue().
 * TXCFG is written with the enable bit masked off so the queue stays
 * disabled until explicitly restarted. */
4337 txgbe_dev_store_tx_queue(struct txgbe_hw *hw, uint16_t tx_queue_id)
4339 u32 *reg = &hw->q_tx_regs[tx_queue_id * 8];
4340 wr32(hw, TXGBE_TXBAL(tx_queue_id), *(reg++));
4341 wr32(hw, TXGBE_TXBAH(tx_queue_id), *(reg++));
4342 wr32(hw, TXGBE_TXCFG(tx_queue_id), *(reg++) & ~TXGBE_TXCFG_ENA);
4346 * Start Receive Units for specified queue.
/* Populate the queue's descriptor ring with mbufs, set the queue
 * enable bit, poll until the hardware reports it enabled, then arm
 * the tail pointer and mark the queue STARTED. */
4349 txgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4351 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
4352 struct txgbe_rx_queue *rxq;
4356 PMD_INIT_FUNC_TRACE();
4358 rxq = dev->data->rx_queues[rx_queue_id];
4360 /* Allocate buffers for descriptor rings */
4361 if (txgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4362 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4366 rxdctl = rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
4367 rxdctl |= TXGBE_RXCFG_ENA;
4368 wr32(hw, TXGBE_RXCFG(rxq->reg_idx), rxdctl);
4370 /* Wait until RX Enable ready */
/* Bounded poll; failure to enable is logged but not fatal here. */
4371 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4374 rxdctl = rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
4375 } while (--poll_ms && !(rxdctl & TXGBE_RXCFG_ENA));
4377 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
/* Head at 0, tail at the last descriptor: the whole ring is available
 * to hardware. */
4379 wr32(hw, TXGBE_RXRP(rxq->reg_idx), 0);
4380 wr32(hw, TXGBE_RXWP(rxq->reg_idx), rxq->nb_rx_desc - 1);
4381 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4387 * Stop Receive Units for specified queue.
/* Save the queue registers, clear the enable bit and poll until the
 * hardware deasserts it, restore the registers (still disabled), free
 * the queue's mbufs and reset its software state to STOPPED. */
4390 txgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4392 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
4393 struct txgbe_adapter *adapter = TXGBE_DEV_ADAPTER(dev);
4394 struct txgbe_rx_queue *rxq;
4398 PMD_INIT_FUNC_TRACE();
4400 rxq = dev->data->rx_queues[rx_queue_id];
4402 txgbe_dev_save_rx_queue(hw, rxq->reg_idx);
4403 wr32m(hw, TXGBE_RXCFG(rxq->reg_idx), TXGBE_RXCFG_ENA, 0);
4405 /* Wait until RX Enable bit clear */
4406 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4409 rxdctl = rd32(hw, TXGBE_RXCFG(rxq->reg_idx));
4410 } while (--poll_ms && (rxdctl & TXGBE_RXCFG_ENA));
4412 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
/* Small settle delay before restoring registers. */
4414 rte_delay_us(RTE_TXGBE_WAIT_100_US);
4415 txgbe_dev_store_rx_queue(hw, rxq->reg_idx);
4417 txgbe_rx_queue_release_mbufs(rxq);
4418 txgbe_reset_rx_queue(adapter, rxq);
4419 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4425 * Start Transmit Units for specified queue.
/* Set the queue enable bit, poll until the hardware reports it
 * enabled, program the tail pointer and mark the queue STARTED. */
4428 txgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4430 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
4431 struct txgbe_tx_queue *txq;
4435 PMD_INIT_FUNC_TRACE();
4437 txq = dev->data->tx_queues[tx_queue_id];
4438 wr32m(hw, TXGBE_TXCFG(txq->reg_idx), TXGBE_TXCFG_ENA, TXGBE_TXCFG_ENA);
4440 /* Wait until TX Enable ready */
4441 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4444 txdctl = rd32(hw, TXGBE_TXCFG(txq->reg_idx));
4445 } while (--poll_ms && !(txdctl & TXGBE_TXCFG_ENA));
4447 PMD_INIT_LOG(ERR, "Could not enable "
4448 "Tx Queue %d", tx_queue_id);
/* Tail starts at the software tail index (not necessarily 0 on a
 * restarted queue). */
4451 wr32(hw, TXGBE_TXWP(txq->reg_idx), txq->tx_tail);
4452 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4458 * Stop Transmit Units for specified queue.
/* Drain-then-disable: wait (bounded) for head to catch up with tail so
 * in-flight descriptors complete, save the queue registers, clear the
 * enable bit and poll for it to drop, restore the registers (still
 * disabled), release mbufs and mark the queue STOPPED. */
4461 txgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4463 struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
4464 struct txgbe_tx_queue *txq;
4466 uint32_t txtdh, txtdt;
4469 PMD_INIT_FUNC_TRACE();
4471 txq = dev->data->tx_queues[tx_queue_id];
4473 /* Wait until TX queue is empty */
4474 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4476 rte_delay_us(RTE_TXGBE_WAIT_100_US);
4477 txtdh = rd32(hw, TXGBE_TXRP(txq->reg_idx));
4478 txtdt = rd32(hw, TXGBE_TXWP(txq->reg_idx));
4479 } while (--poll_ms && (txtdh != txtdt));
/* Timed out with head != tail: queue is stopped anyway, but warn. */
4482 "Tx Queue %d is not empty when stopping.",
4485 txgbe_dev_save_tx_queue(hw, txq->reg_idx);
4486 wr32m(hw, TXGBE_TXCFG(txq->reg_idx), TXGBE_TXCFG_ENA, 0);
4488 /* Wait until TX Enable bit clear */
4489 poll_ms = RTE_TXGBE_REGISTER_POLL_WAIT_10_MS;
4492 txdctl = rd32(hw, TXGBE_TXCFG(txq->reg_idx));
4493 } while (--poll_ms && (txdctl & TXGBE_TXCFG_ENA));
4495 PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
4498 rte_delay_us(RTE_TXGBE_WAIT_100_US);
4499 txgbe_dev_store_tx_queue(hw, txq->reg_idx);
/* Queue-specific ops (set at queue setup) free mbufs and reset the
 * ring state. */
4501 if (txq->ops != NULL) {
4502 txq->ops->release_mbufs(txq);
4503 txq->ops->reset(txq);
4505 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
/* ethdev rxq_info_get callback: report the Rx queue's mempool,
 * descriptor count and configured parameters into @qinfo. */
4511 txgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4512 struct rte_eth_rxq_info *qinfo)
4514 struct txgbe_rx_queue *rxq;
4516 rxq = dev->data->rx_queues[queue_id];
4518 qinfo->mp = rxq->mb_pool;
/* scattered_rx is a port-level flag, not per queue. */
4519 qinfo->scattered_rx = dev->data->scattered_rx;
4520 qinfo->nb_desc = rxq->nb_rx_desc;
4522 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
4523 qinfo->conf.rx_drop_en = rxq->drop_en;
4524 qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
4525 qinfo->conf.offloads = rxq->offloads;
4529 txgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
4530 struct rte_eth_txq_info *qinfo)
4532 struct txgbe_tx_queue *txq;
4534 txq = dev->data->tx_queues[queue_id];
4536 qinfo->nb_desc = txq->nb_tx_desc;
4538 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
4539 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
4540 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
4542 qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
4543 qinfo->conf.offloads = txq->offloads;
4544 qinfo->conf.tx_deferred_start = txq->tx_deferred_start;