From: Lance Richardson
Date: Sat, 6 Mar 2021 15:19:11 +0000 (-0500)
Subject: net/bnxt: optimize Tx completion handling
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=527b10089cc5c65148eaa48caf9862f48c52960e;p=dpdk.git

net/bnxt: optimize Tx completion handling

Avoid copying mbuf pointers to a separate array for bulk
mbuf free when handling transmit completions for vector
mode transmit.

Signed-off-by: Lance Richardson
Reviewed-by: Ajit Khaparde
---

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index b561ff5520..7124f48013 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3189,7 +3189,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
 	struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
 	struct bnxt_tx_ring_info *txr;
 	struct bnxt_cp_ring_info *cpr;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	struct tx_pkt_cmpl *txcmp;
 	uint32_t cons, cp_cons;
 	int rc;
@@ -3219,7 +3219,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
 			return RTE_ETH_TX_DESC_UNAVAIL;
 	}
 	tx_buf = &txr->tx_buf_ring[cons];
-	if (tx_buf->mbuf == NULL)
+	if (*tx_buf == NULL)
 		return RTE_ETH_TX_DESC_DONE;
 
 	return RTE_ETH_TX_DESC_FULL;
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index 997dcdc28b..e4055fa49b 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -230,7 +230,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 		tx_ring->vmem =
 			(void **)((char *)mz->addr + tx_vmem_start);
 		tx_ring_info->tx_buf_ring =
-			(struct bnxt_sw_tx_bd *)tx_ring->vmem;
+			(struct rte_mbuf **)tx_ring->vmem;
 	}
 }
 
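[Editor's note: the bnxt_rxtx_vec_common.h hunk that follows is the core of
this optimization. Instead of staging completed mbuf pointers into the
txq->free array and bulk-freeing from there, the fast path now hands
rte_mempool_put_bulk() the software ring itself, in at most two contiguous
chunks (before and after the ring wraparound). Below is a minimal sketch of
that pattern, assuming a power-of-two ring whose mbufs all come from one
mempool; the helper name bulk_free_from_ring() is invented for this note.]

#include <string.h>
#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static inline void
bulk_free_from_ring(struct rte_mbuf **ring, uint32_t ring_size,
		    uint32_t ring_mask, uint16_t *raw_cons, uint32_t nr_pkts)
{
	uint32_t cons = *raw_cons & ring_mask;
	/* First chunk: from the consumer index up to the end of the ring. */
	uint32_t num = RTE_MIN(nr_pkts, ring_size - cons);
	struct rte_mempool *pool = ring[cons]->pool;

	rte_mempool_put_bulk(pool, (void **)&ring[cons], num);
	memset(&ring[cons], 0, num * sizeof(struct rte_mbuf *));
	*raw_cons += num;

	/* Second chunk: whatever wrapped around to the ring's start. */
	num = nr_pkts - num;
	if (num) {
		cons = *raw_cons & ring_mask;
		rte_mempool_put_bulk(pool, (void **)&ring[cons], num);
		memset(&ring[cons], 0, num * sizeof(struct rte_mbuf *));
		*raw_cons += num;
	}
}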
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
index 91ff6736b1..9b9489a695 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
@@ -100,57 +100,78 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
  * is enabled.
  */
 static inline void
-bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
 {
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
-	struct rte_mbuf **free = txq->free;
 	uint16_t cons, raw_cons = txr->tx_raw_cons;
-	unsigned int blk = 0;
-	uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
-
-	while (nr_pkts--) {
-		struct bnxt_sw_tx_bd *tx_buf;
-
-		cons = raw_cons++ & ring_mask;
-		tx_buf = &txr->tx_buf_ring[cons];
-		free[blk++] = tx_buf->mbuf;
-		tx_buf->mbuf = NULL;
+	uint32_t ring_mask, ring_size, num;
+	struct rte_mempool *pool;
+
+	ring_mask = txr->tx_ring_struct->ring_mask;
+	ring_size = txr->tx_ring_struct->ring_size;
+
+	cons = raw_cons & ring_mask;
+	num = RTE_MIN(nr_pkts, ring_size - cons);
+	pool = txr->tx_buf_ring[cons]->pool;
+
+	rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons], num);
+	memset(&txr->tx_buf_ring[cons], 0, num * sizeof(struct rte_mbuf *));
+	raw_cons += num;
+	num = nr_pkts - num;
+	if (num) {
+		cons = raw_cons & ring_mask;
+		rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons],
+				     num);
+		memset(&txr->tx_buf_ring[cons], 0,
+		       num * sizeof(struct rte_mbuf *));
+		raw_cons += num;
 	}
-	if (blk)
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
 
 	txr->tx_raw_cons = raw_cons;
 }
 
 static inline void
-bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
 {
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
-	struct rte_mbuf **free = txq->free;
 	uint16_t cons, raw_cons = txr->tx_raw_cons;
-	unsigned int blk = 0;
-	uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
+	uint32_t ring_mask, ring_size, num, blk;
+	struct rte_mempool *pool;
 
-	while (nr_pkts--) {
-		struct bnxt_sw_tx_bd *tx_buf;
-		struct rte_mbuf *mbuf;
+	ring_mask = txr->tx_ring_struct->ring_mask;
+	ring_size = txr->tx_ring_struct->ring_size;
 
-		cons = raw_cons++ & ring_mask;
-		tx_buf = &txr->tx_buf_ring[cons];
-		mbuf = rte_pktmbuf_prefree_seg(tx_buf->mbuf);
-		if (unlikely(mbuf == NULL))
-			continue;
-		tx_buf->mbuf = NULL;
+	while (nr_pkts) {
+		struct rte_mbuf *mbuf;
 
-		if (blk && mbuf->pool != free[0]->pool) {
-			rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-			blk = 0;
+		cons = raw_cons & ring_mask;
+		num = RTE_MIN(nr_pkts, ring_size - cons);
+		pool = txr->tx_buf_ring[cons]->pool;
+
+		blk = 0;
+		do {
+			mbuf = txr->tx_buf_ring[cons + blk];
+			mbuf = rte_pktmbuf_prefree_seg(mbuf);
+			if (!mbuf || mbuf->pool != pool)
+				break;
+			blk++;
+		} while (blk < num);
+
+		if (blk) {
+			rte_mempool_put_bulk(pool,
+					     (void **)&txr->tx_buf_ring[cons],
+					     blk);
+			memset(&txr->tx_buf_ring[cons], 0,
+			       blk * sizeof(struct rte_mbuf *));
+			raw_cons += blk;
+			nr_pkts -= blk;
+		}
+		if (!mbuf) {
+			/* Skip freeing mbufs with non-zero reference count. */
+			raw_cons++;
+			nr_pkts--;
 		}
-		free[blk++] = mbuf;
 	}
-	if (blk)
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-
 	txr->tx_raw_cons = raw_cons;
 }
 
 #endif /* _BNXT_RXTX_VEC_COMMON_H_ */
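[Editor's note: the slower path above, bnxt_tx_cmp_vec(), batches frees in
same-pool runs and stops early when rte_pktmbuf_prefree_seg() declines to
release a segment. That API decrements the segment's reference count and
returns the mbuf only when the caller has become its sole owner; when other
references remain it returns NULL, and the ring slot must be skipped rather
than returned to the mempool. A hedged sketch of that contract follows; the
helper try_release() is invented for this note.]

#include <rte_mbuf.h>

/* Returns 1 if m may join the current same-pool bulk-free batch,
 * 0 if m is still referenced elsewhere and must simply be skipped,
 * -1 if m is releasable but belongs to another pool (flush batch first).
 */
static inline int
try_release(struct rte_mbuf *m, struct rte_mempool *batch_pool)
{
	m = rte_pktmbuf_prefree_seg(m);
	if (m == NULL)
		return 0;
	if (m->pool != batch_pool)
		return -1;
	return 1;
}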
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index 3d54d9d59d..bc2e96ec38 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -346,7 +346,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
 	struct rte_mbuf *tx_mbuf;
 	struct tx_bd_long *txbd = NULL;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	uint16_t to_send;
 
 	nb_pkts = RTE_MIN(nb_pkts, bnxt_tx_avail(txq));
@@ -362,8 +362,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 		tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
 		tx_buf = &txr->tx_buf_ring[tx_prod];
-		tx_buf->mbuf = tx_mbuf;
-		tx_buf->nr_bds = 1;
+		*tx_buf = tx_mbuf;
 
 		txbd = &txr->tx_desc_ring[tx_prod];
 		txbd->address = tx_mbuf->buf_iova + tx_mbuf->data_off;
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index 7a58434ce9..7ec04797b7 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -321,12 +321,11 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
 
 static inline void
 bnxt_xmit_one(struct rte_mbuf *mbuf, struct tx_bd_long *txbd,
-	      struct bnxt_sw_tx_bd *tx_buf)
+	      struct rte_mbuf **tx_buf)
 {
 	__m128i desc;
 
-	tx_buf->mbuf = mbuf;
-	tx_buf->nr_bds = 1;
+	*tx_buf = mbuf;
 
 	desc = _mm_set_epi64x(mbuf->buf_iova + mbuf->data_off,
 			      bnxt_xmit_flags_len(mbuf->data_len,
@@ -343,7 +342,7 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
 	uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
 	struct tx_bd_long *txbd;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	uint16_t to_send;
 
 	tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
index 8679ac91e7..d95e1f7526 100644
--- a/drivers/net/bnxt/bnxt_txq.c
+++ b/drivers/net/bnxt/bnxt_txq.c
@@ -24,7 +24,7 @@ void bnxt_free_txq_stats(struct bnxt_tx_queue *txq)
 
 static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
 {
-	struct bnxt_sw_tx_bd *sw_ring;
+	struct rte_mbuf **sw_ring;
 	uint16_t i;
 
 	if (!txq || !txq->tx_ring)
@@ -33,9 +33,9 @@ static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
 	sw_ring = txq->tx_ring->tx_buf_ring;
 	if (sw_ring) {
 		for (i = 0; i < txq->tx_ring->tx_ring_struct->ring_size; i++) {
-			if (sw_ring[i].mbuf) {
-				rte_pktmbuf_free_seg(sw_ring[i].mbuf);
-				sw_ring[i].mbuf = NULL;
+			if (sw_ring[i]) {
+				rte_pktmbuf_free_seg(sw_ring[i]);
+				sw_ring[i] = NULL;
 			}
 		}
 	}
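[Editor's note: the bnxt_txr.c/bnxt_txr.h changes below remove the per-slot
nr_bds field, shrinking each software-ring entry to a bare mbuf pointer.
The scalar completion handlers therefore recompute how many descriptors a
packet consumed: one buffer descriptor per mbuf segment, plus one when the
long descriptor format was required. A sketch of that recomputation, using
the bnxt_xmit_need_long_bd() helper introduced in the diff below;
tx_pkt_bd_count() itself is invented for this note.]

static inline unsigned short
tx_pkt_bd_count(struct rte_mbuf *pkt, struct bnxt_tx_queue *txq)
{
	/* bool promotes to 0 or 1, matching the long_bd + nb_segs
	 * arithmetic used in bnxt_start_xmit().
	 */
	return pkt->nb_segs + bnxt_xmit_need_long_bd(pkt, txq);
}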
diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
index 01db0cc6e6..65355fb040 100644
--- a/drivers/net/bnxt/bnxt_txr.c
+++ b/drivers/net/bnxt/bnxt_txr.c
@@ -76,7 +76,7 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
 	ring->ring_mask = ring->ring_size - 1;
 	ring->bd = (void *)txr->tx_desc_ring;
 	ring->bd_dma = txr->tx_desc_mapping;
-	ring->vmem_size = ring->ring_size * sizeof(struct bnxt_sw_tx_bd);
+	ring->vmem_size = ring->ring_size * sizeof(struct rte_mbuf *);
 	ring->vmem = (void **)&txr->tx_buf_ring;
 	ring->fw_ring_id = INVALID_HW_RING_ID;
 
@@ -104,6 +104,21 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
 	return 0;
 }
 
+static bool
+bnxt_xmit_need_long_bd(struct rte_mbuf *tx_pkt, struct bnxt_tx_queue *txq)
+{
+	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
+				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
+				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
+				PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
+				PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
+				PKT_TX_QINQ_PKT) ||
+	    (BNXT_TRUFLOW_EN(txq->bp) &&
+	     (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
+		return true;
+	return false;
+}
+
 static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 				struct bnxt_tx_queue *txq,
 				uint16_t *coal_pkts,
@@ -116,10 +131,10 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 	struct tx_bd_long_hi *txbd1 = NULL;
 	uint32_t vlan_tag_flags;
 	bool long_bd = false;
-	unsigned short nr_bds = 0;
+	unsigned short nr_bds;
 	uint16_t prod;
 	struct rte_mbuf *m_seg;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	static const uint32_t lhint_arr[4] = {
 		TX_BD_LONG_FLAGS_LHINT_LT512,
 		TX_BD_LONG_FLAGS_LHINT_LT1K,
@@ -130,17 +145,9 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 	if (unlikely(is_bnxt_in_error(txq->bp)))
 		return -EIO;
 
-	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
-				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
-				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
-				PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
-				PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
-				PKT_TX_QINQ_PKT) ||
-	    (BNXT_TRUFLOW_EN(txq->bp) &&
-	     (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
-		long_bd = true;
-
+	long_bd = bnxt_xmit_need_long_bd(tx_pkt, txq);
 	nr_bds = long_bd + tx_pkt->nb_segs;
+
 	if (unlikely(bnxt_tx_avail(txq) < nr_bds))
 		return -ENOMEM;
 
@@ -172,8 +179,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 
 	prod = RING_IDX(ring, txr->tx_raw_prod);
 	tx_buf = &txr->tx_buf_ring[prod];
-	tx_buf->mbuf = tx_pkt;
-	tx_buf->nr_bds = nr_bds;
+	*tx_buf = tx_pkt;
 
 	txbd = &txr->tx_desc_ring[prod];
 	txbd->opaque = *coal_pkts;
@@ -185,7 +191,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K;
 	else
 		txbd->flags_type |= lhint_arr[tx_pkt->pkt_len >> 9];
-	txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_buf->mbuf));
+	txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
 	*last_txbd = txbd;
 
 	if (long_bd) {
@@ -193,18 +199,18 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		vlan_tag_flags = 0;
 
 		/* HW can accelerate only outer vlan in QinQ mode */
-		if (tx_buf->mbuf->ol_flags & PKT_TX_QINQ_PKT) {
+		if (tx_pkt->ol_flags & PKT_TX_QINQ_PKT) {
 			vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
-				tx_buf->mbuf->vlan_tci_outer;
+				tx_pkt->vlan_tci_outer;
 			outer_tpid_bd = txq->bp->outer_tpid_bd &
 				BNXT_OUTER_TPID_BD_MASK;
 			vlan_tag_flags |= outer_tpid_bd;
-		} else if (tx_buf->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
+		} else if (tx_pkt->ol_flags & PKT_TX_VLAN_PKT) {
 			/* shurd: Should this mask at
 			 * TX_BD_LONG_CFA_META_VLAN_VID_MASK?
			 */
			vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
-				tx_buf->mbuf->vlan_tci;
+				tx_pkt->vlan_tci;
 			/* Currently supports 8021Q, 8021AD vlan offloads
 			 * QINQ1, QINQ2, QINQ3 vlan headers are deprecated
 			 */
@@ -325,7 +331,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		prod = RING_IDX(ring, txr->tx_raw_prod);
 		tx_buf = &txr->tx_buf_ring[prod];
-		tx_buf->mbuf = m_seg;
+		*tx_buf = m_seg;
 
 		txbd = &txr->tx_desc_ring[prod];
 		txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(m_seg));
@@ -356,16 +362,17 @@ static void bnxt_tx_cmp_fast(struct bnxt_tx_queue *txq, int nr_pkts)
 	int i, j;
 
 	for (i = 0; i < nr_pkts; i++) {
-		struct bnxt_sw_tx_bd *tx_buf;
+		struct rte_mbuf **tx_buf;
 		unsigned short nr_bds;
 
 		tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
-		nr_bds = tx_buf->nr_bds;
+		nr_bds = (*tx_buf)->nb_segs +
+			 bnxt_xmit_need_long_bd(*tx_buf, txq);
 		for (j = 0; j < nr_bds; j++) {
-			if (tx_buf->mbuf) {
+			if (*tx_buf) {
 				/* Add mbuf to the bulk free array */
-				free[blk++] = tx_buf->mbuf;
-				tx_buf->mbuf = NULL;
+				free[blk++] = *tx_buf;
+				*tx_buf = NULL;
 			}
 			raw_cons = RING_NEXT(raw_cons);
 			tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
@@ -389,14 +396,15 @@ static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts)
 
 	for (i = 0; i < nr_pkts; i++) {
 		struct rte_mbuf *mbuf;
-		struct bnxt_sw_tx_bd *tx_buf;
+		struct rte_mbuf **tx_buf;
 		unsigned short nr_bds;
 
 		tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
-		nr_bds = tx_buf->nr_bds;
+		nr_bds = (*tx_buf)->nb_segs +
+			 bnxt_xmit_need_long_bd(*tx_buf, txq);
 		for (j = 0; j < nr_bds; j++) {
-			mbuf = tx_buf->mbuf;
-			tx_buf->mbuf = NULL;
+			mbuf = *tx_buf;
+			*tx_buf = NULL;
 			raw_cons = RING_NEXT(raw_cons);
 			tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
 			if (!mbuf)	/* long_bd's tx_buf ? */
diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
index 91e10dbd73..e4bd90f883 100644
--- a/drivers/net/bnxt/bnxt_txr.h
+++ b/drivers/net/bnxt/bnxt_txr.h
@@ -17,18 +17,13 @@ struct bnxt_tx_ring_info {
 	struct bnxt_db_info     tx_db;
 
 	struct tx_bd_long	*tx_desc_ring;
-	struct bnxt_sw_tx_bd	*tx_buf_ring;
+	struct rte_mbuf		**tx_buf_ring;
 
 	rte_iova_t		tx_desc_mapping;
 
 	struct bnxt_ring	*tx_ring_struct;
 };
 
-struct bnxt_sw_tx_bd {
-	struct rte_mbuf		*mbuf; /* mbuf associated with TX descriptor */
-	unsigned short		nr_bds;
-};
-
 static inline uint32_t bnxt_tx_bds_in_hw(struct bnxt_tx_queue *txq)
 {
 	return ((txq->tx_ring->tx_raw_prod - txq->tx_ring->tx_raw_cons) &
 		txq->tx_ring->tx_ring_struct->ring_mask);
 }
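[Editor's note: bnxt_tx_bds_in_hw() above relies on the driver's
free-running "raw" ring indices: 16-bit producer/consumer counters advance
without bound over a power-of-two ring, positions are recovered by masking,
and occupancy by subtracting the raw counters. Unsigned wraparound keeps
the subtraction correct even after the counters overflow. A standalone
sketch of the idiom follows; the names and ring size are invented for this
note.]

#include <stdint.h>

#define SKETCH_RING_SIZE 256u		/* must be a power of two */
#define SKETCH_RING_MASK (SKETCH_RING_SIZE - 1)

/* Map a free-running index onto a ring slot. */
static inline uint32_t
sketch_ring_idx(uint16_t raw)
{
	return raw & SKETCH_RING_MASK;
}

/* Number of occupied slots; correct across 16-bit counter wraparound. */
static inline uint32_t
sketch_ring_used(uint16_t raw_prod, uint16_t raw_cons)
{
	return (uint16_t)(raw_prod - raw_cons) & SKETCH_RING_MASK;
}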