net/bnxt: optimize Tx completion handling
author Lance Richardson <lance.richardson@broadcom.com>
Sat, 6 Mar 2021 15:19:11 +0000 (10:19 -0500)
committer Ajit Khaparde <ajit.khaparde@broadcom.com>
Fri, 12 Mar 2021 15:07:33 +0000 (16:07 +0100)
Avoid copying mbuf pointers to a separate array for bulk
mbuf free when handling transmit completions in vector
mode transmit.

Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
drivers/net/bnxt/bnxt_ethdev.c
drivers/net/bnxt/bnxt_ring.c
drivers/net/bnxt/bnxt_rxtx_vec_common.h
drivers/net/bnxt/bnxt_rxtx_vec_neon.c
drivers/net/bnxt/bnxt_rxtx_vec_sse.c
drivers/net/bnxt/bnxt_txq.c
drivers/net/bnxt/bnxt_txr.c
drivers/net/bnxt/bnxt_txr.h

index b561ff5..7124f48 100644 (file)
@@ -3189,7 +3189,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
        struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
        struct bnxt_tx_ring_info *txr;
        struct bnxt_cp_ring_info *cpr;
-       struct bnxt_sw_tx_bd *tx_buf;
+       struct rte_mbuf **tx_buf;
        struct tx_pkt_cmpl *txcmp;
        uint32_t cons, cp_cons;
        int rc;
@@ -3219,7 +3219,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
                        return RTE_ETH_TX_DESC_UNAVAIL;
        }
        tx_buf = &txr->tx_buf_ring[cons];
-       if (tx_buf->mbuf == NULL)
+       if (*tx_buf == NULL)
                return RTE_ETH_TX_DESC_DONE;
 
        return RTE_ETH_TX_DESC_FULL;
index 997dcdc..e4055fa 100644 (file)
@@ -230,7 +230,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
                        tx_ring->vmem =
                            (void **)((char *)mz->addr + tx_vmem_start);
                        tx_ring_info->tx_buf_ring =
-                           (struct bnxt_sw_tx_bd *)tx_ring->vmem;
+                           (struct rte_mbuf **)tx_ring->vmem;
                }
        }
 
index 91ff673..9b9489a 100644 (file)
@@ -100,57 +100,78 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
  * is enabled.
  */
 static inline void
-bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
 {
        struct bnxt_tx_ring_info *txr = txq->tx_ring;
-       struct rte_mbuf **free = txq->free;
        uint16_t cons, raw_cons = txr->tx_raw_cons;
-       unsigned int blk = 0;
-       uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
-
-       while (nr_pkts--) {
-               struct bnxt_sw_tx_bd *tx_buf;
-
-               cons = raw_cons++ & ring_mask;
-               tx_buf = &txr->tx_buf_ring[cons];
-               free[blk++] = tx_buf->mbuf;
-               tx_buf->mbuf = NULL;
+       uint32_t ring_mask, ring_size, num;
+       struct rte_mempool *pool;
+
+       ring_mask = txr->tx_ring_struct->ring_mask;
+       ring_size = txr->tx_ring_struct->ring_size;
+
+       cons = raw_cons & ring_mask;
+       num = RTE_MIN(nr_pkts, ring_size - cons);
+       pool = txr->tx_buf_ring[cons]->pool;
+
+       rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons], num);
+       memset(&txr->tx_buf_ring[cons], 0, num * sizeof(struct rte_mbuf *));
+       raw_cons += num;
+       num = nr_pkts - num;
+       if (num) {
+               cons = raw_cons & ring_mask;
+               rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons],
+                                    num);
+               memset(&txr->tx_buf_ring[cons], 0,
+                      num * sizeof(struct rte_mbuf *));
+               raw_cons += num;
        }
-       if (blk)
-               rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
 
        txr->tx_raw_cons = raw_cons;
 }
 
 static inline void
-bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
 {
        struct bnxt_tx_ring_info *txr = txq->tx_ring;
-       struct rte_mbuf **free = txq->free;
        uint16_t cons, raw_cons = txr->tx_raw_cons;
-       unsigned int blk = 0;
-       uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
+       uint32_t ring_mask, ring_size, num, blk;
+       struct rte_mempool *pool;
 
-       while (nr_pkts--) {
-               struct bnxt_sw_tx_bd *tx_buf;
-               struct rte_mbuf *mbuf;
+       ring_mask = txr->tx_ring_struct->ring_mask;
+       ring_size = txr->tx_ring_struct->ring_size;
 
-               cons = raw_cons++ & ring_mask;
-               tx_buf = &txr->tx_buf_ring[cons];
-               mbuf = rte_pktmbuf_prefree_seg(tx_buf->mbuf);
-               if (unlikely(mbuf == NULL))
-                       continue;
-               tx_buf->mbuf = NULL;
+       while (nr_pkts) {
+               struct rte_mbuf *mbuf;
 
-               if (blk && mbuf->pool != free[0]->pool) {
-                       rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-                       blk = 0;
+               cons = raw_cons & ring_mask;
+               num = RTE_MIN(nr_pkts, ring_size - cons);
+               pool = txr->tx_buf_ring[cons]->pool;
+
+               blk = 0;
+               do {
+                       mbuf = txr->tx_buf_ring[cons + blk];
+                       mbuf = rte_pktmbuf_prefree_seg(mbuf);
+                       if (!mbuf || mbuf->pool != pool)
+                               break;
+                       blk++;
+               } while (blk < num);
+
+               if (blk) {
+                       rte_mempool_put_bulk(pool,
+                                            (void **)&txr->tx_buf_ring[cons],
+                                            blk);
+                       memset(&txr->tx_buf_ring[cons], 0,
+                              blk * sizeof(struct rte_mbuf *));
+                       raw_cons += blk;
+                       nr_pkts -= blk;
+               }
+               if (!mbuf) {
+                       /* Skip freeing mbufs with non-zero reference count. */
+                       raw_cons++;
+                       nr_pkts--;
                }
-               free[blk++] = mbuf;
        }
-       if (blk)
-               rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-
        txr->tx_raw_cons = raw_cons;
 }
 #endif /* _BNXT_RXTX_VEC_COMMON_H_ */
index 3d54d9d..bc2e96e 100644 (file)
@@ -346,7 +346,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
        struct rte_mbuf *tx_mbuf;
        struct tx_bd_long *txbd = NULL;
-       struct bnxt_sw_tx_bd *tx_buf;
+       struct rte_mbuf **tx_buf;
        uint16_t to_send;
 
        nb_pkts = RTE_MIN(nb_pkts, bnxt_tx_avail(txq));
@@ -362,8 +362,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 
                tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
                tx_buf = &txr->tx_buf_ring[tx_prod];
-               tx_buf->mbuf = tx_mbuf;
-               tx_buf->nr_bds = 1;
+               *tx_buf = tx_mbuf;
 
                txbd = &txr->tx_desc_ring[tx_prod];
                txbd->address = tx_mbuf->buf_iova + tx_mbuf->data_off;
index 7a58434..7ec0479 100644 (file)
@@ -321,12 +321,11 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
 
 static inline void
 bnxt_xmit_one(struct rte_mbuf *mbuf, struct tx_bd_long *txbd,
-             struct bnxt_sw_tx_bd *tx_buf)
+             struct rte_mbuf **tx_buf)
 {
        __m128i desc;
 
-       tx_buf->mbuf = mbuf;
-       tx_buf->nr_bds = 1;
+       *tx_buf = mbuf;
 
        desc = _mm_set_epi64x(mbuf->buf_iova + mbuf->data_off,
                              bnxt_xmit_flags_len(mbuf->data_len,
@@ -343,7 +342,7 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
        struct bnxt_tx_ring_info *txr = txq->tx_ring;
        uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
        struct tx_bd_long *txbd;
-       struct bnxt_sw_tx_bd *tx_buf;
+       struct rte_mbuf **tx_buf;
        uint16_t to_send;
 
        tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
index 8679ac9..d95e1f7 100644 (file)
@@ -24,7 +24,7 @@ void bnxt_free_txq_stats(struct bnxt_tx_queue *txq)
 
 static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
 {
-       struct bnxt_sw_tx_bd *sw_ring;
+       struct rte_mbuf **sw_ring;
        uint16_t i;
 
        if (!txq || !txq->tx_ring)
@@ -33,9 +33,9 @@ static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
        sw_ring = txq->tx_ring->tx_buf_ring;
        if (sw_ring) {
                for (i = 0; i < txq->tx_ring->tx_ring_struct->ring_size; i++) {
-                       if (sw_ring[i].mbuf) {
-                               rte_pktmbuf_free_seg(sw_ring[i].mbuf);
-                               sw_ring[i].mbuf = NULL;
+                       if (sw_ring[i]) {
+                               rte_pktmbuf_free_seg(sw_ring[i]);
+                               sw_ring[i] = NULL;
                        }
                }
        }
index 01db0cc..65355fb 100644 (file)
@@ -76,7 +76,7 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
        ring->ring_mask = ring->ring_size - 1;
        ring->bd = (void *)txr->tx_desc_ring;
        ring->bd_dma = txr->tx_desc_mapping;
-       ring->vmem_size = ring->ring_size * sizeof(struct bnxt_sw_tx_bd);
+       ring->vmem_size = ring->ring_size * sizeof(struct rte_mbuf *);
        ring->vmem = (void **)&txr->tx_buf_ring;
        ring->fw_ring_id = INVALID_HW_RING_ID;
 
@@ -104,6 +104,21 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
        return 0;
 }
 
+static bool
+bnxt_xmit_need_long_bd(struct rte_mbuf *tx_pkt, struct bnxt_tx_queue *txq)
+{
+       if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
+                               PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
+                               PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
+                               PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
+                               PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
+                               PKT_TX_QINQ_PKT) ||
+            (BNXT_TRUFLOW_EN(txq->bp) &&
+             (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
+               return true;
+       return false;
+}
+
 static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
                                struct bnxt_tx_queue *txq,
                                uint16_t *coal_pkts,
@@ -116,10 +131,10 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
        struct tx_bd_long_hi *txbd1 = NULL;
        uint32_t vlan_tag_flags;
        bool long_bd = false;
-       unsigned short nr_bds = 0;
+       unsigned short nr_bds;
        uint16_t prod;
        struct rte_mbuf *m_seg;
-       struct bnxt_sw_tx_bd *tx_buf;
+       struct rte_mbuf **tx_buf;
        static const uint32_t lhint_arr[4] = {
                TX_BD_LONG_FLAGS_LHINT_LT512,
                TX_BD_LONG_FLAGS_LHINT_LT1K,
@@ -130,17 +145,9 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
        if (unlikely(is_bnxt_in_error(txq->bp)))
                return -EIO;
 
-       if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
-                               PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
-                               PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
-                               PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
-                               PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
-                               PKT_TX_QINQ_PKT) ||
-            (BNXT_TRUFLOW_EN(txq->bp) &&
-             (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
-               long_bd = true;
-
+       long_bd = bnxt_xmit_need_long_bd(tx_pkt, txq);
        nr_bds = long_bd + tx_pkt->nb_segs;
+
        if (unlikely(bnxt_tx_avail(txq) < nr_bds))
                return -ENOMEM;
 
@@ -172,8 +179,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 
        prod = RING_IDX(ring, txr->tx_raw_prod);
        tx_buf = &txr->tx_buf_ring[prod];
-       tx_buf->mbuf = tx_pkt;
-       tx_buf->nr_bds = nr_bds;
+       *tx_buf = tx_pkt;
 
        txbd = &txr->tx_desc_ring[prod];
        txbd->opaque = *coal_pkts;
@@ -185,7 +191,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
                txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K;
        else
                txbd->flags_type |= lhint_arr[tx_pkt->pkt_len >> 9];
-       txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_buf->mbuf));
+       txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
        *last_txbd = txbd;
 
        if (long_bd) {
@@ -193,18 +199,18 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
                vlan_tag_flags = 0;
 
                /* HW can accelerate only outer vlan in QinQ mode */
-               if (tx_buf->mbuf->ol_flags & PKT_TX_QINQ_PKT) {
+               if (tx_pkt->ol_flags & PKT_TX_QINQ_PKT) {
                        vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
-                               tx_buf->mbuf->vlan_tci_outer;
+                               tx_pkt->vlan_tci_outer;
                        outer_tpid_bd = txq->bp->outer_tpid_bd &
                                BNXT_OUTER_TPID_BD_MASK;
                        vlan_tag_flags |= outer_tpid_bd;
-               } else if (tx_buf->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
+               } else if (tx_pkt->ol_flags & PKT_TX_VLAN_PKT) {
                        /* shurd: Should this mask at
                         * TX_BD_LONG_CFA_META_VLAN_VID_MASK?
                         */
                        vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
-                               tx_buf->mbuf->vlan_tci;
+                               tx_pkt->vlan_tci;
                        /* Currently supports 8021Q, 8021AD vlan offloads
                         * QINQ1, QINQ2, QINQ3 vlan headers are deprecated
                         */
@@ -325,7 +331,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 
                prod = RING_IDX(ring, txr->tx_raw_prod);
                tx_buf = &txr->tx_buf_ring[prod];
-               tx_buf->mbuf = m_seg;
+               *tx_buf = m_seg;
 
                txbd = &txr->tx_desc_ring[prod];
                txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(m_seg));
@@ -356,16 +362,17 @@ static void bnxt_tx_cmp_fast(struct bnxt_tx_queue *txq, int nr_pkts)
        int i, j;
 
        for (i = 0; i < nr_pkts; i++) {
-               struct bnxt_sw_tx_bd *tx_buf;
+               struct rte_mbuf **tx_buf;
                unsigned short nr_bds;
 
                tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
-               nr_bds = tx_buf->nr_bds;
+               nr_bds = (*tx_buf)->nb_segs +
+                        bnxt_xmit_need_long_bd(*tx_buf, txq);
                for (j = 0; j < nr_bds; j++) {
-                       if (tx_buf->mbuf) {
+                       if (*tx_buf) {
                                /* Add mbuf to the bulk free array */
-                               free[blk++] = tx_buf->mbuf;
-                               tx_buf->mbuf = NULL;
+                               free[blk++] = *tx_buf;
+                               *tx_buf = NULL;
                        }
                        raw_cons = RING_NEXT(raw_cons);
                        tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
@@ -389,14 +396,15 @@ static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts)
 
        for (i = 0; i < nr_pkts; i++) {
                struct rte_mbuf *mbuf;
-               struct bnxt_sw_tx_bd *tx_buf;
+               struct rte_mbuf **tx_buf;
                unsigned short nr_bds;
 
                tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
-               nr_bds = tx_buf->nr_bds;
+               nr_bds = (*tx_buf)->nb_segs +
+                        bnxt_xmit_need_long_bd(*tx_buf, txq);
                for (j = 0; j < nr_bds; j++) {
-                       mbuf = tx_buf->mbuf;
-                       tx_buf->mbuf = NULL;
+                       mbuf = *tx_buf;
+                       *tx_buf = NULL;
                        raw_cons = RING_NEXT(raw_cons);
                        tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
                        if (!mbuf)      /* long_bd's tx_buf ? */
index 91e10db..e4bd90f 100644 (file)
@@ -17,18 +17,13 @@ struct bnxt_tx_ring_info {
        struct bnxt_db_info     tx_db;
 
        struct tx_bd_long       *tx_desc_ring;
-       struct bnxt_sw_tx_bd    *tx_buf_ring;
+       struct rte_mbuf         **tx_buf_ring;
 
        rte_iova_t              tx_desc_mapping;
 
        struct bnxt_ring        *tx_ring_struct;
 };
 
-struct bnxt_sw_tx_bd {
-       struct rte_mbuf         *mbuf; /* mbuf associated with TX descriptor */
-       unsigned short          nr_bds;
-};
-
 static inline uint32_t bnxt_tx_bds_in_hw(struct bnxt_tx_queue *txq)
 {
        return ((txq->tx_ring->tx_raw_prod - txq->tx_ring->tx_raw_cons) &