net/hns3: optimize Tx performance
author     Chengwen Feng <fengchengwen@huawei.com>
           Tue, 16 Nov 2021 01:22:12 +0000 (09:22 +0800)
committer  Ferruh Yigit <ferruh.yigit@intel.com>
           Tue, 16 Nov 2021 15:39:20 +0000 (16:39 +0100)
This patch uses tx_free_thresh to control the freeing of transmitted mbufs
when the common xmit algorithm is used.

This patch also modifies the implementation of the PMD's tx_done_cleanup
callback, because the mbuf free algorithm has changed.

In the testpmd single-core MAC forwarding scenario, performance improves by
10% for 64B packets on the Kunpeng920 platform.

Cc: stable@dpdk.org
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Min Hu (Connor) <humin29@huawei.com>
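
A minimal standalone model of the gate this patch adds to the hot path:
cleanup is now attempted only when the count of ready BDs drops below
tx_free_thresh, instead of unconditionally on every burst. The struct and
names below (txq_model, should_try_free) are illustrative stand-ins, not the
driver's real hns3_tx_queue:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-in for struct hns3_tx_queue; the real struct in
     * hns3_rxtx.h has many more fields. */
    struct txq_model {
            uint16_t nb_tx_desc;     /* ring size */
            uint16_t tx_bd_ready;    /* descriptors currently available */
            uint16_t tx_free_thresh; /* start freeing below this many ready BDs */
            uint16_t tx_rs_thresh;   /* free granularity: BDs per batch */
    };

    /* The check hns3_xmit_pkts() now performs before trying to free. */
    static int should_try_free(const struct txq_model *q)
    {
            return q->tx_bd_ready < q->tx_free_thresh;
    }

    int main(void)
    {
            struct txq_model q = { 1024, 900, 256, 32 };
            printf("attempt free: %s\n", should_try_free(&q) ? "yes" : "no"); /* no */
            q.tx_bd_ready = 100;
            printf("attempt free: %s\n", should_try_free(&q) ? "yes" : "no"); /* yes */
            return 0;
    }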
drivers/net/hns3/hns3_rxtx.c

index f0a5761..40cc4e9 100644
--- a/drivers/net/hns3/hns3_rxtx.c
+++ b/drivers/net/hns3/hns3_rxtx.c
@@ -3077,40 +3077,51 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
        return 0;
 }
 
-static void
+static int
 hns3_tx_free_useless_buffer(struct hns3_tx_queue *txq)
 {
        uint16_t tx_next_clean = txq->next_to_clean;
-       uint16_t tx_next_use   = txq->next_to_use;
-       uint16_t tx_bd_ready   = txq->tx_bd_ready;
-       uint16_t tx_bd_max     = txq->nb_tx_desc;
-       struct hns3_entry *tx_bak_pkt = &txq->sw_ring[tx_next_clean];
+       uint16_t tx_next_use = txq->next_to_use;
+       struct hns3_entry *tx_entry = &txq->sw_ring[tx_next_clean];
        struct hns3_desc *desc = &txq->tx_ring[tx_next_clean];
-       struct rte_mbuf *mbuf;
+       int i;
 
-       while ((!(desc->tx.tp_fe_sc_vld_ra_ri &
-               rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))) &&
-               tx_next_use != tx_next_clean) {
-               mbuf = tx_bak_pkt->mbuf;
-               if (mbuf) {
-                       rte_pktmbuf_free_seg(mbuf);
-                       tx_bak_pkt->mbuf = NULL;
-               }
+       if (tx_next_use >= tx_next_clean &&
+           tx_next_use < tx_next_clean + txq->tx_rs_thresh)
+               return -1;
 
-               desc++;
-               tx_bak_pkt++;
-               tx_next_clean++;
-               tx_bd_ready++;
-
-               if (tx_next_clean >= tx_bd_max) {
-                       tx_next_clean = 0;
-                       desc = txq->tx_ring;
-                       tx_bak_pkt = txq->sw_ring;
-               }
+       /*
+        * All mbufs can be released only when the VLD bits of all
+        * descriptors in a batch are cleared.
+        */
+       for (i = 0; i < txq->tx_rs_thresh; i++) {
+               if (desc[i].tx.tp_fe_sc_vld_ra_ri &
+                       rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B)))
+                       return -1;
        }
 
-       txq->next_to_clean = tx_next_clean;
-       txq->tx_bd_ready   = tx_bd_ready;
+       for (i = 0; i < txq->tx_rs_thresh; i++) {
+               rte_pktmbuf_free_seg(tx_entry[i].mbuf);
+               tx_entry[i].mbuf = NULL;
+       }
+
+       /* Update the count of available descriptors for the freed buffers */
+       txq->tx_bd_ready += txq->tx_rs_thresh;
+       txq->next_to_clean += txq->tx_rs_thresh;
+       if (txq->next_to_clean >= txq->nb_tx_desc)
+               txq->next_to_clean = 0;
+
+       return 0;
+}
+
+static inline int
+hns3_tx_free_required_buffer(struct hns3_tx_queue *txq, uint16_t required_bds)
+{
+       while (required_bds > txq->tx_bd_ready) {
+               if (hns3_tx_free_useless_buffer(txq) != 0)
+                       return -1;
+       }
+       return 0;
 }
 
 int
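
To illustrate the batch semantics of the reworked hns3_tx_free_useless_buffer():
a batch of tx_rs_thresh buffers is released only when no descriptor in it still
has its VLD bit set and the batch does not overlap the in-use region. The
sketch below is a toy model (toy_txq, toy_free_batch and VLD_BIT are invented
names); like the patch, it indexes the batch linearly and resets next_to_clean
at the ring end, which presumes the ring size is a multiple of the batch size:

    #include <stdint.h>

    #define RING_SZ   64  /* must be a multiple of RS_THRESH */
    #define RS_THRESH  8
    #define VLD_BIT  0x1  /* stand-in for BIT(HNS3_TXD_VLD_B) */

    struct toy_desc  { uint16_t flags; }; /* hardware clears VLD_BIT when done */
    struct toy_entry { void *mbuf; };

    struct toy_txq {
            struct toy_desc  ring[RING_SZ];
            struct toy_entry sw_ring[RING_SZ];
            uint16_t next_to_clean;
            uint16_t next_to_use;
            uint16_t tx_bd_ready;
    };

    /* Same contract as the patched function: free one whole batch and
     * return 0, or return -1 if any descriptor in the batch is still
     * valid or the batch would overlap the in-use region. */
    static int toy_free_batch(struct toy_txq *q)
    {
            uint16_t clean = q->next_to_clean;
            int i;

            if (q->next_to_use >= clean && q->next_to_use < clean + RS_THRESH)
                    return -1;

            for (i = 0; i < RS_THRESH; i++)
                    if (q->ring[clean + i].flags & VLD_BIT)
                            return -1;

            for (i = 0; i < RS_THRESH; i++)
                    q->sw_ring[clean + i].mbuf = NULL; /* rte_pktmbuf_free_seg() in the driver */

            q->tx_bd_ready += RS_THRESH;
            q->next_to_clean = (uint16_t)(clean + RS_THRESH);
            if (q->next_to_clean >= RING_SZ)
                    q->next_to_clean = 0;
            return 0;
    }

Deciding on the whole batch up front (all VLD bits checked first, then all
frees) keeps the contract simple for the callers: each call either frees
exactly tx_rs_thresh BDs or frees nothing.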
@@ -4147,8 +4158,8 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        uint16_t nb_tx;
        uint16_t i;
 
-       /* free useless buffer */
-       hns3_tx_free_useless_buffer(txq);
+       if (txq->tx_bd_ready < txq->tx_free_thresh)
+               (void)hns3_tx_free_useless_buffer(txq);
 
        tx_next_use   = txq->next_to_use;
        tx_bd_max     = txq->nb_tx_desc;
@@ -4163,11 +4174,14 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                nb_buf = tx_pkt->nb_segs;
 
                if (nb_buf > txq->tx_bd_ready) {
-                       txq->dfx_stats.queue_full_cnt++;
-                       if (nb_tx == 0)
-                               return 0;
-
-                       goto end_of_tx;
+                       /* Try to release just enough mbufs for this packet.
+                        * Avoid draining the whole ring at once, which could
+                        * take a long time and cause jitter.
+                        */
+                       if (hns3_tx_free_required_buffer(txq, nb_buf) != 0) {
+                               txq->dfx_stats.queue_full_cnt++;
+                               goto end_of_tx;
+                       }
                }
 
                /*
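
A sketch of the new slow path, reusing the toy_txq/toy_free_batch() model from
the sketch above (toy_reserve_bds is an invented name): when a packet needs
more BDs than are ready, batches are reclaimed one at a time until the packet
fits, and the burst ends early if a batch is still owned by hardware. Freeing
at most what the current packet requires bounds the time spent in cleanup per
packet, which is the jitter concern the comment describes.

    /* Minimal model of hns3_tx_free_required_buffer(), built on the
     * toy_txq model above. */
    static int toy_reserve_bds(struct toy_txq *q, uint16_t required_bds)
    {
            while (required_bds > q->tx_bd_ready) {
                    if (toy_free_batch(q) != 0)
                            return -1; /* caller counts queue_full_cnt and stops */
            }
            return 0;
    }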
@@ -4577,46 +4591,22 @@ hns3_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 static int
 hns3_tx_done_cleanup_full(struct hns3_tx_queue *txq, uint32_t free_cnt)
 {
-       uint16_t next_to_clean = txq->next_to_clean;
-       uint16_t next_to_use   = txq->next_to_use;
-       uint16_t tx_bd_ready   = txq->tx_bd_ready;
-       struct hns3_entry *tx_pkt = &txq->sw_ring[next_to_clean];
-       struct hns3_desc *desc = &txq->tx_ring[next_to_clean];
+       uint16_t round_free_cnt;
        uint32_t idx;
 
        if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
                free_cnt = txq->nb_tx_desc;
 
-       for (idx = 0; idx < free_cnt; idx++) {
-               if (next_to_clean == next_to_use)
-                       break;
+       if (txq->tx_rs_thresh == 0)
+               return 0;
 
-               if (desc->tx.tp_fe_sc_vld_ra_ri &
-                   rte_cpu_to_le_16(BIT(HNS3_TXD_VLD_B)))
+       round_free_cnt = roundup(free_cnt, txq->tx_rs_thresh);
+       for (idx = 0; idx < round_free_cnt; idx += txq->tx_rs_thresh) {
+               if (hns3_tx_free_useless_buffer(txq) != 0)
                        break;
-
-               if (tx_pkt->mbuf != NULL) {
-                       rte_pktmbuf_free_seg(tx_pkt->mbuf);
-                       tx_pkt->mbuf = NULL;
-               }
-
-               next_to_clean++;
-               tx_bd_ready++;
-               tx_pkt++;
-               desc++;
-               if (next_to_clean == txq->nb_tx_desc) {
-                       tx_pkt = txq->sw_ring;
-                       desc = txq->tx_ring;
-                       next_to_clean = 0;
-               }
-       }
-
-       if (idx > 0) {
-               txq->next_to_clean = next_to_clean;
-               txq->tx_bd_ready = tx_bd_ready;
        }
 
-       return (int)idx;
+       return RTE_MIN(idx, free_cnt);
 }
 
 int
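
The accounting in the new hns3_tx_done_cleanup_full() can be checked with a
small worked example (cleanup_count and done_batches are illustrative,
standing in for the loop over hns3_tx_free_useless_buffer()): the request is
rounded up to whole tx_rs_thresh batches, cleanup proceeds batch by batch, and
the result is clamped back to the caller's request.

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the new function's arithmetic; done_batches stands in for
     * how many batches the hardware has actually finished. rs_thresh is
     * assumed nonzero, as the driver checks before this point. */
    static uint32_t cleanup_count(uint32_t free_cnt, uint16_t rs_thresh,
                                  uint32_t done_batches)
    {
            /* roundup(free_cnt, rs_thresh) */
            uint32_t round = (free_cnt + rs_thresh - 1) / rs_thresh * rs_thresh;
            uint32_t idx = 0;

            while (idx < round && done_batches > 0) {
                    idx += rs_thresh; /* one successful batch free */
                    done_batches--;
            }

            return idx < free_cnt ? idx : free_cnt; /* RTE_MIN(idx, free_cnt) */
    }

    int main(void)
    {
            /* Ask to clean 50 BDs with tx_rs_thresh = 32 and 2 finished
             * batches: 50 rounds up to 64, two batches free 64 BDs, and
             * the result clamps back to the requested 50. */
            printf("%u\n", cleanup_count(50, 32, 2)); /* prints 50 */
            return 0;
    }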