net/bnxt: fix scalar Tx completion handling
author: Lance Richardson <lance.richardson@broadcom.com>
Wed, 16 Jun 2021 17:55:22 +0000 (13:55 -0400)
committer: Ajit Khaparde <ajit.khaparde@broadcom.com>
Thu, 8 Jul 2021 03:55:54 +0000 (05:55 +0200)
Preserve the raw (unmasked) transmit completion ring
consumer index.

Remove cache prefetches that have no measurable performance
benefit.

Fixes: c7de4195cc4c ("net/bnxt: modify ring index logic")
Cc: stable@dpdk.org
Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
drivers/net/bnxt/bnxt_txr.c

index 2745996..54eaab3 100644 (file)
@@ -444,30 +444,26 @@ static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts)
 
 static int bnxt_handle_tx_cp(struct bnxt_tx_queue *txq)
 {
+       uint32_t nb_tx_pkts = 0, cons, ring_mask, opaque;
        struct bnxt_cp_ring_info *cpr = txq->cp_ring;
        uint32_t raw_cons = cpr->cp_raw_cons;
-       uint32_t cons;
-       uint32_t nb_tx_pkts = 0;
+       struct bnxt_ring *cp_ring_struct;
        struct tx_cmpl *txcmp;
-       struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
-       struct bnxt_ring *cp_ring_struct = cpr->cp_ring_struct;
-       uint32_t ring_mask = cp_ring_struct->ring_mask;
-       uint32_t opaque = 0;
 
        if (bnxt_tx_bds_in_hw(txq) < txq->tx_free_thresh)
                return 0;
 
+       cp_ring_struct = cpr->cp_ring_struct;
+       ring_mask = cp_ring_struct->ring_mask;
+
        do {
                cons = RING_CMPL(ring_mask, raw_cons);
                txcmp = (struct tx_cmpl *)&cpr->cp_desc_ring[cons];
-               rte_prefetch_non_temporal(&cp_desc_ring[(cons + 2) &
-                                                       ring_mask]);
 
-               if (!CMPL_VALID(txcmp, cpr->valid))
+               if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
                        break;
-               opaque = rte_cpu_to_le_32(txcmp->opaque);
-               NEXT_CMPL(cpr, cons, cpr->valid, 1);
-               rte_prefetch0(&cp_desc_ring[cons]);
+
+               opaque = rte_le_to_cpu_32(txcmp->opaque);
 
                if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)
                        nb_tx_pkts += opaque;
@@ -475,9 +471,11 @@ static int bnxt_handle_tx_cp(struct bnxt_tx_queue *txq)
                        RTE_LOG_DP(ERR, PMD,
                                        "Unhandled CMP type %02x\n",
                                        CMP_TYPE(txcmp));
-               raw_cons = cons;
+               raw_cons = NEXT_RAW_CMP(raw_cons);
        } while (nb_tx_pkts < ring_mask);
 
+       cpr->valid = !!(raw_cons & cp_ring_struct->ring_size);
+
        if (nb_tx_pkts) {
                if (txq->offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
                        bnxt_tx_cmp_fast(txq, nb_tx_pkts);