From e5f2b3ebcbc89cf8e0e58c6eccf450e8e7e2f779 Mon Sep 17 00:00:00 2001
From: Lance Richardson
Date: Mon, 15 Nov 2021 13:24:09 -0500
Subject: [PATCH] net/bnxt: avoid unnecessary work in AVX2 Rx

Each call to the AVX2 vector burst receive function makes at least
one pass through the function's inner loop, loading 256 bytes of
completion descriptors and copying 8 rte_mbuf pointers regardless
of whether there are any packets to be received.

Unidirectional forwarding performance is improved by about 3-4%
if we ensure that at least one packet can be received before
entering the inner loop.

Fixes: c4e4c18963b0 ("net/bnxt: add AVX2 RX/Tx")
Cc: stable@dpdk.org

Signed-off-by: Lance Richardson
Reviewed-by: Ajit Khaparde
---
 drivers/net/bnxt/bnxt_rxtx_vec_avx2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
index e4905b4fd1..54e3af22ac 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
@@ -98,6 +98,10 @@ recv_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	rte_prefetch0(&cp_desc_ring[cons + 8]);
 	rte_prefetch0(&cp_desc_ring[cons + 12]);
 
+	/* Return immediately if there is not at least one completed packet. */
+	if (!bnxt_cpr_cmp_valid(&cp_desc_ring[cons], raw_cons, cp_ring_size))
+		return 0;
+
 	/* Ensure that we do not go past the ends of the rings. */
 	nb_pkts = RTE_MIN(nb_pkts, RTE_MIN(rx_ring_size - mbcons,
 					   (cp_ring_size - cons) / 2));
-- 
2.39.5