net/mlx5: reduce unnecessary memory access in Rx
author Ruifeng Wang <ruifeng.wang@arm.com>
Wed, 7 Jul 2021 09:03:07 +0000 (17:03 +0800)
committer Raslan Darawsheh <rasland@nvidia.com>
Thu, 15 Jul 2021 13:17:22 +0000 (15:17 +0200)
The MR btree length is constant during Rx replenish, so its retrieval and
the check on it are moved out of the per-mbuf loop to reduce data loads.
A slight performance uplift was measured on both N1SDP and x86.

Suggested-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Signed-off-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
drivers/net/mlx5/mlx5_rxtx_vec.c

index d5af2d9..e64ef70 100644
@@ -106,22 +106,27 @@ mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
                        rxq->stats.rx_nombuf += n;
                        return;
                }
-               for (i = 0; i < n; ++i) {
-                       void *buf_addr;
-
-                       /*
-                        * In order to support the mbufs with external attached
-                        * data buffer we should use the buf_addr pointer
-                        * instead of rte_mbuf_buf_addr(). It touches the mbuf
-                        * itself and may impact the performance.
-                        */
-                       buf_addr = elts[i]->buf_addr;
-                       wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
-                                                     RTE_PKTMBUF_HEADROOM);
-                       /* If there's a single MR, no need to replace LKey. */
-                       if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
-                                    > 1))
+               if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) {
+                       for (i = 0; i < n; ++i) {
+                               /*
+                                * In order to support the mbufs with external attached
+                                * data buffer we should use the buf_addr pointer
+                                * instead of rte_mbuf_buf_addr(). It touches the mbuf
+                                * itself and may impact the performance.
+                                */
+                               void *buf_addr = elts[i]->buf_addr;
+
+                               wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
+                                                             RTE_PKTMBUF_HEADROOM);
                                wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
+                       }
+               } else {
+                       for (i = 0; i < n; ++i) {
+                               void *buf_addr = elts[i]->buf_addr;
+
+                               wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
+                                                             RTE_PKTMBUF_HEADROOM);
+                       }
                }
                rxq->rq_ci += n;
                /* Prevent overflowing into consumed mbufs. */