mlx4: improve Rx performance with better prefetching
author: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Fri, 30 Oct 2015 19:00:07 +0000 (20:00 +0100)
committer: Thomas Monjalon <thomas.monjalon@6wind.com>
Fri, 30 Oct 2015 23:21:58 +0000 (00:21 +0100)
Prefetching initial bytes of mbuf structures earlier and in two cache lines
instead of one improves performance of mlx4_rx_burst(), which accesses the
mbuf->next field not present in the first 128 bytes.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
drivers/net/mlx4/mlx4.c

index af31573..e422a80 100644 (file)
@@ -2820,6 +2820,12 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                assert(wr->num_sge == 1);
                assert(elts_head < rxq->elts_n);
                assert(rxq->elts_head < rxq->elts_n);
+               /*
+                * Prefetch the initial bytes of the packet descriptor
+                * (two cache lines) before polling the completion queue.
+                */
+               rte_prefetch0(seg);
+               rte_prefetch0(&seg->cacheline1);
                ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
                                                    &flags);
                if (unlikely(ret < 0)) {
@@ -2857,11 +2863,6 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                if (ret == 0)
                        break;
                len = ret;
-               /*
-                * Fetch initial bytes of packet descriptor into a
-                * cacheline while allocating rep.
-                */
-               rte_prefetch0(seg);
                rep = __rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(rep == NULL)) {
                        /*