mbuf: add prefetch helpers
authorOlivier Matz <olivier.matz@6wind.com>
Wed, 18 May 2016 16:02:08 +0000 (18:02 +0200)
committerThomas Monjalon <thomas.monjalon@6wind.com>
Tue, 24 May 2016 09:21:14 +0000 (11:21 +0200)
Some architectures (e.g. Power8) have a cache line size of 128 bytes,
so the drivers should not expect that prefetching the second part of
the mbuf with rte_prefetch0(&m->cacheline1) is valid.

This commit adds helpers that can be used by drivers to prefetch the
rx or tx part of the mbuf, whatever the cache line size.

Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
Reviewed-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
drivers/net/fm10k/fm10k_rxtx_vec.c
drivers/net/i40e/i40e_rxtx_vec.c
drivers/net/ixgbe/ixgbe_rxtx_vec.c
drivers/net/mlx4/mlx4.c
drivers/net/mlx5/mlx5_rxtx.c
examples/ipsec-secgw/ipsec-secgw.c
lib/librte_mbuf/rte_mbuf.h

index 03e4a5c..ef256a5 100644 (file)
@@ -487,10 +487,10 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                rte_compiler_barrier();
 
                if (split_packet) {
-                       rte_prefetch0(&rx_pkts[pos]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
                }
 
                /* D.1 pkt 3,4 convert format from desc to pktmbuf */
index f7a62a8..eef80d9 100644 (file)
@@ -297,10 +297,10 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
 
                if (split_packet) {
-                       rte_prefetch0(&rx_pkts[pos]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
                }
 
                /* avoid compiler reorder optimization */
index c4d709b..e97ea82 100644 (file)
@@ -307,10 +307,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                _mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
 
                if (split_packet) {
-                       rte_prefetch0(&rx_pkts[pos]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 1]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 2]->cacheline1);
-                       rte_prefetch0(&rx_pkts[pos + 3]->cacheline1);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
+                       rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
                }
 
                /* avoid compiler reorder optimization */
index 080ab61..9ed1491 100644 (file)
@@ -3283,8 +3283,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                 * Fetch initial bytes of packet descriptor into a
                 * cacheline while allocating rep.
                 */
-               rte_prefetch0(seg);
-               rte_prefetch0(&seg->cacheline1);
+               rte_mbuf_prefetch_part1(seg);
+               rte_mbuf_prefetch_part2(seg);
                ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
                                                    &flags);
                if (unlikely(ret < 0)) {
index 13c8d71..29bfcec 100644 (file)
@@ -1134,8 +1134,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                 * Fetch initial bytes of packet descriptor into a
                 * cacheline while allocating rep.
                 */
-               rte_prefetch0(seg);
-               rte_prefetch0(&seg->cacheline1);
+               rte_mbuf_prefetch_part1(seg);
+               rte_mbuf_prefetch_part2(seg);
                ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci);
                if (unlikely(ret < 0)) {
                        struct ibv_wc wc;
index 1dc505c..ebd7c23 100644 (file)
@@ -298,7 +298,7 @@ prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts, uint8_t port)
        const int32_t prefetch_offset = 2;
 
        for (i = 0; i < (nb_pkts - prefetch_offset); i++) {
-               rte_prefetch0(pkts[i + prefetch_offset]->cacheline1);
+               rte_mbuf_prefetch_part2(pkts[i + prefetch_offset]);
                prepare_tx_pkt(pkts[i], port);
        }
        /* Process left packets */
index 48911a6..11fa06d 100644 (file)
@@ -842,6 +842,44 @@ struct rte_mbuf {
        uint16_t timesync;
 } __rte_cache_aligned;
 
+/**
+ * Prefetch the first part of the mbuf
+ *
+ * The first 64 bytes of the mbuf correspond to fields that are used early
+ * in the receive path. If the cache line of the architecture is larger than
+ * 64B, the second part will also be prefetched.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ */
+static inline void
+rte_mbuf_prefetch_part1(struct rte_mbuf *m)
+{
+       rte_prefetch0(&m->cacheline0);
+}
+
+/**
+ * Prefetch the second part of the mbuf
+ *
+ * The next 64 bytes of the mbuf correspond to fields that are used in the
+ * transmit path. If the cache line of the architecture is larger than 64B,
+ * this function does nothing as it is expected that the full mbuf is
+ * already in cache.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ */
+static inline void
+rte_mbuf_prefetch_part2(struct rte_mbuf *m)
+{
+#if RTE_CACHE_LINE_SIZE == 64
+       rte_prefetch0(&m->cacheline1);
+#else
+       RTE_SET_USED(m); /* no prefetch is issued on this path */
+#endif
+}
+
+
 static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
 
 /**