Prefetching the initial bytes of mbuf structures earlier, and into two cache
lines instead of one, improves the performance of mlx4_rx_burst(), which
accesses the mbuf->next field that is not present in the first 128 bytes.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
assert(wr->num_sge == 1);
assert(elts_head < rxq->elts_n);
assert(rxq->elts_head < rxq->elts_n);
+ /*
+ * Fetch initial bytes of packet descriptor into a
+ * cacheline while allocating rep.
+ */
+ rte_prefetch0(seg);
+ rte_prefetch0(&seg->cacheline1);
ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
&flags);
if (unlikely(ret < 0)) {
if (ret == 0)
break;
len = ret;
- /*
- * Fetch initial bytes of packet descriptor into a
- * cacheline while allocating rep.
- */
- rte_prefetch0(seg);
rep = __rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
/*