Prefetching completion queue entries is inefficient because too few CPU
cycles are spent before their use, which results in cache misses anyway.
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
txq->bf_offset ^= (1 << txq->bf_buf_size);
}
txq->bf_offset ^= (1 << txq->bf_buf_size);
}
-/**
- * Prefetch a CQE.
- *
- * @param txq
- * Pointer to TX queue structure.
- * @param cqe_ci
- * CQE consumer index.
- */
-static inline void
-tx_prefetch_cqe(struct txq *txq, uint16_t ci)
-{
- volatile struct mlx5_cqe *cqe;
-
- cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
- rte_prefetch0(cqe);
-}
-
/**
* DPDK callback for TX.
*
/**
* DPDK callback for TX.
*
if (unlikely(!pkts_n))
return 0;
/* Prefetch first packet cacheline. */
if (unlikely(!pkts_n))
return 0;
/* Prefetch first packet cacheline. */
- tx_prefetch_cqe(txq, txq->cq_ci);
- tx_prefetch_cqe(txq, txq->cq_ci + 1);
rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
if (unlikely(!pkts_n))
return 0;
/* Prefetch first packet cacheline. */
if (unlikely(!pkts_n))
return 0;
/* Prefetch first packet cacheline. */
- tx_prefetch_cqe(txq, txq->cq_ci);
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
if (unlikely(!pkts_n))
return 0;
/* Prefetch first packet cacheline. */
if (unlikely(!pkts_n))
return 0;
/* Prefetch first packet cacheline. */
- tx_prefetch_cqe(txq, txq->cq_ci);
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */