From: Alexander Kozyrev Date: Mon, 16 Mar 2020 15:34:59 +0000 (+0000) Subject: net/mlx5: reduce Tx completion index memory loads X-Git-Url: http://git.droids-corp.org/?p=dpdk.git;a=commitdiff_plain;h=8d4659e70fc081ab220e44552af194900d720d2e net/mlx5: reduce Tx completion index memory loads There is a non-optimal check if doorbell is needed present in the mlx5_tx_handle_completion() function. Advancing a copy of the txq consumer index and checking this copy with initial value causes unnecessary memory loads and hurts the performance. It is better to have a simple small boolean variable for this purpose. That allows to eliminate all the excessive memory operations with the txq consumer index and restore the performance of the tx completions. Fixes: 1fd9af05e44e ("net/mlx5: update Tx error handling routine") Cc: stable@dpdk.org Signed-off-by: Alexander Kozyrev Acked-by: Viacheslav Ovsiienko --- diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 5ac63da803..f3bf763769 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -2160,7 +2160,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, { unsigned int count = MLX5_TX_COMP_MAX_CQE; volatile struct mlx5_cqe *last_cqe = NULL; - uint16_t ci = txq->cq_ci; + bool ring_doorbell = false; int ret; static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); @@ -2168,8 +2168,8 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, do { volatile struct mlx5_cqe *cqe; - cqe = &txq->cqes[ci & txq->cqe_m]; - ret = check_cqe(cqe, txq->cqe_s, ci); + cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; + ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { if (likely(ret != MLX5_CQE_STATUS_ERR)) { /* No new CQEs in completion queue. */ @@ -2183,7 +2183,6 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * here, before we might perform SQ reset. */ rte_wmb(); - txq->cq_ci = ci; ret = mlx5_tx_error_cqe_handle (txq, (volatile struct mlx5_err_cqe *)cqe); if (unlikely(ret < 0)) { @@ -2199,16 +2198,18 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. * The send queue is supposed to be empty. */ - ++ci; - txq->cq_pi = ci; + ring_doorbell = true; + ++txq->cq_ci; + txq->cq_pi = txq->cq_ci; last_cqe = NULL; continue; } /* Normal transmit completion. */ - MLX5_ASSERT(ci != txq->cq_pi); - MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) == + MLX5_ASSERT(txq->cq_ci != txq->cq_pi); + MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == cqe->wqe_counter); - ++ci; + ring_doorbell = true; + ++txq->cq_ci; last_cqe = cqe; /* * We have to restrict the amount of processed CQEs @@ -2221,14 +2222,10 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, if (likely(--count == 0)) break; } while (true); - if (likely(ci != txq->cq_ci)) { - /* - * Update completion queue consuming index - * and ring doorbell to notify hardware. - */ + if (likely(ring_doorbell)) { + /* Ring doorbell to notify hardware. */ rte_compiler_barrier(); - txq->cq_ci = ci; - *txq->cq_db = rte_cpu_to_be_32(ci); + *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); mlx5_tx_comp_flush(txq, last_cqe, olx); } }