From: NĂ©lio Laranjeiro Date: Thu, 2 Feb 2017 10:34:12 +0000 (+0100) Subject: net/mlx5: fix Tx WQE corruption caused by starvation X-Git-Tag: spdx-start~4557 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=f04f1d51564b35f20aefd8837b45abe3a3468bb1;p=dpdk.git net/mlx5: fix Tx WQE corruption caused by starvation Fixes an issue which may occurs with the inline feature activated and a packet greater than the max_inline requested. In such situation, more work request elements can be consumed and in the worst case override some still handled by the NIC, this can result in sending garbage on the network or putting the work queue in error. Fixes: 2a66cf378954 ("net/mlx5: support inline send") Cc: stable@dpdk.org Reported-by: Elad Persiko Signed-off-by: Nelio Laranjeiro Acked-by: Yongseok Koh --- diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index ffca21c7cb..a0e15aca37 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -238,8 +238,9 @@ txq_complete(struct txq *txq) } while (1); if (unlikely(cqe == NULL)) return; + txq->wqe_pi = ntohs(cqe->wqe_counter); ctrl = (volatile struct mlx5_wqe_ctrl *) - tx_mlx5_wqe(txq, ntohs(cqe->wqe_counter)); + tx_mlx5_wqe(txq, txq->wqe_pi); elts_tail = ctrl->ctrl3; assert(elts_tail < (1 << txq->wqe_n)); /* Free buffers. */ @@ -365,6 +366,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int i = 0; unsigned int j = 0; unsigned int max; + uint16_t max_wqe; unsigned int comp; volatile struct mlx5_wqe_v *wqe = NULL; unsigned int segs_n = 0; @@ -380,6 +382,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) max = (elts_n - (elts_head - txq->elts_tail)); if (max > elts_n) max -= elts_n; + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); + if (unlikely(!max_wqe)) + return 0; do { volatile rte_v128u32_t *dseg = NULL; uint32_t length; @@ -407,6 +412,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) --segs_n; if (!segs_n) --pkts_n; + if (unlikely(--max_wqe == 0)) + break; wqe = (volatile struct mlx5_wqe_v *) tx_mlx5_wqe(txq, txq->wqe_ci); rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); @@ -476,10 +483,19 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (room > max_inline) { uintptr_t addr_end = (addr + max_inline) & ~(RTE_CACHE_LINE_SIZE - 1); - uint16_t copy_b = ((addr_end - addr) > length) ? - length : - (addr_end - addr); + unsigned int copy_b = + RTE_MIN((addr_end - addr), length); + uint16_t n; + /* + * One Dseg remains in the current WQE. To + * keep the computation positive, it is + * removed after the bytes to Dseg conversion. + */ + n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4; + if (unlikely(max_wqe < n)) + break; + max_wqe -= n; rte_memcpy((void *)raw, (void *)addr, copy_b); addr += copy_b; length -= copy_b; @@ -493,12 +509,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) */ ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2); if (length > 0) { - dseg = (volatile rte_v128u32_t *) - ((uintptr_t)wqe + - (ds * MLX5_WQE_DWORD_SIZE)); - if ((uintptr_t)dseg >= end) + if (ds % (MLX5_WQE_SIZE / + MLX5_WQE_DWORD_SIZE) == 0) { + if (unlikely(--max_wqe == 0)) + break; + dseg = (volatile rte_v128u32_t *) + tx_mlx5_wqe(txq, txq->wqe_ci + + ds / 4); + } else { dseg = (volatile rte_v128u32_t *) - txq->wqes; + ((uintptr_t)wqe + + (ds * MLX5_WQE_DWORD_SIZE)); + } goto use_dseg; } else if (!segs_n) { goto next_pkt; @@ -541,12 +563,12 @@ next_seg: */ assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE)); if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) { - unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) & - ((1 << txq->wqe_n) - 1); - + if (unlikely(--max_wqe == 0)) + break; dseg = (volatile rte_v128u32_t *) - tx_mlx5_wqe(txq, n); - rte_prefetch0(tx_mlx5_wqe(txq, n + 1)); + tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4); + rte_prefetch0(tx_mlx5_wqe(txq, + txq->wqe_ci + ds / 4 + 1)); } else { ++dseg; } @@ -711,6 +733,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int i = 0; unsigned int j = 0; unsigned int max; + uint16_t max_wqe; unsigned int comp; struct mlx5_mpw mpw = { .state = MLX5_MPW_STATE_CLOSED, @@ -726,6 +749,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) max = (elts_n - (elts_head - txq->elts_tail)); if (max > elts_n) max -= elts_n; + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); + if (unlikely(!max_wqe)) + return 0; do { struct rte_mbuf *buf = *(pkts++); unsigned int elts_head_next; @@ -759,6 +785,14 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) (mpw.wqe->eseg.cs_flags != cs_flags))) mlx5_mpw_close(txq, &mpw); if (mpw.state == MLX5_MPW_STATE_CLOSED) { + /* + * Multi-Packet WQE consumes at most two WQE. + * mlx5_mpw_new() expects to be able to use such + * resources. + */ + if (unlikely(max_wqe < 2)) + break; + max_wqe -= 2; mlx5_mpw_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; } @@ -914,11 +948,24 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, unsigned int i = 0; unsigned int j = 0; unsigned int max; + uint16_t max_wqe; unsigned int comp; unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE; struct mlx5_mpw mpw = { .state = MLX5_MPW_STATE_CLOSED, }; + /* + * Compute the maximum number of WQE which can be consumed by inline + * code. + * - 2 DSEG for: + * - 1 control segment, + * - 1 Ethernet segment, + * - N Dseg from the inline request. + */ + const unsigned int wqe_inl_n = + ((2 * MLX5_WQE_DWORD_SIZE + + txq->max_inline * RTE_CACHE_LINE_SIZE) + + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; if (unlikely(!pkts_n)) return 0; @@ -950,6 +997,11 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, break; max -= segs_n; --pkts_n; + /* + * Compute max_wqe in case less WQE were consumed in previous + * iteration. + */ + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) @@ -975,9 +1027,20 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, if (mpw.state == MLX5_MPW_STATE_CLOSED) { if ((segs_n != 1) || (length > inline_room)) { + /* + * Multi-Packet WQE consumes at most two WQE. + * mlx5_mpw_new() expects to be able to use + * such resources. + */ + if (unlikely(max_wqe < 2)) + break; + max_wqe -= 2; mlx5_mpw_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; } else { + if (unlikely(max_wqe < wqe_inl_n)) + break; + max_wqe -= wqe_inl_n; mlx5_mpw_inline_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; } diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 302ca4938e..41a34d7ff8 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -249,6 +249,7 @@ struct txq { uint16_t elts_comp; /* Counter since last completion request. */ uint16_t cq_ci; /* Consumer index for completion queue. */ uint16_t wqe_ci; /* Consumer index for work queue. */ + uint16_t wqe_pi; /* Producer index for work queue. */ uint16_t elts_n:4; /* (*elts)[] length (in log2). */ uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */