From: Shahaf Shuler <shahafs@mellanox.com>
Date: Thu, 14 Sep 2017 10:50:39 +0000 (+0300)
Subject: net/mlx5: enforce Tx num of segments limitation
X-Git-Tag: spdx-start~1960
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=883ce1724b652d8da1cacdaf8409580b24d6a33d;p=dpdk.git

net/mlx5: enforce Tx num of segments limitation

Mellanox NICs has a limitation on the number of mbuf segments a multi
segment mbuf can have. The max number depends on the Tx offloads
requested.

The current code not enforce such limitation, which might cause
malformed work requests to be written to the device.

This commit adds verification for the number of mbuf segments posted
to the device. In case of overflow the packet will not be sent.

In addition update the nic documentation with the limitation.
Considering device limitation is 63 data segments in a work request, the
maximum number of segment in mbuf was calculated taking TSO as the worst
case:

max_nb_segs = 63 - (control_segment + ethernet segment +
		    TSO headers inline + inline segment +
		    extra inline to align to cacheline)

Cc: stable@dpdk.org

Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index f4cb18bcaa..c6a196c2eb 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -124,6 +124,10 @@ Limitations
 
   Will match any ipv4 packet (VLAN included).
 
+- A multi segment packet must have less than 6 segments in case the Tx burst function
+  is set to multi-packet send or Enhanced multi-packet send. Otherwise it must have
+  less than 50 segments.
+
 Configuration
 -------------
 
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f655..59ff00d6b9 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -100,7 +100,8 @@
 
 /*
  * Maximum size of burst for vectorized Tx. This is related to the maximum size
- * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * Careful when changing, large value can cause WQE DS to overlap.
  */
 #define MLX5_VPMD_TX_MAX_BURST        32U
 
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 8b82b5e74a..e00be811f7 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -151,6 +151,9 @@
 /* Default mark value used when none is provided. */
 #define MLX5_FLOW_MARK_DEFAULT 0xffffff
 
+/* Maximum number of DS in WQE. */
+#define MLX5_DSEG_MAX 63
+
 /* Subset of struct mlx5_wqe_eth_seg. */
 struct mlx5_wqe_eth_seg_small {
 	uint32_t rsvd0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 3e6ef8b04f..bc1f85c93e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -654,6 +654,10 @@ next_seg:
 		else
 			j += sg;
 next_pkt:
+		if (ds > MLX5_DSEG_MAX) {
+			txq->stats.oerrors++;
+			break;
+		}
 		++elts_head;
 		++pkts;
 		++i;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
index f89762ff8b..39c732515c 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
@@ -248,6 +248,10 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		if (segs_n == 1 ||
 		    max_elts < segs_n || max_wqe < 2)
 			break;
+		if (segs_n > MLX5_MPW_DSEG_MAX) {
+			txq->stats.oerrors++;
+			break;
+		}
 		wqe = &((volatile struct mlx5_wqe64 *)
 			 txq->wqes)[wqe_ci & wq_mask].hdr;
 		if (buf->ol_flags &
@@ -365,6 +369,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	max_elts = (elts_n - (elts_head - txq->elts_tail));
 	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
 	pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+	assert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr);
 	if (unlikely(!pkts_n))
 		return 0;
 	elts = &(*txq->elts)[elts_head & elts_m];
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 4b0b532b1f..b4c5b10fb7 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -288,6 +288,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 		.comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
 	};
 	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+		unsigned int ds_cnt;
+
 		tmpl.txq.max_inline =
 			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
 			 RTE_CACHE_LINE_SIZE);
@@ -320,6 +322,28 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 			attr.init.cap.max_inline_data =
 				tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
 		}
+		/*
+		 * Check if the inline size is too large in a way which
+		 * can make the WQE DS to overflow.
+		 * Considering in calculation:
+		 *	WQE CTRL (1 DS)
+		 *	WQE ETH  (1 DS)
+		 *	Inline part (N DS)
+		 */
+		ds_cnt = 2 +
+			(attr.init.cap.max_inline_data / MLX5_WQE_DWORD_SIZE);
+		if (ds_cnt > MLX5_DSEG_MAX) {
+			unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
+						   MLX5_WQE_DWORD_SIZE;
+
+			max_inline = max_inline - (max_inline %
+						   RTE_CACHE_LINE_SIZE);
+			WARN("txq inline is too large (%d) setting it to "
+			     "the maximum possible: %d\n",
+			     priv->txq_inline, max_inline);
+			tmpl.txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
+			attr.init.cap.max_inline_data = max_inline;
+		}
 	}
 	if (priv->tso) {
 		attr.init.max_tso_header =