From ec837ad0fc7c6df4912cc2706b9cd54b225f4a34 Mon Sep 17 00:00:00 2001
From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Date: Tue, 22 Jun 2021 19:40:49 +0300
Subject: [PATCH] net/mlx5: fix multi-segment inline for the first segments

Before 19.08 release the Tx burst routines of mlx5 PMD
provided data inline for the first short segments of the
multi-segment packets. In the release 19.08 mlx5 Tx datapath
was refactored and this behavior was broken, affecting the
performance.

For example, the T-Rex traffic generator might use small
leading segments to handle packet headers and performance
degradation was noticed.

If the first segments of the multi-segment packet are short
and the overall length is below the inline threshold it
should be inline into the WQE to fix the performance.

Fixes: 18a1c20044c0 ("net/mlx5: implement Tx burst template")
Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/mlx5_tx.h | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index 7d3ff8407c..634c9d754a 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -2040,6 +2040,8 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq,
 		unsigned int nxlen;
 		uintptr_t start;
 
+		mbuf = loc->mbuf;
+		nxlen = rte_pktmbuf_data_len(mbuf);
 		/*
 		 * Packet length exceeds the allowed inline data length,
 		 * check whether the minimal inlining is required.
@@ -2049,28 +2051,23 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq,
 				    MLX5_ESEG_MIN_INLINE_SIZE);
 			MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send);
 			inlen = txq->inlen_mode;
-		} else {
-			if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE ||
-			    !vlan || txq->vlan_en) {
-				/*
-				 * VLAN insertion will be done inside by HW.
-				 * It is not utmost effective - VLAN flag is
-				 * checked twice, but we should proceed the
-				 * inlining length correctly and take into
-				 * account the VLAN header being inserted.
-				 */
-				return mlx5_tx_packet_multi_send
-							(txq, loc, olx);
-			}
+		} else if (vlan && !txq->vlan_en) {
+			/*
+			 * VLAN insertion is requested and hardware does not
+			 * support the offload, will do with software inline.
+			 */
 			inlen = MLX5_ESEG_MIN_INLINE_SIZE;
+		} else if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE ||
+			   nxlen > txq->inlen_send) {
+			return mlx5_tx_packet_multi_send(txq, loc, olx);
+		} else {
+			goto do_first;
 		}
 		/*
 		 * Now we know the minimal amount of data is requested
 		 * to inline. Check whether we should inline the buffers
 		 * from the chain beginning to eliminate some mbufs.
 		 */
-		mbuf = loc->mbuf;
-		nxlen = rte_pktmbuf_data_len(mbuf);
 		if (unlikely(nxlen <= txq->inlen_send)) {
 			/* We can inline first mbuf at least. */
 			if (nxlen < inlen) {
@@ -2092,6 +2089,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq,
 					goto do_align;
 				}
 			}
+do_first:
 			do {
 				inlen = nxlen;
 				mbuf = NEXT(mbuf);
-- 
2.39.5