net/mlx5: fix multi-segment inline for the first segments
author Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Tue, 22 Jun 2021 16:40:49 +0000 (19:40 +0300)
committer Raslan Darawsheh <rasland@nvidia.com>
Thu, 8 Jul 2021 20:09:20 +0000 (22:09 +0200)
Before the 19.08 release, the Tx burst routines of the mlx5 PMD
inlined the data of the first short segments of multi-segment
packets. In the 19.08 release the mlx5 Tx datapath was
refactored and this behavior was broken, which degraded
performance.

For example, the T-Rex traffic generator may use small
leading segments to hold packet headers, and a performance
degradation was noticed.

If the first segments of a multi-segment packet are short
and the overall length is below the inline threshold, their
data should be inlined into the WQE to restore the performance.

Fixes: 18a1c20044c0 ("net/mlx5: implement Tx burst template")
Cc: stable@dpdk.org
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
drivers/net/mlx5/mlx5_tx.h

index 7d3ff84..634c9d7 100644 (file)
@@ -2040,6 +2040,8 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq,
                unsigned int nxlen;
                uintptr_t start;
 
+               mbuf = loc->mbuf;
+               nxlen = rte_pktmbuf_data_len(mbuf);
                /*
                 * Packet length exceeds the allowed inline data length,
                 * check whether the minimal inlining is required.
@@ -2049,28 +2051,23 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq,
                                    MLX5_ESEG_MIN_INLINE_SIZE);
                        MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send);
                        inlen = txq->inlen_mode;
-               } else {
-                       if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE ||
-                           !vlan || txq->vlan_en) {
-                               /*
-                                * VLAN insertion will be done inside by HW.
-                                * It is not utmost effective - VLAN flag is
-                                * checked twice, but we should proceed the
-                                * inlining length correctly and take into
-                                * account the VLAN header being inserted.
-                                */
-                               return mlx5_tx_packet_multi_send
-                                                       (txq, loc, olx);
-                       }
+               } else if (vlan && !txq->vlan_en) {
+                       /*
+                        * VLAN insertion is requested and hardware does not
+                        * support the offload, will do with software inline.
+                        */
                        inlen = MLX5_ESEG_MIN_INLINE_SIZE;
+               } else if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE ||
+                          nxlen > txq->inlen_send) {
+                       return mlx5_tx_packet_multi_send(txq, loc, olx);
+               } else {
+                       goto do_first;
                }
                /*
                 * Now we know the minimal amount of data is requested
                 * to inline. Check whether we should inline the buffers
                 * from the chain beginning to eliminate some mbufs.
                 */
-               mbuf = loc->mbuf;
-               nxlen = rte_pktmbuf_data_len(mbuf);
                if (unlikely(nxlen <= txq->inlen_send)) {
                        /* We can inline first mbuf at least. */
                        if (nxlen < inlen) {
@@ -2092,6 +2089,7 @@ mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq,
                                        goto do_align;
                                }
                        }
+do_first:
                        do {
                                inlen = nxlen;
                                mbuf = NEXT(mbuf);