net/cnxk: add multi-segment Tx for CN10K
author Nithin Dabilpuram <ndabilpuram@marvell.com>
Wed, 23 Jun 2021 04:46:29 +0000 (10:16 +0530)
committer Jerin Jacob <jerinj@marvell.com>
Tue, 29 Jun 2021 21:39:29 +0000 (23:39 +0200)
Add Tx burst multi-segment version for CN10K.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
drivers/net/cnxk/cn10k_tx.c
drivers/net/cnxk/cn10k_tx.h
drivers/net/cnxk/cn10k_tx_mseg.c [new file with mode: 0644]
drivers/net/cnxk/meson.build

index 13c605f..9803002 100644 (file)
@@ -40,6 +40,8 @@ pick_tx_func(struct rte_eth_dev *eth_dev,
 void
 cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 {
+       struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
+       /* Each table below holds one burst routine per fast-path mode,
+        * indexed by the f4..f0 values of the NIX_TX_FASTPATH_MODES entries.
+        */
        const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2] = {
 #define T(name, f4, f3, f2, f1, f0, sz, flags)                         \
        [f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_##name,
@@ -48,7 +50,21 @@ cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 #undef T
        };
 
-       pick_tx_func(eth_dev, nix_eth_tx_burst);
+       const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2] = {
+#define T(name, f4, f3, f2, f1, f0, sz, flags)                         \
+       [f4][f3][f2][f1][f0] = cn10k_nix_xmit_pkts_mseg_##name,
+
+               NIX_TX_FASTPATH_MODES
+#undef T
+       };
+       /* Pick from the single-segment table when scalar mode is enabled or
+        * when VLAN/QinQ or TSO Tx offload flags are set.
+        * NOTE(review): this replaces a previously unconditional
+        * pick_tx_func() call; if neither this condition nor the MULTI_SEGS
+        * check below holds, no pick_tx_func() call is made in this function.
+        * Confirm a burst routine is still installed on that path.
+        */
+       if (dev->scalar_ena ||
+           (dev->tx_offload_flags &
+            (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)))
+               pick_tx_func(eth_dev, nix_eth_tx_burst);
+       /* Checked last, so the multi-segment table is the final pick when
+        * DEV_TX_OFFLOAD_MULTI_SEGS is requested (presumably overriding the
+        * pick above; pick_tx_func() itself is not visible here).
+        */
+       if (dev->tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+               pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);
 
        rte_mb();
 }
index c54fbfe..63e9848 100644 (file)
@@ -338,6 +338,77 @@ cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, uintptr_t lmt_addr,
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
 }
 
+static __rte_always_inline uint16_t
+cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
+{
+       struct nix_send_hdr_s *send_hdr;
+       union nix_send_sg_s *sg;
+       struct rte_mbuf *m_next;
+       uint64_t *slist, sg_u;
+       uint64_t nb_segs;
+       uint64_t segdw;
+       uint8_t off, i;
+       /* Write the send header fields and the scatter/gather (SG) list for
+        * every segment of the mbuf chain 'm' into 'cmd'.
+        *
+        * m:     first mbuf of the chain; m->nb_segs bounds the walk.
+        * cmd:   descriptor memory to fill, addressed as 64-bit words.
+        * flags: Tx offload flag mask; NIX_TX_NEED_EXT_HDR and
+        *        NIX_TX_OFFLOAD_MBUF_NOFF_F are consulted here.
+        *
+        * Returns the descriptor size in units of two 64-bit words; the same
+        * value minus one is stored in send_hdr->w0.sizem1.
+        */
+       send_hdr = (struct nix_send_hdr_s *)cmd;
+       send_hdr->w0.total = m->pkt_len;
+       send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
+       /* When NIX_TX_NEED_EXT_HDR is set the SG area starts two words later */
+       if (flags & NIX_TX_NEED_EXT_HDR)
+               off = 2;
+       else
+               off = 0;
+       /* SG header word is cmd[2 + off]; segment addresses follow it */
+       sg = (union nix_send_sg_s *)&cmd[2 + off];
+       /* Keep only the top 6 bits of sg->u; the lower bits are rebuilt
+        * below from the segment lengths.
+        */
+       sg->u &= 0xFC00000000000000;
+       sg_u = sg->u;
+       slist = &cmd[3 + off];
+
+       i = 0;
+       nb_segs = m->nb_segs;
+
+       /* Fill mbuf segments: 'i' is the slot (0..2) inside the current SG
+        * sub-descriptor, 'slist' is the next address word to write.
+        */
+       do {
+               m_next = m->next;
+               /* Segment length goes into the 16-bit field at bit i * 16 */
+               sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
+               *slist = rte_mbuf_data_iova(m);
+               /* Set invert df if buffer is not to be freed by H/W */
+               if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
+                       sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
+                       /* The debug-only check below (not part of the 'if'
+                        * above) marks the mempool object as "put" when bit
+                        * (i + 55) is clear, since NIX then frees it.
+                        */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+               if (!(sg_u & (1ULL << (i + 55))))
+                       __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
+#endif
+               slist++;
+               i++;
+               nb_segs--;
+               /* Three segments per SG sub-descriptor: once full and more
+                * segments remain, close it and open a new one at 'slist'.
+                */
+               if (i > 2 && nb_segs) {
+                       i = 0;
+                       /* Next SG subdesc, seeded with the top 6 header bits */
+                       *(uint64_t *)slist = sg_u & 0xFC00000000000000;
+                       sg->u = sg_u;
+                       sg->segs = 3;
+                       sg = (union nix_send_sg_s *)slist;
+                       sg_u = sg->u;
+                       slist++;
+               }
+               m = m_next;
+       } while (nb_segs);
+
+       /* Close the last (possibly partial) SG sub-descriptor */
+       sg->u = sg_u;
+       sg->segs = i;
+       /* Number of 64-bit words written from the first SG header onwards */
+       segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
+       /* Roundup extra dwords to multiple of 2 */
+       segdw = (segdw >> 1) + (segdw & 0x1);
+       /* Default dwords: one unit for the send header, plus off / 2 more */
+       segdw += (off >> 1) + 1;
+       send_hdr->w0.sizem1 = segdw - 1;
+
+       return segdw;
+}
+
 static __rte_always_inline uint16_t
 cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
                    uint64_t *cmd, const uint16_t flags)
@@ -421,6 +492,103 @@ again:
        return pkts;
 }
 
+/* Transmit a burst of (possibly multi-segment) packets on a CN10K Tx queue.
+ *
+ * Packets are handled in bursts of up to 32. For each packet the send
+ * descriptor is prepared in 'cmd', copied to the current LMT line by
+ * cn10k_nix_xmit_prepare(), and its SG list is then written directly on that
+ * LMT line by cn10k_nix_prepare_mseg(). The per-packet size (segdw - 1) is
+ * packed as a 3-bit field into 'data128', which is split into the two 64-bit
+ * words used to trigger up to two LMTST submissions of 16 lines each.
+ *
+ * tx_queue: struct cn10k_eth_txq to transmit on.
+ * tx_pkts:  array of 'pkts' mbuf chains.
+ * pkts:     number of packets to send.
+ * cmd:      scratch descriptor buffer, set up by cn10k_nix_tx_skeleton().
+ * flags:    Tx offload flag mask.
+ *
+ * Returns 'pkts' once every burst has been submitted.
+ * NOTE(review): NIX_XMIT_FC_OR_RETURN() presumably returns early when flow
+ * control does not allow 'pkts' packets; its body is not visible here.
+ */
+static __rte_always_inline uint16_t
+cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
+                        uint16_t pkts, uint64_t *cmd, const uint16_t flags)
+{
+       struct cn10k_eth_txq *txq = tx_queue;
+       uintptr_t pa0, pa1, lmt_addr = txq->lmt_base;
+       const rte_iova_t io_addr = txq->io_addr;
+       uint16_t segdw, lmt_id, burst, left, i;
+       uint64_t data0, data1;
+       /* Initialised so that the value handed to cn10k_nix_xmit_prepare()
+        * is never indeterminate when NIX_TX_OFFLOAD_TSO_F is not set.
+        */
+       uint64_t lso_tun_fmt = 0;
+       __uint128_t data128;
+       uint16_t shft;
+
+       NIX_XMIT_FC_OR_RETURN(txq, pkts);
+
+       cn10k_nix_tx_skeleton(txq, cmd, flags);
+
+       /* Reduce the cached count */
+       txq->fc_cache_pkts -= pkts;
+
+       if (flags & NIX_TX_OFFLOAD_TSO_F)
+               lso_tun_fmt = txq->lso_tun_fmt;
+
+       /* Get LMT base address and LMT ID as lcore id */
+       ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+       left = pkts;
+again:
+       burst = left > 32 ? 32 : left;
+       /* Size fields start at bit 16 of data128, 3 bits per packet */
+       shft = 16;
+       data128 = 0;
+       for (i = 0; i < burst; i++) {
+               /* Perform header writes for TSO, barrier at
+                * lmt steorl will suffice.
+                */
+               if (flags & NIX_TX_OFFLOAD_TSO_F)
+                       cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
+
+               cn10k_nix_xmit_prepare(tx_pkts[i], cmd, lmt_addr, flags,
+                                      lso_tun_fmt);
+               /* Store sg list directly on lmt line */
+               segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)lmt_addr,
+                                              flags);
+               /* One LMT line per packet */
+               lmt_addr += (1ULL << ROC_LMT_LINE_SIZE_LOG2);
+               data128 |= (((__uint128_t)(segdw - 1)) << shft);
+               shft += 3;
+       }
+
+       /* Low half carries packets 0..15, high half packets 16..31 */
+       data0 = (uint64_t)data128;
+       data1 = (uint64_t)(data128 >> 64);
+       /* Make data0 similar to data1 */
+       data0 >>= 16;
+       /* Trigger LMTST */
+       if (burst > 16) {
+               /* Size of the first line goes into the I/O address */
+               pa0 = io_addr | (data0 & 0x7) << 4;
+               data0 &= ~0x7ULL;
+               /* Move lmtst1..15 sz to bits 63:19 */
+               data0 <<= 16;
+               /* Full first group: 16 lines, encoded as count - 1 */
+               data0 |= (15ULL << 12);
+               data0 |= (uint64_t)lmt_id;
+
+               /* STEOR0 */
+               roc_lmt_submit_steorl(data0, pa0);
+
+               pa1 = io_addr | (data1 & 0x7) << 4;
+               data1 &= ~0x7ULL;
+               data1 <<= 16;
+               /* Remaining (burst - 16) lines, encoded as count - 1 */
+               data1 |= ((uint64_t)(burst - 17)) << 12;
+               data1 |= (uint64_t)(lmt_id + 16);
+
+               /* STEOR1 */
+               roc_lmt_submit_steorl(data1, pa1);
+       } else if (burst) {
+               pa0 = io_addr | (data0 & 0x7) << 4;
+               data0 &= ~0x7ULL;
+               /* Move lmtst1..15 sz to bits 63:19 */
+               data0 <<= 16;
+               data0 |= ((uint64_t)(burst - 1)) << 12;
+               data0 |= (uint64_t)lmt_id;
+
+               /* STEOR0 */
+               roc_lmt_submit_steorl(data0, pa0);
+       }
+
+       left -= burst;
+       rte_io_wmb();
+       if (left) {
+               /* Start processing another burst */
+               tx_pkts += burst;
+               /* Reset lmt base addr */
+               lmt_addr -= (1ULL << ROC_LMT_LINE_SIZE_LOG2);
+               lmt_addr &= (~(BIT_ULL(ROC_LMT_BASE_PER_CORE_LOG2) - 1));
+               goto again;
+       }
+
+       return pkts;
+}
+
 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
 #define VLAN_F      NIX_TX_OFFLOAD_VLAN_QINQ_F
@@ -496,6 +664,9 @@ T(tso_noff_vlan_ol3ol4csum_l3l4csum,        1, 1, 1, 1, 1,  6,              \
 
 #define T(name, f4, f3, f2, f1, f0, sz, flags)                                 \
        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
+               void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
+       /* Multi-segment Tx burst prototype for the same mode. */              \
+       uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
 
 NIX_TX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_tx_mseg.c b/drivers/net/cnxk/cn10k_tx_mseg.c
new file mode 100644 (file)
index 0000000..6ae6907
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_tx.h"
+/* Template for the multi-segment Tx burst entry points: each
+ * NIX_TX_FASTPATH_MODES entry expands to one non-inlined function
+ * cn10k_nix_xmit_pkts_mseg_<name>() that calls the always-inline
+ * cn10k_nix_xmit_pkts_mseg() with that mode's flags plus NIX_TX_MULTI_SEG_F.
+ */
+#define T(name, f4, f3, f2, f1, f0, sz, flags)                                \
+       uint16_t __rte_noinline __rte_hot                                      \
+               cn10k_nix_xmit_pkts_mseg_##name(void *tx_queue,                \
+                                               struct rte_mbuf **tx_pkts,     \
+                                               uint16_t pkts)                 \
+       {                                                                      \
+               uint64_t cmd[(sz)];                                            \
+               /* 'cmd' is a scratch buffer of 'sz' 64-bit words. */          \
+               /* TSO needs inner L3/L4 checksum: send nothing without it */  \
+               if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
+                   !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
+                       return 0;                                              \
+               return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,  \
+                                               (flags) | NIX_TX_MULTI_SEG_F); \
+       }
+/* Expand the template once per fast-path mode. */
+NIX_TX_FASTPATH_MODES
+#undef T
index 27f2355..3bfd30a 100644 (file)
@@ -33,6 +33,7 @@ sources += files(
         'cn10k_rx_mseg.c',
         'cn10k_rx_vec.c',
         'cn10k_tx.c',
+        'cn10k_tx_mseg.c',
 )
 
 deps += ['bus_pci', 'cryptodev', 'eventdev', 'security']