net/cnxk: support multi-segment vector Rx
authorPavan Nikhilesh <pbhagavatula@marvell.com>
Tue, 29 Jun 2021 07:44:18 +0000 (13:14 +0530)
committerJerin Jacob <jerinj@marvell.com>
Wed, 30 Jun 2021 02:47:59 +0000 (04:47 +0200)
Add a multi-segment Rx vector routine: form the primary mbufs using the
vector path, then switch to the scalar path when extracting segments.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
drivers/net/cnxk/cn10k_rx.c
drivers/net/cnxk/cn10k_rx.h
drivers/net/cnxk/cn10k_rx_vec_mseg.c [new file with mode: 0644]
drivers/net/cnxk/cn9k_rx.c
drivers/net/cnxk/cn9k_rx.h
drivers/net/cnxk/cn9k_rx_vec_mseg.c [new file with mode: 0644]
drivers/net/cnxk/meson.build

index 5c956c0..3a9fd71 100644 (file)
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
                [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
                [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
                [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
 }
 
 void
@@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
 #undef R
        };
 
-       /* For PTP enabled, scalar rx function should be chosen as most of the
-        * PTP apps are implemented to rx burst 1 pkt.
-        */
-       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
-               pick_rx_func(eth_dev, nix_eth_rx_burst);
-       else
-               pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+       const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+       [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
 
-       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
-               pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+               NIX_RX_FASTPATH_MODES
+#undef R
+       };
 
        /* Copy multi seg version with no offload for tear down sequence */
        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                dev->rx_pkt_burst_no_offload =
                        nix_eth_rx_burst_mseg[0][0][0][0][0][0];
-       rte_mb();
+
+       /* For PTP enabled, scalar rx function should be chosen as most of the
+        * PTP apps are implemented to rx burst 1 pkt.
+        */
+       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+               if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+                       return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+               return pick_rx_func(eth_dev, nix_eth_rx_burst);
+       }
+
+       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+               return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+       return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
 }
index 1cc37cb..5926ff7 100644 (file)
@@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
 
        sg = *(const uint64_t *)(rx + 1);
        nb_segs = (sg >> 48) & 0x3;
-       mbuf->nb_segs = nb_segs;
+
+       if (nb_segs == 1) {
+               mbuf->next = NULL;
+               return;
+       }
+
+       mbuf->pkt_len = rx->pkt_lenm1 + 1;
        mbuf->data_len = sg & 0xFFFF;
+       mbuf->nb_segs = nb_segs;
        sg = sg >> 16;
 
        eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
@@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
                ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
 
        mbuf->ol_flags = ol_flags;
-       *(uint64_t *)(&mbuf->rearm_data) = val;
        mbuf->pkt_len = len;
+       mbuf->data_len = len;
+       *(uint64_t *)(&mbuf->rearm_data) = val;
 
-       if (flag & NIX_RX_MULTI_SEG_F) {
+       if (flag & NIX_RX_MULTI_SEG_F)
                nix_cqe_xtract_mseg(rx, mbuf, val);
-       } else {
-               mbuf->data_len = len;
+       else
                mbuf->next = NULL;
-       }
 }
 
 static inline uint16_t
@@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
                vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
                vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
 
-               /* Update that no more segments */
-               mbuf0->next = NULL;
-               mbuf1->next = NULL;
-               mbuf2->next = NULL;
-               mbuf3->next = NULL;
-
                /* Store the mbufs to rx_pkts */
                vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
                vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
 
+               if (flags & NIX_RX_MULTI_SEG_F) {
+                       /* Multi segment is enable build mseg list for
+                        * individual mbufs in scalar mode.
+                        */
+                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+                                           (cq0 + CQE_SZ(0) + 8), mbuf0,
+                                           mbuf_initializer);
+                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+                                           (cq0 + CQE_SZ(1) + 8), mbuf1,
+                                           mbuf_initializer);
+                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+                                           (cq0 + CQE_SZ(2) + 8), mbuf2,
+                                           mbuf_initializer);
+                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+                                           (cq0 + CQE_SZ(3) + 8), mbuf3,
+                                           mbuf_initializer);
+               } else {
+                       /* Update that no more segments */
+                       mbuf0->next = NULL;
+                       mbuf1->next = NULL;
+                       mbuf2->next = NULL;
+                       mbuf3->next = NULL;
+               }
+
                /* Prefetch mbufs */
                roc_prefetch_store_keep(mbuf0);
                roc_prefetch_store_keep(mbuf1);
@@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss,     1, 1, 1, 1, 1, 1,                              \
                void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
                                                                               \
        uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name(      \
+               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
+                                                                              \
+       uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
                void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
 
 NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
new file mode 100644 (file)
index 0000000..04d1e46
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+       uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
+               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
+       {                                                                      \
+               return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,     \
+                                         (flags) | NIX_RX_MULTI_SEG_F);       \
+       }
+
+NIX_RX_FASTPATH_MODES
+#undef R
index 0acedd0..d293d4e 100644 (file)
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
                [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
                [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
                [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
 }
 
 void
@@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
 #undef R
        };
 
-       /* For PTP enabled, scalar rx function should be chosen as most of the
-        * PTP apps are implemented to rx burst 1 pkt.
-        */
-       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
-               pick_rx_func(eth_dev, nix_eth_rx_burst);
-       else
-               pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+       const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+       [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
 
-       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
-               pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+               NIX_RX_FASTPATH_MODES
+#undef R
+       };
 
        /* Copy multi seg version with no offload for tear down sequence */
        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                dev->rx_pkt_burst_no_offload =
                        nix_eth_rx_burst_mseg[0][0][0][0][0][0];
-       rte_mb();
+
+       /* For PTP enabled, scalar rx function should be chosen as most of the
+        * PTP apps are implemented to rx burst 1 pkt.
+        */
+       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+               if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+                       return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+               return pick_rx_func(eth_dev, nix_eth_rx_burst);
+       }
+
+       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+               return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+       return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
 }
index 10ef5c6..5ae9e81 100644 (file)
@@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
 
        sg = *(const uint64_t *)(rx + 1);
        nb_segs = (sg >> 48) & 0x3;
-       mbuf->nb_segs = nb_segs;
+
+       if (nb_segs == 1) {
+               mbuf->next = NULL;
+               return;
+       }
+
+       mbuf->pkt_len = rx->pkt_lenm1 + 1;
        mbuf->data_len = sg & 0xFFFF;
+       mbuf->nb_segs = nb_segs;
        sg = sg >> 16;
 
        eol = ((const rte_iova_t *)(rx + 1) +
@@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
                        nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
 
        mbuf->ol_flags = ol_flags;
-       *(uint64_t *)(&mbuf->rearm_data) = val;
        mbuf->pkt_len = len;
+       mbuf->data_len = len;
+       *(uint64_t *)(&mbuf->rearm_data) = val;
 
-       if (flag & NIX_RX_MULTI_SEG_F) {
+       if (flag & NIX_RX_MULTI_SEG_F)
                nix_cqe_xtract_mseg(rx, mbuf, val);
-       } else {
-               mbuf->data_len = len;
+       else
                mbuf->next = NULL;
-       }
 }
 
 static inline uint16_t
@@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
                vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
                vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
 
-               /* Update that no more segments */
-               mbuf0->next = NULL;
-               mbuf1->next = NULL;
-               mbuf2->next = NULL;
-               mbuf3->next = NULL;
-
                /* Store the mbufs to rx_pkts */
                vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
                vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
 
+               if (flags & NIX_RX_MULTI_SEG_F) {
+                       /* Multi segment is enable build mseg list for
+                        * individual mbufs in scalar mode.
+                        */
+                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+                                           (cq0 + CQE_SZ(0) + 8), mbuf0,
+                                           mbuf_initializer);
+                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+                                           (cq0 + CQE_SZ(1) + 8), mbuf1,
+                                           mbuf_initializer);
+                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+                                           (cq0 + CQE_SZ(2) + 8), mbuf2,
+                                           mbuf_initializer);
+                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+                                           (cq0 + CQE_SZ(3) + 8), mbuf3,
+                                           mbuf_initializer);
+               } else {
+                       /* Update that no more segments */
+                       mbuf0->next = NULL;
+                       mbuf1->next = NULL;
+                       mbuf2->next = NULL;
+                       mbuf3->next = NULL;
+               }
+
                /* Prefetch mbufs */
                roc_prefetch_store_keep(mbuf0);
                roc_prefetch_store_keep(mbuf1);
@@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss,     1, 1, 1, 1, 1, 1,                              \
                void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
                                                                               \
        uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name(       \
+               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
+                                                                              \
+       uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
                void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
 
 NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
new file mode 100644 (file)
index 0000000..e46d8a4
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+       uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
+               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
+       {                                                                      \
+               return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,      \
+                                                (flags) |                     \
+                                                        NIX_RX_MULTI_SEG_F);  \
+       }
+
+NIX_RX_FASTPATH_MODES
+#undef R
index 7ae682a..8683554 100644 (file)
@@ -26,6 +26,7 @@ sources += files(
         'cn9k_rx.c',
         'cn9k_rx_mseg.c',
         'cn9k_rx_vec.c',
+        'cn9k_rx_vec_mseg.c',
         'cn9k_tx.c',
         'cn9k_tx_mseg.c',
         'cn9k_tx_vec.c',
@@ -37,6 +38,7 @@ sources += files(
         'cn10k_rx.c',
         'cn10k_rx_mseg.c',
         'cn10k_rx_vec.c',
+        'cn10k_rx_vec_mseg.c',
         'cn10k_tx.c',
         'cn10k_tx_mseg.c',
         'cn10k_tx_vec.c',