]> git.droids-corp.org - dpdk.git/commitdiff
net/cnxk: align prefetches to CN10K cache model
authorPavan Nikhilesh <pbhagavatula@marvell.com>
Thu, 24 Feb 2022 16:10:12 +0000 (21:40 +0530)
committerJerin Jacob <jerinj@marvell.com>
Thu, 24 Feb 2022 20:31:48 +0000 (21:31 +0100)
Align prefetches for CN10K cache model for vWQE in Rx and Tx.
Move mbuf->next NULL assignment to Tx path and enabled it only
when multi segments offload is enabled to reduce L1 pressure.
Add macros to detect corrupted mbuf->next values when
MEMPOOL_DEBUG is set.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
drivers/event/cnxk/cn10k_worker.h
drivers/net/cnxk/cn10k_rx.h
drivers/net/cnxk/cn10k_tx.h

index 0226896d44e6ce463a4189548807c9a8cf6dfed5..0d0685e28e1abf3713e8f788c6650ba3981dbc24 100644 (file)
@@ -118,23 +118,23 @@ cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
        uint64_t aura_handle, laddr;
        uint16_t nb_mbufs, non_vec;
        uint16_t lmt_id, d_off;
+       struct rte_mbuf **wqe;
        struct rte_mbuf *mbuf;
        uint8_t loff = 0;
        uint64_t sa_base;
-       uint64_t **wqe;
        int i;
 
        mbuf_init |= ((uint64_t)port_id) << 48;
        vec = (struct rte_event_vector *)vwqe;
-       wqe = vec->u64s;
+       wqe = vec->mbufs;
 
-       rte_prefetch_non_temporal(&vec->ptrs[0]);
+       rte_prefetch0(&vec->ptrs[0]);
 #define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
        for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
-               rte_prefetch_non_temporal(&vec->ptrs[i]);
+               rte_prefetch0(&vec->ptrs[i]);
 
        nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
-       nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+       nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs,
                                              flags | NIX_RX_VWQE_F, lookup_mem,
                                              tstamp, lbase);
        wqe += nb_mbufs;
@@ -182,7 +182,7 @@ cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
                cn10k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
                                        flags & NIX_RX_OFFLOAD_TSTAMP_F,
                                        (uint64_t *)tstamp_ptr);
-               wqe[0] = (uint64_t *)mbuf;
+               wqe[0] = (struct rte_mbuf *)mbuf;
                non_vec--;
                wqe++;
        }
@@ -620,6 +620,7 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
                                ev->sched_type, txq_data, flags);
                }
                rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+               rte_prefetch0(ws);
                return (meta & 0xFFFF);
        }
 
index 236a1dca6e4d410af9e7d62ca450985117bb3d18..de5e41483bd61b9091f266890a6940cdc264135c 100644 (file)
        (((f) & NIX_RX_VWQE_F) ?                                               \
                       (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) : \
                       (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+#define CQE_PTR_DIFF(b, i, o, f)                                               \
+       (((f) & NIX_RX_VWQE_F) ?                                               \
+                (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) - (o)) :       \
+                      (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) - (o)))
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+static inline void
+nix_mbuf_validate_next(struct rte_mbuf *m)
+{
+       if (m->nb_segs == 1 && m->next) {
+               rte_panic("mbuf->next[%p] valid when mbuf->nb_segs is %d",
+                       m->next, m->nb_segs);
+       }
+}
+#else
+static inline void
+nix_mbuf_validate_next(struct rte_mbuf *m)
+{
+       RTE_SET_USED(m);
+}
+#endif
 
 union mbuf_initializer {
        struct {
@@ -674,17 +695,66 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
                        cq0 = (uintptr_t)&mbufs[packets];
                }
 
-               /* Prefetch N desc ahead */
-               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 4, 64, flags));
-               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 5, 64, flags));
-               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 6, 64, flags));
-               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 7, 64, flags));
-
-               /* Get NIX_RX_SG_S for size and buffer pointer */
-               cq0_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 0, 64, flags));
-               cq1_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 1, 64, flags));
-               cq2_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 2, 64, flags));
-               cq3_w8 = vld1q_u64(CQE_PTR_OFF(cq0, 3, 64, flags));
+               if (flags & NIX_RX_VWQE_F) {
+                       if (pkts - packets > 4) {
+                               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0,
+                                       4, 0, flags));
+                               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0,
+                                       5, 0, flags));
+                               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0,
+                                       6, 0, flags));
+                               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0,
+                                       7, 0, flags));
+
+                               if (likely(pkts - packets > 8)) {
+                                       rte_prefetch1(CQE_PTR_OFF(cq0,
+                                               8, 0, flags));
+                                       rte_prefetch1(CQE_PTR_OFF(cq0,
+                                               9, 0, flags));
+                                       rte_prefetch1(CQE_PTR_OFF(cq0,
+                                               10, 0, flags));
+                                       rte_prefetch1(CQE_PTR_OFF(cq0,
+                                               11, 0, flags));
+                                       if (pkts - packets > 12) {
+                                               rte_prefetch1(CQE_PTR_OFF(cq0,
+                                                       12, 0, flags));
+                                               rte_prefetch1(CQE_PTR_OFF(cq0,
+                                                       13, 0, flags));
+                                               rte_prefetch1(CQE_PTR_OFF(cq0,
+                                                       14, 0, flags));
+                                               rte_prefetch1(CQE_PTR_OFF(cq0,
+                                                       15, 0, flags));
+                                       }
+                               }
+
+                               rte_prefetch0(CQE_PTR_DIFF(cq0,
+                                       4, RTE_PKTMBUF_HEADROOM, flags));
+                               rte_prefetch0(CQE_PTR_DIFF(cq0,
+                                       5, RTE_PKTMBUF_HEADROOM, flags));
+                               rte_prefetch0(CQE_PTR_DIFF(cq0,
+                                       6, RTE_PKTMBUF_HEADROOM, flags));
+                               rte_prefetch0(CQE_PTR_DIFF(cq0,
+                                       7, RTE_PKTMBUF_HEADROOM, flags));
+
+                               if (likely(pkts - packets > 8)) {
+                                       rte_prefetch0(CQE_PTR_DIFF(cq0,
+                                               8, RTE_PKTMBUF_HEADROOM, flags));
+                                       rte_prefetch0(CQE_PTR_DIFF(cq0,
+                                               9, RTE_PKTMBUF_HEADROOM, flags));
+                                       rte_prefetch0(CQE_PTR_DIFF(cq0,
+                                               10, RTE_PKTMBUF_HEADROOM, flags));
+                                       rte_prefetch0(CQE_PTR_DIFF(cq0,
+                                               11, RTE_PKTMBUF_HEADROOM, flags));
+                               }
+                       }
+               } else {
+                       if (pkts - packets > 4) {
+                               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 4, 64, flags));
+                               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 5, 64, flags));
+                               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 6, 64, flags));
+                               rte_prefetch_non_temporal(CQE_PTR_OFF(cq0, 7, 64, flags));
+                       }
+               }
 
                if (!(flags & NIX_RX_VWQE_F)) {
                        /* Get NIX_RX_SG_S for size and buffer pointer */
@@ -995,19 +1065,18 @@ cn10k_nix_recv_pkts_vector(void *args, struct rte_mbuf **mbufs, uint16_t pkts,
                        nix_cqe_xtract_mseg((union nix_rx_parse_u *)
                                            (CQE_PTR_OFF(cq0, 3, 8, flags)),
                                            mbuf3, mbuf_initializer, flags);
-               } else {
-                       /* Update that no more segments */
-                       mbuf0->next = NULL;
-                       mbuf1->next = NULL;
-                       mbuf2->next = NULL;
-                       mbuf3->next = NULL;
                }
 
-               /* Prefetch mbufs */
-               roc_prefetch_store_keep(mbuf0);
-               roc_prefetch_store_keep(mbuf1);
-               roc_prefetch_store_keep(mbuf2);
-               roc_prefetch_store_keep(mbuf3);
+               /* Mark mempool obj as "get" as it is alloc'ed by NIX */
+               RTE_MEMPOOL_CHECK_COOKIES(mbuf0->pool, (void **)&mbuf0, 1, 1);
+               RTE_MEMPOOL_CHECK_COOKIES(mbuf1->pool, (void **)&mbuf1, 1, 1);
+               RTE_MEMPOOL_CHECK_COOKIES(mbuf2->pool, (void **)&mbuf2, 1, 1);
+               RTE_MEMPOOL_CHECK_COOKIES(mbuf3->pool, (void **)&mbuf3, 1, 1);
+
+               nix_mbuf_validate_next(mbuf0);
+               nix_mbuf_validate_next(mbuf1);
+               nix_mbuf_validate_next(mbuf2);
+               nix_mbuf_validate_next(mbuf3);
 
                packets += NIX_DESCS_PER_LOOP;
 
index ec6366168c588fdc6b23a569cbaf39ebac154e02..695e3ed35409ab21914374cf3d5fd4f5c7341fa5 100644 (file)
@@ -2569,6 +2569,13 @@ again:
                        lnum += 1;
                }
 
+               if (flags & NIX_TX_MULTI_SEG_F) {
+                       tx_pkts[0]->next = NULL;
+                       tx_pkts[1]->next = NULL;
+                       tx_pkts[2]->next = NULL;
+                       tx_pkts[3]->next = NULL;
+               }
+
                tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
        }