net/cnxk: make inline inbound device usage the default
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index ada230e..c96048f 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -92,12 +92,16 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 }
 
 static __rte_always_inline void
-cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t __mbuf, uint8_t port_id,
                  const uint32_t tag, const uint32_t flags,
                  const void *const lookup_mem)
 {
        const uint64_t mbuf_init = 0x100010000ULL | RTE_PKTMBUF_HEADROOM |
                                   (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
+       struct rte_mbuf *mbuf = (struct rte_mbuf *)__mbuf;
+
+       /* Mark the mempool object as "get" since it was allocated by NIX */
+       RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
 
        cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
                              (struct rte_mbuf *)mbuf, lookup_mem,
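
A note on the RTE_MEMPOOL_CHECK_COOKIES call added above, with a minimal
sketch of the pattern. This assumes a debug build: the macro expands to
rte_mempool_check_cookies() only when RTE_LIBRTE_MEMPOOL_DEBUG is defined
and compiles away otherwise.

    #include <rte_mbuf.h>
    #include <rte_mempool.h>

    /* The NIX/NPA hardware pulled this buffer from the pool without
     * going through rte_mempool_get(), so the pool's debug cookie
     * still says "free".  Flip it to "obtained" before the mbuf
     * reaches the application; the final argument 1 means "object is
     * expected to be free, mark it as allocated". */
    static inline void
    mark_hw_allocated_mbuf(struct rte_mbuf *mbuf)
    {
            RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
    }
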
@@ -114,17 +118,23 @@ cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
        uint64_t aura_handle, laddr;
        uint16_t nb_mbufs, non_vec;
        uint16_t lmt_id, d_off;
+       struct rte_mbuf **wqe;
        struct rte_mbuf *mbuf;
        uint8_t loff = 0;
        uint64_t sa_base;
-       uint64_t **wqe;
+       int i;
 
        mbuf_init |= ((uint64_t)port_id) << 48;
        vec = (struct rte_event_vector *)vwqe;
-       wqe = vec->u64s;
+       wqe = vec->mbufs;
+
+       rte_prefetch0(&vec->ptrs[0]);
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+       for (i = OBJS_PER_CLINE; i < vec->nb_elem; i += OBJS_PER_CLINE)
+               rte_prefetch0(&vec->ptrs[i]);
 
        nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
-       nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, vec->mbufs, nb_mbufs,
+       nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init, wqe, nb_mbufs,
                                              flags | NIX_RX_VWQE_F, lookup_mem,
                                              tstamp, lbase);
        wqe += nb_mbufs;
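
The prefetch loop added in the hunk above touches one pointer per cache
line rather than every element; a stand-alone sketch of the same pattern
(helper name hypothetical):

    #include <stdint.h>
    #include <rte_common.h>
    #include <rte_prefetch.h>

    #define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *)) /* 8 on 64B lines */

    /* Warm the pointer array ahead of the dereference loop: one
     * prefetch per cache line covers OBJS_PER_CLINE pointers. */
    static inline void
    prefetch_ptr_array(void **ptrs, uint16_t nb_elem)
    {
            int i;

            for (i = 0; i < nb_elem; i += OBJS_PER_CLINE)
                    rte_prefetch0(&ptrs[i]);
    }
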
@@ -153,12 +163,17 @@ cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
                mbuf = (struct rte_mbuf *)((char *)cqe -
                                           sizeof(struct rte_mbuf));
 
+               /* Mark the mempool object as "get" since it was allocated by NIX */
+               RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
                /* Translate meta to mbuf */
                if (flags & NIX_RX_OFFLOAD_SECURITY_F) {
                        const uint64_t cq_w1 = *((const uint64_t *)cqe + 1);
+                       const uint64_t cq_w5 = *((const uint64_t *)cqe + 5);
 
-                       mbuf = nix_sec_meta_to_mbuf_sc(cq_w1, sa_base, laddr,
-                                                      &loff, mbuf, d_off);
+                       mbuf = nix_sec_meta_to_mbuf_sc(cq_w1, cq_w5, sa_base, laddr,
+                                                      &loff, mbuf, d_off,
+                                                      flags, mbuf_init);
                }
 
                cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
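
The mbuf = (char *)cqe - sizeof(struct rte_mbuf) recovery above leans on
the cnxk buffer layout; a hedged sketch of that arithmetic (the layout
description is my reading of the driver, not taken from a datasheet):

    #include <stdint.h>
    #include <rte_mbuf.h>

    /* Assumed layout of a cnxk Rx buffer:
     *   [struct rte_mbuf][CQE/WQE header][headroom][packet data]
     * so a completion entry address maps back to its mbuf by pointer
     * arithmetic alone -- no lookup table required. */
    static inline struct rte_mbuf *
    cqe_to_mbuf(uintptr_t cqe)
    {
            return (struct rte_mbuf *)(cqe - sizeof(struct rte_mbuf));
    }
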
@@ -166,11 +181,10 @@ cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
                /* Extract tstamp, if PTP enabled */
                tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)cqe) +
                                           CNXK_SSO_WQE_SG_PTR);
-               cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
+               cn10k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp,
                                        flags & NIX_RX_OFFLOAD_TSTAMP_F,
-                                       flags & NIX_RX_MULTI_SEG_F,
                                        (uint64_t *)tstamp_ptr);
-               wqe[0] = (uint64_t *)mbuf;
+               wqe[0] = (struct rte_mbuf *)mbuf;
                non_vec--;
                wqe++;
        }
@@ -191,15 +205,13 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
                uint64_t u64[2];
        } gw;
        uint64_t tstamp_ptr;
-       uint64_t mbuf;
 
        gw.get_work = ws->gw_wdata;
 #if defined(RTE_ARCH_ARM64) && !defined(__clang__)
        asm volatile(
                PLT_CPU_FEATURE_PREAMBLE
-               "caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
-               "sub %[mbuf], %H[wdata], #0x80                          \n"
-               : [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf)
+               "caspal %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
+               : [wdata] "+r"(gw.get_work)
                : [gw_loc] "r"(ws->base + SSOW_LF_GWS_OP_GET_WORK0)
                : "memory");
 #else
@@ -208,14 +220,12 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
                roc_load_pair(gw.u64[0], gw.u64[1],
                              ws->base + SSOW_LF_GWS_WQE0);
        } while (gw.u64[0] & BIT_ULL(63));
-       mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
 #endif
        ws->gw_rdata = gw.u64[0];
-       gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
-                   (gw.u64[0] & (0x3FFull << 36)) << 4 |
-                   (gw.u64[0] & 0xffffffff);
-
-       if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+       if (gw.u64[1]) {
+               gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
+                           (gw.u64[0] & (0x3FFull << 36)) << 4 |
+                           (gw.u64[0] & 0xffffffff);
                if ((flags & CPT_RX_WQE_F) &&
                    (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
                     RTE_EVENT_TYPE_CRYPTODEV)) {
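
For the shift-and-mask block retained above (now gated on gw.u64[1] being
non-zero instead of an SSO_TT_EMPTY check): it repacks the SSO GET_WORK
response word into rte_event layout. A sketch with the field moves spelled
out; the bit positions are my reading of the expression, not quoted from
the hardware manual.

    #include <stdint.h>

    /* SSO response word0 -> struct rte_event word0:
     *   [31:0]  tag       -> kept in place as the event tag
     *   [33:32] tag type  -> << 6, lands at sched_type bits [39:38]
     *   [45:36] group     -> << 4, lands at queue_id bits [49:40]
     */
    static inline uint64_t
    sso_gw_to_event_w0(uint64_t w0)
    {
            return (w0 & (0x3ull << 32)) << 6 |
                   (w0 & (0x3FFull << 36)) << 4 |
                   (w0 & 0xffffffff);
    }
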
@@ -223,28 +233,37 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
                } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
                           RTE_EVENT_TYPE_ETHDEV) {
                        uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+                       uint64_t mbuf;
 
+                       mbuf = gw.u64[1] - sizeof(struct rte_mbuf);
+                       rte_prefetch0((void *)mbuf);
                        if (flags & NIX_RX_OFFLOAD_SECURITY_F) {
+                               const uint64_t mbuf_init = 0x100010000ULL |
+                                       RTE_PKTMBUF_HEADROOM |
+                                       (flags & NIX_RX_OFFLOAD_TSTAMP_F ? 8 : 0);
                                struct rte_mbuf *m;
                                uintptr_t sa_base;
                                uint64_t iova = 0;
                                uint8_t loff = 0;
                                uint16_t d_off;
                                uint64_t cq_w1;
+                               uint64_t cq_w5;
 
                                m = (struct rte_mbuf *)mbuf;
                                d_off = (uintptr_t)(m->buf_addr) - (uintptr_t)m;
                                d_off += RTE_PKTMBUF_HEADROOM;
 
                                cq_w1 = *(uint64_t *)(gw.u64[1] + 8);
+                               cq_w5 = *(uint64_t *)(gw.u64[1] + 40);
 
                                sa_base =
                                        cnxk_nix_sa_base_get(port, lookup_mem);
                                sa_base &= ~(ROC_NIX_INL_SA_BASE_ALIGN - 1);
 
                                mbuf = (uint64_t)nix_sec_meta_to_mbuf_sc(
-                                       cq_w1, sa_base, (uintptr_t)&iova, &loff,
-                                       (struct rte_mbuf *)mbuf, d_off);
+                                       cq_w1, cq_w5, sa_base, (uintptr_t)&iova, &loff,
+                                       (struct rte_mbuf *)mbuf, d_off, flags,
+                                       mbuf_init | ((uint64_t)port) << 48);
                                if (loff)
                                        roc_npa_aura_op_free(m->pool->pool_id,
                                                             0, iova);
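
In the security branch above, d_off is the distance from the start of the
mbuf structure to where packet data begins; a small sketch of that
computation (it mirrors the two lines in the hunk):

    #include <stdint.h>
    #include <rte_mbuf.h>

    /* buf_addr - mbuf spans the mbuf header plus any private area;
     * adding the configured headroom yields the data offset that the
     * meta-to-mbuf translation needs. */
    static inline uint16_t
    sec_mbuf_data_off(const struct rte_mbuf *m)
    {
            return (uint16_t)((uintptr_t)m->buf_addr - (uintptr_t)m) +
                   RTE_PKTMBUF_HEADROOM;
    }
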
@@ -258,10 +277,9 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
                        tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
                                                            gw.u64[1]) +
                                                   CNXK_SSO_WQE_SG_PTR);
-                       cnxk_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
+                       cn10k_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
                                                ws->tstamp,
                                                flags & NIX_RX_OFFLOAD_TSTAMP_F,
-                                               flags & NIX_RX_MULTI_SEG_F,
                                                (uint64_t *)tstamp_ptr);
                        gw.u64[1] = mbuf;
                } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
@@ -275,6 +293,10 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
                        *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
                        cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem,
                                           ws->tstamp, ws->lmt_base);
+                       /* Mark vector mempool object as "get" */
+                       RTE_MEMPOOL_CHECK_COOKIES(
+                               rte_mempool_from_obj((void *)gw.u64[1]),
+                               (void **)&gw.u64[1], 1, 1);
                }
        }
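
The vector case above has no pool pointer handy, so it uses
rte_mempool_from_obj(), which recovers the owning mempool from the
per-object header stored just before the object; a sketch of the same
pattern:

    #include <rte_mempool.h>

    /* Mark any mempool-backed object as "obtained" when only the
     * object pointer is known; the object header records its pool.
     * Debug-build no-op, as with the mbuf variant earlier. */
    static inline void
    mark_obj_as_get(void *obj)
    {
            RTE_MEMPOOL_CHECK_COOKIES(rte_mempool_from_obj(obj), &obj, 1, 1);
    }
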
 
@@ -391,6 +413,46 @@ uint16_t __rte_hot cn10k_sso_hws_ca_enq(void *port, struct rte_event ev[],
        uint16_t __rte_hot cn10k_sso_hws_deq_tmo_ca_seg_##name(                \
                void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
        uint16_t __rte_hot cn10k_sso_hws_deq_tmo_ca_seg_burst_##name(          \
+               void *port, struct rte_event ev[], uint16_t nb_events,         \
+               uint64_t timeout_ticks);                                       \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_##name(                      \
+               void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_burst_##name(                \
+               void *port, struct rte_event ev[], uint16_t nb_events,         \
+               uint64_t timeout_ticks);                                       \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_tmo_##name(                  \
+               void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_tmo_burst_##name(            \
+               void *port, struct rte_event ev[], uint16_t nb_events,         \
+               uint64_t timeout_ticks);                                       \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_ca_##name(                   \
+               void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_ca_burst_##name(             \
+               void *port, struct rte_event ev[], uint16_t nb_events,         \
+               uint64_t timeout_ticks);                                       \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_tmo_ca_##name(               \
+               void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_tmo_ca_burst_##name(         \
+               void *port, struct rte_event ev[], uint16_t nb_events,         \
+               uint64_t timeout_ticks);                                       \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_seg_##name(                  \
+               void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_seg_burst_##name(            \
+               void *port, struct rte_event ev[], uint16_t nb_events,         \
+               uint64_t timeout_ticks);                                       \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_tmo_seg_##name(              \
+               void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_tmo_seg_burst_##name(        \
+               void *port, struct rte_event ev[], uint16_t nb_events,         \
+               uint64_t timeout_ticks);                                       \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_ca_seg_##name(               \
+               void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_ca_seg_burst_##name(         \
+               void *port, struct rte_event ev[], uint16_t nb_events,         \
+               uint64_t timeout_ticks);                                       \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_tmo_ca_seg_##name(           \
+               void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+       uint16_t __rte_hot cn10k_sso_hws_reas_deq_tmo_ca_seg_burst_##name(     \
                void *port, struct rte_event ev[], uint16_t nb_events,         \
                uint64_t timeout_ticks);
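
All the _reas_ declarations added above follow the driver's practice of
declaring one dequeue symbol per Rx offload combination through an
X-macro; a hypothetical miniature of that pattern (the mode names here
are illustrative, not the real DPDK list):

    #include <rte_common.h>
    #include <rte_eventdev.h>

    /* One declaration per name in the mode list. */
    #define SSO_DEQ_DECL(name)                                         \
            uint16_t __rte_hot cn10k_sso_hws_deq_##name(               \
                    void *port, struct rte_event *ev,                  \
                    uint64_t timeout_ticks);

    /* Hypothetical mode list; the real one enumerates Rx offload
     * combinations. */
    #define SSO_RX_MODES(R)                                            \
            R(no_offload)                                              \
            R(checksum)                                                \
            R(tstamp)

    SSO_RX_MODES(SSO_DEQ_DECL)
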
 
@@ -465,6 +527,14 @@ cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
                           (BIT_ULL(48) - 1));
 }
 
+static __rte_always_inline void
+cn10k_sso_txq_fc_wait(const struct cn10k_eth_txq *txq)
+{
+       while ((uint64_t)txq->nb_sqb_bufs_adj <=
+              __atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED))
+               ;
+}
+
 static __rte_always_inline void
 cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
                 uint16_t lmt_id, uintptr_t lmt_addr, uint8_t sched_type,
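
cn10k_sso_txq_fc_wait() in the hunk above spins on a counter that the NIX
hardware writes back; the same wait expressed with <stdatomic.h>, to make
the relaxed-ordering choice explicit (field types assumed from the
surrounding code):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Block until the SQB-in-use count written back by hardware drops
     * below the adjusted budget.  Relaxed ordering is enough here: the
     * doorbell write that follows provides the ordering that matters. */
    static inline void
    txq_fc_wait(uint64_t nb_sqb_bufs_adj, _Atomic uint64_t *fc_mem)
    {
            while (nb_sqb_bufs_adj <=
                   atomic_load_explicit(fc_mem, memory_order_relaxed))
                    ;
    }
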
@@ -485,7 +555,8 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
        if (flags & NIX_TX_OFFLOAD_TSO_F)
                cn10k_nix_xmit_prepare_tso(m, flags);
 
-       cn10k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt, &sec);
+       cn10k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt, &sec,
+                              txq->mark_flag, txq->mark_fmt);
 
        laddr = lmt_addr;
        /* Prepare CPT instruction and get nixtx addr if
@@ -512,6 +583,7 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
        if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
                ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
 
+       cn10k_sso_txq_fc_wait(txq);
        roc_lmt_submit_steorl(lmt_id, pa);
 }
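
With the wait wired in, the tail of cn10k_sso_tx_one() performs three
ordered steps; restated below with comments (same calls as the diff):

    /* 1. Ordered/atomic events may only transmit from the head of the
     *    tag chain, so wait for head status first. */
    if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
            ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
    /* 2. Respect SQ flow control so the burst cannot overrun the send
     *    queue (the wait added by this patch). */
    cn10k_sso_txq_fc_wait(txq);
    /* 3. Ring the doorbell: one LMTST submits the prepared command. */
    roc_lmt_submit_steorl(lmt_id, pa);
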
 
@@ -572,7 +644,6 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
        struct cn10k_eth_txq *txq;
        struct rte_mbuf *m;
        uintptr_t lmt_addr;
-       uint16_t ref_cnt;
        uint16_t lmt_id;
 
        lmt_addr = ws->lmt_base;
@@ -598,21 +669,14 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
                                ev->sched_type, txq_data, flags);
                }
                rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
+               rte_prefetch0(ws);
                return (meta & 0xFFFF);
        }
 
        m = ev->mbuf;
-       ref_cnt = m->refcnt;
        cn10k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data,
                         flags);
 
-       if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
-               if (ref_cnt > 1)
-                       return 1;
-       }
-
-       cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_TAG,
-                                ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
        return 1;
 }