event/octeontx2: enhance Tx path cache locality
Author: Pavan Nikhilesh <pbhagavatula@marvell.com>
Tue, 12 Jan 2021 08:39:38 +0000 (14:09 +0530)
Committer: Jerin Jacob <jerinj@marvell.com>
Tue, 26 Jan 2021 09:39:03 +0000 (10:39 +0100)
Enhance Tx path cache locality, remove current tag type and group
stores from datapath to conserve store buffers.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
drivers/event/octeontx2/otx2_evdev.c
drivers/event/octeontx2/otx2_evdev.h
drivers/event/octeontx2/otx2_worker.c
drivers/event/octeontx2/otx2_worker.h
drivers/event/octeontx2/otx2_worker_dual.c
drivers/event/octeontx2/otx2_worker_dual.h
drivers/net/octeontx2/otx2_ethdev_sec_tx.h

index 0fe014c..80a786f 100644 (file)
@@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev)
                ws->port = i;
                base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
                sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base);
+               ws->base[0] = base;
                vws++;
 
                base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
                sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base);
+               ws->base[1] = base;
                vws++;
 
                gws_cookie = ssogws_get_cookie(ws);
@@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev)
                ws->port = i;
                base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12);
                sso_set_port_ops(ws, base);
+               ws->base = base;
 
                gws_cookie = ssogws_get_cookie(ws);
                gws_cookie->event_dev = event_dev;
@@ -1449,18 +1452,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
                        ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]);
                        ws->swtag_req = 0;
                        ws->vws = 0;
-                       ws->ws_state[0].cur_grp = 0;
-                       ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
-                       ws->ws_state[1].cur_grp = 0;
-                       ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY;
                } else {
                        struct otx2_ssogws *ws;
 
                        ws = event_dev->data->ports[i];
                        ssogws_reset(ws);
                        ws->swtag_req = 0;
-                       ws->cur_grp = 0;
-                       ws->cur_tt = SSO_SYNC_EMPTY;
                }
        }
 
@@ -1479,8 +1476,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
                        otx2_write64(enable, ws->grps_base[i] +
                                     SSO_LF_GGRP_QCTL);
                }
-               ws->ws_state[0].cur_grp = 0;
-               ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY;
        } else {
                struct otx2_ssogws *ws = event_dev->data->ports[0];
 
@@ -1492,8 +1487,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable)
                        otx2_write64(enable, ws->grps_base[i] +
                                     SSO_LF_GGRP_QCTL);
                }
-               ws->cur_grp = 0;
-               ws->cur_tt = SSO_SYNC_EMPTY;
        }
 
        /* reset SSO GWS cache */
index 0513cb8..ed9cbc8 100644 (file)
@@ -80,6 +80,7 @@
 
 #define OTX2_SSOW_GET_BASE_ADDR(_GW)        ((_GW) - SSOW_LF_GWS_OP_GET_WORK)
 #define OTX2_SSOW_TT_FROM_TAG(x)           (((x) >> 32) & SSO_TT_EMPTY)
+#define OTX2_SSOW_GRP_FROM_TAG(x)          (((x) >> 36) & 0x3ff)
 
 #define NSEC2USEC(__ns)                        ((__ns) / 1E3)
 #define USEC2NSEC(__us)                 ((__us) * 1E3)
@@ -169,25 +170,24 @@ struct otx2_sso_evdev {
        uintptr_t wqp_op;                                                      \
        uintptr_t swtag_flush_op;                                              \
        uintptr_t swtag_norm_op;                                               \
-       uintptr_t swtag_desched_op;                                            \
-       uint8_t cur_tt;                                                        \
-       uint8_t cur_grp
+       uintptr_t swtag_desched_op;
 
 /* Event port aka GWS */
 struct otx2_ssogws {
        /* Get Work Fastpath data */
        OTX2_SSOGWS_OPS;
-       uint8_t swtag_req;
+       /* PTP timestamp */
+       struct otx2_timesync_info *tstamp;
        void *lookup_mem;
+       uint8_t swtag_req;
        uint8_t port;
        /* Add Work Fastpath data */
        uint64_t xaq_lmt __rte_cache_aligned;
        uint64_t *fc_mem;
        uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
-       /* PTP timestamp */
-       struct otx2_timesync_info *tstamp;
        /* Tx Fastpath data */
-       uint8_t tx_adptr_data[] __rte_cache_aligned;
+       uint64_t base __rte_cache_aligned;
+       uint8_t tx_adptr_data[];
 } __rte_cache_aligned;
 
 struct otx2_ssogws_state {
@@ -197,18 +197,19 @@ struct otx2_ssogws_state {
 struct otx2_ssogws_dual {
        /* Get Work Fastpath data */
        struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */
+       /* PTP timestamp */
+       struct otx2_timesync_info *tstamp;
+       void *lookup_mem;
        uint8_t swtag_req;
        uint8_t vws; /* Ping pong bit */
-       void *lookup_mem;
        uint8_t port;
        /* Add Work Fastpath data */
        uint64_t xaq_lmt __rte_cache_aligned;
        uint64_t *fc_mem;
        uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
-       /* PTP timestamp */
-       struct otx2_timesync_info *tstamp;
        /* Tx Fastpath data */
-       uint8_t tx_adptr_data[] __rte_cache_aligned;
+       uint64_t base[2] __rte_cache_aligned;
+       uint8_t tx_adptr_data[];
 } __rte_cache_aligned;
 
 static inline struct otx2_sso_evdev *
index b098407..95139d2 100644 (file)
@@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev)
 {
        const uint32_t tag = (uint32_t)ev->event;
        const uint8_t new_tt = ev->sched_type;
-       const uint8_t cur_tt = ws->cur_tt;
+       const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
 
        /* 96XX model
         * cur_tt/new_tt     SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -64,7 +64,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
        const uint8_t grp = ev->queue_id;
 
        /* Group hasn't changed, Use SWTAG to forward the event */
-       if (ws->cur_grp == grp)
+       if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp)
                otx2_ssogws_fwd_swtag(ws, ev);
        else
        /*
@@ -75,12 +75,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev)
                otx2_ssogws_fwd_group(ws, ev, grp);
 }
 
-static __rte_always_inline void
-otx2_ssogws_release_event(struct otx2_ssogws *ws)
-{
-       otx2_ssogws_swtag_flush(ws);
-}
-
 #define R(name, f6, f5, f4, f3, f2, f1, f0, flags)                     \
 uint16_t __rte_hot                                                             \
 otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev,              \
@@ -221,7 +215,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev)
                otx2_ssogws_forward_event(ws, ev);
                break;
        case RTE_EVENT_OP_RELEASE:
-               otx2_ssogws_release_event(ws);
+               otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
                break;
        default:
                return 0;
@@ -274,14 +268,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],      \
 {                                                                      \
        struct otx2_ssogws *ws = port;                                  \
        uint64_t cmd[sz];                                               \
-       int i;                                                          \
                                                                        \
-       for (i = 0; i < nb_events; i++)                                 \
-               otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t   \
+       RTE_SET_USED(nb_events);                                        \
+       return otx2_ssogws_event_tx(ws->base, &ev[0], cmd,              \
+                                   (const uint64_t                     \
                                    (*)[RTE_MAX_QUEUES_PER_PORT])       \
                                    &ws->tx_adptr_data,                 \
                                    flags);                             \
-       return nb_events;                                               \
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -293,14 +286,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
 {                                                                      \
        uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];                 \
        struct otx2_ssogws *ws = port;                                  \
-       int i;                                                          \
                                                                        \
-       for (i = 0; i < nb_events; i++)                                 \
-               otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t   \
+       RTE_SET_USED(nb_events);                                        \
+       return otx2_ssogws_event_tx(ws->base, &ev[0], cmd,              \
+                                   (const uint64_t                     \
                                    (*)[RTE_MAX_QUEUES_PER_PORT])       \
                                    &ws->tx_adptr_data,                 \
                                    (flags) | NIX_TX_MULTI_SEG_F);      \
-       return nb_events;                                               \
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -335,7 +327,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base,
                if (fn != NULL && ev.u64 != 0)
                        fn(arg, ev);
                if (ev.sched_type != SSO_TT_EMPTY)
-                       otx2_ssogws_swtag_flush(ws);
+                       otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op);
                rte_mb();
                aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT);
                ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT);
index 0a7d667..2b716c0 100644 (file)
@@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev,
        event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
                (event.get_work0 & (0x3FFull << 36)) << 4 |
                (event.get_work0 & 0xffffffff);
-       ws->cur_tt = event.sched_type;
-       ws->cur_grp = event.queue_id;
 
        if (event.sched_type != SSO_TT_EMPTY) {
                if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
@@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev,
        event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
                (event.get_work0 & (0x3FFull << 36)) << 4 |
                (event.get_work0 & 0xffffffff);
-       ws->cur_tt = event.sched_type;
-       ws->cur_grp = event.queue_id;
 
        if (event.sched_type != SSO_TT_EMPTY &&
            event.event_type == RTE_EVENT_TYPE_ETHDEV) {
@@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
 {
        otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
                     SSOW_LF_GWS_OP_SWTAG_UNTAG);
-       ws->cur_tt = SSO_SYNC_UNTAGGED;
 }
 
 static __rte_always_inline void
-otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
+otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
 {
-       if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) {
-               ws->cur_tt = SSO_SYNC_EMPTY;
+       if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY)
                return;
-       }
-       otx2_write64(0, ws->swtag_flush_op);
-       ws->cur_tt = SSO_SYNC_EMPTY;
+       otx2_write64(0, flush_op);
 }
 
 static __rte_always_inline void
@@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws)
 }
 
 static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws)
+otx2_ssogws_head_wait(uint64_t tag_op)
 {
 #ifdef RTE_ARCH_ARM64
        uint64_t tag;
@@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws)
                        "       tbz %[tag], 35, rty%=           \n"
                        "done%=:                                \n"
                        : [tag] "=&r" (tag)
-                       : [tag_op] "r" (ws->tag_op)
+                       : [tag_op] "r" (tag_op)
                        );
 #else
        /* Wait for the HEAD to be set */
-       while (!(otx2_read64(ws->tag_op) & BIT_ULL(35)))
+       while (!(otx2_read64(tag_op) & BIT_ULL(35)))
                ;
 #endif
 }
@@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
 }
 
 static __rte_always_inline uint16_t
-otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
-                    uint64_t *cmd,
+otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
                     const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
                     const uint32_t flags)
 {
@@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
        if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
            (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
                txq = otx2_ssogws_xtract_meta(m, txq_data);
-               return otx2_sec_event_tx(ws, ev, m, txq, flags);
+               return otx2_sec_event_tx(base, ev, m, txq, flags);
        }
 
        /* Perform header writes before barrier for TSO */
@@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
                                             m->ol_flags, segdw, flags);
                if (!ev->sched_type) {
                        otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
-                       otx2_ssogws_head_wait(ws);
+                       otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
                        if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
                                otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
                                                       txq->io_addr, segdw);
@@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
 
                if (!ev->sched_type) {
                        otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
-                       otx2_ssogws_head_wait(ws);
+                       otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
                        if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
                                otx2_nix_xmit_one(cmd, txq->lmt_addr,
                                                  txq->io_addr, flags);
@@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
                        return 1;
        }
 
-       otx2_ssogws_swtag_flush(ws);
+       otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG,
+                               base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
 
        return 1;
 }
index 946488e..81af4ca 100644 (file)
@@ -26,9 +26,9 @@ static __rte_always_inline void
 otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws,
                           const struct rte_event *ev)
 {
+       const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op));
        const uint32_t tag = (uint32_t)ev->event;
        const uint8_t new_tt = ev->sched_type;
-       const uint8_t cur_tt = ws->cur_tt;
 
        /* 96XX model
         * cur_tt/new_tt     SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED
@@ -66,15 +66,15 @@ otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws,
        const uint8_t grp = ev->queue_id;
 
        /* Group hasn't changed, Use SWTAG to forward the event */
-       if (vws->cur_grp == grp) {
+       if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) {
                otx2_ssogws_dual_fwd_swtag(vws, ev);
                ws->swtag_req = 1;
        } else {
-       /*
-        * Group has been changed for group based work pipelining,
-        * Use deschedule/add_work operation to transfer the event to
-        * new group/core
-        */
+               /*
+                * Group has been changed for group based work pipelining,
+                * Use deschedule/add_work operation to transfer the event to
+                * new group/core
+                */
                otx2_ssogws_dual_fwd_group(vws, ev, grp);
        }
 }
@@ -93,7 +93,7 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev)
                otx2_ssogws_dual_forward_event(ws, vws, ev);
                break;
        case RTE_EVENT_OP_RELEASE:
-               otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws);
+               otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op);
                break;
        default:
                return 0;
@@ -314,15 +314,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,                        \
                                       uint16_t nb_events)              \
 {                                                                      \
        struct otx2_ssogws_dual *ws = port;                             \
-       struct otx2_ssogws *vws =                                       \
-               (struct otx2_ssogws *)&ws->ws_state[!ws->vws];          \
        uint64_t cmd[sz];                                               \
                                                                        \
        RTE_SET_USED(nb_events);                                        \
-       return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t       \
-                                   (*)[RTE_MAX_QUEUES_PER_PORT])       \
-                                   ws->tx_adptr_data,                  \
-                                   flags);                             \
+       return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0],         \
+                                         cmd, (const uint64_t          \
+                                         (*)[RTE_MAX_QUEUES_PER_PORT]) \
+                                         &ws->tx_adptr_data, flags);   \
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
@@ -333,16 +331,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,                    \
                                           struct rte_event ev[],       \
                                           uint16_t nb_events)          \
 {                                                                      \
-       struct otx2_ssogws_dual *ws = port;                             \
-       struct otx2_ssogws *vws =                                       \
-               (struct otx2_ssogws *)&ws->ws_state[!ws->vws];          \
        uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];                 \
+       struct otx2_ssogws_dual *ws = port;                             \
                                                                        \
        RTE_SET_USED(nb_events);                                        \
-       return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t       \
-                                   (*)[RTE_MAX_QUEUES_PER_PORT])       \
-                                   ws->tx_adptr_data,                  \
-                                   (flags) | NIX_TX_MULTI_SEG_F);      \
+       return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0],         \
+                                         cmd, (const uint64_t          \
+                                         (*)[RTE_MAX_QUEUES_PER_PORT]) \
+                                         &ws->tx_adptr_data,           \
+                                         (flags) | NIX_TX_MULTI_SEG_F);\
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
index 6e60618..72b6164 100644 (file)
@@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws,
        event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
                (event.get_work0 & (0x3FFull << 36)) << 4 |
                (event.get_work0 & 0xffffffff);
-       ws->cur_tt = event.sched_type;
-       ws->cur_grp = event.queue_id;
 
        if (event.sched_type != SSO_TT_EMPTY) {
                if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
index 284bcd5..c8eae3d 100644 (file)
@@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess,
 }
 
 static __rte_always_inline void
-otx2_ssogws_head_wait(struct otx2_ssogws *ws);
+otx2_ssogws_head_wait(uint64_t base);
 
 static __rte_always_inline int
-otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
-                 struct rte_mbuf *m, const struct otx2_eth_txq *txq,
-                 const uint32_t offload_flags)
+otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m,
+                 const struct otx2_eth_txq *txq, const uint32_t offload_flags)
 {
        uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail;
        struct otx2_sec_session_ipsec_ip *sess;
@@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev,
        __mempool_check_cookies(m->pool, (void **)&m, 1, 0);
 
        if (!ev->sched_type)
-               otx2_ssogws_head_wait(ws);
+               otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG);
 
        inst.param1 = sess->esn_hi >> 16;
        inst.param2 = sess->esn_hi & 0xffff;