From fd7a6adf8af368153ef17df110e4bf995e3b3e8a Mon Sep 17 00:00:00 2001 From: Pavan Nikhilesh Date: Tue, 12 Jan 2021 14:09:38 +0530 Subject: [PATCH] event/octeontx2: enhance Tx path cache locality Enhance Tx path cache locality, remove current tag type and group stores from datapath to conserve store buffers. Signed-off-by: Pavan Nikhilesh --- drivers/event/octeontx2/otx2_evdev.c | 13 ++------ drivers/event/octeontx2/otx2_evdev.h | 23 +++++++------ drivers/event/octeontx2/otx2_worker.c | 28 ++++++---------- drivers/event/octeontx2/otx2_worker.h | 32 +++++++----------- drivers/event/octeontx2/otx2_worker_dual.c | 39 ++++++++++------------ drivers/event/octeontx2/otx2_worker_dual.h | 2 -- drivers/net/octeontx2/otx2_ethdev_sec_tx.h | 9 +++-- 7 files changed, 59 insertions(+), 87 deletions(-) diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c index 0fe014c24a..80a786f21b 100644 --- a/drivers/event/octeontx2/otx2_evdev.c +++ b/drivers/event/octeontx2/otx2_evdev.c @@ -833,10 +833,12 @@ sso_configure_dual_ports(const struct rte_eventdev *event_dev) ws->port = i; base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base); + ws->base[0] = base; vws++; base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base); + ws->base[1] = base; vws++; gws_cookie = ssogws_get_cookie(ws); @@ -909,6 +911,7 @@ sso_configure_ports(const struct rte_eventdev *event_dev) ws->port = i; base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12); sso_set_port_ops(ws, base); + ws->base = base; gws_cookie = ssogws_get_cookie(ws); gws_cookie->event_dev = event_dev; @@ -1449,18 +1452,12 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]); ws->swtag_req = 0; ws->vws = 0; - ws->ws_state[0].cur_grp = 0; - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; - ws->ws_state[1].cur_grp = 0; - ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY; } else { struct otx2_ssogws *ws; ws = event_dev->data->ports[i]; ssogws_reset(ws); ws->swtag_req = 0; - ws->cur_grp = 0; - ws->cur_tt = SSO_SYNC_EMPTY; } } @@ -1479,8 +1476,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) otx2_write64(enable, ws->grps_base[i] + SSO_LF_GGRP_QCTL); } - ws->ws_state[0].cur_grp = 0; - ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; } else { struct otx2_ssogws *ws = event_dev->data->ports[0]; @@ -1492,8 +1487,6 @@ sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) otx2_write64(enable, ws->grps_base[i] + SSO_LF_GGRP_QCTL); } - ws->cur_grp = 0; - ws->cur_tt = SSO_SYNC_EMPTY; } /* reset SSO GWS cache */ diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h index 0513cb81c9..ed9cbc86b9 100644 --- a/drivers/event/octeontx2/otx2_evdev.h +++ b/drivers/event/octeontx2/otx2_evdev.h @@ -80,6 +80,7 @@ #define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK) #define OTX2_SSOW_TT_FROM_TAG(x) (((x) >> 32) & SSO_TT_EMPTY) +#define OTX2_SSOW_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff) #define NSEC2USEC(__ns) ((__ns) / 1E3) #define USEC2NSEC(__us) ((__us) * 1E3) @@ -169,25 +170,24 @@ struct otx2_sso_evdev { uintptr_t wqp_op; \ uintptr_t swtag_flush_op; \ uintptr_t swtag_norm_op; \ - uintptr_t swtag_desched_op; \ - uint8_t cur_tt; \ - uint8_t cur_grp + uintptr_t swtag_desched_op; /* Event port aka GWS */ struct otx2_ssogws { /* Get Work Fastpath data */ OTX2_SSOGWS_OPS; - uint8_t swtag_req; + /* PTP timestamp */ + struct otx2_timesync_info *tstamp; void *lookup_mem; + uint8_t swtag_req; uint8_t port; /* Add Work Fastpath data */ uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; - /* PTP timestamp */ - struct otx2_timesync_info *tstamp; /* Tx Fastpath data */ - uint8_t tx_adptr_data[] __rte_cache_aligned; + uint64_t base __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; struct otx2_ssogws_state { @@ -197,18 +197,19 @@ struct otx2_ssogws_state { struct otx2_ssogws_dual { /* Get Work Fastpath data */ struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */ + /* PTP timestamp */ + struct otx2_timesync_info *tstamp; + void *lookup_mem; uint8_t swtag_req; uint8_t vws; /* Ping pong bit */ - void *lookup_mem; uint8_t port; /* Add Work Fastpath data */ uint64_t xaq_lmt __rte_cache_aligned; uint64_t *fc_mem; uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; - /* PTP timestamp */ - struct otx2_timesync_info *tstamp; /* Tx Fastpath data */ - uint8_t tx_adptr_data[] __rte_cache_aligned; + uint64_t base[2] __rte_cache_aligned; + uint8_t tx_adptr_data[]; } __rte_cache_aligned; static inline struct otx2_sso_evdev * diff --git a/drivers/event/octeontx2/otx2_worker.c b/drivers/event/octeontx2/otx2_worker.c index b098407e0d..95139d27a3 100644 --- a/drivers/event/octeontx2/otx2_worker.c +++ b/drivers/event/octeontx2/otx2_worker.c @@ -25,7 +25,7 @@ otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev) { const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = ws->cur_tt; + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)); /* 96XX model * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED @@ -64,7 +64,7 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev) const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (ws->cur_grp == grp) + if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(ws->tag_op)) == grp) otx2_ssogws_fwd_swtag(ws, ev); else /* @@ -75,12 +75,6 @@ otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev) otx2_ssogws_fwd_group(ws, ev, grp); } -static __rte_always_inline void -otx2_ssogws_release_event(struct otx2_ssogws *ws) -{ - otx2_ssogws_swtag_flush(ws); -} - #define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ uint16_t __rte_hot \ otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \ @@ -221,7 +215,7 @@ otx2_ssogws_enq(void *port, const struct rte_event *ev) otx2_ssogws_forward_event(ws, ev); break; case RTE_EVENT_OP_RELEASE: - otx2_ssogws_release_event(ws); + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op); break; default: return 0; @@ -274,14 +268,13 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \ { \ struct otx2_ssogws *ws = port; \ uint64_t cmd[sz]; \ - int i; \ \ - for (i = 0; i < nb_events; i++) \ - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \ + (const uint64_t \ (*)[RTE_MAX_QUEUES_PER_PORT]) \ &ws->tx_adptr_data, \ flags); \ - return nb_events; \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -293,14 +286,13 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\ { \ uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ struct otx2_ssogws *ws = port; \ - int i; \ \ - for (i = 0; i < nb_events; i++) \ - otx2_ssogws_event_tx(ws, &ev[i], cmd, (const uint64_t \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(ws->base, &ev[0], cmd, \ + (const uint64_t \ (*)[RTE_MAX_QUEUES_PER_PORT]) \ &ws->tx_adptr_data, \ (flags) | NIX_TX_MULTI_SEG_F); \ - return nb_events; \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -335,7 +327,7 @@ ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base, if (fn != NULL && ev.u64 != 0) fn(arg, ev); if (ev.sched_type != SSO_TT_EMPTY) - otx2_ssogws_swtag_flush(ws); + otx2_ssogws_swtag_flush(ws->tag_op, ws->swtag_flush_op); rte_mb(); aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT); ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT); diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h index 0a7d6671c4..2b716c042d 100644 --- a/drivers/event/octeontx2/otx2_worker.h +++ b/drivers/event/octeontx2/otx2_worker.h @@ -64,8 +64,6 @@ otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev, event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | (event.get_work0 & (0x3FFull << 36)) << 4 | (event.get_work0 & 0xffffffff); - ws->cur_tt = event.sched_type; - ws->cur_grp = event.queue_id; if (event.sched_type != SSO_TT_EMPTY) { if ((flags & NIX_RX_OFFLOAD_SECURITY_F) && @@ -136,8 +134,6 @@ otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev, event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | (event.get_work0 & (0x3FFull << 36)) << 4 | (event.get_work0 & 0xffffffff); - ws->cur_tt = event.sched_type; - ws->cur_grp = event.queue_id; if (event.sched_type != SSO_TT_EMPTY && event.event_type == RTE_EVENT_TYPE_ETHDEV) { @@ -192,18 +188,14 @@ otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) { otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + SSOW_LF_GWS_OP_SWTAG_UNTAG); - ws->cur_tt = SSO_SYNC_UNTAGGED; } static __rte_always_inline void -otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) +otx2_ssogws_swtag_flush(uint64_t tag_op, uint64_t flush_op) { - if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)) == SSO_TT_EMPTY) { - ws->cur_tt = SSO_SYNC_EMPTY; + if (OTX2_SSOW_TT_FROM_TAG(otx2_read64(tag_op)) == SSO_TT_EMPTY) return; - } - otx2_write64(0, ws->swtag_flush_op); - ws->cur_tt = SSO_SYNC_EMPTY; + otx2_write64(0, flush_op); } static __rte_always_inline void @@ -236,7 +228,7 @@ otx2_ssogws_swtag_wait(struct otx2_ssogws *ws) } static __rte_always_inline void -otx2_ssogws_head_wait(struct otx2_ssogws *ws) +otx2_ssogws_head_wait(uint64_t tag_op) { #ifdef RTE_ARCH_ARM64 uint64_t tag; @@ -250,11 +242,11 @@ otx2_ssogws_head_wait(struct otx2_ssogws *ws) " tbz %[tag], 35, rty%= \n" "done%=: \n" : [tag] "=&r" (tag) - : [tag_op] "r" (ws->tag_op) + : [tag_op] "r" (tag_op) ); #else /* Wait for the HEAD to be set */ - while (!(otx2_read64(ws->tag_op) & BIT_ULL(35))) + while (!(otx2_read64(tag_op) & BIT_ULL(35))) ; #endif } @@ -276,8 +268,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m, } static __rte_always_inline uint16_t -otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, - uint64_t *cmd, +otx2_ssogws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd, const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT], const uint32_t flags) { @@ -288,7 +279,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, if ((flags & NIX_TX_OFFLOAD_SECURITY_F) && (m->ol_flags & PKT_TX_SEC_OFFLOAD)) { txq = otx2_ssogws_xtract_meta(m, txq_data); - return otx2_sec_event_tx(ws, ev, m, txq, flags); + return otx2_sec_event_tx(base, ev, m, txq, flags); } /* Perform header writes before barrier for TSO */ @@ -309,7 +300,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, m->ol_flags, segdw, flags); if (!ev->sched_type) { otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw); - otx2_ssogws_head_wait(ws); + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); @@ -324,7 +315,7 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, if (!ev->sched_type) { otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags); - otx2_ssogws_head_wait(ws); + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0) otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); @@ -339,7 +330,8 @@ otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, return 1; } - otx2_ssogws_swtag_flush(ws); + otx2_ssogws_swtag_flush(base + SSOW_LF_GWS_TAG, + base + SSOW_LF_GWS_OP_SWTAG_FLUSH); return 1; } diff --git a/drivers/event/octeontx2/otx2_worker_dual.c b/drivers/event/octeontx2/otx2_worker_dual.c index 946488eabf..81af4ca904 100644 --- a/drivers/event/octeontx2/otx2_worker_dual.c +++ b/drivers/event/octeontx2/otx2_worker_dual.c @@ -26,9 +26,9 @@ static __rte_always_inline void otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws, const struct rte_event *ev) { + const uint8_t cur_tt = OTX2_SSOW_TT_FROM_TAG(otx2_read64(ws->tag_op)); const uint32_t tag = (uint32_t)ev->event; const uint8_t new_tt = ev->sched_type; - const uint8_t cur_tt = ws->cur_tt; /* 96XX model * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED @@ -66,15 +66,15 @@ otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws, const uint8_t grp = ev->queue_id; /* Group hasn't changed, Use SWTAG to forward the event */ - if (vws->cur_grp == grp) { + if (OTX2_SSOW_GRP_FROM_TAG(otx2_read64(vws->tag_op)) == grp) { otx2_ssogws_dual_fwd_swtag(vws, ev); ws->swtag_req = 1; } else { - /* - * Group has been changed for group based work pipelining, - * Use deschedule/add_work operation to transfer the event to - * new group/core - */ + /* + * Group has been changed for group based work pipelining, + * Use deschedule/add_work operation to transfer the event to + * new group/core + */ otx2_ssogws_dual_fwd_group(vws, ev, grp); } } @@ -93,7 +93,7 @@ otx2_ssogws_dual_enq(void *port, const struct rte_event *ev) otx2_ssogws_dual_forward_event(ws, vws, ev); break; case RTE_EVENT_OP_RELEASE: - otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws); + otx2_ssogws_swtag_flush(vws->tag_op, vws->swtag_flush_op); break; default: return 0; @@ -314,15 +314,13 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port, \ uint16_t nb_events) \ { \ struct otx2_ssogws_dual *ws = port; \ - struct otx2_ssogws *vws = \ - (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \ uint64_t cmd[sz]; \ \ RTE_SET_USED(nb_events); \ - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \ - (*)[RTE_MAX_QUEUES_PER_PORT]) \ - ws->tx_adptr_data, \ - flags); \ + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \ + cmd, (const uint64_t \ + (*)[RTE_MAX_QUEUES_PER_PORT]) \ + &ws->tx_adptr_data, flags); \ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T @@ -333,16 +331,15 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port, \ struct rte_event ev[], \ uint16_t nb_events) \ { \ - struct otx2_ssogws_dual *ws = port; \ - struct otx2_ssogws *vws = \ - (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \ uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ + struct otx2_ssogws_dual *ws = port; \ \ RTE_SET_USED(nb_events); \ - return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t \ - (*)[RTE_MAX_QUEUES_PER_PORT]) \ - ws->tx_adptr_data, \ - (flags) | NIX_TX_MULTI_SEG_F); \ + return otx2_ssogws_event_tx(ws->base[!ws->vws], &ev[0], \ + cmd, (const uint64_t \ + (*)[RTE_MAX_QUEUES_PER_PORT]) \ + &ws->tx_adptr_data, \ + (flags) | NIX_TX_MULTI_SEG_F);\ } SSO_TX_ADPTR_ENQ_FASTPATH_FUNC #undef T diff --git a/drivers/event/octeontx2/otx2_worker_dual.h b/drivers/event/octeontx2/otx2_worker_dual.h index 6e60618217..72b616439f 100644 --- a/drivers/event/octeontx2/otx2_worker_dual.h +++ b/drivers/event/octeontx2/otx2_worker_dual.h @@ -61,8 +61,6 @@ otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws, event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | (event.get_work0 & (0x3FFull << 36)) << 4 | (event.get_work0 & 0xffffffff); - ws->cur_tt = event.sched_type; - ws->cur_grp = event.queue_id; if (event.sched_type != SSO_TT_EMPTY) { if ((flags & NIX_RX_OFFLOAD_SECURITY_F) && diff --git a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h index 284bcd5367..c8eae3d628 100644 --- a/drivers/net/octeontx2/otx2_ethdev_sec_tx.h +++ b/drivers/net/octeontx2/otx2_ethdev_sec_tx.h @@ -30,12 +30,11 @@ otx2_ipsec_fp_out_rlen_get(struct otx2_sec_session_ipsec_ip *sess, } static __rte_always_inline void -otx2_ssogws_head_wait(struct otx2_ssogws *ws); +otx2_ssogws_head_wait(uint64_t base); static __rte_always_inline int -otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, - struct rte_mbuf *m, const struct otx2_eth_txq *txq, - const uint32_t offload_flags) +otx2_sec_event_tx(uint64_t base, struct rte_event *ev, struct rte_mbuf *m, + const struct otx2_eth_txq *txq, const uint32_t offload_flags) { uint32_t dlen, rlen, desc_headroom, extend_head, extend_tail; struct otx2_sec_session_ipsec_ip *sess; @@ -149,7 +148,7 @@ otx2_sec_event_tx(struct otx2_ssogws *ws, struct rte_event *ev, __mempool_check_cookies(m->pool, (void **)&m, 1, 0); if (!ev->sched_type) - otx2_ssogws_head_wait(ws); + otx2_ssogws_head_wait(base + SSOW_LF_GWS_TAG); inst.param1 = sess->esn_hi >> 16; inst.param2 = sess->esn_hi & 0xffff; -- 2.20.1