From aa62547f7ef8795595c5e77e2e4409426870840a Mon Sep 17 00:00:00 2001 From: Pavan Nikhilesh Date: Fri, 28 Jun 2019 23:53:28 +0530 Subject: [PATCH] event/octeontx2: add SSO dual workslot mode OcteonTx2 AP core SSO cache contains two entries each entry caches state of an single GWS aka event port. AP core requests events from SSO by using following sequence : 1. Write to SSOW_LF_GWS_OP_GET_WORK 2. Wait for SSO to complete scheduling by polling on SSOW_LF_GWS_TAG[63] 3. SSO notifies core by clearing SSOW_LF_GWS_TAG[63] and if work is valid SSOW_LF_GWS_WQP is non-zero. The above sequence uses only one in-core cache entry. In dual workslot mode we try to use both the in-core cache entries by triggering GET_WORK on a second workslot as soon as the above sequence completes. This effectively hides the schedule latency of SSO if there are enough events with unique flow_tags in-flight. This mode reserves two SSO GWS lf's for each event port effectively doubling single core performance. Dual workslot mode is the default mode of operation in octeontx2. Signed-off-by: Pavan Nikhilesh Signed-off-by: Jerin Jacob --- drivers/event/octeontx2/otx2_evdev.c | 204 ++++++++++++++++++--- drivers/event/octeontx2/otx2_evdev.h | 17 ++ drivers/event/octeontx2/otx2_evdev_irq.c | 4 +- drivers/event/octeontx2/otx2_evdev_stats.h | 52 +++++- 4 files changed, 242 insertions(+), 35 deletions(-) diff --git a/drivers/event/octeontx2/otx2_evdev.c b/drivers/event/octeontx2/otx2_evdev.c index 51220f447a..16d5e7dfa0 100644 --- a/drivers/event/octeontx2/otx2_evdev.c +++ b/drivers/event/octeontx2/otx2_evdev.c @@ -20,7 +20,7 @@ static inline int sso_get_msix_offsets(const struct rte_eventdev *event_dev) { struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); - uint8_t nb_ports = dev->nb_event_ports; + uint8_t nb_ports = dev->nb_event_ports * (dev->dual_ws ? 2 : 1); struct otx2_mbox *mbox = dev->mbox; struct msix_offset_rsp *msix_rsp; int i, rc; @@ -82,16 +82,26 @@ otx2_sso_port_link(struct rte_eventdev *event_dev, void *port, const uint8_t queues[], const uint8_t priorities[], uint16_t nb_links) { + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); uint8_t port_id = 0; uint16_t link; - RTE_SET_USED(event_dev); RTE_SET_USED(priorities); for (link = 0; link < nb_links; link++) { - struct otx2_ssogws *ws = port; - - port_id = ws->port; - sso_port_link_modify(ws, queues[link], true); + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = port; + + port_id = ws->port; + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[0], queues[link], true); + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[1], queues[link], true); + } else { + struct otx2_ssogws *ws = port; + + port_id = ws->port; + sso_port_link_modify(ws, queues[link], true); + } } sso_func_trace("Port=%d nb_links=%d", port_id, nb_links); @@ -102,15 +112,27 @@ static int otx2_sso_port_unlink(struct rte_eventdev *event_dev, void *port, uint8_t queues[], uint16_t nb_unlinks) { + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); uint8_t port_id = 0; uint16_t unlink; - RTE_SET_USED(event_dev); for (unlink = 0; unlink < nb_unlinks; unlink++) { - struct otx2_ssogws *ws = port; - - port_id = ws->port; - sso_port_link_modify(ws, queues[unlink], false); + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = port; + + port_id = ws->port; + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[0], queues[unlink], + false); + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[1], queues[unlink], + false); + } else { + struct otx2_ssogws *ws = port; + + port_id = ws->port; + sso_port_link_modify(ws, queues[unlink], false); + } } sso_func_trace("Port=%d nb_unlinks=%d", port_id, nb_unlinks); @@ -242,11 +264,23 @@ sso_clr_links(const struct rte_eventdev *event_dev) int i, j; for (i = 0; i < dev->nb_event_ports; i++) { - struct otx2_ssogws *ws; + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws; - ws = event_dev->data->ports[i]; - for (j = 0; j < dev->nb_event_queues; j++) - sso_port_link_modify(ws, j, false); + ws = event_dev->data->ports[i]; + for (j = 0; j < dev->nb_event_queues; j++) { + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[0], j, false); + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[1], j, false); + } + } else { + struct otx2_ssogws *ws; + + ws = event_dev->data->ports[i]; + for (j = 0; j < dev->nb_event_queues; j++) + sso_port_link_modify(ws, j, false); + } } } @@ -261,6 +295,73 @@ sso_set_port_ops(struct otx2_ssogws *ws, uintptr_t base) ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; } +static int +sso_configure_dual_ports(const struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct otx2_mbox *mbox = dev->mbox; + uint8_t vws = 0; + uint8_t nb_lf; + int i, rc; + + otx2_sso_dbg("Configuring event ports %d", dev->nb_event_ports); + + nb_lf = dev->nb_event_ports * 2; + /* Ask AF to attach required LFs. */ + rc = sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, true); + if (rc < 0) { + otx2_err("Failed to attach SSO GWS LF"); + return -ENODEV; + } + + if (sso_lf_cfg(dev, mbox, SSO_LF_GWS, nb_lf, true) < 0) { + sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, false); + otx2_err("Failed to init SSO GWS LF"); + return -ENODEV; + } + + for (i = 0; i < dev->nb_event_ports; i++) { + struct otx2_ssogws_dual *ws; + uintptr_t base; + + /* Free memory prior to re-allocation if needed */ + if (event_dev->data->ports[i] != NULL) { + ws = event_dev->data->ports[i]; + rte_free(ws); + ws = NULL; + } + + /* Allocate event port memory */ + ws = rte_zmalloc_socket("otx2_sso_ws", + sizeof(struct otx2_ssogws_dual), + RTE_CACHE_LINE_SIZE, + event_dev->data->socket_id); + if (ws == NULL) { + otx2_err("Failed to alloc memory for port=%d", i); + rc = -ENOMEM; + break; + } + + ws->port = i; + base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); + sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base); + vws++; + + base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); + sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base); + vws++; + + event_dev->data->ports[i] = ws; + } + + if (rc < 0) { + sso_lf_cfg(dev, mbox, SSO_LF_GWS, nb_lf, false); + sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, false); + } + + return rc; +} + static int sso_configure_ports(const struct rte_eventdev *event_dev) { @@ -465,6 +566,7 @@ sso_lf_teardown(struct otx2_sso_evdev *dev, break; case SSO_LF_GWS: nb_lf = dev->nb_event_ports; + nb_lf *= dev->dual_ws ? 2 : 1; break; default: return; @@ -530,7 +632,12 @@ otx2_sso_configure(const struct rte_eventdev *event_dev) dev->nb_event_queues = conf->nb_event_queues; dev->nb_event_ports = conf->nb_event_ports; - if (sso_configure_ports(event_dev)) { + if (dev->dual_ws) + rc = sso_configure_dual_ports(event_dev); + else + rc = sso_configure_ports(event_dev); + + if (rc < 0) { otx2_err("Failed to configure event ports"); return -ENODEV; } @@ -660,14 +767,27 @@ otx2_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id, /* Set get_work timeout for HWS */ val = NSEC2USEC(dev->deq_tmo_ns) - 1; - struct otx2_ssogws *ws = event_dev->data->ports[port_id]; - uintptr_t base = OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op); - - rte_memcpy(ws->grps_base, grps_base, - sizeof(uintptr_t) * OTX2_SSO_MAX_VHGRP); - ws->fc_mem = dev->fc_mem; - ws->xaq_lmt = dev->xaq_lmt; - otx2_write64(val, base + SSOW_LF_GWS_NW_TIM); + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = event_dev->data->ports[port_id]; + + rte_memcpy(ws->grps_base, grps_base, + sizeof(uintptr_t) * OTX2_SSO_MAX_VHGRP); + ws->fc_mem = dev->fc_mem; + ws->xaq_lmt = dev->xaq_lmt; + otx2_write64(val, OTX2_SSOW_GET_BASE_ADDR( + ws->ws_state[0].getwrk_op) + SSOW_LF_GWS_NW_TIM); + otx2_write64(val, OTX2_SSOW_GET_BASE_ADDR( + ws->ws_state[1].getwrk_op) + SSOW_LF_GWS_NW_TIM); + } else { + struct otx2_ssogws *ws = event_dev->data->ports[port_id]; + uintptr_t base = OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op); + + rte_memcpy(ws->grps_base, grps_base, + sizeof(uintptr_t) * OTX2_SSO_MAX_VHGRP); + ws->fc_mem = dev->fc_mem; + ws->xaq_lmt = dev->xaq_lmt; + otx2_write64(val, base + SSOW_LF_GWS_NW_TIM); + } otx2_sso_dbg("Port=%d ws=%p", port_id, event_dev->data->ports[port_id]); @@ -735,18 +855,37 @@ otx2_sso_dump(struct rte_eventdev *event_dev, FILE *f) uint8_t queue; uint8_t port; + fprintf(f, "[%s] SSO running in [%s] mode\n", __func__, dev->dual_ws ? + "dual_ws" : "single_ws"); /* Dump SSOW registers */ for (port = 0; port < dev->nb_event_ports; port++) { - fprintf(f, "[%s]SSO single workslot[%d] dump\n", - __func__, port); - ssogws_dump(event_dev->data->ports[port], f); + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = + event_dev->data->ports[port]; + + fprintf(f, "[%s] SSO dual workslot[%d] vws[%d] dump\n", + __func__, port, 0); + ssogws_dump((struct otx2_ssogws *)&ws->ws_state[0], f); + fprintf(f, "[%s]SSO dual workslot[%d] vws[%d] dump\n", + __func__, port, 1); + ssogws_dump((struct otx2_ssogws *)&ws->ws_state[1], f); + } else { + fprintf(f, "[%s]SSO single workslot[%d] dump\n", + __func__, port); + ssogws_dump(event_dev->data->ports[port], f); + } } /* Dump SSO registers */ for (queue = 0; queue < dev->nb_event_queues; queue++) { fprintf(f, "[%s]SSO group[%d] dump\n", __func__, queue); - struct otx2_ssogws *ws = event_dev->data->ports[0]; - ssoggrp_dump(ws->grps_base[queue], f); + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = event_dev->data->ports[0]; + ssoggrp_dump(ws->grps_base[queue], f); + } else { + struct otx2_ssogws *ws = event_dev->data->ports[0]; + ssoggrp_dump(ws->grps_base[queue], f); + } } } @@ -879,7 +1018,14 @@ otx2_sso_init(struct rte_eventdev *event_dev) goto otx2_npa_lf_uninit; } + dev->dual_ws = 1; sso_parse_devargs(dev, pci_dev->device.devargs); + if (dev->dual_ws) { + otx2_sso_dbg("Using dual workslot mode"); + dev->max_event_ports = dev->max_event_ports / 2; + } else { + otx2_sso_dbg("Using single workslot mode"); + } otx2_sso_pf_func_set(dev->pf_func); otx2_sso_dbg("Initializing %s max_queues=%d max_ports=%d", diff --git a/drivers/event/octeontx2/otx2_evdev.h b/drivers/event/octeontx2/otx2_evdev.h index 6f8d709b6d..72de9ace5c 100644 --- a/drivers/event/octeontx2/otx2_evdev.h +++ b/drivers/event/octeontx2/otx2_evdev.h @@ -121,6 +121,7 @@ struct otx2_sso_evdev { uint64_t nb_xaq_cfg; rte_iova_t fc_iova; struct rte_mempool *xaq_pool; + uint8_t dual_ws; /* Dev args */ uint32_t xae_cnt; /* HW const */ @@ -155,6 +156,22 @@ struct otx2_ssogws { uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; } __rte_cache_aligned; +struct otx2_ssogws_state { + OTX2_SSOGWS_OPS; +}; + +struct otx2_ssogws_dual { + /* Get Work Fastpath data */ + struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */ + uint8_t swtag_req; + uint8_t vws; /* Ping pong bit */ + uint8_t port; + /* Add Work Fastpath data */ + uint64_t xaq_lmt __rte_cache_aligned; + uint64_t *fc_mem; + uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; +} __rte_cache_aligned; + static inline struct otx2_sso_evdev * sso_pmd_priv(const struct rte_eventdev *event_dev) { diff --git a/drivers/event/octeontx2/otx2_evdev_irq.c b/drivers/event/octeontx2/otx2_evdev_irq.c index 7df21cc249..7379bb17fa 100644 --- a/drivers/event/octeontx2/otx2_evdev_irq.c +++ b/drivers/event/octeontx2/otx2_evdev_irq.c @@ -117,7 +117,7 @@ sso_register_irqs(const struct rte_eventdev *event_dev) int i, rc = -EINVAL; uint8_t nb_ports; - nb_ports = dev->nb_event_ports; + nb_ports = dev->nb_event_ports * (dev->dual_ws ? 2 : 1); for (i = 0; i < dev->nb_event_queues; i++) { if (dev->sso_msixoff[i] == MSIX_VECTOR_INVALID) { @@ -159,7 +159,7 @@ sso_unregister_irqs(const struct rte_eventdev *event_dev) uint8_t nb_ports; int i; - nb_ports = dev->nb_event_ports; + nb_ports = dev->nb_event_ports * (dev->dual_ws ? 2 : 1); for (i = 0; i < dev->nb_event_queues; i++) { uintptr_t base = dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | diff --git a/drivers/event/octeontx2/otx2_evdev_stats.h b/drivers/event/octeontx2/otx2_evdev_stats.h index df76a13330..9d7c694ee6 100644 --- a/drivers/event/octeontx2/otx2_evdev_stats.h +++ b/drivers/event/octeontx2/otx2_evdev_stats.h @@ -76,11 +76,29 @@ otx2_sso_xstats_get(const struct rte_eventdev *event_dev, xstats = sso_hws_xstats; req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox); - ((struct sso_info_req *)req_rsp)->hws = queue_port_id; + ((struct sso_info_req *)req_rsp)->hws = dev->dual_ws ? + 2 * queue_port_id : queue_port_id; rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp); if (rc < 0) goto invalid_value; + if (dev->dual_ws) { + for (i = 0; i < n && i < xstats_mode_count; i++) { + xstat = &xstats[ids[i] - start_offset]; + values[i] = *(uint64_t *) + ((char *)req_rsp + xstat->offset); + values[i] = (values[i] >> xstat->shift) & + xstat->mask; + } + + req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox); + ((struct sso_info_req *)req_rsp)->hws = + (2 * queue_port_id) + 1; + rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp); + if (rc < 0) + goto invalid_value; + } + break; case RTE_EVENT_DEV_XSTATS_QUEUE: if (queue_port_id >= (signed int)dev->nb_event_queues) @@ -107,7 +125,11 @@ otx2_sso_xstats_get(const struct rte_eventdev *event_dev, value = *(uint64_t *)((char *)req_rsp + xstat->offset); value = (value >> xstat->shift) & xstat->mask; - values[i] = value; + if ((mode == RTE_EVENT_DEV_XSTATS_PORT) && dev->dual_ws) + values[i] += value; + else + values[i] = value; + values[i] -= xstat->reset_snap[queue_port_id]; } @@ -143,11 +165,30 @@ otx2_sso_xstats_reset(struct rte_eventdev *event_dev, xstats = sso_hws_xstats; req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox); - ((struct sso_info_req *)req_rsp)->hws = queue_port_id; + ((struct sso_info_req *)req_rsp)->hws = dev->dual_ws ? + 2 * queue_port_id : queue_port_id; rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp); if (rc < 0) goto invalid_value; + if (dev->dual_ws) { + for (i = 0; i < n && i < xstats_mode_count; i++) { + xstat = &xstats[ids[i] - start_offset]; + xstat->reset_snap[queue_port_id] = *(uint64_t *) + ((char *)req_rsp + xstat->offset); + xstat->reset_snap[queue_port_id] = + (xstat->reset_snap[queue_port_id] >> + xstat->shift) & xstat->mask; + } + + req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox); + ((struct sso_info_req *)req_rsp)->hws = + (2 * queue_port_id) + 1; + rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp); + if (rc < 0) + goto invalid_value; + } + break; case RTE_EVENT_DEV_XSTATS_QUEUE: if (queue_port_id >= (signed int)dev->nb_event_queues) @@ -174,7 +215,10 @@ otx2_sso_xstats_reset(struct rte_eventdev *event_dev, value = *(uint64_t *)((char *)req_rsp + xstat->offset); value = (value >> xstat->shift) & xstat->mask; - xstat->reset_snap[queue_port_id] = value; + if ((mode == RTE_EVENT_DEV_XSTATS_PORT) && dev->dual_ws) + xstat->reset_snap[queue_port_id] += value; + else + xstat->reset_snap[queue_port_id] = value; } return i; invalid_value: -- 2.20.1