event/octeontx2: add SSO dual workslot mode
authorPavan Nikhilesh <pbhagavatula@marvell.com>
Fri, 28 Jun 2019 18:23:28 +0000 (23:53 +0530)
committerJerin Jacob <jerinj@marvell.com>
Wed, 3 Jul 2019 04:56:10 +0000 (06:56 +0200)
The OcteonTx2 AP core SSO cache contains two entries; each entry caches
the state of a single GWS, aka event port.
The AP core requests events from SSO using the following sequence:
1. Write to SSOW_LF_GWS_OP_GET_WORK
2. Wait for SSO to complete scheduling by polling on SSOW_LF_GWS_TAG[63]
3. SSO notifies the core by clearing SSOW_LF_GWS_TAG[63]; if work is
valid, SSOW_LF_GWS_WQP is non-zero.
The above sequence uses only one in-core cache entry.

In dual workslot mode we try to use both the in-core cache entries by
triggering GET_WORK on a second workslot as soon as the above sequence
completes. This effectively hides the schedule latency of SSO if there
are enough events with unique flow_tags in-flight.
This mode reserves two SSO GWS LFs for each event port, effectively
doubling single-core performance.
Dual workslot mode is the default mode of operation in octeontx2.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Signed-off-by: Jerin Jacob <jerinj@marvell.com>
drivers/event/octeontx2/otx2_evdev.c
drivers/event/octeontx2/otx2_evdev.h
drivers/event/octeontx2/otx2_evdev_irq.c
drivers/event/octeontx2/otx2_evdev_stats.h

index 51220f4..16d5e7d 100644 (file)
@@ -20,7 +20,7 @@ static inline int
 sso_get_msix_offsets(const struct rte_eventdev *event_dev)
 {
        struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev);
-       uint8_t nb_ports = dev->nb_event_ports;
+       uint8_t nb_ports = dev->nb_event_ports * (dev->dual_ws ? 2 : 1);
        struct otx2_mbox *mbox = dev->mbox;
        struct msix_offset_rsp *msix_rsp;
        int i, rc;
@@ -82,16 +82,26 @@ otx2_sso_port_link(struct rte_eventdev *event_dev, void *port,
                   const uint8_t queues[], const uint8_t priorities[],
                   uint16_t nb_links)
 {
+       struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev);
        uint8_t port_id = 0;
        uint16_t link;
 
-       RTE_SET_USED(event_dev);
        RTE_SET_USED(priorities);
        for (link = 0; link < nb_links; link++) {
-               struct otx2_ssogws *ws = port;
-
-               port_id = ws->port;
-               sso_port_link_modify(ws, queues[link], true);
+               if (dev->dual_ws) {
+                       struct otx2_ssogws_dual *ws = port;
+
+                       port_id = ws->port;
+                       sso_port_link_modify((struct otx2_ssogws *)
+                                       &ws->ws_state[0], queues[link], true);
+                       sso_port_link_modify((struct otx2_ssogws *)
+                                       &ws->ws_state[1], queues[link], true);
+               } else {
+                       struct otx2_ssogws *ws = port;
+
+                       port_id = ws->port;
+                       sso_port_link_modify(ws, queues[link], true);
+               }
        }
        sso_func_trace("Port=%d nb_links=%d", port_id, nb_links);
 
@@ -102,15 +112,27 @@ static int
 otx2_sso_port_unlink(struct rte_eventdev *event_dev, void *port,
                     uint8_t queues[], uint16_t nb_unlinks)
 {
+       struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev);
        uint8_t port_id = 0;
        uint16_t unlink;
 
-       RTE_SET_USED(event_dev);
        for (unlink = 0; unlink < nb_unlinks; unlink++) {
-               struct otx2_ssogws *ws = port;
-
-               port_id = ws->port;
-               sso_port_link_modify(ws, queues[unlink], false);
+               if (dev->dual_ws) {
+                       struct otx2_ssogws_dual *ws = port;
+
+                       port_id = ws->port;
+                       sso_port_link_modify((struct otx2_ssogws *)
+                                       &ws->ws_state[0], queues[unlink],
+                                       false);
+                       sso_port_link_modify((struct otx2_ssogws *)
+                                       &ws->ws_state[1], queues[unlink],
+                                       false);
+               } else {
+                       struct otx2_ssogws *ws = port;
+
+                       port_id = ws->port;
+                       sso_port_link_modify(ws, queues[unlink], false);
+               }
        }
        sso_func_trace("Port=%d nb_unlinks=%d", port_id, nb_unlinks);
 
@@ -242,11 +264,23 @@ sso_clr_links(const struct rte_eventdev *event_dev)
        int i, j;
 
        for (i = 0; i < dev->nb_event_ports; i++) {
-               struct otx2_ssogws *ws;
+               if (dev->dual_ws) {
+                       struct otx2_ssogws_dual *ws;
 
-               ws = event_dev->data->ports[i];
-               for (j = 0; j < dev->nb_event_queues; j++)
-                       sso_port_link_modify(ws, j, false);
+                       ws = event_dev->data->ports[i];
+                       for (j = 0; j < dev->nb_event_queues; j++) {
+                               sso_port_link_modify((struct otx2_ssogws *)
+                                               &ws->ws_state[0], j, false);
+                               sso_port_link_modify((struct otx2_ssogws *)
+                                               &ws->ws_state[1], j, false);
+                       }
+               } else {
+                       struct otx2_ssogws *ws;
+
+                       ws = event_dev->data->ports[i];
+                       for (j = 0; j < dev->nb_event_queues; j++)
+                               sso_port_link_modify(ws, j, false);
+               }
        }
 }
 
@@ -261,6 +295,73 @@ sso_set_port_ops(struct otx2_ssogws *ws, uintptr_t base)
        ws->swtag_desched_op    = base + SSOW_LF_GWS_OP_SWTAG_DESCHED;
 }
 
+/*
+ * Configure event ports for dual workslot mode.
+ *
+ * Attaches and initialises two SSO GWS LFs per event port, then allocates a
+ * struct otx2_ssogws_dual for every port and stores it in
+ * event_dev->data->ports[]. Consecutive LF slots (vws, vws + 1) back
+ * ws_state[0] and ws_state[1] of each port.
+ *
+ * Returns the non-negative sso_hw_lf_cfg() result on success, -ENODEV when
+ * the LFs cannot be attached or initialised, and -ENOMEM when a port
+ * allocation fails (the LFs are torn down and detached again in that case).
+ */
+static int
+sso_configure_dual_ports(const struct rte_eventdev *event_dev)
+{
+       struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev);
+       struct otx2_mbox *mbox = dev->mbox;
+       uint8_t vws = 0;
+       uint8_t nb_lf;
+       int i, rc;
+
+       otx2_sso_dbg("Configuring event ports %d", dev->nb_event_ports);
+
+       /* Every event port is backed by two GWS LFs in this mode. */
+       nb_lf = dev->nb_event_ports * 2;
+       /* Ask AF to attach required LFs. */
+       rc = sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, true);
+       if (rc < 0) {
+               otx2_err("Failed to attach SSO GWS LF");
+               return -ENODEV;
+       }
+
+       if (sso_lf_cfg(dev, mbox, SSO_LF_GWS, nb_lf, true) < 0) {
+               /* Undo the attach done above before bailing out. */
+               sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, false);
+               otx2_err("Failed to init SSO GWS LF");
+               return -ENODEV;
+       }
+
+       for (i = 0; i < dev->nb_event_ports; i++) {
+               struct otx2_ssogws_dual *ws;
+               uintptr_t base;
+
+               /* Free memory prior to re-allocation if needed. Clear the
+                * slot as well so that a failed allocation below does not
+                * leave a dangling pointer in ports[i]. rte_free(NULL) is a
+                * no-op, so no NULL check is required.
+                */
+               rte_free(event_dev->data->ports[i]);
+               event_dev->data->ports[i] = NULL;
+
+               /* Allocate event port memory */
+               ws = rte_zmalloc_socket("otx2_sso_ws",
+                                       sizeof(struct otx2_ssogws_dual),
+                                       RTE_CACHE_LINE_SIZE,
+                                       event_dev->data->socket_id);
+               if (ws == NULL) {
+                       otx2_err("Failed to alloc memory for port=%d", i);
+                       rc = -ENOMEM;
+                       break;
+               }
+
+               ws->port = i;
+               /* First workslot of the pair. */
+               base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
+               sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base);
+               vws++;
+
+               /* Second workslot of the pair uses the next LF slot. */
+               base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12);
+               sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base);
+               vws++;
+
+               event_dev->data->ports[i] = ws;
+       }
+
+       /* A port allocation failed: tear down and detach the GWS LFs. */
+       if (rc < 0) {
+               sso_lf_cfg(dev, mbox, SSO_LF_GWS, nb_lf, false);
+               sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, false);
+       }
+
+       return rc;
+}
+
 static int
 sso_configure_ports(const struct rte_eventdev *event_dev)
 {
@@ -465,6 +566,7 @@ sso_lf_teardown(struct otx2_sso_evdev *dev,
                break;
        case SSO_LF_GWS:
                nb_lf = dev->nb_event_ports;
+               nb_lf *= dev->dual_ws ? 2 : 1;
                break;
        default:
                return;
@@ -530,7 +632,12 @@ otx2_sso_configure(const struct rte_eventdev *event_dev)
        dev->nb_event_queues = conf->nb_event_queues;
        dev->nb_event_ports = conf->nb_event_ports;
 
-       if (sso_configure_ports(event_dev)) {
+       if (dev->dual_ws)
+               rc = sso_configure_dual_ports(event_dev);
+       else
+               rc = sso_configure_ports(event_dev);
+
+       if (rc < 0) {
                otx2_err("Failed to configure event ports");
                return -ENODEV;
        }
@@ -660,14 +767,27 @@ otx2_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id,
        /* Set get_work timeout for HWS */
        val = NSEC2USEC(dev->deq_tmo_ns) - 1;
 
-       struct otx2_ssogws *ws = event_dev->data->ports[port_id];
-       uintptr_t base = OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op);
-
-       rte_memcpy(ws->grps_base, grps_base,
-                  sizeof(uintptr_t) * OTX2_SSO_MAX_VHGRP);
-       ws->fc_mem = dev->fc_mem;
-       ws->xaq_lmt = dev->xaq_lmt;
-       otx2_write64(val, base + SSOW_LF_GWS_NW_TIM);
+       if (dev->dual_ws) {
+               struct otx2_ssogws_dual *ws = event_dev->data->ports[port_id];
+
+               rte_memcpy(ws->grps_base, grps_base,
+                          sizeof(uintptr_t) * OTX2_SSO_MAX_VHGRP);
+               ws->fc_mem = dev->fc_mem;
+               ws->xaq_lmt = dev->xaq_lmt;
+               otx2_write64(val, OTX2_SSOW_GET_BASE_ADDR(
+                            ws->ws_state[0].getwrk_op) + SSOW_LF_GWS_NW_TIM);
+               otx2_write64(val, OTX2_SSOW_GET_BASE_ADDR(
+                            ws->ws_state[1].getwrk_op) + SSOW_LF_GWS_NW_TIM);
+       } else {
+               struct otx2_ssogws *ws = event_dev->data->ports[port_id];
+               uintptr_t base = OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op);
+
+               rte_memcpy(ws->grps_base, grps_base,
+                          sizeof(uintptr_t) * OTX2_SSO_MAX_VHGRP);
+               ws->fc_mem = dev->fc_mem;
+               ws->xaq_lmt = dev->xaq_lmt;
+               otx2_write64(val, base + SSOW_LF_GWS_NW_TIM);
+       }
 
        otx2_sso_dbg("Port=%d ws=%p", port_id, event_dev->data->ports[port_id]);
 
@@ -735,18 +855,37 @@ otx2_sso_dump(struct rte_eventdev *event_dev, FILE *f)
        uint8_t queue;
        uint8_t port;
 
+       fprintf(f, "[%s] SSO running in [%s] mode\n", __func__, dev->dual_ws ?
+               "dual_ws" : "single_ws");
        /* Dump SSOW registers */
        for (port = 0; port < dev->nb_event_ports; port++) {
-               fprintf(f, "[%s]SSO single workslot[%d] dump\n",
-                       __func__, port);
-               ssogws_dump(event_dev->data->ports[port], f);
+               if (dev->dual_ws) {
+                       struct otx2_ssogws_dual *ws =
+                               event_dev->data->ports[port];
+
+                       fprintf(f, "[%s] SSO dual workslot[%d] vws[%d] dump\n",
+                               __func__, port, 0);
+                       ssogws_dump((struct otx2_ssogws *)&ws->ws_state[0], f);
+                       fprintf(f, "[%s]SSO dual workslot[%d] vws[%d] dump\n",
+                               __func__, port, 1);
+                       ssogws_dump((struct otx2_ssogws *)&ws->ws_state[1], f);
+               } else {
+                       fprintf(f, "[%s]SSO single workslot[%d] dump\n",
+                               __func__, port);
+                       ssogws_dump(event_dev->data->ports[port], f);
+               }
        }
 
        /* Dump SSO registers */
        for (queue = 0; queue < dev->nb_event_queues; queue++) {
                fprintf(f, "[%s]SSO group[%d] dump\n", __func__, queue);
-               struct otx2_ssogws *ws = event_dev->data->ports[0];
-               ssoggrp_dump(ws->grps_base[queue], f);
+               if (dev->dual_ws) {
+                       struct otx2_ssogws_dual *ws = event_dev->data->ports[0];
+                       ssoggrp_dump(ws->grps_base[queue], f);
+               } else {
+                       struct otx2_ssogws *ws = event_dev->data->ports[0];
+                       ssoggrp_dump(ws->grps_base[queue], f);
+               }
        }
 }
 
@@ -879,7 +1018,14 @@ otx2_sso_init(struct rte_eventdev *event_dev)
                goto otx2_npa_lf_uninit;
        }
 
+       dev->dual_ws = 1;
        sso_parse_devargs(dev, pci_dev->device.devargs);
+       if (dev->dual_ws) {
+               otx2_sso_dbg("Using dual workslot mode");
+               dev->max_event_ports = dev->max_event_ports / 2;
+       } else {
+               otx2_sso_dbg("Using single workslot mode");
+       }
 
        otx2_sso_pf_func_set(dev->pf_func);
        otx2_sso_dbg("Initializing %s max_queues=%d max_ports=%d",
index 6f8d709..72de9ac 100644 (file)
@@ -121,6 +121,7 @@ struct otx2_sso_evdev {
        uint64_t nb_xaq_cfg;
        rte_iova_t fc_iova;
        struct rte_mempool *xaq_pool;
+       uint8_t dual_ws;
        /* Dev args */
        uint32_t xae_cnt;
        /* HW const */
@@ -155,6 +156,22 @@ struct otx2_ssogws {
        uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
 } __rte_cache_aligned;
 
+struct otx2_ssogws_state {
+       OTX2_SSOGWS_OPS; /* Per-workslot op fields (e.g. getwrk_op). Callers
+                         * cast &ws_state[i] to struct otx2_ssogws *, so
+                         * presumably that struct starts with the same
+                         * macro - TODO confirm.
+                         */
+};
+/* Event port private data in dual workslot mode. One instance is allocated
+ * per event port and holds the state of its two GWS workslots.
+ */
+struct otx2_ssogws_dual {
+       /* Get Work Fastpath data */
+       struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */
+       uint8_t swtag_req;
+       uint8_t vws; /* Ping pong bit */
+       uint8_t port; /* Event port index, set when the port is allocated */
+       /* Add Work Fastpath data; these three members are filled from the
+        * device in otx2_sso_port_setup().
+        */
+       uint64_t xaq_lmt __rte_cache_aligned;
+       uint64_t *fc_mem;
+       uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
+} __rte_cache_aligned;
+
 static inline struct otx2_sso_evdev *
 sso_pmd_priv(const struct rte_eventdev *event_dev)
 {
index 7df21cc..7379bb1 100644 (file)
@@ -117,7 +117,7 @@ sso_register_irqs(const struct rte_eventdev *event_dev)
        int i, rc = -EINVAL;
        uint8_t nb_ports;
 
-       nb_ports = dev->nb_event_ports;
+       nb_ports = dev->nb_event_ports * (dev->dual_ws ? 2 : 1);
 
        for (i = 0; i < dev->nb_event_queues; i++) {
                if (dev->sso_msixoff[i] == MSIX_VECTOR_INVALID) {
@@ -159,7 +159,7 @@ sso_unregister_irqs(const struct rte_eventdev *event_dev)
        uint8_t nb_ports;
        int i;
 
-       nb_ports = dev->nb_event_ports;
+       nb_ports = dev->nb_event_ports * (dev->dual_ws ? 2 : 1);
 
        for (i = 0; i < dev->nb_event_queues; i++) {
                uintptr_t base = dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 |
index df76a13..9d7c694 100644 (file)
@@ -76,11 +76,29 @@ otx2_sso_xstats_get(const struct rte_eventdev *event_dev,
                xstats = sso_hws_xstats;
 
                req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox);
-                       ((struct sso_info_req *)req_rsp)->hws = queue_port_id;
+                       ((struct sso_info_req *)req_rsp)->hws = dev->dual_ws ?
+                                       2 * queue_port_id : queue_port_id;
                rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp);
                if (rc < 0)
                        goto invalid_value;
 
+               if (dev->dual_ws) {
+                       for (i = 0; i < n && i < xstats_mode_count; i++) {
+                               xstat = &xstats[ids[i] - start_offset];
+                               values[i] = *(uint64_t *)
+                                       ((char *)req_rsp + xstat->offset);
+                               values[i] = (values[i] >> xstat->shift) &
+                                       xstat->mask;
+                       }
+
+                       req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox);
+                       ((struct sso_info_req *)req_rsp)->hws =
+                                       (2 * queue_port_id) + 1;
+                       rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp);
+                       if (rc < 0)
+                               goto invalid_value;
+               }
+
                break;
        case RTE_EVENT_DEV_XSTATS_QUEUE:
                if (queue_port_id >= (signed int)dev->nb_event_queues)
@@ -107,7 +125,11 @@ otx2_sso_xstats_get(const struct rte_eventdev *event_dev,
                value = *(uint64_t *)((char *)req_rsp + xstat->offset);
                value = (value >> xstat->shift) & xstat->mask;
 
-               values[i] = value;
+               if ((mode == RTE_EVENT_DEV_XSTATS_PORT) && dev->dual_ws)
+                       values[i] += value;
+               else
+                       values[i] = value;
+
                values[i] -= xstat->reset_snap[queue_port_id];
        }
 
@@ -143,11 +165,30 @@ otx2_sso_xstats_reset(struct rte_eventdev *event_dev,
                xstats = sso_hws_xstats;
 
                req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox);
-               ((struct sso_info_req *)req_rsp)->hws = queue_port_id;
+               ((struct sso_info_req *)req_rsp)->hws = dev->dual_ws ?
+                       2 * queue_port_id : queue_port_id;
                rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp);
                if (rc < 0)
                        goto invalid_value;
 
+               if (dev->dual_ws) {
+                       for (i = 0; i < n && i < xstats_mode_count; i++) {
+                               xstat = &xstats[ids[i] - start_offset];
+                               xstat->reset_snap[queue_port_id] = *(uint64_t *)
+                                       ((char *)req_rsp + xstat->offset);
+                               xstat->reset_snap[queue_port_id] =
+                                       (xstat->reset_snap[queue_port_id] >>
+                                               xstat->shift) & xstat->mask;
+                       }
+
+                       req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox);
+                       ((struct sso_info_req *)req_rsp)->hws =
+                                       (2 * queue_port_id) + 1;
+                       rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp);
+                       if (rc < 0)
+                               goto invalid_value;
+               }
+
                break;
        case RTE_EVENT_DEV_XSTATS_QUEUE:
                if (queue_port_id >= (signed int)dev->nb_event_queues)
@@ -174,7 +215,10 @@ otx2_sso_xstats_reset(struct rte_eventdev *event_dev,
                value = *(uint64_t *)((char *)req_rsp + xstat->offset);
                value = (value >> xstat->shift) & xstat->mask;
 
-               xstat->reset_snap[queue_port_id] =  value;
+               if ((mode == RTE_EVENT_DEV_XSTATS_PORT) && dev->dual_ws)
+                       xstat->reset_snap[queue_port_id] += value;
+               else
+                       xstat->reset_snap[queue_port_id] =  value;
        }
        return i;
 invalid_value: