event/octeontx2: improve chunk pool performance
[dpdk.git] / drivers / event / octeontx2 / otx2_tim_evdev.c
index 44a939a..206ed43 100644 (file)
@@ -2,6 +2,7 @@
  * Copyright(C) 2019 Marvell International Ltd.
  */
 
+#include <rte_kvargs.h>
 #include <rte_malloc.h>
 #include <rte_mbuf_pool_ops.h>
 
 
 static struct rte_event_timer_adapter_ops otx2_tim_ops;
 
+/*
+ * Request the MSIX vector offsets over the mailbox and cache the per-ring
+ * TIM LF offsets in the TIM device private data.
+ *
+ * Returns the value reported by otx2_mbox_process_msg(). On a negative
+ * result the response pointer is not trusted and the cached offsets are
+ * left untouched.
+ */
+static inline int
+tim_get_msix_offsets(void)
+{
+       struct otx2_tim_evdev *dev = tim_priv_get();
+       struct otx2_mbox *mbox = dev->mbox;
+       struct msix_offset_rsp *msix_rsp;
+       int i, rc;
+
+       /* Get TIM MSIX vector offsets */
+       otx2_mbox_alloc_msg_msix_offset(mbox);
+       rc = otx2_mbox_process_msg(mbox, (void *)&msix_rsp);
+       /* Do not read the response when the mailbox exchange failed. */
+       if (rc < 0)
+               return rc;
+
+       for (i = 0; i < dev->nb_rings; i++)
+               dev->tim_msixoff[i] = msix_rsp->timlf_msixoff[i];
+
+       return rc;
+}
+
+static void
+tim_set_fp_ops(struct otx2_tim_ring *tim_ring)
+{
+       uint8_t prod_flag = !tim_ring->prod_type_sp;
+
+       /* [MOD/AND] [DFB/FB] [SP][MP]*/
+       const rte_event_timer_arm_burst_t arm_burst[2][2][2][2] = {
+#define FP(_name, _f4, _f3, _f2, _f1, flags) \
+               [_f4][_f3][_f2][_f1] = otx2_tim_arm_burst_ ## _name,
+TIM_ARM_FASTPATH_MODES
+#undef FP
+       };
+
+       const rte_event_timer_arm_tmo_tick_burst_t arm_tmo_burst[2][2][2] = {
+#define FP(_name, _f3, _f2, _f1, flags) \
+               [_f3][_f2][_f1] = otx2_tim_arm_tmo_tick_burst_ ## _name,
+TIM_ARM_TMO_FASTPATH_MODES
+#undef FP
+       };
+
+       otx2_tim_ops.arm_burst =
+               arm_burst[tim_ring->enable_stats][tim_ring->optimized]
+                       [tim_ring->ena_dfb][prod_flag];
+       otx2_tim_ops.arm_tmo_tick_burst =
+               arm_tmo_burst[tim_ring->enable_stats][tim_ring->optimized]
+                       [tim_ring->ena_dfb];
+       otx2_tim_ops.cancel_burst = otx2_tim_timer_cancel_burst;
+}
+
+/*
+ * Fill adptr_info for the given adapter: a copy of the adapter's stored
+ * configuration plus the ring's maximum timeout and tick length.
+ */
+static void
+otx2_tim_ring_info_get(const struct rte_event_timer_adapter *adptr,
+                      struct rte_event_timer_adapter_info *adptr_info)
+{
+       struct otx2_tim_ring *ring = adptr->data->adapter_priv;
+
+       rte_memcpy(&adptr_info->conf, &adptr->data->conf,
+                  sizeof(struct rte_event_timer_adapter_conf));
+
+       adptr_info->min_resolution_ns = ring->tck_nsec;
+       adptr_info->max_tmo_ns = ring->max_tout;
+}
+
 static void
 tim_optimze_bkt_param(struct otx2_tim_ring *tim_ring)
 {
@@ -64,6 +124,7 @@ tim_chnk_pool_create(struct otx2_tim_ring *tim_ring,
        char pool_name[25];
        int rc;
 
+       cache_sz /= rte_lcore_count();
        /* Create chunk pool. */
        if (rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT) {
                mp_flags = MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET;
@@ -77,33 +138,44 @@ tim_chnk_pool_create(struct otx2_tim_ring *tim_ring,
        if (cache_sz > RTE_MEMPOOL_CACHE_MAX_SIZE)
                cache_sz = RTE_MEMPOOL_CACHE_MAX_SIZE;
 
-       /* NPA need not have cache as free is not visible to SW */
-       tim_ring->chunk_pool = rte_mempool_create_empty(pool_name,
-                                                       tim_ring->nb_chunks,
-                                                       tim_ring->chunk_sz,
-                                                       0, 0, rte_socket_id(),
-                                                       mp_flags);
+       if (!tim_ring->disable_npa) {
+               tim_ring->chunk_pool = rte_mempool_create_empty(pool_name,
+                               tim_ring->nb_chunks, tim_ring->chunk_sz,
+                               cache_sz, 0, rte_socket_id(), mp_flags);
 
-       if (tim_ring->chunk_pool == NULL) {
-               otx2_err("Unable to create chunkpool.");
-               return -ENOMEM;
-       }
+               if (tim_ring->chunk_pool == NULL) {
+                       otx2_err("Unable to create chunkpool.");
+                       return -ENOMEM;
+               }
 
-       rc = rte_mempool_set_ops_byname(tim_ring->chunk_pool,
-                                       rte_mbuf_platform_mempool_ops(), NULL);
-       if (rc < 0) {
-               otx2_err("Unable to set chunkpool ops");
-               goto free;
-       }
+               rc = rte_mempool_set_ops_byname(tim_ring->chunk_pool,
+                                               rte_mbuf_platform_mempool_ops(),
+                                               NULL);
+               if (rc < 0) {
+                       otx2_err("Unable to set chunkpool ops");
+                       goto free;
+               }
 
-       rc = rte_mempool_populate_default(tim_ring->chunk_pool);
-       if (rc < 0) {
-               otx2_err("Unable to set populate chunkpool.");
-               goto free;
+               rc = rte_mempool_populate_default(tim_ring->chunk_pool);
+               if (rc < 0) {
+                       otx2_err("Unable to set populate chunkpool.");
+                       goto free;
+               }
+               tim_ring->aura = npa_lf_aura_handle_to_aura(
+                               tim_ring->chunk_pool->pool_id);
+               tim_ring->ena_dfb = 0;
+       } else {
+               tim_ring->chunk_pool = rte_mempool_create(pool_name,
+                               tim_ring->nb_chunks, tim_ring->chunk_sz,
+                               cache_sz, 0, NULL, NULL, NULL, NULL,
+                               rte_socket_id(),
+                               mp_flags);
+               if (tim_ring->chunk_pool == NULL) {
+                       otx2_err("Unable to create chunkpool.");
+                       return -ENOMEM;
+               }
+               tim_ring->ena_dfb = 1;
        }
-       tim_ring->aura = npa_lf_aura_handle_to_aura(
-                                               tim_ring->chunk_pool->pool_id);
-       tim_ring->ena_dfb = 0;
 
        return 0;
 
@@ -183,7 +255,7 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr)
        struct tim_lf_alloc_req *req;
        struct tim_lf_alloc_rsp *rsp;
        uint64_t nb_timers;
-       int rc;
+       int i, rc;
 
        if (dev == NULL)
                return -ENODEV;
@@ -227,8 +299,23 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr)
        tim_ring->tck_nsec = RTE_ALIGN_MUL_CEIL(rcfg->timer_tick_ns, 10);
        tim_ring->max_tout = rcfg->max_tmo_ns;
        tim_ring->nb_bkts = (tim_ring->max_tout / tim_ring->tck_nsec);
-       tim_ring->chunk_sz = OTX2_TIM_RING_DEF_CHUNK_SZ;
+       tim_ring->chunk_sz = dev->chunk_sz;
        nb_timers = rcfg->nb_timers;
+       tim_ring->disable_npa = dev->disable_npa;
+       tim_ring->enable_stats = dev->enable_stats;
+
+       for (i = 0; i < dev->ring_ctl_cnt ; i++) {
+               struct otx2_tim_ctl *ring_ctl = &dev->ring_ctl_data[i];
+
+               if (ring_ctl->ring == tim_ring->ring_id) {
+                       tim_ring->chunk_sz = ring_ctl->chunk_slots ?
+                               ((uint32_t)(ring_ctl->chunk_slots + 1) *
+                                OTX2_TIM_CHUNK_ALIGNMENT) : tim_ring->chunk_sz;
+                       tim_ring->enable_stats = ring_ctl->enable_stats;
+                       tim_ring->disable_npa = ring_ctl->disable_npa;
+               }
+       }
+
        tim_ring->nb_chunks = nb_timers / OTX2_TIM_NB_CHUNK_SLOTS(
                                                        tim_ring->chunk_sz);
        tim_ring->nb_chunk_slots = OTX2_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz);
@@ -274,10 +361,22 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr)
        tim_ring->base = dev->bar2 +
                (RVU_BLOCK_ADDR_TIM << 20 | tim_ring->ring_id << 12);
 
+       rc = tim_register_irq(tim_ring->ring_id);
+       if (rc < 0)
+               goto chnk_mem_err;
+
        otx2_write64((uint64_t)tim_ring->bkt,
                     tim_ring->base + TIM_LF_RING_BASE);
        otx2_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
+       /* Set fastpath ops. */
+       tim_set_fp_ops(tim_ring);
+
+       /* Update SSO xae count. */
+       sso_updt_xae_cnt(sso_pmd_priv(dev->event_dev), (void *)&nb_timers,
+                        RTE_EVENT_TYPE_TIMER);
+       sso_xae_reconfigure(dev->event_dev);
+
        return rc;
 
 chnk_mem_err:
@@ -291,6 +390,69 @@ rng_mem_err:
        return rc;
 }
 
+static int
+otx2_tim_ring_start(const struct rte_event_timer_adapter *adptr)
+{
+       struct otx2_tim_ring *tim_ring = adptr->data->adapter_priv;
+       struct otx2_tim_evdev *dev = tim_priv_get();
+       struct tim_enable_rsp *rsp;
+       struct tim_ring_req *req;
+       int rc;
+
+       if (dev == NULL)
+               return -ENODEV;
+
+       req = otx2_mbox_alloc_msg_tim_enable_ring(dev->mbox);
+       req->ring = tim_ring->ring_id;
+
+       rc = otx2_mbox_process_msg(dev->mbox, (void **)&rsp);
+       if (rc < 0) {
+               tim_err_desc(rc);
+               goto fail;
+       }
+#ifdef RTE_ARM_EAL_RDTSC_USE_PMU
+       uint64_t tenns_stmp, tenns_diff;
+       uint64_t pmu_stmp;
+
+       pmu_stmp = rte_rdtsc();
+       asm volatile("mrs %0, cntvct_el0" : "=r" (tenns_stmp));
+
+       tenns_diff = tenns_stmp - rsp->timestarted;
+       pmu_stmp = pmu_stmp - (NSEC2TICK(tenns_diff  * 10, rte_get_timer_hz()));
+       tim_ring->ring_start_cyc = pmu_stmp;
+#else
+       tim_ring->ring_start_cyc = rsp->timestarted;
+#endif
+       tim_ring->tck_int = NSEC2TICK(tim_ring->tck_nsec, rte_get_timer_hz());
+       tim_ring->fast_div = rte_reciprocal_value_u64(tim_ring->tck_int);
+
+fail:
+       return rc;
+}
+
+/*
+ * Disable the adapter's TIM ring through the mailbox.
+ *
+ * Returns -ENODEV when the TIM private data is unavailable, -EBUSY when
+ * the mailbox request fails, otherwise the mailbox result.
+ */
+static int
+otx2_tim_ring_stop(const struct rte_event_timer_adapter *adptr)
+{
+       struct otx2_tim_ring *tim_ring = adptr->data->adapter_priv;
+       struct otx2_tim_evdev *dev = tim_priv_get();
+       struct tim_ring_req *req;
+       int rc;
+
+       if (dev == NULL)
+               return -ENODEV;
+
+       req = otx2_mbox_alloc_msg_tim_disable_ring(dev->mbox);
+       req->ring = tim_ring->ring_id;
+
+       rc = otx2_mbox_process(dev->mbox);
+       if (rc >= 0)
+               return rc;
+
+       /* Log the mailbox error, then report the ring as busy. */
+       tim_err_desc(rc);
+       return -EBUSY;
+}
+
 static int
 otx2_tim_ring_free(struct rte_event_timer_adapter *adptr)
 {
@@ -302,6 +464,8 @@ otx2_tim_ring_free(struct rte_event_timer_adapter *adptr)
        if (dev == NULL)
                return -ENODEV;
 
+       tim_unregister_irq(tim_ring->ring_id);
+
        req = otx2_mbox_alloc_msg_tim_lf_free(dev->mbox);
        req->ring = tim_ring->ring_id;
 
@@ -318,6 +482,30 @@ otx2_tim_ring_free(struct rte_event_timer_adapter *adptr)
        return 0;
 }
 
+/*
+ * Report adapter statistics: the ring's arm counter is used for both the
+ * expired-timer and enqueued-event counts, and the tick count is the
+ * number of cycles since ring start divided by the tick interval.
+ *
+ * Always returns 0.
+ */
+static int
+otx2_tim_stats_get(const struct rte_event_timer_adapter *adapter,
+                  struct rte_event_timer_adapter_stats *stats)
+{
+       struct otx2_tim_ring *ring = adapter->data->adapter_priv;
+       uint64_t elapsed_cyc;
+
+       elapsed_cyc = rte_rdtsc() - ring->ring_start_cyc;
+
+       stats->evtim_exp_count = rte_atomic64_read(&ring->arm_cnt);
+       stats->ev_enq_count = stats->evtim_exp_count;
+       /* fast_div holds the precomputed reciprocal of the tick interval. */
+       stats->adapter_tick_count =
+               rte_reciprocal_divide_u64(elapsed_cyc, &ring->fast_div);
+
+       return 0;
+}
+
+/* Clear the ring's arm counter, which backs the reported statistics. */
+static int
+otx2_tim_stats_reset(const struct rte_event_timer_adapter *adapter)
+{
+       struct otx2_tim_ring *ring = adapter->data->adapter_priv;
+
+       rte_atomic64_clear(&ring->arm_cnt);
+
+       return 0;
+}
+
 int
 otx2_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags,
                  uint32_t *caps,
@@ -326,11 +514,20 @@ otx2_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags,
        struct otx2_tim_evdev *dev = tim_priv_get();
 
        RTE_SET_USED(flags);
+
        if (dev == NULL)
                return -ENODEV;
 
        otx2_tim_ops.init = otx2_tim_ring_create;
        otx2_tim_ops.uninit = otx2_tim_ring_free;
+       otx2_tim_ops.start = otx2_tim_ring_start;
+       otx2_tim_ops.stop = otx2_tim_ring_stop;
+       otx2_tim_ops.get_info   = otx2_tim_ring_info_get;
+
+       if (dev->enable_stats) {
+               otx2_tim_ops.stats_get   = otx2_tim_stats_get;
+               otx2_tim_ops.stats_reset = otx2_tim_stats_reset;
+       }
 
        /* Store evdev pointer for later use. */
        dev->event_dev = (struct rte_eventdev *)(uintptr_t)evdev;
@@ -340,10 +537,122 @@ otx2_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags,
        return 0;
 }
 
+#define OTX2_TIM_DISABLE_NPA   "tim_disable_npa"
+#define OTX2_TIM_CHNK_SLOTS    "tim_chnk_slots"
+#define OTX2_TIM_STATS_ENA     "tim_stats_ena"
+#define OTX2_TIM_RINGS_LMT     "tim_rings_lmt"
+#define OTX2_TIM_RING_CTL      "tim_ring_ctl"
+
+/*
+ * Parse one "ring-chunk_sz-disable_npa-enable_stats" entry and append it
+ * to dev->ring_ctl_data.
+ *
+ * value:  mutable, NUL-terminated entry text (modified by strtok()).
+ * opaque: the struct otx2_tim_evdev to update.
+ *
+ * Entries that do not have exactly the expected number of '-' separated
+ * fields are rejected with an error message and leave the device untouched.
+ */
+static void
+tim_parse_ring_param(char *value, void *opaque)
+{
+       struct otx2_tim_evdev *dev = opaque;
+       struct otx2_tim_ctl ring_ctl = {0};
+       /* One past the last field that may be filled from the entry. */
+       uint16_t *const val_end = &ring_ctl.enable_stats + 1;
+       struct otx2_tim_ctl *new_data;
+       uint16_t *val;
+       char *tok;
+
+       /*
+        * NOTE(review): the fields are filled positionally, which assumes
+        * struct otx2_tim_ctl is a run of uint16_t members ending with
+        * enable_stats - confirm against the struct definition.
+        */
+       val = (uint16_t *)&ring_ctl;
+
+       if (!strlen(value))
+               return;
+
+       /* Stop at the last field so surplus tokens cannot overrun ring_ctl. */
+       tok = strtok(value, "-");
+       while (tok != NULL && val < val_end) {
+               *val = atoi(tok);
+               tok = strtok(NULL, "-");
+               val++;
+       }
+
+       /* tok != NULL: too many fields; val != val_end: too few. */
+       if (tok != NULL || val != val_end) {
+               otx2_err(
+               "Invalid ring param expected [ring-chunk_sz-disable_npa-enable_stats]");
+               return;
+       }
+
+       /* Grow through a temporary so the old array survives a failure. */
+       new_data = rte_realloc(dev->ring_ctl_data,
+                              sizeof(struct otx2_tim_ctl) *
+                              (dev->ring_ctl_cnt + 1), 0);
+       if (new_data == NULL) {
+               otx2_err("Unable to allocate memory for ring control data");
+               return;
+       }
+
+       new_data[dev->ring_ctl_cnt] = ring_ctl;
+       dev->ring_ctl_data = new_data;
+       dev->ring_ctl_cnt++;
+}
+
+/*
+ * Walk a list of bracketed entries ("[...][...]") and hand the text inside
+ * each bracket pair to tim_parse_ring_param().
+ *
+ * value:  the devarg value; it is duplicated so the caller's string is
+ *         never modified.
+ * opaque: forwarded unchanged to tim_parse_ring_param().
+ */
+static void
+tim_parse_ring_ctl_list(const char *value, void *opaque)
+{
+       char *buf = strdup(value);
+       char *start = NULL;
+       char *end = NULL;
+       char *s;
+
+       /* Nothing can be parsed without a writable copy. */
+       if (buf == NULL) {
+               otx2_err("Unable to duplicate ring control list");
+               return;
+       }
+
+       for (s = buf; *s; s++) {
+               if (*s == '[')
+                       start = s;
+               else if (*s == ']')
+                       end = s;
+
+               /* Only compare the two once both brackets have been seen. */
+               if (start && end && start < end) {
+                       /* Terminate the entry in place at the ']'. */
+                       *end = 0;
+                       tim_parse_ring_param(start + 1, opaque);
+                       start = end;
+                       s = end;
+               }
+       }
+
+       free(buf);
+}
+
+/*
+ * kvargs handler for the ring control list.
+ *
+ * Each entry is written as [ring-chunk_sz-disable_npa-enable_stats]; '-'
+ * separates the fields because ',' isn't allowed, and 0 represents the
+ * default. The key is ignored and the handler always returns 0.
+ */
+static int
+tim_parse_kvargs_dict(const char *key, const char *value, void *opaque)
+{
+       RTE_SET_USED(key);
+
+       tim_parse_ring_ctl_list(value, opaque);
+
+       return 0;
+}
+
+/*
+ * Parse the TIM-related device arguments into the TIM device private data.
+ *
+ * devargs: device arguments; NULL means there is nothing to parse.
+ * dev:     TIM device whose disable_npa, chunk_slots, enable_stats,
+ *          min_ring_cnt and per-ring control list are filled in.
+ */
+static void
+tim_parse_devargs(struct rte_devargs *devargs, struct otx2_tim_evdev *dev)
+{
+       struct rte_kvargs *kvlist;
+
+       if (devargs == NULL)
+               return;
+
+       kvlist = rte_kvargs_parse(devargs->args, NULL);
+       if (kvlist == NULL)
+               return;
+
+       rte_kvargs_process(kvlist, OTX2_TIM_DISABLE_NPA,
+                          &parse_kvargs_flag, &dev->disable_npa);
+       rte_kvargs_process(kvlist, OTX2_TIM_CHNK_SLOTS,
+                          &parse_kvargs_value, &dev->chunk_slots);
+       rte_kvargs_process(kvlist, OTX2_TIM_STATS_ENA, &parse_kvargs_flag,
+                          &dev->enable_stats);
+       rte_kvargs_process(kvlist, OTX2_TIM_RINGS_LMT, &parse_kvargs_value,
+                          &dev->min_ring_cnt);
+       /*
+        * The ring control parser treats its opaque argument as the device
+        * itself, so pass dev rather than the address of this local pointer.
+        */
+       rte_kvargs_process(kvlist, OTX2_TIM_RING_CTL,
+                          &tim_parse_kvargs_dict, dev);
+
+       rte_kvargs_free(kvlist);
+}
+
 void
 otx2_tim_init(struct rte_pci_device *pci_dev, struct otx2_dev *cmn_dev)
 {
        struct rsrc_attach_req *atch_req;
+       struct rsrc_detach_req *dtch_req;
        struct free_rsrcs_rsp *rsrc_cnt;
        const struct rte_memzone *mz;
        struct otx2_tim_evdev *dev;
@@ -365,6 +674,8 @@ otx2_tim_init(struct rte_pci_device *pci_dev, struct otx2_dev *cmn_dev)
        dev->mbox = cmn_dev->mbox;
        dev->bar2 = cmn_dev->bar2;
 
+       tim_parse_devargs(pci_dev->device.devargs, dev);
+
        otx2_mbox_alloc_msg_free_rsrc_cnt(dev->mbox);
        rc = otx2_mbox_process_msg(dev->mbox, (void *)&rsrc_cnt);
        if (rc < 0) {
@@ -372,7 +683,8 @@ otx2_tim_init(struct rte_pci_device *pci_dev, struct otx2_dev *cmn_dev)
                goto mz_free;
        }
 
-       dev->nb_rings = rsrc_cnt->tim;
+       dev->nb_rings = dev->min_ring_cnt ?
+               RTE_MIN(dev->min_ring_cnt, rsrc_cnt->tim) : rsrc_cnt->tim;
 
        if (!dev->nb_rings) {
                otx2_tim_dbg("No TIM Logical functions provisioned.");
@@ -389,8 +701,29 @@ otx2_tim_init(struct rte_pci_device *pci_dev, struct otx2_dev *cmn_dev)
                goto mz_free;
        }
 
+       rc = tim_get_msix_offsets();
+       if (rc < 0) {
+               otx2_err("Unable to get MSIX offsets for TIM.");
+               goto detach;
+       }
+
+       if (dev->chunk_slots &&
+           dev->chunk_slots <= OTX2_TIM_MAX_CHUNK_SLOTS &&
+           dev->chunk_slots >= OTX2_TIM_MIN_CHUNK_SLOTS) {
+               dev->chunk_sz = (dev->chunk_slots + 1) *
+                       OTX2_TIM_CHUNK_ALIGNMENT;
+       } else {
+               dev->chunk_sz = OTX2_TIM_RING_DEF_CHUNK_SZ;
+       }
+
        return;
 
+detach:
+       dtch_req = otx2_mbox_alloc_msg_detach_resources(dev->mbox);
+       dtch_req->partial = true;
+       dtch_req->timlfs = true;
+
+       otx2_mbox_process(dev->mbox);
 mz_free:
        rte_memzone_free(mz);
 }