event/octeontx2: improve chunk pool performance
[dpdk.git] / drivers / event / octeontx2 / otx2_tim_evdev.c
index a2cba09..206ed43 100644 (file)
@@ -29,6 +29,47 @@ tim_get_msix_offsets(void)
        return rc;
 }
 
+static void
+tim_set_fp_ops(struct otx2_tim_ring *tim_ring)
+{
+       uint8_t prod_flag = !tim_ring->prod_type_sp;
+
+       /* [MOD/AND] [DFB/FB] [SP][MP]*/
+       const rte_event_timer_arm_burst_t arm_burst[2][2][2][2] = {
+#define FP(_name, _f4, _f3, _f2, _f1, flags) \
+               [_f4][_f3][_f2][_f1] = otx2_tim_arm_burst_ ## _name,
+TIM_ARM_FASTPATH_MODES
+#undef FP
+       };
+
+       const rte_event_timer_arm_tmo_tick_burst_t arm_tmo_burst[2][2][2] = {
+#define FP(_name, _f3, _f2, _f1, flags) \
+               [_f3][_f2][_f1] = otx2_tim_arm_tmo_tick_burst_ ## _name,
+TIM_ARM_TMO_FASTPATH_MODES
+#undef FP
+       };
+
+       otx2_tim_ops.arm_burst =
+               arm_burst[tim_ring->enable_stats][tim_ring->optimized]
+                       [tim_ring->ena_dfb][prod_flag];
+       otx2_tim_ops.arm_tmo_tick_burst =
+               arm_tmo_burst[tim_ring->enable_stats][tim_ring->optimized]
+                       [tim_ring->ena_dfb];
+       otx2_tim_ops.cancel_burst = otx2_tim_timer_cancel_burst;
+}
+
+/*
+ * Fill adptr_info with the ring's maximum timeout, its tick length in
+ * nanoseconds and a copy of the configuration held in adptr->data->conf.
+ */
+static void
+otx2_tim_ring_info_get(const struct rte_event_timer_adapter *adptr,
+                      struct rte_event_timer_adapter_info *adptr_info)
+{
+       const struct otx2_tim_ring *ring = adptr->data->adapter_priv;
+
+       rte_memcpy(&adptr_info->conf, &adptr->data->conf,
+                  sizeof(struct rte_event_timer_adapter_conf));
+       adptr_info->min_resolution_ns = ring->tck_nsec;
+       adptr_info->max_tmo_ns = ring->max_tout;
+}
+
 static void
 tim_optimze_bkt_param(struct otx2_tim_ring *tim_ring)
 {
@@ -83,6 +124,7 @@ tim_chnk_pool_create(struct otx2_tim_ring *tim_ring,
        char pool_name[25];
        int rc;
 
+       cache_sz /= rte_lcore_count();
        /* Create chunk pool. */
        if (rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT) {
                mp_flags = MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET;
@@ -97,10 +139,9 @@ tim_chnk_pool_create(struct otx2_tim_ring *tim_ring,
                cache_sz = RTE_MEMPOOL_CACHE_MAX_SIZE;
 
        if (!tim_ring->disable_npa) {
-               /* NPA need not have cache as free is not visible to SW */
                tim_ring->chunk_pool = rte_mempool_create_empty(pool_name,
                                tim_ring->nb_chunks, tim_ring->chunk_sz,
-                               0, 0, rte_socket_id(), mp_flags);
+                               cache_sz, 0, rte_socket_id(), mp_flags);
 
                if (tim_ring->chunk_pool == NULL) {
                        otx2_err("Unable to create chunkpool.");
@@ -214,7 +255,7 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr)
        struct tim_lf_alloc_req *req;
        struct tim_lf_alloc_rsp *rsp;
        uint64_t nb_timers;
-       int rc;
+       int i, rc;
 
        if (dev == NULL)
                return -ENODEV;
@@ -261,6 +302,19 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr)
        tim_ring->chunk_sz = dev->chunk_sz;
        nb_timers = rcfg->nb_timers;
        tim_ring->disable_npa = dev->disable_npa;
+       tim_ring->enable_stats = dev->enable_stats;
+
+       for (i = 0; i < dev->ring_ctl_cnt ; i++) {
+               struct otx2_tim_ctl *ring_ctl = &dev->ring_ctl_data[i];
+
+               if (ring_ctl->ring == tim_ring->ring_id) {
+                       tim_ring->chunk_sz = ring_ctl->chunk_slots ?
+                               ((uint32_t)(ring_ctl->chunk_slots + 1) *
+                                OTX2_TIM_CHUNK_ALIGNMENT) : tim_ring->chunk_sz;
+                       tim_ring->enable_stats = ring_ctl->enable_stats;
+                       tim_ring->disable_npa = ring_ctl->disable_npa;
+               }
+       }
 
        tim_ring->nb_chunks = nb_timers / OTX2_TIM_NB_CHUNK_SLOTS(
                                                        tim_ring->chunk_sz);
@@ -315,6 +369,9 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr)
                     tim_ring->base + TIM_LF_RING_BASE);
        otx2_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA);
 
+       /* Set fastpath ops. */
+       tim_set_fp_ops(tim_ring);
+
        /* Update SSO xae count. */
        sso_updt_xae_cnt(sso_pmd_priv(dev->event_dev), (void *)&nb_timers,
                         RTE_EVENT_TYPE_TIMER);
@@ -333,6 +390,69 @@ rng_mem_err:
        return rc;
 }
 
+static int
+otx2_tim_ring_start(const struct rte_event_timer_adapter *adptr)
+{
+       struct otx2_tim_ring *tim_ring = adptr->data->adapter_priv;
+       struct otx2_tim_evdev *dev = tim_priv_get();
+       struct tim_enable_rsp *rsp;
+       struct tim_ring_req *req;
+       int rc;
+
+       if (dev == NULL)
+               return -ENODEV;
+
+       req = otx2_mbox_alloc_msg_tim_enable_ring(dev->mbox);
+       req->ring = tim_ring->ring_id;
+
+       rc = otx2_mbox_process_msg(dev->mbox, (void **)&rsp);
+       if (rc < 0) {
+               tim_err_desc(rc);
+               goto fail;
+       }
+#ifdef RTE_ARM_EAL_RDTSC_USE_PMU
+       uint64_t tenns_stmp, tenns_diff;
+       uint64_t pmu_stmp;
+
+       pmu_stmp = rte_rdtsc();
+       asm volatile("mrs %0, cntvct_el0" : "=r" (tenns_stmp));
+
+       tenns_diff = tenns_stmp - rsp->timestarted;
+       pmu_stmp = pmu_stmp - (NSEC2TICK(tenns_diff  * 10, rte_get_timer_hz()));
+       tim_ring->ring_start_cyc = pmu_stmp;
+#else
+       tim_ring->ring_start_cyc = rsp->timestarted;
+#endif
+       tim_ring->tck_int = NSEC2TICK(tim_ring->tck_nsec, rte_get_timer_hz());
+       tim_ring->fast_div = rte_reciprocal_value_u64(tim_ring->tck_int);
+
+fail:
+       return rc;
+}
+
+/*
+ * Ask the mailbox to disable the ring.
+ *
+ * Returns -ENODEV when no TIM device is available, -EBUSY when the mailbox
+ * reports a failure, and the mailbox status otherwise.
+ */
+static int
+otx2_tim_ring_stop(const struct rte_event_timer_adapter *adptr)
+{
+       struct otx2_tim_ring *tim_ring = adptr->data->adapter_priv;
+       struct otx2_tim_evdev *dev = tim_priv_get();
+       struct tim_ring_req *req;
+       int rc;
+
+       if (dev == NULL)
+               return -ENODEV;
+
+       req = otx2_mbox_alloc_msg_tim_disable_ring(dev->mbox);
+       req->ring = tim_ring->ring_id;
+
+       rc = otx2_mbox_process(dev->mbox);
+       if (rc >= 0)
+               return rc;
+
+       /* Log the mailbox error, but report every failure as -EBUSY. */
+       tim_err_desc(rc);
+       return -EBUSY;
+}
+
 static int
 otx2_tim_ring_free(struct rte_event_timer_adapter *adptr)
 {
@@ -362,6 +482,30 @@ otx2_tim_ring_free(struct rte_event_timer_adapter *adptr)
        return 0;
 }
 
+/*
+ * Report adapter statistics: both event counters are taken from the ring's
+ * arm counter, and the tick count is the number of cycles since ring start
+ * divided by the tick interval. Always returns 0.
+ */
+static int
+otx2_tim_stats_get(const struct rte_event_timer_adapter *adapter,
+                  struct rte_event_timer_adapter_stats *stats)
+{
+       struct otx2_tim_ring *ring = adapter->data->adapter_priv;
+       uint64_t elapsed_cyc = rte_rdtsc() - ring->ring_start_cyc;
+
+       stats->adapter_tick_count = rte_reciprocal_divide_u64(elapsed_cyc,
+                               &ring->fast_div);
+       stats->evtim_exp_count = rte_atomic64_read(&ring->arm_cnt);
+       stats->ev_enq_count = stats->evtim_exp_count;
+
+       return 0;
+}
+
+/* Clear the ring's arm counter. Always returns 0. */
+static int
+otx2_tim_stats_reset(const struct rte_event_timer_adapter *adapter)
+{
+       struct otx2_tim_ring *ring = adapter->data->adapter_priv;
+
+       rte_atomic64_clear(&ring->arm_cnt);
+
+       return 0;
+}
+
 int
 otx2_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags,
                  uint32_t *caps,
@@ -370,11 +514,20 @@ otx2_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags,
        struct otx2_tim_evdev *dev = tim_priv_get();
 
        RTE_SET_USED(flags);
+
        if (dev == NULL)
                return -ENODEV;
 
        otx2_tim_ops.init = otx2_tim_ring_create;
        otx2_tim_ops.uninit = otx2_tim_ring_free;
+       otx2_tim_ops.start = otx2_tim_ring_start;
+       otx2_tim_ops.stop = otx2_tim_ring_stop;
+       otx2_tim_ops.get_info   = otx2_tim_ring_info_get;
+
+       if (dev->enable_stats) {
+               otx2_tim_ops.stats_get   = otx2_tim_stats_get;
+               otx2_tim_ops.stats_reset = otx2_tim_stats_reset;
+       }
 
        /* Store evdev pointer for later use. */
        dev->event_dev = (struct rte_eventdev *)(uintptr_t)evdev;
@@ -386,6 +539,88 @@ otx2_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags,
 
 #define OTX2_TIM_DISABLE_NPA   "tim_disable_npa"
 #define OTX2_TIM_CHNK_SLOTS    "tim_chnk_slots"
+#define OTX2_TIM_STATS_ENA     "tim_stats_ena"
+#define OTX2_TIM_RINGS_LMT     "tim_rings_lmt"
+#define OTX2_TIM_RING_CTL      "tim_ring_ctl"
+
+/*
+ * Parse one "ring-chunk_sz-disable_npa-enable_stats" entry and append it to
+ * dev->ring_ctl_data.
+ *
+ * value:  writable, NUL-terminated entry text; it is split in place on '-'.
+ * opaque: must point at the struct otx2_tim_evdev being configured (the
+ *         device structure itself, not a pointer to a pointer).
+ *
+ * The fields are stored, in order, into consecutive uint16_t slots of a
+ * struct otx2_tim_ctl, ending with enable_stats. An empty entry is ignored.
+ * An entry with too few or too many fields is rejected with an error log.
+ * If the list cannot be grown the entry is dropped and the existing list is
+ * left untouched.
+ */
+static void
+tim_parse_ring_param(char *value, void *opaque)
+{
+       struct otx2_tim_evdev *dev = opaque;
+       struct otx2_tim_ctl ring_ctl = {0};
+       /* One past the last slot an entry is allowed to fill. */
+       uint16_t *const last = &ring_ctl.enable_stats + 1;
+       uint16_t *val = (uint16_t *)&ring_ctl;
+       struct otx2_tim_ctl *new_data;
+       char *tok;
+
+       /* Check for an empty entry before strtok() touches the buffer. */
+       if (value[0] == '\0')
+               return;
+
+       for (tok = strtok(value, "-"); tok != NULL; tok = strtok(NULL, "-")) {
+               /* Extra fields must never be written past ring_ctl. */
+               if (val == last)
+                       goto invalid;
+               /*
+                * strtol() matches atoi() for representable values and stays
+                * well defined when the number is out of range.
+                */
+               *val++ = (uint16_t)strtol(tok, NULL, 10);
+       }
+
+       if (val != last)
+               goto invalid;
+
+       /* Grow through a temporary so a failure keeps the old list intact. */
+       new_data = rte_realloc(dev->ring_ctl_data,
+                              sizeof(struct otx2_tim_ctl) *
+                              (dev->ring_ctl_cnt + 1), 0);
+       if (new_data == NULL) {
+               otx2_err("Unable to allocate memory for ring control entry");
+               return;
+       }
+
+       new_data[dev->ring_ctl_cnt] = ring_ctl;
+       dev->ring_ctl_data = new_data;
+       dev->ring_ctl_cnt++;
+       return;
+
+invalid:
+       otx2_err(
+       "Invalid ring param expected [ring-chunk_sz-disable_npa-enable_stats]");
+}
+
+/*
+ * Walk a list of bracketed entries ("[...][...]") and hand the text between
+ * each '[' and the following ']' to tim_parse_ring_param().
+ *
+ * value:  the raw devargs string; it is duplicated, so the caller's copy is
+ *         never modified.
+ * opaque: forwarded unchanged to tim_parse_ring_param().
+ *
+ * Nothing is parsed if the working copy cannot be allocated.
+ */
+static void
+tim_parse_ring_ctl_list(const char *value, void *opaque)
+{
+       char *s = strdup(value);
+       char *start = NULL;
+       char *end = NULL;
+       char *f = s; /* Keeps the allocation address for free(). */
+
+       if (s == NULL) {
+               otx2_err("Unable to duplicate ring control list");
+               return;
+       }
+
+       while (*s) {
+               if (*s == '[')
+                       start = s;
+               else if (*s == ']')
+                       end = s;
+
+               /* Only compare the two markers once both have been seen. */
+               if (start && end && start < end) {
+                       /* Terminate the entry in place at its ']'. */
+                       *end = 0;
+                       tim_parse_ring_param(start + 1, opaque);
+                       /* Resume scanning just after the consumed entry. */
+                       start = end;
+                       s = end;
+               }
+               s++;
+       }
+
+       free(f);
+}
+
+/*
+ * kvargs handler for the per-ring control list. The key is ignored and the
+ * value is passed on to tim_parse_ring_ctl_list(). Always returns 0.
+ */
+static int
+tim_parse_kvargs_dict(const char *key, const char *value, void *opaque)
+{
+       RTE_SET_USED(key);
+
+       /*
+        * Each entry has the form [ring-chunk_sz-disable_npa-enable_stats].
+        * Fields are separated by '-' because ',' isn't allowed, and a value
+        * of 0 represents the default.
+        */
+       tim_parse_ring_ctl_list(value, opaque);
+
+       return 0;
+}
 
 static void
 tim_parse_devargs(struct rte_devargs *devargs, struct otx2_tim_evdev *dev)
@@ -403,6 +638,14 @@ tim_parse_devargs(struct rte_devargs *devargs, struct otx2_tim_evdev *dev)
                           &parse_kvargs_flag, &dev->disable_npa);
        rte_kvargs_process(kvlist, OTX2_TIM_CHNK_SLOTS,
                           &parse_kvargs_value, &dev->chunk_slots);
+       rte_kvargs_process(kvlist, OTX2_TIM_STATS_ENA, &parse_kvargs_flag,
+                          &dev->enable_stats);
+       rte_kvargs_process(kvlist, OTX2_TIM_RINGS_LMT, &parse_kvargs_value,
+                          &dev->min_ring_cnt);
+       rte_kvargs_process(kvlist, OTX2_TIM_RING_CTL,
+                          &tim_parse_kvargs_dict, &dev);
+
+       rte_kvargs_free(kvlist);
 }
 
 void
@@ -440,7 +683,8 @@ otx2_tim_init(struct rte_pci_device *pci_dev, struct otx2_dev *cmn_dev)
                goto mz_free;
        }
 
-       dev->nb_rings = rsrc_cnt->tim;
+       dev->nb_rings = dev->min_ring_cnt ?
+               RTE_MIN(dev->min_ring_cnt, rsrc_cnt->tim) : rsrc_cnt->tim;
 
        if (!dev->nb_rings) {
                otx2_tim_dbg("No TIM Logical functions provisioned.");