From 25b401c8b6405b7d788bc5602cbeb3ee463ff322 Mon Sep 17 00:00:00 2001 From: Pavan Nikhilesh Date: Tue, 23 Mar 2021 14:14:35 +0530 Subject: [PATCH] event/octeontx2: simplify timer bucket estimation Simplify timer bucket estimation: the number of buckets need not be aligned to a power of 2; instead, use reciprocal division to compute the modulo. Signed-off-by: Pavan Nikhilesh --- drivers/event/octeontx2/otx2_tim_evdev.c | 78 ++++----------------- drivers/event/octeontx2/otx2_tim_evdev.h | 84 ++++++++--------------- drivers/event/octeontx2/otx2_tim_worker.c | 4 +- drivers/event/octeontx2/otx2_tim_worker.h | 40 +++++------ 4 files changed, 64 insertions(+), 142 deletions(-) diff --git a/drivers/event/octeontx2/otx2_tim_evdev.c b/drivers/event/octeontx2/otx2_tim_evdev.c index 39a29f17f4..0bbba48b41 100644 --- a/drivers/event/octeontx2/otx2_tim_evdev.c +++ b/drivers/event/octeontx2/otx2_tim_evdev.c @@ -34,27 +34,25 @@ tim_set_fp_ops(struct otx2_tim_ring *tim_ring) { uint8_t prod_flag = !tim_ring->prod_type_sp; - /* [MOD/AND] [DFB/FB] [SP][MP]*/ - const rte_event_timer_arm_burst_t arm_burst[2][2][2][2] = { -#define FP(_name, _f4, _f3, _f2, _f1, flags) \ - [_f4][_f3][_f2][_f1] = otx2_tim_arm_burst_ ## _name, -TIM_ARM_FASTPATH_MODES + /* [DFB/FB] [SP][MP]*/ + const rte_event_timer_arm_burst_t arm_burst[2][2][2] = { +#define FP(_name, _f3, _f2, _f1, flags) \ + [_f3][_f2][_f1] = otx2_tim_arm_burst_##_name, + TIM_ARM_FASTPATH_MODES #undef FP }; - const rte_event_timer_arm_tmo_tick_burst_t arm_tmo_burst[2][2][2] = { -#define FP(_name, _f3, _f2, _f1, flags) \ - [_f3][_f2][_f1] = otx2_tim_arm_tmo_tick_burst_ ## _name, -TIM_ARM_TMO_FASTPATH_MODES + const rte_event_timer_arm_tmo_tick_burst_t arm_tmo_burst[2][2] = { +#define FP(_name, _f2, _f1, flags) \ + [_f2][_f1] = otx2_tim_arm_tmo_tick_burst_##_name, + TIM_ARM_TMO_FASTPATH_MODES #undef FP }; otx2_tim_ops.arm_burst = - arm_burst[tim_ring->enable_stats][tim_ring->optimized] - [tim_ring->ena_dfb][prod_flag]; + 
arm_burst[tim_ring->enable_stats][tim_ring->ena_dfb][prod_flag]; otx2_tim_ops.arm_tmo_tick_burst = - arm_tmo_burst[tim_ring->enable_stats][tim_ring->optimized] - [tim_ring->ena_dfb]; + arm_tmo_burst[tim_ring->enable_stats][tim_ring->ena_dfb]; otx2_tim_ops.cancel_burst = otx2_tim_timer_cancel_burst; } @@ -71,51 +69,6 @@ otx2_tim_ring_info_get(const struct rte_event_timer_adapter *adptr, sizeof(struct rte_event_timer_adapter_conf)); } -static void -tim_optimze_bkt_param(struct otx2_tim_ring *tim_ring) -{ - uint64_t tck_nsec; - uint32_t hbkts; - uint32_t lbkts; - - hbkts = rte_align32pow2(tim_ring->nb_bkts); - tck_nsec = RTE_ALIGN_MUL_CEIL(tim_ring->max_tout / (hbkts - 1), 10); - - if ((tck_nsec < TICK2NSEC(OTX2_TIM_MIN_TMO_TKS, - tim_ring->tenns_clk_freq) || - hbkts > OTX2_TIM_MAX_BUCKETS)) - hbkts = 0; - - lbkts = rte_align32prevpow2(tim_ring->nb_bkts); - tck_nsec = RTE_ALIGN_MUL_CEIL((tim_ring->max_tout / (lbkts - 1)), 10); - - if ((tck_nsec < TICK2NSEC(OTX2_TIM_MIN_TMO_TKS, - tim_ring->tenns_clk_freq) || - lbkts > OTX2_TIM_MAX_BUCKETS)) - lbkts = 0; - - if (!hbkts && !lbkts) - return; - - if (!hbkts) { - tim_ring->nb_bkts = lbkts; - goto end; - } else if (!lbkts) { - tim_ring->nb_bkts = hbkts; - goto end; - } - - tim_ring->nb_bkts = (hbkts - tim_ring->nb_bkts) < - (tim_ring->nb_bkts - lbkts) ? hbkts : lbkts; -end: - tim_ring->optimized = true; - tim_ring->tck_nsec = RTE_ALIGN_MUL_CEIL((tim_ring->max_tout / - (tim_ring->nb_bkts - 1)), 10); - otx2_tim_dbg("Optimized configured values"); - otx2_tim_dbg("Nb_bkts : %" PRIu32 "", tim_ring->nb_bkts); - otx2_tim_dbg("Tck_nsec : %" PRIu64 "", tim_ring->tck_nsec); -} - static int tim_chnk_pool_create(struct otx2_tim_ring *tim_ring, struct rte_event_timer_adapter_conf *rcfg) @@ -337,14 +290,6 @@ otx2_tim_ring_create(struct rte_event_timer_adapter *adptr) tim_ring->chunk_sz); tim_ring->nb_chunk_slots = OTX2_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz); - /* Try to optimize the bucket parameters. 
*/ - if ((rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES)) { - if (rte_is_power_of_2(tim_ring->nb_bkts)) - tim_ring->optimized = true; - else - tim_optimze_bkt_param(tim_ring); - } - if (tim_ring->disable_npa) tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts; else @@ -477,6 +422,7 @@ otx2_tim_ring_start(const struct rte_event_timer_adapter *adptr) tim_ring->tck_int = NSEC2TICK(tim_ring->tck_nsec, rte_get_timer_hz()); tim_ring->tot_int = tim_ring->tck_int * tim_ring->nb_bkts; tim_ring->fast_div = rte_reciprocal_value_u64(tim_ring->tck_int); + tim_ring->fast_bkt = rte_reciprocal_value_u64(tim_ring->nb_bkts); otx2_tim_calibrate_start_tsc(tim_ring); diff --git a/drivers/event/octeontx2/otx2_tim_evdev.h b/drivers/event/octeontx2/otx2_tim_evdev.h index 82d116c09d..e9cefea9fc 100644 --- a/drivers/event/octeontx2/otx2_tim_evdev.h +++ b/drivers/event/octeontx2/otx2_tim_evdev.h @@ -76,8 +76,6 @@ #define OTX2_TIM_SP 0x1 #define OTX2_TIM_MP 0x2 -#define OTX2_TIM_BKT_AND 0x4 -#define OTX2_TIM_BKT_MOD 0x8 #define OTX2_TIM_ENA_FB 0x10 #define OTX2_TIM_ENA_DFB 0x20 #define OTX2_TIM_ENA_STATS 0x40 @@ -149,11 +147,11 @@ struct otx2_tim_ring { struct otx2_tim_bkt *bkt; struct rte_mempool *chunk_pool; struct rte_reciprocal_u64 fast_div; + struct rte_reciprocal_u64 fast_bkt; uint64_t arm_cnt; uint8_t prod_type_sp; uint8_t enable_stats; uint8_t disable_npa; - uint8_t optimized; uint8_t ena_dfb; uint8_t ena_periodic; uint16_t ring_id; @@ -179,60 +177,38 @@ tim_priv_get(void) return mz->addr; } -#define TIM_ARM_FASTPATH_MODES \ -FP(mod_sp, 0, 0, 0, 0, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ -FP(mod_mp, 0, 0, 0, 1, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \ -FP(mod_fb_sp, 0, 0, 1, 0, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ -FP(mod_fb_mp, 0, 0, 1, 1, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_FB | OTX2_TIM_MP) \ -FP(and_sp, 0, 1, 0, 0, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ -FP(and_mp, 0, 1, 0, 1, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_DFB | 
OTX2_TIM_MP) \ -FP(and_fb_sp, 0, 1, 1, 0, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ -FP(and_fb_mp, 0, 1, 1, 1, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_FB | OTX2_TIM_MP) \ -FP(stats_mod_sp, 1, 0, 0, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ - OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ -FP(stats_mod_mp, 1, 0, 0, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ - OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \ -FP(stats_mod_fb_sp, 1, 0, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ - OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ -FP(stats_mod_fb_mp, 1, 0, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ - OTX2_TIM_ENA_FB | OTX2_TIM_MP) \ -FP(stats_and_sp, 1, 1, 0, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ - OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ -FP(stats_and_mp, 1, 1, 0, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ - OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \ -FP(stats_and_fb_sp, 1, 1, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ - OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ -FP(stats_and_fb_mp, 1, 1, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ - OTX2_TIM_ENA_FB | OTX2_TIM_MP) - -#define TIM_ARM_TMO_FASTPATH_MODES \ -FP(mod, 0, 0, 0, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_DFB) \ -FP(mod_fb, 0, 0, 1, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_FB) \ -FP(and, 0, 1, 0, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_DFB) \ -FP(and_fb, 0, 1, 1, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_FB) \ -FP(stats_mod, 1, 0, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ - OTX2_TIM_ENA_DFB) \ -FP(stats_mod_fb, 1, 0, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ - OTX2_TIM_ENA_FB) \ -FP(stats_and, 1, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ - OTX2_TIM_ENA_DFB) \ -FP(stats_and_fb, 1, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ - OTX2_TIM_ENA_FB) - -#define FP(_name, _f4, _f3, _f2, _f1, flags) \ -uint16_t \ -otx2_tim_arm_burst_ ## _name(const struct rte_event_timer_adapter *adptr, \ - struct rte_event_timer **tim, \ - const uint16_t nb_timers); +#define TIM_ARM_FASTPATH_MODES \ + FP(sp, 0, 0, 0, OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ + FP(mp, 0, 0, 1, OTX2_TIM_ENA_DFB | 
OTX2_TIM_MP) \ + FP(fb_sp, 0, 1, 0, OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ + FP(fb_mp, 0, 1, 1, OTX2_TIM_ENA_FB | OTX2_TIM_MP) \ + FP(stats_mod_sp, 1, 0, 0, \ + OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ + FP(stats_mod_mp, 1, 0, 1, \ + OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \ + FP(stats_mod_fb_sp, 1, 1, 0, \ + OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ + FP(stats_mod_fb_mp, 1, 1, 1, \ + OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_FB | OTX2_TIM_MP) + +#define TIM_ARM_TMO_FASTPATH_MODES \ + FP(dfb, 0, 0, OTX2_TIM_ENA_DFB) \ + FP(fb, 0, 1, OTX2_TIM_ENA_FB) \ + FP(stats_dfb, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_DFB) \ + FP(stats_fb, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_ENA_FB) + +#define FP(_name, _f3, _f2, _f1, flags) \ + uint16_t otx2_tim_arm_burst_##_name( \ + const struct rte_event_timer_adapter *adptr, \ + struct rte_event_timer **tim, const uint16_t nb_timers); TIM_ARM_FASTPATH_MODES #undef FP -#define FP(_name, _f3, _f2, _f1, flags) \ -uint16_t \ -otx2_tim_arm_tmo_tick_burst_ ## _name( \ - const struct rte_event_timer_adapter *adptr, \ - struct rte_event_timer **tim, \ - const uint64_t timeout_tick, const uint16_t nb_timers); +#define FP(_name, _f2, _f1, flags) \ + uint16_t otx2_tim_arm_tmo_tick_burst_##_name( \ + const struct rte_event_timer_adapter *adptr, \ + struct rte_event_timer **tim, const uint64_t timeout_tick, \ + const uint16_t nb_timers); TIM_ARM_TMO_FASTPATH_MODES #undef FP diff --git a/drivers/event/octeontx2/otx2_tim_worker.c b/drivers/event/octeontx2/otx2_tim_worker.c index 4b5cfdc725..eb901844d0 100644 --- a/drivers/event/octeontx2/otx2_tim_worker.c +++ b/drivers/event/octeontx2/otx2_tim_worker.c @@ -136,7 +136,7 @@ tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr, return set_timers; } -#define FP(_name, _f4, _f3, _f2, _f1, _flags) \ +#define FP(_name, _f3, _f2, _f1, _flags) \ uint16_t __rte_noinline \ otx2_tim_arm_burst_ ## _name(const struct rte_event_timer_adapter *adptr, \ struct rte_event_timer 
**tim, \ @@ -147,7 +147,7 @@ otx2_tim_arm_burst_ ## _name(const struct rte_event_timer_adapter *adptr, \ TIM_ARM_FASTPATH_MODES #undef FP -#define FP(_name, _f3, _f2, _f1, _flags) \ +#define FP(_name, _f2, _f1, _flags) \ uint16_t __rte_noinline \ otx2_tim_arm_tmo_tick_burst_ ## _name( \ const struct rte_event_timer_adapter *adptr, \ diff --git a/drivers/event/octeontx2/otx2_tim_worker.h b/drivers/event/octeontx2/otx2_tim_worker.h index af2f864d72..f03912b811 100644 --- a/drivers/event/octeontx2/otx2_tim_worker.h +++ b/drivers/event/octeontx2/otx2_tim_worker.h @@ -115,27 +115,27 @@ tim_bkt_clr_nent(struct otx2_tim_bkt *bktp) return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL); } +static inline uint64_t +tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R) +{ + return (n - (d * rte_reciprocal_divide_u64(n, &R))); +} + static __rte_always_inline void -tim_get_target_bucket(struct otx2_tim_ring * const tim_ring, +tim_get_target_bucket(struct otx2_tim_ring *const tim_ring, const uint32_t rel_bkt, struct otx2_tim_bkt **bkt, - struct otx2_tim_bkt **mirr_bkt, const uint8_t flag) + struct otx2_tim_bkt **mirr_bkt) { const uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc; - uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc, - &tim_ring->fast_div) + rel_bkt; - uint32_t mirr_bucket = 0; - - if (flag & OTX2_TIM_BKT_MOD) { - bucket = bucket % tim_ring->nb_bkts; - mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) % - tim_ring->nb_bkts; - } - if (flag & OTX2_TIM_BKT_AND) { - bucket = bucket & (tim_ring->nb_bkts - 1); - mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) & - (tim_ring->nb_bkts - 1); - } - + uint64_t bucket = + rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) + + rel_bkt; + uint64_t mirr_bucket = 0; + + bucket = + tim_bkt_fast_mod(bucket, tim_ring->nb_bkts, tim_ring->fast_bkt); + mirr_bucket = tim_bkt_fast_mod(bucket + (tim_ring->nb_bkts >> 1), + tim_ring->nb_bkts, tim_ring->fast_bkt); *bkt = &tim_ring->bkt[bucket]; *mirr_bkt = 
&tim_ring->bkt[mirr_bucket]; } @@ -236,7 +236,7 @@ tim_add_entry_sp(struct otx2_tim_ring * const tim_ring, int16_t rem; __retry: - tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags); + tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt); /* Get Bucket sema*/ lock_sema = tim_bkt_fetch_sema_lock(bkt); @@ -322,7 +322,7 @@ tim_add_entry_mp(struct otx2_tim_ring * const tim_ring, int16_t rem; __retry: - tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags); + tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt); /* Get Bucket sema*/ lock_sema = tim_bkt_fetch_sema_lock(bkt); @@ -454,7 +454,7 @@ tim_add_entry_brst(struct otx2_tim_ring * const tim_ring, uint8_t lock_cnt; __retry: - tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags); + tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt); /* Only one thread beyond this. */ lock_sema = tim_bkt_inc_lock(bkt); -- 2.20.1