/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */

#ifndef __CNXK_TIM_WORKER_H__
#define __CNXK_TIM_WORKER_H__

#include "cnxk_tim_evdev.h"
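
/*
 * Layout of the 64-bit bucket control word (w1), as implied by the
 * TIM_BUCKET_W1_* shift/mask macros in cnxk_tim_evdev.h (the bit
 * positions below are the ones the helpers and inline asm rely on):
 *
 *      [63:48] chunk_remainder - free slots left in the current chunk
 *      [47:40] lock            - producer lock reference count
 *      [39:35] reserved
 *      [34]    BSK             - bucket skipped by hardware
 *      [33]    HBT             - hardware bucket traversal in progress
 *      [32]    SBT             - software bucket traversal in progress
 *      [31:0]  nb_entry        - number of entries armed in the bucket
 *
 * All helpers operate on w1 (or its sub-words) with GCC __atomic
 * builtins so that arm/cancel paths can run concurrently with the
 * timer hardware.
 */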

static inline uint8_t
cnxk_tim_bkt_fetch_lock(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK;
}

static inline int16_t
cnxk_tim_bkt_fetch_rem(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) &
               TIM_BUCKET_W1_M_CHUNK_REMAINDER;
}

static inline int16_t
cnxk_tim_bkt_get_rem(struct cnxk_tim_bkt *bktp)
{
        return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
}

static inline void
cnxk_tim_bkt_set_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
{
        __atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline void
cnxk_tim_bkt_sub_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
{
        __atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline uint8_t
cnxk_tim_bkt_get_hbt(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT;
}

static inline uint8_t
cnxk_tim_bkt_get_bsk(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK;
}

static inline uint64_t
cnxk_tim_bkt_clr_bsk(struct cnxk_tim_bkt *bktp)
{
        /* Clear everything except lock. */
        const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
}
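
/*
 * Note on the two "sema" helpers below: TIM_BUCKET_SEMA and
 * TIM_BUCKET_SEMA_WLOCK (cnxk_tim_evdev.h) are additive constants.
 * Because chunk_remainder occupies the top bits of w1, adding the
 * full chunk_remainder mask is equivalent to subtracting one from
 * that field, so a single 64-bit fetch-add can claim a chunk slot
 * and (for the WLOCK variant) take a lock reference at the same time.
 */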

static inline uint64_t
cnxk_tim_bkt_fetch_sema_lock(struct cnxk_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
                                  __ATOMIC_ACQUIRE);
}

static inline uint64_t
cnxk_tim_bkt_fetch_sema(struct cnxk_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
}

static inline uint64_t
cnxk_tim_bkt_inc_lock(struct cnxk_tim_bkt *bktp)
{
        const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
}

static inline void
cnxk_tim_bkt_dec_lock(struct cnxk_tim_bkt *bktp)
{
        __atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
}

static inline void
cnxk_tim_bkt_dec_lock_relaxed(struct cnxk_tim_bkt *bktp)
{
        __atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
}

static inline uint32_t
cnxk_tim_bkt_get_nent(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) &
               TIM_BUCKET_W1_M_NUM_ENTRIES;
}

static inline void
cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
{
        __atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
}

static inline void
cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
{
        __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
}

static inline uint64_t
cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
{
        const uint64_t v =
                ~(TIM_BUCKET_W1_M_NUM_ENTRIES << TIM_BUCKET_W1_S_NUM_ENTRIES);

        return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
}
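
/*
 * cnxk_tim_bkt_fast_mod() computes n % d without a hardware divide,
 * using a reciprocal precomputed at ring creation time, e.g. with
 * d == 8 and a matching reciprocal R, cnxk_tim_bkt_fast_mod(10, 8, R)
 * yields 2. The bucket count need not be a power of two, hence the
 * reciprocal instead of a simple mask.
 */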

static inline uint64_t
cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
{
        return (n - (d * rte_reciprocal_divide_u64(n, &R)));
}
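
/*
 * The "mirror" bucket sits half a ring ahead of the target bucket.
 * Producer-side chunk state (current_chunk and the end-of-chunk link)
 * is kept there rather than in the live bucket, which keeps software
 * chunk bookkeeping off the bucket the hardware is about to traverse.
 */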

static __rte_always_inline void
cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
                           const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
                           struct cnxk_tim_bkt **mirr_bkt)
{
        const uint64_t bkt_cyc = cnxk_tim_cntvct() - tim_ring->ring_start_cyc;
        uint64_t bucket =
                rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) +
                rel_bkt;
        uint64_t mirr_bucket = 0;

        bucket = cnxk_tim_bkt_fast_mod(bucket, tim_ring->nb_bkts,
                                       tim_ring->fast_bkt);
        mirr_bucket =
                cnxk_tim_bkt_fast_mod(bucket + (tim_ring->nb_bkts >> 1),
                                      tim_ring->nb_bkts, tim_ring->fast_bkt);
        *bkt = &tim_ring->bkt[bucket];
        *mirr_bkt = &tim_ring->bkt[mirr_bucket];
}

static struct cnxk_tim_ent *
cnxk_tim_clr_bkt(struct cnxk_tim_ring *const tim_ring,
                 struct cnxk_tim_bkt *const bkt)
{
#define TIM_MAX_OUTSTANDING_OBJ 64
        void *pend_chunks[TIM_MAX_OUTSTANDING_OBJ];
        struct cnxk_tim_ent *chunk;
        struct cnxk_tim_ent *pnext;
        uint8_t objs = 0;

        /* Free every chunk after the first back to the mempool, in
         * batches of TIM_MAX_OUTSTANDING_OBJ; the first chunk is
         * returned for reuse.
         */
        chunk = ((struct cnxk_tim_ent *)(uintptr_t)bkt->first_chunk);
        chunk = (struct cnxk_tim_ent *)(uintptr_t)(chunk +
                                                   tim_ring->nb_chunk_slots)
                        ->w0;
        while (chunk) {
                pnext = (struct cnxk_tim_ent *)(uintptr_t)(
                        (chunk + tim_ring->nb_chunk_slots)->w0);
                if (objs == TIM_MAX_OUTSTANDING_OBJ) {
                        rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks,
                                             objs);
                        objs = 0;
                }
                pend_chunks[objs++] = chunk;
                chunk = pnext;
        }

        if (objs)
                rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks, objs);

        return (struct cnxk_tim_ent *)(uintptr_t)bkt->first_chunk;
}
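
/*
 * Two chunk allocation strategies follow. With CNXK_TIM_ENA_FB
 * cnxk_tim_refill_chunk() reuses the first chunk of an already-expired
 * bucket when it can and otherwise pulls from the mempool; with
 * CNXK_TIM_ENA_DFB cnxk_tim_insert_chunk() always pulls a fresh chunk
 * and links it at the tail. In both cases the slot past the last entry
 * of a chunk holds the pointer to the next chunk.
 */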

static struct cnxk_tim_ent *
cnxk_tim_refill_chunk(struct cnxk_tim_bkt *const bkt,
                      struct cnxk_tim_bkt *const mirr_bkt,
                      struct cnxk_tim_ring *const tim_ring)
{
        struct cnxk_tim_ent *chunk;

        if (bkt->nb_entry || !bkt->first_chunk) {
                if (unlikely(rte_mempool_get(tim_ring->chunk_pool,
                                             (void **)&chunk)))
                        return NULL;
                if (bkt->nb_entry) {
                        *(uint64_t *)(((struct cnxk_tim_ent *)
                                               mirr_bkt->current_chunk) +
                                      tim_ring->nb_chunk_slots) =
                                (uintptr_t)chunk;
                } else {
                        bkt->first_chunk = (uintptr_t)chunk;
                }
        } else {
                chunk = cnxk_tim_clr_bkt(tim_ring, bkt);
                bkt->first_chunk = (uintptr_t)chunk;
        }
        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;

        return chunk;
}

static struct cnxk_tim_ent *
cnxk_tim_insert_chunk(struct cnxk_tim_bkt *const bkt,
                      struct cnxk_tim_bkt *const mirr_bkt,
                      struct cnxk_tim_ring *const tim_ring)
{
        struct cnxk_tim_ent *chunk;

        if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk)))
                return NULL;

        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
        if (bkt->nb_entry) {
                *(uint64_t *)(((struct cnxk_tim_ent *)(uintptr_t)
                                       mirr_bkt->current_chunk) +
                              tim_ring->nb_chunk_slots) = (uintptr_t)chunk;
        } else {
                bkt->first_chunk = (uintptr_t)chunk;
        }

        return chunk;
}
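
/*
 * Single-producer arm path: the caller guarantees only one thread arms
 * timers on this ring, so chunk_remainder can be updated non-atomically
 * here. Illustrative call sequence only; the real arm path lives in
 * cnxk_tim_worker.c and the cnxk_tim_ent encoding shown is an
 * assumption based on cnxk_tim_format_event():
 *
 *      struct cnxk_tim_ent entry;
 *
 *      entry.w0 = ...;              // TIM event metadata (assumed)
 *      entry.wqe = tim->ev.u64;     // work-queue entry to deliver
 *      ret = cnxk_tim_add_entry_sp(tim_ring, rel_bkt, tim, &entry,
 *                                  flags);
 */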

static __rte_always_inline int
cnxk_tim_add_entry_sp(struct cnxk_tim_ring *const tim_ring,
                      const uint32_t rel_bkt, struct rte_event_timer *const tim,
                      const struct cnxk_tim_ent *const pent,
                      const uint8_t flags)
{
        struct cnxk_tim_bkt *mirr_bkt;
        struct cnxk_tim_ent *chunk;
        struct cnxk_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        cnxk_tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);

        /* Get bucket sema. */
        lock_sema = cnxk_tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(cnxk_tim_bkt_get_hbt(lock_sema))) {
                if (cnxk_tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        /* Wait for HBT (bit 33) to clear using WFE. */
                        asm volatile(PLT_CPU_FEATURE_PREAMBLE
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbz %[hbt], 33, dne%=   \n"
                                     "          sevl                    \n"
                                     "rty%=:    wfe                     \n"
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbnz %[hbt], 33, rty%=  \n"
                                     "dne%=:                            \n"
                                     : [hbt] "=&r"(hbt_state)
                                     : [w1] "r"((&bkt->w1))
                                     : "memory");
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_RELAXED);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                cnxk_tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }
        /* Insert the work. */
        rem = cnxk_tim_bkt_fetch_rem(lock_sema);

        if (!rem) {
                if (flags & CNXK_TIM_ENA_FB)
                        chunk = cnxk_tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & CNXK_TIM_ENA_DFB)
                        chunk = cnxk_tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        bkt->chunk_remainder = 0;
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        cnxk_tim_bkt_dec_lock(bkt);
                        return -ENOMEM;
                }
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
        } else {
                chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
        }

        /* Copy work entry. */
        *chunk = *pent;

        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        __atomic_store_n(&tim->state, RTE_EVENT_TIMER_ARMED, __ATOMIC_RELEASE);
        cnxk_tim_bkt_inc_nent(bkt);
        cnxk_tim_bkt_dec_lock_relaxed(bkt);

        return 0;
}
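
/*
 * Multi-producer arm path. A slot is claimed with the combined
 * fetch-add in cnxk_tim_bkt_fetch_sema_lock(): a negative remainder in
 * the returned word means another producer is mid-way through
 * installing a new chunk, so the thread backs off until the
 * remainder's sign bit (bit 63 of w1) clears and then retries. The
 * producer whose fetch-add observed a remainder of exactly zero owns
 * the chunk switch and publishes the new chunk with a release store.
 */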

static __rte_always_inline int
cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
                      const uint32_t rel_bkt, struct rte_event_timer *const tim,
                      const struct cnxk_tim_ent *const pent,
                      const uint8_t flags)
{
        struct cnxk_tim_bkt *mirr_bkt;
        struct cnxk_tim_ent *chunk;
        struct cnxk_tim_bkt *bkt;
        uint64_t lock_sema;
        int64_t rem;

__retry:
        cnxk_tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);

        /* Get bucket sema. */
        lock_sema = cnxk_tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(cnxk_tim_bkt_get_hbt(lock_sema))) {
                if (cnxk_tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        /* Wait for HBT (bit 33) to clear using WFE. */
                        asm volatile(PLT_CPU_FEATURE_PREAMBLE
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbz %[hbt], 33, dne%=   \n"
                                     "          sevl                    \n"
                                     "rty%=:    wfe                     \n"
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbnz %[hbt], 33, rty%=  \n"
                                     "dne%=:                            \n"
                                     : [hbt] "=&r"(hbt_state)
                                     : [w1] "r"((&bkt->w1))
                                     : "memory");
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_RELAXED);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                cnxk_tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        rem = cnxk_tim_bkt_fetch_rem(lock_sema);
        if (rem < 0) {
                /* Another producer is installing a new chunk; wait for
                 * chunk_remainder (top bits of w1) to turn non-negative.
                 */
                cnxk_tim_bkt_dec_lock(bkt);
#ifdef RTE_ARCH_ARM64
                asm volatile(PLT_CPU_FEATURE_PREAMBLE
                             "          ldxr %[rem], [%[crem]]  \n"
                             "          tbz %[rem], 63, dne%=   \n"
                             "          sevl                    \n"
                             "rty%=:    wfe                     \n"
                             "          ldxr %[rem], [%[crem]]  \n"
                             "          tbnz %[rem], 63, rty%=  \n"
                             "dne%=:                            \n"
                             : [rem] "=&r"(rem)
                             : [crem] "r"(&bkt->w1)
                             : "memory");
#else
                while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
                       0)
                        ;
#endif
                goto __retry;
        } else if (!rem) {
                /* Only one thread can be here. */
                if (flags & CNXK_TIM_ENA_FB)
                        chunk = cnxk_tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & CNXK_TIM_ENA_DFB)
                        chunk = cnxk_tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        cnxk_tim_bkt_set_rem(bkt, 0);
                        cnxk_tim_bkt_dec_lock(bkt);
                        return -ENOMEM;
                }
                *chunk = *pent;
                /* Wait for other lock holders before publishing. */
                if (cnxk_tim_bkt_fetch_lock(lock_sema)) {
                        do {
                                lock_sema = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_RELAXED);
                        } while (cnxk_tim_bkt_fetch_lock(lock_sema) - 1);
                }
                rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                __atomic_store_n(&bkt->chunk_remainder,
                                 tim_ring->nb_chunk_slots - 1,
                                 __ATOMIC_RELEASE);
        } else {
                chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
                *chunk = *pent;
        }

        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        __atomic_store_n(&tim->state, RTE_EVENT_TIMER_ARMED, __ATOMIC_RELEASE);
        cnxk_tim_bkt_inc_nent(bkt);
        cnxk_tim_bkt_dec_lock_relaxed(bkt);

        return 0;
}

static inline uint16_t
cnxk_tim_cpy_wrk(uint16_t index, uint16_t cpy_lmt, struct cnxk_tim_ent *chunk,
                 struct rte_event_timer **const tim,
                 const struct cnxk_tim_ent *const ents,
                 const struct cnxk_tim_bkt *const bkt)
{
        for (; index < cpy_lmt; index++) {
                *chunk = *(ents + index);
                tim[index]->impl_opaque[0] = (uintptr_t)chunk++;
                tim[index]->impl_opaque[1] = (uintptr_t)bkt;
                tim[index]->state = RTE_EVENT_TIMER_ARMED;
        }

        return index;
}
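
/*
 * Burst arm path. Unlike the SP/MP single-entry paths, the burst
 * variant takes the bucket lock exclusively (it retries until it is
 * the sole lock holder), so exactly one thread copies the whole
 * burst, splitting it across chunks as needed.
 */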

/* Burst mode functions */
static inline int
cnxk_tim_add_entry_brst(struct cnxk_tim_ring *const tim_ring,
                        const uint16_t rel_bkt,
                        struct rte_event_timer **const tim,
                        const struct cnxk_tim_ent *ents,
                        const uint16_t nb_timers, const uint8_t flags)
{
        struct cnxk_tim_ent *chunk = NULL;
        struct cnxk_tim_bkt *mirr_bkt;
        struct cnxk_tim_bkt *bkt;
        uint16_t chunk_remainder;
        uint16_t index = 0;
        uint64_t lock_sema;
        int16_t rem, crem;
        uint8_t lock_cnt;

__retry:
        cnxk_tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);

        /* Only one thread beyond this. */
        lock_sema = cnxk_tim_bkt_inc_lock(bkt);
        lock_cnt = (uint8_t)((lock_sema >> TIM_BUCKET_W1_S_LOCK) &
                             TIM_BUCKET_W1_M_LOCK);

        if (lock_cnt) {
                cnxk_tim_bkt_dec_lock(bkt);
#ifdef RTE_ARCH_ARM64
                /* Wait for the bucket lock to drop to zero using WFE. */
                asm volatile(PLT_CPU_FEATURE_PREAMBLE
                             "          ldxrb %w[lock_cnt], [%[lock]]   \n"
                             "          tst %w[lock_cnt], 255           \n"
                             "          beq dne%=                       \n"
                             "          sevl                            \n"
                             "rty%=:    wfe                             \n"
                             "          ldxrb %w[lock_cnt], [%[lock]]   \n"
                             "          tst %w[lock_cnt], 255           \n"
                             "          bne rty%=                       \n"
                             "dne%=:                                    \n"
                             : [lock_cnt] "=&r"(lock_cnt)
                             : [lock] "r"(&bkt->lock)
                             : "memory");
#else
                while (__atomic_load_n(&bkt->lock, __ATOMIC_RELAXED))
                        ;
#endif
                goto __retry;
        }

        /* Bucket related checks. */
        if (unlikely(cnxk_tim_bkt_get_hbt(lock_sema))) {
                if (cnxk_tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        /* Wait for HBT (bit 33) to clear using WFE. */
                        asm volatile(PLT_CPU_FEATURE_PREAMBLE
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbz %[hbt], 33, dne%=   \n"
                                     "          sevl                    \n"
                                     "rty%=:    wfe                     \n"
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbnz %[hbt], 33, rty%=  \n"
                                     "dne%=:                            \n"
                                     : [hbt] "=&r"(hbt_state)
                                     : [w1] "r"((&bkt->w1))
                                     : "memory");
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_RELAXED);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                cnxk_tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        chunk_remainder = cnxk_tim_bkt_fetch_rem(lock_sema);
        rem = chunk_remainder - nb_timers;
        if (rem < 0) {
                crem = tim_ring->nb_chunk_slots - chunk_remainder;
                if (chunk_remainder && crem) {
                        chunk = ((struct cnxk_tim_ent *)
                                         mirr_bkt->current_chunk) +
                                crem;

                        index = cnxk_tim_cpy_wrk(index, chunk_remainder, chunk,
                                                 tim, ents, bkt);
                        cnxk_tim_bkt_sub_rem(bkt, chunk_remainder);
                        cnxk_tim_bkt_add_nent(bkt, chunk_remainder);
                }

                if (flags & CNXK_TIM_ENA_FB)
                        chunk = cnxk_tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & CNXK_TIM_ENA_DFB)
                        chunk = cnxk_tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        cnxk_tim_bkt_dec_lock(bkt);
                        rte_errno = ENOMEM;
                        tim[index]->state = RTE_EVENT_TIMER_ERROR;
                        return crem;
                }
                *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                cnxk_tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);

                rem = nb_timers - chunk_remainder;
                cnxk_tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - rem);
                cnxk_tim_bkt_add_nent(bkt, rem);
        } else {
                chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
                chunk += (tim_ring->nb_chunk_slots - chunk_remainder);

                cnxk_tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);
                cnxk_tim_bkt_sub_rem(bkt, nb_timers);
                cnxk_tim_bkt_add_nent(bkt, nb_timers);
        }

        cnxk_tim_bkt_dec_lock(bkt);

        return nb_timers;
}
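
/*
 * Cancel path. The entry and bucket recorded in impl_opaque[] at arm
 * time are validated before the entry is scrubbed: a stale wqe, or a
 * bucket that the hardware has already started traversing or fully
 * drained, means the timer can no longer be cancelled.
 */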

static int
cnxk_tim_rm_entry(struct rte_event_timer *tim)
{
        struct cnxk_tim_ent *entry;
        struct cnxk_tim_bkt *bkt;
        uint64_t lock_sema;

        if (tim->impl_opaque[1] == 0 || tim->impl_opaque[0] == 0)
                return -ENOENT;

        entry = (struct cnxk_tim_ent *)(uintptr_t)tim->impl_opaque[0];
        if (entry->wqe != tim->ev.u64) {
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                return -ENOENT;
        }

        bkt = (struct cnxk_tim_bkt *)(uintptr_t)tim->impl_opaque[1];
        lock_sema = cnxk_tim_bkt_inc_lock(bkt);
        if (cnxk_tim_bkt_get_hbt(lock_sema) ||
            !cnxk_tim_bkt_get_nent(lock_sema)) {
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                cnxk_tim_bkt_dec_lock(bkt);
                return -ENOENT;
        }

        entry->w0 = 0;
        entry->wqe = 0;
        tim->state = RTE_EVENT_TIMER_CANCELED;
        tim->impl_opaque[0] = 0;
        tim->impl_opaque[1] = 0;
        cnxk_tim_bkt_dec_lock(bkt);

        return 0;
}

#endif /* __CNXK_TIM_WORKER_H__ */