/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#ifndef __OTX2_TIM_WORKER_H__
#define __OTX2_TIM_WORKER_H__

#include "otx2_tim_evdev.h"
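/*
 * The helpers below operate on the bucket control word (w1), which packs
 * the entry count, the HBT/BSK status bits, the lock counter and the chunk
 * remainder (see the TIM_BUCKET_W1_S_* shifts). All accesses go through
 * GCC __atomic builtins so producers and the hardware bucket traversal can
 * run concurrently.
 */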
static inline uint8_t
tim_bkt_fetch_lock(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_LOCK) &
		TIM_BUCKET_W1_M_LOCK;
}

static inline int16_t
tim_bkt_fetch_rem(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) &
		TIM_BUCKET_W1_M_CHUNK_REMAINDER;
}
static inline int16_t
tim_bkt_get_rem(struct otx2_tim_bkt *bktp)
{
	return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
}

static inline void
tim_bkt_set_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
	__atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_sub_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
	__atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}
static inline uint8_t
tim_bkt_get_hbt(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT;
}

static inline uint8_t
tim_bkt_get_bsk(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK;
}
static inline uint64_t
tim_bkt_clr_bsk(struct otx2_tim_bkt *bktp)
{
	/* Clear everything except lock. */
	const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
}
static inline uint64_t
tim_bkt_fetch_sema_lock(struct otx2_tim_bkt *bktp)
{
	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
				  __ATOMIC_ACQUIRE);
}
static inline uint64_t
tim_bkt_fetch_sema(struct otx2_tim_bkt *bktp)
{
	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
}
static inline uint64_t
tim_bkt_inc_lock(struct otx2_tim_bkt *bktp)
{
	const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

	return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
}
static inline void
tim_bkt_dec_lock(struct otx2_tim_bkt *bktp)
{
	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
}

static inline void
tim_bkt_dec_lock_relaxed(struct otx2_tim_bkt *bktp)
{
	__atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
}
static inline uint32_t
tim_bkt_get_nent(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) &
		TIM_BUCKET_W1_M_NUM_ENTRIES;
}
static inline void
tim_bkt_inc_nent(struct otx2_tim_bkt *bktp)
{
	__atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_add_nent(struct otx2_tim_bkt *bktp, uint32_t v)
{
	__atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
}
static inline uint64_t
tim_bkt_clr_nent(struct otx2_tim_bkt *bktp)
{
	const uint64_t v = ~(TIM_BUCKET_W1_M_NUM_ENTRIES <<
			     TIM_BUCKET_W1_S_NUM_ENTRIES);

	return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
}
static inline uint64_t
tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
{
	return (n - (d * rte_reciprocal_divide_u64(n, &R)));
}
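/*
 * Map the current time plus the requested relative bucket to an index in
 * the bucket array: the elapsed cycle count is converted to a bucket
 * number with a precomputed reciprocal divide and wrapped with
 * tim_bkt_fast_mod(). The mirror bucket sits half the ring away; the arm
 * paths below use its current_chunk field to track the chunk being filled
 * for the target bucket.
 */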
static __rte_always_inline void
tim_get_target_bucket(struct otx2_tim_ring *const tim_ring,
		      const uint32_t rel_bkt, struct otx2_tim_bkt **bkt,
		      struct otx2_tim_bkt **mirr_bkt)
{
	const uint64_t bkt_cyc = tim_cntvct() - tim_ring->ring_start_cyc;
	uint64_t bucket =
		rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) +
		rel_bkt;
	uint64_t mirr_bucket = 0;

	bucket =
		tim_bkt_fast_mod(bucket, tim_ring->nb_bkts, tim_ring->fast_bkt);
	mirr_bucket = tim_bkt_fast_mod(bucket + (tim_ring->nb_bkts >> 1),
				       tim_ring->nb_bkts, tim_ring->fast_bkt);
	*bkt = &tim_ring->bkt[bucket];
	*mirr_bkt = &tim_ring->bkt[mirr_bucket];
}
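/*
 * Walk the chunk list hanging off a bucket and return every chunk except
 * the first one to the mempool, batching the frees in groups of
 * TIM_MAX_OUTSTANDING_OBJ. The first chunk is kept and returned so the
 * caller can reuse it.
 */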
static struct otx2_tim_ent *
tim_clr_bkt(struct otx2_tim_ring * const tim_ring,
	    struct otx2_tim_bkt * const bkt)
{
#define TIM_MAX_OUTSTANDING_OBJ 64
	void *pend_chunks[TIM_MAX_OUTSTANDING_OBJ];
	struct otx2_tim_ent *chunk;
	struct otx2_tim_ent *pnext;
	uint8_t objs = 0;

	chunk = ((struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk);
	chunk = (struct otx2_tim_ent *)(uintptr_t)(chunk +
			tim_ring->nb_chunk_slots)->w0;
	while (chunk) {
		pnext = (struct otx2_tim_ent *)(uintptr_t)
			((chunk + tim_ring->nb_chunk_slots)->w0);
		if (objs == TIM_MAX_OUTSTANDING_OBJ) {
			rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks,
					     objs);
			objs = 0;
		}
		pend_chunks[objs++] = chunk;
		chunk = pnext;
	}

	if (objs)
		rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks,
				     objs);

	return (struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk;
}
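/*
 * Chunk allocation path used when OTX2_TIM_ENA_FB is set: either take a
 * fresh chunk from the mempool and link it after the current one, or, when
 * the bucket has no entries but still owns a chain, recycle that chain via
 * tim_clr_bkt(). Returns NULL if the mempool is exhausted.
 */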
static struct otx2_tim_ent *
tim_refill_chunk(struct otx2_tim_bkt * const bkt,
		 struct otx2_tim_bkt * const mirr_bkt,
		 struct otx2_tim_ring * const tim_ring)
{
	struct otx2_tim_ent *chunk;

	if (bkt->nb_entry || !bkt->first_chunk) {
		if (unlikely(rte_mempool_get(tim_ring->chunk_pool,
					     (void **)&chunk)))
			return NULL;
		if (bkt->nb_entry) {
			*(uint64_t *)(((struct otx2_tim_ent *)
					mirr_bkt->current_chunk) +
				      tim_ring->nb_chunk_slots) =
				(uintptr_t)chunk;
		} else {
			bkt->first_chunk = (uintptr_t)chunk;
		}
	} else {
		chunk = tim_clr_bkt(tim_ring, bkt);
		bkt->first_chunk = (uintptr_t)chunk;
	}

	*(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;

	return chunk;
}
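/*
 * Chunk allocation path used when OTX2_TIM_ENA_DFB is set: always take a
 * new chunk from the mempool, zero its link word and append it to the
 * bucket's chain (or make it the first chunk of an empty bucket).
 */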
static struct otx2_tim_ent *
tim_insert_chunk(struct otx2_tim_bkt * const bkt,
		 struct otx2_tim_bkt * const mirr_bkt,
		 struct otx2_tim_ring * const tim_ring)
{
	struct otx2_tim_ent *chunk;

	if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk)))
		return NULL;

	*(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
	if (bkt->nb_entry) {
		*(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t)
				mirr_bkt->current_chunk) +
			      tim_ring->nb_chunk_slots) = (uintptr_t)chunk;
	} else {
		bkt->first_chunk = (uintptr_t)chunk;
	}

	return chunk;
}
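/*
 * Single-producer arm: take the bucket semaphore, wait for any in-progress
 * hardware traversal (HBT) of the bucket to finish (retrying with a
 * recomputed target bucket when needed), then copy the work entry into the
 * current chunk, allocating a new chunk when the remainder runs out.
 */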
static __rte_always_inline int
tim_add_entry_sp(struct otx2_tim_ring * const tim_ring,
		 const uint32_t rel_bkt,
		 struct rte_event_timer * const tim,
		 const struct otx2_tim_ent * const pent,
		 const uint8_t flags)
{
	struct otx2_tim_bkt *mirr_bkt;
	struct otx2_tim_ent *chunk;
	struct otx2_tim_bkt *bkt;
	uint64_t lock_sema;
	int16_t rem;

__retry:
	tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);

	/* Get bucket sema. */
	lock_sema = tim_bkt_fetch_sema_lock(bkt);

	/* Bucket related checks. */
	if (unlikely(tim_bkt_get_hbt(lock_sema))) {
		if (tim_bkt_get_nent(lock_sema) != 0) {
			uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
			asm volatile("	ldxr %[hbt], [%[w1]]	\n"
				     "	tbz %[hbt], 33, dne%=	\n"
				     "	sevl			\n"
				     "rty%=: wfe		\n"
				     "	ldxr %[hbt], [%[w1]]	\n"
				     "	tbnz %[hbt], 33, rty%=	\n"
				     "dne%=:			\n"
				     : [hbt] "=&r"(hbt_state)
				     : [w1] "r"((&bkt->w1))
				     : "memory");
#else
			do {
				hbt_state = __atomic_load_n(&bkt->w1,
							    __ATOMIC_RELAXED);
			} while (hbt_state & BIT_ULL(33));
#endif

			if (!(hbt_state & BIT_ULL(34))) {
				tim_bkt_dec_lock(bkt);
				goto __retry;
			}
		}
	}

	/* Insert the work. */
	rem = tim_bkt_fetch_rem(lock_sema);

	if (!rem) {
		if (flags & OTX2_TIM_ENA_FB)
			chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
		if (flags & OTX2_TIM_ENA_DFB)
			chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

		if (unlikely(chunk == NULL)) {
			bkt->chunk_remainder = 0;
			tim->impl_opaque[0] = 0;
			tim->impl_opaque[1] = 0;
			tim->state = RTE_EVENT_TIMER_ERROR;
			tim_bkt_dec_lock(bkt);
			return -ENOMEM;
		}
		mirr_bkt->current_chunk = (uintptr_t)chunk;
		bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
	} else {
		chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
		chunk += tim_ring->nb_chunk_slots - rem;
	}

	/* Copy work entry. */
	*chunk = *pent;

	tim->impl_opaque[0] = (uintptr_t)chunk;
	tim->impl_opaque[1] = (uintptr_t)bkt;
	__atomic_store_n(&tim->state, RTE_EVENT_TIMER_ARMED, __ATOMIC_RELEASE);
	tim_bkt_inc_nent(bkt);
	tim_bkt_dec_lock_relaxed(bkt);

	return 0;
}
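/*
 * Multi-producer arm: same flow as the SP variant, but a negative chunk
 * remainder means another producer is busy allocating a new chunk, so back
 * off until the remainder becomes non-negative and retry.
 */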
static __rte_always_inline int
tim_add_entry_mp(struct otx2_tim_ring * const tim_ring,
		 const uint32_t rel_bkt,
		 struct rte_event_timer * const tim,
		 const struct otx2_tim_ent * const pent,
		 const uint8_t flags)
{
	struct otx2_tim_bkt *mirr_bkt;
	struct otx2_tim_ent *chunk;
	struct otx2_tim_bkt *bkt;
	uint64_t lock_sema;
	int16_t rem;

__retry:
	tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);
	/* Get bucket sema. */
	lock_sema = tim_bkt_fetch_sema_lock(bkt);

	/* Bucket related checks. */
	if (unlikely(tim_bkt_get_hbt(lock_sema))) {
		if (tim_bkt_get_nent(lock_sema) != 0) {
			uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
			asm volatile("	ldxr %[hbt], [%[w1]]	\n"
				     "	tbz %[hbt], 33, dne%=	\n"
				     "	sevl			\n"
				     "rty%=: wfe		\n"
				     "	ldxr %[hbt], [%[w1]]	\n"
				     "	tbnz %[hbt], 33, rty%=	\n"
				     "dne%=:			\n"
				     : [hbt] "=&r"(hbt_state)
				     : [w1] "r"((&bkt->w1))
				     : "memory");
#else
			do {
				hbt_state = __atomic_load_n(&bkt->w1,
							    __ATOMIC_RELAXED);
			} while (hbt_state & BIT_ULL(33));
#endif

			if (!(hbt_state & BIT_ULL(34))) {
				tim_bkt_dec_lock(bkt);
				goto __retry;
			}
		}
	}

	rem = tim_bkt_fetch_rem(lock_sema);
	if (rem < 0) {
		tim_bkt_dec_lock(bkt);
#ifdef RTE_ARCH_ARM64
		uint64_t w1;
		asm volatile("	ldxr %[w1], [%[crem]]	\n"
			     "	tbz %[w1], 63, dne%=	\n"
			     "	sevl			\n"
			     "rty%=: wfe		\n"
			     "	ldxr %[w1], [%[crem]]	\n"
			     "	tbnz %[w1], 63, rty%=	\n"
			     "dne%=:			\n"
			     : [w1] "=&r"(w1)
			     : [crem] "r"(&bkt->w1)
			     : "memory");
#else
		while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
		       0)
			;
#endif
		goto __retry;
	} else if (!rem) {
		/* Only one thread can be here. */
		if (flags & OTX2_TIM_ENA_FB)
			chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
		if (flags & OTX2_TIM_ENA_DFB)
			chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

		if (unlikely(chunk == NULL)) {
			tim->impl_opaque[0] = 0;
			tim->impl_opaque[1] = 0;
			tim->state = RTE_EVENT_TIMER_ERROR;
			tim_bkt_set_rem(bkt, 0);
			tim_bkt_dec_lock(bkt);
			return -ENOMEM;
		}
		*chunk = *pent;
		if (tim_bkt_fetch_lock(lock_sema)) {
			do {
				lock_sema = __atomic_load_n(&bkt->w1,
							    __ATOMIC_RELAXED);
			} while (tim_bkt_fetch_lock(lock_sema) - 1);
			rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
		}
		mirr_bkt->current_chunk = (uintptr_t)chunk;
		__atomic_store_n(&bkt->chunk_remainder,
				 tim_ring->nb_chunk_slots - 1,
				 __ATOMIC_RELEASE);
	} else {
		chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
		chunk += tim_ring->nb_chunk_slots - rem;
		*chunk = *pent;
	}

	tim->impl_opaque[0] = (uintptr_t)chunk;
	tim->impl_opaque[1] = (uintptr_t)bkt;
	__atomic_store_n(&tim->state, RTE_EVENT_TIMER_ARMED, __ATOMIC_RELEASE);
	tim_bkt_inc_nent(bkt);
	tim_bkt_dec_lock_relaxed(bkt);

	return 0;
}
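/*
 * Copy a range of entries from 'ents' into consecutive chunk slots and
 * stamp each timer's impl_opaque[]/state. Returns the next index so the
 * burst path can resume after a chunk boundary.
 */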
static inline uint16_t
tim_cpy_wrk(uint16_t index, uint16_t cpy_lmt,
	    struct otx2_tim_ent *chunk,
	    struct rte_event_timer ** const tim,
	    const struct otx2_tim_ent * const ents,
	    const struct otx2_tim_bkt * const bkt)
{
	for (; index < cpy_lmt; index++) {
		*chunk = *(ents + index);
		tim[index]->impl_opaque[0] = (uintptr_t)chunk++;
		tim[index]->impl_opaque[1] = (uintptr_t)bkt;
		tim[index]->state = RTE_EVENT_TIMER_ARMED;
	}

	return index;
}
/* Burst mode functions */
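/*
 * Arm a burst of timers that all land in the same bucket. The lock count
 * in w1 serializes writers: only the thread that saw a zero count
 * proceeds; everyone else drops its reference, spins until the bucket is
 * unlocked and retries.
 */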
static inline int
tim_add_entry_brst(struct otx2_tim_ring * const tim_ring,
		   const uint16_t rel_bkt,
		   struct rte_event_timer ** const tim,
		   const struct otx2_tim_ent *ents,
		   const uint16_t nb_timers, const uint8_t flags)
{
	struct otx2_tim_ent *chunk = NULL;
	struct otx2_tim_bkt *mirr_bkt;
	struct otx2_tim_bkt *bkt;
	uint16_t chunk_remainder;
	uint16_t index = 0;
	uint64_t lock_sema;
	int16_t rem, crem;
	uint8_t lock_cnt;

__retry:
	tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);

	/* Only one thread beyond this. */
	lock_sema = tim_bkt_inc_lock(bkt);
	lock_cnt = (uint8_t)
		((lock_sema >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK);

	if (lock_cnt) {
		tim_bkt_dec_lock(bkt);
#ifdef RTE_ARCH_ARM64
		asm volatile("	ldxrb %w[lock_cnt], [%[lock]]	\n"
			     "	tst %w[lock_cnt], 255		\n"
			     "	beq dne%=			\n"
			     "	sevl				\n"
			     "rty%=: wfe			\n"
			     "	ldxrb %w[lock_cnt], [%[lock]]	\n"
			     "	tst %w[lock_cnt], 255		\n"
			     "	bne rty%=			\n"
			     "dne%=:				\n"
			     : [lock_cnt] "=&r"(lock_cnt)
			     : [lock] "r"(&bkt->lock)
			     : "memory");
#else
		while (__atomic_load_n(&bkt->lock, __ATOMIC_RELAXED))
			;
#endif
		goto __retry;
	}

	/* Bucket related checks. */
	if (unlikely(tim_bkt_get_hbt(lock_sema))) {
		if (tim_bkt_get_nent(lock_sema) != 0) {
			uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
			asm volatile("	ldxr %[hbt], [%[w1]]	\n"
				     "	tbz %[hbt], 33, dne%=	\n"
				     "	sevl			\n"
				     "rty%=: wfe		\n"
				     "	ldxr %[hbt], [%[w1]]	\n"
				     "	tbnz %[hbt], 33, rty%=	\n"
				     "dne%=:			\n"
				     : [hbt] "=&r"(hbt_state)
				     : [w1] "r"((&bkt->w1))
				     : "memory");
#else
			do {
				hbt_state = __atomic_load_n(&bkt->w1,
							    __ATOMIC_RELAXED);
			} while (hbt_state & BIT_ULL(33));
#endif

			if (!(hbt_state & BIT_ULL(34))) {
				tim_bkt_dec_lock(bkt);
				goto __retry;
			}
		}
	}

	chunk_remainder = tim_bkt_fetch_rem(lock_sema);
	rem = chunk_remainder - nb_timers;
	if (rem < 0) {
		crem = tim_ring->nb_chunk_slots - chunk_remainder;
		if (chunk_remainder && crem) {
			chunk = ((struct otx2_tim_ent *)
					mirr_bkt->current_chunk) + crem;

			index = tim_cpy_wrk(index, chunk_remainder, chunk, tim,
					    ents, bkt);
			tim_bkt_sub_rem(bkt, chunk_remainder);
			tim_bkt_add_nent(bkt, chunk_remainder);
		}

		if (flags & OTX2_TIM_ENA_FB)
			chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
		if (flags & OTX2_TIM_ENA_DFB)
			chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

		if (unlikely(chunk == NULL)) {
			tim_bkt_dec_lock(bkt);
			rte_errno = ENOMEM;
			tim[index]->state = RTE_EVENT_TIMER_ERROR;
			return crem;
		}
		*(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
		mirr_bkt->current_chunk = (uintptr_t)chunk;
		tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);

		rem = nb_timers - chunk_remainder;
		tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - rem);
		tim_bkt_add_nent(bkt, rem);
	} else {
		chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
		chunk += (tim_ring->nb_chunk_slots - chunk_remainder);

		tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);
		tim_bkt_sub_rem(bkt, nb_timers);
		tim_bkt_add_nent(bkt, nb_timers);
	}

	tim_bkt_dec_lock(bkt);

	return nb_timers;
}
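/*
 * Cancel an armed timer: validate the impl_opaque[] handles against the
 * armed entry, take a lock reference on the bucket, clear the entry and
 * mark the timer canceled. Returns -ENOENT when the entry has already been
 * consumed or the bucket state no longer matches.
 */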
static int
tim_rm_entry(struct rte_event_timer *tim)
{
	struct otx2_tim_ent *entry;
	struct otx2_tim_bkt *bkt;
	uint64_t lock_sema;

	if (tim->impl_opaque[1] == 0 || tim->impl_opaque[0] == 0)
		return -ENOENT;

	entry = (struct otx2_tim_ent *)(uintptr_t)tim->impl_opaque[0];
	if (entry->wqe != tim->ev.u64) {
		tim->impl_opaque[0] = 0;
		tim->impl_opaque[1] = 0;
		return -ENOENT;
	}

	bkt = (struct otx2_tim_bkt *)(uintptr_t)tim->impl_opaque[1];
	lock_sema = tim_bkt_inc_lock(bkt);
	if (tim_bkt_get_hbt(lock_sema) || !tim_bkt_get_nent(lock_sema)) {
		tim->impl_opaque[0] = 0;
		tim->impl_opaque[1] = 0;
		tim_bkt_dec_lock(bkt);
		return -ENOENT;
	}

	entry->w0 = 0;
	entry->wqe = 0;
	tim->state = RTE_EVENT_TIMER_CANCELED;
	tim->impl_opaque[0] = 0;
	tim->impl_opaque[1] = 0;
	tim_bkt_dec_lock(bkt);

	return 0;
}

#endif /* __OTX2_TIM_WORKER_H__ */