/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#ifndef __OTX2_TIM_WORKER_H__
#define __OTX2_TIM_WORKER_H__

#include "otx2_tim_evdev.h"

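/*
 * Helpers for the 64-bit bucket control word w1. Going by the
 * TIM_BUCKET_W1_* shift/mask macros used below (defined in
 * otx2_tim_evdev.h) and the BIT_ULL() tests in the arm paths, w1 packs,
 * LSB to MSB: the entry count, hardware status bits (HBT at bit 33, BSK
 * at bit 34), an 8-bit software lock count and a signed 16-bit
 * chunk_remainder in the top bits.
 */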
static inline uint8_t
tim_bkt_fetch_lock(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_LOCK) &
		TIM_BUCKET_W1_M_LOCK;
}

static inline int16_t
tim_bkt_fetch_rem(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) &
		TIM_BUCKET_W1_M_CHUNK_REMAINDER;
}

static inline int16_t
tim_bkt_get_rem(struct otx2_tim_bkt *bktp)
{
	return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
}

static inline void
tim_bkt_set_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
	__atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_sub_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
	__atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline uint8_t
tim_bkt_get_hbt(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT;
}

static inline uint8_t
tim_bkt_get_bsk(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK;
}

static inline uint64_t
tim_bkt_clr_bsk(struct otx2_tim_bkt *bktp)
{
	/* Clear everything except the lock. */
	const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

	return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

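/*
 * Note: going by the macro definitions in otx2_tim_evdev.h,
 * TIM_BUCKET_SEMA[_WLOCK] below encode a combined update: adding the
 * all-ones chunk_remainder mask decrements that 16-bit field by one
 * (two's complement wrap), and the WLOCK variant additionally adds one
 * to the lock count, so a single atomic add claims a chunk slot and a
 * lock reference together while returning the previous w1 for checks.
 */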
static inline uint64_t
tim_bkt_fetch_sema_lock(struct otx2_tim_bkt *bktp)
{
	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
				  __ATOMIC_ACQUIRE);
}

static inline uint64_t
tim_bkt_fetch_sema(struct otx2_tim_bkt *bktp)
{
	return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_inc_lock(struct otx2_tim_bkt *bktp)
{
	const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

	return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
}

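/*
 * Unlock relies on 8-bit wrap-around: adding 0xff to the one-byte lock
 * field subtracts one, with release ordering so updates made under the
 * lock are visible before the reference is dropped.
 */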
static inline void
tim_bkt_dec_lock(struct otx2_tim_bkt *bktp)
{
	__atomic_add_fetch(&bktp->lock, 0xff, __ATOMIC_RELEASE);
}

static inline uint32_t
tim_bkt_get_nent(uint64_t w1)
{
	return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) &
		TIM_BUCKET_W1_M_NUM_ENTRIES;
}

static inline void
tim_bkt_inc_nent(struct otx2_tim_bkt *bktp)
{
	__atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_add_nent(struct otx2_tim_bkt *bktp, uint32_t v)
{
	__atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_clr_nent(struct otx2_tim_bkt *bktp)
{
	const uint64_t v = ~(TIM_BUCKET_W1_M_NUM_ENTRIES <<
			     TIM_BUCKET_W1_S_NUM_ENTRIES);

	return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

static __rte_always_inline void
tim_get_target_bucket(struct otx2_tim_ring * const tim_ring,
		      const uint32_t rel_bkt, struct otx2_tim_bkt **bkt,
		      struct otx2_tim_bkt **mirr_bkt, const uint8_t flag)
{
	const uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc;
	uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc,
			&tim_ring->fast_div) + rel_bkt;
	uint32_t mirr_bucket = 0;

	if (flag & OTX2_TIM_BKT_MOD) {
		bucket = bucket % tim_ring->nb_bkts;
		mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) %
			tim_ring->nb_bkts;
	}
	if (flag & OTX2_TIM_BKT_AND) {
		bucket = bucket & (tim_ring->nb_bkts - 1);
		mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) &
			(tim_ring->nb_bkts - 1);
	}

	*bkt = &tim_ring->bkt[bucket];
	*mirr_bkt = &tim_ring->bkt[mirr_bucket];
}

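/*
 * The target bucket is the elapsed TSC cycle count divided by the bucket
 * interval (via the precomputed reciprocal) plus the relative offset;
 * OTX2_TIM_BKT_AND is the power-of-two fast path for the wrap and
 * OTX2_TIM_BKT_MOD the generic one. E.g. with nb_bkts = 16 and
 * bucket = 5, mirr_bucket = (5 + 8) & 15 = 13: the mirror bucket sits
 * half a rotation away and is where current_chunk is kept, presumably so
 * producers do not dirty the line the hardware is about to traverse.
 */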
static struct otx2_tim_ent *
tim_clr_bkt(struct otx2_tim_ring * const tim_ring,
	    struct otx2_tim_bkt * const bkt)
{
	struct otx2_tim_ent *chunk;
	struct otx2_tim_ent *pnext;

	chunk = ((struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk);
	chunk = (struct otx2_tim_ent *)(uintptr_t)(chunk +
			tim_ring->nb_chunk_slots)->w0;
	while (chunk) {
		pnext = (struct otx2_tim_ent *)(uintptr_t)
			((chunk + tim_ring->nb_chunk_slots)->w0);
		rte_mempool_put(tim_ring->chunk_pool, chunk);
		chunk = pnext;
	}

	return (struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk;
}

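/*
 * A chunk is nb_chunk_slots entries followed by a link slot whose w0
 * holds the next-chunk pointer. tim_clr_bkt() above walks that list,
 * returns every chunk after the first to the mempool and hands back the
 * first chunk so the bucket can be rebuilt from it.
 */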
static struct otx2_tim_ent *
tim_refill_chunk(struct otx2_tim_bkt * const bkt,
		 struct otx2_tim_bkt * const mirr_bkt,
		 struct otx2_tim_ring * const tim_ring)
{
	struct otx2_tim_ent *chunk;

	if (bkt->nb_entry || !bkt->first_chunk) {
		if (unlikely(rte_mempool_get(tim_ring->chunk_pool,
					     (void **)&chunk)))
			return NULL;
		if (bkt->nb_entry) {
			*(uint64_t *)(((struct otx2_tim_ent *)
					mirr_bkt->current_chunk) +
					tim_ring->nb_chunk_slots) =
				(uintptr_t)chunk;
		} else {
			bkt->first_chunk = (uintptr_t)chunk;
		}
	} else {
		chunk = tim_clr_bkt(tim_ring, bkt);
		bkt->first_chunk = (uintptr_t)chunk;
	}
	*(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;

	return chunk;
}

static struct otx2_tim_ent *
tim_insert_chunk(struct otx2_tim_bkt * const bkt,
		 struct otx2_tim_bkt * const mirr_bkt,
		 struct otx2_tim_ring * const tim_ring)
{
	struct otx2_tim_ent *chunk;

	if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk)))
		return NULL;

	*(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
	if (bkt->nb_entry) {
		*(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t)
				mirr_bkt->current_chunk) +
				tim_ring->nb_chunk_slots) = (uintptr_t)chunk;
	} else {
		bkt->first_chunk = (uintptr_t)chunk;
	}

	return chunk;
}

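/*
 * The arm paths below choose a chunk allocator by ring mode:
 * OTX2_TIM_ENA_FB uses tim_refill_chunk(), which may reclaim the
 * bucket's drained chain via tim_clr_bkt(), while OTX2_TIM_ENA_DFB uses
 * tim_insert_chunk(), which always links a fresh chunk from the pool.
 * Exactly one of the two flags is expected to be set, so 'chunk' is
 * assigned on every path before use.
 */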
static __rte_always_inline int
tim_add_entry_sp(struct otx2_tim_ring * const tim_ring,
		 const uint32_t rel_bkt,
		 struct rte_event_timer * const tim,
		 const struct otx2_tim_ent * const pent,
		 const uint8_t flags)
{
	struct otx2_tim_bkt *mirr_bkt;
	struct otx2_tim_ent *chunk;
	struct otx2_tim_bkt *bkt;
	uint64_t lock_sema;
	int16_t rem;

__retry:
	tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);

	/* Get the bucket semaphore. */
	lock_sema = tim_bkt_fetch_sema_lock(bkt);

	/* Bucket related checks. */
	if (unlikely(tim_bkt_get_hbt(lock_sema))) {
		if (tim_bkt_get_nent(lock_sema) != 0) {
			uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
			asm volatile(
					"	ldaxr %[hbt], [%[w1]]	\n"
					"	tbz %[hbt], 33, dne%=	\n"
					"	sevl			\n"
					"rty%=:	wfe			\n"
					"	ldaxr %[hbt], [%[w1]]	\n"
					"	tbnz %[hbt], 33, rty%=	\n"
					"dne%=:				\n"
					: [hbt] "=&r" (hbt_state)
					: [w1] "r" ((&bkt->w1))
					: "memory"
					);
#else
			do {
				hbt_state = __atomic_load_n(&bkt->w1,
						__ATOMIC_ACQUIRE);
			} while (hbt_state & BIT_ULL(33));
#endif

			if (!(hbt_state & BIT_ULL(34))) {
				tim_bkt_dec_lock(bkt);
				goto __retry;
			}
		}
	}

	/* Insert the work. */
	rem = tim_bkt_fetch_rem(lock_sema);

	if (!rem) {
		if (flags & OTX2_TIM_ENA_FB)
			chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
		if (flags & OTX2_TIM_ENA_DFB)
			chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

		if (unlikely(chunk == NULL)) {
			bkt->chunk_remainder = 0;
			tim_bkt_dec_lock(bkt);
			tim->impl_opaque[0] = 0;
			tim->impl_opaque[1] = 0;
			tim->state = RTE_EVENT_TIMER_ERROR;
			return -ENOMEM;
		}
		mirr_bkt->current_chunk = (uintptr_t)chunk;
		bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
	} else {
		chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
		chunk += tim_ring->nb_chunk_slots - rem;
	}

	/* Copy the work entry. */
	*chunk = *pent;

	tim_bkt_inc_nent(bkt);
	tim_bkt_dec_lock(bkt);

	tim->impl_opaque[0] = (uintptr_t)chunk;
	tim->impl_opaque[1] = (uintptr_t)bkt;
	tim->state = RTE_EVENT_TIMER_ARMED;

	return 0;
}

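/*
 * Multi-producer variant. tim_bkt_fetch_sema_lock() decrements
 * chunk_remainder for every arming thread, so the count goes negative
 * while one producer performs the chunk switch; the negative value acts
 * as a gate. Waiters poll the sign bit (bit 15) with ldaxrh/wfe on
 * ARM64, or an acquire-load loop elsewhere, then drop their lock
 * reference and retry on a freshly computed bucket.
 */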
static __rte_always_inline int
tim_add_entry_mp(struct otx2_tim_ring * const tim_ring,
		 const uint32_t rel_bkt,
		 struct rte_event_timer * const tim,
		 const struct otx2_tim_ent * const pent,
		 const uint8_t flags)
{
	struct otx2_tim_bkt *mirr_bkt;
	struct otx2_tim_ent *chunk;
	struct otx2_tim_bkt *bkt;
	uint64_t lock_sema;
	int16_t rem;

__retry:
	tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);
	/* Get the bucket semaphore. */
	lock_sema = tim_bkt_fetch_sema_lock(bkt);

	/* Bucket related checks. */
	if (unlikely(tim_bkt_get_hbt(lock_sema))) {
		if (tim_bkt_get_nent(lock_sema) != 0) {
			uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
			asm volatile(
					"	ldaxr %[hbt], [%[w1]]	\n"
					"	tbz %[hbt], 33, dne%=	\n"
					"	sevl			\n"
					"rty%=:	wfe			\n"
					"	ldaxr %[hbt], [%[w1]]	\n"
					"	tbnz %[hbt], 33, rty%=	\n"
					"dne%=:				\n"
					: [hbt] "=&r" (hbt_state)
					: [w1] "r" ((&bkt->w1))
					: "memory"
					);
#else
			do {
				hbt_state = __atomic_load_n(&bkt->w1,
						__ATOMIC_ACQUIRE);
			} while (hbt_state & BIT_ULL(33));
#endif

			if (!(hbt_state & BIT_ULL(34))) {
				tim_bkt_dec_lock(bkt);
				goto __retry;
			}
		}
	}

	rem = tim_bkt_fetch_rem(lock_sema);
	if (rem < 0) {
#ifdef RTE_ARCH_ARM64
		/* Wait until chunk_remainder is non-negative. */
		asm volatile(
				"	ldaxrh %w[rem], [%[crem]]	\n"
				"	tbz %w[rem], 15, dne%=		\n"
				"	sevl				\n"
				"rty%=:	wfe				\n"
				"	ldaxrh %w[rem], [%[crem]]	\n"
				"	tbnz %w[rem], 15, rty%=		\n"
				"dne%=:					\n"
				: [rem] "=&r" (rem)
				: [crem] "r" (&bkt->chunk_remainder)
				: "memory"
				);
#else
		while (__atomic_load_n(&bkt->chunk_remainder,
				       __ATOMIC_ACQUIRE) < 0)
			;
#endif
		/* Go to a different bucket. */
		tim_bkt_dec_lock(bkt);
		goto __retry;
	} else if (!rem) {
		/* Only one thread can be here. */
		if (flags & OTX2_TIM_ENA_FB)
			chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
		if (flags & OTX2_TIM_ENA_DFB)
			chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

		if (unlikely(chunk == NULL)) {
			tim_bkt_set_rem(bkt, 0);
			tim_bkt_dec_lock(bkt);
			tim->impl_opaque[0] = 0;
			tim->impl_opaque[1] = 0;
			tim->state = RTE_EVENT_TIMER_ERROR;
			return -ENOMEM;
		}
		*chunk = *pent;
		/* Wait for the lock count to match the outstanding
		 * remainder before publishing the new chunk.
		 */
		while (tim_bkt_fetch_lock(lock_sema) !=
				(-tim_bkt_fetch_rem(lock_sema)))
			lock_sema = __atomic_load_n(&bkt->w1, __ATOMIC_ACQUIRE);

		mirr_bkt->current_chunk = (uintptr_t)chunk;
		__atomic_store_n(&bkt->chunk_remainder,
				 tim_ring->nb_chunk_slots - 1,
				 __ATOMIC_RELEASE);
	} else {
		chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
		chunk += tim_ring->nb_chunk_slots - rem;
		/* Copy the work entry. */
		*chunk = *pent;
	}

	/* Update bucket accounting and release the lock. */
	tim_bkt_inc_nent(bkt);
	tim_bkt_dec_lock(bkt);

	tim->impl_opaque[0] = (uintptr_t)chunk;
	tim->impl_opaque[1] = (uintptr_t)bkt;
	tim->state = RTE_EVENT_TIMER_ARMED;

	return 0;
}

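/*
 * tim_cpy_wrk() copies entries ents[index..cpy_lmt) into consecutive
 * chunk slots and stamps each timer's impl_opaque[] and state; it
 * returns the next index so the burst path can resume after a chunk
 * switch.
 */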
static inline uint16_t
tim_cpy_wrk(uint16_t index, uint16_t cpy_lmt,
	    struct otx2_tim_ent *chunk,
	    struct rte_event_timer ** const tim,
	    const struct otx2_tim_ent * const ents,
	    const struct otx2_tim_bkt * const bkt)
{
	for (; index < cpy_lmt; index++) {
		*chunk = *(ents + index);
		tim[index]->impl_opaque[0] = (uintptr_t)chunk++;
		tim[index]->impl_opaque[1] = (uintptr_t)bkt;
		tim[index]->state = RTE_EVENT_TIMER_ARMED;
	}

	return index;
}

/* Burst mode functions */
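/*
 * The burst path takes the bucket lock exclusively (the previous lock
 * count must be zero, else it retries), fills what fits in the current
 * chunk, then links a single new chunk for the overflow; callers are
 * expected to bound nb_timers so a burst spans at most one chunk
 * switch.
 */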
static __rte_always_inline uint16_t
tim_add_entry_brst(struct otx2_tim_ring * const tim_ring,
		   const uint16_t rel_bkt,
		   struct rte_event_timer ** const tim,
		   const struct otx2_tim_ent *ents,
		   const uint16_t nb_timers, const uint8_t flags)
{
	struct otx2_tim_ent *chunk = NULL;
	struct otx2_tim_bkt *mirr_bkt;
	struct otx2_tim_bkt *bkt;
	uint16_t chunk_remainder;
	uint16_t index = 0;
	uint64_t lock_sema;
	int16_t rem, crem;
	uint8_t lock_cnt;

__retry:
	tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);

	/* Only one thread beyond this. */
	lock_sema = tim_bkt_inc_lock(bkt);
	lock_cnt = (uint8_t)
		((lock_sema >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK);

	if (lock_cnt) {
		tim_bkt_dec_lock(bkt);
		goto __retry;
	}

	/* Bucket related checks. */
	if (unlikely(tim_bkt_get_hbt(lock_sema))) {
		if (tim_bkt_get_nent(lock_sema) != 0) {
			uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
			asm volatile(
					"	ldaxr %[hbt], [%[w1]]	\n"
					"	tbz %[hbt], 33, dne%=	\n"
					"	sevl			\n"
					"rty%=:	wfe			\n"
					"	ldaxr %[hbt], [%[w1]]	\n"
					"	tbnz %[hbt], 33, rty%=	\n"
					"dne%=:				\n"
					: [hbt] "=&r" (hbt_state)
					: [w1] "r" ((&bkt->w1))
					: "memory"
					);
#else
			do {
				hbt_state = __atomic_load_n(&bkt->w1,
						__ATOMIC_ACQUIRE);
			} while (hbt_state & BIT_ULL(33));
#endif

			if (!(hbt_state & BIT_ULL(34))) {
				tim_bkt_dec_lock(bkt);
				goto __retry;
			}
		}
	}

	chunk_remainder = tim_bkt_fetch_rem(lock_sema);
	rem = chunk_remainder - nb_timers;
	if (rem < 0) {
		crem = tim_ring->nb_chunk_slots - chunk_remainder;
		if (chunk_remainder && crem) {
			chunk = ((struct otx2_tim_ent *)
					mirr_bkt->current_chunk) + crem;

			index = tim_cpy_wrk(index, chunk_remainder, chunk, tim,
					    ents, bkt);
			tim_bkt_sub_rem(bkt, chunk_remainder);
			tim_bkt_add_nent(bkt, chunk_remainder);
		}

		if (flags & OTX2_TIM_ENA_FB)
			chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
		if (flags & OTX2_TIM_ENA_DFB)
			chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

		if (unlikely(chunk == NULL)) {
			tim_bkt_dec_lock(bkt);
			rte_errno = ENOMEM;
			tim[index]->state = RTE_EVENT_TIMER_ERROR;
			return crem;
		}
		*(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
		mirr_bkt->current_chunk = (uintptr_t)chunk;
		tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);

		rem = nb_timers - chunk_remainder;
		tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - rem);
		tim_bkt_add_nent(bkt, rem);
	} else {
		chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
		chunk += (tim_ring->nb_chunk_slots - chunk_remainder);

		tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);
		tim_bkt_sub_rem(bkt, nb_timers);
		tim_bkt_add_nent(bkt, nb_timers);
	}

	tim_bkt_dec_lock(bkt);

	return nb_timers;
}

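/*
 * Cancellation: impl_opaque[0]/[1] hold the entry and bucket recorded
 * at arm time, and the entry is considered live only while its wqe
 * still matches the timer's event payload. On success the entry is
 * zeroed in place under the bucket lock rather than unlinked from the
 * chunk.
 */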
static int
tim_rm_entry(struct rte_event_timer *tim)
{
	struct otx2_tim_ent *entry;
	struct otx2_tim_bkt *bkt;
	uint64_t lock_sema;

	if (tim->impl_opaque[1] == 0 || tim->impl_opaque[0] == 0)
		return -ENOENT;

	entry = (struct otx2_tim_ent *)(uintptr_t)tim->impl_opaque[0];
	if (entry->wqe != tim->ev.u64) {
		tim->impl_opaque[0] = 0;
		tim->impl_opaque[1] = 0;
		return -ENOENT;
	}

	bkt = (struct otx2_tim_bkt *)(uintptr_t)tim->impl_opaque[1];
	lock_sema = tim_bkt_inc_lock(bkt);
	if (tim_bkt_get_hbt(lock_sema) || !tim_bkt_get_nent(lock_sema)) {
		tim_bkt_dec_lock(bkt);
		tim->impl_opaque[0] = 0;
		tim->impl_opaque[1] = 0;
		return -ENOENT;
	}

	entry->w0 = 0;
	entry->wqe = 0;
	tim_bkt_dec_lock(bkt);

	tim->state = RTE_EVENT_TIMER_CANCELED;
	tim->impl_opaque[0] = 0;
	tim->impl_opaque[1] = 0;

	return 0;
}

#endif /* __OTX2_TIM_WORKER_H__ */