1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
5 #ifndef __CNXK_TIM_WORKER_H__
6 #define __CNXK_TIM_WORKER_H__
8 #include "cnxk_tim_evdev.h"
/* Extract the lock-holder count from bucket control word w1.
 * NOTE(review): the "static inline" return-type line is missing from this
 * extract of the file.
 */
11 cnxk_tim_bkt_fetch_lock(uint64_t w1)
13 return (w1 >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK;
/* Extract the chunk-remainder field (free entry slots left in the bucket's
 * current chunk) from bucket control word w1.
 */
17 cnxk_tim_bkt_fetch_rem(uint64_t w1)
19 return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) &
20 TIM_BUCKET_W1_M_CHUNK_REMAINDER;
/* Atomically read the bucket's chunk_remainder with acquire semantics so
 * later loads in the caller observe a consistent bucket state.
 */
24 cnxk_tim_bkt_get_rem(struct cnxk_tim_bkt *bktp)
26 return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
/* Atomically overwrite the bucket's chunk_remainder; relaxed order — the
 * caller is expected to hold the bucket lock or otherwise order this store.
 */
30 cnxk_tim_bkt_set_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
32 __atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
/* Atomically subtract v from the bucket's chunk_remainder (relaxed order;
 * return value of the fetch-sub is intentionally discarded).
 */
36 cnxk_tim_bkt_sub_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
38 __atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
/* Extract the HBT bit-field from w1 — presumably indicates the hardware is
 * currently traversing this bucket (see the HBT spin-waits in the arm paths);
 * TODO confirm exact semantics against the TIM hardware manual.
 */
42 cnxk_tim_bkt_get_hbt(uint64_t w1)
44 return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT;
/* Extract the BSK bit-field from bucket control word w1.
 * NOTE(review): BSK semantics (bucket skip?) are not visible in this
 * extract — confirm against the TIM hardware documentation.
 */
48 cnxk_tim_bkt_get_bsk(uint64_t w1)
50 return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK;
/* Atomically clear every field of w1 except the lock field; returns the
 * previous w1 value so the caller can inspect the state it cleared.
 */
53 static inline uint64_t
54 cnxk_tim_bkt_clr_bsk(struct cnxk_tim_bkt *bktp)
56 /* Clear everything except lock. */
57 const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;
59 return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
/* Atomically bump the semaphore and take a lock reference in one fetch-add
 * on w1; returns the pre-add w1 snapshot the caller uses for HBT/remainder
 * checks. NOTE(review): the memory-order argument line is missing from this
 * extract.
 */
62 static inline uint64_t
63 cnxk_tim_bkt_fetch_sema_lock(struct cnxk_tim_bkt *bktp)
65 return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
/* Atomically bump only the semaphore field of w1 (no lock reference taken);
 * relaxed order, returns the pre-add w1 snapshot.
 */
69 static inline uint64_t
70 cnxk_tim_bkt_fetch_sema(struct cnxk_tim_bkt *bktp)
72 return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
/* Atomically take one lock reference on the bucket (adds 1 at the lock
 * field's bit position in w1); acquire order pairs with the release in
 * cnxk_tim_bkt_dec_lock(). Returns the pre-add w1 snapshot.
 */
75 static inline uint64_t
76 cnxk_tim_bkt_inc_lock(struct cnxk_tim_bkt *bktp)
78 const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;
80 return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
/* Drop one lock reference with release semantics so all prior writes to the
 * bucket/chunk become visible to the next lock holder.
 */
84 cnxk_tim_bkt_dec_lock(struct cnxk_tim_bkt *bktp)
86 __atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
/* Drop one lock reference with relaxed order — used on paths where the
 * caller has already published its writes (e.g. via a release store on the
 * timer state).
 */
90 cnxk_tim_bkt_dec_lock_relaxed(struct cnxk_tim_bkt *bktp)
92 __atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
/* Extract the number-of-entries field from bucket control word w1. */
95 static inline uint32_t
96 cnxk_tim_bkt_get_nent(uint64_t w1)
98 return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) &
99 TIM_BUCKET_W1_M_NUM_ENTRIES;
/* Atomically increment the bucket's entry count (relaxed order). */
103 cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
105 __atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
/* Atomically add v to the bucket's entry count (relaxed order). */
109 cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
111 __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
/* Atomically clear the number-of-entries field of w1 and return the NEW w1
 * value (and_fetch, unlike the fetch_and used in cnxk_tim_bkt_clr_bsk).
 * NOTE(review): the line declaring/initializing `v` is partially missing
 * from this extract — only the mask expression is visible.
 */
114 static inline uint64_t
115 cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
118 ~(TIM_BUCKET_W1_M_NUM_ENTRIES << TIM_BUCKET_W1_S_NUM_ENTRIES);
120 return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
/* Compute n % d without a hardware divide, using a precomputed
 * rte_reciprocal_u64: n - d * (n / d).
 */
123 static inline uint64_t
124 cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
126 return (n - (d * rte_reciprocal_divide_u64(n, &R)));
/* Resolve the target bucket for an arm request plus its "mirror" bucket
 * (half a ring-rotation ahead), writing both pointers into *bkt/*mirr_bkt.
 * The bucket index is derived from elapsed cycles since ring start divided
 * by the tick period (reciprocal divide), then reduced modulo nb_bkts.
 * NOTE(review): several lines (the divisor argument, the rel_bkt addition
 * and the fast_bkt argument on the first fast_mod call) are missing from
 * this extract.
 */
129 static __rte_always_inline void
130 cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
131 const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
132 struct cnxk_tim_bkt **mirr_bkt)
134 const uint64_t bkt_cyc = cnxk_tim_cntvct() - tim_ring->ring_start_cyc;
136 rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) +
138 uint64_t mirr_bucket = 0;
140 bucket = cnxk_tim_bkt_fast_mod(bucket, tim_ring->nb_bkts,
143 cnxk_tim_bkt_fast_mod(bucket + (tim_ring->nb_bkts >> 1),
144 tim_ring->nb_bkts, tim_ring->fast_bkt);
145 *bkt = &tim_ring->bkt[bucket];
146 *mirr_bkt = &tim_ring->bkt[mirr_bucket];
/* Walk the bucket's chunk chain and return every chunk except the first to
 * the ring's chunk mempool, batching frees in groups of
 * TIM_MAX_OUTSTANDING_OBJ to amortize mempool_put cost. Returns the (kept)
 * first chunk so the caller can reuse it. The next-chunk pointer lives in
 * the w0 of the sentinel slot one past nb_chunk_slots.
 * NOTE(review): the loop header, the objs counter declaration/reset and the
 * chunk-advance statements are missing from this extract.
 */
149 static struct cnxk_tim_ent *
150 cnxk_tim_clr_bkt(struct cnxk_tim_ring *const tim_ring,
151 struct cnxk_tim_bkt *const bkt)
153 #define TIM_MAX_OUTSTANDING_OBJ 64
154 void *pend_chunks[TIM_MAX_OUTSTANDING_OBJ];
155 struct cnxk_tim_ent *chunk;
156 struct cnxk_tim_ent *pnext;
159 chunk = ((struct cnxk_tim_ent *)(uintptr_t)bkt->first_chunk);
160 chunk = (struct cnxk_tim_ent *)(uintptr_t)(chunk +
161 tim_ring->nb_chunk_slots)
/* Read the next-chunk link from the current chunk's sentinel slot. */
164 pnext = (struct cnxk_tim_ent *)(uintptr_t)(
165 (chunk + tim_ring->nb_chunk_slots)->w0);
166 if (objs == TIM_MAX_OUTSTANDING_OBJ) {
/* Batch full: flush pending chunks back to the mempool. */
167 rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks,
171 pend_chunks[objs++] = chunk;
/* Flush any remaining chunks that did not fill a whole batch. */
176 rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks, objs);
178 return (struct cnxk_tim_ent *)(uintptr_t)bkt->first_chunk;
/* Fixed-buffer (FB) mode chunk refill: if the bucket still holds entries or
 * has no first chunk, allocate a fresh chunk from the mempool and link it
 * after the mirror bucket's current chunk; otherwise reclaim the bucket's
 * existing chain via cnxk_tim_clr_bkt() and reuse its first chunk. The
 * sentinel slot past nb_chunk_slots is zeroed to terminate the chain.
 * Returns the chunk to write into, or NULL if allocation failed.
 * NOTE(review): the failure-return, else branch and final return lines are
 * missing from this extract.
 */
181 static struct cnxk_tim_ent *
182 cnxk_tim_refill_chunk(struct cnxk_tim_bkt *const bkt,
183 struct cnxk_tim_bkt *const mirr_bkt,
184 struct cnxk_tim_ring *const tim_ring)
186 struct cnxk_tim_ent *chunk;
188 if (bkt->nb_entry || !bkt->first_chunk) {
189 if (unlikely(rte_mempool_get(tim_ring->chunk_pool,
/* Link the new chunk from the mirror bucket's current chunk sentinel. */
193 *(uint64_t *)(((struct cnxk_tim_ent *)
194 mirr_bkt->current_chunk) +
195 tim_ring->nb_chunk_slots) =
198 bkt->first_chunk = (uintptr_t)chunk;
/* Bucket drained: reclaim its chain and reuse the first chunk. */
201 chunk = cnxk_tim_clr_bkt(tim_ring, bkt);
202 bkt->first_chunk = (uintptr_t)chunk;
204 *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
/* Dynamic-buffer (DFB) mode chunk insert: always allocate a new chunk from
 * the mempool, zero its sentinel link slot, then either link it after the
 * mirror bucket's current chunk (bucket non-empty) or install it as the
 * bucket's first chunk. Returns the new chunk, or NULL on mempool
 * exhaustion. NOTE(review): the NULL-return line, the branch condition and
 * the final return are missing from this extract.
 */
209 static struct cnxk_tim_ent *
210 cnxk_tim_insert_chunk(struct cnxk_tim_bkt *const bkt,
211 struct cnxk_tim_bkt *const mirr_bkt,
212 struct cnxk_tim_ring *const tim_ring)
214 struct cnxk_tim_ent *chunk;
216 if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk)))
219 *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
/* Append the new chunk to the chain via the sentinel link slot. */
221 *(uint64_t *)(((struct cnxk_tim_ent *)(uintptr_t)
222 mirr_bkt->current_chunk) +
223 tim_ring->nb_chunk_slots) = (uintptr_t)chunk;
225 bkt->first_chunk = (uintptr_t)chunk;
/* Single-producer arm: insert one timer entry into its target bucket.
 * Flow: resolve target/mirror buckets, take the bucket semaphore+lock, wait
 * out any in-progress hardware bucket traversal (HBT), then copy the entry
 * into the current chunk — allocating/refilling a chunk first when the
 * chunk remainder hits zero. On success publishes RTE_EVENT_TIMER_ARMED
 * with a release store and records chunk/bucket in impl_opaque for cancel.
 * NOTE(review): this extract is missing several lines (local declarations,
 * the HBT retry/early-return paths, the entry copy, error/return codes), so
 * the per-branch behavior below is documented from the visible lines only.
 */
230 static __rte_always_inline int
231 cnxk_tim_add_entry_sp(struct cnxk_tim_ring *const tim_ring,
232 const uint32_t rel_bkt, struct rte_event_timer *const tim,
233 const struct cnxk_tim_ent *const pent,
236 struct cnxk_tim_bkt *mirr_bkt;
237 struct cnxk_tim_ent *chunk;
238 struct cnxk_tim_bkt *bkt;
243 cnxk_tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);
/* Take a semaphore + lock reference; snapshot w1 for the checks below. */
246 lock_sema = cnxk_tim_bkt_fetch_sema_lock(bkt);
248 /* Bucket related checks. */
249 if (unlikely(cnxk_tim_bkt_get_hbt(lock_sema))) {
250 if (cnxk_tim_bkt_get_nent(lock_sema) != 0) {
/* Spin until hardware clears the HBT bit (w1 bit 33): ARM64 uses a
 * ldxr/wfe-style wait, other arches poll with an atomic load.
 */
252 #ifdef RTE_ARCH_ARM64
253 asm volatile(PLT_CPU_FEATURE_PREAMBLE
254 " ldxr %[hbt], [%[w1]] \n"
255 " tbz %[hbt], 33, dne%= \n"
258 " ldxr %[hbt], [%[w1]] \n"
259 " tbnz %[hbt], 33, rty%= \n"
261 : [hbt] "=&r"(hbt_state)
262 : [w1] "r"((&bkt->w1))
266 hbt_state = __atomic_load_n(&bkt->w1,
268 } while (hbt_state & BIT_ULL(33));
/* Bit 34 presumably flags bucket validity after traversal — if clear,
 * drop the lock and retry/bail (retry path missing from extract).
 */
271 if (!(hbt_state & BIT_ULL(34))) {
272 cnxk_tim_bkt_dec_lock(bkt);
277 /* Insert the work. */
278 rem = cnxk_tim_bkt_fetch_rem(lock_sema);
/* Chunk exhausted: grab a new one (FB refills, DFB allocates fresh). */
281 if (flags & CNXK_TIM_ENA_FB)
282 chunk = cnxk_tim_refill_chunk(bkt, mirr_bkt, tim_ring);
283 if (flags & CNXK_TIM_ENA_DFB)
284 chunk = cnxk_tim_insert_chunk(bkt, mirr_bkt, tim_ring);
286 if (unlikely(chunk == NULL)) {
/* Allocation failed: mark the timer errored and release the lock. */
287 bkt->chunk_remainder = 0;
288 tim->impl_opaque[0] = 0;
289 tim->impl_opaque[1] = 0;
290 tim->state = RTE_EVENT_TIMER_ERROR;
291 cnxk_tim_bkt_dec_lock(bkt);
294 mirr_bkt->current_chunk = (uintptr_t)chunk;
295 bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
/* Room left in current chunk: slot index is slots - remainder. */
297 chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
298 chunk += tim_ring->nb_chunk_slots - rem;
301 /* Copy work entry. */
/* Record chunk/bucket so cancel can find the entry, then publish ARMED
 * with release order before dropping the lock (relaxed is then safe).
 */
304 tim->impl_opaque[0] = (uintptr_t)chunk;
305 tim->impl_opaque[1] = (uintptr_t)bkt;
306 __atomic_store_n(&tim->state, RTE_EVENT_TIMER_ARMED, __ATOMIC_RELEASE);
307 cnxk_tim_bkt_inc_nent(bkt);
308 cnxk_tim_bkt_dec_lock_relaxed(bkt);
/* Multi-producer arm: like cnxk_tim_add_entry_sp() but safe for concurrent
 * producers on the same bucket. The fetch-add on w1 hands each producer a
 * unique chunk-remainder ticket; the producer whose ticket underflows the
 * remainder becomes responsible for allocating the next chunk while the
 * others spin until the remainder is replenished. Only after all earlier
 * lock holders drain (lock count == 1) does the allocator publish the new
 * chunk and remainder.
 * NOTE(review): this extract is missing several lines (local declarations,
 * retry gotos, the entry copy, error/return codes, parts of the ARM64 asm
 * operand lists), so branch behavior is documented from visible lines only.
 */
313 static __rte_always_inline int
314 cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
315 const uint32_t rel_bkt, struct rte_event_timer *const tim,
316 const struct cnxk_tim_ent *const pent,
319 struct cnxk_tim_bkt *mirr_bkt;
320 struct cnxk_tim_ent *chunk;
321 struct cnxk_tim_bkt *bkt;
326 cnxk_tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);
328 lock_sema = cnxk_tim_bkt_fetch_sema_lock(bkt);
330 /* Bucket related checks. */
331 if (unlikely(cnxk_tim_bkt_get_hbt(lock_sema))) {
332 if (cnxk_tim_bkt_get_nent(lock_sema) != 0) {
/* Wait for hardware bucket traversal (w1 bit 33) to finish, as in the
 * single-producer path.
 */
334 #ifdef RTE_ARCH_ARM64
335 asm volatile(PLT_CPU_FEATURE_PREAMBLE
336 " ldxr %[hbt], [%[w1]] \n"
337 " tbz %[hbt], 33, dne%= \n"
340 " ldxr %[hbt], [%[w1]] \n"
341 " tbnz %[hbt], 33, rty%= \n"
343 : [hbt] "=&r"(hbt_state)
344 : [w1] "r"((&bkt->w1))
348 hbt_state = __atomic_load_n(&bkt->w1,
350 } while (hbt_state & BIT_ULL(33));
353 if (!(hbt_state & BIT_ULL(34))) {
354 cnxk_tim_bkt_dec_lock(bkt);
/* Ticket from the fetch-add: remaining slots in the current chunk. */
360 rem = cnxk_tim_bkt_fetch_rem(lock_sema);
/* Another producer owns the chunk switch: drop our lock and spin until
 * the remainder (sign bit of w1's low half) goes non-negative again.
 */
362 cnxk_tim_bkt_dec_lock(bkt);
363 #ifdef RTE_ARCH_ARM64
364 asm volatile(PLT_CPU_FEATURE_PREAMBLE
365 " ldxr %[rem], [%[crem]] \n"
366 " tbz %[rem], 63, dne%= \n"
369 " ldxr %[rem], [%[crem]] \n"
370 " tbnz %[rem], 63, rty%= \n"
373 : [crem] "r"(&bkt->w1)
376 while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
382 /* Only one thread can be here*/
383 if (flags & CNXK_TIM_ENA_FB)
384 chunk = cnxk_tim_refill_chunk(bkt, mirr_bkt, tim_ring);
385 if (flags & CNXK_TIM_ENA_DFB)
386 chunk = cnxk_tim_insert_chunk(bkt, mirr_bkt, tim_ring);
388 if (unlikely(chunk == NULL)) {
/* Allocation failed: error the timer, reset remainder, release lock. */
389 tim->impl_opaque[0] = 0;
390 tim->impl_opaque[1] = 0;
391 tim->state = RTE_EVENT_TIMER_ERROR;
392 cnxk_tim_bkt_set_rem(bkt, 0);
393 cnxk_tim_bkt_dec_lock(bkt);
/* Wait until we are the sole lock holder before publishing the new
 * chunk, so no producer still writes into the old one.
 */
397 if (cnxk_tim_bkt_fetch_lock(lock_sema)) {
399 lock_sema = __atomic_load_n(&bkt->w1,
401 } while (cnxk_tim_bkt_fetch_lock(lock_sema) - 1);
403 rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
404 mirr_bkt->current_chunk = (uintptr_t)chunk;
405 __atomic_store_n(&bkt->chunk_remainder,
406 tim_ring->nb_chunk_slots - 1,
/* Fast path: our ticket maps to a free slot in the current chunk. */
409 chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
410 chunk += tim_ring->nb_chunk_slots - rem;
/* Publish ARMED with release order, then relaxed lock drop (as in SP). */
414 tim->impl_opaque[0] = (uintptr_t)chunk;
415 tim->impl_opaque[1] = (uintptr_t)bkt;
416 __atomic_store_n(&tim->state, RTE_EVENT_TIMER_ARMED, __ATOMIC_RELEASE);
417 cnxk_tim_bkt_inc_nent(bkt);
418 cnxk_tim_bkt_dec_lock_relaxed(bkt);
423 #endif /* __CNXK_TIM_WORKER_H__ */