event/cnxk: add timer arm routine
drivers/event/cnxk/cnxk_tim_worker.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */

#ifndef __CNXK_TIM_WORKER_H__
#define __CNXK_TIM_WORKER_H__

#include "cnxk_tim_evdev.h"

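/* Helpers below read and update the fields packed into the 64-bit bucket
 * control word (w1): lock count, chunk remainder, HBT and BSK status bits
 * and the number of entries in the bucket.
 */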
static inline uint8_t
cnxk_tim_bkt_fetch_lock(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK;
}

static inline int16_t
cnxk_tim_bkt_fetch_rem(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) &
               TIM_BUCKET_W1_M_CHUNK_REMAINDER;
}

static inline int16_t
cnxk_tim_bkt_get_rem(struct cnxk_tim_bkt *bktp)
{
        return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
}

static inline void
cnxk_tim_bkt_set_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
{
        __atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline void
cnxk_tim_bkt_sub_rem(struct cnxk_tim_bkt *bktp, uint16_t v)
{
        __atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline uint8_t
cnxk_tim_bkt_get_hbt(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT;
}

static inline uint8_t
cnxk_tim_bkt_get_bsk(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK;
}

static inline uint64_t
cnxk_tim_bkt_clr_bsk(struct cnxk_tim_bkt *bktp)
{
        /* Clear everything except lock. */
        const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

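/* Atomically add TIM_BUCKET_SEMA_WLOCK to w1 with acquire ordering and
 * return the previous value so the caller can inspect the bucket state it
 * just locked.
 */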
static inline uint64_t
cnxk_tim_bkt_fetch_sema_lock(struct cnxk_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
                                  __ATOMIC_ACQUIRE);
}

static inline uint64_t
cnxk_tim_bkt_fetch_sema(struct cnxk_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
}

static inline uint64_t
cnxk_tim_bkt_inc_lock(struct cnxk_tim_bkt *bktp)
{
        const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
}

static inline void
cnxk_tim_bkt_dec_lock(struct cnxk_tim_bkt *bktp)
{
        __atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELEASE);
}

static inline void
cnxk_tim_bkt_dec_lock_relaxed(struct cnxk_tim_bkt *bktp)
{
        __atomic_fetch_sub(&bktp->lock, 1, __ATOMIC_RELAXED);
}

static inline uint32_t
cnxk_tim_bkt_get_nent(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) &
               TIM_BUCKET_W1_M_NUM_ENTRIES;
}

static inline void
cnxk_tim_bkt_inc_nent(struct cnxk_tim_bkt *bktp)
{
        __atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
}

static inline void
cnxk_tim_bkt_add_nent(struct cnxk_tim_bkt *bktp, uint32_t v)
{
        __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
}

static inline uint64_t
cnxk_tim_bkt_clr_nent(struct cnxk_tim_bkt *bktp)
{
        const uint64_t v =
                ~(TIM_BUCKET_W1_M_NUM_ENTRIES << TIM_BUCKET_W1_S_NUM_ENTRIES);

        return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

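/* Compute n % d using the precomputed reciprocal R: n - d * (n / d). */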
static inline uint64_t
cnxk_tim_bkt_fast_mod(uint64_t n, uint64_t d, struct rte_reciprocal_u64 R)
{
        return (n - (d * rte_reciprocal_divide_u64(n, &R)));
}

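/* Convert the time elapsed since ring start plus the relative bucket offset
 * into the target bucket and its mirror bucket located half a ring away.
 */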
static __rte_always_inline void
cnxk_tim_get_target_bucket(struct cnxk_tim_ring *const tim_ring,
                           const uint32_t rel_bkt, struct cnxk_tim_bkt **bkt,
                           struct cnxk_tim_bkt **mirr_bkt)
{
        const uint64_t bkt_cyc = cnxk_tim_cntvct() - tim_ring->ring_start_cyc;
        uint64_t bucket =
                rte_reciprocal_divide_u64(bkt_cyc, &tim_ring->fast_div) +
                rel_bkt;
        uint64_t mirr_bucket = 0;

        bucket = cnxk_tim_bkt_fast_mod(bucket, tim_ring->nb_bkts,
                                       tim_ring->fast_bkt);
        mirr_bucket =
                cnxk_tim_bkt_fast_mod(bucket + (tim_ring->nb_bkts >> 1),
                                      tim_ring->nb_bkts, tim_ring->fast_bkt);
        *bkt = &tim_ring->bkt[bucket];
        *mirr_bkt = &tim_ring->bkt[mirr_bucket];
}

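/* Return all chunks of the bucket except the first one to the chunk pool
 * (freed in bulk) and hand back the first chunk for reuse.
 */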
static struct cnxk_tim_ent *
cnxk_tim_clr_bkt(struct cnxk_tim_ring *const tim_ring,
                 struct cnxk_tim_bkt *const bkt)
{
#define TIM_MAX_OUTSTANDING_OBJ 64
        void *pend_chunks[TIM_MAX_OUTSTANDING_OBJ];
        struct cnxk_tim_ent *chunk;
        struct cnxk_tim_ent *pnext;
        uint8_t objs = 0;

        chunk = ((struct cnxk_tim_ent *)(uintptr_t)bkt->first_chunk);
        chunk = (struct cnxk_tim_ent *)(uintptr_t)(chunk +
                                                   tim_ring->nb_chunk_slots)
                        ->w0;
        while (chunk) {
                pnext = (struct cnxk_tim_ent *)(uintptr_t)(
                        (chunk + tim_ring->nb_chunk_slots)->w0);
                if (objs == TIM_MAX_OUTSTANDING_OBJ) {
                        rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks,
                                             objs);
                        objs = 0;
                }
                pend_chunks[objs++] = chunk;
                chunk = pnext;
        }

        if (objs)
                rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks, objs);

        return (struct cnxk_tim_ent *)(uintptr_t)bkt->first_chunk;
}

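/* Get a chunk to write into when the ring is configured with CNXK_TIM_ENA_FB:
 * allocate and link a fresh chunk, or, if the bucket is empty but still owns
 * a chunk list, reclaim that list and reuse its first chunk.
 */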
static struct cnxk_tim_ent *
cnxk_tim_refill_chunk(struct cnxk_tim_bkt *const bkt,
                      struct cnxk_tim_bkt *const mirr_bkt,
                      struct cnxk_tim_ring *const tim_ring)
{
        struct cnxk_tim_ent *chunk;

        if (bkt->nb_entry || !bkt->first_chunk) {
                if (unlikely(rte_mempool_get(tim_ring->chunk_pool,
                                             (void **)&chunk)))
                        return NULL;
                if (bkt->nb_entry) {
                        *(uint64_t *)(((struct cnxk_tim_ent *)
                                               mirr_bkt->current_chunk) +
                                      tim_ring->nb_chunk_slots) =
                                (uintptr_t)chunk;
                } else {
                        bkt->first_chunk = (uintptr_t)chunk;
                }
        } else {
                chunk = cnxk_tim_clr_bkt(tim_ring, bkt);
                bkt->first_chunk = (uintptr_t)chunk;
        }
        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;

        return chunk;
}

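/* Allocate a fresh chunk from the chunk pool and link it into the bucket's
 * chunk list (CNXK_TIM_ENA_DFB path).
 */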
static struct cnxk_tim_ent *
cnxk_tim_insert_chunk(struct cnxk_tim_bkt *const bkt,
                      struct cnxk_tim_bkt *const mirr_bkt,
                      struct cnxk_tim_ring *const tim_ring)
{
        struct cnxk_tim_ent *chunk;

        if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk)))
                return NULL;

        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
        if (bkt->nb_entry) {
                *(uint64_t *)(((struct cnxk_tim_ent *)(uintptr_t)
                                       mirr_bkt->current_chunk) +
                              tim_ring->nb_chunk_slots) = (uintptr_t)chunk;
        } else {
                bkt->first_chunk = (uintptr_t)chunk;
        }
        return chunk;
}

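/* Arm a timer on the single-producer path: find the target bucket, wait out
 * any in-progress bucket traversal, pick (or allocate) a chunk slot and copy
 * the work entry into it.
 */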
static __rte_always_inline int
cnxk_tim_add_entry_sp(struct cnxk_tim_ring *const tim_ring,
                      const uint32_t rel_bkt, struct rte_event_timer *const tim,
                      const struct cnxk_tim_ent *const pent,
                      const uint8_t flags)
{
        struct cnxk_tim_bkt *mirr_bkt;
        struct cnxk_tim_ent *chunk;
        struct cnxk_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        cnxk_tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);

        /* Get the bucket semaphore. */
        lock_sema = cnxk_tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(cnxk_tim_bkt_get_hbt(lock_sema))) {
                if (cnxk_tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
                        /* Wait for the in-progress bucket traversal (HBT,
                         * bit 33 of w1) to complete.
                         */
#ifdef RTE_ARCH_ARM64
                        asm volatile(PLT_CPU_FEATURE_PREAMBLE
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbz %[hbt], 33, dne%=   \n"
                                     "          sevl                    \n"
                                     "rty%=:    wfe                     \n"
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbnz %[hbt], 33, rty%=  \n"
                                     "dne%=:                            \n"
                                     : [hbt] "=&r"(hbt_state)
                                     : [w1] "r"((&bkt->w1))
                                     : "memory");
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_RELAXED);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                cnxk_tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }
        /* Insert the work. */
        rem = cnxk_tim_bkt_fetch_rem(lock_sema);

        if (!rem) {
                if (flags & CNXK_TIM_ENA_FB)
                        chunk = cnxk_tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & CNXK_TIM_ENA_DFB)
                        chunk = cnxk_tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        bkt->chunk_remainder = 0;
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        cnxk_tim_bkt_dec_lock(bkt);
                        return -ENOMEM;
                }
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
        } else {
                chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
        }

        /* Copy work entry. */
        *chunk = *pent;

        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        __atomic_store_n(&tim->state, RTE_EVENT_TIMER_ARMED, __ATOMIC_RELEASE);
        cnxk_tim_bkt_inc_nent(bkt);
        cnxk_tim_bkt_dec_lock_relaxed(bkt);

        return 0;
}

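/* Arm a timer on the multi-producer path: same flow as the SP variant, but
 * the chunk remainder is used to serialize producers when a bucket runs out
 * of slots and a new chunk has to be linked.
 */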
static __rte_always_inline int
cnxk_tim_add_entry_mp(struct cnxk_tim_ring *const tim_ring,
                      const uint32_t rel_bkt, struct rte_event_timer *const tim,
                      const struct cnxk_tim_ent *const pent,
                      const uint8_t flags)
{
        struct cnxk_tim_bkt *mirr_bkt;
        struct cnxk_tim_ent *chunk;
        struct cnxk_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        cnxk_tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt);
        /* Get the bucket semaphore. */
        lock_sema = cnxk_tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(cnxk_tim_bkt_get_hbt(lock_sema))) {
                if (cnxk_tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
                        /* Wait for the in-progress bucket traversal (HBT,
                         * bit 33 of w1) to complete.
                         */
#ifdef RTE_ARCH_ARM64
                        asm volatile(PLT_CPU_FEATURE_PREAMBLE
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbz %[hbt], 33, dne%=   \n"
                                     "          sevl                    \n"
                                     "rty%=:    wfe                     \n"
                                     "          ldxr %[hbt], [%[w1]]    \n"
                                     "          tbnz %[hbt], 33, rty%=  \n"
                                     "dne%=:                            \n"
                                     : [hbt] "=&r"(hbt_state)
                                     : [w1] "r"((&bkt->w1))
                                     : "memory");
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_RELAXED);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                cnxk_tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        rem = cnxk_tim_bkt_fetch_rem(lock_sema);
        if (rem < 0) {
                cnxk_tim_bkt_dec_lock(bkt);
                /* Wait until the chunk remainder turns non-negative, i.e.
                 * until the thread linking a new chunk has finished.
                 */
#ifdef RTE_ARCH_ARM64
                asm volatile(PLT_CPU_FEATURE_PREAMBLE
                             "          ldxr %[rem], [%[crem]]  \n"
                             "          tbz %[rem], 63, dne%=           \n"
                             "          sevl                            \n"
                             "rty%=:    wfe                             \n"
                             "          ldxr %[rem], [%[crem]]  \n"
                             "          tbnz %[rem], 63, rty%=          \n"
                             "dne%=:                                    \n"
                             : [rem] "=&r"(rem)
                             : [crem] "r"(&bkt->w1)
                             : "memory");
#else
                while (__atomic_load_n((int64_t *)&bkt->w1, __ATOMIC_RELAXED) <
                       0)
                        ;
#endif
                goto __retry;
        } else if (!rem) {
                /* Only one thread can be here. */
                if (flags & CNXK_TIM_ENA_FB)
                        chunk = cnxk_tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & CNXK_TIM_ENA_DFB)
                        chunk = cnxk_tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        cnxk_tim_bkt_set_rem(bkt, 0);
                        cnxk_tim_bkt_dec_lock(bkt);
                        return -ENOMEM;
                }
                *chunk = *pent;
                if (cnxk_tim_bkt_fetch_lock(lock_sema)) {
                        do {
                                lock_sema = __atomic_load_n(&bkt->w1,
                                                            __ATOMIC_RELAXED);
                        } while (cnxk_tim_bkt_fetch_lock(lock_sema) - 1);
                }
                rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                __atomic_store_n(&bkt->chunk_remainder,
                                 tim_ring->nb_chunk_slots - 1,
                                 __ATOMIC_RELEASE);
        } else {
                chunk = (struct cnxk_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
                *chunk = *pent;
        }

        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        __atomic_store_n(&tim->state, RTE_EVENT_TIMER_ARMED, __ATOMIC_RELEASE);
        cnxk_tim_bkt_inc_nent(bkt);
        cnxk_tim_bkt_dec_lock_relaxed(bkt);

        return 0;
}

#endif /* __CNXK_TIM_WORKER_H__ */