drivers/event/octeontx2/otx2_tim_worker.h (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#ifndef __OTX2_TIM_WORKER_H__
#define __OTX2_TIM_WORKER_H__

#include "otx2_tim_evdev.h"

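/* The helpers below manipulate the bucket's 64-bit control word (w1) and its
 * sub-fields (lock count, chunk remainder, number of entries, HBT/BSK status
 * bits) with GCC __atomic builtins, since buckets are presumably shared
 * between the arming cores and the TIM hardware.
 */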
static inline uint8_t
tim_bkt_fetch_lock(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_LOCK) &
                TIM_BUCKET_W1_M_LOCK;
}

static inline int16_t
tim_bkt_fetch_rem(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) &
                TIM_BUCKET_W1_M_CHUNK_REMAINDER;
}

static inline int16_t
tim_bkt_get_rem(struct otx2_tim_bkt *bktp)
{
        return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
}

static inline void
tim_bkt_set_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
        __atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_sub_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
        __atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline uint8_t
tim_bkt_get_hbt(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT;
}

static inline uint8_t
tim_bkt_get_bsk(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK;
}

static inline uint64_t
tim_bkt_clr_bsk(struct otx2_tim_bkt *bktp)
{
        /* Clear everything except lock. */
        const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

static inline uint64_t
tim_bkt_fetch_sema_lock(struct otx2_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
                        __ATOMIC_ACQUIRE);
}

static inline uint64_t
tim_bkt_fetch_sema(struct otx2_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_inc_lock(struct otx2_tim_bkt *bktp)
{
        const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
}

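/* Adding 0xff to what is effectively an 8-bit lock counter wraps it around,
 * i.e. drops one reference on the bucket lock; RELEASE ordering publishes the
 * bucket updates made while the lock was held.
 */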
static inline void
tim_bkt_dec_lock(struct otx2_tim_bkt *bktp)
{
        __atomic_add_fetch(&bktp->lock, 0xff, __ATOMIC_RELEASE);
}

static inline uint32_t
tim_bkt_get_nent(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) &
                TIM_BUCKET_W1_M_NUM_ENTRIES;
}

static inline void
tim_bkt_inc_nent(struct otx2_tim_bkt *bktp)
{
        __atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_add_nent(struct otx2_tim_bkt *bktp, uint32_t v)
{
        __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_clr_nent(struct otx2_tim_bkt *bktp)
{
        const uint64_t v = ~(TIM_BUCKET_W1_M_NUM_ENTRIES <<
                        TIM_BUCKET_W1_S_NUM_ENTRIES);

        return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

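/* Map a relative bucket offset to absolute bucket and mirror-bucket pointers.
 * Elapsed time since ring start (in TSC cycles) is turned into a bucket index
 * with a precomputed reciprocal divide; the mirror bucket sits half a ring
 * away.  OTX2_TIM_BKT_MOD handles arbitrary ring sizes, OTX2_TIM_BKT_AND the
 * power-of-two case.
 */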
static __rte_always_inline void
tim_get_target_bucket(struct otx2_tim_ring * const tim_ring,
                      const uint32_t rel_bkt, struct otx2_tim_bkt **bkt,
                      struct otx2_tim_bkt **mirr_bkt, const uint8_t flag)
{
        const uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc;
        uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc,
                        &tim_ring->fast_div) + rel_bkt;
        uint32_t mirr_bucket = 0;

        if (flag & OTX2_TIM_BKT_MOD) {
                bucket = bucket % tim_ring->nb_bkts;
                mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) %
                                                tim_ring->nb_bkts;
        }
        if (flag & OTX2_TIM_BKT_AND) {
                bucket = bucket & (tim_ring->nb_bkts - 1);
                mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) &
                                                (tim_ring->nb_bkts - 1);
        }

        *bkt = &tim_ring->bkt[bucket];
        *mirr_bkt = &tim_ring->bkt[mirr_bucket];
}

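/* Walk the bucket's chunk list (each chunk's last slot holds the pointer to
 * the next chunk) and return every chunk after the first one to the mempool
 * in bulk; the first chunk is handed back to the caller for reuse.
 */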
static struct otx2_tim_ent *
tim_clr_bkt(struct otx2_tim_ring * const tim_ring,
            struct otx2_tim_bkt * const bkt)
{
#define TIM_MAX_OUTSTANDING_OBJ         64
        void *pend_chunks[TIM_MAX_OUTSTANDING_OBJ];
        struct otx2_tim_ent *chunk;
        struct otx2_tim_ent *pnext;
        uint8_t objs = 0;

        chunk = ((struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk);
        chunk = (struct otx2_tim_ent *)(uintptr_t)(chunk +
                        tim_ring->nb_chunk_slots)->w0;
        while (chunk) {
                pnext = (struct otx2_tim_ent *)(uintptr_t)
                        ((chunk + tim_ring->nb_chunk_slots)->w0);
                if (objs == TIM_MAX_OUTSTANDING_OBJ) {
                        rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks,
                                             objs);
                        objs = 0;
                }
                pend_chunks[objs++] = chunk;
                chunk = pnext;
        }

        if (objs)
                rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks,
                                objs);

        return (struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk;
}

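/* Chunk allocation when the current chunk is full.  tim_refill_chunk() is the
 * OTX2_TIM_ENA_FB path: if the bucket still has entries (or has no chunk at
 * all) a fresh chunk is taken from the mempool and linked in; otherwise the
 * bucket is empty and its existing chunk list is recycled via tim_clr_bkt().
 */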
static struct otx2_tim_ent *
tim_refill_chunk(struct otx2_tim_bkt * const bkt,
                 struct otx2_tim_bkt * const mirr_bkt,
                 struct otx2_tim_ring * const tim_ring)
{
        struct otx2_tim_ent *chunk;

        if (bkt->nb_entry || !bkt->first_chunk) {
                if (unlikely(rte_mempool_get(tim_ring->chunk_pool,
                                             (void **)&chunk)))
                        return NULL;
                if (bkt->nb_entry) {
                        *(uint64_t *)(((struct otx2_tim_ent *)
                                                mirr_bkt->current_chunk) +
                                        tim_ring->nb_chunk_slots) =
                                (uintptr_t)chunk;
                } else {
                        bkt->first_chunk = (uintptr_t)chunk;
                }
        } else {
                chunk = tim_clr_bkt(tim_ring, bkt);
                bkt->first_chunk = (uintptr_t)chunk;
        }
        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;

        return chunk;
}

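/* OTX2_TIM_ENA_DFB path: always pull a new chunk from the mempool and link it
 * at the tail (through the mirror bucket's current_chunk) or install it as
 * the bucket's first chunk when the bucket is empty.
 */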
static struct otx2_tim_ent *
tim_insert_chunk(struct otx2_tim_bkt * const bkt,
                 struct otx2_tim_bkt * const mirr_bkt,
                 struct otx2_tim_ring * const tim_ring)
{
        struct otx2_tim_ent *chunk;

        if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk)))
                return NULL;

        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
        if (bkt->nb_entry) {
                *(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t)
                                        mirr_bkt->current_chunk) +
                                tim_ring->nb_chunk_slots) = (uintptr_t)chunk;
        } else {
                bkt->first_chunk = (uintptr_t)chunk;
        }
        return chunk;
}

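/* Single-producer arm: grab the bucket semaphore/lock, wait for an in-flight
 * hardware bucket traversal to finish (the code spins on w1 bit 33,
 * presumably the HBT bit), then drop the entry into the next free slot of the
 * current chunk, allocating a new chunk when the remainder hits zero.
 */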
static __rte_always_inline int
tim_add_entry_sp(struct otx2_tim_ring * const tim_ring,
                 const uint32_t rel_bkt,
                 struct rte_event_timer * const tim,
                 const struct otx2_tim_ent * const pent,
                 const uint8_t flags)
{
        struct otx2_tim_bkt *mirr_bkt;
        struct otx2_tim_ent *chunk;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);

        /* Get the bucket semaphore. */
        lock_sema = tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                    );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }
        /* Insert the work. */
        rem = tim_bkt_fetch_rem(lock_sema);

        if (!rem) {
                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        bkt->chunk_remainder = 0;
                        tim_bkt_dec_lock(bkt);
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        return -ENOMEM;
                }
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
        } else {
                chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
        }

        /* Copy work entry. */
        *chunk = *pent;

        tim_bkt_inc_nent(bkt);
        tim_bkt_dec_lock(bkt);

        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        tim->state = RTE_EVENT_TIMER_ARMED;

        return 0;
}

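/* Multi-producer arm.  Same flow as the SP variant, but producers coordinate
 * through the chunk remainder: a negative remainder indicates another thread
 * is refilling the chunk, so this thread backs off (WFE on arm64) and retries
 * from bucket selection; a zero remainder elects this thread to allocate the
 * next chunk and publish it with a RELEASE store.
 */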
static __rte_always_inline int
tim_add_entry_mp(struct otx2_tim_ring * const tim_ring,
                 const uint32_t rel_bkt,
                 struct rte_event_timer * const tim,
                 const struct otx2_tim_ent * const pent,
                 const uint8_t flags)
{
        struct otx2_tim_bkt *mirr_bkt;
        struct otx2_tim_ent *chunk;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);
        /* Get the bucket semaphore. */
        lock_sema = tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                    );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        rem = tim_bkt_fetch_rem(lock_sema);
        if (rem < 0) {
#ifdef RTE_ARCH_ARM64
                asm volatile(
                                "       ldaxrh %w[rem], [%[crem]]       \n"
                                "       tbz %w[rem], 15, dne%=          \n"
                                "       sevl                            \n"
                                "rty%=: wfe                             \n"
                                "       ldaxrh %w[rem], [%[crem]]       \n"
                                "       tbnz %w[rem], 15, rty%=         \n"
                                "dne%=:                                 \n"
                                : [rem] "=&r" (rem)
                                : [crem] "r" (&bkt->chunk_remainder)
                                : "memory"
                            );
#else
                while (__atomic_load_n(&bkt->chunk_remainder,
                                       __ATOMIC_ACQUIRE) < 0)
                        ;
#endif
                /* Go to a different bucket. */
                tim_bkt_dec_lock(bkt);
                goto __retry;
        } else if (!rem) {
                /* Only one thread can be here. */
                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim_bkt_set_rem(bkt, 0);
                        tim_bkt_dec_lock(bkt);
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        return -ENOMEM;
                }
                *chunk = *pent;
                while (tim_bkt_fetch_lock(lock_sema) !=
                                (-tim_bkt_fetch_rem(lock_sema)))
                        lock_sema = __atomic_load_n(&bkt->w1, __ATOMIC_ACQUIRE);

                mirr_bkt->current_chunk = (uintptr_t)chunk;
                __atomic_store_n(&bkt->chunk_remainder,
                                tim_ring->nb_chunk_slots - 1, __ATOMIC_RELEASE);
        } else {
                chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
                *chunk = *pent;
        }

        /* Update bucket counters and release the lock. */
        tim_bkt_inc_nent(bkt);
        tim_bkt_dec_lock(bkt);
        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        tim->state = RTE_EVENT_TIMER_ARMED;

        return 0;
}

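/* Copy burst entries [index, cpy_lmt) into consecutive chunk slots and record
 * the per-timer chunk/bucket pointers in impl_opaque for a later cancel.
 */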
static inline uint16_t
tim_cpy_wrk(uint16_t index, uint16_t cpy_lmt,
            struct otx2_tim_ent *chunk,
            struct rte_event_timer ** const tim,
            const struct otx2_tim_ent * const ents,
            const struct otx2_tim_bkt * const bkt)
{
        for (; index < cpy_lmt; index++) {
                *chunk = *(ents + index);
                tim[index]->impl_opaque[0] = (uintptr_t)chunk++;
                tim[index]->impl_opaque[1] = (uintptr_t)bkt;
                tim[index]->state = RTE_EVENT_TIMER_ARMED;
        }

        return index;
}

/* Burst mode functions */
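/* Arm nb_timers entries that all land in the same bucket.  The bucket is
 * taken exclusively (any other lock holder forces a retry); if the burst does
 * not fit in the current chunk the copy is split across the old chunk and a
 * newly allocated one, and on allocation failure the arm is aborted with
 * rte_errno set to ENOMEM.
 */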
static inline int
tim_add_entry_brst(struct otx2_tim_ring * const tim_ring,
                   const uint16_t rel_bkt,
                   struct rte_event_timer ** const tim,
                   const struct otx2_tim_ent *ents,
                   const uint16_t nb_timers, const uint8_t flags)
{
        struct otx2_tim_ent *chunk = NULL;
        struct otx2_tim_bkt *mirr_bkt;
        struct otx2_tim_bkt *bkt;
        uint16_t chunk_remainder;
        uint16_t index = 0;
        uint64_t lock_sema;
        int16_t rem, crem;
        uint8_t lock_cnt;

__retry:
        tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);

        /* Only one thread beyond this. */
        lock_sema = tim_bkt_inc_lock(bkt);
        lock_cnt = (uint8_t)
                ((lock_sema >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK);

        if (lock_cnt) {
                tim_bkt_dec_lock(bkt);
                goto __retry;
        }

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                        );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        chunk_remainder = tim_bkt_fetch_rem(lock_sema);
        rem = chunk_remainder - nb_timers;
        if (rem < 0) {
                crem = tim_ring->nb_chunk_slots - chunk_remainder;
                if (chunk_remainder && crem) {
                        chunk = ((struct otx2_tim_ent *)
                                        mirr_bkt->current_chunk) + crem;

                        index = tim_cpy_wrk(index, chunk_remainder, chunk, tim,
                                            ents, bkt);
                        tim_bkt_sub_rem(bkt, chunk_remainder);
                        tim_bkt_add_nent(bkt, chunk_remainder);
                }

                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim_bkt_dec_lock(bkt);
                        rte_errno = ENOMEM;
                        tim[index]->state = RTE_EVENT_TIMER_ERROR;
                        return crem;
                }
                *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);

                rem = nb_timers - chunk_remainder;
                tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - rem);
                tim_bkt_add_nent(bkt, rem);
        } else {
                chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
                chunk += (tim_ring->nb_chunk_slots - chunk_remainder);

                tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);
                tim_bkt_sub_rem(bkt, nb_timers);
                tim_bkt_add_nent(bkt, nb_timers);
        }

        tim_bkt_dec_lock(bkt);

        return nb_timers;
}

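/* Cancel a previously armed timer.  impl_opaque[0]/[1] hold the chunk slot
 * and bucket recorded at arm time; the slot is zeroed under the bucket lock,
 * and -ENOENT is returned when the entry no longer matches (e.g. it already
 * expired) or was never armed.
 */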
static int
tim_rm_entry(struct rte_event_timer *tim)
{
        struct otx2_tim_ent *entry;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;

        if (tim->impl_opaque[1] == 0 || tim->impl_opaque[0] == 0)
                return -ENOENT;

        entry = (struct otx2_tim_ent *)(uintptr_t)tim->impl_opaque[0];
        if (entry->wqe != tim->ev.u64) {
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                return -ENOENT;
        }

        bkt = (struct otx2_tim_bkt *)(uintptr_t)tim->impl_opaque[1];
        lock_sema = tim_bkt_inc_lock(bkt);
        if (tim_bkt_get_hbt(lock_sema) || !tim_bkt_get_nent(lock_sema)) {
                tim_bkt_dec_lock(bkt);
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                return -ENOENT;
        }

        entry->w0 = 0;
        entry->wqe = 0;
        tim_bkt_dec_lock(bkt);

        tim->state = RTE_EVENT_TIMER_CANCELED;
        tim->impl_opaque[0] = 0;
        tim->impl_opaque[1] = 0;

        return 0;
}

#endif /* __OTX2_TIM_WORKER_H__ */