drivers/event/octeontx2/otx2_tim_worker.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#ifndef __OTX2_TIM_WORKER_H__
#define __OTX2_TIM_WORKER_H__

#include "otx2_tim_evdev.h"

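/*
 * Worker-path helpers (arm, burst arm, cancel) for the OCTEON TX2 TIM
 * event timer PMD.  Each bucket carries a 64-bit control word (w1)
 * packing a lock count, the free-slot remainder of the current chunk,
 * the entry count and the hardware traversal status bits; the
 * TIM_BUCKET_W1_* shifts and masks are expected to come from the
 * otx2_tim_evdev.h include.  The helpers below read and update those
 * fields with __atomic builtins so that arm/cancel paths can race
 * safely with hardware bucket traversal.
 */
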
static inline uint8_t
tim_bkt_fetch_lock(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_LOCK) &
                TIM_BUCKET_W1_M_LOCK;
}

static inline int16_t
tim_bkt_fetch_rem(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) &
                TIM_BUCKET_W1_M_CHUNK_REMAINDER;
}

static inline int16_t
tim_bkt_get_rem(struct otx2_tim_bkt *bktp)
{
        return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
}

static inline void
tim_bkt_set_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
        __atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_sub_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
        __atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline uint8_t
tim_bkt_get_hbt(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT;
}

static inline uint8_t
tim_bkt_get_bsk(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK;
}

static inline uint64_t
tim_bkt_clr_bsk(struct otx2_tim_bkt *bktp)
{
        /* Clear everything except lock. */
        const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

static inline uint64_t
tim_bkt_fetch_sema_lock(struct otx2_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
                        __ATOMIC_ACQUIRE);
}

static inline uint64_t
tim_bkt_fetch_sema(struct otx2_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_inc_lock(struct otx2_tim_bkt *bktp)
{
        const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
}

static inline void
tim_bkt_dec_lock(struct otx2_tim_bkt *bktp)
{
        /* Adding 0xff to the 8-bit lock field wraps around: this is an
         * atomic decrement with release semantics.
         */
        __atomic_add_fetch(&bktp->lock, 0xff, __ATOMIC_RELEASE);
}

static inline uint32_t
tim_bkt_get_nent(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) &
                TIM_BUCKET_W1_M_NUM_ENTRIES;
}

static inline void
tim_bkt_inc_nent(struct otx2_tim_bkt *bktp)
{
        __atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_add_nent(struct otx2_tim_bkt *bktp, uint32_t v)
{
        __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_clr_nent(struct otx2_tim_bkt *bktp)
{
        const uint64_t v = ~(TIM_BUCKET_W1_M_NUM_ENTRIES <<
                        TIM_BUCKET_W1_S_NUM_ENTRIES);

        return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

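/*
 * Translate a relative expiry (rel_bkt buckets from now) into pointers
 * to the target bucket and its mirror.  The current bucket index is the
 * cycle count since ring start divided by the bucket interval, done
 * with a precomputed reciprocal (fast_div); the wrap uses AND when
 * nb_bkts is a power of two and MOD otherwise, selected by the
 * compile-time flag.  The mirror bucket sits half a ring away and
 * appears to be where the current-chunk pointer is kept, away from the
 * bucket the hardware is about to traverse.
 */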
static __rte_always_inline void
tim_get_target_bucket(struct otx2_tim_ring * const tim_ring,
                      const uint32_t rel_bkt, struct otx2_tim_bkt **bkt,
                      struct otx2_tim_bkt **mirr_bkt, const uint8_t flag)
{
        const uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc;
        uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc,
                        &tim_ring->fast_div) + rel_bkt;
        uint32_t mirr_bucket = 0;

        if (flag & OTX2_TIM_BKT_MOD) {
                bucket = bucket % tim_ring->nb_bkts;
                mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) %
                                                tim_ring->nb_bkts;
        }
        if (flag & OTX2_TIM_BKT_AND) {
                bucket = bucket & (tim_ring->nb_bkts - 1);
                mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) &
                                                (tim_ring->nb_bkts - 1);
        }

        *bkt = &tim_ring->bkt[bucket];
        *mirr_bkt = &tim_ring->bkt[mirr_bucket];
}

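/*
 * Walk a bucket's chunk list, returning every chunk after the first to
 * the mempool; the first chunk is kept and returned for reuse.  The
 * slot at offset nb_chunk_slots in each chunk holds the 64-bit link to
 * the next chunk.
 */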
static struct otx2_tim_ent *
tim_clr_bkt(struct otx2_tim_ring * const tim_ring,
            struct otx2_tim_bkt * const bkt)
{
        struct otx2_tim_ent *chunk;
        struct otx2_tim_ent *pnext;

        chunk = ((struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk);
        chunk = (struct otx2_tim_ent *)(uintptr_t)(chunk +
                        tim_ring->nb_chunk_slots)->w0;
        while (chunk) {
                pnext = (struct otx2_tim_ent *)(uintptr_t)
                        ((chunk + tim_ring->nb_chunk_slots)->w0);
                rte_mempool_put(tim_ring->chunk_pool, chunk);
                chunk = pnext;
        }

        return (struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk;
}

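/*
 * Chunk allocation for rings in FB mode.  If the bucket still has live
 * entries (or no chunk at all) a fresh chunk is pulled from the mempool
 * and linked after the current one; if the bucket is empty but still
 * owns stale chunks from a previous traversal, the list is reclaimed
 * via tim_clr_bkt() and the first chunk reused.
 */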
static struct otx2_tim_ent *
tim_refill_chunk(struct otx2_tim_bkt * const bkt,
                 struct otx2_tim_bkt * const mirr_bkt,
                 struct otx2_tim_ring * const tim_ring)
{
        struct otx2_tim_ent *chunk;

        if (bkt->nb_entry || !bkt->first_chunk) {
                if (unlikely(rte_mempool_get(tim_ring->chunk_pool,
                                             (void **)&chunk)))
                        return NULL;
                if (bkt->nb_entry) {
                        *(uint64_t *)(((struct otx2_tim_ent *)
                                                mirr_bkt->current_chunk) +
                                        tim_ring->nb_chunk_slots) =
                                (uintptr_t)chunk;
                } else {
                        bkt->first_chunk = (uintptr_t)chunk;
                }
        } else {
                chunk = tim_clr_bkt(tim_ring, bkt);
                bkt->first_chunk = (uintptr_t)chunk;
        }
        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;

        return chunk;
}

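/*
 * Chunk allocation for rings in DFB mode: always take a fresh chunk
 * from the mempool, terminate its link slot and hook it onto the
 * bucket's chunk list (or make it the first chunk of an empty bucket).
 */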
static struct otx2_tim_ent *
tim_insert_chunk(struct otx2_tim_bkt * const bkt,
                 struct otx2_tim_bkt * const mirr_bkt,
                 struct otx2_tim_ring * const tim_ring)
{
        struct otx2_tim_ent *chunk;

        if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk)))
                return NULL;

        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
        if (bkt->nb_entry) {
                *(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t)
                                        mirr_bkt->current_chunk) +
                                tim_ring->nb_chunk_slots) = (uintptr_t)chunk;
        } else {
                bkt->first_chunk = (uintptr_t)chunk;
        }
        return chunk;
}

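/*
 * Single-producer arm.  A single fetch-add on w1 takes the bucket lock
 * and consumes one chunk-remainder slot; with only one arming thread
 * per ring no further hand-shake is needed.  If the hardware is
 * traversing the bucket (HBT, bit 33 of w1) and the bucket still has
 * entries, wait for the traversal to finish; the arm proceeds only if
 * the bucket was skipped (BSK, bit 34), otherwise the target bucket is
 * recomputed.
 */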
static __rte_always_inline int
tim_add_entry_sp(struct otx2_tim_ring * const tim_ring,
                 const uint32_t rel_bkt,
                 struct rte_event_timer * const tim,
                 const struct otx2_tim_ent * const pent,
                 const uint8_t flags)
{
        struct otx2_tim_bkt *mirr_bkt;
        struct otx2_tim_ent *chunk;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);

        /* Get the bucket semaphore and lock. */
        lock_sema = tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        /* Wait for the HBT bit (bit 33) to clear. */
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                    );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }
        /* Insert the work. */
        rem = tim_bkt_fetch_rem(lock_sema);

        if (!rem) {
                /* flags is a compile-time constant; exactly one of
                 * OTX2_TIM_ENA_FB/OTX2_TIM_ENA_DFB is expected to be set.
                 */
                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        bkt->chunk_remainder = 0;
                        tim_bkt_dec_lock(bkt);
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        return -ENOMEM;
                }
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
        } else {
                chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
        }

        /* Copy work entry. */
        *chunk = *pent;

        tim_bkt_inc_nent(bkt);
        tim_bkt_dec_lock(bkt);

        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        tim->state = RTE_EVENT_TIMER_ARMED;

        return 0;
}

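/*
 * Multi-producer arm.  The fetch-add in tim_bkt_fetch_sema_lock() both
 * bumps the lock count and consumes one remainder slot, so the returned
 * (pre-add) remainder tells each thread which slot it owns.  A negative
 * remainder means the current chunk is exhausted and another thread is
 * allocating the next one: spin until the remainder turns valid, then
 * retry.  The thread that saw the remainder hit exactly zero allocates
 * the new chunk and publishes it only after the racing lock holders are
 * accounted for.
 */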
static __rte_always_inline int
tim_add_entry_mp(struct otx2_tim_ring * const tim_ring,
                 const uint32_t rel_bkt,
                 struct rte_event_timer * const tim,
                 const struct otx2_tim_ent * const pent,
                 const uint8_t flags)
{
        struct otx2_tim_bkt *mirr_bkt;
        struct otx2_tim_ent *chunk;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);
        /* Get the bucket semaphore and lock. */
        lock_sema = tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                    );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        rem = tim_bkt_fetch_rem(lock_sema);
        if (rem < 0) {
#ifdef RTE_ARCH_ARM64
                /* Wait until the remainder's sign bit (bit 15) clears. */
                asm volatile(
                                "       ldaxrh %w[rem], [%[crem]]       \n"
                                "       tbz %w[rem], 15, dne%=          \n"
                                "       sevl                            \n"
                                "rty%=: wfe                             \n"
                                "       ldaxrh %w[rem], [%[crem]]       \n"
                                "       tbnz %w[rem], 15, rty%=         \n"
                                "dne%=:                                 \n"
                                : [rem] "=&r" (rem)
                                : [crem] "r" (&bkt->chunk_remainder)
                                : "memory"
                            );
#else
                while (__atomic_load_n(&bkt->chunk_remainder,
                                       __ATOMIC_ACQUIRE) < 0)
                        ;
#endif
                /* Current chunk is full; retry with a fresh bucket lookup. */
                tim_bkt_dec_lock(bkt);
                goto __retry;
        } else if (!rem) {
                /* Only one thread can be here. */
                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim_bkt_set_rem(bkt, 0);
                        tim_bkt_dec_lock(bkt);
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        return -ENOMEM;
                }
                *chunk = *pent;
                /* Wait until every thread that grabbed the lock is matched
                 * by an over-subscribed remainder slot before publishing
                 * the new chunk.
                 */
                while (tim_bkt_fetch_lock(lock_sema) !=
                                (-tim_bkt_fetch_rem(lock_sema)))
                        lock_sema = __atomic_load_n(&bkt->w1, __ATOMIC_ACQUIRE);

                mirr_bkt->current_chunk = (uintptr_t)chunk;
                __atomic_store_n(&bkt->chunk_remainder,
                                tim_ring->nb_chunk_slots - 1, __ATOMIC_RELEASE);
        } else {
                /* Copy the work entry into the owned slot. */
                chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
                *chunk = *pent;
        }

        tim_bkt_inc_nent(bkt);
        tim_bkt_dec_lock(bkt);
        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        tim->state = RTE_EVENT_TIMER_ARMED;

        return 0;
}

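/*
 * Copy entries ents[index..cpy_lmt) into consecutive chunk slots and
 * record the chunk slot and bucket in each timer's impl_opaque[] before
 * marking it armed.  Returns the next unprocessed index.
 */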
static inline uint16_t
tim_cpy_wrk(uint16_t index, uint16_t cpy_lmt,
            struct otx2_tim_ent *chunk,
            struct rte_event_timer ** const tim,
            const struct otx2_tim_ent * const ents,
            const struct otx2_tim_bkt * const bkt)
{
        for (; index < cpy_lmt; index++) {
                *chunk = *(ents + index);
                tim[index]->impl_opaque[0] = (uintptr_t)chunk++;
                tim[index]->impl_opaque[1] = (uintptr_t)bkt;
                tim[index]->state = RTE_EVENT_TIMER_ARMED;
        }

        return index;
}

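/*
 * Burst arm.  The bucket lock is taken exclusively (the previous lock
 * count must be zero, otherwise drop the lock and retry).  The burst is
 * then split across whatever room remains in the current chunk and a
 * newly allocated chunk; on chunk allocation failure rte_errno is set
 * to ENOMEM, the failing timer is marked in error and the call returns
 * early.
 */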
/* Burst mode functions */
static inline int
tim_add_entry_brst(struct otx2_tim_ring * const tim_ring,
                   const uint16_t rel_bkt,
                   struct rte_event_timer ** const tim,
                   const struct otx2_tim_ent *ents,
                   const uint16_t nb_timers, const uint8_t flags)
{
        struct otx2_tim_ent *chunk = NULL;
        struct otx2_tim_bkt *mirr_bkt;
        struct otx2_tim_bkt *bkt;
        uint16_t chunk_remainder;
        uint16_t index = 0;
        uint64_t lock_sema;
        int16_t rem, crem;
        uint8_t lock_cnt;

__retry:
        tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags);

        /* Only one thread is allowed beyond this point. */
        lock_sema = tim_bkt_inc_lock(bkt);
        lock_cnt = (uint8_t)
                ((lock_sema >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK);

        if (lock_cnt) {
                tim_bkt_dec_lock(bkt);
                goto __retry;
        }

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                        );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        chunk_remainder = tim_bkt_fetch_rem(lock_sema);
        rem = chunk_remainder - nb_timers;
        if (rem < 0) {
                /* The burst does not fit in the current chunk. */
                crem = tim_ring->nb_chunk_slots - chunk_remainder;
                if (chunk_remainder && crem) {
                        chunk = ((struct otx2_tim_ent *)
                                        mirr_bkt->current_chunk) + crem;

                        index = tim_cpy_wrk(index, chunk_remainder, chunk, tim,
                                            ents, bkt);
                        tim_bkt_sub_rem(bkt, chunk_remainder);
                        tim_bkt_add_nent(bkt, chunk_remainder);
                }

                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim_bkt_dec_lock(bkt);
                        rte_errno = ENOMEM;
                        tim[index]->state = RTE_EVENT_TIMER_ERROR;
                        return crem;
                }
                *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
                mirr_bkt->current_chunk = (uintptr_t)chunk;
                tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);

                rem = nb_timers - chunk_remainder;
                tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - rem);
                tim_bkt_add_nent(bkt, rem);
        } else {
                chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk;
                chunk += (tim_ring->nb_chunk_slots - chunk_remainder);

                tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);
                tim_bkt_sub_rem(bkt, nb_timers);
                tim_bkt_add_nent(bkt, nb_timers);
        }

        tim_bkt_dec_lock(bkt);

        return nb_timers;
}

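/*
 * Cancel an armed timer.  impl_opaque[0]/[1] hold the chunk slot and
 * bucket recorded at arm time; a stale handle is detected by comparing
 * the stored wqe against the timer's event word.  The entry is zeroed
 * in place under the bucket lock rather than unlinked from the chunk.
 */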
static int
tim_rm_entry(struct rte_event_timer *tim)
{
        struct otx2_tim_ent *entry;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;

        if (tim->impl_opaque[1] == 0 || tim->impl_opaque[0] == 0)
                return -ENOENT;

        entry = (struct otx2_tim_ent *)(uintptr_t)tim->impl_opaque[0];
        if (entry->wqe != tim->ev.u64) {
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                return -ENOENT;
        }

        bkt = (struct otx2_tim_bkt *)(uintptr_t)tim->impl_opaque[1];
        lock_sema = tim_bkt_inc_lock(bkt);
        if (tim_bkt_get_hbt(lock_sema) || !tim_bkt_get_nent(lock_sema)) {
                tim_bkt_dec_lock(bkt);
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                return -ENOENT;
        }

        entry->w0 = 0;
        entry->wqe = 0;
        tim_bkt_dec_lock(bkt);

        tim->state = RTE_EVENT_TIMER_CANCELED;
        tim->impl_opaque[0] = 0;
        tim->impl_opaque[1] = 0;

        return 0;
}

#endif /* __OTX2_TIM_WORKER_H__ */