event/octeontx2: fix HW timer race condition
[dpdk.git] drivers/event/octeontx2/otx2_tim_worker.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#ifndef __OTX2_TIM_WORKER_H__
#define __OTX2_TIM_WORKER_H__

#include "otx2_tim_evdev.h"

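/*
 * The helpers below read and update fields packed into the bucket
 * control word (w1): the lock count, the chunk remainder, the HBT/BSK
 * bits and the number of entries.  Updates use GCC __atomic builtins
 * so that the arm, cancel and burst paths can run concurrently.
 */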
static inline uint8_t
tim_bkt_fetch_lock(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_LOCK) &
                TIM_BUCKET_W1_M_LOCK;
}

static inline int16_t
tim_bkt_fetch_rem(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) &
                TIM_BUCKET_W1_M_CHUNK_REMAINDER;
}

static inline int16_t
tim_bkt_get_rem(struct otx2_tim_bkt *bktp)
{
        return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE);
}

static inline void
tim_bkt_set_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
        __atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_sub_rem(struct otx2_tim_bkt *bktp, uint16_t v)
{
        __atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED);
}

static inline uint8_t
tim_bkt_get_hbt(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT;
}

static inline uint8_t
tim_bkt_get_bsk(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK;
}

static inline uint64_t
tim_bkt_clr_bsk(struct otx2_tim_bkt *bktp)
{
        /* Clear everything except lock. */
        const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

static inline uint64_t
tim_bkt_fetch_sema_lock(struct otx2_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK,
                        __ATOMIC_ACQUIRE);
}

static inline uint64_t
tim_bkt_fetch_sema(struct otx2_tim_bkt *bktp)
{
        return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_inc_lock(struct otx2_tim_bkt *bktp)
{
        const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK;

        return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE);
}
static inline void
tim_bkt_dec_lock(struct otx2_tim_bkt *bktp)
{
        /* Adding 0xff to the 8-bit lock count wraps around, i.e. this
         * atomically decrements the lock count by one.
         */
        __atomic_add_fetch(&bktp->lock, 0xff, __ATOMIC_RELEASE);
}

static inline uint32_t
tim_bkt_get_nent(uint64_t w1)
{
        return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) &
                TIM_BUCKET_W1_M_NUM_ENTRIES;
}

static inline void
tim_bkt_inc_nent(struct otx2_tim_bkt *bktp)
{
        __atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED);
}

static inline void
tim_bkt_add_nent(struct otx2_tim_bkt *bktp, uint32_t v)
{
        __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED);
}

static inline uint64_t
tim_bkt_clr_nent(struct otx2_tim_bkt *bktp)
{
        const uint64_t v = ~(TIM_BUCKET_W1_M_NUM_ENTRIES <<
                        TIM_BUCKET_W1_S_NUM_ENTRIES);

        return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL);
}

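/*
 * Map a relative expiry (in buckets) to a bucket pointer: convert the
 * cycles elapsed since ring start into a bucket index with the
 * precomputed reciprocal divisor, add the requested offset and wrap it
 * with either a modulo or a mask, depending on whether the caller
 * selected OTX2_TIM_BKT_MOD or OTX2_TIM_BKT_AND (power-of-two ring).
 */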
static __rte_always_inline struct otx2_tim_bkt *
tim_get_target_bucket(struct otx2_tim_ring * const tim_ring,
                      const uint32_t rel_bkt, const uint8_t flag)
{
        const uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc;
        uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc,
                        &tim_ring->fast_div) + rel_bkt;

        if (flag & OTX2_TIM_BKT_MOD)
                bucket = bucket % tim_ring->nb_bkts;
        if (flag & OTX2_TIM_BKT_AND)
                bucket = bucket & (tim_ring->nb_bkts - 1);

        return &tim_ring->bkt[bucket];
}

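/*
 * Walk the bucket's chunk list starting from the second chunk and
 * return every chunk except the first one to the mempool.  The first
 * chunk is kept and handed back to the caller for reuse.
 */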
static struct otx2_tim_ent *
tim_clr_bkt(struct otx2_tim_ring * const tim_ring,
            struct otx2_tim_bkt * const bkt)
{
        struct otx2_tim_ent *chunk;
        struct otx2_tim_ent *pnext;

        chunk = ((struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk);
        chunk = (struct otx2_tim_ent *)(uintptr_t)(chunk +
                        tim_ring->nb_chunk_slots)->w0;
        while (chunk) {
                pnext = (struct otx2_tim_ent *)(uintptr_t)
                        ((chunk + tim_ring->nb_chunk_slots)->w0);
                rte_mempool_put(tim_ring->chunk_pool, chunk);
                chunk = pnext;
        }

        return (struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk;
}

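/*
 * Used on the OTX2_TIM_ENA_FB path when the current chunk is full:
 * allocate a fresh chunk from the pool and link it after the current
 * one (or install it as the first chunk), or, if the bucket holds no
 * live entries but still owns chunks, recycle them via tim_clr_bkt().
 */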
static struct otx2_tim_ent *
tim_refill_chunk(struct otx2_tim_bkt * const bkt,
                 struct otx2_tim_ring * const tim_ring)
{
        struct otx2_tim_ent *chunk;

        if (bkt->nb_entry || !bkt->first_chunk) {
                if (unlikely(rte_mempool_get(tim_ring->chunk_pool,
                                             (void **)&chunk)))
                        return NULL;
                if (bkt->nb_entry) {
                        *(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t)
                                                bkt->current_chunk) +
                                        tim_ring->nb_chunk_slots) =
                                (uintptr_t)chunk;
                } else {
                        bkt->first_chunk = (uintptr_t)chunk;
                }
        } else {
                chunk = tim_clr_bkt(tim_ring, bkt);
                bkt->first_chunk = (uintptr_t)chunk;
        }
        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;

        return chunk;
}

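/*
 * OTX2_TIM_ENA_DFB variant: always take a new chunk from the pool,
 * terminate its link word and append it to the bucket's chunk list.
 */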
static struct otx2_tim_ent *
tim_insert_chunk(struct otx2_tim_bkt * const bkt,
                 struct otx2_tim_ring * const tim_ring)
{
        struct otx2_tim_ent *chunk;

        if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk)))
                return NULL;

        *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
        if (bkt->nb_entry) {
                *(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t)
                                        bkt->current_chunk) +
                                tim_ring->nb_chunk_slots) = (uintptr_t)chunk;
        } else {
                bkt->first_chunk = (uintptr_t)chunk;
        }
        return chunk;
}

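/*
 * Single-producer arm: take the bucket semaphore and lock, wait for a
 * pending hardware traversal of a non-empty bucket (HBT bit) to finish,
 * then copy the entry into the current chunk, allocating a new chunk
 * first when the previous one is full.
 */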
static __rte_always_inline int
tim_add_entry_sp(struct otx2_tim_ring * const tim_ring,
                 const uint32_t rel_bkt,
                 struct rte_event_timer * const tim,
                 const struct otx2_tim_ent * const pent,
                 const uint8_t flags)
{
        struct otx2_tim_ent *chunk;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

        bkt = tim_get_target_bucket(tim_ring, rel_bkt, flags);

__retry:
        /* Get bucket semaphore. */
        lock_sema = tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        /* Wait with sevl/wfe until HW clears the HBT bit. */
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                        );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        /* Insert the work. */
        rem = tim_bkt_fetch_rem(lock_sema);

        if (!rem) {
                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        bkt->chunk_remainder = 0;
                        tim_bkt_dec_lock(bkt);
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        return -ENOMEM;
                }
                bkt->current_chunk = (uintptr_t)chunk;
                bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1;
        } else {
                chunk = (struct otx2_tim_ent *)(uintptr_t)bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
        }

        /* Copy work entry. */
        *chunk = *pent;

        tim_bkt_inc_nent(bkt);
        tim_bkt_dec_lock(bkt);

        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        tim->state = RTE_EVENT_TIMER_ARMED;

        return 0;
}

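/*
 * Multi-producer arm: each producer atomically takes one slot by
 * decrementing the chunk remainder while incrementing the lock count.
 * A negative remainder means the current chunk is exhausted and another
 * producer is installing a new one, so wait for it and retry.  The
 * producer that sees the remainder hit zero allocates the next chunk,
 * waits until the writers of the previous chunk have dropped their
 * locks (lock count == -remainder) and only then publishes the new
 * chunk with a release store of the remainder.
 */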
static __rte_always_inline int
tim_add_entry_mp(struct otx2_tim_ring * const tim_ring,
                 const uint32_t rel_bkt,
                 struct rte_event_timer * const tim,
                 const struct otx2_tim_ent * const pent,
                 const uint8_t flags)
{
        struct otx2_tim_ent *chunk;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;
        int16_t rem;

__retry:
        bkt = tim_get_target_bucket(tim_ring, rel_bkt, flags);
        /* Get bucket semaphore. */
        lock_sema = tim_bkt_fetch_sema_lock(bkt);

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        /* Wait with sevl/wfe until HW clears the HBT bit. */
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                        );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        rem = tim_bkt_fetch_rem(lock_sema);
        if (rem < 0) {
#ifdef RTE_ARCH_ARM64
                asm volatile(
                                "       ldaxrh %w[rem], [%[crem]]       \n"
                                "       tbz %w[rem], 15, dne%=          \n"
                                "       sevl                            \n"
                                "rty%=: wfe                             \n"
                                "       ldaxrh %w[rem], [%[crem]]       \n"
                                "       tbnz %w[rem], 15, rty%=         \n"
                                "dne%=:                                 \n"
                                : [rem] "=&r" (rem)
                                : [crem] "r" (&bkt->chunk_remainder)
                                : "memory"
                            );
#else
                while (__atomic_load_n(&bkt->chunk_remainder,
                                       __ATOMIC_ACQUIRE) < 0)
                        ;
#endif
                /* Goto diff bucket. */
                tim_bkt_dec_lock(bkt);
                goto __retry;
        } else if (!rem) {
                /* Only one thread can be here. */
                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim_bkt_set_rem(bkt, 0);
                        tim_bkt_dec_lock(bkt);
                        tim->impl_opaque[0] = 0;
                        tim->impl_opaque[1] = 0;
                        tim->state = RTE_EVENT_TIMER_ERROR;
                        return -ENOMEM;
                }
                /* Copy work entry. */
                *chunk = *pent;
                while (tim_bkt_fetch_lock(lock_sema) !=
                                (-tim_bkt_fetch_rem(lock_sema)))
                        lock_sema = __atomic_load_n(&bkt->w1, __ATOMIC_ACQUIRE);

                bkt->current_chunk = (uintptr_t)chunk;
                __atomic_store_n(&bkt->chunk_remainder,
                                tim_ring->nb_chunk_slots - 1, __ATOMIC_RELEASE);
        } else {
                chunk = (struct otx2_tim_ent *)(uintptr_t)bkt->current_chunk;
                chunk += tim_ring->nb_chunk_slots - rem;
                /* Copy work entry. */
                *chunk = *pent;
        }

        tim_bkt_inc_nent(bkt);
        tim_bkt_dec_lock(bkt);
        tim->impl_opaque[0] = (uintptr_t)chunk;
        tim->impl_opaque[1] = (uintptr_t)bkt;
        tim->state = RTE_EVENT_TIMER_ARMED;

        return 0;
}

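/*
 * Copy a run of entries [index, cpy_lmt) into consecutive chunk slots
 * and record the chunk/bucket handles in each timer's impl_opaque[].
 */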
static inline uint16_t
tim_cpy_wrk(uint16_t index, uint16_t cpy_lmt,
            struct otx2_tim_ent *chunk,
            struct rte_event_timer ** const tim,
            const struct otx2_tim_ent * const ents,
            const struct otx2_tim_bkt * const bkt)
{
        for (; index < cpy_lmt; index++) {
                *chunk = *(ents + index);
                tim[index]->impl_opaque[0] = (uintptr_t)chunk++;
                tim[index]->impl_opaque[1] = (uintptr_t)bkt;
                tim[index]->state = RTE_EVENT_TIMER_ARMED;
        }

        return index;
}

/* Burst mode functions */
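/*
 * Burst arm: take the bucket lock exclusively (retry while any other
 * holder is present), wait out a pending hardware traversal, then fill
 * the remaining slots of the current chunk and spill the rest of the
 * burst into a freshly linked chunk.
 */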
static inline int
tim_add_entry_brst(struct otx2_tim_ring * const tim_ring,
                   const uint16_t rel_bkt,
                   struct rte_event_timer ** const tim,
                   const struct otx2_tim_ent *ents,
                   const uint16_t nb_timers, const uint8_t flags)
{
        struct otx2_tim_ent *chunk = NULL;
        struct otx2_tim_bkt *bkt;
        uint16_t chunk_remainder;
        uint16_t index = 0;
        uint64_t lock_sema;
        int16_t rem, crem;
        uint8_t lock_cnt;

__retry:
        bkt = tim_get_target_bucket(tim_ring, rel_bkt, flags);

        /* Only one thread beyond this. */
        lock_sema = tim_bkt_inc_lock(bkt);
        lock_cnt = (uint8_t)
                ((lock_sema >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK);

        if (lock_cnt) {
                tim_bkt_dec_lock(bkt);
                goto __retry;
        }

        /* Bucket related checks. */
        if (unlikely(tim_bkt_get_hbt(lock_sema))) {
                if (tim_bkt_get_nent(lock_sema) != 0) {
                        uint64_t hbt_state;
#ifdef RTE_ARCH_ARM64
                        asm volatile(
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbz %[hbt], 33, dne%=   \n"
                                        "       sevl                    \n"
                                        "rty%=: wfe                     \n"
                                        "       ldaxr %[hbt], [%[w1]]   \n"
                                        "       tbnz %[hbt], 33, rty%=  \n"
                                        "dne%=:                         \n"
                                        : [hbt] "=&r" (hbt_state)
                                        : [w1] "r" ((&bkt->w1))
                                        : "memory"
                                        );
#else
                        do {
                                hbt_state = __atomic_load_n(&bkt->w1,
                                                __ATOMIC_ACQUIRE);
                        } while (hbt_state & BIT_ULL(33));
#endif

                        if (!(hbt_state & BIT_ULL(34))) {
                                tim_bkt_dec_lock(bkt);
                                goto __retry;
                        }
                }
        }

        chunk_remainder = tim_bkt_fetch_rem(lock_sema);
        rem = chunk_remainder - nb_timers;
        if (rem < 0) {
                crem = tim_ring->nb_chunk_slots - chunk_remainder;
                if (chunk_remainder && crem) {
                        chunk = ((struct otx2_tim_ent *)
                                        (uintptr_t)bkt->current_chunk) + crem;

                        index = tim_cpy_wrk(index, chunk_remainder, chunk, tim,
                                            ents, bkt);
                        tim_bkt_sub_rem(bkt, chunk_remainder);
                        tim_bkt_add_nent(bkt, chunk_remainder);
                }

                if (flags & OTX2_TIM_ENA_FB)
                        chunk = tim_refill_chunk(bkt, tim_ring);
                if (flags & OTX2_TIM_ENA_DFB)
                        chunk = tim_insert_chunk(bkt, tim_ring);

                if (unlikely(chunk == NULL)) {
                        tim_bkt_dec_lock(bkt);
                        rte_errno = ENOMEM;
                        tim[index]->state = RTE_EVENT_TIMER_ERROR;
                        return crem;
                }
                *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0;
                bkt->current_chunk = (uintptr_t)chunk;
                tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);

                rem = nb_timers - chunk_remainder;
                tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - rem);
                tim_bkt_add_nent(bkt, rem);
        } else {
                chunk = (struct otx2_tim_ent *)(uintptr_t)bkt->current_chunk;
                chunk += (tim_ring->nb_chunk_slots - chunk_remainder);

                tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt);
                tim_bkt_sub_rem(bkt, nb_timers);
                tim_bkt_add_nent(bkt, nb_timers);
        }

        tim_bkt_dec_lock(bkt);

        return nb_timers;
}

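/*
 * Cancel path: validate the chunk/bucket handles saved in impl_opaque,
 * make sure the entry still belongs to this timer, take the bucket
 * lock, and bail out if hardware is traversing the bucket or it is
 * already empty; otherwise clear the entry in place.
 */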
static int
tim_rm_entry(struct rte_event_timer *tim)
{
        struct otx2_tim_ent *entry;
        struct otx2_tim_bkt *bkt;
        uint64_t lock_sema;

        if (tim->impl_opaque[1] == 0 || tim->impl_opaque[0] == 0)
                return -ENOENT;

        entry = (struct otx2_tim_ent *)(uintptr_t)tim->impl_opaque[0];
        if (entry->wqe != tim->ev.u64) {
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                return -ENOENT;
        }

        bkt = (struct otx2_tim_bkt *)(uintptr_t)tim->impl_opaque[1];
        lock_sema = tim_bkt_inc_lock(bkt);
        if (tim_bkt_get_hbt(lock_sema) || !tim_bkt_get_nent(lock_sema)) {
                tim_bkt_dec_lock(bkt);
                tim->impl_opaque[0] = 0;
                tim->impl_opaque[1] = 0;
                return -ENOENT;
        }

        entry->w0 = 0;
        entry->wqe = 0;
        tim_bkt_dec_lock(bkt);

        tim->state = RTE_EVENT_TIMER_CANCELED;
        tim->impl_opaque[0] = 0;
        tim->impl_opaque[1] = 0;

        return 0;
}

#endif /* __OTX2_TIM_WORKER_H__ */