timer: add function to stop all timers in a list
[dpdk.git] lib/librte_timer/rte_timer.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <string.h>
6 #include <stdio.h>
7 #include <stdint.h>
8 #include <stdbool.h>
9 #include <inttypes.h>
10 #include <assert.h>
11 #include <sys/queue.h>
12
13 #include <rte_atomic.h>
14 #include <rte_common.h>
15 #include <rte_cycles.h>
16 #include <rte_per_lcore.h>
17 #include <rte_memory.h>
18 #include <rte_launch.h>
19 #include <rte_eal.h>
20 #include <rte_lcore.h>
21 #include <rte_branch_prediction.h>
22 #include <rte_spinlock.h>
23 #include <rte_random.h>
24 #include <rte_pause.h>
25 #include <rte_memzone.h>
26 #include <rte_malloc.h>
27 #include <rte_compat.h>
28
29 #include "rte_timer.h"
30
31 /**
32  * Per-lcore info for timers.
33  */
34 struct priv_timer {
35         struct rte_timer pending_head;  /**< dummy timer instance to head up list */
36         rte_spinlock_t list_lock;       /**< lock to protect list access */
37
38         /** per-core variable that is true if a timer was updated on this
39          *  core since the last reset of the variable */
40         int updated;
41
42         /** track the current depth of the skiplist */
43         unsigned curr_skiplist_depth;
44
45         unsigned prev_lcore;              /**< used for lcore round robin */
46
47         /** running timer on this lcore now */
48         struct rte_timer *running_tim;
49
50 #ifdef RTE_LIBRTE_TIMER_DEBUG
51         /** per-lcore statistics */
52         struct rte_timer_debug_stats stats;
53 #endif
54 } __rte_cache_aligned;
55
56 #define FL_ALLOCATED    (1 << 0)
57 struct rte_timer_data {
58         struct priv_timer priv_timer[RTE_MAX_LCORE];
59         uint8_t internal_flags;
60 };
61
62 #define RTE_MAX_DATA_ELS 64
63 static struct rte_timer_data *rte_timer_data_arr;
64 static const uint32_t default_data_id;
65 static uint32_t rte_timer_subsystem_initialized;
66
67 /* For maintaining older interfaces for a period */
68 static struct rte_timer_data default_timer_data;
69
70 /* when debug is enabled, store some statistics */
71 #ifdef RTE_LIBRTE_TIMER_DEBUG
72 #define __TIMER_STAT_ADD(priv_timer, name, n) do {                      \
73                 unsigned __lcore_id = rte_lcore_id();                   \
74                 if (__lcore_id < RTE_MAX_LCORE)                         \
75                         priv_timer[__lcore_id].stats.name += (n);       \
76         } while(0)
77 #else
78 #define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
79 #endif
80
81 static inline int
82 timer_data_valid(uint32_t id)
83 {
84         return !!(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
85 }
86
87 /* validate ID and retrieve timer data pointer, or return error value */
88 #define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {    \
89         if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))            \
90                 return retval;                                          \
91         timer_data = &rte_timer_data_arr[id];                           \
92 } while (0)
93
94 int __rte_experimental
95 rte_timer_data_alloc(uint32_t *id_ptr)
96 {
97         int i;
98         struct rte_timer_data *data;
99
100         if (!rte_timer_subsystem_initialized)
101                 return -ENOMEM;
102
103         for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
104                 data = &rte_timer_data_arr[i];
105                 if (!(data->internal_flags & FL_ALLOCATED)) {
106                         data->internal_flags |= FL_ALLOCATED;
107
108                         if (id_ptr)
109                                 *id_ptr = i;
110
111                         return 0;
112                 }
113         }
114
115         return -ENOSPC;
116 }
117
118 int __rte_experimental
119 rte_timer_data_dealloc(uint32_t id)
120 {
121         struct rte_timer_data *timer_data;
122         TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);
123
124         timer_data->internal_flags &= ~(FL_ALLOCATED);
125
126         return 0;
127 }
128
129 void
130 rte_timer_subsystem_init_v20(void)
131 {
132         unsigned lcore_id;
133         struct priv_timer *priv_timer = default_timer_data.priv_timer;
134
135         /* since priv_timer is static, it's zeroed by default, so only init some
136          * fields.
137          */
138         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++) {
139                 rte_spinlock_init(&priv_timer[lcore_id].list_lock);
140                 priv_timer[lcore_id].prev_lcore = lcore_id;
141         }
142 }
143 VERSION_SYMBOL(rte_timer_subsystem_init, _v20, 2.0);
144
145 /* Init the timer library. Allocate an array of timer data structs in shared
146  * memory, and allocate the zeroth entry for use with original timer
147  * APIs. Since the intersection of the sets of lcore ids in primary and
148  * secondary processes should be empty, the zeroth entry can be shared by
149  * multiple processes.
150  */
151 int
152 rte_timer_subsystem_init_v1905(void)
153 {
154         const struct rte_memzone *mz;
155         struct rte_timer_data *data;
156         int i, lcore_id;
157         static const char *mz_name = "rte_timer_mz";
158
159         if (rte_timer_subsystem_initialized)
160                 return -EALREADY;
161
162         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
163                 mz = rte_memzone_lookup(mz_name);
164                 if (mz == NULL)
165                         return -EEXIST;
166
167                 rte_timer_data_arr = mz->addr;
168
169                 rte_timer_data_arr[default_data_id].internal_flags |=
170                         FL_ALLOCATED;
171
172                 rte_timer_subsystem_initialized = 1;
173
174                 return 0;
175         }
176
177         mz = rte_memzone_reserve_aligned(mz_name,
178                         RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr),
179                         SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
180         if (mz == NULL)
181                 return -ENOMEM;
182
183         rte_timer_data_arr = mz->addr;
184
185         for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
186                 data = &rte_timer_data_arr[i];
187
188                 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
189                         rte_spinlock_init(
190                                 &data->priv_timer[lcore_id].list_lock);
191                         data->priv_timer[lcore_id].prev_lcore = lcore_id;
192                 }
193         }
194
195         rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
196
197         rte_timer_subsystem_initialized = 1;
198
199         return 0;
200 }
201 MAP_STATIC_SYMBOL(int rte_timer_subsystem_init(void),
202                   rte_timer_subsystem_init_v1905);
203 BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
204
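/*
 * Illustrative usage sketch (hypothetical application code, not part of this
 * library; names such as app_timer_setup and my_timer_data_id are
 * placeholders): a primary process initializes the subsystem once at startup
 * and may then allocate a private timer data instance for use with the
 * rte_timer_alt_*() APIs.
 *
 *     static uint32_t my_timer_data_id;
 *
 *     static int
 *     app_timer_setup(void)
 *     {
 *             int ret;
 *
 *             ret = rte_timer_subsystem_init();
 *             if (ret != 0 && ret != -EALREADY)
 *                     return ret;
 *
 *             return rte_timer_data_alloc(&my_timer_data_id);
 *     }
 */
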
205 void __rte_experimental
206 rte_timer_subsystem_finalize(void)
207 {
208         if (rte_timer_data_arr)
209                 rte_free(rte_timer_data_arr);
210
211         rte_timer_subsystem_initialized = 0;
212 }
213
214 /* Initialize the timer handle tim for use */
215 void
216 rte_timer_init(struct rte_timer *tim)
217 {
218         union rte_timer_status status;
219
220         status.state = RTE_TIMER_STOP;
221         status.owner = RTE_TIMER_NO_OWNER;
222         tim->status.u32 = status.u32;
223 }
224
225 /*
226  * if timer is pending or stopped (or running on the same core as
227  * us), mark timer as being configured, and on success return the
228  * previous status of the timer
229  */
230 static int
231 timer_set_config_state(struct rte_timer *tim,
232                        union rte_timer_status *ret_prev_status,
233                        struct priv_timer *priv_timer)
234 {
235         union rte_timer_status prev_status, status;
236         int success = 0;
237         unsigned lcore_id;
238
239         lcore_id = rte_lcore_id();
240
241         /* wait until the timer is in the correct state before updating,
242          * and mark it as being configured */
243         while (success == 0) {
244                 prev_status.u32 = tim->status.u32;
245
246                 /* timer is running on another core
247                  * or ready to run on local core, exit
248                  */
249                 if (prev_status.state == RTE_TIMER_RUNNING &&
250                     (prev_status.owner != (uint16_t)lcore_id ||
251                      tim != priv_timer[lcore_id].running_tim))
252                         return -1;
253
254                 /* timer is being configured on another core */
255                 if (prev_status.state == RTE_TIMER_CONFIG)
256                         return -1;
257
258                 /* here, we know that timer is stopped or pending,
259                  * mark it atomically as being configured */
260                 status.state = RTE_TIMER_CONFIG;
261                 status.owner = (int16_t)lcore_id;
262                 success = rte_atomic32_cmpset(&tim->status.u32,
263                                               prev_status.u32,
264                                               status.u32);
265         }
266
267         ret_prev_status->u32 = prev_status.u32;
268         return 0;
269 }
270
271 /*
272  * if timer is pending, mark timer as running
273  */
274 static int
275 timer_set_running_state(struct rte_timer *tim)
276 {
277         union rte_timer_status prev_status, status;
278         unsigned lcore_id = rte_lcore_id();
279         int success = 0;
280
281         /* wait until the timer is in the correct state before updating,
282          * and mark it as running */
283         while (success == 0) {
284                 prev_status.u32 = tim->status.u32;
285
286                 /* timer is not pending anymore */
287                 if (prev_status.state != RTE_TIMER_PENDING)
288                         return -1;
289
290                 /* here, we know that the timer is pending,
291                  * mark it atomically as running */
292                 status.state = RTE_TIMER_RUNNING;
293                 status.owner = (int16_t)lcore_id;
294                 success = rte_atomic32_cmpset(&tim->status.u32,
295                                               prev_status.u32,
296                                               status.u32);
297         }
298
299         return 0;
300 }
301
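/*
 * Summary of the status transitions driven by the two helpers above and by
 * the reset/stop/manage paths further below (see those functions for the
 * authoritative behaviour):
 *
 *   STOP or PENDING --(timer_set_config_state)--> CONFIG
 *   CONFIG          --(__rte_timer_reset)-------> PENDING
 *   CONFIG          --(__rte_timer_stop)--------> STOP
 *   PENDING         --(timer_set_running_state)-> RUNNING  (manage only)
 *   RUNNING         --(after the callback)------> STOP for one-shot timers,
 *                     PENDING for periodic ones, unless the callback itself
 *                     reset or stopped the timer (the 'updated' flag).
 */
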
302 /*
303  * Return a skiplist level for a new entry.
304  * This probabilistically gives a level such that, with probability 1/4,
305  * an entry at level n will also appear at level n+1.
306  */
307 static uint32_t
308 timer_get_skiplist_level(unsigned curr_depth)
309 {
310 #ifdef RTE_LIBRTE_TIMER_DEBUG
311         static uint32_t i, count = 0;
312         static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
313 #endif
314
315         /* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
316          * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
317          * bit position of a (pseudo)random number.
318          */
319         uint32_t rand = rte_rand() & (UINT32_MAX - 1);
320         uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;
321
322         /* limit the levels used to one above our current level, so we don't,
323          * for instance, have a level 0 and a level 7 without anything between
324          */
325         if (level > curr_depth)
326                 level = curr_depth;
327         if (level >= MAX_SKIPLIST_DEPTH)
328                 level = MAX_SKIPLIST_DEPTH-1;
329 #ifdef RTE_LIBRTE_TIMER_DEBUG
330         count ++;
331         levels[level]++;
332         if (count % 10000 == 0)
333                 for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
334                         printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
335 #endif
336         return level;
337 }
338
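/*
 * Worked example for the level computation above: rand has bit 0 cleared,
 * so rte_bsf32(rand) >= 1 and level = (rte_bsf32(rand) - 1) / 2.
 *
 *   lowest set bit is bit 1 or 2 (probability 3/4)   -> level 0
 *   lowest set bit is bit 3 or 4 (probability 3/16)  -> level 1
 *   lowest set bit is bit 5 or 6 (probability 3/64)  -> level 2
 *
 * i.e. each level is reached with 1/4 the probability of the level below it,
 * matching the p=1/4 skiplist distribution described above; the rand == 0
 * case maps to the maximum depth and is then clamped.
 */
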
339 /*
340  * For a given time value, get the entries at each level which
341  * are <= that time value.
342  */
343 static void
344 timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
345                        struct rte_timer **prev, struct priv_timer *priv_timer)
346 {
347         unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
348         prev[lvl] = &priv_timer[tim_lcore].pending_head;
349         while(lvl != 0) {
350                 lvl--;
351                 prev[lvl] = prev[lvl+1];
352                 while (prev[lvl]->sl_next[lvl] &&
353                                 prev[lvl]->sl_next[lvl]->expire <= time_val)
354                         prev[lvl] = prev[lvl]->sl_next[lvl];
355         }
356 }
357
358 /*
359  * Given a timer node in the skiplist, find the previous entries for it at
360  * all skiplist levels.
361  */
362 static void
363 timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
364                                 struct rte_timer **prev,
365                                 struct priv_timer *priv_timer)
366 {
367         int i;
368
369         /* to get a specific entry in the list, look for entries with expiry
370          * just lower than this timer's, then advance on each level as needed
371          */
372         timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
373         for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
374                 while (prev[i]->sl_next[i] != NULL &&
375                                 prev[i]->sl_next[i] != tim &&
376                                 prev[i]->sl_next[i]->expire <= tim->expire)
377                         prev[i] = prev[i]->sl_next[i];
378         }
379 }
380
381 /* call with lock held as necessary
382  * add in list
383  * timer must be in config state
384  * timer must not be in a list
385  */
386 static void
387 timer_add(struct rte_timer *tim, unsigned int tim_lcore,
388           struct priv_timer *priv_timer)
389 {
390         unsigned lvl;
391         struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
392
393         /* find where exactly this element goes in the list of elements
394          * for each depth. */
395         timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);
396
397         /* now assign it a new level and add at that level */
398         const unsigned tim_level = timer_get_skiplist_level(
399                         priv_timer[tim_lcore].curr_skiplist_depth);
400         if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
401                 priv_timer[tim_lcore].curr_skiplist_depth++;
402
403         lvl = tim_level;
404         while (lvl > 0) {
405                 tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
406                 prev[lvl]->sl_next[lvl] = tim;
407                 lvl--;
408         }
409         tim->sl_next[0] = prev[0]->sl_next[0];
410         prev[0]->sl_next[0] = tim;
411
412         /* save the lowest list entry into the expire field of the dummy hdr
413          * NOTE: this is not atomic on 32-bit */
414         priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
415                         pending_head.sl_next[0]->expire;
416 }
417
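/*
 * Illustrative shape of the per-lcore pending skiplist built by timer_add()
 * (example values; four timers with expiry 10/20/30/40 and a current depth
 * of 3):
 *
 *   level 2: head --------------------> 30
 *   level 1: head --------> 20 -------> 30
 *   level 0: head -> 10 --> 20 -> 30 -> 40
 *
 * Every timer appears at level 0 in expiry order; each higher level holds a
 * randomly chosen subset, so timer_get_prev_entries() can descend from the
 * top level and find an insertion point in O(log n) steps on average.
 */
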
418 /*
419  * del from list, lock if needed
420  * timer must be in config state
421  * timer must be in a list
422  */
423 static void
424 timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
425           int local_is_locked, struct priv_timer *priv_timer)
426 {
427         unsigned lcore_id = rte_lcore_id();
428         unsigned prev_owner = prev_status.owner;
429         int i;
430         struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
431
432         /* if the timer is pending on another core, we need to lock the
433          * list; if it is on the local core, we need to lock if we are
434          * not called from rte_timer_manage() */
435         if (prev_owner != lcore_id || !local_is_locked)
436                 rte_spinlock_lock(&priv_timer[prev_owner].list_lock);
437
438         /* save the lowest list entry into the expire field of the dummy hdr.
439          * NOTE: this is not atomic on 32-bit */
440         if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
441                 priv_timer[prev_owner].pending_head.expire =
442                                 ((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);
443
444         /* adjust pointers from previous entries to point past this */
445         timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
446         for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
447                 if (prev[i]->sl_next[i] == tim)
448                         prev[i]->sl_next[i] = tim->sl_next[i];
449         }
450
451         /* in case we deleted last entry at a level, adjust down max level */
452         for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
453                 if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
454                         priv_timer[prev_owner].curr_skiplist_depth --;
455                 else
456                         break;
457
458         if (prev_owner != lcore_id || !local_is_locked)
459                 rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
460 }
461
462 /* Reset and start the timer associated with the timer handle (private func) */
463 static int
464 __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
465                   uint64_t period, unsigned tim_lcore,
466                   rte_timer_cb_t fct, void *arg,
467                   int local_is_locked,
468                   struct rte_timer_data *timer_data)
469 {
470         union rte_timer_status prev_status, status;
471         int ret;
472         unsigned lcore_id = rte_lcore_id();
473         struct priv_timer *priv_timer = timer_data->priv_timer;
474
475         /* round robin for tim_lcore */
476         if (tim_lcore == (unsigned)LCORE_ID_ANY) {
477                 if (lcore_id < RTE_MAX_LCORE) {
478                         /* EAL thread with valid lcore_id */
479                         tim_lcore = rte_get_next_lcore(
480                                 priv_timer[lcore_id].prev_lcore,
481                                 0, 1);
482                         priv_timer[lcore_id].prev_lcore = tim_lcore;
483                 } else
484                         /* non-EAL threads do not run rte_timer_manage(),
485                          * so schedule the timer on the first enabled lcore. */
486                         tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
487         }
488
489         /* wait until the timer is in the correct state before updating,
490          * and mark it as being configured */
491         ret = timer_set_config_state(tim, &prev_status, priv_timer);
492         if (ret < 0)
493                 return -1;
494
495         __TIMER_STAT_ADD(priv_timer, reset, 1);
496         if (prev_status.state == RTE_TIMER_RUNNING &&
497             lcore_id < RTE_MAX_LCORE) {
498                 priv_timer[lcore_id].updated = 1;
499         }
500
501         /* remove it from list */
502         if (prev_status.state == RTE_TIMER_PENDING) {
503                 timer_del(tim, prev_status, local_is_locked, priv_timer);
504                 __TIMER_STAT_ADD(priv_timer, pending, -1);
505         }
506
507         tim->period = period;
508         tim->expire = expire;
509         tim->f = fct;
510         tim->arg = arg;
511
512         /* if timer needs to be scheduled on another core, we need to
513          * lock the destination list; if it is on local core, we need to lock if
514          * we are not called from rte_timer_manage()
515          */
516         if (tim_lcore != lcore_id || !local_is_locked)
517                 rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
518
519         __TIMER_STAT_ADD(priv_timer, pending, 1);
520         timer_add(tim, tim_lcore, priv_timer);
521
522         /* update state: as we are in CONFIG state, only we can modify
523          * the state, so we don't need to use cmpset() here */
524         rte_wmb();
525         status.state = RTE_TIMER_PENDING;
526         status.owner = (int16_t)tim_lcore;
527         tim->status.u32 = status.u32;
528
529         if (tim_lcore != lcore_id || !local_is_locked)
530                 rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
531
532         return 0;
533 }
534
535 /* Reset and start the timer associated with the timer handle tim */
536 int
537 rte_timer_reset_v20(struct rte_timer *tim, uint64_t ticks,
538                     enum rte_timer_type type, unsigned int tim_lcore,
539                     rte_timer_cb_t fct, void *arg)
540 {
541         uint64_t cur_time = rte_get_timer_cycles();
542         uint64_t period;
543
544         if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) &&
545                         !(rte_lcore_is_enabled(tim_lcore) ||
546                           rte_lcore_has_role(tim_lcore, ROLE_SERVICE))))
547                 return -1;
548
549         if (type == PERIODICAL)
550                 period = ticks;
551         else
552                 period = 0;
553
554         return __rte_timer_reset(tim,  cur_time + ticks, period, tim_lcore,
555                           fct, arg, 0, &default_timer_data);
556 }
557 VERSION_SYMBOL(rte_timer_reset, _v20, 2.0);
558
559 int
560 rte_timer_reset_v1905(struct rte_timer *tim, uint64_t ticks,
561                       enum rte_timer_type type, unsigned int tim_lcore,
562                       rte_timer_cb_t fct, void *arg)
563 {
564         return rte_timer_alt_reset(default_data_id, tim, ticks, type,
565                                    tim_lcore, fct, arg);
566 }
567 MAP_STATIC_SYMBOL(int rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
568                                       enum rte_timer_type type,
569                                       unsigned int tim_lcore,
570                                       rte_timer_cb_t fct, void *arg),
571                   rte_timer_reset_v1905);
572 BIND_DEFAULT_SYMBOL(rte_timer_reset, _v1905, 19.05);
573
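/*
 * Illustrative usage sketch (hypothetical application code; my_timer_cb,
 * my_tim and arm_one_shot are placeholders): arm a single-shot timer that
 * fires roughly one second from now on the calling lcore.
 *
 *     static void
 *     my_timer_cb(struct rte_timer *tim, void *arg)
 *     {
 *             // runs from rte_timer_manage() on the lcore that armed it;
 *             // a SINGLE timer is already stopped when this executes
 *     }
 *
 *     static struct rte_timer my_tim;
 *
 *     static void
 *     arm_one_shot(void)
 *     {
 *             rte_timer_init(&my_tim);
 *
 *             // ticks are timer cycles; rte_get_timer_hz() cycles ~= 1 s
 *             if (rte_timer_reset(&my_tim, rte_get_timer_hz(), SINGLE,
 *                                 rte_lcore_id(), my_timer_cb, NULL) < 0)
 *                     ; // timer is running or being configured elsewhere
 *     }
 */
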
574 int __rte_experimental
575 rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
576                     uint64_t ticks, enum rte_timer_type type,
577                     unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
578 {
579         uint64_t cur_time = rte_get_timer_cycles();
580         uint64_t period;
581         struct rte_timer_data *timer_data;
582
583         TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
584
585         if (unlikely((tim_lcore != (unsigned int)LCORE_ID_ANY) &&
586                         !(rte_lcore_is_enabled(tim_lcore) ||
587                           rte_lcore_has_role(tim_lcore, ROLE_SERVICE))))
588                 return -1;
589
590         if (type == PERIODICAL)
591                 period = ticks;
592         else
593                 period = 0;
594
595         return __rte_timer_reset(tim,  cur_time + ticks, period, tim_lcore,
596                                  fct, arg, 0, timer_data);
597 }
598
599 /* loop until rte_timer_reset() succeeds */
600 void
601 rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
602                      enum rte_timer_type type, unsigned tim_lcore,
603                      rte_timer_cb_t fct, void *arg)
604 {
605         while (rte_timer_reset(tim, ticks, type, tim_lcore,
606                                fct, arg) != 0)
607                 rte_pause();
608 }
609
610 static int
611 __rte_timer_stop(struct rte_timer *tim, int local_is_locked,
612                  struct rte_timer_data *timer_data)
613 {
614         union rte_timer_status prev_status, status;
615         unsigned lcore_id = rte_lcore_id();
616         int ret;
617         struct priv_timer *priv_timer = timer_data->priv_timer;
618
619         /* wait until the timer is in the correct state before updating,
620          * and mark it as being configured */
621         ret = timer_set_config_state(tim, &prev_status, priv_timer);
622         if (ret < 0)
623                 return -1;
624
625         __TIMER_STAT_ADD(priv_timer, stop, 1);
626         if (prev_status.state == RTE_TIMER_RUNNING &&
627             lcore_id < RTE_MAX_LCORE) {
628                 priv_timer[lcore_id].updated = 1;
629         }
630
631         /* remove it from list */
632         if (prev_status.state == RTE_TIMER_PENDING) {
633                 timer_del(tim, prev_status, local_is_locked, priv_timer);
634                 __TIMER_STAT_ADD(priv_timer, pending, -1);
635         }
636
637         /* mark timer as stopped */
638         rte_wmb();
639         status.state = RTE_TIMER_STOP;
640         status.owner = RTE_TIMER_NO_OWNER;
641         tim->status.u32 = status.u32;
642
643         return 0;
644 }
645
646 /* Stop the timer associated with the timer handle tim */
647 int
648 rte_timer_stop_v20(struct rte_timer *tim)
649 {
650         return __rte_timer_stop(tim, 0, &default_timer_data);
651 }
652 VERSION_SYMBOL(rte_timer_stop, _v20, 2.0);
653
654 int
655 rte_timer_stop_v1905(struct rte_timer *tim)
656 {
657         return rte_timer_alt_stop(default_data_id, tim);
658 }
659 MAP_STATIC_SYMBOL(int rte_timer_stop(struct rte_timer *tim),
660                   rte_timer_stop_v1905);
661 BIND_DEFAULT_SYMBOL(rte_timer_stop, _v1905, 19.05);
662
663 int __rte_experimental
664 rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
665 {
666         struct rte_timer_data *timer_data;
667
668         TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
669
670         return __rte_timer_stop(tim, 0, timer_data);
671 }
672
673 /* loop until rte_timer_stop() succeeds */
674 void
675 rte_timer_stop_sync(struct rte_timer *tim)
676 {
677         while (rte_timer_stop(tim) != 0)
678                 rte_pause();
679 }
680
681 /* Test the PENDING status of the timer handle tim */
682 int
683 rte_timer_pending(struct rte_timer *tim)
684 {
685         return tim->status.state == RTE_TIMER_PENDING;
686 }
687
688 /* must be called periodically; runs all timers that have expired */
689 static void
690 __rte_timer_manage(struct rte_timer_data *timer_data)
691 {
692         union rte_timer_status status;
693         struct rte_timer *tim, *next_tim;
694         struct rte_timer *run_first_tim, **pprev;
695         unsigned lcore_id = rte_lcore_id();
696         struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
697         uint64_t cur_time;
698         int i, ret;
699         struct priv_timer *priv_timer = timer_data->priv_timer;
700
701         /* timer manager only runs on EAL thread with valid lcore_id */
702         assert(lcore_id < RTE_MAX_LCORE);
703
704         __TIMER_STAT_ADD(priv_timer, manage, 1);
705         /* optimize for the case where per-cpu list is empty */
706         if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
707                 return;
708         cur_time = rte_get_timer_cycles();
709
710 #ifdef RTE_ARCH_64
711         /* on 64-bit the value cached in pending_head.expire will be
712          * updated atomically, so we can consult that for a quick check here
713          * outside the lock */
714         if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
715                 return;
716 #endif
717
718         /* browse ordered list, add expired timers in 'expired' list */
719         rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
720
721         /* if nothing to do just unlock and return */
722         if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
723             priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
724                 rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
725                 return;
726         }
727
728         /* save start of list of expired timers */
729         tim = priv_timer[lcore_id].pending_head.sl_next[0];
730
731         /* break the existing list at current time point */
732         timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
733         for (i = priv_timer[lcore_id].curr_skiplist_depth -1; i >= 0; i--) {
734                 if (prev[i] == &priv_timer[lcore_id].pending_head)
735                         continue;
736                 priv_timer[lcore_id].pending_head.sl_next[i] =
737                     prev[i]->sl_next[i];
738                 if (prev[i]->sl_next[i] == NULL)
739                         priv_timer[lcore_id].curr_skiplist_depth--;
740                 prev[i]->sl_next[i] = NULL;
741         }
742
743         /* transition run-list from PENDING to RUNNING */
744         run_first_tim = tim;
745         pprev = &run_first_tim;
746
747         for ( ; tim != NULL; tim = next_tim) {
748                 next_tim = tim->sl_next[0];
749
750                 ret = timer_set_running_state(tim);
751                 if (likely(ret == 0)) {
752                         pprev = &tim->sl_next[0];
753                 } else {
754                         /* another core is trying to re-config this one,
755                          * remove it from local expired list
756                          */
757                         *pprev = next_tim;
758                 }
759         }
760
761         /* update the next to expire timer value */
762         priv_timer[lcore_id].pending_head.expire =
763             (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
764                 priv_timer[lcore_id].pending_head.sl_next[0]->expire;
765
766         rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
767
768         /* now scan expired list and call callbacks */
769         for (tim = run_first_tim; tim != NULL; tim = next_tim) {
770                 next_tim = tim->sl_next[0];
771                 priv_timer[lcore_id].updated = 0;
772                 priv_timer[lcore_id].running_tim = tim;
773
774                 /* execute callback function with list unlocked */
775                 tim->f(tim, tim->arg);
776
777                 __TIMER_STAT_ADD(priv_timer, pending, -1);
778                 /* the timer was stopped or reloaded by the callback
779                  * function; we have nothing to do here */
780                 if (priv_timer[lcore_id].updated == 1)
781                         continue;
782
783                 if (tim->period == 0) {
784                         /* remove from done list and mark timer as stopped */
785                         status.state = RTE_TIMER_STOP;
786                         status.owner = RTE_TIMER_NO_OWNER;
787                         rte_wmb();
788                         tim->status.u32 = status.u32;
789                 }
790                 else {
791                         /* keep it in list and mark timer as pending */
792                         rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
793                         status.state = RTE_TIMER_PENDING;
794                         __TIMER_STAT_ADD(priv_timer, pending, 1);
795                         status.owner = (int16_t)lcore_id;
796                         rte_wmb();
797                         tim->status.u32 = status.u32;
798                         __rte_timer_reset(tim, tim->expire + tim->period,
799                                 tim->period, lcore_id, tim->f, tim->arg, 1,
800                                 timer_data);
801                         rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
802                 }
803         }
804         priv_timer[lcore_id].running_tim = NULL;
805 }
806
807 void
808 rte_timer_manage_v20(void)
809 {
810         __rte_timer_manage(&default_timer_data);
811 }
812 VERSION_SYMBOL(rte_timer_manage, _v20, 2.0);
813
814 int
815 rte_timer_manage_v1905(void)
816 {
817         struct rte_timer_data *timer_data;
818
819         TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);
820
821         __rte_timer_manage(timer_data);
822
823         return 0;
824 }
825 MAP_STATIC_SYMBOL(int rte_timer_manage(void), rte_timer_manage_v1905);
826 BIND_DEFAULT_SYMBOL(rte_timer_manage, _v1905, 19.05);
827
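/*
 * Illustrative polling loop (hypothetical application code; force_quit and
 * lcore_main_loop are placeholders, the 100 us resolution is arbitrary):
 * every EAL lcore that owns timers must call rte_timer_manage() regularly
 * from its main loop so that expired callbacks run on that lcore.
 *
 *     static int
 *     lcore_main_loop(void *arg)
 *     {
 *             const uint64_t resolution = rte_get_timer_hz() / 10000;
 *             uint64_t prev_tsc = rte_get_timer_cycles();
 *             uint64_t cur_tsc;
 *
 *             while (!force_quit) {
 *                     // ... packet processing or other work ...
 *
 *                     cur_tsc = rte_get_timer_cycles();
 *                     if (cur_tsc - prev_tsc > resolution) {
 *                             rte_timer_manage();
 *                             prev_tsc = cur_tsc;
 *                     }
 *             }
 *             return 0;
 *     }
 */
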
828 int __rte_experimental
829 rte_timer_alt_manage(uint32_t timer_data_id,
830                      unsigned int *poll_lcores,
831                      int nb_poll_lcores,
832                      rte_timer_alt_manage_cb_t f)
833 {
834         union rte_timer_status status;
835         struct rte_timer *tim, *next_tim, **pprev;
836         struct rte_timer *run_first_tims[RTE_MAX_LCORE];
837         unsigned int runlist_lcore_ids[RTE_MAX_LCORE];
838         unsigned int this_lcore = rte_lcore_id();
839         struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
840         uint64_t cur_time;
841         int i, j, ret;
842         int nb_runlists = 0;
843         struct rte_timer_data *data;
844         struct priv_timer *privp;
845         uint32_t poll_lcore;
846
847         TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);
848
849         /* timer manager only runs on EAL thread with valid lcore_id */
850         assert(this_lcore < RTE_MAX_LCORE);
851
852         __TIMER_STAT_ADD(data->priv_timer, manage, 1);
853
854         if (poll_lcores == NULL) {
855                 poll_lcores = &this_lcore; /* default: poll local lcore only */
856                 nb_poll_lcores = 1;
857         }
858
859         for (i = 0; i < nb_poll_lcores; i++) {
860                 poll_lcore = poll_lcores[i];
861                 privp = &data->priv_timer[poll_lcore];
862
863                 /* optimize for the case where per-cpu list is empty */
864                 if (privp->pending_head.sl_next[0] == NULL)
865                         continue;
866                 cur_time = rte_get_timer_cycles();
867
868 #ifdef RTE_ARCH_64
869                 /* on 64-bit the value cached in pending_head.expire will
870                  * be updated atomically, so we can consult that for a quick
871                  * check here outside the lock
872                  */
873                 if (likely(privp->pending_head.expire > cur_time))
874                         continue;
875 #endif
876
877                 /* browse ordered list, add expired timers in 'expired' list */
878                 rte_spinlock_lock(&privp->list_lock);
879
880                 /* if nothing to do just unlock and continue */
881                 if (privp->pending_head.sl_next[0] == NULL ||
882                     privp->pending_head.sl_next[0]->expire > cur_time) {
883                         rte_spinlock_unlock(&privp->list_lock);
884                         continue;
885                 }
886
887                 /* save start of list of expired timers */
888                 tim = privp->pending_head.sl_next[0];
889
890                 /* break the existing list at current time point */
891                 timer_get_prev_entries(cur_time, poll_lcore, prev,
892                                        data->priv_timer);
893                 for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
894                         if (prev[j] == &privp->pending_head)
895                                 continue;
896                         privp->pending_head.sl_next[j] =
897                                 prev[j]->sl_next[j];
898                         if (prev[j]->sl_next[j] == NULL)
899                                 privp->curr_skiplist_depth--;
900
901                         prev[j]->sl_next[j] = NULL;
902                 }
903
904                 /* transition run-list from PENDING to RUNNING */
905                 run_first_tims[nb_runlists] = tim;
906                 runlist_lcore_ids[nb_runlists] = poll_lcore;
907                 pprev = &run_first_tims[nb_runlists];
908                 nb_runlists++;
909
910                 for ( ; tim != NULL; tim = next_tim) {
911                         next_tim = tim->sl_next[0];
912
913                         ret = timer_set_running_state(tim);
914                         if (likely(ret == 0)) {
915                                 pprev = &tim->sl_next[0];
916                         } else {
917                                 /* another core is trying to re-config this one,
918                                  * remove it from local expired list
919                                  */
920                                 *pprev = next_tim;
921                         }
922                 }
923
924                 /* update the next to expire timer value */
925                 privp->pending_head.expire =
926                     (privp->pending_head.sl_next[0] == NULL) ? 0 :
927                         privp->pending_head.sl_next[0]->expire;
928
929                 rte_spinlock_unlock(&privp->list_lock);
930         }
931
932         /* Now process the run lists */
933         while (1) {
934                 bool done = true;
935                 uint64_t min_expire = UINT64_MAX;
936                 int min_idx = 0;
937
938                 /* Find the next oldest timer to process */
939                 for (i = 0; i < nb_runlists; i++) {
940                         tim = run_first_tims[i];
941
942                         if (tim != NULL && tim->expire < min_expire) {
943                                 min_expire = tim->expire;
944                                 min_idx = i;
945                                 done = false;
946                         }
947                 }
948
949                 if (done)
950                         break;
951
952                 tim = run_first_tims[min_idx];
953                 privp = &data->priv_timer[runlist_lcore_ids[min_idx]];
954
955                 /* Move down the runlist from which we picked a timer to
956                  * execute
957                  */
958                 run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];
959
960                 privp->updated = 0;
961                 privp->running_tim = tim;
962
963                 /* Call the provided callback function */
964                 f(tim);
965
966                 __TIMER_STAT_ADD(privp, pending, -1);
967
968                 /* the timer was stopped or reloaded by the callback
969                  * function; we have nothing to do here
970                  */
971                 if (privp->updated == 1)
972                         continue;
973
974                 if (tim->period == 0) {
975                         /* remove from done list and mark timer as stopped */
976                         status.state = RTE_TIMER_STOP;
977                         status.owner = RTE_TIMER_NO_OWNER;
978                         rte_wmb();
979                         tim->status.u32 = status.u32;
980                 } else {
981                         /* keep it in list and mark timer as pending */
982                         rte_spinlock_lock(
983                                 &data->priv_timer[this_lcore].list_lock);
984                         status.state = RTE_TIMER_PENDING;
985                         __TIMER_STAT_ADD(data->priv_timer, pending, 1);
986                         status.owner = (int16_t)this_lcore;
987                         rte_wmb();
988                         tim->status.u32 = status.u32;
989                         __rte_timer_reset(tim, tim->expire + tim->period,
990                                 tim->period, this_lcore, tim->f, tim->arg, 1,
991                                 data);
992                         rte_spinlock_unlock(
993                                 &data->priv_timer[this_lcore].list_lock);
994                 }
995
996                 privp->running_tim = NULL;
997         }
998
999         return 0;
1000 }
1001
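/*
 * Illustrative usage sketch (hypothetical application code; handle_expired
 * and poll_timers are placeholders): unlike rte_timer_manage(), the
 * alternate API lets the caller choose which lcores' pending lists to poll
 * and receives each expired timer through a single callback instead of the
 * timer's own function being invoked.
 *
 *     static void
 *     handle_expired(struct rte_timer *tim)
 *     {
 *             // e.g. enqueue tim on an event ring for later processing
 *     }
 *
 *     static void
 *     poll_timers(uint32_t timer_data_id)
 *     {
 *             unsigned int lcores[] = { 0, 1, 2 };  // lists to poll
 *
 *             rte_timer_alt_manage(timer_data_id, lcores, RTE_DIM(lcores),
 *                                  handle_expired);
 *     }
 */
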
1002 /* Walk pending lists, stopping timers and calling user-specified function */
1003 int __rte_experimental
1004 rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
1005                    int nb_walk_lcores,
1006                    rte_timer_stop_all_cb_t f, void *f_arg)
1007 {
1008         int i;
1009         struct priv_timer *priv_timer;
1010         uint32_t walk_lcore;
1011         struct rte_timer *tim, *next_tim;
1012         struct rte_timer_data *timer_data;
1013
1014         TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
1015
1016         for (i = 0; i < nb_walk_lcores; i++) {
1017                 walk_lcore = walk_lcores[i];
1018                 priv_timer = &timer_data->priv_timer[walk_lcore];
1019
1020                 rte_spinlock_lock(&priv_timer->list_lock);
1021
1022                 for (tim = priv_timer->pending_head.sl_next[0];
1023                      tim != NULL;
1024                      tim = next_tim) {
1025                         next_tim = tim->sl_next[0];
1026
1027                         /* Call timer_stop with lock held */
1028                         __rte_timer_stop(tim, 1, timer_data);
1029
1030                         if (f)
1031                                 f(tim, f_arg);
1032                 }
1033
1034                 rte_spinlock_unlock(&priv_timer->list_lock);
1035         }
1036
1037         return 0;
1038 }
1039
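/*
 * Illustrative teardown sketch (hypothetical application code;
 * release_timer and app_timer_teardown are placeholders, and the mempool is
 * only an assumption about how the timer objects were allocated): at
 * shutdown an application can stop every pending timer in a timer data
 * instance, reclaim the objects from the callback, then release the
 * instance and finalize the subsystem.
 *
 *     static void
 *     release_timer(struct rte_timer *tim, void *arg)
 *     {
 *             struct rte_mempool *pool = arg;
 *
 *             rte_mempool_put(pool, tim);
 *     }
 *
 *     static void
 *     app_timer_teardown(uint32_t timer_data_id, unsigned int *lcores,
 *                        int nb_lcores, struct rte_mempool *pool)
 *     {
 *             rte_timer_stop_all(timer_data_id, lcores, nb_lcores,
 *                                release_timer, pool);
 *             rte_timer_data_dealloc(timer_data_id);
 *             rte_timer_subsystem_finalize();
 *     }
 */
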
1040 /* dump statistics about timers */
1041 static void
1042 __rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
1043 {
1044 #ifdef RTE_LIBRTE_TIMER_DEBUG
1045         struct rte_timer_debug_stats sum;
1046         unsigned lcore_id;
1047         struct priv_timer *priv_timer = timer_data->priv_timer;
1048
1049         memset(&sum, 0, sizeof(sum));
1050         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1051                 sum.reset += priv_timer[lcore_id].stats.reset;
1052                 sum.stop += priv_timer[lcore_id].stats.stop;
1053                 sum.manage += priv_timer[lcore_id].stats.manage;
1054                 sum.pending += priv_timer[lcore_id].stats.pending;
1055         }
1056         fprintf(f, "Timer statistics:\n");
1057         fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
1058         fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
1059         fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
1060         fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
1061 #else
1062         fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
1063 #endif
1064 }
1065
1066 void
1067 rte_timer_dump_stats_v20(FILE *f)
1068 {
1069         __rte_timer_dump_stats(&default_timer_data, f);
1070 }
1071 VERSION_SYMBOL(rte_timer_dump_stats, _v20, 2.0);
1072
1073 int
1074 rte_timer_dump_stats_v1905(FILE *f)
1075 {
1076         return rte_timer_alt_dump_stats(default_data_id, f);
1077 }
1078 MAP_STATIC_SYMBOL(int rte_timer_dump_stats(FILE *f),
1079                   rte_timer_dump_stats_v1905);
1080 BIND_DEFAULT_SYMBOL(rte_timer_dump_stats, _v1905, 19.05);
1081
1082 int __rte_experimental
1083 rte_timer_alt_dump_stats(uint32_t timer_data_id, FILE *f)
1084 {
1085         struct rte_timer_data *timer_data;
1086
1087         TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
1088
1089         __rte_timer_dump_stats(timer_data, f);
1090
1091         return 0;
1092 }