event/octeontx2: support xstats
[dpdk.git] / lib / librte_timer / rte_timer.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <string.h>
6 #include <stdio.h>
7 #include <stdint.h>
8 #include <stdbool.h>
9 #include <inttypes.h>
10 #include <assert.h>
11 #include <sys/queue.h>
12
13 #include <rte_atomic.h>
14 #include <rte_common.h>
15 #include <rte_cycles.h>
16 #include <rte_per_lcore.h>
17 #include <rte_memory.h>
18 #include <rte_launch.h>
19 #include <rte_eal.h>
20 #include <rte_lcore.h>
21 #include <rte_branch_prediction.h>
22 #include <rte_spinlock.h>
23 #include <rte_random.h>
24 #include <rte_pause.h>
25 #include <rte_memzone.h>
26 #include <rte_malloc.h>
27 #include <rte_compat.h>
28 #include <rte_errno.h>
29
30 #include "rte_timer.h"
31
32 /**
33  * Per-lcore info for timers.
34  */
35 struct priv_timer {
36         struct rte_timer pending_head;  /**< dummy timer instance to head up list */
37         rte_spinlock_t list_lock;       /**< lock to protect list access */
38
39         /** per-core variable that true if a timer was updated on this
40          *  core since last reset of the variable */
41         int updated;
42
43         /** track the current depth of the skiplist */
44         unsigned curr_skiplist_depth;
45
46         unsigned prev_lcore;              /**< used for lcore round robin */
47
48         /** running timer on this lcore now */
49         struct rte_timer *running_tim;
50
51 #ifdef RTE_LIBRTE_TIMER_DEBUG
52         /** per-lcore statistics */
53         struct rte_timer_debug_stats stats;
54 #endif
55 } __rte_cache_aligned;
56
57 #define FL_ALLOCATED    (1 << 0)
58 struct rte_timer_data {
59         struct priv_timer priv_timer[RTE_MAX_LCORE];
60         uint8_t internal_flags;
61 };
62
63 #define RTE_MAX_DATA_ELS 64
64 static struct rte_timer_data *rte_timer_data_arr;
65 static const uint32_t default_data_id;
66 static uint32_t rte_timer_subsystem_initialized;
67
68 /* For maintaining older interfaces for a period */
69 static struct rte_timer_data default_timer_data;
70
/* Per-lcore statistics are only collected in debug builds; without
 * RTE_LIBRTE_TIMER_DEBUG the macro expands to an empty statement.
 */
#ifndef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#else
/* add n to stats.name of the calling lcore; nothing is recorded when the
 * caller has no valid lcore id
 */
#define __TIMER_STAT_ADD(priv_timer, name, n) do {                      \
                unsigned int __stat_lcore = rte_lcore_id();             \
                if (__stat_lcore < RTE_MAX_LCORE)                       \
                        priv_timer[__stat_lcore].stats.name += (n);     \
        } while (0)
#endif
81
82 static inline int
83 timer_data_valid(uint32_t id)
84 {
85         return !!(rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
86 }
87
/* Validate a timer data id and retrieve the matching entry.
 *
 * If 'id' is out of range or designates an entry that is not allocated, the
 * ENCLOSING FUNCTION returns 'retval'. Otherwise 'timer_data' is set to point
 * at the entry. Arguments are parenthesized so that arbitrary expressions can
 * be passed; note that 'id' is evaluated more than once.
 */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {    \
        if ((id) >= RTE_MAX_DATA_ELS || !timer_data_valid(id))          \
                return (retval);                                        \
        (timer_data) = &rte_timer_data_arr[(id)];                       \
} while (0)
94
95 int
96 rte_timer_data_alloc(uint32_t *id_ptr)
97 {
98         int i;
99         struct rte_timer_data *data;
100
101         if (!rte_timer_subsystem_initialized)
102                 return -ENOMEM;
103
104         for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
105                 data = &rte_timer_data_arr[i];
106                 if (!(data->internal_flags & FL_ALLOCATED)) {
107                         data->internal_flags |= FL_ALLOCATED;
108
109                         if (id_ptr)
110                                 *id_ptr = i;
111
112                         return 0;
113                 }
114         }
115
116         return -ENOSPC;
117 }
118
119 int
120 rte_timer_data_dealloc(uint32_t id)
121 {
122         struct rte_timer_data *timer_data;
123         TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);
124
125         timer_data->internal_flags &= ~(FL_ALLOCATED);
126
127         return 0;
128 }
129
130 void
131 rte_timer_subsystem_init_v20(void)
132 {
133         unsigned lcore_id;
134         struct priv_timer *priv_timer = default_timer_data.priv_timer;
135
136         /* since priv_timer is static, it's zeroed by default, so only init some
137          * fields.
138          */
139         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++) {
140                 rte_spinlock_init(&priv_timer[lcore_id].list_lock);
141                 priv_timer[lcore_id].prev_lcore = lcore_id;
142         }
143 }
144 VERSION_SYMBOL(rte_timer_subsystem_init, _v20, 2.0);
145
146 /* Init the timer library. Allocate an array of timer data structs in shared
147  * memory, and allocate the zeroth entry for use with original timer
148  * APIs. Since the intersection of the sets of lcore ids in primary and
149  * secondary processes should be empty, the zeroth entry can be shared by
150  * multiple processes.
151  */
152 int
153 rte_timer_subsystem_init_v1905(void)
154 {
155         const struct rte_memzone *mz;
156         struct rte_timer_data *data;
157         int i, lcore_id;
158         static const char *mz_name = "rte_timer_mz";
159         const size_t data_arr_size =
160                                 RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
161         bool do_full_init = true;
162
163         if (rte_timer_subsystem_initialized)
164                 return -EALREADY;
165
166 reserve:
167         rte_errno = 0;
168         mz = rte_memzone_reserve_aligned(mz_name, data_arr_size, SOCKET_ID_ANY,
169                                          0, RTE_CACHE_LINE_SIZE);
170         if (mz == NULL) {
171                 if (rte_errno == EEXIST) {
172                         mz = rte_memzone_lookup(mz_name);
173                         if (mz == NULL)
174                                 goto reserve;
175
176                         do_full_init = false;
177                 } else
178                         return -ENOMEM;
179         }
180
181         rte_timer_data_arr = mz->addr;
182
183         if (do_full_init) {
184                 for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
185                         data = &rte_timer_data_arr[i];
186
187                         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
188                              lcore_id++) {
189                                 rte_spinlock_init(
190                                         &data->priv_timer[lcore_id].list_lock);
191                                 data->priv_timer[lcore_id].prev_lcore =
192                                         lcore_id;
193                         }
194                 }
195         }
196
197         rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
198
199         rte_timer_subsystem_initialized = 1;
200
201         return 0;
202 }
203 MAP_STATIC_SYMBOL(int rte_timer_subsystem_init(void),
204                   rte_timer_subsystem_init_v1905);
205 BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
206
207 void
208 rte_timer_subsystem_finalize(void)
209 {
210         if (!rte_timer_subsystem_initialized)
211                 return;
212
213         rte_timer_subsystem_initialized = 0;
214 }
215
216 /* Initialize the timer handle tim for use */
217 void
218 rte_timer_init(struct rte_timer *tim)
219 {
220         union rte_timer_status status;
221
222         status.state = RTE_TIMER_STOP;
223         status.owner = RTE_TIMER_NO_OWNER;
224         tim->status.u32 = status.u32;
225 }
226
227 /*
228  * if timer is pending or stopped (or running on the same core than
229  * us), mark timer as configuring, and on success return the previous
230  * status of the timer
231  */
232 static int
233 timer_set_config_state(struct rte_timer *tim,
234                        union rte_timer_status *ret_prev_status,
235                        struct priv_timer *priv_timer)
236 {
237         union rte_timer_status prev_status, status;
238         int success = 0;
239         unsigned lcore_id;
240
241         lcore_id = rte_lcore_id();
242
243         /* wait that the timer is in correct status before update,
244          * and mark it as being configured */
245         while (success == 0) {
246                 prev_status.u32 = tim->status.u32;
247
248                 /* timer is running on another core
249                  * or ready to run on local core, exit
250                  */
251                 if (prev_status.state == RTE_TIMER_RUNNING &&
252                     (prev_status.owner != (uint16_t)lcore_id ||
253                      tim != priv_timer[lcore_id].running_tim))
254                         return -1;
255
256                 /* timer is being configured on another core */
257                 if (prev_status.state == RTE_TIMER_CONFIG)
258                         return -1;
259
260                 /* here, we know that timer is stopped or pending,
261                  * mark it atomically as being configured */
262                 status.state = RTE_TIMER_CONFIG;
263                 status.owner = (int16_t)lcore_id;
264                 success = rte_atomic32_cmpset(&tim->status.u32,
265                                               prev_status.u32,
266                                               status.u32);
267         }
268
269         ret_prev_status->u32 = prev_status.u32;
270         return 0;
271 }
272
273 /*
274  * if timer is pending, mark timer as running
275  */
276 static int
277 timer_set_running_state(struct rte_timer *tim)
278 {
279         union rte_timer_status prev_status, status;
280         unsigned lcore_id = rte_lcore_id();
281         int success = 0;
282
283         /* wait that the timer is in correct status before update,
284          * and mark it as running */
285         while (success == 0) {
286                 prev_status.u32 = tim->status.u32;
287
288                 /* timer is not pending anymore */
289                 if (prev_status.state != RTE_TIMER_PENDING)
290                         return -1;
291
292                 /* here, we know that timer is stopped or pending,
293                  * mark it atomically as being configured */
294                 status.state = RTE_TIMER_RUNNING;
295                 status.owner = (int16_t)lcore_id;
296                 success = rte_atomic32_cmpset(&tim->status.u32,
297                                               prev_status.u32,
298                                               status.u32);
299         }
300
301         return 0;
302 }
303
304 /*
305  * Return a skiplist level for a new entry.
306  * This probabilistically gives a level with p=1/4 that an entry at level n
307  * will also appear at level n+1.
308  */
309 static uint32_t
310 timer_get_skiplist_level(unsigned curr_depth)
311 {
312 #ifdef RTE_LIBRTE_TIMER_DEBUG
313         static uint32_t i, count = 0;
314         static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
315 #endif
316
317         /* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
318          * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
319          * bit position of a (pseudo)random number.
320          */
321         uint32_t rand = rte_rand() & (UINT32_MAX - 1);
322         uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;
323
324         /* limit the levels used to one above our current level, so we don't,
325          * for instance, have a level 0 and a level 7 without anything between
326          */
327         if (level > curr_depth)
328                 level = curr_depth;
329         if (level >= MAX_SKIPLIST_DEPTH)
330                 level = MAX_SKIPLIST_DEPTH-1;
331 #ifdef RTE_LIBRTE_TIMER_DEBUG
332         count ++;
333         levels[level]++;
334         if (count % 10000 == 0)
335                 for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
336                         printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
337 #endif
338         return level;
339 }
340
341 /*
342  * For a given time value, get the entries at each level which
343  * are <= that time value.
344  */
345 static void
346 timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
347                        struct rte_timer **prev, struct priv_timer *priv_timer)
348 {
349         unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
350         prev[lvl] = &priv_timer[tim_lcore].pending_head;
351         while(lvl != 0) {
352                 lvl--;
353                 prev[lvl] = prev[lvl+1];
354                 while (prev[lvl]->sl_next[lvl] &&
355                                 prev[lvl]->sl_next[lvl]->expire <= time_val)
356                         prev[lvl] = prev[lvl]->sl_next[lvl];
357         }
358 }
359
360 /*
361  * Given a timer node in the skiplist, find the previous entries for it at
362  * all skiplist levels.
363  */
364 static void
365 timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
366                                 struct rte_timer **prev,
367                                 struct priv_timer *priv_timer)
368 {
369         int i;
370
371         /* to get a specific entry in the list, look for just lower than the time
372          * values, and then increment on each level individually if necessary
373          */
374         timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
375         for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
376                 while (prev[i]->sl_next[i] != NULL &&
377                                 prev[i]->sl_next[i] != tim &&
378                                 prev[i]->sl_next[i]->expire <= tim->expire)
379                         prev[i] = prev[i]->sl_next[i];
380         }
381 }
382
383 /* call with lock held as necessary
384  * add in list
385  * timer must be in config state
386  * timer must not be in a list
387  */
388 static void
389 timer_add(struct rte_timer *tim, unsigned int tim_lcore,
390           struct priv_timer *priv_timer)
391 {
392         unsigned lvl;
393         struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
394
395         /* find where exactly this element goes in the list of elements
396          * for each depth. */
397         timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);
398
399         /* now assign it a new level and add at that level */
400         const unsigned tim_level = timer_get_skiplist_level(
401                         priv_timer[tim_lcore].curr_skiplist_depth);
402         if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
403                 priv_timer[tim_lcore].curr_skiplist_depth++;
404
405         lvl = tim_level;
406         while (lvl > 0) {
407                 tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
408                 prev[lvl]->sl_next[lvl] = tim;
409                 lvl--;
410         }
411         tim->sl_next[0] = prev[0]->sl_next[0];
412         prev[0]->sl_next[0] = tim;
413
414         /* save the lowest list entry into the expire field of the dummy hdr
415          * NOTE: this is not atomic on 32-bit*/
416         priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
417                         pending_head.sl_next[0]->expire;
418 }
419
420 /*
421  * del from list, lock if needed
422  * timer must be in config state
423  * timer must be in a list
424  */
425 static void
426 timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
427           int local_is_locked, struct priv_timer *priv_timer)
428 {
429         unsigned lcore_id = rte_lcore_id();
430         unsigned prev_owner = prev_status.owner;
431         int i;
432         struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
433
434         /* if timer needs is pending another core, we need to lock the
435          * list; if it is on local core, we need to lock if we are not
436          * called from rte_timer_manage() */
437         if (prev_owner != lcore_id || !local_is_locked)
438                 rte_spinlock_lock(&priv_timer[prev_owner].list_lock);
439
440         /* save the lowest list entry into the expire field of the dummy hdr.
441          * NOTE: this is not atomic on 32-bit */
442         if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
443                 priv_timer[prev_owner].pending_head.expire =
444                                 ((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);
445
446         /* adjust pointers from previous entries to point past this */
447         timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
448         for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
449                 if (prev[i]->sl_next[i] == tim)
450                         prev[i]->sl_next[i] = tim->sl_next[i];
451         }
452
453         /* in case we deleted last entry at a level, adjust down max level */
454         for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
455                 if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
456                         priv_timer[prev_owner].curr_skiplist_depth --;
457                 else
458                         break;
459
460         if (prev_owner != lcore_id || !local_is_locked)
461                 rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
462 }
463
464 /* Reset and start the timer associated with the timer handle (private func) */
465 static int
466 __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
467                   uint64_t period, unsigned tim_lcore,
468                   rte_timer_cb_t fct, void *arg,
469                   int local_is_locked,
470                   struct rte_timer_data *timer_data)
471 {
472         union rte_timer_status prev_status, status;
473         int ret;
474         unsigned lcore_id = rte_lcore_id();
475         struct priv_timer *priv_timer = timer_data->priv_timer;
476
477         /* round robin for tim_lcore */
478         if (tim_lcore == (unsigned)LCORE_ID_ANY) {
479                 if (lcore_id < RTE_MAX_LCORE) {
480                         /* EAL thread with valid lcore_id */
481                         tim_lcore = rte_get_next_lcore(
482                                 priv_timer[lcore_id].prev_lcore,
483                                 0, 1);
484                         priv_timer[lcore_id].prev_lcore = tim_lcore;
485                 } else
486                         /* non-EAL thread do not run rte_timer_manage(),
487                          * so schedule the timer on the first enabled lcore. */
488                         tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
489         }
490
491         /* wait that the timer is in correct status before update,
492          * and mark it as being configured */
493         ret = timer_set_config_state(tim, &prev_status, priv_timer);
494         if (ret < 0)
495                 return -1;
496
497         __TIMER_STAT_ADD(priv_timer, reset, 1);
498         if (prev_status.state == RTE_TIMER_RUNNING &&
499             lcore_id < RTE_MAX_LCORE) {
500                 priv_timer[lcore_id].updated = 1;
501         }
502
503         /* remove it from list */
504         if (prev_status.state == RTE_TIMER_PENDING) {
505                 timer_del(tim, prev_status, local_is_locked, priv_timer);
506                 __TIMER_STAT_ADD(priv_timer, pending, -1);
507         }
508
509         tim->period = period;
510         tim->expire = expire;
511         tim->f = fct;
512         tim->arg = arg;
513
514         /* if timer needs to be scheduled on another core, we need to
515          * lock the destination list; if it is on local core, we need to lock if
516          * we are not called from rte_timer_manage()
517          */
518         if (tim_lcore != lcore_id || !local_is_locked)
519                 rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
520
521         __TIMER_STAT_ADD(priv_timer, pending, 1);
522         timer_add(tim, tim_lcore, priv_timer);
523
524         /* update state: as we are in CONFIG state, only us can modify
525          * the state so we don't need to use cmpset() here */
526         rte_wmb();
527         status.state = RTE_TIMER_PENDING;
528         status.owner = (int16_t)tim_lcore;
529         tim->status.u32 = status.u32;
530
531         if (tim_lcore != lcore_id || !local_is_locked)
532                 rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
533
534         return 0;
535 }
536
537 /* Reset and start the timer associated with the timer handle tim */
538 int
539 rte_timer_reset_v20(struct rte_timer *tim, uint64_t ticks,
540                     enum rte_timer_type type, unsigned int tim_lcore,
541                     rte_timer_cb_t fct, void *arg)
542 {
543         uint64_t cur_time = rte_get_timer_cycles();
544         uint64_t period;
545
546         if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) &&
547                         !(rte_lcore_is_enabled(tim_lcore) ||
548                           rte_lcore_has_role(tim_lcore, ROLE_SERVICE))))
549                 return -1;
550
551         if (type == PERIODICAL)
552                 period = ticks;
553         else
554                 period = 0;
555
556         return __rte_timer_reset(tim,  cur_time + ticks, period, tim_lcore,
557                           fct, arg, 0, &default_timer_data);
558 }
559 VERSION_SYMBOL(rte_timer_reset, _v20, 2.0);
560
561 int
562 rte_timer_reset_v1905(struct rte_timer *tim, uint64_t ticks,
563                       enum rte_timer_type type, unsigned int tim_lcore,
564                       rte_timer_cb_t fct, void *arg)
565 {
566         return rte_timer_alt_reset(default_data_id, tim, ticks, type,
567                                    tim_lcore, fct, arg);
568 }
569 MAP_STATIC_SYMBOL(int rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
570                                       enum rte_timer_type type,
571                                       unsigned int tim_lcore,
572                                       rte_timer_cb_t fct, void *arg),
573                   rte_timer_reset_v1905);
574 BIND_DEFAULT_SYMBOL(rte_timer_reset, _v1905, 19.05);
575
576 int
577 rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
578                     uint64_t ticks, enum rte_timer_type type,
579                     unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
580 {
581         uint64_t cur_time = rte_get_timer_cycles();
582         uint64_t period;
583         struct rte_timer_data *timer_data;
584
585         TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
586
587         if (type == PERIODICAL)
588                 period = ticks;
589         else
590                 period = 0;
591
592         return __rte_timer_reset(tim,  cur_time + ticks, period, tim_lcore,
593                                  fct, arg, 0, timer_data);
594 }
595
596 /* loop until rte_timer_reset() succeed */
597 void
598 rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
599                      enum rte_timer_type type, unsigned tim_lcore,
600                      rte_timer_cb_t fct, void *arg)
601 {
602         while (rte_timer_reset(tim, ticks, type, tim_lcore,
603                                fct, arg) != 0)
604                 rte_pause();
605 }
606
607 static int
608 __rte_timer_stop(struct rte_timer *tim, int local_is_locked,
609                  struct rte_timer_data *timer_data)
610 {
611         union rte_timer_status prev_status, status;
612         unsigned lcore_id = rte_lcore_id();
613         int ret;
614         struct priv_timer *priv_timer = timer_data->priv_timer;
615
616         /* wait that the timer is in correct status before update,
617          * and mark it as being configured */
618         ret = timer_set_config_state(tim, &prev_status, priv_timer);
619         if (ret < 0)
620                 return -1;
621
622         __TIMER_STAT_ADD(priv_timer, stop, 1);
623         if (prev_status.state == RTE_TIMER_RUNNING &&
624             lcore_id < RTE_MAX_LCORE) {
625                 priv_timer[lcore_id].updated = 1;
626         }
627
628         /* remove it from list */
629         if (prev_status.state == RTE_TIMER_PENDING) {
630                 timer_del(tim, prev_status, local_is_locked, priv_timer);
631                 __TIMER_STAT_ADD(priv_timer, pending, -1);
632         }
633
634         /* mark timer as stopped */
635         rte_wmb();
636         status.state = RTE_TIMER_STOP;
637         status.owner = RTE_TIMER_NO_OWNER;
638         tim->status.u32 = status.u32;
639
640         return 0;
641 }
642
643 /* Stop the timer associated with the timer handle tim */
644 int
645 rte_timer_stop_v20(struct rte_timer *tim)
646 {
647         return __rte_timer_stop(tim, 0, &default_timer_data);
648 }
649 VERSION_SYMBOL(rte_timer_stop, _v20, 2.0);
650
651 int
652 rte_timer_stop_v1905(struct rte_timer *tim)
653 {
654         return rte_timer_alt_stop(default_data_id, tim);
655 }
656 MAP_STATIC_SYMBOL(int rte_timer_stop(struct rte_timer *tim),
657                   rte_timer_stop_v1905);
658 BIND_DEFAULT_SYMBOL(rte_timer_stop, _v1905, 19.05);
659
/*
 * Stop timer tim using the timer data entry timer_data_id. Returns -EINVAL
 * if the id does not designate an allocated entry, otherwise the result of
 * __rte_timer_stop().
 */
int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
        struct rte_timer_data *entry;

        TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, entry, -EINVAL);

        return __rte_timer_stop(tim, 0, entry);
}
669
/* Call rte_timer_stop() repeatedly, pausing between attempts, until it
 * returns 0
 */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
        for (;;) {
                if (rte_timer_stop(tim) == 0)
                        break;
                rte_pause();
        }
}
677
678 /* Test the PENDING status of the timer handle tim */
679 int
680 rte_timer_pending(struct rte_timer *tim)
681 {
682         return tim->status.state == RTE_TIMER_PENDING;
683 }
684
685 /* must be called periodically, run all timer that expired */
686 static void
687 __rte_timer_manage(struct rte_timer_data *timer_data)
688 {
689         union rte_timer_status status;
690         struct rte_timer *tim, *next_tim;
691         struct rte_timer *run_first_tim, **pprev;
692         unsigned lcore_id = rte_lcore_id();
693         struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
694         uint64_t cur_time;
695         int i, ret;
696         struct priv_timer *priv_timer = timer_data->priv_timer;
697
698         /* timer manager only runs on EAL thread with valid lcore_id */
699         assert(lcore_id < RTE_MAX_LCORE);
700
701         __TIMER_STAT_ADD(priv_timer, manage, 1);
702         /* optimize for the case where per-cpu list is empty */
703         if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
704                 return;
705         cur_time = rte_get_timer_cycles();
706
707 #ifdef RTE_ARCH_64
708         /* on 64-bit the value cached in the pending_head.expired will be
709          * updated atomically, so we can consult that for a quick check here
710          * outside the lock */
711         if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
712                 return;
713 #endif
714
715         /* browse ordered list, add expired timers in 'expired' list */
716         rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
717
718         /* if nothing to do just unlock and return */
719         if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
720             priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
721                 rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
722                 return;
723         }
724
725         /* save start of list of expired timers */
726         tim = priv_timer[lcore_id].pending_head.sl_next[0];
727
728         /* break the existing list at current time point */
729         timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
730         for (i = priv_timer[lcore_id].curr_skiplist_depth -1; i >= 0; i--) {
731                 if (prev[i] == &priv_timer[lcore_id].pending_head)
732                         continue;
733                 priv_timer[lcore_id].pending_head.sl_next[i] =
734                     prev[i]->sl_next[i];
735                 if (prev[i]->sl_next[i] == NULL)
736                         priv_timer[lcore_id].curr_skiplist_depth--;
737                 prev[i] ->sl_next[i] = NULL;
738         }
739
740         /* transition run-list from PENDING to RUNNING */
741         run_first_tim = tim;
742         pprev = &run_first_tim;
743
744         for ( ; tim != NULL; tim = next_tim) {
745                 next_tim = tim->sl_next[0];
746
747                 ret = timer_set_running_state(tim);
748                 if (likely(ret == 0)) {
749                         pprev = &tim->sl_next[0];
750                 } else {
751                         /* another core is trying to re-config this one,
752                          * remove it from local expired list
753                          */
754                         *pprev = next_tim;
755                 }
756         }
757
758         /* update the next to expire timer value */
759         priv_timer[lcore_id].pending_head.expire =
760             (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
761                 priv_timer[lcore_id].pending_head.sl_next[0]->expire;
762
763         rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
764
765         /* now scan expired list and call callbacks */
766         for (tim = run_first_tim; tim != NULL; tim = next_tim) {
767                 next_tim = tim->sl_next[0];
768                 priv_timer[lcore_id].updated = 0;
769                 priv_timer[lcore_id].running_tim = tim;
770
771                 /* execute callback function with list unlocked */
772                 tim->f(tim, tim->arg);
773
774                 __TIMER_STAT_ADD(priv_timer, pending, -1);
775                 /* the timer was stopped or reloaded by the callback
776                  * function, we have nothing to do here */
777                 if (priv_timer[lcore_id].updated == 1)
778                         continue;
779
780                 if (tim->period == 0) {
781                         /* remove from done list and mark timer as stopped */
782                         status.state = RTE_TIMER_STOP;
783                         status.owner = RTE_TIMER_NO_OWNER;
784                         rte_wmb();
785                         tim->status.u32 = status.u32;
786                 }
787                 else {
788                         /* keep it in list and mark timer as pending */
789                         rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
790                         status.state = RTE_TIMER_PENDING;
791                         __TIMER_STAT_ADD(priv_timer, pending, 1);
792                         status.owner = (int16_t)lcore_id;
793                         rte_wmb();
794                         tim->status.u32 = status.u32;
795                         __rte_timer_reset(tim, tim->expire + tim->period,
796                                 tim->period, lcore_id, tim->f, tim->arg, 1,
797                                 timer_data);
798                         rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
799                 }
800         }
801         priv_timer[lcore_id].running_tim = NULL;
802 }
803
804 void
805 rte_timer_manage_v20(void)
806 {
807         __rte_timer_manage(&default_timer_data);
808 }
809 VERSION_SYMBOL(rte_timer_manage, _v20, 2.0);
810
811 int
812 rte_timer_manage_v1905(void)
813 {
814         struct rte_timer_data *timer_data;
815
816         TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);
817
818         __rte_timer_manage(timer_data);
819
820         return 0;
821 }
822 MAP_STATIC_SYMBOL(int rte_timer_manage(void), rte_timer_manage_v1905);
823 BIND_DEFAULT_SYMBOL(rte_timer_manage, _v1905, 19.05);
824
825 int
826 rte_timer_alt_manage(uint32_t timer_data_id,
827                      unsigned int *poll_lcores,
828                      int nb_poll_lcores,
829                      rte_timer_alt_manage_cb_t f)
830 {
831         unsigned int default_poll_lcores[] = {rte_lcore_id()};
832         union rte_timer_status status;
833         struct rte_timer *tim, *next_tim, **pprev;
834         struct rte_timer *run_first_tims[RTE_MAX_LCORE];
835         unsigned int this_lcore = rte_lcore_id();
836         struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
837         uint64_t cur_time;
838         int i, j, ret;
839         int nb_runlists = 0;
840         struct rte_timer_data *data;
841         struct priv_timer *privp;
842         uint32_t poll_lcore;
843
844         TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);
845
846         /* timer manager only runs on EAL thread with valid lcore_id */
847         assert(this_lcore < RTE_MAX_LCORE);
848
849         __TIMER_STAT_ADD(data->priv_timer, manage, 1);
850
851         if (poll_lcores == NULL) {
852                 poll_lcores = default_poll_lcores;
853                 nb_poll_lcores = RTE_DIM(default_poll_lcores);
854         }
855
856         for (i = 0; i < nb_poll_lcores; i++) {
857                 poll_lcore = poll_lcores[i];
858                 privp = &data->priv_timer[poll_lcore];
859
860                 /* optimize for the case where per-cpu list is empty */
861                 if (privp->pending_head.sl_next[0] == NULL)
862                         continue;
863                 cur_time = rte_get_timer_cycles();
864
865 #ifdef RTE_ARCH_64
866                 /* on 64-bit the value cached in the pending_head.expired will
867                  * be updated atomically, so we can consult that for a quick
868                  * check here outside the lock
869                  */
870                 if (likely(privp->pending_head.expire > cur_time))
871                         continue;
872 #endif
873
874                 /* browse ordered list, add expired timers in 'expired' list */
875                 rte_spinlock_lock(&privp->list_lock);
876
877                 /* if nothing to do just unlock and return */
878                 if (privp->pending_head.sl_next[0] == NULL ||
879                     privp->pending_head.sl_next[0]->expire > cur_time) {
880                         rte_spinlock_unlock(&privp->list_lock);
881                         continue;
882                 }
883
884                 /* save start of list of expired timers */
885                 tim = privp->pending_head.sl_next[0];
886
887                 /* break the existing list at current time point */
888                 timer_get_prev_entries(cur_time, poll_lcore, prev,
889                                        data->priv_timer);
890                 for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
891                         if (prev[j] == &privp->pending_head)
892                                 continue;
893                         privp->pending_head.sl_next[j] =
894                                 prev[j]->sl_next[j];
895                         if (prev[j]->sl_next[j] == NULL)
896                                 privp->curr_skiplist_depth--;
897
898                         prev[j]->sl_next[j] = NULL;
899                 }
900
901                 /* transition run-list from PENDING to RUNNING */
902                 run_first_tims[nb_runlists] = tim;
903                 pprev = &run_first_tims[nb_runlists];
904                 nb_runlists++;
905
906                 for ( ; tim != NULL; tim = next_tim) {
907                         next_tim = tim->sl_next[0];
908
909                         ret = timer_set_running_state(tim);
910                         if (likely(ret == 0)) {
911                                 pprev = &tim->sl_next[0];
912                         } else {
913                                 /* another core is trying to re-config this one,
914                                  * remove it from local expired list
915                                  */
916                                 *pprev = next_tim;
917                         }
918                 }
919
920                 /* update the next to expire timer value */
921                 privp->pending_head.expire =
922                     (privp->pending_head.sl_next[0] == NULL) ? 0 :
923                         privp->pending_head.sl_next[0]->expire;
924
925                 rte_spinlock_unlock(&privp->list_lock);
926         }
927
928         /* Now process the run lists */
929         while (1) {
930                 bool done = true;
931                 uint64_t min_expire = UINT64_MAX;
932                 int min_idx = 0;
933
934                 /* Find the next oldest timer to process */
935                 for (i = 0; i < nb_runlists; i++) {
936                         tim = run_first_tims[i];
937
938                         if (tim != NULL && tim->expire < min_expire) {
939                                 min_expire = tim->expire;
940                                 min_idx = i;
941                                 done = false;
942                         }
943                 }
944
945                 if (done)
946                         break;
947
948                 tim = run_first_tims[min_idx];
949
950                 /* Move down the runlist from which we picked a timer to
951                  * execute
952                  */
953                 run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];
954
955                 data->priv_timer[this_lcore].updated = 0;
956                 data->priv_timer[this_lcore].running_tim = tim;
957
958                 /* Call the provided callback function */
959                 f(tim);
960
961                 __TIMER_STAT_ADD(data->priv_timer, pending, -1);
962
963                 /* the timer was stopped or reloaded by the callback
964                  * function, we have nothing to do here
965                  */
966                 if (data->priv_timer[this_lcore].updated == 1)
967                         continue;
968
969                 if (tim->period == 0) {
970                         /* remove from done list and mark timer as stopped */
971                         status.state = RTE_TIMER_STOP;
972                         status.owner = RTE_TIMER_NO_OWNER;
973                         rte_wmb();
974                         tim->status.u32 = status.u32;
975                 } else {
976                         /* keep it in list and mark timer as pending */
977                         rte_spinlock_lock(
978                                 &data->priv_timer[this_lcore].list_lock);
979                         status.state = RTE_TIMER_PENDING;
980                         __TIMER_STAT_ADD(data->priv_timer, pending, 1);
981                         status.owner = (int16_t)this_lcore;
982                         rte_wmb();
983                         tim->status.u32 = status.u32;
984                         __rte_timer_reset(tim, tim->expire + tim->period,
985                                 tim->period, this_lcore, tim->f, tim->arg, 1,
986                                 data);
987                         rte_spinlock_unlock(
988                                 &data->priv_timer[this_lcore].list_lock);
989                 }
990
991                 data->priv_timer[this_lcore].running_tim = NULL;
992         }
993
994         return 0;
995 }
996
997 /* Walk pending lists, stopping timers and calling user-specified function */
998 int
999 rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
1000                    int nb_walk_lcores,
1001                    rte_timer_stop_all_cb_t f, void *f_arg)
1002 {
1003         int i;
1004         struct priv_timer *priv_timer;
1005         uint32_t walk_lcore;
1006         struct rte_timer *tim, *next_tim;
1007         struct rte_timer_data *timer_data;
1008
1009         TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
1010
1011         for (i = 0; i < nb_walk_lcores; i++) {
1012                 walk_lcore = walk_lcores[i];
1013                 priv_timer = &timer_data->priv_timer[walk_lcore];
1014
1015                 rte_spinlock_lock(&priv_timer->list_lock);
1016
1017                 for (tim = priv_timer->pending_head.sl_next[0];
1018                      tim != NULL;
1019                      tim = next_tim) {
1020                         next_tim = tim->sl_next[0];
1021
1022                         /* Call timer_stop with lock held */
1023                         __rte_timer_stop(tim, 1, timer_data);
1024
1025                         if (f)
1026                                 f(tim, f_arg);
1027                 }
1028
1029                 rte_spinlock_unlock(&priv_timer->list_lock);
1030         }
1031
1032         return 0;
1033 }
1034
1035 /* dump statistics about timers */
1036 static void
1037 __rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
1038 {
1039 #ifdef RTE_LIBRTE_TIMER_DEBUG
1040         struct rte_timer_debug_stats sum;
1041         unsigned lcore_id;
1042         struct priv_timer *priv_timer = timer_data->priv_timer;
1043
1044         memset(&sum, 0, sizeof(sum));
1045         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1046                 sum.reset += priv_timer[lcore_id].stats.reset;
1047                 sum.stop += priv_timer[lcore_id].stats.stop;
1048                 sum.manage += priv_timer[lcore_id].stats.manage;
1049                 sum.pending += priv_timer[lcore_id].stats.pending;
1050         }
1051         fprintf(f, "Timer statistics:\n");
1052         fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
1053         fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
1054         fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
1055         fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
1056 #else
1057         fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
1058 #endif
1059 }
1060
1061 void
1062 rte_timer_dump_stats_v20(FILE *f)
1063 {
1064         __rte_timer_dump_stats(&default_timer_data, f);
1065 }
1066 VERSION_SYMBOL(rte_timer_dump_stats, _v20, 2.0);
1067
1068 int
1069 rte_timer_dump_stats_v1905(FILE *f)
1070 {
1071         return rte_timer_alt_dump_stats(default_data_id, f);
1072 }
1073 MAP_STATIC_SYMBOL(int rte_timer_dump_stats(FILE *f),
1074                   rte_timer_dump_stats_v1905);
1075 BIND_DEFAULT_SYMBOL(rte_timer_dump_stats, _v1905, 19.05);
1076
1077 int
1078 rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
1079 {
1080         struct rte_timer_data *timer_data;
1081
1082         TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);
1083
1084         __rte_timer_dump_stats(timer_data, f);
1085
1086         return 0;
1087 }