eventdev: fix race condition on timer list counter
[dpdk.git] / lib / librte_eventdev / rte_event_timer_adapter.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017-2018 Intel Corporation.
3  * All rights reserved.
4  */
5
6 #include <string.h>
7 #include <inttypes.h>
8 #include <stdbool.h>
9 #include <sys/queue.h>
10
11 #include <rte_memzone.h>
12 #include <rte_memory.h>
13 #include <rte_dev.h>
14 #include <rte_errno.h>
15 #include <rte_malloc.h>
16 #include <rte_ring.h>
17 #include <rte_mempool.h>
18 #include <rte_common.h>
19 #include <rte_timer.h>
20 #include <rte_service_component.h>
21 #include <rte_cycles.h>
22
23 #include "rte_eventdev.h"
24 #include "rte_eventdev_pmd.h"
25 #include "rte_eventdev_trace.h"
26 #include "rte_event_timer_adapter.h"
27 #include "rte_event_timer_adapter_pmd.h"
28
29 #define DATA_MZ_NAME_MAX_LEN 64
30 #define DATA_MZ_NAME_FORMAT "rte_event_timer_adapter_data_%d"
31
32 RTE_LOG_REGISTER(evtim_logtype, lib.eventdev.adapter.timer, NOTICE);
33 RTE_LOG_REGISTER(evtim_buffer_logtype, lib.eventdev.adapter.timer, NOTICE);
34 RTE_LOG_REGISTER(evtim_svc_logtype, lib.eventdev.adapter.timer.svc, NOTICE);
35
36 static struct rte_event_timer_adapter adapters[RTE_EVENT_TIMER_ADAPTER_NUM_MAX];
37
38 static const struct rte_event_timer_adapter_ops swtim_ops;
39
/* Core logging helper: emits a message of the given level and log type,
 * prefixed with the calling function name and line number and terminated
 * with a newline.
 */
#define EVTIM_LOG(level, logtype, ...) \
	rte_log(RTE_LOG_ ## level, logtype, \
		RTE_FMT("EVTIMER: %s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) \
			"\n", __func__, __LINE__, RTE_FMT_TAIL(__VA_ARGS__,)))

/* Error messages always go to the general adapter log type. */
#define EVTIM_LOG_ERR(...) EVTIM_LOG(ERR, evtim_logtype, __VA_ARGS__)

/* Debug messages are compiled in only for debug builds of the eventdev
 * library; otherwise they expand to a no-op expression.
 */
#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
#define EVTIM_LOG_DBG(...) \
	EVTIM_LOG(DEBUG, evtim_logtype, __VA_ARGS__)
#define EVTIM_BUF_LOG_DBG(...) \
	EVTIM_LOG(DEBUG, evtim_buffer_logtype, __VA_ARGS__)
#define EVTIM_SVC_LOG_DBG(...) \
	EVTIM_LOG(DEBUG, evtim_svc_logtype, __VA_ARGS__)
#else
#define EVTIM_LOG_DBG(...) (void)0
#define EVTIM_BUF_LOG_DBG(...) (void)0
#define EVTIM_SVC_LOG_DBG(...) (void)0
#endif
59
60 static int
61 default_port_conf_cb(uint16_t id, uint8_t event_dev_id, uint8_t *event_port_id,
62                      void *conf_arg)
63 {
64         struct rte_event_timer_adapter *adapter;
65         struct rte_eventdev *dev;
66         struct rte_event_dev_config dev_conf;
67         struct rte_event_port_conf *port_conf, def_port_conf = {0};
68         int started;
69         uint8_t port_id;
70         uint8_t dev_id;
71         int ret;
72
73         RTE_SET_USED(event_dev_id);
74
75         adapter = &adapters[id];
76         dev = &rte_eventdevs[adapter->data->event_dev_id];
77         dev_id = dev->data->dev_id;
78         dev_conf = dev->data->dev_conf;
79
80         started = dev->data->dev_started;
81         if (started)
82                 rte_event_dev_stop(dev_id);
83
84         port_id = dev_conf.nb_event_ports;
85         dev_conf.nb_event_ports += 1;
86         ret = rte_event_dev_configure(dev_id, &dev_conf);
87         if (ret < 0) {
88                 EVTIM_LOG_ERR("failed to configure event dev %u\n", dev_id);
89                 if (started)
90                         if (rte_event_dev_start(dev_id))
91                                 return -EIO;
92
93                 return ret;
94         }
95
96         if (conf_arg != NULL)
97                 port_conf = conf_arg;
98         else {
99                 port_conf = &def_port_conf;
100                 ret = rte_event_port_default_conf_get(dev_id, port_id,
101                                                       port_conf);
102                 if (ret < 0)
103                         return ret;
104         }
105
106         ret = rte_event_port_setup(dev_id, port_id, port_conf);
107         if (ret < 0) {
108                 EVTIM_LOG_ERR("failed to setup event port %u on event dev %u\n",
109                               port_id, dev_id);
110                 return ret;
111         }
112
113         *event_port_id = port_id;
114
115         if (started)
116                 ret = rte_event_dev_start(dev_id);
117
118         return ret;
119 }
120
121 struct rte_event_timer_adapter *
122 rte_event_timer_adapter_create(const struct rte_event_timer_adapter_conf *conf)
123 {
124         return rte_event_timer_adapter_create_ext(conf, default_port_conf_cb,
125                                                   NULL);
126 }
127
128 struct rte_event_timer_adapter *
129 rte_event_timer_adapter_create_ext(
130                 const struct rte_event_timer_adapter_conf *conf,
131                 rte_event_timer_adapter_port_conf_cb_t conf_cb,
132                 void *conf_arg)
133 {
134         uint16_t adapter_id;
135         struct rte_event_timer_adapter *adapter;
136         const struct rte_memzone *mz;
137         char mz_name[DATA_MZ_NAME_MAX_LEN];
138         int n, ret;
139         struct rte_eventdev *dev;
140
141         if (conf == NULL) {
142                 rte_errno = EINVAL;
143                 return NULL;
144         }
145
146         /* Check eventdev ID */
147         if (!rte_event_pmd_is_valid_dev(conf->event_dev_id)) {
148                 rte_errno = EINVAL;
149                 return NULL;
150         }
151         dev = &rte_eventdevs[conf->event_dev_id];
152
153         adapter_id = conf->timer_adapter_id;
154
155         /* Check that adapter_id is in range */
156         if (adapter_id >= RTE_EVENT_TIMER_ADAPTER_NUM_MAX) {
157                 rte_errno = EINVAL;
158                 return NULL;
159         }
160
161         /* Check adapter ID not already allocated */
162         adapter = &adapters[adapter_id];
163         if (adapter->allocated) {
164                 rte_errno = EEXIST;
165                 return NULL;
166         }
167
168         /* Create shared data area. */
169         n = snprintf(mz_name, sizeof(mz_name), DATA_MZ_NAME_FORMAT, adapter_id);
170         if (n >= (int)sizeof(mz_name)) {
171                 rte_errno = EINVAL;
172                 return NULL;
173         }
174         mz = rte_memzone_reserve(mz_name,
175                                  sizeof(struct rte_event_timer_adapter_data),
176                                  conf->socket_id, 0);
177         if (mz == NULL)
178                 /* rte_errno set by rte_memzone_reserve */
179                 return NULL;
180
181         adapter->data = mz->addr;
182         memset(adapter->data, 0, sizeof(struct rte_event_timer_adapter_data));
183
184         adapter->data->mz = mz;
185         adapter->data->event_dev_id = conf->event_dev_id;
186         adapter->data->id = adapter_id;
187         adapter->data->socket_id = conf->socket_id;
188         adapter->data->conf = *conf;  /* copy conf structure */
189
190         /* Query eventdev PMD for timer adapter capabilities and ops */
191         ret = dev->dev_ops->timer_adapter_caps_get(dev,
192                                                    adapter->data->conf.flags,
193                                                    &adapter->data->caps,
194                                                    &adapter->ops);
195         if (ret < 0) {
196                 rte_errno = -ret;
197                 goto free_memzone;
198         }
199
200         if (!(adapter->data->caps &
201               RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT)) {
202                 FUNC_PTR_OR_NULL_RET_WITH_ERRNO(conf_cb, EINVAL);
203                 ret = conf_cb(adapter->data->id, adapter->data->event_dev_id,
204                               &adapter->data->event_port_id, conf_arg);
205                 if (ret < 0) {
206                         rte_errno = -ret;
207                         goto free_memzone;
208                 }
209         }
210
211         /* If eventdev PMD did not provide ops, use default software
212          * implementation.
213          */
214         if (adapter->ops == NULL)
215                 adapter->ops = &swtim_ops;
216
217         /* Allow driver to do some setup */
218         FUNC_PTR_OR_NULL_RET_WITH_ERRNO(adapter->ops->init, ENOTSUP);
219         ret = adapter->ops->init(adapter);
220         if (ret < 0) {
221                 rte_errno = -ret;
222                 goto free_memzone;
223         }
224
225         /* Set fast-path function pointers */
226         adapter->arm_burst = adapter->ops->arm_burst;
227         adapter->arm_tmo_tick_burst = adapter->ops->arm_tmo_tick_burst;
228         adapter->cancel_burst = adapter->ops->cancel_burst;
229
230         adapter->allocated = 1;
231
232         rte_eventdev_trace_timer_adapter_create(adapter_id, adapter, conf,
233                 conf_cb);
234         return adapter;
235
236 free_memzone:
237         rte_memzone_free(adapter->data->mz);
238         return NULL;
239 }
240
241 int
242 rte_event_timer_adapter_get_info(const struct rte_event_timer_adapter *adapter,
243                 struct rte_event_timer_adapter_info *adapter_info)
244 {
245         ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
246
247         if (adapter->ops->get_info)
248                 /* let driver set values it knows */
249                 adapter->ops->get_info(adapter, adapter_info);
250
251         /* Set common values */
252         adapter_info->conf = adapter->data->conf;
253         adapter_info->event_dev_port_id = adapter->data->event_port_id;
254         adapter_info->caps = adapter->data->caps;
255
256         return 0;
257 }
258
259 int
260 rte_event_timer_adapter_start(const struct rte_event_timer_adapter *adapter)
261 {
262         int ret;
263
264         ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
265         FUNC_PTR_OR_ERR_RET(adapter->ops->start, -EINVAL);
266
267         if (adapter->data->started) {
268                 EVTIM_LOG_ERR("event timer adapter %"PRIu8" already started",
269                               adapter->data->id);
270                 return -EALREADY;
271         }
272
273         ret = adapter->ops->start(adapter);
274         if (ret < 0)
275                 return ret;
276
277         adapter->data->started = 1;
278         rte_eventdev_trace_timer_adapter_start(adapter);
279         return 0;
280 }
281
282 int
283 rte_event_timer_adapter_stop(const struct rte_event_timer_adapter *adapter)
284 {
285         int ret;
286
287         ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
288         FUNC_PTR_OR_ERR_RET(adapter->ops->stop, -EINVAL);
289
290         if (adapter->data->started == 0) {
291                 EVTIM_LOG_ERR("event timer adapter %"PRIu8" already stopped",
292                               adapter->data->id);
293                 return 0;
294         }
295
296         ret = adapter->ops->stop(adapter);
297         if (ret < 0)
298                 return ret;
299
300         adapter->data->started = 0;
301         rte_eventdev_trace_timer_adapter_stop(adapter);
302         return 0;
303 }
304
305 struct rte_event_timer_adapter *
306 rte_event_timer_adapter_lookup(uint16_t adapter_id)
307 {
308         char name[DATA_MZ_NAME_MAX_LEN];
309         const struct rte_memzone *mz;
310         struct rte_event_timer_adapter_data *data;
311         struct rte_event_timer_adapter *adapter;
312         int ret;
313         struct rte_eventdev *dev;
314
315         if (adapters[adapter_id].allocated)
316                 return &adapters[adapter_id]; /* Adapter is already loaded */
317
318         snprintf(name, DATA_MZ_NAME_MAX_LEN, DATA_MZ_NAME_FORMAT, adapter_id);
319         mz = rte_memzone_lookup(name);
320         if (mz == NULL) {
321                 rte_errno = ENOENT;
322                 return NULL;
323         }
324
325         data = mz->addr;
326
327         adapter = &adapters[data->id];
328         adapter->data = data;
329
330         dev = &rte_eventdevs[adapter->data->event_dev_id];
331
332         /* Query eventdev PMD for timer adapter capabilities and ops */
333         ret = dev->dev_ops->timer_adapter_caps_get(dev,
334                                                    adapter->data->conf.flags,
335                                                    &adapter->data->caps,
336                                                    &adapter->ops);
337         if (ret < 0) {
338                 rte_errno = EINVAL;
339                 return NULL;
340         }
341
342         /* If eventdev PMD did not provide ops, use default software
343          * implementation.
344          */
345         if (adapter->ops == NULL)
346                 adapter->ops = &swtim_ops;
347
348         /* Set fast-path function pointers */
349         adapter->arm_burst = adapter->ops->arm_burst;
350         adapter->arm_tmo_tick_burst = adapter->ops->arm_tmo_tick_burst;
351         adapter->cancel_burst = adapter->ops->cancel_burst;
352
353         adapter->allocated = 1;
354
355         return adapter;
356 }
357
358 int
359 rte_event_timer_adapter_free(struct rte_event_timer_adapter *adapter)
360 {
361         int ret;
362
363         ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
364         FUNC_PTR_OR_ERR_RET(adapter->ops->uninit, -EINVAL);
365
366         if (adapter->data->started == 1) {
367                 EVTIM_LOG_ERR("event timer adapter %"PRIu8" must be stopped "
368                               "before freeing", adapter->data->id);
369                 return -EBUSY;
370         }
371
372         /* free impl priv data */
373         ret = adapter->ops->uninit(adapter);
374         if (ret < 0)
375                 return ret;
376
377         /* free shared data area */
378         ret = rte_memzone_free(adapter->data->mz);
379         if (ret < 0)
380                 return ret;
381
382         adapter->data = NULL;
383         adapter->allocated = 0;
384
385         rte_eventdev_trace_timer_adapter_free(adapter);
386         return 0;
387 }
388
389 int
390 rte_event_timer_adapter_service_id_get(struct rte_event_timer_adapter *adapter,
391                                        uint32_t *service_id)
392 {
393         ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
394
395         if (adapter->data->service_inited && service_id != NULL)
396                 *service_id = adapter->data->service_id;
397
398         return adapter->data->service_inited ? 0 : -ESRCH;
399 }
400
401 int
402 rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter,
403                                   struct rte_event_timer_adapter_stats *stats)
404 {
405         ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
406         FUNC_PTR_OR_ERR_RET(adapter->ops->stats_get, -EINVAL);
407         if (stats == NULL)
408                 return -EINVAL;
409
410         return adapter->ops->stats_get(adapter, stats);
411 }
412
413 int
414 rte_event_timer_adapter_stats_reset(struct rte_event_timer_adapter *adapter)
415 {
416         ADAPTER_VALID_OR_ERR_RET(adapter, -EINVAL);
417         FUNC_PTR_OR_ERR_RET(adapter->ops->stats_reset, -EINVAL);
418         return adapter->ops->stats_reset(adapter);
419 }
420
421 /*
422  * Software event timer adapter buffer helper functions
423  */
424
425 #define NSECPERSEC 1E9
426
427 /* Optimizations used to index into the buffer require that the buffer size
428  * be a power of 2.
429  */
430 #define EVENT_BUFFER_SZ 4096
431 #define EVENT_BUFFER_BATCHSZ 32
432 #define EVENT_BUFFER_MASK (EVENT_BUFFER_SZ - 1)
433
434 #define EXP_TIM_BUF_SZ 128
435
436 struct event_buffer {
437         size_t head;
438         size_t tail;
439         struct rte_event events[EVENT_BUFFER_SZ];
440 } __rte_cache_aligned;
441
442 static inline bool
443 event_buffer_full(struct event_buffer *bufp)
444 {
445         return (bufp->head - bufp->tail) == EVENT_BUFFER_SZ;
446 }
447
448 static inline bool
449 event_buffer_batch_ready(struct event_buffer *bufp)
450 {
451         return (bufp->head - bufp->tail) >= EVENT_BUFFER_BATCHSZ;
452 }
453
454 static void
455 event_buffer_init(struct event_buffer *bufp)
456 {
457         bufp->head = bufp->tail = 0;
458         memset(&bufp->events, 0, sizeof(struct rte_event) * EVENT_BUFFER_SZ);
459 }
460
461 static int
462 event_buffer_add(struct event_buffer *bufp, struct rte_event *eventp)
463 {
464         size_t head_idx;
465         struct rte_event *buf_eventp;
466
467         if (event_buffer_full(bufp))
468                 return -1;
469
470         /* Instead of modulus, bitwise AND with mask to get head_idx. */
471         head_idx = bufp->head & EVENT_BUFFER_MASK;
472         buf_eventp = &bufp->events[head_idx];
473         rte_memcpy(buf_eventp, eventp, sizeof(struct rte_event));
474
475         /* Wrap automatically when overflow occurs. */
476         bufp->head++;
477
478         return 0;
479 }
480
481 static void
482 event_buffer_flush(struct event_buffer *bufp, uint8_t dev_id, uint8_t port_id,
483                    uint16_t *nb_events_flushed,
484                    uint16_t *nb_events_inv)
485 {
486         struct rte_event *events = bufp->events;
487         size_t head_idx, tail_idx;
488         uint16_t n = 0;
489
490         /* Instead of modulus, bitwise AND with mask to get index. */
491         head_idx = bufp->head & EVENT_BUFFER_MASK;
492         tail_idx = bufp->tail & EVENT_BUFFER_MASK;
493
494         RTE_ASSERT(head_idx < EVENT_BUFFER_SZ && tail_idx < EVENT_BUFFER_SZ);
495
496         /* Determine the largest contigous run we can attempt to enqueue to the
497          * event device.
498          */
499         if (head_idx > tail_idx)
500                 n = head_idx - tail_idx;
501         else if (head_idx < tail_idx)
502                 n = EVENT_BUFFER_SZ - tail_idx;
503         else if (event_buffer_full(bufp))
504                 n = EVENT_BUFFER_SZ - tail_idx;
505         else {
506                 *nb_events_flushed = 0;
507                 return;
508         }
509
510         n = RTE_MIN(EVENT_BUFFER_BATCHSZ, n);
511         *nb_events_inv = 0;
512
513         *nb_events_flushed = rte_event_enqueue_burst(dev_id, port_id,
514                                                      &events[tail_idx], n);
515         if (*nb_events_flushed != n) {
516                 if (rte_errno == EINVAL) {
517                         EVTIM_LOG_ERR("failed to enqueue invalid event - "
518                                       "dropping it");
519                         (*nb_events_inv)++;
520                 } else if (rte_errno == ENOSPC)
521                         rte_pause();
522         }
523
524         if (*nb_events_flushed > 0)
525                 EVTIM_BUF_LOG_DBG("enqueued %"PRIu16" timer events to event "
526                                   "device", *nb_events_flushed);
527
528         bufp->tail = bufp->tail + *nb_events_flushed + *nb_events_inv;
529 }
530
531 /*
532  * Software event timer adapter implementation
533  */
534 struct swtim {
535         /* Identifier of service executing timer management logic. */
536         uint32_t service_id;
537         /* The cycle count at which the adapter should next tick */
538         uint64_t next_tick_cycles;
539         /* The tick resolution used by adapter instance. May have been
540          * adjusted from what user requested
541          */
542         uint64_t timer_tick_ns;
543         /* Maximum timeout in nanoseconds allowed by adapter instance. */
544         uint64_t max_tmo_ns;
545         /* Buffered timer expiry events to be enqueued to an event device. */
546         struct event_buffer buffer;
547         /* Statistics */
548         struct rte_event_timer_adapter_stats stats;
549         /* Mempool of timer objects */
550         struct rte_mempool *tim_pool;
551         /* Back pointer for convenience */
552         struct rte_event_timer_adapter *adapter;
553         /* Identifier of timer data instance */
554         uint32_t timer_data_id;
555         /* Track which cores have actually armed a timer */
556         struct {
557                 rte_atomic16_t v;
558         } __rte_cache_aligned in_use[RTE_MAX_LCORE];
559         /* Track which cores' timer lists should be polled */
560         unsigned int poll_lcores[RTE_MAX_LCORE];
561         /* The number of lists that should be polled */
562         int n_poll_lcores;
563         /* Timers which have expired and can be returned to a mempool */
564         struct rte_timer *expired_timers[EXP_TIM_BUF_SZ];
565         /* The number of timers that can be returned to a mempool */
566         size_t n_expired_timers;
567 };
568
569 static inline struct swtim *
570 swtim_pmd_priv(const struct rte_event_timer_adapter *adapter)
571 {
572         return adapter->data->adapter_priv;
573 }
574
575 static void
576 swtim_callback(struct rte_timer *tim)
577 {
578         struct rte_event_timer *evtim = tim->arg;
579         struct rte_event_timer_adapter *adapter;
580         unsigned int lcore = rte_lcore_id();
581         struct swtim *sw;
582         uint16_t nb_evs_flushed = 0;
583         uint16_t nb_evs_invalid = 0;
584         uint64_t opaque;
585         int ret;
586         int n_lcores;
587
588         opaque = evtim->impl_opaque[1];
589         adapter = (struct rte_event_timer_adapter *)(uintptr_t)opaque;
590         sw = swtim_pmd_priv(adapter);
591
592         ret = event_buffer_add(&sw->buffer, &evtim->ev);
593         if (ret < 0) {
594                 /* If event buffer is full, put timer back in list with
595                  * immediate expiry value, so that we process it again on the
596                  * next iteration.
597                  */
598                 ret = rte_timer_alt_reset(sw->timer_data_id, tim, 0, SINGLE,
599                                           lcore, NULL, evtim);
600                 if (ret < 0) {
601                         EVTIM_LOG_DBG("event buffer full, failed to reset "
602                                       "timer with immediate expiry value");
603                 } else {
604                         sw->stats.evtim_retry_count++;
605                         EVTIM_LOG_DBG("event buffer full, resetting rte_timer "
606                                       "with immediate expiry value");
607                 }
608
609                 if (unlikely(rte_atomic16_test_and_set(&sw->in_use[lcore].v))) {
610                         n_lcores = __atomic_fetch_add(&sw->n_poll_lcores, 1,
611                                                      __ATOMIC_RELAXED);
612                         __atomic_store_n(&sw->poll_lcores[n_lcores], lcore,
613                                         __ATOMIC_RELAXED);
614                 }
615         } else {
616                 EVTIM_BUF_LOG_DBG("buffered an event timer expiry event");
617
618                 /* Empty the buffer here, if necessary, to free older expired
619                  * timers only
620                  */
621                 if (unlikely(sw->n_expired_timers == EXP_TIM_BUF_SZ)) {
622                         rte_mempool_put_bulk(sw->tim_pool,
623                                              (void **)sw->expired_timers,
624                                              sw->n_expired_timers);
625                         sw->n_expired_timers = 0;
626                 }
627
628                 sw->expired_timers[sw->n_expired_timers++] = tim;
629                 sw->stats.evtim_exp_count++;
630
631                 evtim->state = RTE_EVENT_TIMER_NOT_ARMED;
632         }
633
634         if (event_buffer_batch_ready(&sw->buffer)) {
635                 event_buffer_flush(&sw->buffer,
636                                    adapter->data->event_dev_id,
637                                    adapter->data->event_port_id,
638                                    &nb_evs_flushed,
639                                    &nb_evs_invalid);
640
641                 sw->stats.ev_enq_count += nb_evs_flushed;
642                 sw->stats.ev_inv_count += nb_evs_invalid;
643         }
644 }
645
646 static __rte_always_inline uint64_t
647 get_timeout_cycles(struct rte_event_timer *evtim,
648                    const struct rte_event_timer_adapter *adapter)
649 {
650         struct swtim *sw = swtim_pmd_priv(adapter);
651         uint64_t timeout_ns = evtim->timeout_ticks * sw->timer_tick_ns;
652         return timeout_ns * rte_get_timer_hz() / NSECPERSEC;
653 }
654
655 /* This function returns true if one or more (adapter) ticks have occurred since
656  * the last time it was called.
657  */
658 static inline bool
659 swtim_did_tick(struct swtim *sw)
660 {
661         uint64_t cycles_per_adapter_tick, start_cycles;
662         uint64_t *next_tick_cyclesp;
663
664         next_tick_cyclesp = &sw->next_tick_cycles;
665         cycles_per_adapter_tick = sw->timer_tick_ns *
666                         (rte_get_timer_hz() / NSECPERSEC);
667         start_cycles = rte_get_timer_cycles();
668
669         /* Note: initially, *next_tick_cyclesp == 0, so the clause below will
670          * execute, and set things going.
671          */
672
673         if (start_cycles >= *next_tick_cyclesp) {
674                 /* Snap the current cycle count to the preceding adapter tick
675                  * boundary.
676                  */
677                 start_cycles -= start_cycles % cycles_per_adapter_tick;
678                 *next_tick_cyclesp = start_cycles + cycles_per_adapter_tick;
679
680                 return true;
681         }
682
683         return false;
684 }
685
686 /* Check that event timer timeout value is in range */
687 static __rte_always_inline int
688 check_timeout(struct rte_event_timer *evtim,
689               const struct rte_event_timer_adapter *adapter)
690 {
691         uint64_t tmo_nsec;
692         struct swtim *sw = swtim_pmd_priv(adapter);
693
694         tmo_nsec = evtim->timeout_ticks * sw->timer_tick_ns;
695         if (tmo_nsec > sw->max_tmo_ns)
696                 return -1;
697         if (tmo_nsec < sw->timer_tick_ns)
698                 return -2;
699
700         return 0;
701 }
702
703 /* Check that event timer event queue sched type matches destination event queue
704  * sched type
705  */
706 static __rte_always_inline int
707 check_destination_event_queue(struct rte_event_timer *evtim,
708                               const struct rte_event_timer_adapter *adapter)
709 {
710         int ret;
711         uint32_t sched_type;
712
713         ret = rte_event_queue_attr_get(adapter->data->event_dev_id,
714                                        evtim->ev.queue_id,
715                                        RTE_EVENT_QUEUE_ATTR_SCHEDULE_TYPE,
716                                        &sched_type);
717
718         if ((ret == 0 && evtim->ev.sched_type == sched_type) ||
719             ret == -EOVERFLOW)
720                 return 0;
721
722         return -1;
723 }
724
725 static int
726 swtim_service_func(void *arg)
727 {
728         struct rte_event_timer_adapter *adapter = arg;
729         struct swtim *sw = swtim_pmd_priv(adapter);
730         uint16_t nb_evs_flushed = 0;
731         uint16_t nb_evs_invalid = 0;
732
733         if (swtim_did_tick(sw)) {
734                 rte_timer_alt_manage(sw->timer_data_id,
735                                      sw->poll_lcores,
736                                      sw->n_poll_lcores,
737                                      swtim_callback);
738
739                 /* Return expired timer objects back to mempool */
740                 rte_mempool_put_bulk(sw->tim_pool, (void **)sw->expired_timers,
741                                      sw->n_expired_timers);
742                 sw->n_expired_timers = 0;
743
744                 event_buffer_flush(&sw->buffer,
745                                    adapter->data->event_dev_id,
746                                    adapter->data->event_port_id,
747                                    &nb_evs_flushed,
748                                    &nb_evs_invalid);
749
750                 sw->stats.ev_enq_count += nb_evs_flushed;
751                 sw->stats.ev_inv_count += nb_evs_invalid;
752                 sw->stats.adapter_tick_count++;
753         }
754
755         return 0;
756 }
757
758 /* The adapter initialization function rounds the mempool size up to the next
759  * power of 2, so we can take the difference between that value and what the
760  * user requested, and use the space for caches.  This avoids a scenario where a
761  * user can't arm the number of timers the adapter was configured with because
762  * mempool objects have been lost to caches.
763  *
764  * nb_actual should always be a power of 2, so we can iterate over the powers
765  * of 2 to see what the largest cache size we can use is.
766  */
767 static int
768 compute_msg_mempool_cache_size(uint64_t nb_requested, uint64_t nb_actual)
769 {
770         int i;
771         int size;
772         int cache_size = 0;
773
774         for (i = 0;; i++) {
775                 size = 1 << i;
776
777                 if (RTE_MAX_LCORE * size < (int)(nb_actual - nb_requested) &&
778                     size < RTE_MEMPOOL_CACHE_MAX_SIZE &&
779                     size <= nb_actual / 1.5)
780                         cache_size = size;
781                 else
782                         break;
783         }
784
785         return cache_size;
786 }
787
788 static int
789 swtim_init(struct rte_event_timer_adapter *adapter)
790 {
791         int i, ret;
792         struct swtim *sw;
793         unsigned int flags;
794         struct rte_service_spec service;
795
796         /* Allocate storage for private data area */
797 #define SWTIM_NAMESIZE 32
798         char swtim_name[SWTIM_NAMESIZE];
799         snprintf(swtim_name, SWTIM_NAMESIZE, "swtim_%"PRIu8,
800                         adapter->data->id);
801         sw = rte_zmalloc_socket(swtim_name, sizeof(*sw), RTE_CACHE_LINE_SIZE,
802                         adapter->data->socket_id);
803         if (sw == NULL) {
804                 EVTIM_LOG_ERR("failed to allocate space for private data");
805                 rte_errno = ENOMEM;
806                 return -1;
807         }
808
809         /* Connect storage to adapter instance */
810         adapter->data->adapter_priv = sw;
811         sw->adapter = adapter;
812
813         sw->timer_tick_ns = adapter->data->conf.timer_tick_ns;
814         sw->max_tmo_ns = adapter->data->conf.max_tmo_ns;
815
816         /* Create a timer pool */
817         char pool_name[SWTIM_NAMESIZE];
818         snprintf(pool_name, SWTIM_NAMESIZE, "swtim_pool_%"PRIu8,
819                  adapter->data->id);
820         /* Optimal mempool size is a power of 2 minus one */
821         uint64_t nb_timers = rte_align64pow2(adapter->data->conf.nb_timers);
822         int pool_size = nb_timers - 1;
823         int cache_size = compute_msg_mempool_cache_size(
824                                 adapter->data->conf.nb_timers, nb_timers);
825         flags = 0; /* pool is multi-producer, multi-consumer */
826         sw->tim_pool = rte_mempool_create(pool_name, pool_size,
827                         sizeof(struct rte_timer), cache_size, 0, NULL, NULL,
828                         NULL, NULL, adapter->data->socket_id, flags);
829         if (sw->tim_pool == NULL) {
830                 EVTIM_LOG_ERR("failed to create timer object mempool");
831                 rte_errno = ENOMEM;
832                 goto free_alloc;
833         }
834
835         /* Initialize the variables that track in-use timer lists */
836         for (i = 0; i < RTE_MAX_LCORE; i++)
837                 rte_atomic16_init(&sw->in_use[i].v);
838
839         /* Initialize the timer subsystem and allocate timer data instance */
840         ret = rte_timer_subsystem_init();
841         if (ret < 0) {
842                 if (ret != -EALREADY) {
843                         EVTIM_LOG_ERR("failed to initialize timer subsystem");
844                         rte_errno = -ret;
845                         goto free_mempool;
846                 }
847         }
848
849         ret = rte_timer_data_alloc(&sw->timer_data_id);
850         if (ret < 0) {
851                 EVTIM_LOG_ERR("failed to allocate timer data instance");
852                 rte_errno = -ret;
853                 goto free_mempool;
854         }
855
856         /* Initialize timer event buffer */
857         event_buffer_init(&sw->buffer);
858
859         sw->adapter = adapter;
860
861         /* Register a service component to run adapter logic */
862         memset(&service, 0, sizeof(service));
863         snprintf(service.name, RTE_SERVICE_NAME_MAX,
864                  "swtim_svc_%"PRIu8, adapter->data->id);
865         service.socket_id = adapter->data->socket_id;
866         service.callback = swtim_service_func;
867         service.callback_userdata = adapter;
868         service.capabilities &= ~(RTE_SERVICE_CAP_MT_SAFE);
869         ret = rte_service_component_register(&service, &sw->service_id);
870         if (ret < 0) {
871                 EVTIM_LOG_ERR("failed to register service %s with id %"PRIu32
872                               ": err = %d", service.name, sw->service_id,
873                               ret);
874
875                 rte_errno = ENOSPC;
876                 goto free_mempool;
877         }
878
879         EVTIM_LOG_DBG("registered service %s with id %"PRIu32, service.name,
880                       sw->service_id);
881
882         adapter->data->service_id = sw->service_id;
883         adapter->data->service_inited = 1;
884
885         return 0;
886 free_mempool:
887         rte_mempool_free(sw->tim_pool);
888 free_alloc:
889         rte_free(sw);
890         return -1;
891 }
892
893 static void
894 swtim_free_tim(struct rte_timer *tim, void *arg)
895 {
896         struct swtim *sw = arg;
897
898         rte_mempool_put(sw->tim_pool, tim);
899 }
900
901 /* Traverse the list of outstanding timers and put them back in the mempool
902  * before freeing the adapter to avoid leaking the memory.
903  */
904 static int
905 swtim_uninit(struct rte_event_timer_adapter *adapter)
906 {
907         int ret;
908         struct swtim *sw = swtim_pmd_priv(adapter);
909
910         /* Free outstanding timers */
911         rte_timer_stop_all(sw->timer_data_id,
912                            sw->poll_lcores,
913                            sw->n_poll_lcores,
914                            swtim_free_tim,
915                            sw);
916
917         ret = rte_service_component_unregister(sw->service_id);
918         if (ret < 0) {
919                 EVTIM_LOG_ERR("failed to unregister service component");
920                 return ret;
921         }
922
923         rte_mempool_free(sw->tim_pool);
924         rte_free(sw);
925         adapter->data->adapter_priv = NULL;
926
927         return 0;
928 }
929
930 static inline int32_t
931 get_mapped_count_for_service(uint32_t service_id)
932 {
933         int32_t core_count, i, mapped_count = 0;
934         uint32_t lcore_arr[RTE_MAX_LCORE];
935
936         core_count = rte_service_lcore_list(lcore_arr, RTE_MAX_LCORE);
937
938         for (i = 0; i < core_count; i++)
939                 if (rte_service_map_lcore_get(service_id, lcore_arr[i]) == 1)
940                         mapped_count++;
941
942         return mapped_count;
943 }
944
945 static int
946 swtim_start(const struct rte_event_timer_adapter *adapter)
947 {
948         int mapped_count;
949         struct swtim *sw = swtim_pmd_priv(adapter);
950
951         /* Mapping the service to more than one service core can introduce
952          * delays while one thread is waiting to acquire a lock, so only allow
953          * one core to be mapped to the service.
954          *
955          * Note: the service could be modified such that it spreads cores to
956          * poll over multiple service instances.
957          */
958         mapped_count = get_mapped_count_for_service(sw->service_id);
959
960         if (mapped_count != 1)
961                 return mapped_count < 1 ? -ENOENT : -ENOTSUP;
962
963         return rte_service_component_runstate_set(sw->service_id, 1);
964 }
965
966 static int
967 swtim_stop(const struct rte_event_timer_adapter *adapter)
968 {
969         int ret;
970         struct swtim *sw = swtim_pmd_priv(adapter);
971
972         ret = rte_service_component_runstate_set(sw->service_id, 0);
973         if (ret < 0)
974                 return ret;
975
976         /* Wait for the service to complete its final iteration */
977         while (rte_service_may_be_active(sw->service_id))
978                 rte_pause();
979
980         return 0;
981 }
982
983 static void
984 swtim_get_info(const struct rte_event_timer_adapter *adapter,
985                 struct rte_event_timer_adapter_info *adapter_info)
986 {
987         struct swtim *sw = swtim_pmd_priv(adapter);
988         adapter_info->min_resolution_ns = sw->timer_tick_ns;
989         adapter_info->max_tmo_ns = sw->max_tmo_ns;
990 }
991
992 static int
993 swtim_stats_get(const struct rte_event_timer_adapter *adapter,
994                 struct rte_event_timer_adapter_stats *stats)
995 {
996         struct swtim *sw = swtim_pmd_priv(adapter);
997         *stats = sw->stats; /* structure copy */
998         return 0;
999 }
1000
1001 static int
1002 swtim_stats_reset(const struct rte_event_timer_adapter *adapter)
1003 {
1004         struct swtim *sw = swtim_pmd_priv(adapter);
1005         memset(&sw->stats, 0, sizeof(sw->stats));
1006         return 0;
1007 }
1008
/* Common arm path shared by swtim_arm_burst() and swtim_arm_tmo_tick_burst().
 *
 * Takes nb_evtims timer objects from the adapter's mempool in a single bulk
 * request, then walks the event timers in order: each one has its state,
 * timeout and destination event queue validated and, if valid, an rte_timer
 * is armed for it on the timer list selected by the calling thread's lcore
 * id. Processing stops at the first event timer that fails.
 *
 * adapter:   adapter instance the timers are armed on
 * evtims:    array of event timer pointers to arm
 * nb_evtims: number of entries in evtims
 *
 * Returns the number of event timers armed (the index of the first failing
 * entry, or nb_evtims). Timer objects that were not consumed are returned to
 * the mempool. rte_errno is set on every failure path except a failing
 * rte_timer_alt_reset().
 */
static uint16_t
__swtim_arm_burst(const struct rte_event_timer_adapter *adapter,
                struct rte_event_timer **evtims,
                uint16_t nb_evtims)
{
        int i, ret;
        struct swtim *sw = swtim_pmd_priv(adapter);
        uint32_t lcore_id = rte_lcore_id();
        /* tims is a variable-length array sized by the burst.
         * NOTE(review): nb_evtims == 0 would make it zero-length - confirm
         * callers never pass an empty burst.
         */
        struct rte_timer *tim, *tims[nb_evtims];
        uint64_t cycles;
        int n_lcores;

#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
        /* Check that the service is running. */
        if (rte_service_runstate_get(adapter->data->service_id) != 1) {
                rte_errno = EINVAL;
                return 0;
        }
#endif

        /* Adjust lcore_id if non-EAL thread. Arbitrarily pick the timer list of
         * the highest lcore to insert such timers into
         */
        if (lcore_id == LCORE_ID_ANY)
                lcore_id = RTE_MAX_LCORE - 1;

        /* If this is the first time we're arming an event timer on this lcore,
         * mark this lcore as "in use"; this will cause the service
         * function to process the timer list that corresponds to this lcore.
         */
        if (unlikely(rte_atomic16_test_and_set(&sw->in_use[lcore_id].v))) {
                EVTIM_LOG_DBG("Adding lcore id = %u to list of lcores to poll",
                              lcore_id);
                /* Reserve the next slot index atomically, then record this
                 * lcore id in that slot; both operations use relaxed
                 * ordering.
                 */
                n_lcores = __atomic_fetch_add(&sw->n_poll_lcores, 1,
                                             __ATOMIC_RELAXED);
                __atomic_store_n(&sw->poll_lcores[n_lcores], lcore_id,
                                __ATOMIC_RELAXED);
        }

        /* All-or-nothing: one timer object per event timer in the burst */
        ret = rte_mempool_get_bulk(sw->tim_pool, (void **)tims,
                                   nb_evtims);
        if (ret < 0) {
                rte_errno = ENOSPC;
                return 0;
        }

        for (i = 0; i < nb_evtims; i++) {
                /* Don't modify the event timer state in these cases */
                if (evtims[i]->state == RTE_EVENT_TIMER_ARMED) {
                        rte_errno = EALREADY;
                        break;
                } else if (!(evtims[i]->state == RTE_EVENT_TIMER_NOT_ARMED ||
                             evtims[i]->state == RTE_EVENT_TIMER_CANCELED)) {
                        rte_errno = EINVAL;
                        break;
                }

                /* check_timeout() result: -1 is reported as TOOLATE and
                 * -2 as TOOEARLY; both set rte_errno to EINVAL.
                 */
                ret = check_timeout(evtims[i], adapter);
                if (unlikely(ret == -1)) {
                        evtims[i]->state = RTE_EVENT_TIMER_ERROR_TOOLATE;
                        rte_errno = EINVAL;
                        break;
                } else if (unlikely(ret == -2)) {
                        evtims[i]->state = RTE_EVENT_TIMER_ERROR_TOOEARLY;
                        rte_errno = EINVAL;
                        break;
                }

                if (unlikely(check_destination_event_queue(evtims[i],
                                                           adapter) < 0)) {
                        evtims[i]->state = RTE_EVENT_TIMER_ERROR;
                        rte_errno = EINVAL;
                        break;
                }

                tim = tims[i];
                rte_timer_init(tim);

                /* Stash the timer object and adapter in the event timer;
                 * swtim_cancel_burst() reads impl_opaque[0] back to find the
                 * rte_timer to stop.
                 */
                evtims[i]->impl_opaque[0] = (uintptr_t)tim;
                evtims[i]->impl_opaque[1] = (uintptr_t)adapter;

                cycles = get_timeout_cycles(evtims[i], adapter);
                ret = rte_timer_alt_reset(sw->timer_data_id, tim, cycles,
                                          SINGLE, lcore_id, NULL, evtims[i]);
                if (ret < 0) {
                        /* tim was in RUNNING or CONFIG state */
                        /* NOTE(review): rte_errno is not set on this path,
                         * so the caller may observe a stale value - confirm
                         * whether that is intended.
                         */
                        evtims[i]->state = RTE_EVENT_TIMER_ERROR;
                        break;
                }

                /* Write barrier before the state store; presumably so the
                 * impl_opaque stores above are visible before ARMED is
                 * observed - confirm.
                 */
                rte_smp_wmb();
                EVTIM_LOG_DBG("armed an event timer");
                evtims[i]->state = RTE_EVENT_TIMER_ARMED;
        }

        /* On early exit, hand back tims[i] and every later timer object */
        if (i < nb_evtims)
                rte_mempool_put_bulk(sw->tim_pool,
                                     (void **)&tims[i], nb_evtims - i);

        return i;
}
1110
/* arm_burst operation of the software timer adapter: arms each event timer
 * using the timeout already stored in it by delegating to
 * __swtim_arm_burst(). Returns the number of event timers armed.
 */
static uint16_t
swtim_arm_burst(const struct rte_event_timer_adapter *adapter,
                struct rte_event_timer **evtims,
                uint16_t nb_evtims)
{
        uint16_t nb_armed;

        nb_armed = __swtim_arm_burst(adapter, evtims, nb_evtims);

        return nb_armed;
}
1118
1119 static uint16_t
1120 swtim_cancel_burst(const struct rte_event_timer_adapter *adapter,
1121                    struct rte_event_timer **evtims,
1122                    uint16_t nb_evtims)
1123 {
1124         int i, ret;
1125         struct rte_timer *timp;
1126         uint64_t opaque;
1127         struct swtim *sw = swtim_pmd_priv(adapter);
1128
1129 #ifdef RTE_LIBRTE_EVENTDEV_DEBUG
1130         /* Check that the service is running. */
1131         if (rte_service_runstate_get(adapter->data->service_id) != 1) {
1132                 rte_errno = EINVAL;
1133                 return 0;
1134         }
1135 #endif
1136
1137         for (i = 0; i < nb_evtims; i++) {
1138                 /* Don't modify the event timer state in these cases */
1139                 if (evtims[i]->state == RTE_EVENT_TIMER_CANCELED) {
1140                         rte_errno = EALREADY;
1141                         break;
1142                 } else if (evtims[i]->state != RTE_EVENT_TIMER_ARMED) {
1143                         rte_errno = EINVAL;
1144                         break;
1145                 }
1146
1147                 rte_smp_rmb();
1148
1149                 opaque = evtims[i]->impl_opaque[0];
1150                 timp = (struct rte_timer *)(uintptr_t)opaque;
1151                 RTE_ASSERT(timp != NULL);
1152
1153                 ret = rte_timer_alt_stop(sw->timer_data_id, timp);
1154                 if (ret < 0) {
1155                         /* Timer is running or being configured */
1156                         rte_errno = EAGAIN;
1157                         break;
1158                 }
1159
1160                 rte_mempool_put(sw->tim_pool, (void **)timp);
1161
1162                 evtims[i]->state = RTE_EVENT_TIMER_CANCELED;
1163                 evtims[i]->impl_opaque[0] = 0;
1164                 evtims[i]->impl_opaque[1] = 0;
1165
1166                 rte_smp_wmb();
1167         }
1168
1169         return i;
1170 }
1171
1172 static uint16_t
1173 swtim_arm_tmo_tick_burst(const struct rte_event_timer_adapter *adapter,
1174                          struct rte_event_timer **evtims,
1175                          uint64_t timeout_ticks,
1176                          uint16_t nb_evtims)
1177 {
1178         int i;
1179
1180         for (i = 0; i < nb_evtims; i++)
1181                 evtims[i]->timeout_ticks = timeout_ticks;
1182
1183         return __swtim_arm_burst(adapter, evtims, nb_evtims);
1184 }
1185
/* Operations table of the software timer adapter implementation. This is the
 * definition of the swtim_ops object that is forward-declared near the top of
 * the file; each member points at the matching swtim_* function.
 */
static const struct rte_event_timer_adapter_ops swtim_ops = {
        .init                   = swtim_init,
        .uninit                 = swtim_uninit,
        .start                  = swtim_start,
        .stop                   = swtim_stop,
        .get_info               = swtim_get_info,
        .stats_get              = swtim_stats_get,
        .stats_reset            = swtim_stats_reset,
        .arm_burst              = swtim_arm_burst,
        .arm_tmo_tick_burst     = swtim_arm_tmo_tick_burst,
        .cancel_burst           = swtim_cancel_burst,
};