service: introduce service cores concept
[dpdk.git] / lib / librte_eal / common / rte_service.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2017 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdio.h>
35 #include <unistd.h>
36 #include <inttypes.h>
37 #include <limits.h>
38 #include <string.h>
39 #include <dirent.h>
40
41 #include <rte_service.h>
42 #include "include/rte_service_component.h"
43
44 #include <rte_eal.h>
45 #include <rte_lcore.h>
46 #include <rte_common.h>
47 #include <rte_debug.h>
48 #include <rte_cycles.h>
49 #include <rte_atomic.h>
50 #include <rte_memory.h>
51 #include <rte_malloc.h>
52
/* Capacity of the service table. Per-core service maps are kept in a
 * uint64_t bitmask (one bit per service id), so this must not exceed 64.
 */
#define RTE_SERVICE_NUM_MAX 64

/* bits kept in rte_service_spec_impl::internal_flags */
#define SERVICE_F_REGISTERED    0x01
#define SERVICE_F_STATS_ENABLED 0x02

/* run states, used for both services and lcores: active or not */
#define RUNSTATE_STOPPED 0
#define RUNSTATE_RUNNING 1
61
62 /* internal representation of a service */
63 struct rte_service_spec_impl {
64         /* public part of the struct */
65         struct rte_service_spec spec;
66
67         /* atomic lock that when set indicates a service core is currently
68          * running this service callback. When not set, a core may take the
69          * lock and then run the service callback.
70          */
71         rte_atomic32_t execute_lock;
72
73         /* API set/get-able variables */
74         int32_t runstate;
75         uint8_t internal_flags;
76
77         /* per service statistics */
78         uint32_t num_mapped_cores;
79         uint64_t calls;
80         uint64_t cycles_spent;
81 } __rte_cache_aligned;
82
83 /* the internal values of a service core */
84 struct core_state {
85         /* map of services IDs are run on this core */
86         uint64_t service_mask;
87         uint8_t runstate; /* running or stopped */
88         uint8_t is_service_core; /* set if core is currently a service core */
89
90         /* extreme statistics */
91         uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
92 } __rte_cache_aligned;
93
/* number of currently registered services */
static uint32_t rte_service_count;
/* service table, RTE_SERVICE_NUM_MAX entries, allocated by rte_service_init() */
static struct rte_service_spec_impl *rte_services;
/* per-lcore state, RTE_MAX_LCORE entries, allocated by rte_service_init() */
static struct core_state *lcore_states;
/* becomes non-zero once rte_service_init() has completed */
static uint32_t rte_service_library_initialized;
98
99 int32_t rte_service_init(void)
100 {
101         if (rte_service_library_initialized) {
102                 printf("service library init() called, init flag %d\n",
103                         rte_service_library_initialized);
104                 return -EALREADY;
105         }
106
107         rte_services = rte_calloc("rte_services", RTE_SERVICE_NUM_MAX,
108                         sizeof(struct rte_service_spec_impl),
109                         RTE_CACHE_LINE_SIZE);
110         if (!rte_services) {
111                 printf("error allocating rte services array\n");
112                 return -ENOMEM;
113         }
114
115         lcore_states = rte_calloc("rte_service_core_states", RTE_MAX_LCORE,
116                         sizeof(struct core_state), RTE_CACHE_LINE_SIZE);
117         if (!lcore_states) {
118                 printf("error allocating core states array\n");
119                 return -ENOMEM;
120         }
121
122         int i;
123         int count = 0;
124         struct rte_config *cfg = rte_eal_get_configuration();
125         for (i = 0; i < RTE_MAX_LCORE; i++) {
126                 if (lcore_config[i].core_role == ROLE_SERVICE) {
127                         if ((unsigned int)i == cfg->master_lcore)
128                                 continue;
129                         rte_service_lcore_add(i);
130                         count++;
131                 }
132         }
133
134         rte_service_library_initialized = 1;
135         return 0;
136 }
137
138 /* returns 1 if service is registered and has not been unregistered
139  * Returns 0 if service never registered, or has been unregistered
140  */
141 static inline int
142 service_valid(uint32_t id)
143 {
144         return !!(rte_services[id].internal_flags & SERVICE_F_REGISTERED);
145 }
146
147 /* returns 1 if statistics should be colleced for service
148  * Returns 0 if statistics should not be collected for service
149  */
150 static inline int
151 service_stats_enabled(struct rte_service_spec_impl *impl)
152 {
153         return !!(impl->internal_flags & SERVICE_F_STATS_ENABLED);
154 }
155
156 static inline int
157 service_mt_safe(struct rte_service_spec_impl *s)
158 {
159         return s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE;
160 }
161
162 int32_t rte_service_set_stats_enable(struct rte_service_spec *service,
163                                   int32_t enabled)
164 {
165         struct rte_service_spec_impl *impl =
166                 (struct rte_service_spec_impl *)service;
167         if (!impl)
168                 return -EINVAL;
169
170         if (enabled)
171                 impl->internal_flags |= SERVICE_F_STATS_ENABLED;
172         else
173                 impl->internal_flags &= ~(SERVICE_F_STATS_ENABLED);
174
175         return 0;
176 }
177
178 uint32_t
179 rte_service_get_count(void)
180 {
181         return rte_service_count;
182 }
183
184 struct rte_service_spec *
185 rte_service_get_by_id(uint32_t id)
186 {
187         struct rte_service_spec *service = NULL;
188         if (id < rte_service_count)
189                 service = (struct rte_service_spec *)&rte_services[id];
190
191         return service;
192 }
193
194 struct rte_service_spec *rte_service_get_by_name(const char *name)
195 {
196         struct rte_service_spec *service = NULL;
197         int i;
198         for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
199                 if (service_valid(i) &&
200                                 strcmp(name, rte_services[i].spec.name) == 0) {
201                         service = (struct rte_service_spec *)&rte_services[i];
202                         break;
203                 }
204         }
205
206         return service;
207 }
208
209 const char *
210 rte_service_get_name(const struct rte_service_spec *service)
211 {
212         return service->name;
213 }
214
215 int32_t
216 rte_service_probe_capability(const struct rte_service_spec *service,
217                              uint32_t capability)
218 {
219         return service->capabilities & capability;
220 }
221
222 int32_t
223 rte_service_is_running(const struct rte_service_spec *spec)
224 {
225         const struct rte_service_spec_impl *impl =
226                 (const struct rte_service_spec_impl *)spec;
227         if (!impl)
228                 return -EINVAL;
229
230         return (impl->runstate == RUNSTATE_RUNNING) &&
231                 (impl->num_mapped_cores > 0);
232 }
233
234 int32_t
235 rte_service_register(const struct rte_service_spec *spec)
236 {
237         uint32_t i;
238         int32_t free_slot = -1;
239
240         if (spec->callback == NULL || strlen(spec->name) == 0)
241                 return -EINVAL;
242
243         for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
244                 if (!service_valid(i)) {
245                         free_slot = i;
246                         break;
247                 }
248         }
249
250         if ((free_slot < 0) || (i == RTE_SERVICE_NUM_MAX))
251                 return -ENOSPC;
252
253         struct rte_service_spec_impl *s = &rte_services[free_slot];
254         s->spec = *spec;
255         s->internal_flags |= SERVICE_F_REGISTERED;
256
257         rte_smp_wmb();
258         rte_service_count++;
259
260         return 0;
261 }
262
263 int32_t
264 rte_service_unregister(struct rte_service_spec *spec)
265 {
266         struct rte_service_spec_impl *s = NULL;
267         struct rte_service_spec_impl *spec_impl =
268                 (struct rte_service_spec_impl *)spec;
269
270         uint32_t i;
271         uint32_t service_id;
272         for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
273                 if (&rte_services[i] == spec_impl) {
274                         s = spec_impl;
275                         service_id = i;
276                         break;
277                 }
278         }
279
280         if (!s)
281                 return -EINVAL;
282
283         rte_service_count--;
284         rte_smp_wmb();
285
286         s->internal_flags &= ~(SERVICE_F_REGISTERED);
287
288         for (i = 0; i < RTE_MAX_LCORE; i++)
289                 lcore_states[i].service_mask &= ~(1 << service_id);
290
291         memset(&rte_services[service_id], 0,
292                         sizeof(struct rte_service_spec_impl));
293
294         return 0;
295 }
296
297 int32_t
298 rte_service_start(struct rte_service_spec *service)
299 {
300         struct rte_service_spec_impl *s =
301                 (struct rte_service_spec_impl *)service;
302         s->runstate = RUNSTATE_RUNNING;
303         rte_smp_wmb();
304         return 0;
305 }
306
307 int32_t
308 rte_service_stop(struct rte_service_spec *service)
309 {
310         struct rte_service_spec_impl *s =
311                 (struct rte_service_spec_impl *)service;
312         s->runstate = RUNSTATE_STOPPED;
313         rte_smp_wmb();
314         return 0;
315 }
316
317 static int32_t
318 rte_service_runner_func(void *arg)
319 {
320         RTE_SET_USED(arg);
321         uint32_t i;
322         const int lcore = rte_lcore_id();
323         struct core_state *cs = &lcore_states[lcore];
324
325         while (lcore_states[lcore].runstate == RUNSTATE_RUNNING) {
326                 const uint64_t service_mask = cs->service_mask;
327                 for (i = 0; i < rte_service_count; i++) {
328                         struct rte_service_spec_impl *s = &rte_services[i];
329                         if (s->runstate != RUNSTATE_RUNNING ||
330                                         !(service_mask & (1 << i)))
331                                 continue;
332
333                         /* check do we need cmpset, if MT safe or <= 1 core
334                          * mapped, atomic ops are not required.
335                          */
336                         const int need_cmpset = !((service_mt_safe(s) == 0) &&
337                                                 (s->num_mapped_cores > 1));
338                         uint32_t *lock = (uint32_t *)&s->execute_lock;
339
340                         if (need_cmpset || rte_atomic32_cmpset(lock, 0, 1)) {
341                                 void *userdata = s->spec.callback_userdata;
342
343                                 if (service_stats_enabled(s)) {
344                                         uint64_t start = rte_rdtsc();
345                                         s->spec.callback(userdata);
346                                         uint64_t end = rte_rdtsc();
347                                         s->cycles_spent += end - start;
348                                         cs->calls_per_service[i]++;
349                                         s->calls++;
350                                 } else
351                                         s->spec.callback(userdata);
352
353                                 if (need_cmpset)
354                                         rte_atomic32_clear(&s->execute_lock);
355                         }
356                 }
357
358                 rte_smp_rmb();
359         }
360
361         lcore_config[lcore].state = WAIT;
362
363         return 0;
364 }
365
366 int32_t
367 rte_service_lcore_count(void)
368 {
369         int32_t count = 0;
370         uint32_t i;
371         for (i = 0; i < RTE_MAX_LCORE; i++)
372                 count += lcore_states[i].is_service_core;
373         return count;
374 }
375
376 int32_t
377 rte_service_lcore_list(uint32_t array[], uint32_t n)
378 {
379         uint32_t count = rte_service_lcore_count();
380         if (count > n)
381                 return -ENOMEM;
382
383         if (!array)
384                 return -EINVAL;
385
386         uint32_t i;
387         uint32_t idx = 0;
388         for (i = 0; i < RTE_MAX_LCORE; i++) {
389                 struct core_state *cs = &lcore_states[i];
390                 if (cs->is_service_core) {
391                         array[idx] = i;
392                         idx++;
393                 }
394         }
395
396         return count;
397 }
398
399 int32_t
400 rte_service_start_with_defaults(void)
401 {
402         /* create a default mapping from cores to services, then start the
403          * services to make them transparent to unaware applications.
404          */
405         uint32_t i;
406         int ret;
407         uint32_t count = rte_service_get_count();
408
409         int32_t lcore_iter = 0;
410         uint32_t ids[RTE_MAX_LCORE];
411         int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
412
413         if (lcore_count == 0)
414                 return -ENOTSUP;
415
416         for (i = 0; (int)i < lcore_count; i++)
417                 rte_service_lcore_start(ids[i]);
418
419         for (i = 0; i < count; i++) {
420                 struct rte_service_spec *s = rte_service_get_by_id(i);
421                 if (!s)
422                         return -EINVAL;
423
424                 /* do 1:1 core mapping here, with each service getting
425                  * assigned a single core by default. Adding multiple services
426                  * should multiplex to a single core, or 1:1 if there are the
427                  * same amount of services as service-cores
428                  */
429                 ret = rte_service_enable_on_lcore(s, ids[lcore_iter]);
430                 if (ret)
431                         return -ENODEV;
432
433                 lcore_iter++;
434                 if (lcore_iter >= lcore_count)
435                         lcore_iter = 0;
436
437                 ret = rte_service_start(s);
438                 if (ret)
439                         return -ENOEXEC;
440         }
441
442         return 0;
443 }
444
445 static int32_t
446 service_update(struct rte_service_spec *service, uint32_t lcore,
447                 uint32_t *set, uint32_t *enabled)
448 {
449         uint32_t i;
450         int32_t sid = -1;
451
452         for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
453                 if ((struct rte_service_spec *)&rte_services[i] == service &&
454                                 service_valid(i)) {
455                         sid = i;
456                         break;
457                 }
458         }
459
460         if (sid == -1 || lcore >= RTE_MAX_LCORE)
461                 return -EINVAL;
462
463         if (!lcore_states[lcore].is_service_core)
464                 return -EINVAL;
465
466         if (set) {
467                 if (*set) {
468                         lcore_states[lcore].service_mask |=  (1 << sid);
469                         rte_services[sid].num_mapped_cores++;
470                 } else {
471                         lcore_states[lcore].service_mask &= ~(1 << sid);
472                         rte_services[sid].num_mapped_cores--;
473                 }
474         }
475
476         if (enabled)
477                 *enabled = (lcore_states[lcore].service_mask & (1 << sid));
478
479         rte_smp_wmb();
480
481         return 0;
482 }
483
/* Query whether a service is enabled on a service core.
 *
 * Returns the enabled value reported by service_update() (0 when the service
 * is not enabled on the core, non-zero when it is), or -EINVAL when the
 * query itself fails.
 */
int32_t rte_service_get_enabled_on_lcore(struct rte_service_spec *service,
					uint32_t lcore)
{
	uint32_t is_on;

	if (service_update(service, lcore, NULL, &is_on) != 0)
		return -EINVAL;

	return is_on;
}
493
/* Enable a service on a service core.
 * Returns the result of service_update(): 0 on success, -EINVAL otherwise.
 */
int32_t
rte_service_enable_on_lcore(struct rte_service_spec *service, uint32_t lcore)
{
	uint32_t request = 1;

	return service_update(service, lcore, &request, NULL);
}
500
/* Disable a service on a service core.
 * Returns the result of service_update(): 0 on success, -EINVAL otherwise.
 */
int32_t
rte_service_disable_on_lcore(struct rte_service_spec *service, uint32_t lcore)
{
	uint32_t request = 0;

	return service_update(service, lcore, &request, NULL);
}
507
508 int32_t rte_service_lcore_reset_all(void)
509 {
510         /* loop over cores, reset all to mask 0 */
511         uint32_t i;
512         for (i = 0; i < RTE_MAX_LCORE; i++) {
513                 lcore_states[i].service_mask = 0;
514                 lcore_states[i].is_service_core = 0;
515                 lcore_states[i].runstate = RUNSTATE_STOPPED;
516         }
517         for (i = 0; i < RTE_SERVICE_NUM_MAX; i++)
518                 rte_services[i].num_mapped_cores = 0;
519
520         rte_smp_wmb();
521
522         return 0;
523 }
524
525 static void
526 set_lcore_state(uint32_t lcore, int32_t state)
527 {
528         /* mark core state in hugepage backed config */
529         struct rte_config *cfg = rte_eal_get_configuration();
530         cfg->lcore_role[lcore] = state;
531
532         /* mark state in process local lcore_config */
533         lcore_config[lcore].core_role = state;
534
535         /* update per-lcore optimized state tracking */
536         lcore_states[lcore].is_service_core = (state == ROLE_SERVICE);
537 }
538
539 int32_t
540 rte_service_lcore_add(uint32_t lcore)
541 {
542         if (lcore >= RTE_MAX_LCORE)
543                 return -EINVAL;
544         if (lcore_states[lcore].is_service_core)
545                 return -EALREADY;
546
547         set_lcore_state(lcore, ROLE_SERVICE);
548
549         /* ensure that after adding a core the mask and state are defaults */
550         lcore_states[lcore].service_mask = 0;
551         lcore_states[lcore].runstate = RUNSTATE_STOPPED;
552
553         rte_smp_wmb();
554         return 0;
555 }
556
557 int32_t
558 rte_service_lcore_del(uint32_t lcore)
559 {
560         if (lcore >= RTE_MAX_LCORE)
561                 return -EINVAL;
562
563         struct core_state *cs = &lcore_states[lcore];
564         if (!cs->is_service_core)
565                 return -EINVAL;
566
567         if (cs->runstate != RUNSTATE_STOPPED)
568                 return -EBUSY;
569
570         set_lcore_state(lcore, ROLE_RTE);
571
572         rte_smp_wmb();
573         return 0;
574 }
575
576 int32_t
577 rte_service_lcore_start(uint32_t lcore)
578 {
579         if (lcore >= RTE_MAX_LCORE)
580                 return -EINVAL;
581
582         struct core_state *cs = &lcore_states[lcore];
583         if (!cs->is_service_core)
584                 return -EINVAL;
585
586         if (cs->runstate == RUNSTATE_RUNNING)
587                 return -EALREADY;
588
589         /* set core to run state first, and then launch otherwise it will
590          * return immediately as runstate keeps it in the service poll loop
591          */
592         lcore_states[lcore].runstate = RUNSTATE_RUNNING;
593
594         int ret = rte_eal_remote_launch(rte_service_runner_func, 0, lcore);
595         /* returns -EBUSY if the core is already launched, 0 on success */
596         return ret;
597 }
598
599 int32_t
600 rte_service_lcore_stop(uint32_t lcore)
601 {
602         if (lcore >= RTE_MAX_LCORE)
603                 return -EINVAL;
604
605         if (lcore_states[lcore].runstate == RUNSTATE_STOPPED)
606                 return -EALREADY;
607
608         uint32_t i;
609         for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
610                 int32_t enabled = lcore_states[i].service_mask & (1 << i);
611                 int32_t service_running = rte_services[i].runstate !=
612                                                 RUNSTATE_STOPPED;
613                 int32_t only_core = rte_services[i].num_mapped_cores == 1;
614
615                 /* if the core is mapped, and the service is running, and this
616                  * is the only core that is mapped, the service would cease to
617                  * run if this core stopped, so fail instead.
618                  */
619                 if (enabled && service_running && only_core)
620                         return -EBUSY;
621         }
622
623         lcore_states[lcore].runstate = RUNSTATE_STOPPED;
624
625         return 0;
626 }
627
628 static void
629 rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
630                      uint64_t all_cycles, uint32_t reset)
631 {
632         /* avoid divide by zero */
633         if (all_cycles == 0)
634                 all_cycles = 1;
635
636         int calls = 1;
637         if (s->calls != 0)
638                 calls = s->calls;
639
640         fprintf(f, "  %s: stats %d\tcalls %"PRIu64"\tcycles %"
641                         PRIu64"\tavg: %"PRIu64"\n",
642                         s->spec.name, service_stats_enabled(s), s->calls,
643                         s->cycles_spent, s->cycles_spent / calls);
644
645         if (reset) {
646                 s->cycles_spent = 0;
647                 s->calls = 0;
648         }
649 }
650
651 static void
652 service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
653 {
654         uint32_t i;
655         struct core_state *cs = &lcore_states[lcore];
656
657         fprintf(f, "%02d\t", lcore);
658         for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
659                 if (!service_valid(i))
660                         continue;
661                 fprintf(f, "%"PRIu64"\t", cs->calls_per_service[i]);
662                 if (reset)
663                         cs->calls_per_service[i] = 0;
664         }
665         fprintf(f, "\n");
666 }
667
668 int32_t rte_service_dump(FILE *f, struct rte_service_spec *service)
669 {
670         uint32_t i;
671
672         uint64_t total_cycles = 0;
673         for (i = 0; i < rte_service_count; i++) {
674                 if (!service_valid(i))
675                         continue;
676                 total_cycles += rte_services[i].cycles_spent;
677         }
678
679         if (service) {
680                 struct rte_service_spec_impl *s =
681                         (struct rte_service_spec_impl *)service;
682                 fprintf(f, "Service %s Summary\n", s->spec.name);
683                 uint32_t reset = 0;
684                 rte_service_dump_one(f, s, total_cycles, reset);
685                 return 0;
686         }
687
688         fprintf(f, "Services Summary\n");
689         for (i = 0; i < rte_service_count; i++) {
690                 uint32_t reset = 1;
691                 rte_service_dump_one(f, &rte_services[i], total_cycles, reset);
692         }
693
694         fprintf(f, "Service Cores Summary\n");
695         for (i = 0; i < RTE_MAX_LCORE; i++) {
696                 if (lcore_config[i].core_role != ROLE_SERVICE)
697                         continue;
698
699                 uint32_t reset = 0;
700                 service_dump_calls_per_lcore(f, i, reset);
701         }
702
703         return 0;
704 }