examples/performance-thread/common/lthread_sched.c
/*
 * SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 Intel Corporation.
 * Copyright 2012 Hasan Alayli <halayli@gmail.com>
 */

#define RTE_MEM 1

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>
#include <limits.h>
#include <inttypes.h>
#include <unistd.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sched.h>

#include <rte_prefetch.h>
#include <rte_per_lcore.h>
#include <rte_log.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>

#include "lthread_api.h"
#include "lthread_int.h"
#include "lthread_sched.h"
#include "lthread_objcache.h"
#include "lthread_timer.h"
#include "lthread_mutex.h"
#include "lthread_cond.h"
#include "lthread_tls.h"
#include "lthread_diag.h"

/*
 * This file implements the lthread scheduler.
 * The scheduler is the function lthread_run(), which must be run as the
 * main loop of an EAL thread.
 *
 * Currently, once a scheduler is created it cannot be destroyed.
 * When a scheduler shuts down it is assumed that the application is
 * terminating.
 */
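
/*
 * Illustrative usage sketch (not part of this file): an application
 * typically sets the scheduler count on the initial lcore, launches one
 * entry function per participating lcore with rte_eal_remote_launch(),
 * creates an initial lthread and then enters lthread_run(). The names
 * sched_main and first_lthread below are hypothetical.
 *
 *     static void first_lthread(void *arg);
 *
 *     static int sched_main(void *arg __rte_unused)
 *     {
 *             struct lthread *lt;
 *
 *             lthread_create(&lt, -1, first_lthread, NULL);
 *             lthread_run();
 *             return 0;
 *     }
 *
 * lthread_num_schedulers_set() is normally called beforehand so that the
 * schedulers start in lockstep (see _lthread_schedulers_sync_start()).
 */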

static uint16_t num_schedulers;
static uint16_t active_schedulers;

/* one scheduler per lcore */
RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;

struct lthread_sched *schedcore[LTHREAD_MAX_LCORES];

diag_callback diag_cb;

uint64_t diag_mask;


/* constructor */
RTE_INIT(lthread_sched_ctor)
{
        memset(schedcore, 0, sizeof(schedcore));
        __atomic_store_n(&num_schedulers, 1, __ATOMIC_RELAXED);
        __atomic_store_n(&active_schedulers, 0, __ATOMIC_RELAXED);
        diag_cb = NULL;
}


enum sched_alloc_phase {
        SCHED_ALLOC_OK,
        SCHED_ALLOC_QNODE_POOL,
        SCHED_ALLOC_READY_QUEUE,
        SCHED_ALLOC_PREADY_QUEUE,
        SCHED_ALLOC_LTHREAD_CACHE,
        SCHED_ALLOC_STACK_CACHE,
        SCHED_ALLOC_PERLT_CACHE,
        SCHED_ALLOC_TLS_CACHE,
        SCHED_ALLOC_COND_CACHE,
        SCHED_ALLOC_MUTEX_CACHE,
};

/*
 * Allocate the per scheduler queues and object caches.
 * On failure, everything allocated so far is released and the phase that
 * failed is returned.
 */
static int
_lthread_sched_alloc_resources(struct lthread_sched *new_sched)
{
        int alloc_status;

        do {
                /* Initialize per scheduler queue node pool */
                alloc_status = SCHED_ALLOC_QNODE_POOL;
                new_sched->qnode_pool =
                        _qnode_pool_create("qnode pool", LTHREAD_PREALLOC);
                if (new_sched->qnode_pool == NULL)
                        break;

                /* Initialize per scheduler local ready queue */
                alloc_status = SCHED_ALLOC_READY_QUEUE;
                new_sched->ready = _lthread_queue_create("ready queue");
                if (new_sched->ready == NULL)
                        break;

                /* Initialize per scheduler local peer ready queue */
                alloc_status = SCHED_ALLOC_PREADY_QUEUE;
                new_sched->pready = _lthread_queue_create("pready queue");
                if (new_sched->pready == NULL)
                        break;

                /* Initialize per scheduler local free lthread cache */
                alloc_status = SCHED_ALLOC_LTHREAD_CACHE;
                new_sched->lthread_cache =
                        _lthread_objcache_create("lthread cache",
                                                sizeof(struct lthread),
                                                LTHREAD_PREALLOC);
                if (new_sched->lthread_cache == NULL)
                        break;

                /* Initialize per scheduler local free stack cache */
                alloc_status = SCHED_ALLOC_STACK_CACHE;
                new_sched->stack_cache =
                        _lthread_objcache_create("stack_cache",
                                                sizeof(struct lthread_stack),
                                                LTHREAD_PREALLOC);
                if (new_sched->stack_cache == NULL)
                        break;

                /* Initialize per scheduler local free per lthread data cache */
                alloc_status = SCHED_ALLOC_PERLT_CACHE;
                new_sched->per_lthread_cache =
                        _lthread_objcache_create("per_lt cache",
                                                RTE_PER_LTHREAD_SECTION_SIZE,
                                                LTHREAD_PREALLOC);
                if (new_sched->per_lthread_cache == NULL)
                        break;

                /* Initialize per scheduler local free tls cache */
                alloc_status = SCHED_ALLOC_TLS_CACHE;
                new_sched->tls_cache =
                        _lthread_objcache_create("TLS cache",
                                                sizeof(struct lthread_tls),
                                                LTHREAD_PREALLOC);
                if (new_sched->tls_cache == NULL)
                        break;

                /* Initialize per scheduler local free cond var cache */
                alloc_status = SCHED_ALLOC_COND_CACHE;
                new_sched->cond_cache =
                        _lthread_objcache_create("cond cache",
                                                sizeof(struct lthread_cond),
                                                LTHREAD_PREALLOC);
                if (new_sched->cond_cache == NULL)
                        break;

                /* Initialize per scheduler local free mutex cache */
                alloc_status = SCHED_ALLOC_MUTEX_CACHE;
                new_sched->mutex_cache =
                        _lthread_objcache_create("mutex cache",
                                                sizeof(struct lthread_mutex),
                                                LTHREAD_PREALLOC);
                if (new_sched->mutex_cache == NULL)
                        break;

                alloc_status = SCHED_ALLOC_OK;
        } while (0);

        /* roll back on any failure */
        switch (alloc_status) {
        case SCHED_ALLOC_MUTEX_CACHE:
                _lthread_objcache_destroy(new_sched->cond_cache);
                /* fall through */
        case SCHED_ALLOC_COND_CACHE:
                _lthread_objcache_destroy(new_sched->tls_cache);
                /* fall through */
        case SCHED_ALLOC_TLS_CACHE:
                _lthread_objcache_destroy(new_sched->per_lthread_cache);
                /* fall through */
        case SCHED_ALLOC_PERLT_CACHE:
                _lthread_objcache_destroy(new_sched->stack_cache);
                /* fall through */
        case SCHED_ALLOC_STACK_CACHE:
                _lthread_objcache_destroy(new_sched->lthread_cache);
                /* fall through */
        case SCHED_ALLOC_LTHREAD_CACHE:
                _lthread_queue_destroy(new_sched->pready);
                /* fall through */
        case SCHED_ALLOC_PREADY_QUEUE:
                _lthread_queue_destroy(new_sched->ready);
                /* fall through */
        case SCHED_ALLOC_READY_QUEUE:
                _qnode_pool_destroy(new_sched->qnode_pool);
                /* fall through */
        case SCHED_ALLOC_QNODE_POOL:
                /* fall through */
        case SCHED_ALLOC_OK:
                break;
        }
        return alloc_status;
}


/*
 * Create a scheduler on the current lcore
 */
struct lthread_sched *_lthread_sched_create(size_t stack_size)
{
        int status;
        struct lthread_sched *new_sched;
        unsigned lcoreid = rte_lcore_id();

        RTE_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE);

        if (stack_size == 0)
                stack_size = LTHREAD_MAX_STACK_SIZE;

        new_sched =
             rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched),
                                RTE_CACHE_LINE_SIZE,
                                rte_socket_id());
        if (new_sched == NULL) {
                RTE_LOG(CRIT, LTHREAD,
                        "Failed to allocate memory for scheduler\n");
                return NULL;
        }

        _lthread_key_pool_init();

        new_sched->stack_size = stack_size;
        new_sched->birth = rte_rdtsc();
        THIS_SCHED = new_sched;

        status = _lthread_sched_alloc_resources(new_sched);
        if (status != SCHED_ALLOC_OK) {
                RTE_LOG(CRIT, LTHREAD,
                        "Failed to allocate resources for scheduler code = %d\n",
                        status);
                rte_free(new_sched);
                return NULL;
        }

        memset(&new_sched->ctx, 0, sizeof(struct ctx));

        new_sched->lcore_id = lcoreid;

        schedcore[lcoreid] = new_sched;

        new_sched->run_flag = 1;

        DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0);

        rte_wmb();
        return new_sched;
}

/*
 * Set the number of schedulers in the system
 */
int lthread_num_schedulers_set(int num)
{
        __atomic_store_n(&num_schedulers, num, __ATOMIC_RELAXED);
        return (int)__atomic_load_n(&num_schedulers, __ATOMIC_RELAXED);
}
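
/*
 * Illustrative example (not part of this file): the launching code would
 * normally request one scheduler per participating lcore before any
 * scheduler is started, e.g.
 *
 *     lthread_num_schedulers_set((int)rte_lcore_count());
 *
 * so that each lthread_run() waits until all schedulers have come up.
 */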

/*
 * Return the number of schedulers active
 */
int lthread_active_schedulers(void)
{
        return (int)__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED);
}


/**
 * shutdown the scheduler running on the specified lcore
 */
void lthread_scheduler_shutdown(unsigned lcoreid)
{
        uint64_t coreid = (uint64_t) lcoreid;

        if (coreid < LTHREAD_MAX_LCORES) {
                if (schedcore[coreid] != NULL)
                        schedcore[coreid]->run_flag = 0;
        }
}

/**
 * shutdown all schedulers
 */
void lthread_scheduler_shutdown_all(void)
{
        uint64_t i;

        /*
         * give time for all schedulers to have started
         * Note we use sched_yield() rather than pthread_yield() to allow
         * for the possibility of a pthread wrapper on lthread_yield(),
         * something that is not possible unless the scheduler is running.
         */
        while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
               __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
                sched_yield();

        for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
                if (schedcore[i] != NULL)
                        schedcore[i]->run_flag = 0;
        }
}
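
/*
 * Illustrative example (not part of this file): once the workload is
 * finished, a single call
 *
 *     lthread_scheduler_shutdown_all();
 *
 * clears run_flag on every scheduler; each lthread_run() loop then exits
 * once its local and peer ready queues are empty and none of its lthreads
 * remain blocked.
 */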

/*
 * Resume a suspended lthread
 */
static __rte_always_inline void
_lthread_resume(struct lthread *lt);
static inline void _lthread_resume(struct lthread *lt)
{
        struct lthread_sched *sched = THIS_SCHED;
        struct lthread_stack *s;
        uint64_t state = lt->state;
#if LTHREAD_DIAG
        int init = 0;
#endif

        sched->current_lthread = lt;

        if (state & (BIT(ST_LT_CANCELLED) | BIT(ST_LT_EXITED))) {
                /* if detached we can free the thread now */
                if (state & BIT(ST_LT_DETACH)) {
                        _lthread_free(lt);
                        sched->current_lthread = NULL;
                        return;
                }
        }

        if (state & BIT(ST_LT_INIT)) {
                /* first time this thread has been run */
                /* assign thread to this scheduler */
                lt->sched = THIS_SCHED;

                /* allocate stack */
                s = _stack_alloc();

                lt->stack_container = s;
                _lthread_set_stack(lt, s->stack, s->stack_size);

                /* allocate memory for TLS used by this thread */
                _lthread_tls_alloc(lt);

                lt->state = BIT(ST_LT_READY);
#if LTHREAD_DIAG
                init = 1;
#endif
        }

        DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESUMED, init, lt);

        /* switch to the new thread */
        ctx_switch(&lt->ctx, &sched->ctx);

        /* If posting to a queue that could be read by another lcore
         * we defer the queue write till now to ensure the context has been
         * saved before the other core tries to resume it
         * This applies to blocking on mutex, cond, and to set_affinity
         */
        if (lt->pending_wr_queue != NULL) {
                struct lthread_queue *dest = lt->pending_wr_queue;

                lt->pending_wr_queue = NULL;

                /* queue the current thread to the specified queue */
                _lthread_queue_insert_mp(dest, lt);
        }

        sched->current_lthread = NULL;
}

/*
 * Handle sleep timer expiry
 */
void
_sched_timer_cb(struct rte_timer *tim, void *arg)
{
        struct lthread *lt = (struct lthread *) arg;
        uint64_t state = lt->state;

        DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_EXPIRED, &lt->tim, 0);

        rte_timer_stop(tim);

        if (lt->state & BIT(ST_LT_CANCELLED))
                (THIS_SCHED)->nb_blocked_threads--;

        lt->state = state | BIT(ST_LT_EXPIRED);
        _lthread_resume(lt);
        lt->state = state & CLEARBIT(ST_LT_EXPIRED);
}


/*
 * Return 1 if the scheduler has no more work to do and may exit,
 * otherwise return 0.
 */
static inline int _lthread_sched_isdone(struct lthread_sched *sched)
{
        return (sched->run_flag == 0) &&
                        (_lthread_queue_empty(sched->ready)) &&
                        (_lthread_queue_empty(sched->pready)) &&
                        (sched->nb_blocked_threads == 0);
}

/*
 * Wait for all schedulers to start
 */
static inline void _lthread_schedulers_sync_start(void)
{
        __atomic_fetch_add(&active_schedulers, 1, __ATOMIC_RELAXED);

        /* wait for lthread schedulers
         * Note we use sched_yield() rather than pthread_yield() to allow
         * for the possibility of a pthread wrapper on lthread_yield(),
         * something that is not possible unless the scheduler is running.
         */
        while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
               __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
                sched_yield();

}

/*
 * Wait for all schedulers to stop
 */
static inline void _lthread_schedulers_sync_stop(void)
{
        __atomic_fetch_sub(&active_schedulers, 1, __ATOMIC_RELAXED);
        __atomic_fetch_sub(&num_schedulers, 1, __ATOMIC_RELAXED);

        /* wait for schedulers
         * Note we use sched_yield() rather than pthread_yield() to allow
         * for the possibility of a pthread wrapper on lthread_yield(),
         * something that is not possible unless the scheduler is running.
         */
        while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) > 0)
                sched_yield();

}


/*
 * Run the lthread scheduler
 * This loop is the heart of the system
 */
void lthread_run(void)
{

        struct lthread_sched *sched = THIS_SCHED;
        struct lthread *lt = NULL;

        RTE_LOG(INFO, LTHREAD,
                "starting scheduler %p on lcore %u phys core %u\n",
                sched, rte_lcore_id(),
                rte_lcore_index(rte_lcore_id()));

        /* if more than one, wait for all schedulers to start */
        _lthread_schedulers_sync_start();


        /*
         * This is the main scheduling loop.
         * So long as there are tasks in existence we run this loop.
         * On each pass we check:
         *   expired timers,
         *   the local ready queue,
         *   and the peer ready queue,
         * and resume lthreads ad infinitum.
         */
        while (!_lthread_sched_isdone(sched)) {

                rte_timer_manage();

                lt = _lthread_queue_poll(sched->ready);
                if (lt != NULL)
                        _lthread_resume(lt);
                lt = _lthread_queue_poll(sched->pready);
                if (lt != NULL)
                        _lthread_resume(lt);
        }


        /* if more than one, wait for all schedulers to stop */
        _lthread_schedulers_sync_stop();

        (THIS_SCHED) = NULL;

        RTE_LOG(INFO, LTHREAD,
                "stopping scheduler %p on lcore %u phys core %u\n",
                sched, rte_lcore_id(),
                rte_lcore_index(rte_lcore_id()));
        fflush(stdout);
}

/*
 * Return the scheduler for this lcore
 */
struct lthread_sched *_lthread_sched_get(unsigned int lcore_id)
{
        struct lthread_sched *res = NULL;

        if (lcore_id < LTHREAD_MAX_LCORES)
                res = schedcore[lcore_id];

        return res;
}

/*
 * migrate the current thread to another scheduler running
 * on the specified lcore.
 */
int lthread_set_affinity(unsigned lcoreid)
{
        struct lthread *lt = THIS_LTHREAD;
        struct lthread_sched *dest_sched;

        if (unlikely(lcoreid >= LTHREAD_MAX_LCORES))
                return POSIX_ERRNO(EINVAL);

        DIAG_EVENT(lt, LT_DIAG_LTHREAD_AFFINITY, lcoreid, 0);

        dest_sched = schedcore[lcoreid];

        if (unlikely(dest_sched == NULL))
                return POSIX_ERRNO(EINVAL);

        if (likely(dest_sched != THIS_SCHED)) {
                lt->sched = dest_sched;
                lt->pending_wr_queue = dest_sched->pready;
                _affinitize();
                return 0;
        }
        return 0;
}
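
/*
 * Illustrative example (not part of this file): a running lthread migrates
 * itself to the scheduler of, say, lcore 2 with
 *
 *     int rc = lthread_set_affinity(2);
 *
 * A return of 0 means the lthread will next be resumed by the destination
 * scheduler (the queue write is deferred by _lthread_resume() until the
 * context has been saved); EINVAL means the lcore id is out of range or no
 * scheduler exists there.
 */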