/*
 * SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 Intel Corporation.
 * Copyright 2012 Hasan Alayli <halayli@gmail.com>
 */
#include <rte_prefetch.h>
#include <rte_per_lcore.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>

#include "lthread_api.h"
#include "lthread_int.h"
#include "lthread_sched.h"
#include "lthread_objcache.h"
#include "lthread_timer.h"
#include "lthread_mutex.h"
#include "lthread_cond.h"
#include "lthread_tls.h"
#include "lthread_diag.h"
/*
 * This file implements the lthread scheduler.
 * The scheduler is the function lthread_run(); it must be run as the
 * main loop of an EAL thread.
 *
 * Currently, once a scheduler has been created it cannot be destroyed.
 * When a scheduler shuts down it is assumed that the application is
 * terminating.
 */
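/*
 * Illustrative usage sketch (not part of this file): a typical application
 * tells the library how many schedulers will run, starts lthread_run() on
 * every worker lcore and then runs a scheduler on the main lcore as well.
 * The helper names sched_main() and app_lthread_main() are hypothetical;
 * only the lthread_* and rte_* calls are real APIs, and the launch sequence
 * shown is an assumed pattern, not something this file mandates.
 *
 *	static int sched_main(void *arg __rte_unused)
 *	{
 *		struct lthread *lt;
 *
 *		// seed this lcore with an initial lthread; the per-lcore
 *		// scheduler is created on demand by the lthread API
 *		lthread_create(&lt, (int)rte_lcore_id(), app_lthread_main, NULL);
 *		lthread_run();	// returns when this scheduler is shut down
 *		return 0;
 *	}
 *
 *	// in main(), after rte_eal_init():
 *	unsigned int lcore_id;
 *	lthread_num_schedulers_set((int)rte_lcore_count());
 *	RTE_LCORE_FOREACH_WORKER(lcore_id)
 *		rte_eal_remote_launch(sched_main, NULL, lcore_id);
 *	sched_main(NULL);
 */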
static uint16_t num_schedulers;
static uint16_t active_schedulers;

/* one scheduler per lcore */
RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;

struct lthread_sched *schedcore[LTHREAD_MAX_LCORES];

diag_callback diag_cb;
RTE_INIT(lthread_sched_ctor)
{
	memset(schedcore, 0, sizeof(schedcore));
	__atomic_store_n(&num_schedulers, 1, __ATOMIC_RELAXED);
	__atomic_store_n(&active_schedulers, 0, __ATOMIC_RELAXED);
}
enum sched_alloc_phase {
	SCHED_ALLOC_OK = 0,
	SCHED_ALLOC_QNODE_POOL,
	SCHED_ALLOC_READY_QUEUE,
	SCHED_ALLOC_PREADY_QUEUE,
	SCHED_ALLOC_LTHREAD_CACHE,
	SCHED_ALLOC_STACK_CACHE,
	SCHED_ALLOC_PERLT_CACHE,
	SCHED_ALLOC_TLS_CACHE,
	SCHED_ALLOC_COND_CACHE,
	SCHED_ALLOC_MUTEX_CACHE,
};
static int
_lthread_sched_alloc_resources(struct lthread_sched *new_sched)
{
	int alloc_status;

	/* Initialize per scheduler queue node pool */
	alloc_status = SCHED_ALLOC_QNODE_POOL;
	new_sched->qnode_pool =
		_qnode_pool_create("qnode pool", LTHREAD_PREALLOC);
	if (new_sched->qnode_pool == NULL)

	/* Initialize per scheduler local ready queue */
	alloc_status = SCHED_ALLOC_READY_QUEUE;
	new_sched->ready = _lthread_queue_create("ready queue");
	if (new_sched->ready == NULL)

	/* Initialize per scheduler local peer ready queue */
	alloc_status = SCHED_ALLOC_PREADY_QUEUE;
	new_sched->pready = _lthread_queue_create("pready queue");
	if (new_sched->pready == NULL)

	/* Initialize per scheduler local free lthread cache */
	alloc_status = SCHED_ALLOC_LTHREAD_CACHE;
	new_sched->lthread_cache =
		_lthread_objcache_create("lthread cache",
					 sizeof(struct lthread),
	if (new_sched->lthread_cache == NULL)

	/* Initialize per scheduler local free stack cache */
	alloc_status = SCHED_ALLOC_STACK_CACHE;
	new_sched->stack_cache =
		_lthread_objcache_create("stack_cache",
					 sizeof(struct lthread_stack),
	if (new_sched->stack_cache == NULL)

	/* Initialize per scheduler local free per lthread data cache */
	alloc_status = SCHED_ALLOC_PERLT_CACHE;
	new_sched->per_lthread_cache =
		_lthread_objcache_create("per_lt cache",
					 RTE_PER_LTHREAD_SECTION_SIZE,
	if (new_sched->per_lthread_cache == NULL)

	/* Initialize per scheduler local free tls cache */
	alloc_status = SCHED_ALLOC_TLS_CACHE;
	new_sched->tls_cache =
		_lthread_objcache_create("TLS cache",
					 sizeof(struct lthread_tls),
	if (new_sched->tls_cache == NULL)

	/* Initialize per scheduler local free cond var cache */
	alloc_status = SCHED_ALLOC_COND_CACHE;
	new_sched->cond_cache =
		_lthread_objcache_create("cond cache",
					 sizeof(struct lthread_cond),
	if (new_sched->cond_cache == NULL)

	/* Initialize per scheduler local free mutex cache */
	alloc_status = SCHED_ALLOC_MUTEX_CACHE;
	new_sched->mutex_cache =
		_lthread_objcache_create("mutex cache",
					 sizeof(struct lthread_mutex),
	if (new_sched->mutex_cache == NULL)

	alloc_status = SCHED_ALLOC_OK;
	/* roll back on any failure */
	switch (alloc_status) {
	case SCHED_ALLOC_MUTEX_CACHE:
		_lthread_objcache_destroy(new_sched->cond_cache);
		/* fall through */
	case SCHED_ALLOC_COND_CACHE:
		_lthread_objcache_destroy(new_sched->tls_cache);
		/* fall through */
	case SCHED_ALLOC_TLS_CACHE:
		_lthread_objcache_destroy(new_sched->per_lthread_cache);
		/* fall through */
	case SCHED_ALLOC_PERLT_CACHE:
		_lthread_objcache_destroy(new_sched->stack_cache);
		/* fall through */
	case SCHED_ALLOC_STACK_CACHE:
		_lthread_objcache_destroy(new_sched->lthread_cache);
		/* fall through */
	case SCHED_ALLOC_LTHREAD_CACHE:
		_lthread_queue_destroy(new_sched->pready);
		/* fall through */
	case SCHED_ALLOC_PREADY_QUEUE:
		_lthread_queue_destroy(new_sched->ready);
		/* fall through */
	case SCHED_ALLOC_READY_QUEUE:
		_qnode_pool_destroy(new_sched->qnode_pool);
		/* fall through */
	case SCHED_ALLOC_QNODE_POOL:
/*
 * Create a scheduler on the current lcore
 */
struct lthread_sched *_lthread_sched_create(size_t stack_size)
	struct lthread_sched *new_sched;
	unsigned lcoreid = rte_lcore_id();

	RTE_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE);

	if (stack_size == 0)
		stack_size = LTHREAD_MAX_STACK_SIZE;

	new_sched =
		rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched),
				  RTE_CACHE_LINE_SIZE,
				  rte_socket_id());
	if (new_sched == NULL) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate memory for scheduler\n");
		return NULL;
	}

	_lthread_key_pool_init();

	new_sched->stack_size = stack_size;
	new_sched->birth = rte_rdtsc();
	THIS_SCHED = new_sched;

	status = _lthread_sched_alloc_resources(new_sched);
	if (status != SCHED_ALLOC_OK) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate resources for scheduler code = %d\n",
			status);

	bzero(&new_sched->ctx, sizeof(struct ctx));

	new_sched->lcore_id = lcoreid;

	schedcore[lcoreid] = new_sched;

	new_sched->run_flag = 1;

	DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0);
/*
 * Set the number of schedulers in the system
 */
int lthread_num_schedulers_set(int num)
{
	__atomic_store_n(&num_schedulers, num, __ATOMIC_RELAXED);
	return (int)__atomic_load_n(&num_schedulers, __ATOMIC_RELAXED);
}
/*
 * Return the number of schedulers active
 */
int lthread_active_schedulers(void)
{
	return (int)__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED);
}
/*
 * shutdown the scheduler running on the specified lcore
 */
void lthread_scheduler_shutdown(unsigned lcoreid)
{
	uint64_t coreid = (uint64_t) lcoreid;

	if (coreid < LTHREAD_MAX_LCORES) {
		if (schedcore[coreid] != NULL)
			schedcore[coreid]->run_flag = 0;
	}
}
/*
 * shutdown all schedulers
 */
void lthread_scheduler_shutdown_all(void)
{
	uint64_t i;

	/*
	 * give time for all schedulers to have started
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
	       __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
		sched_yield();

	for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
		if (schedcore[i] != NULL)
			schedcore[i]->run_flag = 0;
	}
}
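/*
 * Illustrative teardown sketch (not part of this file): a controlling
 * thread can stop every scheduler and then wait for the worker lcores
 * running lthread_run() to return. Only the functions shown are real
 * APIs; the surrounding sequence is an assumed usage pattern.
 *
 *	unsigned int lcore_id;
 *
 *	lthread_scheduler_shutdown_all();
 *	RTE_LCORE_FOREACH_WORKER(lcore_id)
 *		rte_eal_wait_lcore(lcore_id);
 */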
/*
 * Resume a suspended lthread
 */
static __rte_always_inline void
_lthread_resume(struct lthread *lt);
static inline void _lthread_resume(struct lthread *lt)
{
	struct lthread_sched *sched = THIS_SCHED;
	struct lthread_stack *s;
	uint64_t state = lt->state;

	sched->current_lthread = lt;

	if (state & (BIT(ST_LT_CANCELLED) | BIT(ST_LT_EXITED))) {
		/* if detached we can free the thread now */
		if (state & BIT(ST_LT_DETACH)) {
			sched->current_lthread = NULL;

	if (state & BIT(ST_LT_INIT)) {
		/* first time this thread has been run */
		/* assign thread to this scheduler */
		lt->sched = THIS_SCHED;

		lt->stack_container = s;
		_lthread_set_stack(lt, s->stack, s->stack_size);

		/* allocate memory for TLS used by this thread */
		_lthread_tls_alloc(lt);

		lt->state = BIT(ST_LT_READY);

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESUMED, init, lt);

	/* switch to the new thread */
	ctx_switch(&lt->ctx, &sched->ctx);

	/* If posting to a queue that could be read by another lcore
	 * we defer the queue write till now to ensure the context has been
	 * saved before the other core tries to resume it
	 * This applies to blocking on mutex, cond, and to set_affinity
	 */
	if (lt->pending_wr_queue != NULL) {
		struct lthread_queue *dest = lt->pending_wr_queue;

		lt->pending_wr_queue = NULL;

		/* queue the current thread to the specified queue */
		_lthread_queue_insert_mp(dest, lt);
	}

	sched->current_lthread = NULL;
/*
 * Handle sleep timer expiry
 */
static void
_sched_timer_cb(struct rte_timer *tim, void *arg)
{
	struct lthread *lt = (struct lthread *) arg;
	uint64_t state = lt->state;

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_EXPIRED, &lt->tim, 0);

	if (lt->state & BIT(ST_LT_CANCELLED))
		(THIS_SCHED)->nb_blocked_threads--;

	lt->state = state | BIT(ST_LT_EXPIRED);

	lt->state = state & CLEARBIT(ST_LT_EXPIRED);
}
/*
 * Returns 0 if there is a pending job in scheduler or 1 if done and can exit.
 */
static inline int _lthread_sched_isdone(struct lthread_sched *sched)
{
	return (sched->run_flag == 0) &&
		(_lthread_queue_empty(sched->ready)) &&
		(_lthread_queue_empty(sched->pready)) &&
		(sched->nb_blocked_threads == 0);
}
/*
 * Wait for all schedulers to start
 */
static inline void _lthread_schedulers_sync_start(void)
{
	__atomic_fetch_add(&active_schedulers, 1, __ATOMIC_RELAXED);

	/* wait for lthread schedulers
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
	       __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
		sched_yield();
}
/*
 * Wait for all schedulers to stop
 */
static inline void _lthread_schedulers_sync_stop(void)
{
	__atomic_fetch_sub(&active_schedulers, 1, __ATOMIC_RELAXED);
	__atomic_fetch_sub(&num_schedulers, 1, __ATOMIC_RELAXED);

	/* wait for schedulers
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) > 0)
		sched_yield();
}
/*
 * Run the lthread scheduler
 * This loop is the heart of the system
 */
void lthread_run(void)
{
	struct lthread_sched *sched = THIS_SCHED;
	struct lthread *lt = NULL;

	RTE_LOG(INFO, LTHREAD,
		"starting scheduler %p on lcore %u phys core %u\n",
		sched, rte_lcore_id(),
		rte_lcore_index(rte_lcore_id()));

	/* if more than one, wait for all schedulers to start */
	_lthread_schedulers_sync_start();

	/*
	 * This is the main scheduling loop.
	 * So long as there are tasks in existence we run this loop:
	 * we poll the local ready queue,
	 * and the peer ready queue,
	 * and resume lthreads ad infinitum.
	 */
	while (!_lthread_sched_isdone(sched)) {

		lt = _lthread_queue_poll(sched->ready);

		lt = _lthread_queue_poll(sched->pready);

	/* if more than one wait for all schedulers to stop */
	_lthread_schedulers_sync_stop();

	RTE_LOG(INFO, LTHREAD,
		"stopping scheduler %p on lcore %u phys core %u\n",
		sched, rte_lcore_id(),
		rte_lcore_index(rte_lcore_id()));
/*
 * Return the scheduler for this lcore
 */
struct lthread_sched *_lthread_sched_get(unsigned int lcore_id)
{
	struct lthread_sched *res = NULL;

	if (lcore_id < LTHREAD_MAX_LCORES)
		res = schedcore[lcore_id];

	return res;
}
/*
 * migrate the current thread to another scheduler running
 * on the specified lcore.
 */
int lthread_set_affinity(unsigned lcoreid)
{
	struct lthread *lt = THIS_LTHREAD;
	struct lthread_sched *dest_sched;

	if (unlikely(lcoreid >= LTHREAD_MAX_LCORES))
		return POSIX_ERRNO(EINVAL);

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_AFFINITY, lcoreid, 0);

	dest_sched = schedcore[lcoreid];

	if (unlikely(dest_sched == NULL))
		return POSIX_ERRNO(EINVAL);

	if (likely(dest_sched != THIS_SCHED)) {
		lt->sched = dest_sched;
		lt->pending_wr_queue = dest_sched->pready;