/*
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Some portions of this software are derived from
 * https://github.com/halayli/lthread, which carries the following license:
 *
 * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sched.h>

#include <rte_config.h>
#include <rte_prefetch.h>
#include <rte_per_lcore.h>
#include <rte_atomic.h>
#include <rte_atomic_64.h>
#include <rte_log.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>

#include "lthread_api.h"
#include "lthread_int.h"
#include "lthread_sched.h"
#include "lthread_objcache.h"
#include "lthread_timer.h"
#include "lthread_mutex.h"
#include "lthread_cond.h"
#include "lthread_tls.h"
#include "lthread_diag.h"
/*
 * This file implements the lthread scheduler.
 * The scheduler is the function lthread_run(), which must be run as the
 * main loop of an EAL thread.
 *
 * Currently, once a scheduler is created it cannot be destroyed.
 * When a scheduler shuts down, it is assumed that the application is
 * terminating.
 */
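/*
 * Illustrative usage sketch (not part of this file): the sched_main name and
 * the launch sequence below are assumptions about a typical application,
 * shown only to indicate how lthread_run() is expected to be driven. Each
 * participating EAL lcore runs lthread_run() as its main loop; the number of
 * schedulers is set before any of them is launched.
 *
 *	static int sched_main(void *arg __rte_unused)
 *	{
 *		// optionally create an initial lthread here with lthread_create()
 *		lthread_run();	// returns only when this scheduler shuts down
 *		return 0;
 *	}
 *
 *	// in main(), after rte_eal_init():
 *	lthread_num_schedulers_set(rte_lcore_count());
 *	rte_eal_mp_remote_launch(sched_main, NULL, CALL_MASTER);
 *	rte_eal_mp_wait_lcore();
 */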
static rte_atomic16_t num_schedulers;
static rte_atomic16_t active_schedulers;

/* one scheduler per lcore */
RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;

struct lthread_sched *schedcore[LTHREAD_MAX_LCORES];

diag_callback diag_cb;
/* Constructor: runs before main() and initializes the scheduler bookkeeping */
void lthread_sched_ctor(void) __attribute__ ((constructor));
void lthread_sched_ctor(void)
{
	memset(schedcore, 0, sizeof(schedcore));
	rte_atomic16_init(&num_schedulers);
	rte_atomic16_set(&num_schedulers, 1);
	rte_atomic16_init(&active_schedulers);
	rte_atomic16_set(&active_schedulers, 0);
}
/* phases of scheduler resource allocation, recorded for rollback on failure */
enum sched_alloc_phase {
	SCHED_ALLOC_OK = 0,
	SCHED_ALLOC_QNODE_POOL,
	SCHED_ALLOC_READY_QUEUE,
	SCHED_ALLOC_PREADY_QUEUE,
	SCHED_ALLOC_LTHREAD_CACHE,
	SCHED_ALLOC_STACK_CACHE,
	SCHED_ALLOC_PERLT_CACHE,
	SCHED_ALLOC_TLS_CACHE,
	SCHED_ALLOC_COND_CACHE,
	SCHED_ALLOC_MUTEX_CACHE,
};
/*
 * Allocate the per-scheduler resources; on failure, roll back whatever was
 * allocated before the failing phase.
 */
static int
_lthread_sched_alloc_resources(struct lthread_sched *new_sched)
{
	int alloc_status;

	/* Initialize per scheduler queue node pool */
	alloc_status = SCHED_ALLOC_QNODE_POOL;
	new_sched->qnode_pool =
		_qnode_pool_create("qnode pool", LTHREAD_PREALLOC);
	if (new_sched->qnode_pool == NULL)
		goto cleanup;

	/* Initialize per scheduler local ready queue */
	alloc_status = SCHED_ALLOC_READY_QUEUE;
	new_sched->ready = _lthread_queue_create("ready queue");
	if (new_sched->ready == NULL)
		goto cleanup;

	/* Initialize per scheduler local peer ready queue */
	alloc_status = SCHED_ALLOC_PREADY_QUEUE;
	new_sched->pready = _lthread_queue_create("pready queue");
	if (new_sched->pready == NULL)
		goto cleanup;

	/* Initialize per scheduler local free lthread cache */
	alloc_status = SCHED_ALLOC_LTHREAD_CACHE;
	new_sched->lthread_cache =
		_lthread_objcache_create("lthread cache",
					sizeof(struct lthread),
					LTHREAD_PREALLOC);
	if (new_sched->lthread_cache == NULL)
		goto cleanup;

	/* Initialize per scheduler local free stack cache */
	alloc_status = SCHED_ALLOC_STACK_CACHE;
	new_sched->stack_cache =
		_lthread_objcache_create("stack_cache",
					sizeof(struct lthread_stack),
					LTHREAD_PREALLOC);
	if (new_sched->stack_cache == NULL)
		goto cleanup;

	/* Initialize per scheduler local free per lthread data cache */
	alloc_status = SCHED_ALLOC_PERLT_CACHE;
	new_sched->per_lthread_cache =
		_lthread_objcache_create("per_lt cache",
					RTE_PER_LTHREAD_SECTION_SIZE,
					LTHREAD_PREALLOC);
	if (new_sched->per_lthread_cache == NULL)
		goto cleanup;

	/* Initialize per scheduler local free tls cache */
	alloc_status = SCHED_ALLOC_TLS_CACHE;
	new_sched->tls_cache =
		_lthread_objcache_create("TLS cache",
					sizeof(struct lthread_tls),
					LTHREAD_PREALLOC);
	if (new_sched->tls_cache == NULL)
		goto cleanup;

	/* Initialize per scheduler local free cond var cache */
	alloc_status = SCHED_ALLOC_COND_CACHE;
	new_sched->cond_cache =
		_lthread_objcache_create("cond cache",
					sizeof(struct lthread_cond),
					LTHREAD_PREALLOC);
	if (new_sched->cond_cache == NULL)
		goto cleanup;

	/* Initialize per scheduler local free mutex cache */
	alloc_status = SCHED_ALLOC_MUTEX_CACHE;
	new_sched->mutex_cache =
		_lthread_objcache_create("mutex cache",
					sizeof(struct lthread_mutex),
					LTHREAD_PREALLOC);
	if (new_sched->mutex_cache == NULL)
		goto cleanup;

	alloc_status = SCHED_ALLOC_OK;
	return 0;

cleanup:
	/* roll back on any failure, in reverse order of allocation */
	switch (alloc_status) {
	case SCHED_ALLOC_MUTEX_CACHE:
		_lthread_objcache_destroy(new_sched->cond_cache);
		/* fall through */
	case SCHED_ALLOC_COND_CACHE:
		_lthread_objcache_destroy(new_sched->tls_cache);
		/* fall through */
	case SCHED_ALLOC_TLS_CACHE:
		_lthread_objcache_destroy(new_sched->per_lthread_cache);
		/* fall through */
	case SCHED_ALLOC_PERLT_CACHE:
		_lthread_objcache_destroy(new_sched->stack_cache);
		/* fall through */
	case SCHED_ALLOC_STACK_CACHE:
		_lthread_objcache_destroy(new_sched->lthread_cache);
		/* fall through */
	case SCHED_ALLOC_LTHREAD_CACHE:
		_lthread_queue_destroy(new_sched->pready);
		/* fall through */
	case SCHED_ALLOC_PREADY_QUEUE:
		_lthread_queue_destroy(new_sched->ready);
		/* fall through */
	case SCHED_ALLOC_READY_QUEUE:
		_qnode_pool_destroy(new_sched->qnode_pool);
		/* fall through */
	case SCHED_ALLOC_QNODE_POOL:
	default:
		break;
	}
	return alloc_status;
}
/*
 * Create a scheduler on the current lcore
 */
struct lthread_sched *_lthread_sched_create(size_t stack_size)
{
	int status;
	struct lthread_sched *new_sched;
	unsigned lcoreid = rte_lcore_id();

	LTHREAD_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE);

	if (stack_size == 0)
		stack_size = LTHREAD_MAX_STACK_SIZE;

	new_sched =
		rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched),
				RTE_CACHE_LINE_SIZE,
				rte_socket_id());
	if (new_sched == NULL) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate memory for scheduler\n");
		return NULL;
	}

	_lthread_key_pool_init();

	new_sched->stack_size = stack_size;
	new_sched->birth = rte_rdtsc();
	THIS_SCHED = new_sched;

	status = _lthread_sched_alloc_resources(new_sched);
	if (status != SCHED_ALLOC_OK) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate resources for scheduler code = %d\n",
			status);
		rte_free(new_sched);
		return NULL;
	}

	bzero(&new_sched->ctx, sizeof(struct ctx));
	new_sched->lcore_id = lcoreid;
	schedcore[lcoreid] = new_sched;
	new_sched->run_flag = 1;

	DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0);
	return new_sched;
}
/*
 * Set the number of schedulers in the system
 */
int lthread_num_schedulers_set(int num)
{
	rte_atomic16_set(&num_schedulers, num);
	return (int)rte_atomic16_read(&num_schedulers);
}

/*
 * Return the number of schedulers active
 */
int lthread_active_schedulers(void)
{
	return (int)rte_atomic16_read(&active_schedulers);
}
/*
 * shutdown the scheduler running on the specified lcore
 */
void lthread_scheduler_shutdown(unsigned lcoreid)
{
	uint64_t coreid = (uint64_t) lcoreid;

	if (coreid < LTHREAD_MAX_LCORES) {
		if (schedcore[coreid] != NULL)
			schedcore[coreid]->run_flag = 0;
	}
}
/*
 * shutdown all schedulers
 */
void lthread_scheduler_shutdown_all(void)
{
	uint64_t i;

	/*
	 * give time for all schedulers to have started
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (rte_atomic16_read(&active_schedulers) <
	       rte_atomic16_read(&num_schedulers))
		sched_yield();

	for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
		if (schedcore[i] != NULL)
			schedcore[i]->run_flag = 0;
	}
}
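/*
 * Illustrative shutdown sequence (a sketch, not part of this file): once the
 * application's work is complete, any thread may request that every scheduler
 * stop; each lthread_run() loop then exits as soon as its ready and pready
 * queues drain and it has no blocked lthreads left.
 *
 *	lthread_scheduler_shutdown_all();	// clear run_flag on every scheduler
 *	lthread_exit(NULL);			// terminate the calling lthread
 */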
/*
 * Resume a suspended lthread
 */
static inline void
_lthread_resume(struct lthread *lt) __attribute__ ((always_inline));
static inline void _lthread_resume(struct lthread *lt)
{
	struct lthread_sched *sched = THIS_SCHED;
	struct lthread_stack *s;
	uint64_t state = lt->state;
#if LTHREAD_DIAG
	int init = 0;
#endif

	sched->current_lthread = lt;

	if (state & (BIT(ST_LT_CANCELLED) | BIT(ST_LT_EXITED))) {
		/* if detached we can free the thread now */
		if (state & BIT(ST_LT_DETACH)) {
			_lthread_free(lt);
			sched->current_lthread = NULL;
			return;
		}
	}

	if (state & BIT(ST_LT_INIT)) {
		/* first time this thread has been run */
		/* assign thread to this scheduler */
		lt->sched = THIS_SCHED;

		/* allocate a stack for this thread */
		s = _stack_alloc();
		lt->stack_container = s;
		_lthread_set_stack(lt, s->stack, s->stack_size);

		/* allocate memory for TLS used by this thread */
		_lthread_tls_alloc(lt);

		lt->state = BIT(ST_LT_READY);
#if LTHREAD_DIAG
		init = 1;
#endif
	}

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESUMED, init, lt);

	/* switch to the new thread */
	ctx_switch(&lt->ctx, &sched->ctx);

	/* If posting to a queue that could be read by another lcore
	 * we defer the queue write till now, to ensure the context has been
	 * saved before the other core tries to resume it.
	 * This applies to blocking on mutex, cond, and to set_affinity.
	 */
	if (lt->pending_wr_queue != NULL) {
		struct lthread_queue *dest = lt->pending_wr_queue;

		lt->pending_wr_queue = NULL;

		/* queue the current thread to the specified queue */
		_lthread_queue_insert_mp(dest, lt);
	}

	sched->current_lthread = NULL;
}
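/*
 * Sketch of how a blocking primitive is expected to use the deferred queue
 * write above (the _suspend() helper and the wait-queue name are illustrative
 * assumptions, not definitions from this file): the caller records its
 * destination queue and switches back to the scheduler; only after
 * ctx_switch() returns in _lthread_resume() is the lthread inserted into the
 * (possibly remote) queue, guaranteeing its context is fully saved before
 * another lcore can pick it up.
 *
 *	lt->pending_wr_queue = some_wait_queue;	// e.g. a mutex's blocked queue
 *	_suspend();	// hypothetical: ctx_switch back to the scheduler
 */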
/*
 * Handle sleep timer expiry
 */
void
_sched_timer_cb(struct rte_timer *tim, void *arg)
{
	struct lthread *lt = (struct lthread *) arg;
	uint64_t state = lt->state;

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_EXPIRED, &lt->tim, 0);

	/* if the thread was cancelled while sleeping it is no longer blocked */
	if (lt->state & BIT(ST_LT_CANCELLED))
		(THIS_SCHED)->nb_blocked_threads--;

	lt->state = state | BIT(ST_LT_EXPIRED);
	_lthread_resume(lt);
	lt->state = state & CLEARBIT(ST_LT_EXPIRED);
}
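/*
 * Sketch of how this callback is presumably armed (the surrounding sleep
 * helper is an assumption; only rte_timer_reset() and the lt->tim field
 * referenced above are taken from the code): a sleeping lthread starts a
 * single-shot timer on its own lcore with itself as the callback argument.
 *
 *	rte_timer_reset(&lt->tim, clks, SINGLE, rte_lcore_id(),
 *			_sched_timer_cb, (void *)lt);
 */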
/*
 * Returns 0 if there is a pending job in the scheduler, or 1 if done and it
 * can exit.
 */
static inline int _lthread_sched_isdone(struct lthread_sched *sched)
{
	return (sched->run_flag == 0) &&
			(_lthread_queue_empty(sched->ready)) &&
			(_lthread_queue_empty(sched->pready)) &&
			(sched->nb_blocked_threads == 0);
}
/*
 * Wait for all schedulers to start
 */
static inline void _lthread_schedulers_sync_start(void)
{
	rte_atomic16_inc(&active_schedulers);

	/* wait for lthread schedulers
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (rte_atomic16_read(&active_schedulers) <
	       rte_atomic16_read(&num_schedulers))
		sched_yield();
}
/*
 * Wait for all schedulers to stop
 */
static inline void _lthread_schedulers_sync_stop(void)
{
	rte_atomic16_dec(&active_schedulers);
	rte_atomic16_dec(&num_schedulers);

	/* wait for schedulers
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (rte_atomic16_read(&active_schedulers) > 0)
		sched_yield();
}
/*
 * Run the lthread scheduler
 * This loop is the heart of the system
 */
void lthread_run(void)
{
	struct lthread_sched *sched = THIS_SCHED;
	struct lthread *lt = NULL;

	RTE_LOG(INFO, LTHREAD,
		"starting scheduler %p on lcore %u phys core %u\n",
		sched, rte_lcore_id(),
		rte_lcore_index(rte_lcore_id()));

	/* if more than one, wait for all schedulers to start */
	_lthread_schedulers_sync_start();

	/*
	 * This is the main scheduling loop.
	 * So long as there are tasks in existence we run this loop.
	 * We check for:
	 *   expired timers,
	 *   the local ready queue,
	 *   and the peer ready queue,
	 * and resume lthreads ad infinitum.
	 */
	while (!_lthread_sched_isdone(sched)) {

		rte_timer_manage();

		lt = _lthread_queue_poll(sched->ready);
		if (lt != NULL)
			_lthread_resume(lt);

		lt = _lthread_queue_poll(sched->pready);
		if (lt != NULL)
			_lthread_resume(lt);
	}

	/* if more than one, wait for all schedulers to stop */
	_lthread_schedulers_sync_stop();

	RTE_LOG(INFO, LTHREAD,
		"stopping scheduler %p on lcore %u phys core %u\n",
		sched, rte_lcore_id(),
		rte_lcore_index(rte_lcore_id()));
}
/*
 * Return the scheduler for the specified lcore
 */
struct lthread_sched *_lthread_sched_get(int lcore_id)
{
	/* bounds check: schedcore[] has LTHREAD_MAX_LCORES entries */
	if (lcore_id < 0 || lcore_id >= LTHREAD_MAX_LCORES)
		return NULL;
	return schedcore[lcore_id];
}
/*
 * migrate the current thread to another scheduler running
 * on the specified lcore.
 */
int lthread_set_affinity(unsigned lcoreid)
{
	struct lthread *lt = THIS_LTHREAD;
	struct lthread_sched *dest_sched;

	if (unlikely(lcoreid >= LTHREAD_MAX_LCORES))
		return POSIX_ERRNO(EINVAL);

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_AFFINITY, lcoreid, 0);

	dest_sched = schedcore[lcoreid];

	if (unlikely(dest_sched == NULL))
		return POSIX_ERRNO(EINVAL);

	if (likely(dest_sched != THIS_SCHED)) {
		lt->sched = dest_sched;
		lt->pending_wr_queue = dest_sched->pready;