/*
 * SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 Intel Corporation.
 * Copyright 2012 Hasan Alayli <halayli@gmail.com>
 */
#include <rte_prefetch.h>
#include <rte_per_lcore.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>

#include "lthread_api.h"
#include "lthread_int.h"
#include "lthread_sched.h"
#include "lthread_objcache.h"
#include "lthread_timer.h"
#include "lthread_mutex.h"
#include "lthread_cond.h"
#include "lthread_tls.h"
#include "lthread_diag.h"
/*
 * This file implements the lthread scheduler.
 * The scheduler is the function lthread_run(); it must be run as the
 * main loop of an EAL thread.
 *
 * Currently, once a scheduler has been created it cannot be destroyed.
 * When a scheduler shuts down it is assumed that the application is
 * terminating.
 */
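/*
 * Illustrative usage sketch (not part of this file): a typical application
 * tells the library how many schedulers will run, starts lthread_run() on
 * every worker lcore and then runs a scheduler on the main lcore as well.
 * The helper names sched_main() and app_lthread_main() are hypothetical;
 * only the lthread_* and rte_* calls are real APIs, and the launch sequence
 * shown is an assumed pattern, not something this file mandates.
 *
 *	static int sched_main(void *arg __rte_unused)
 *	{
 *		struct lthread *lt;
 *
 *		// seed this lcore with an initial lthread; the per-lcore
 *		// scheduler is created on demand by the lthread API
 *		lthread_create(&lt, (int)rte_lcore_id(), app_lthread_main, NULL);
 *		lthread_run();	// returns when this scheduler is shut down
 *		return 0;
 *	}
 *
 *	// in main(), after rte_eal_init():
 *	unsigned int lcore_id;
 *	lthread_num_schedulers_set((int)rte_lcore_count());
 *	RTE_LCORE_FOREACH_WORKER(lcore_id)
 *		rte_eal_remote_launch(sched_main, NULL, lcore_id);
 *	sched_main(NULL);
 */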
static uint16_t num_schedulers;
static uint16_t active_schedulers;

/* one scheduler per lcore */
RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;

struct lthread_sched *schedcore[LTHREAD_MAX_LCORES];

diag_callback diag_cb;
RTE_INIT(lthread_sched_ctor)
{
	memset(schedcore, 0, sizeof(schedcore));
	__atomic_store_n(&num_schedulers, 1, __ATOMIC_RELAXED);
	__atomic_store_n(&active_schedulers, 0, __ATOMIC_RELAXED);
}
enum sched_alloc_phase {
	SCHED_ALLOC_OK = 0,
	SCHED_ALLOC_QNODE_POOL,
	SCHED_ALLOC_READY_QUEUE,
	SCHED_ALLOC_PREADY_QUEUE,
	SCHED_ALLOC_LTHREAD_CACHE,
	SCHED_ALLOC_STACK_CACHE,
	SCHED_ALLOC_PERLT_CACHE,
	SCHED_ALLOC_TLS_CACHE,
	SCHED_ALLOC_COND_CACHE,
	SCHED_ALLOC_MUTEX_CACHE,
};
static int
_lthread_sched_alloc_resources(struct lthread_sched *new_sched)
{
	int alloc_status;

	/* Initialize per scheduler queue node pool */
	alloc_status = SCHED_ALLOC_QNODE_POOL;
	new_sched->qnode_pool =
		_qnode_pool_create("qnode pool", LTHREAD_PREALLOC);
	if (new_sched->qnode_pool == NULL)

	/* Initialize per scheduler local ready queue */
	alloc_status = SCHED_ALLOC_READY_QUEUE;
	new_sched->ready = _lthread_queue_create("ready queue");
	if (new_sched->ready == NULL)

	/* Initialize per scheduler local peer ready queue */
	alloc_status = SCHED_ALLOC_PREADY_QUEUE;
	new_sched->pready = _lthread_queue_create("pready queue");
	if (new_sched->pready == NULL)

	/* Initialize per scheduler local free lthread cache */
	alloc_status = SCHED_ALLOC_LTHREAD_CACHE;
	new_sched->lthread_cache =
		_lthread_objcache_create("lthread cache",
					 sizeof(struct lthread),
	if (new_sched->lthread_cache == NULL)

	/* Initialize per scheduler local free stack cache */
	alloc_status = SCHED_ALLOC_STACK_CACHE;
	new_sched->stack_cache =
		_lthread_objcache_create("stack_cache",
					 sizeof(struct lthread_stack),
	if (new_sched->stack_cache == NULL)

	/* Initialize per scheduler local free per lthread data cache */
	alloc_status = SCHED_ALLOC_PERLT_CACHE;
	new_sched->per_lthread_cache =
		_lthread_objcache_create("per_lt cache",
					 RTE_PER_LTHREAD_SECTION_SIZE,
	if (new_sched->per_lthread_cache == NULL)

	/* Initialize per scheduler local free tls cache */
	alloc_status = SCHED_ALLOC_TLS_CACHE;
	new_sched->tls_cache =
		_lthread_objcache_create("TLS cache",
					 sizeof(struct lthread_tls),
	if (new_sched->tls_cache == NULL)

	/* Initialize per scheduler local free cond var cache */
	alloc_status = SCHED_ALLOC_COND_CACHE;
	new_sched->cond_cache =
		_lthread_objcache_create("cond cache",
					 sizeof(struct lthread_cond),
	if (new_sched->cond_cache == NULL)

	/* Initialize per scheduler local free mutex cache */
	alloc_status = SCHED_ALLOC_MUTEX_CACHE;
	new_sched->mutex_cache =
		_lthread_objcache_create("mutex cache",
					 sizeof(struct lthread_mutex),
	if (new_sched->mutex_cache == NULL)

	alloc_status = SCHED_ALLOC_OK;
	/* roll back on any failure */
	switch (alloc_status) {
	case SCHED_ALLOC_MUTEX_CACHE:
		_lthread_objcache_destroy(new_sched->cond_cache);
		/* fall through */
	case SCHED_ALLOC_COND_CACHE:
		_lthread_objcache_destroy(new_sched->tls_cache);
		/* fall through */
	case SCHED_ALLOC_TLS_CACHE:
		_lthread_objcache_destroy(new_sched->per_lthread_cache);
		/* fall through */
	case SCHED_ALLOC_PERLT_CACHE:
		_lthread_objcache_destroy(new_sched->stack_cache);
		/* fall through */
	case SCHED_ALLOC_STACK_CACHE:
		_lthread_objcache_destroy(new_sched->lthread_cache);
		/* fall through */
	case SCHED_ALLOC_LTHREAD_CACHE:
		_lthread_queue_destroy(new_sched->pready);
		/* fall through */
	case SCHED_ALLOC_PREADY_QUEUE:
		_lthread_queue_destroy(new_sched->ready);
		/* fall through */
	case SCHED_ALLOC_READY_QUEUE:
		_qnode_pool_destroy(new_sched->qnode_pool);
		/* fall through */
	case SCHED_ALLOC_QNODE_POOL:
/*
 * Create a scheduler on the current lcore
 */
struct lthread_sched *_lthread_sched_create(size_t stack_size)
	struct lthread_sched *new_sched;
	unsigned lcoreid = rte_lcore_id();

	RTE_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE);

	if (stack_size == 0)
		stack_size = LTHREAD_MAX_STACK_SIZE;

	new_sched =
		rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched),
				  RTE_CACHE_LINE_SIZE,
				  rte_socket_id());
	if (new_sched == NULL) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate memory for scheduler\n");
		return NULL;
	}

	_lthread_key_pool_init();

	new_sched->stack_size = stack_size;
	new_sched->birth = rte_rdtsc();
	THIS_SCHED = new_sched;

	status = _lthread_sched_alloc_resources(new_sched);
	if (status != SCHED_ALLOC_OK) {
		RTE_LOG(CRIT, LTHREAD,
			"Failed to allocate resources for scheduler code = %d\n",
			status);

	bzero(&new_sched->ctx, sizeof(struct ctx));

	new_sched->lcore_id = lcoreid;

	schedcore[lcoreid] = new_sched;

	new_sched->run_flag = 1;

	DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0);
/*
 * Set the number of schedulers in the system
 */
int lthread_num_schedulers_set(int num)
{
	__atomic_store_n(&num_schedulers, num, __ATOMIC_RELAXED);
	return (int)__atomic_load_n(&num_schedulers, __ATOMIC_RELAXED);
}
/*
 * Return the number of schedulers active
 */
int lthread_active_schedulers(void)
{
	return (int)__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED);
}
/*
 * shutdown the scheduler running on the specified lcore
 */
void lthread_scheduler_shutdown(unsigned lcoreid)
{
	uint64_t coreid = (uint64_t) lcoreid;

	if (coreid < LTHREAD_MAX_LCORES) {
		if (schedcore[coreid] != NULL)
			schedcore[coreid]->run_flag = 0;
	}
}
/*
 * shutdown all schedulers
 */
void lthread_scheduler_shutdown_all(void)
{
	uint64_t i;

	/*
	 * give time for all schedulers to have started
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
	       __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
		sched_yield();

	for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
		if (schedcore[i] != NULL)
			schedcore[i]->run_flag = 0;
	}
}
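/*
 * Illustrative teardown sketch (not part of this file): a controlling
 * thread can stop every scheduler and then wait for the worker lcores
 * running lthread_run() to return. Only the functions shown are real
 * APIs; the surrounding sequence is an assumed usage pattern.
 *
 *	unsigned int lcore_id;
 *
 *	lthread_scheduler_shutdown_all();
 *	RTE_LCORE_FOREACH_WORKER(lcore_id)
 *		rte_eal_wait_lcore(lcore_id);
 */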
/*
 * Resume a suspended lthread
 */
static __rte_always_inline void
_lthread_resume(struct lthread *lt);
static inline void _lthread_resume(struct lthread *lt)
{
	struct lthread_sched *sched = THIS_SCHED;
	struct lthread_stack *s;
	uint64_t state = lt->state;

	sched->current_lthread = lt;

	if (state & (BIT(ST_LT_CANCELLED) | BIT(ST_LT_EXITED))) {
		/* if detached we can free the thread now */
		if (state & BIT(ST_LT_DETACH)) {
			sched->current_lthread = NULL;

	if (state & BIT(ST_LT_INIT)) {
		/* first time this thread has been run */
		/* assign thread to this scheduler */
		lt->sched = THIS_SCHED;

		lt->stack_container = s;
		_lthread_set_stack(lt, s->stack, s->stack_size);

		/* allocate memory for TLS used by this thread */
		_lthread_tls_alloc(lt);

		lt->state = BIT(ST_LT_READY);

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESUMED, init, lt);

	/* switch to the new thread */
	ctx_switch(&lt->ctx, &sched->ctx);

	/* If posting to a queue that could be read by another lcore
	 * we defer the queue write till now to ensure the context has been
	 * saved before the other core tries to resume it
	 * This applies to blocking on mutex, cond, and to set_affinity
	 */
	if (lt->pending_wr_queue != NULL) {
		struct lthread_queue *dest = lt->pending_wr_queue;

		lt->pending_wr_queue = NULL;

		/* queue the current thread to the specified queue */
		_lthread_queue_insert_mp(dest, lt);
	}

	sched->current_lthread = NULL;
/*
 * Handle sleep timer expiry
 */
static void
_sched_timer_cb(struct rte_timer *tim, void *arg)
{
	struct lthread *lt = (struct lthread *) arg;
	uint64_t state = lt->state;

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_EXPIRED, &lt->tim, 0);

	if (lt->state & BIT(ST_LT_CANCELLED))
		(THIS_SCHED)->nb_blocked_threads--;

	lt->state = state | BIT(ST_LT_EXPIRED);

	lt->state = state & CLEARBIT(ST_LT_EXPIRED);
}
/*
 * Returns 0 if there is a pending job in scheduler or 1 if done and can exit.
 */
static inline int _lthread_sched_isdone(struct lthread_sched *sched)
{
	return (sched->run_flag == 0) &&
		(_lthread_queue_empty(sched->ready)) &&
		(_lthread_queue_empty(sched->pready)) &&
		(sched->nb_blocked_threads == 0);
}
/*
 * Wait for all schedulers to start
 */
static inline void _lthread_schedulers_sync_start(void)
{
	__atomic_fetch_add(&active_schedulers, 1, __ATOMIC_RELAXED);

	/* wait for lthread schedulers
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
	       __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
		sched_yield();
}
/*
 * Wait for all schedulers to stop
 */
static inline void _lthread_schedulers_sync_stop(void)
{
	__atomic_fetch_sub(&active_schedulers, 1, __ATOMIC_RELAXED);
	__atomic_fetch_sub(&num_schedulers, 1, __ATOMIC_RELAXED);

	/* wait for schedulers
	 * Note we use sched_yield() rather than pthread_yield() to allow
	 * for the possibility of a pthread wrapper on lthread_yield(),
	 * something that is not possible unless the scheduler is running.
	 */
	while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) > 0)
		sched_yield();
}
/*
 * Run the lthread scheduler
 * This loop is the heart of the system
 */
void lthread_run(void)
{
	struct lthread_sched *sched = THIS_SCHED;
	struct lthread *lt = NULL;

	RTE_LOG(INFO, LTHREAD,
		"starting scheduler %p on lcore %u phys core %u\n",
		sched, rte_lcore_id(),
		rte_lcore_index(rte_lcore_id()));

	/* if more than one, wait for all schedulers to start */
	_lthread_schedulers_sync_start();

	/*
	 * This is the main scheduling loop.
	 * So long as there are tasks in existence we run this loop:
	 * we poll the local ready queue,
	 * and the peer ready queue,
	 * and resume lthreads ad infinitum.
	 */
	while (!_lthread_sched_isdone(sched)) {

		lt = _lthread_queue_poll(sched->ready);

		lt = _lthread_queue_poll(sched->pready);

	/* if more than one wait for all schedulers to stop */
	_lthread_schedulers_sync_stop();

	RTE_LOG(INFO, LTHREAD,
		"stopping scheduler %p on lcore %u phys core %u\n",
		sched, rte_lcore_id(),
		rte_lcore_index(rte_lcore_id()));
/*
 * Return the scheduler for this lcore
 */
struct lthread_sched *_lthread_sched_get(unsigned int lcore_id)
{
	struct lthread_sched *res = NULL;

	if (lcore_id < LTHREAD_MAX_LCORES)
		res = schedcore[lcore_id];

	return res;
}
/*
 * migrate the current thread to another scheduler running
 * on the specified lcore.
 */
int lthread_set_affinity(unsigned lcoreid)
{
	struct lthread *lt = THIS_LTHREAD;
	struct lthread_sched *dest_sched;

	if (unlikely(lcoreid >= LTHREAD_MAX_LCORES))
		return POSIX_ERRNO(EINVAL);

	DIAG_EVENT(lt, LT_DIAG_LTHREAD_AFFINITY, lcoreid, 0);

	dest_sched = schedcore[lcoreid];

	if (unlikely(dest_sched == NULL))
		return POSIX_ERRNO(EINVAL);

	if (likely(dest_sched != THIS_SCHED)) {
		lt->sched = dest_sched;
		lt->pending_wr_queue = dest_sched->pready;