examples/performance-thread: use compiler atomics for sync
author	Joyce Kong <joyce.kong@arm.com>
Wed, 13 Oct 2021 18:54:03 +0000 (13:54 -0500)
committer	David Marchand <david.marchand@redhat.com>
Tue, 19 Oct 2021 15:15:57 +0000 (17:15 +0200)
Convert rte_atomic usages to compiler atomic built-ins
for thread sync.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
examples/performance-thread/common/lthread.c
examples/performance-thread/common/lthread_diag.h
examples/performance-thread/common/lthread_int.h
examples/performance-thread/common/lthread_mutex.c
examples/performance-thread/common/lthread_mutex.h
examples/performance-thread/common/lthread_sched.c
examples/performance-thread/common/lthread_tls.c
examples/performance-thread/l3fwd-thread/main.c

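The conversion pattern applied throughout the patch below maps the legacy rte_atomic helpers onto the equivalent GCC/Clang __atomic built-ins with relaxed ordering. A minimal standalone sketch of that mapping (the counter and flag names here are illustrative, not identifiers taken from the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative variables; not identifiers from the patch. */
    static uint16_t worker_count;
    static uint64_t init_flag;

    int main(void)
    {
            /* rte_atomic16_inc() -> relaxed fetch-add on a plain integer */
            __atomic_fetch_add(&worker_count, 1, __ATOMIC_RELAXED);

            /* rte_atomic16_read() -> relaxed load */
            uint16_t current = __atomic_load_n(&worker_count, __ATOMIC_RELAXED);

            /*
             * rte_atomic64_cmpset(&v, old, new) -> compare-exchange; the
             * expected value is passed by pointer and is updated on failure.
             */
            uint64_t expected = 0;
            int swapped = __atomic_compare_exchange_n(&init_flag, &expected, 1,
                            0 /* strong CAS */, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

            printf("count=%u swapped=%d\n", current, swapped);
            return 0;
    }

Note that __atomic_compare_exchange_n() rewrites the expected value through its pointer argument on failure, which is why the patch introduces local variables such as join_initial and lt_init before each CAS.
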
diff --git a/examples/performance-thread/common/lthread.c b/examples/performance-thread/common/lthread.c
index 3f1f48d..98123f3 100644
--- a/examples/performance-thread/common/lthread.c
+++ b/examples/performance-thread/common/lthread.c
@@ -357,9 +357,10 @@ void lthread_exit(void *ptr)
         *  - if exit before join then we suspend and resume on join
         *  - if join before exit then we resume the joining thread
         */
+       uint64_t join_initial = LT_JOIN_INITIAL;
        if ((lt->join == LT_JOIN_INITIAL)
-           && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
-                                  LT_JOIN_EXITING)) {
+           && __atomic_compare_exchange_n(&lt->join, &join_initial,
+               LT_JOIN_EXITING, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
                DIAG_EVENT(lt, LT_DIAG_LTHREAD_EXIT, 1, 0);
                _suspend();
@@ -415,9 +416,10 @@ int lthread_join(struct lthread *lt, void **ptr)
         *  - if join before exit we suspend and will resume when exit is called
         *  - if exit before join we resume the exiting thread
         */
+       uint64_t join_initial = LT_JOIN_INITIAL;
        if ((lt->join == LT_JOIN_INITIAL)
-           && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
-                                  LT_JOIN_THREAD_SET)) {
+           && __atomic_compare_exchange_n(&lt->join, &join_initial,
+               LT_JOIN_THREAD_SET, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
                DIAG_EVENT(current, LT_DIAG_LTHREAD_JOIN, lt, 1);
                _suspend();
diff --git a/examples/performance-thread/common/lthread_diag.h b/examples/performance-thread/common/lthread_diag.h
index e876dda..7ee89ee 100644
--- a/examples/performance-thread/common/lthread_diag.h
+++ b/examples/performance-thread/common/lthread_diag.h
@@ -78,11 +78,11 @@ extern uint64_t diag_mask;
        }                                                               \
 } while (0)
 
-#define DIAG_COUNT_DEFINE(x) rte_atomic64_t count_##x
-#define DIAG_COUNT_INIT(o, x) rte_atomic64_init(&((o)->count_##x))
-#define DIAG_COUNT_INC(o, x) rte_atomic64_inc(&((o)->count_##x))
-#define DIAG_COUNT_DEC(o, x) rte_atomic64_dec(&((o)->count_##x))
-#define DIAG_COUNT(o, x) rte_atomic64_read(&((o)->count_##x))
+#define DIAG_COUNT_DEFINE(x) uint64_t count_##x
+#define DIAG_COUNT_INIT(o, x) __atomic_store_n(&((o)->count_##x), 0, __ATOMIC_RELAXED)
+#define DIAG_COUNT_INC(o, x) __atomic_fetch_add(&((o)->count_##x), 1, __ATOMIC_RELAXED)
+#define DIAG_COUNT_DEC(o, x) __atomic_fetch_sub(&((o)->count_##x), 1, __ATOMIC_RELAXED)
+#define DIAG_COUNT(o, x) __atomic_load_n(&((o)->count_##x), __ATOMIC_RELAXED)
 
 #define DIAG_USED
 
diff --git a/examples/performance-thread/common/lthread_int.h b/examples/performance-thread/common/lthread_int.h
index a352f13..d010126 100644
--- a/examples/performance-thread/common/lthread_int.h
+++ b/examples/performance-thread/common/lthread_int.h
@@ -21,7 +21,6 @@ extern "C" {
 #include <rte_cycles.h>
 #include <rte_per_lcore.h>
 #include <rte_timer.h>
-#include <rte_atomic_64.h>
 #include <rte_spinlock.h>
 #include <ctx.h>
 
diff --git a/examples/performance-thread/common/lthread_mutex.c b/examples/performance-thread/common/lthread_mutex.c
index 01da6ca..061fc5c 100644
--- a/examples/performance-thread/common/lthread_mutex.c
+++ b/examples/performance-thread/common/lthread_mutex.c
@@ -60,7 +60,7 @@ lthread_mutex_init(char *name, struct lthread_mutex **mutex,
        m->root_sched = THIS_SCHED;
        m->owner = NULL;
 
-       rte_atomic64_init(&m->count);
+       __atomic_store_n(&m->count, 0, __ATOMIC_RELAXED);
 
        DIAG_CREATE_EVENT(m, LT_DIAG_MUTEX_CREATE);
        /* success */
@@ -115,10 +115,11 @@ int lthread_mutex_lock(struct lthread_mutex *m)
        }
 
        for (;;) {
-               rte_atomic64_inc(&m->count);
+               __atomic_fetch_add(&m->count, 1, __ATOMIC_RELAXED);
                do {
-                       if (rte_atomic64_cmpset
-                           ((uint64_t *) &m->owner, 0, (uint64_t) lt)) {
+                       uint64_t lt_init = 0;
+                       if (__atomic_compare_exchange_n((uint64_t *) &m->owner, &lt_init,
+                               (uint64_t) lt, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
                                /* happy days, we got the lock */
                                DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, 0);
                                return 0;
@@ -126,7 +127,7 @@ int lthread_mutex_lock(struct lthread_mutex *m)
                        /* spin due to race with unlock when
                        * nothing was blocked
                        */
-               } while ((rte_atomic64_read(&m->count) == 1) &&
+               } while ((__atomic_load_n(&m->count, __ATOMIC_RELAXED) == 1) &&
                                (m->owner == NULL));
 
                /* queue the current thread in the blocked queue
@@ -160,16 +161,17 @@ int lthread_mutex_trylock(struct lthread_mutex *m)
                return POSIX_ERRNO(EDEADLK);
        }
 
-       rte_atomic64_inc(&m->count);
-       if (rte_atomic64_cmpset
-           ((uint64_t *) &m->owner, (uint64_t) NULL, (uint64_t) lt)) {
+       __atomic_fetch_add(&m->count, 1, __ATOMIC_RELAXED);
+       uint64_t lt_init = 0;
+       if (__atomic_compare_exchange_n((uint64_t *) &m->owner, &lt_init,
+               (uint64_t) lt, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
                /* got the lock */
                DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, 0);
                return 0;
        }
 
        /* failed so return busy */
-       rte_atomic64_dec(&m->count);
+       __atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
        DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EBUSY));
        return POSIX_ERRNO(EBUSY);
 }
@@ -193,13 +195,13 @@ int lthread_mutex_unlock(struct lthread_mutex *m)
                return POSIX_ERRNO(EPERM);
        }
 
-       rte_atomic64_dec(&m->count);
+       __atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
        /* if there are blocked threads then make one ready */
-       while (rte_atomic64_read(&m->count) > 0) {
+       while (__atomic_load_n(&m->count, __ATOMIC_RELAXED) > 0) {
                unblocked = _lthread_queue_remove(m->blocked);
 
                if (unblocked != NULL) {
-                       rte_atomic64_dec(&m->count);
+                       __atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
                        DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, unblocked);
                        RTE_ASSERT(unblocked->sched != NULL);
                        _ready_queue_insert((struct lthread_sched *)
diff --git a/examples/performance-thread/common/lthread_mutex.h b/examples/performance-thread/common/lthread_mutex.h
index cd866f8..730092b 100644
--- a/examples/performance-thread/common/lthread_mutex.h
+++ b/examples/performance-thread/common/lthread_mutex.h
@@ -17,7 +17,7 @@ extern "C" {
 
 struct lthread_mutex {
        struct lthread *owner;
-       rte_atomic64_t  count;
+       uint64_t count;
        struct lthread_queue *blocked __rte_cache_aligned;
        struct lthread_sched *root_sched;
        char                    name[MAX_MUTEX_NAME_SIZE];
diff --git a/examples/performance-thread/common/lthread_sched.c b/examples/performance-thread/common/lthread_sched.c
index 38ca0c4..3784b01 100644
--- a/examples/performance-thread/common/lthread_sched.c
+++ b/examples/performance-thread/common/lthread_sched.c
@@ -22,8 +22,6 @@
 
 #include <rte_prefetch.h>
 #include <rte_per_lcore.h>
-#include <rte_atomic.h>
-#include <rte_atomic_64.h>
 #include <rte_log.h>
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
@@ -47,8 +45,8 @@
  * When a scheduler shuts down it is assumed that the application is terminating
  */
 
-static rte_atomic16_t num_schedulers;
-static rte_atomic16_t active_schedulers;
+static uint16_t num_schedulers;
+static uint16_t active_schedulers;
 
 /* one scheduler per lcore */
 RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;
@@ -64,10 +62,8 @@ uint64_t diag_mask;
 RTE_INIT(lthread_sched_ctor)
 {
        memset(schedcore, 0, sizeof(schedcore));
-       rte_atomic16_init(&num_schedulers);
-       rte_atomic16_set(&num_schedulers, 1);
-       rte_atomic16_init(&active_schedulers);
-       rte_atomic16_set(&active_schedulers, 0);
+       __atomic_store_n(&num_schedulers, 1, __ATOMIC_RELAXED);
+       __atomic_store_n(&active_schedulers, 0, __ATOMIC_RELAXED);
        diag_cb = NULL;
 }
 
@@ -260,8 +256,8 @@ struct lthread_sched *_lthread_sched_create(size_t stack_size)
  */
 int lthread_num_schedulers_set(int num)
 {
-       rte_atomic16_set(&num_schedulers, num);
-       return (int)rte_atomic16_read(&num_schedulers);
+       __atomic_store_n(&num_schedulers, num, __ATOMIC_RELAXED);
+       return (int)__atomic_load_n(&num_schedulers, __ATOMIC_RELAXED);
 }
 
 /*
@@ -269,7 +265,7 @@ int lthread_num_schedulers_set(int num)
  */
 int lthread_active_schedulers(void)
 {
-       return (int)rte_atomic16_read(&active_schedulers);
+       return (int)__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED);
 }
 
 
@@ -299,8 +295,8 @@ void lthread_scheduler_shutdown_all(void)
         * for the possibility of a pthread wrapper on lthread_yield(),
         * something that is not possible unless the scheduler is running.
         */
-       while (rte_atomic16_read(&active_schedulers) <
-              rte_atomic16_read(&num_schedulers))
+       while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
+              __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
                sched_yield();
 
        for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
@@ -415,15 +411,15 @@ static inline int _lthread_sched_isdone(struct lthread_sched *sched)
  */
 static inline void _lthread_schedulers_sync_start(void)
 {
-       rte_atomic16_inc(&active_schedulers);
+       __atomic_fetch_add(&active_schedulers, 1, __ATOMIC_RELAXED);
 
        /* wait for lthread schedulers
         * Note we use sched_yield() rather than pthread_yield() to allow
         * for the possibility of a pthread wrapper on lthread_yield(),
         * something that is not possible unless the scheduler is running.
         */
-       while (rte_atomic16_read(&active_schedulers) <
-              rte_atomic16_read(&num_schedulers))
+       while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
+              __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
                sched_yield();
 
 }
@@ -433,15 +429,15 @@ static inline void _lthread_schedulers_sync_start(void)
  */
 static inline void _lthread_schedulers_sync_stop(void)
 {
-       rte_atomic16_dec(&active_schedulers);
-       rte_atomic16_dec(&num_schedulers);
+       __atomic_fetch_sub(&active_schedulers, 1, __ATOMIC_RELAXED);
+       __atomic_fetch_sub(&num_schedulers, 1, __ATOMIC_RELAXED);
 
        /* wait for schedulers
         * Note we use sched_yield() rather than pthread_yield() to allow
         * for the possibility of a pthread wrapper on lthread_yield(),
         * something that is not possible unless the scheduler is running.
         */
-       while (rte_atomic16_read(&active_schedulers) > 0)
+       while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) > 0)
                sched_yield();
 
 }
diff --git a/examples/performance-thread/common/lthread_tls.c b/examples/performance-thread/common/lthread_tls.c
index 07de6ca..4ab2e35 100644
--- a/examples/performance-thread/common/lthread_tls.c
+++ b/examples/performance-thread/common/lthread_tls.c
@@ -18,7 +18,6 @@
 #include <rte_malloc.h>
 #include <rte_log.h>
 #include <rte_ring.h>
-#include <rte_atomic_64.h>
 
 #include "lthread_tls.h"
 #include "lthread_queue.h"
@@ -52,8 +51,10 @@ void _lthread_key_pool_init(void)
 
        bzero(key_table, sizeof(key_table));
 
+       uint64_t pool_init = 0;
        /* only one lcore should do this */
-       if (rte_atomic64_cmpset(&key_pool_init, 0, 1)) {
+       if (__atomic_compare_exchange_n(&key_pool_init, &pool_init, 1, 0,
+                       __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
                snprintf(name,
                        MAX_LTHREAD_NAME_SIZE,
diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
index 2905199..50ecc4e 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -26,7 +26,6 @@
 #include <rte_memcpy.h>
 #include <rte_eal.h>
 #include <rte_launch.h>
-#include <rte_atomic.h>
 #include <rte_cycles.h>
 #include <rte_prefetch.h>
 #include <rte_lcore.h>
@@ -570,8 +569,8 @@ RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf);
  */
 static int lthreads_on = 1; /**< Use lthreads for processing*/
 
-rte_atomic16_t rx_counter;  /**< Number of spawned rx threads */
-rte_atomic16_t tx_counter;  /**< Number of spawned tx threads */
+uint16_t rx_counter;  /**< Number of spawned rx threads */
+uint16_t tx_counter;  /**< Number of spawned tx threads */
 
 struct thread_conf {
        uint16_t lcore_id;      /**< Initial lcore for rx thread */
@@ -1910,11 +1909,8 @@ cpu_load_collector(__rte_unused void *arg) {
        printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread,
                        n_tx_thread);
 
-       while (rte_atomic16_read(&rx_counter) < n_rx_thread)
-               rte_pause();
-
-       while (rte_atomic16_read(&tx_counter) < n_tx_thread)
-               rte_pause();
+       rte_wait_until_equal_16(&rx_counter, n_rx_thread, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tx_counter, n_tx_thread, __ATOMIC_RELAXED);
 
        for (i = 0; i < n_rx_thread; i++) {
 
@@ -2036,7 +2032,7 @@ lthread_tx_per_ring(void *dummy)
        RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id());
 
        nb_rx = 0;
-       rte_atomic16_inc(&tx_counter);
+       __atomic_fetch_add(&tx_counter, 1, __ATOMIC_RELAXED);
        while (1) {
 
                /*
@@ -2161,7 +2157,7 @@ lthread_rx(void *dummy)
        worker_id = 0;
 
        rx_conf->conf.cpu_id = sched_getcpu();
-       rte_atomic16_inc(&rx_counter);
+       __atomic_fetch_add(&rx_counter, 1, __ATOMIC_RELAXED);
        while (1) {
 
                /*
@@ -2243,7 +2239,7 @@ lthread_spawner(__rte_unused void *arg)
         * scheduler as this lthread, yielding is required to let them to run and
         * prevent deadlock here.
         */
-       while (rte_atomic16_read(&rx_counter) < n_rx_thread)
+       while (__atomic_load_n(&rx_counter, __ATOMIC_RELAXED) < n_rx_thread)
                lthread_sleep(100000);
 
        /*
@@ -2323,7 +2319,7 @@ pthread_tx(void *dummy)
        RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id());
 
        tx_conf->conf.cpu_id = sched_getcpu();
-       rte_atomic16_inc(&tx_counter);
+       __atomic_fetch_add(&tx_counter, 1, __ATOMIC_RELAXED);
        while (1) {
 
                cur_tsc = rte_rdtsc();
@@ -2406,7 +2402,7 @@ pthread_rx(void *dummy)
 
        worker_id = 0;
        rx_conf->conf.cpu_id = sched_getcpu();
-       rte_atomic16_inc(&rx_counter);
+       __atomic_fetch_add(&rx_counter, 1, __ATOMIC_RELAXED);
        while (1) {
 
                /*