examples/performance-thread: use compiler atomics for sync
author	Joyce Kong <joyce.kong@arm.com>
Wed, 13 Oct 2021 18:54:03 +0000 (13:54 -0500)
committer	David Marchand <david.marchand@redhat.com>
Tue, 19 Oct 2021 15:15:57 +0000 (17:15 +0200)
Convert rte_atomic usages to compiler atomic built-ins
for thread sync.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
examples/performance-thread/common/lthread.c
examples/performance-thread/common/lthread_diag.h
examples/performance-thread/common/lthread_int.h
examples/performance-thread/common/lthread_mutex.c
examples/performance-thread/common/lthread_mutex.h
examples/performance-thread/common/lthread_sched.c
examples/performance-thread/common/lthread_tls.c
examples/performance-thread/l3fwd-thread/main.c

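The conversion pattern applied throughout the patch below maps the legacy rte_atomic helpers onto the equivalent GCC/Clang __atomic built-ins with relaxed ordering. A minimal standalone sketch of that mapping (the counter and flag names here are illustrative, not identifiers taken from the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative variables; not identifiers from the patch. */
    static uint16_t worker_count;
    static uint64_t init_flag;

    int main(void)
    {
            /* rte_atomic16_inc() -> relaxed fetch-add on a plain integer */
            __atomic_fetch_add(&worker_count, 1, __ATOMIC_RELAXED);

            /* rte_atomic16_read() -> relaxed load */
            uint16_t current = __atomic_load_n(&worker_count, __ATOMIC_RELAXED);

            /*
             * rte_atomic64_cmpset(&v, old, new) -> compare-exchange; the
             * expected value is passed by pointer and is updated on failure.
             */
            uint64_t expected = 0;
            int swapped = __atomic_compare_exchange_n(&init_flag, &expected, 1,
                            0 /* strong CAS */, __ATOMIC_RELAXED, __ATOMIC_RELAXED);

            printf("count=%u swapped=%d\n", current, swapped);
            return 0;
    }

Note that __atomic_compare_exchange_n() rewrites the expected value through its pointer argument on failure, which is why the patch introduces local variables such as join_initial and lt_init before each CAS.
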
diff --git a/examples/performance-thread/common/lthread.c b/examples/performance-thread/common/lthread.c
index 3f1f48d..98123f3 100644
--- a/examples/performance-thread/common/lthread.c
+++ b/examples/performance-thread/common/lthread.c
@@ -357,9 +357,10 @@ void lthread_exit(void *ptr)
         *  - if exit before join then we suspend and resume on join
         *  - if join before exit then we resume the joining thread
         */
+       uint64_t join_initial = LT_JOIN_INITIAL;
        if ((lt->join == LT_JOIN_INITIAL)
-           && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
-                                  LT_JOIN_EXITING)) {
+           && __atomic_compare_exchange_n(&lt->join, &join_initial,
+               LT_JOIN_EXITING, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
                DIAG_EVENT(lt, LT_DIAG_LTHREAD_EXIT, 1, 0);
                _suspend();
@@ -415,9 +416,10 @@ int lthread_join(struct lthread *lt, void **ptr)
         *  - if join before exit we suspend and will resume when exit is called
         *  - if exit before join we resume the exiting thread
         */
+       uint64_t join_initial = LT_JOIN_INITIAL;
        if ((lt->join == LT_JOIN_INITIAL)
-           && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
-                                  LT_JOIN_THREAD_SET)) {
+           && __atomic_compare_exchange_n(&lt->join, &join_initial,
+               LT_JOIN_THREAD_SET, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
                DIAG_EVENT(current, LT_DIAG_LTHREAD_JOIN, lt, 1);
                _suspend();
diff --git a/examples/performance-thread/common/lthread_diag.h b/examples/performance-thread/common/lthread_diag.h
index e876dda..7ee89ee 100644
--- a/examples/performance-thread/common/lthread_diag.h
+++ b/examples/performance-thread/common/lthread_diag.h
@@ -78,11 +78,11 @@ extern uint64_t diag_mask;
        }                                                               \
 } while (0)
 
-#define DIAG_COUNT_DEFINE(x) rte_atomic64_t count_##x
-#define DIAG_COUNT_INIT(o, x) rte_atomic64_init(&((o)->count_##x))
-#define DIAG_COUNT_INC(o, x) rte_atomic64_inc(&((o)->count_##x))
-#define DIAG_COUNT_DEC(o, x) rte_atomic64_dec(&((o)->count_##x))
-#define DIAG_COUNT(o, x) rte_atomic64_read(&((o)->count_##x))
+#define DIAG_COUNT_DEFINE(x) uint64_t count_##x
+#define DIAG_COUNT_INIT(o, x) __atomic_store_n(&((o)->count_##x), 0, __ATOMIC_RELAXED)
+#define DIAG_COUNT_INC(o, x) __atomic_fetch_add(&((o)->count_##x), 1, __ATOMIC_RELAXED)
+#define DIAG_COUNT_DEC(o, x) __atomic_fetch_sub(&((o)->count_##x), 1, __ATOMIC_RELAXED)
+#define DIAG_COUNT(o, x) __atomic_load_n(&((o)->count_##x), __ATOMIC_RELAXED)
 
 #define DIAG_USED
 
diff --git a/examples/performance-thread/common/lthread_int.h b/examples/performance-thread/common/lthread_int.h
index a352f13..d010126 100644
--- a/examples/performance-thread/common/lthread_int.h
+++ b/examples/performance-thread/common/lthread_int.h
@@ -21,7 +21,6 @@ extern "C" {
 #include <rte_cycles.h>
 #include <rte_per_lcore.h>
 #include <rte_timer.h>
-#include <rte_atomic_64.h>
 #include <rte_spinlock.h>
 #include <ctx.h>
 
diff --git a/examples/performance-thread/common/lthread_mutex.c b/examples/performance-thread/common/lthread_mutex.c
index 01da6ca..061fc5c 100644
--- a/examples/performance-thread/common/lthread_mutex.c
+++ b/examples/performance-thread/common/lthread_mutex.c
@@ -60,7 +60,7 @@ lthread_mutex_init(char *name, struct lthread_mutex **mutex,
        m->root_sched = THIS_SCHED;
        m->owner = NULL;
 
-       rte_atomic64_init(&m->count);
+       __atomic_store_n(&m->count, 0, __ATOMIC_RELAXED);
 
        DIAG_CREATE_EVENT(m, LT_DIAG_MUTEX_CREATE);
        /* success */
@@ -115,10 +115,11 @@ int lthread_mutex_lock(struct lthread_mutex *m)
        }
 
        for (;;) {
-               rte_atomic64_inc(&m->count);
+               __atomic_fetch_add(&m->count, 1, __ATOMIC_RELAXED);
                do {
-                       if (rte_atomic64_cmpset
-                           ((uint64_t *) &m->owner, 0, (uint64_t) lt)) {
+                       uint64_t lt_init = 0;
+                       if (__atomic_compare_exchange_n((uint64_t *) &m->owner, &lt_init,
+                               (uint64_t) lt, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
                                /* happy days, we got the lock */
                                DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, 0);
                                return 0;
@@ -126,7 +127,7 @@ int lthread_mutex_lock(struct lthread_mutex *m)
                        /* spin due to race with unlock when
                        * nothing was blocked
                        */
-               } while ((rte_atomic64_read(&m->count) == 1) &&
+               } while ((__atomic_load_n(&m->count, __ATOMIC_RELAXED) == 1) &&
                                (m->owner == NULL));
 
                /* queue the current thread in the blocked queue
@@ -160,16 +161,17 @@ int lthread_mutex_trylock(struct lthread_mutex *m)
                return POSIX_ERRNO(EDEADLK);
        }
 
-       rte_atomic64_inc(&m->count);
-       if (rte_atomic64_cmpset
-           ((uint64_t *) &m->owner, (uint64_t) NULL, (uint64_t) lt)) {
+       __atomic_fetch_add(&m->count, 1, __ATOMIC_RELAXED);
+       uint64_t lt_init = 0;
+       if (__atomic_compare_exchange_n((uint64_t *) &m->owner, &lt_init,
+               (uint64_t) lt, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
                /* got the lock */
                DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, 0);
                return 0;
        }
 
        /* failed so return busy */
-       rte_atomic64_dec(&m->count);
+       __atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
        DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EBUSY));
        return POSIX_ERRNO(EBUSY);
 }
@@ -193,13 +195,13 @@ int lthread_mutex_unlock(struct lthread_mutex *m)
                return POSIX_ERRNO(EPERM);
        }
 
-       rte_atomic64_dec(&m->count);
+       __atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
        /* if there are blocked threads then make one ready */
-       while (rte_atomic64_read(&m->count) > 0) {
+       while (__atomic_load_n(&m->count, __ATOMIC_RELAXED) > 0) {
                unblocked = _lthread_queue_remove(m->blocked);
 
                if (unblocked != NULL) {
-                       rte_atomic64_dec(&m->count);
+                       __atomic_fetch_sub(&m->count, 1, __ATOMIC_RELAXED);
                        DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, unblocked);
                        RTE_ASSERT(unblocked->sched != NULL);
                        _ready_queue_insert((struct lthread_sched *)
diff --git a/examples/performance-thread/common/lthread_mutex.h b/examples/performance-thread/common/lthread_mutex.h
index cd866f8..730092b 100644
--- a/examples/performance-thread/common/lthread_mutex.h
+++ b/examples/performance-thread/common/lthread_mutex.h
@@ -17,7 +17,7 @@ extern "C" {
 
 struct lthread_mutex {
        struct lthread *owner;
-       rte_atomic64_t  count;
+       uint64_t count;
        struct lthread_queue *blocked __rte_cache_aligned;
        struct lthread_sched *root_sched;
        char                    name[MAX_MUTEX_NAME_SIZE];
diff --git a/examples/performance-thread/common/lthread_sched.c b/examples/performance-thread/common/lthread_sched.c
index 38ca0c4..3784b01 100644
--- a/examples/performance-thread/common/lthread_sched.c
+++ b/examples/performance-thread/common/lthread_sched.c
@@ -22,8 +22,6 @@
 
 #include <rte_prefetch.h>
 #include <rte_per_lcore.h>
-#include <rte_atomic.h>
-#include <rte_atomic_64.h>
 #include <rte_log.h>
 #include <rte_common.h>
 #include <rte_branch_prediction.h>
@@ -47,8 +45,8 @@
  * When a scheduler shuts down it is assumed that the application is terminating
  */
 
-static rte_atomic16_t num_schedulers;
-static rte_atomic16_t active_schedulers;
+static uint16_t num_schedulers;
+static uint16_t active_schedulers;
 
 /* one scheduler per lcore */
 RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;
@@ -64,10 +62,8 @@ uint64_t diag_mask;
 RTE_INIT(lthread_sched_ctor)
 {
        memset(schedcore, 0, sizeof(schedcore));
-       rte_atomic16_init(&num_schedulers);
-       rte_atomic16_set(&num_schedulers, 1);
-       rte_atomic16_init(&active_schedulers);
-       rte_atomic16_set(&active_schedulers, 0);
+       __atomic_store_n(&num_schedulers, 1, __ATOMIC_RELAXED);
+       __atomic_store_n(&active_schedulers, 0, __ATOMIC_RELAXED);
        diag_cb = NULL;
 }
 
@@ -260,8 +256,8 @@ struct lthread_sched *_lthread_sched_create(size_t stack_size)
  */
 int lthread_num_schedulers_set(int num)
 {
-       rte_atomic16_set(&num_schedulers, num);
-       return (int)rte_atomic16_read(&num_schedulers);
+       __atomic_store_n(&num_schedulers, num, __ATOMIC_RELAXED);
+       return (int)__atomic_load_n(&num_schedulers, __ATOMIC_RELAXED);
 }
 
 /*
@@ -269,7 +265,7 @@ int lthread_num_schedulers_set(int num)
  */
 int lthread_active_schedulers(void)
 {
-       return (int)rte_atomic16_read(&active_schedulers);
+       return (int)__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED);
 }
 
 
@@ -299,8 +295,8 @@ void lthread_scheduler_shutdown_all(void)
         * for the possibility of a pthread wrapper on lthread_yield(),
         * something that is not possible unless the scheduler is running.
         */
-       while (rte_atomic16_read(&active_schedulers) <
-              rte_atomic16_read(&num_schedulers))
+       while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
+              __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
                sched_yield();
 
        for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
@@ -415,15 +411,15 @@ static inline int _lthread_sched_isdone(struct lthread_sched *sched)
  */
 static inline void _lthread_schedulers_sync_start(void)
 {
-       rte_atomic16_inc(&active_schedulers);
+       __atomic_fetch_add(&active_schedulers, 1, __ATOMIC_RELAXED);
 
        /* wait for lthread schedulers
         * Note we use sched_yield() rather than pthread_yield() to allow
         * for the possibility of a pthread wrapper on lthread_yield(),
         * something that is not possible unless the scheduler is running.
         */
-       while (rte_atomic16_read(&active_schedulers) <
-              rte_atomic16_read(&num_schedulers))
+       while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) <
+              __atomic_load_n(&num_schedulers, __ATOMIC_RELAXED))
                sched_yield();
 
 }
@@ -433,15 +429,15 @@ static inline void _lthread_schedulers_sync_start(void)
  */
 static inline void _lthread_schedulers_sync_stop(void)
 {
-       rte_atomic16_dec(&active_schedulers);
-       rte_atomic16_dec(&num_schedulers);
+       __atomic_fetch_sub(&active_schedulers, 1, __ATOMIC_RELAXED);
+       __atomic_fetch_sub(&num_schedulers, 1, __ATOMIC_RELAXED);
 
        /* wait for schedulers
         * Note we use sched_yield() rather than pthread_yield() to allow
         * for the possibility of a pthread wrapper on lthread_yield(),
         * something that is not possible unless the scheduler is running.
         */
-       while (rte_atomic16_read(&active_schedulers) > 0)
+       while (__atomic_load_n(&active_schedulers, __ATOMIC_RELAXED) > 0)
                sched_yield();
 
 }
diff --git a/examples/performance-thread/common/lthread_tls.c b/examples/performance-thread/common/lthread_tls.c
index 07de6ca..4ab2e35 100644
--- a/examples/performance-thread/common/lthread_tls.c
+++ b/examples/performance-thread/common/lthread_tls.c
@@ -18,7 +18,6 @@
 #include <rte_malloc.h>
 #include <rte_log.h>
 #include <rte_ring.h>
-#include <rte_atomic_64.h>
 
 #include "lthread_tls.h"
 #include "lthread_queue.h"
@@ -52,8 +51,10 @@ void _lthread_key_pool_init(void)
 
        bzero(key_table, sizeof(key_table));
 
+       uint64_t pool_init = 0;
        /* only one lcore should do this */
-       if (rte_atomic64_cmpset(&key_pool_init, 0, 1)) {
+       if (__atomic_compare_exchange_n(&key_pool_init, &pool_init, 1, 0,
+                       __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
 
                snprintf(name,
                        MAX_LTHREAD_NAME_SIZE,
diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
index 2905199..50ecc4e 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -26,7 +26,6 @@
 #include <rte_memcpy.h>
 #include <rte_eal.h>
 #include <rte_launch.h>
-#include <rte_atomic.h>
 #include <rte_cycles.h>
 #include <rte_prefetch.h>
 #include <rte_lcore.h>
@@ -570,8 +569,8 @@ RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf);
  */
 static int lthreads_on = 1; /**< Use lthreads for processing*/
 
-rte_atomic16_t rx_counter;  /**< Number of spawned rx threads */
-rte_atomic16_t tx_counter;  /**< Number of spawned tx threads */
+uint16_t rx_counter;  /**< Number of spawned rx threads */
+uint16_t tx_counter;  /**< Number of spawned tx threads */
 
 struct thread_conf {
        uint16_t lcore_id;      /**< Initial lcore for rx thread */
@@ -1910,11 +1909,8 @@ cpu_load_collector(__rte_unused void *arg) {
        printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread,
                        n_tx_thread);
 
-       while (rte_atomic16_read(&rx_counter) < n_rx_thread)
-               rte_pause();
-
-       while (rte_atomic16_read(&tx_counter) < n_tx_thread)
-               rte_pause();
+       rte_wait_until_equal_16(&rx_counter, n_rx_thread, __ATOMIC_RELAXED);
+       rte_wait_until_equal_16(&tx_counter, n_tx_thread, __ATOMIC_RELAXED);
 
        for (i = 0; i < n_rx_thread; i++) {
 
@@ -2036,7 +2032,7 @@ lthread_tx_per_ring(void *dummy)
        RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id());
 
        nb_rx = 0;
-       rte_atomic16_inc(&tx_counter);
+       __atomic_fetch_add(&tx_counter, 1, __ATOMIC_RELAXED);
        while (1) {
 
                /*
@@ -2161,7 +2157,7 @@ lthread_rx(void *dummy)
        worker_id = 0;
 
        rx_conf->conf.cpu_id = sched_getcpu();
-       rte_atomic16_inc(&rx_counter);
+       __atomic_fetch_add(&rx_counter, 1, __ATOMIC_RELAXED);
        while (1) {
 
                /*
@@ -2243,7 +2239,7 @@ lthread_spawner(__rte_unused void *arg)
         * scheduler as this lthread, yielding is required to let them to run and
         * prevent deadlock here.
         */
-       while (rte_atomic16_read(&rx_counter) < n_rx_thread)
+       while (__atomic_load_n(&rx_counter, __ATOMIC_RELAXED) < n_rx_thread)
                lthread_sleep(100000);
 
        /*
@@ -2323,7 +2319,7 @@ pthread_tx(void *dummy)
        RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id());
 
        tx_conf->conf.cpu_id = sched_getcpu();
-       rte_atomic16_inc(&tx_counter);
+       __atomic_fetch_add(&tx_counter, 1, __ATOMIC_RELAXED);
        while (1) {
 
                cur_tsc = rte_rdtsc();
@@ -2406,7 +2402,7 @@ pthread_rx(void *dummy)
 
        worker_id = 0;
        rx_conf->conf.cpu_id = sched_getcpu();
-       rte_atomic16_inc(&rx_counter);
+       __atomic_fetch_add(&rx_counter, 1, __ATOMIC_RELAXED);
        while (1) {
 
                /*