From 672a15056380d97415eca8ca0b13580639ba7a6b Mon Sep 17 00:00:00 2001
From: Phil Yang
Date: Fri, 17 Jul 2020 18:14:36 +0800
Subject: [PATCH] eal: add wrapper for C11 atomic thread fence

Provide a wrapper for __atomic_thread_fence builtins to support
optimized code for __ATOMIC_SEQ_CST memory order for x86 platforms.

Suggested-by: Honnappa Nagarahalli
Signed-off-by: Phil Yang
Reviewed-by: Ola Liljedahl
Acked-by: Konstantin Ananyev
---
 lib/librte_eal/arm/include/rte_atomic_32.h  |  6 ++++++
 lib/librte_eal/arm/include/rte_atomic_64.h  |  6 ++++++
 lib/librte_eal/include/generic/rte_atomic.h |  5 +++++
 lib/librte_eal/ppc/include/rte_atomic.h     |  6 ++++++
 lib/librte_eal/x86/include/rte_atomic.h     | 16 ++++++++++++++++
 5 files changed, 39 insertions(+)

diff --git a/lib/librte_eal/arm/include/rte_atomic_32.h b/lib/librte_eal/arm/include/rte_atomic_32.h
index 7dc0d06d14..368f10ce4d 100644
--- a/lib/librte_eal/arm/include/rte_atomic_32.h
+++ b/lib/librte_eal/arm/include/rte_atomic_32.h
@@ -37,6 +37,12 @@ extern "C" {
 
 #define rte_cio_rmb() rte_rmb()
 
+static __rte_always_inline void
+rte_atomic_thread_fence(int memory_order)
+{
+	__atomic_thread_fence(memory_order);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/arm/include/rte_atomic_64.h b/lib/librte_eal/arm/include/rte_atomic_64.h
index e42f69edce..5cae52dcfd 100644
--- a/lib/librte_eal/arm/include/rte_atomic_64.h
+++ b/lib/librte_eal/arm/include/rte_atomic_64.h
@@ -41,6 +41,12 @@ extern "C" {
 
 #define rte_cio_rmb() rte_rmb()
 
+static __rte_always_inline void
+rte_atomic_thread_fence(int memory_order)
+{
+	__atomic_thread_fence(memory_order);
+}
+
 /*------------------------ 128 bit atomic operations -------------------------*/
 
 #if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
diff --git a/lib/librte_eal/include/generic/rte_atomic.h b/lib/librte_eal/include/generic/rte_atomic.h
index e6ab15a973..95270f15b9 100644
--- a/lib/librte_eal/include/generic/rte_atomic.h
+++ b/lib/librte_eal/include/generic/rte_atomic.h
@@ -158,6 +158,11 @@ static inline void rte_cio_rmb(void);
 	asm volatile ("" : : : "memory"); \
 } while(0)
 
+/**
+ * Synchronization fence between threads based on the specified memory order.
+ */
+static inline void rte_atomic_thread_fence(int memory_order);
+
 /*------------------------- 16 bit atomic operations -------------------------*/
 
 /**
diff --git a/lib/librte_eal/ppc/include/rte_atomic.h b/lib/librte_eal/ppc/include/rte_atomic.h
index 7e3e13118c..527fcaf80d 100644
--- a/lib/librte_eal/ppc/include/rte_atomic.h
+++ b/lib/librte_eal/ppc/include/rte_atomic.h
@@ -40,6 +40,12 @@ extern "C" {
 
 #define rte_cio_rmb() rte_rmb()
 
+static __rte_always_inline void
+rte_atomic_thread_fence(int memory_order)
+{
+	__atomic_thread_fence(memory_order);
+}
+
 /*------------------------- 16 bit atomic operations -------------------------*/
 /* To be compatible with Power7, use GCC built-in functions for 16 bit
  * operations */
diff --git a/lib/librte_eal/x86/include/rte_atomic.h b/lib/librte_eal/x86/include/rte_atomic.h
index b9dcd30aba..62ea393187 100644
--- a/lib/librte_eal/x86/include/rte_atomic.h
+++ b/lib/librte_eal/x86/include/rte_atomic.h
@@ -83,6 +83,22 @@ rte_smp_mb(void)
 
 #define rte_cio_rmb() rte_compiler_barrier()
 
+/**
+ * Synchronization fence between threads based on the specified memory order.
+ *
+ * On x86 the __atomic_thread_fence(__ATOMIC_SEQ_CST) generates full 'mfence'
+ * which is quite expensive. The optimized implementation of rte_smp_mb is
+ * used instead.
+ */
+static __rte_always_inline void
+rte_atomic_thread_fence(int memory_order)
+{
+	if (memory_order == __ATOMIC_SEQ_CST)
+		rte_smp_mb();
+	else
+		__atomic_thread_fence(memory_order);
+}
+
 /*------------------------- 16 bit atomic operations -------------------------*/
 
 #ifndef RTE_FORCE_INTRINSICS
-- 
2.20.1
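
Usage note (not part of the patch): the sketch below shows how a caller might
use the new wrapper in place of a raw __atomic_thread_fence(), so that a
sequentially consistent fence maps to the cheaper rte_smp_mb() on x86. The
producer/consumer handoff and the data/ready variables are hypothetical and
only illustrate the intended call site.

#include <rte_atomic.h>

static int data;	/* payload, hypothetical */
static int ready;	/* handoff flag, hypothetical */

static void
producer(void)
{
	__atomic_store_n(&data, 42, __ATOMIC_RELAXED);
	/* Full fence: on x86 this expands to rte_smp_mb() (a locked
	 * instruction) rather than the mfence that
	 * __atomic_thread_fence(__ATOMIC_SEQ_CST) would emit.
	 */
	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
	__atomic_store_n(&ready, 1, __ATOMIC_RELAXED);
}

static void
consumer(void)
{
	while (__atomic_load_n(&ready, __ATOMIC_RELAXED) == 0)
		;
	/* Pairs with the producer's fence; data written before the
	 * producer's fence is now visible to this thread.
	 */
	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
	(void)__atomic_load_n(&data, __ATOMIC_RELAXED);
}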