From 0fa75ccc24f090792a33219956a219e26b21837e Mon Sep 17 00:00:00 2001 From: Intel Date: Mon, 3 Jun 2013 00:00:00 +0000 Subject: [PATCH] eal: use intrinsic functions from compiler RTE_FORCE_INTRINSICS makes it possible to force use of intrinsic functions (defaults to n). Signed-off-by: Intel --- config/defconfig_i686-default-linuxapp-gcc | 5 + config/defconfig_i686-default-linuxapp-icc | 6 + config/defconfig_x86_64-default-linuxapp-gcc | 6 + config/defconfig_x86_64-default-linuxapp-icc | 6 + lib/librte_eal/common/include/rte_atomic.h | 305 ++++++++++++++++++ lib/librte_eal/common/include/rte_byteorder.h | 24 +- lib/librte_eal/common/include/rte_common.h | 4 +- lib/librte_eal/common/include/rte_spinlock.h | 17 + 8 files changed, 368 insertions(+), 5 deletions(-) diff --git a/config/defconfig_i686-default-linuxapp-gcc b/config/defconfig_i686-default-linuxapp-gcc index fdd7423dfe..57d768f838 100644 --- a/config/defconfig_i686-default-linuxapp-gcc +++ b/config/defconfig_i686-default-linuxapp-gcc @@ -69,6 +69,11 @@ CONFIG_RTE_ARCH_I686=y CONFIG_RTE_TOOLCHAIN="gcc" CONFIG_RTE_TOOLCHAIN_GCC=y +# +# Use intrinsics or assembly code for key routines +# +CONFIG_RTE_FORCE_INTRINSICS=n + # # Compile libc directory # diff --git a/config/defconfig_i686-default-linuxapp-icc b/config/defconfig_i686-default-linuxapp-icc index cd14e733a1..44f1b054e3 100644 --- a/config/defconfig_i686-default-linuxapp-icc +++ b/config/defconfig_i686-default-linuxapp-icc @@ -69,6 +69,12 @@ CONFIG_RTE_ARCH_I686=y CONFIG_RTE_TOOLCHAIN="icc" CONFIG_RTE_TOOLCHAIN_ICC=y +# +# Use intrinsics or assembly code for key routines +# +CONFIG_RTE_FORCE_INTRINSICS=n + +# # # Compile libc directory # diff --git a/config/defconfig_x86_64-default-linuxapp-gcc b/config/defconfig_x86_64-default-linuxapp-gcc index 73a3729e04..8e76034642 100644 --- a/config/defconfig_x86_64-default-linuxapp-gcc +++ b/config/defconfig_x86_64-default-linuxapp-gcc @@ -69,6 +69,12 @@ CONFIG_RTE_ARCH_X86_64=y CONFIG_RTE_TOOLCHAIN="gcc" CONFIG_RTE_TOOLCHAIN_GCC=y +# +# Use intrinsics or assembly code for key routines +# +CONFIG_RTE_FORCE_INTRINSICS=n + +# # # Compile libc directory # diff --git a/config/defconfig_x86_64-default-linuxapp-icc b/config/defconfig_x86_64-default-linuxapp-icc index c1a34825b1..edcacf3579 100644 --- a/config/defconfig_x86_64-default-linuxapp-icc +++ b/config/defconfig_x86_64-default-linuxapp-icc @@ -69,6 +69,12 @@ CONFIG_RTE_ARCH_X86_64=y CONFIG_RTE_TOOLCHAIN="icc" CONFIG_RTE_TOOLCHAIN_ICC=y +# +# Use intrinsics or assembly code for key routines +# +CONFIG_RTE_FORCE_INTRINSICS=n + +# # # Compile libc directory # diff --git a/lib/librte_eal/common/include/rte_atomic.h b/lib/librte_eal/common/include/rte_atomic.h index ffd90e49b8..67eff12363 100644 --- a/lib/librte_eal/common/include/rte_atomic.h +++ b/lib/librte_eal/common/include/rte_atomic.h @@ -82,6 +82,8 @@ extern "C" { */ #define rte_rmb() asm volatile("lfence;" : : : "memory") +#include + /** * @file * Atomic Operations on x86_64 @@ -108,6 +110,7 @@ extern "C" { static inline int rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) { +#ifndef RTE_FORCE_INTRINSICS uint8_t res; asm volatile( @@ -121,6 +124,9 @@ rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) "m" (*dst) : "memory"); /* no-clobber list */ return res; +#else + return __sync_bool_compare_and_swap(dst, exp, src); +#endif } /** @@ -212,12 +218,16 @@ rte_atomic16_sub(rte_atomic16_t *v, int16_t dec) static inline void rte_atomic16_inc(rte_atomic16_t *v) { +#ifndef RTE_FORCE_INTRINSICS asm 
volatile( MPLOCKED "incw %[cnt]" : [cnt] "=m" (v->cnt) /* output */ : "m" (v->cnt) /* input */ ); +#else + rte_atomic16_add(v, 1); +#endif } /** @@ -229,12 +239,16 @@ rte_atomic16_inc(rte_atomic16_t *v) static inline void rte_atomic16_dec(rte_atomic16_t *v) { +#ifndef RTE_FORCE_INTRINSICS asm volatile( MPLOCKED "decw %[cnt]" : [cnt] "=m" (v->cnt) /* output */ : "m" (v->cnt) /* input */ ); +#else + rte_atomic16_sub(v, 1); +#endif } /** @@ -289,6 +303,7 @@ rte_atomic16_sub_return(rte_atomic16_t *v, int16_t dec) */ static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) { +#ifndef RTE_FORCE_INTRINSICS uint8_t ret; asm volatile( @@ -299,6 +314,9 @@ static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) [ret] "=qm" (ret) ); return (ret != 0); +#else + return (__sync_add_and_fetch(&v->cnt, 1) == 0); +#endif } /** @@ -314,6 +332,7 @@ static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) */ static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) { +#ifndef RTE_FORCE_INTRINSICS uint8_t ret; asm volatile(MPLOCKED @@ -323,6 +342,9 @@ static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) [ret] "=qm" (ret) ); return (ret != 0); +#else + return (__sync_sub_and_fetch(&v->cnt, 1) == 0); +#endif } /** @@ -373,6 +395,7 @@ static inline void rte_atomic16_clear(rte_atomic16_t *v) static inline int rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) { +#ifndef RTE_FORCE_INTRINSICS uint8_t res; asm volatile( @@ -386,6 +409,9 @@ rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) "m" (*dst) : "memory"); /* no-clobber list */ return res; +#else + return __sync_bool_compare_and_swap(dst, exp, src); +#endif } /** @@ -477,12 +503,16 @@ rte_atomic32_sub(rte_atomic32_t *v, int32_t dec) static inline void rte_atomic32_inc(rte_atomic32_t *v) { +#ifndef RTE_FORCE_INTRINSICS asm volatile( MPLOCKED "incl %[cnt]" : [cnt] "=m" (v->cnt) /* output */ : "m" (v->cnt) /* input */ ); +#else + rte_atomic32_add(v, 1); +#endif } /** @@ -494,12 +524,16 @@ rte_atomic32_inc(rte_atomic32_t *v) static inline void rte_atomic32_dec(rte_atomic32_t *v) { +#ifndef RTE_FORCE_INTRINSICS asm volatile( MPLOCKED "decl %[cnt]" : [cnt] "=m" (v->cnt) /* output */ : "m" (v->cnt) /* input */ ); +#else + rte_atomic32_sub(v,1); +#endif } /** @@ -554,6 +588,7 @@ rte_atomic32_sub_return(rte_atomic32_t *v, int32_t dec) */ static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) { +#ifndef RTE_FORCE_INTRINSICS uint8_t ret; asm volatile( @@ -564,6 +599,9 @@ static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) [ret] "=qm" (ret) ); return (ret != 0); +#else + return (__sync_add_and_fetch(&v->cnt, 1) == 0); +#endif } /** @@ -579,6 +617,7 @@ static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) */ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) { +#ifndef RTE_FORCE_INTRINSICS uint8_t ret; asm volatile(MPLOCKED @@ -588,6 +627,9 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) [ret] "=qm" (ret) ); return (ret != 0); +#else + return (__sync_sub_and_fetch(&v->cnt, 1) == 0); +#endif } /** @@ -617,6 +659,7 @@ static inline void rte_atomic32_clear(rte_atomic32_t *v) v->cnt = 0; } +#ifndef RTE_FORCE_INTRINSICS /* any other functions are in arch specific files */ #include "arch/rte_atomic.h" @@ -811,6 +854,268 @@ rte_atomic64_clear(rte_atomic64_t *v); #endif /* __DOXYGEN__ */ +#else /*RTE_FORCE_INTRINSICS */ + +/*------------------------- 64 bit atomic operations -------------------------*/ + +/** + * An atomic compare and set function 
used by the mutex functions. + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 64-bit words) + * + * @param dst + * The destination into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int64_t cnt; /**< Internal counter value. */ +} rte_atomic64_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC64_INIT(val) { (val) } + +/** + * Initialize the atomic counter. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ +#ifdef __LP64__ + v->cnt = 0; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, 0); + } +#endif +} + +/** + * Atomically read a 64-bit counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ +#ifdef __LP64__ + return v->cnt; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + /* replace the value by itself */ + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp); + } + return tmp; +#endif +} + +/** + * Atomically set a 64-bit counter. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value of the counter. + */ +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ +#ifdef __LP64__ + v->cnt = new_value; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, new_value); + } +#endif +} + +/** + * Atomically add a 64-bit value to a counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + */ +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} + +/** + * Atomically subtract a 64-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be substracted from the counter. + */ +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} + +/** + * Atomically increment a 64-bit counter by one and test. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + rte_atomic64_add(v, 1); +} + +/** + * Atomically decrement a 64-bit counter by one and test. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + rte_atomic64_sub(v, 1); +} + +/** + * Add a 64-bit value to an atomic counter and return the result. + * + * Atomically adds the 64-bit value (inc) to the atomic counter (v) and + * returns the value of v after the addition. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. 
+ */ +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} + +/** + * Subtract a 64-bit value from an atomic counter and return the result. + * + * Atomically subtracts the 64-bit value (dec) from the atomic counter (v) + * and returns the value of v after the substraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be substracted from the counter. + * @return + * The value of v after the substraction. + */ +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} + +/** + * Atomically increment a 64-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns + * true if the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the addition is 0; false otherwise. + */ +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_add_return(v, 1) == 0; +} + +/** + * Atomically decrement a 64-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after substraction is 0; false otherwise. + */ +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_sub_return(v, 1) == 0; +} + +/** + * Atomically test and set a 64-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. + */ +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} + +/** + * Atomically set a 64-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + rte_atomic64_set(v, 0); +} + +#endif /*RTE_FORCE_INTRINSICS */ #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_byteorder.h b/lib/librte_eal/common/include/rte_byteorder.h index a039c56770..4902b48f7f 100644 --- a/lib/librte_eal/common/include/rte_byteorder.h +++ b/lib/librte_eal/common/include/rte_byteorder.h @@ -152,26 +152,44 @@ static inline uint64_t rte_arch_bswap64(uint64_t x) } #endif /* RTE_ARCH_X86_64 */ + /** * Swap bytes in a 16-bit value. */ #define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ rte_constant_bswap16(x) : \ - rte_arch_bswap16(x))) \ + rte_arch_bswap16(x))) +#ifndef RTE_FORCE_INTRINSICS /** * Swap bytes in a 32-bit value. */ #define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \ rte_constant_bswap32(x) : \ - rte_arch_bswap32(x))) \ + rte_arch_bswap32(x))) /** * Swap bytes in a 64-bit value. */ #define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \ rte_constant_bswap64(x) : \ - rte_arch_bswap64(x))) \ + rte_arch_bswap64(x))) + +#else + +/* __builtin_bswap16 is only available gcc 4.8 and upwards */ + +/** + * Swap bytes in a 32-bit value. + */ +#define rte_bswap32(x) __builtin_bswap32(x) + +/** + * Swap bytes in a 64-bit value. + */ +#define rte_bswap64(x) __builtin_bswap64(x) + +#endif /** * Convert a 16-bit value from CPU order to little endian. 
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 22281c63d5..eb82a8baf9 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -50,7 +50,7 @@ extern "C" { #include #include #include - +#include /*********** Macros to eliminate unused variable warnings ********/ @@ -257,7 +257,7 @@ rte_align32pow2(uint32_t x) static inline void rte_pause (void) { - asm volatile ("pause"); + _mm_pause(); } /** diff --git a/lib/librte_eal/common/include/rte_spinlock.h b/lib/librte_eal/common/include/rte_spinlock.h index 5ce8eca987..bc0b84057e 100644 --- a/lib/librte_eal/common/include/rte_spinlock.h +++ b/lib/librte_eal/common/include/rte_spinlock.h @@ -53,6 +53,9 @@ extern "C" { #endif #include +#ifdef RTE_FORCE_INTRINSICS +#include +#endif /** * The rte_spinlock_t type. @@ -87,6 +90,7 @@ rte_spinlock_init(rte_spinlock_t *sl) static inline void rte_spinlock_lock(rte_spinlock_t *sl) { +#ifndef RTE_FORCE_INTRINSICS int lock_val = 1; asm volatile ( "1:\n" @@ -102,6 +106,11 @@ rte_spinlock_lock(rte_spinlock_t *sl) : [locked] "=m" (sl->locked), [lv] "=q" (lock_val) : "[lv]" (lock_val) : "memory"); +#else + while (__sync_lock_test_and_set(&sl->locked, 1)) + while(sl->locked) + rte_pause(); +#endif } /** @@ -113,12 +122,16 @@ rte_spinlock_lock(rte_spinlock_t *sl) static inline void rte_spinlock_unlock (rte_spinlock_t *sl) { +#ifndef RTE_FORCE_INTRINSICS int unlock_val = 0; asm volatile ( "xchg %[locked], %[ulv]\n" : [locked] "=m" (sl->locked), [ulv] "=q" (unlock_val) : "[ulv]" (unlock_val) : "memory"); +#else + __sync_lock_release(&sl->locked); +#endif } /** @@ -132,6 +145,7 @@ rte_spinlock_unlock (rte_spinlock_t *sl) static inline int rte_spinlock_trylock (rte_spinlock_t *sl) { +#ifndef RTE_FORCE_INTRINSICS int lockval = 1; asm volatile ( @@ -141,6 +155,9 @@ rte_spinlock_trylock (rte_spinlock_t *sl) : "memory"); return (lockval == 0); +#else + return (__sync_lock_test_and_set(&sl->locked,1) == 0); +#endif } /** -- 2.20.1
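
For readers less familiar with the GCC/ICC __sync builtins that the RTE_FORCE_INTRINSICS paths above rely on, the following standalone snippet shows their semantics: __sync_bool_compare_and_swap() matches the rte_atomicNN_cmpset() contract (store src only if *dst == exp, non-zero on success), and __sync_add_and_fetch()/__sync_sub_and_fetch() return the value after the operation, which is what the *_return() and *_and_test() helpers build on. This is an illustration only, not DPDK code; the variable names here are invented for the example.

/*
 * Illustration only -- not DPDK code.  A tiny program showing the
 * semantics of the __sync builtins used by the intrinsic paths above.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	volatile uint64_t word = 5;
	volatile int64_t cnt = 0;

	/* cmpset contract: store 10 only if the current value is 5,
	 * return non-zero on success (cf. rte_atomic64_cmpset()). */
	int ok = __sync_bool_compare_and_swap(&word, 5, 10);
	printf("cmpset ok=%d word=%llu\n", ok, (unsigned long long)word);

	/* add-and-fetch returns the value *after* the addition, which is
	 * what rte_atomic64_add_return()/_inc_and_test() rely on. */
	int64_t after = __sync_add_and_fetch(&cnt, 1);
	printf("after inc: %lld (zero? %d)\n", (long long)after, after == 0);

	/* sub-and-fetch mirrors it for the decrement-and-test case. */
	after = __sync_sub_and_fetch(&cnt, 1);
	printf("after dec: %lld (zero? %d)\n", (long long)after, after == 0);

	return 0;
}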
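
The byteorder change maps rte_bswap32()/rte_bswap64() directly onto compiler builtins when RTE_FORCE_INTRINSICS is set, while rte_bswap16() keeps the existing path because, as the patch notes, __builtin_bswap16 only appeared in gcc 4.8. A minimal sketch of what those builtins do, purely for illustration:

/*
 * Illustration only -- the builtin byte swaps used by the intrinsic
 * branch of rte_byteorder.h above.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t x32 = 0x11223344;
	uint64_t x64 = 0x1122334455667788ULL;

	printf("%08x -> %08x\n", x32, __builtin_bswap32(x32));
	printf("%016llx -> %016llx\n",
	       (unsigned long long)x64,
	       (unsigned long long)__builtin_bswap64(x64));
	return 0;
}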
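
Finally, the spinlock's intrinsic branch is a classic test-and-set lock: __sync_lock_test_and_set() acquires with an acquire barrier, a plain read plus a pause hint is used while the lock is contended, and __sync_lock_release() stores 0 with release semantics. The sketch below restates that pattern outside the library; demo_spinlock_t and the demo_* function names are invented for this example and are not part of the patch.

/*
 * Illustration only -- the test-and-set pattern used by the intrinsic
 * branch of rte_spinlock.h above, restated as a standalone snippet.
 */
#include <emmintrin.h>		/* _mm_pause(), as used by rte_pause() above */

typedef struct {
	volatile int locked;	/* 0 = free, 1 = held */
} demo_spinlock_t;

static inline void
demo_spinlock_lock(demo_spinlock_t *sl)
{
	/* __sync_lock_test_and_set() returns the previous value and acts
	 * as an acquire barrier; spin-read with a pause hint while the
	 * lock stays held. */
	while (__sync_lock_test_and_set(&sl->locked, 1))
		while (sl->locked)
			_mm_pause();
}

static inline int
demo_spinlock_trylock(demo_spinlock_t *sl)
{
	/* success only if the previous value was 0 (lock was free) */
	return __sync_lock_test_and_set(&sl->locked, 1) == 0;
}

static inline void
demo_spinlock_unlock(demo_spinlock_t *sl)
{
	/* stores 0 with release semantics */
	__sync_lock_release(&sl->locked);
}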