From f057dc5c7d8d0c064a2cb9899a84abca1282cfdf Mon Sep 17 00:00:00 2001 From: Jan Viktorin Date: Sat, 19 Mar 2016 20:58:04 +0100 Subject: [PATCH] eal/arm: disable NEON for 32-bit memcpy The new flag CONFIG_RTE_ARCH_ARM_NEON_MEMCPY is used to enable NEON-based memcpy optimizations in EAL. As NEON does not always bring a performance benefit, the feature is disabled by default. Signed-off-by: Jan Viktorin --- config/defconfig_arm-armv7a-linuxapp-gcc | 4 ++++ lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/config/defconfig_arm-armv7a-linuxapp-gcc b/config/defconfig_arm-armv7a-linuxapp-gcc index 96c3343b38..e626ec1ebf 100644 --- a/config/defconfig_arm-armv7a-linuxapp-gcc +++ b/config/defconfig_arm-armv7a-linuxapp-gcc @@ -37,6 +37,10 @@ CONFIG_RTE_ARCH_ARM=y CONFIG_RTE_ARCH_ARMv7=y CONFIG_RTE_ARCH_ARM_TUNE="cortex-a9" +# Accelerate memcpy operations. Consider enabling for Cortex-A15. +# For Cortex-A7 and Cortex-A9, it might accelerate short data copies (< 64 B). +CONFIG_RTE_ARCH_ARM_NEON_MEMCPY=n + CONFIG_RTE_FORCE_INTRINSICS=y CONFIG_RTE_ARCH_STRICT_ALIGN=y diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h index df47c0df49..988125b347 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h @@ -42,7 +42,11 @@ extern "C" { #include "generic/rte_memcpy.h" -#ifdef __ARM_NEON_FP +#ifdef RTE_ARCH_ARM_NEON_MEMCPY + +#ifndef RTE_MACHINE_CPUFLAG_NEON +#error "Cannot optimize memcpy by NEON as the CPU seems to not support this" +#endif /* ARM NEON Intrinsics are used to copy data */ #include <arm_neon.h> @@ -325,7 +329,7 @@ rte_memcpy_func(void *dst, const void *src, size_t n) return memcpy(dst, src, n); } -#endif /* __ARM_NEON_FP */ +#endif /* RTE_ARCH_ARM_NEON_MEMCPY */ #ifdef __cplusplus } -- 2.20.1