#include "generic/rte_memcpy.h"
-#ifdef __ARM_NEON_FP
+#ifdef RTE_ARCH_ARM_NEON_MEMCPY
+
+#ifndef RTE_MACHINE_CPUFLAG_NEON
+#error "Cannot optimize memcpy by NEON as the CPU seems to not support this"
+#endif
/* ARM NEON Intrinsics are used to copy data */
#include <arm_neon.h>
}
#define rte_memcpy(dst, src, n) \
- ({ (__builtin_constant_p(n)) ? \
+ __extension__ ({ \
+ (__builtin_constant_p(n)) ? \
memcpy((dst), (src), (n)) : \
rte_memcpy_func((dst), (src), (n)); })
* We split the remaining bytes (which will be less than 256) into
* 64byte (2^6) chunks.
* Using incrementing integers in the case labels of a switch statement
- * enourages the compiler to use a jump table. To get incrementing
+ * encourages the compiler to use a jump table. To get incrementing
* integers, we shift the 2 relevant bits to the LSB position to first
* get decrementing integers, and then subtract.
*/
return memcpy(dst, src, n);
}
-static inline void *
-rte_memcpy_func(void *dst, const void *src, size_t n)
-{
- return memcpy(dst, src, n);
-}
-
-#endif /* __ARM_NEON_FP */
+#endif /* RTE_ARCH_ARM_NEON_MEMCPY */
#ifdef __cplusplus
}