From 7c6cca6b604d59fe1ab5eb4639507c4dfdbc4592 Mon Sep 17 00:00:00 2001 From: Konstantin Ananyev Date: Tue, 6 Oct 2020 16:03:09 +0100 Subject: [PATCH] acl: add infrastructure for AVX512 classify methods Add necessary changes to support new AVX512 specific ACL classify algorithm: - changes in meson.build to check that build tools (compiler, assembler, etc.) do properly support AVX512. - run-time checks to make sure target platform does support AVX512. - dummy rte_acl_classify_avx512() for targets where AVX512 implementation couldn't be properly supported. Signed-off-by: Konstantin Ananyev Acked-by: Bruce Richardson --- config/x86/meson.build | 19 +++++++++---- lib/librte_acl/acl.h | 8 ++++++ lib/librte_acl/acl_run_avx512.c | 29 ++++++++++++++++++++ lib/librte_acl/meson.build | 48 +++++++++++++++++++++++++++++++++ lib/librte_acl/rte_acl.c | 42 +++++++++++++++++++++++++++++ lib/librte_acl/rte_acl.h | 2 ++ 6 files changed, 143 insertions(+), 5 deletions(-) create mode 100644 lib/librte_acl/acl_run_avx512.c diff --git a/config/x86/meson.build b/config/x86/meson.build index 172b72b725..31bfa63b1c 100644 --- a/config/x86/meson.build +++ b/config/x86/meson.build @@ -21,11 +21,20 @@ foreach f:base_flags compile_time_cpuflags += ['RTE_CPUFLAG_' + f] endforeach -optional_flags = ['AES', 'PCLMUL', - 'AVX', 'AVX2', 'AVX512F', - 'RDRND', 'RDSEED', - 'AVX512BW', 'AVX512DQ', - 'AVX512VL', 'VPCLMULQDQ'] +optional_flags = [ + 'AES', + 'AVX', + 'AVX2', + 'AVX512BW', + 'AVX512CD', + 'AVX512DQ', + 'AVX512F', + 'AVX512VL', + 'PCLMUL', + 'RDRND', + 'RDSEED', + 'VPCLMULQDQ', +] foreach f:optional_flags if cc.get_define('__@0@__'.format(f), args: machine_args) == '1' if f == 'PCLMUL' # special case flags with different defines diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h index 39d45a0c2b..543ce55659 100644 --- a/lib/librte_acl/acl.h +++ b/lib/librte_acl/acl.h @@ -201,6 +201,14 @@ int rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data, uint32_t *results, uint32_t num, uint32_t categories); +int +rte_acl_classify_avx512x16(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + +int +rte_acl_classify_avx512x32(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + int rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data, uint32_t *results, uint32_t num, uint32_t categories); diff --git a/lib/librte_acl/acl_run_avx512.c b/lib/librte_acl/acl_run_avx512.c new file mode 100644 index 0000000000..1817f88b29 --- /dev/null +++ b/lib/librte_acl/acl_run_avx512.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#include "acl_run_sse.h" + +int +rte_acl_classify_avx512x16(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) +{ + if (num >= MAX_SEARCHES_SSE8) + return search_sse_8(ctx, data, results, num, categories); + if (num >= MAX_SEARCHES_SSE4) + return search_sse_4(ctx, data, results, num, categories); + + return rte_acl_classify_scalar(ctx, data, results, num, categories); +} + +int +rte_acl_classify_avx512x32(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) +{ + if (num >= MAX_SEARCHES_SSE8) + return search_sse_8(ctx, data, results, num, categories); + if (num >= MAX_SEARCHES_SSE4) + return search_sse_4(ctx, data, results, num, categories); + + return rte_acl_classify_scalar(ctx, data, results, num, categories); +} diff --git a/lib/librte_acl/meson.build b/lib/librte_acl/meson.build index 751ee60e80..ee4e229e59 100644 --- a/lib/librte_acl/meson.build +++ b/lib/librte_acl/meson.build @@ -27,6 +27,54 @@ if dpdk_conf.has('RTE_ARCH_X86') cflags += '-DCC_AVX2_SUPPORT' endif + # compile AVX512 version if: + # we are building 64-bit binary AND binutils can generate proper code + + if dpdk_conf.has('RTE_ARCH_X86_64') and binutils_ok.returncode() == 0 + + # compile AVX512 version if either: + # a. we have AVX512 supported in minimum instruction set + # baseline + # b. it's not minimum instruction set, but supported by + # compiler + # + # in former case, just add avx512 C file to files list + # in latter case, compile c file to static lib, using correct + # compiler flags, and then have the .o file from static lib + # linked into main lib. + + # check if all required flags already enabled (variant a). + acl_avx512_flags = ['__AVX512F__', '__AVX512VL__', + '__AVX512CD__', '__AVX512BW__'] + + acl_avx512_on = true + foreach f:acl_avx512_flags + + if cc.get_define(f, args: machine_args) == '' + acl_avx512_on = false + endif + endforeach + + if acl_avx512_on == true + + sources += files('acl_run_avx512.c') + cflags += '-DCC_AVX512_SUPPORT' + + elif cc.has_multi_arguments('-mavx512f', '-mavx512vl', + '-mavx512cd', '-mavx512bw') + + avx512_tmplib = static_library('avx512_tmp', + 'acl_run_avx512.c', + dependencies: static_rte_eal, + c_args: cflags + + ['-mavx512f', '-mavx512vl', + '-mavx512cd', '-mavx512bw']) + objs += avx512_tmplib.extract_objects( + 'acl_run_avx512.c') + cflags += '-DCC_AVX512_SUPPORT' + endif + endif + elif dpdk_conf.has('RTE_ARCH_ARM') cflags += '-flax-vector-conversions' sources += files('acl_run_neon.c') diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c index d1583c5031..cb8ccc5c10 100644 --- a/lib/librte_acl/rte_acl.c +++ b/lib/librte_acl/rte_acl.c @@ -16,6 +16,32 @@ static struct rte_tailq_elem rte_acl_tailq = { }; EAL_REGISTER_TAILQ(rte_acl_tailq) +#ifndef CC_AVX512_SUPPORT +/* + * If the compiler doesn't support AVX512 instructions, + * then the dummy one would be used instead for AVX512 classify method. + */ +int +rte_acl_classify_avx512x16(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + +int +rte_acl_classify_avx512x32(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} +#endif + #ifndef CC_AVX2_SUPPORT /* * If the compiler doesn't support AVX2 instructions, @@ -75,6 +101,8 @@ static const rte_acl_classify_t classify_fns[] = { [RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2, [RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon, [RTE_ACL_CLASSIFY_ALTIVEC] = rte_acl_classify_altivec, + [RTE_ACL_CLASSIFY_AVX512X16] = rte_acl_classify_avx512x16, + [RTE_ACL_CLASSIFY_AVX512X32] = rte_acl_classify_avx512x32, }; /* @@ -124,6 +152,18 @@ acl_check_alg_ppc(enum rte_acl_classify_alg alg) static int acl_check_alg_x86(enum rte_acl_classify_alg alg) { + if (alg == RTE_ACL_CLASSIFY_AVX512X16 || + alg == RTE_ACL_CLASSIFY_AVX512X32) { +#ifdef CC_AVX512_SUPPORT + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) && + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512VL) && + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512CD) && + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW)) + return 0; +#endif + return -ENOTSUP; + } + if (alg == RTE_ACL_CLASSIFY_AVX2) { #ifdef CC_AVX2_SUPPORT if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) @@ -157,6 +197,8 @@ acl_check_alg(enum rte_acl_classify_alg alg) return acl_check_alg_arm(alg); case RTE_ACL_CLASSIFY_ALTIVEC: return acl_check_alg_ppc(alg); + case RTE_ACL_CLASSIFY_AVX512X32: + case RTE_ACL_CLASSIFY_AVX512X16: case RTE_ACL_CLASSIFY_AVX2: case RTE_ACL_CLASSIFY_SSE: return acl_check_alg_x86(alg); diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h index 3999f15ded..1bfed00743 100644 --- a/lib/librte_acl/rte_acl.h +++ b/lib/librte_acl/rte_acl.h @@ -241,6 +241,8 @@ enum rte_acl_classify_alg { RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */ RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */ RTE_ACL_CLASSIFY_ALTIVEC = 5, /**< requires ALTIVEC support. */ + RTE_ACL_CLASSIFY_AVX512X16 = 6, /**< requires AVX512 support. */ + RTE_ACL_CLASSIFY_AVX512X32 = 7, /**< requires AVX512 support. */ }; /** -- 2.20.1