X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_efd%2Frte_efd.c;h=f601d62e32e5eba9049dc44321f23c04e7377e30;hb=0603df73a077e66f8eef27a5df71aed6135cda3f;hp=2bcfd62a2126c7a13b12e17d69f9d33a023d90e8;hpb=56b6ef874f80891ff5fda833b92ff2cb7e8edfd7;p=dpdk.git diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c index 2bcfd62a21..f601d62e32 100644 --- a/lib/librte_efd/rte_efd.c +++ b/lib/librte_efd/rte_efd.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include @@ -52,6 +51,9 @@ #include #include "rte_efd.h" +#if defined(RTE_ARCH_X86) +#include "rte_efd_x86.h" +#endif #define EFD_KEY(key_idx, table) (table->keys + ((key_idx) * table->key_len)) /** Hash function used to determine chunk_id and bin_id for a group */ @@ -100,6 +102,7 @@ allocated memory /* All different internal lookup functions */ enum efd_lookup_internal_function { EFD_LOOKUP_SCALAR = 0, + EFD_LOOKUP_AVX2, EFD_LOOKUP_NUM }; @@ -553,7 +556,7 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len, num_chunks = rte_align32pow2((max_num_rules / EFD_TARGET_CHUNK_NUM_RULES) + 1); - num_chunks_shift = log2(num_chunks); + num_chunks_shift = rte_bsf32(num_chunks); rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); @@ -662,7 +665,16 @@ rte_efd_create(const char *name, uint32_t max_num_rules, uint32_t key_len, } } - table->lookup_fn = EFD_LOOKUP_SCALAR; +#if defined(RTE_ARCH_X86) + /* + * For less than 4 bits, scalar function performs better + * than vectorised version + */ + if (RTE_EFD_VALUE_NUM_BITS > 3 && rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + table->lookup_fn = EFD_LOOKUP_AVX2; + else +#endif + table->lookup_fn = EFD_LOOKUP_SCALAR; /* * Allocate the EFD table offline portion (with the actual rules @@ -1253,6 +1265,13 @@ efd_lookup_internal(const struct efd_online_group_entry * const group, switch (lookup_fn) { +#if defined(RTE_ARCH_X86) + case EFD_LOOKUP_AVX2: + return efd_lookup_internal_avx2(group->hash_idx, + group->lookup_table, + hash_val_a, + hash_val_b); +#endif case EFD_LOOKUP_SCALAR: /* Fall-through */ default: