#include <rte_debug.h>
#include <rte_malloc.h>
-#include <rte_prefetch.h>
#include <rte_errno.h>
#include <rte_memory.h>
-#include <rte_branch_prediction.h>
+#include <rte_vect.h>
#include <rte_rib6.h>
#include <rte_fib6.h>
#include "trie.h"
-/* @internal Total number of tbl24 entries. */
-#define TRIE_TBL24_NUM_ENT (1 << 24)
+#ifdef CC_TRIE_AVX512_SUPPORT
-/* Maximum depth value possible for IPv6 LPM. */
-#define TRIE_MAX_DEPTH 128
+#include "trie_avx512.h"
-/* @internal Number of entries in a tbl8 group. */
-#define TRIE_TBL8_GRP_NUM_ENT 256ULL
-
-/* @internal Total number of tbl8 groups in the tbl8. */
-#define TRIE_TBL8_NUM_GROUPS 65536
-
-/* @internal bitmask with valid and valid_group fields set */
-#define TRIE_EXT_ENT 1
+#endif /* CC_TRIE_AVX512_SUPPORT */
#define TRIE_NAMESIZE 64
-#define BITMAP_SLAB_BIT_SIZE_LOG2 6
-#define BITMAP_SLAB_BIT_SIZE (1ULL << BITMAP_SLAB_BIT_SIZE_LOG2)
-#define BITMAP_SLAB_BITMASK (BITMAP_SLAB_BIT_SIZE - 1)
-
-struct rte_trie_tbl {
- uint32_t number_tbl8s; /**< Total number of tbl8s */
- uint32_t rsvd_tbl8s; /**< Number of reserved tbl8s */
- uint32_t cur_tbl8s; /**< Current cumber of tbl8s */
- uint64_t def_nh; /**< Default next hop */
- enum rte_fib_trie_nh_sz nh_sz; /**< Size of nexthop entry */
- uint64_t *tbl8; /**< tbl8 table. */
- uint32_t *tbl8_pool; /**< bitmap containing free tbl8 idxes*/
- uint32_t tbl8_pool_pos;
- /* tbl24 table. */
- __extension__ uint64_t tbl24[0] __rte_cache_aligned;
-};
-
enum edge {
LEDGE,
REDGE
};
-enum lookup_type {
- MACRO,
- INLINE,
- UNI
-};
-static enum lookup_type test_lookup = MACRO;
-
-static inline uint32_t
-get_tbl24_idx(const uint8_t *ip)
-{
- return ip[0] << 16|ip[1] << 8|ip[2];
-}
-
-static inline void *
-get_tbl24_p(struct rte_trie_tbl *dp, const uint8_t *ip, uint8_t nh_sz)
-{
- uint32_t tbl24_idx;
-
- tbl24_idx = get_tbl24_idx(ip);
- return (void *)&((uint8_t *)dp->tbl24)[tbl24_idx << nh_sz];
-}
-
-static inline uint8_t
-bits_in_nh(uint8_t nh_sz)
-{
- return 8 * (1 << nh_sz);
-}
-
-static inline uint64_t
-get_max_nh(uint8_t nh_sz)
-{
- return ((1ULL << (bits_in_nh(nh_sz) - 1)) - 1);
-}
-
-static inline uint64_t
-lookup_msk(uint8_t nh_sz)
-{
- return ((1ULL << ((1 << (nh_sz + 3)) - 1)) << 1) - 1;
-}
-
-static inline uint8_t
-get_psd_idx(uint32_t val, uint8_t nh_sz)
-{
- return val & ((1 << (3 - nh_sz)) - 1);
-}
-
-static inline uint32_t
-get_tbl_pos(uint32_t val, uint8_t nh_sz)
+static inline rte_fib6_lookup_fn_t
+get_scalar_fn(enum rte_fib_trie_nh_sz nh_sz)
{
- return val >> (3 - nh_sz);
+ switch (nh_sz) {
+ case RTE_FIB6_TRIE_2B:
+ return rte_trie_lookup_bulk_2b;
+ case RTE_FIB6_TRIE_4B:
+ return rte_trie_lookup_bulk_4b;
+ case RTE_FIB6_TRIE_8B:
+ return rte_trie_lookup_bulk_8b;
+ default:
+ return NULL;
+ }
}
-static inline uint64_t
-get_tbl_val_by_idx(uint64_t *tbl, uint32_t idx, uint8_t nh_sz)
+static inline rte_fib6_lookup_fn_t
+get_vector_fn(enum rte_fib_trie_nh_sz nh_sz)
{
- return ((tbl[get_tbl_pos(idx, nh_sz)] >> (get_psd_idx(idx, nh_sz) *
- bits_in_nh(nh_sz))) & lookup_msk(nh_sz));
+#ifdef CC_TRIE_AVX512_SUPPORT
+ if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) <= 0) ||
+ (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512))
+ return NULL;
+ switch (nh_sz) {
+ case RTE_FIB6_TRIE_2B:
+ return rte_trie_vec_lookup_bulk_2b;
+ case RTE_FIB6_TRIE_4B:
+ return rte_trie_vec_lookup_bulk_4b;
+ case RTE_FIB6_TRIE_8B:
+ return rte_trie_vec_lookup_bulk_8b;
+ default:
+ return NULL;
+ }
+#else
+ RTE_SET_USED(nh_sz);
+#endif
+ return NULL;
}
-static inline void *
-get_tbl_p_by_idx(uint64_t *tbl, uint64_t idx, uint8_t nh_sz)
+rte_fib6_lookup_fn_t
+trie_get_lookup_fn(void *p, enum rte_fib6_lookup_type type)
{
- return (uint8_t *)tbl + (idx << nh_sz);
-}
+ enum rte_fib_trie_nh_sz nh_sz;
+ rte_fib6_lookup_fn_t ret_fn;
+ struct rte_trie_tbl *dp = p;
-static inline int
-is_entry_extended(uint64_t ent)
-{
- return (ent & TRIE_EXT_ENT) == TRIE_EXT_ENT;
-}
+ if (dp == NULL)
+ return NULL;
-#define LOOKUP_FUNC(suffix, type, nh_sz) \
-static void rte_trie_lookup_bulk_##suffix(void *p, \
- uint8_t ips[][RTE_FIB6_IPV6_ADDR_SIZE], \
- uint64_t *next_hops, const unsigned int n) \
-{ \
- struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p; \
- uint64_t tmp; \
- uint32_t i, j; \
- \
- for (i = 0; i < n; i++) { \
- tmp = ((type *)dp->tbl24)[get_tbl24_idx(&ips[i][0])]; \
- j = 3; \
- while (is_entry_extended(tmp)) { \
- tmp = ((type *)dp->tbl8)[ips[i][j++] + \
- ((tmp >> 1) * TRIE_TBL8_GRP_NUM_ENT)]; \
- } \
- next_hops[i] = tmp >> 1; \
- } \
-}
-LOOKUP_FUNC(2b, uint16_t, 1)
-LOOKUP_FUNC(4b, uint32_t, 2)
-LOOKUP_FUNC(8b, uint64_t, 3)
+ nh_sz = dp->nh_sz;
-rte_fib6_lookup_fn_t
-rte_trie_get_lookup_fn(struct rte_fib6_conf *conf)
-{
- enum rte_fib_trie_nh_sz nh_sz = conf->trie.nh_sz;
-
- if (test_lookup == MACRO) {
- switch (nh_sz) {
- case RTE_FIB6_TRIE_2B:
- return rte_trie_lookup_bulk_2b;
- case RTE_FIB6_TRIE_4B:
- return rte_trie_lookup_bulk_4b;
- case RTE_FIB6_TRIE_8B:
- return rte_trie_lookup_bulk_8b;
- }
+ switch (type) {
+ case RTE_FIB6_LOOKUP_TRIE_SCALAR:
+ return get_scalar_fn(nh_sz);
+ case RTE_FIB6_LOOKUP_TRIE_VECTOR_AVX512:
+ return get_vector_fn(nh_sz);
+ case RTE_FIB6_LOOKUP_DEFAULT:
+ ret_fn = get_vector_fn(nh_sz);
+ return (ret_fn != NULL) ? ret_fn : get_scalar_fn(nh_sz);
+ default:
+ return NULL;
}
-
return NULL;
}
tbl8_idx = tbl8_get(dp);
if (tbl8_idx < 0)
return tbl8_idx;
- tbl8_ptr = (uint8_t *)dp->tbl8 +
- ((tbl8_idx * TRIE_TBL8_GRP_NUM_ENT) <<
- dp->nh_sz);
+ tbl8_ptr = get_tbl_p_by_idx(dp->tbl8,
+ tbl8_idx * TRIE_TBL8_GRP_NUM_ENT, dp->nh_sz);
/*Init tbl8 entries with nexthop from tbl24*/
write_to_dp((void *)tbl8_ptr, nh, dp->nh_sz,
TRIE_TBL8_GRP_NUM_ENT);
bitshift = (int8_t)(((first_byte + bytes - 1) - i)*BYTE_SIZE);
idx |= ip[i] << bitshift;
}
- return (prev_idx * 256) + idx;
+ return (prev_idx * TRIE_TBL8_GRP_NUM_ENT) + idx;
}
static inline uint64_t
return;
if (common_tbl8 != 0) {
- p = get_tbl_p_by_idx(dp->tbl8, (val >> 1) * 256 + *ip_part,
- dp->nh_sz);
+ p = get_tbl_p_by_idx(dp->tbl8, (val >> 1) *
+ TRIE_TBL8_GRP_NUM_ENT + *ip_part, dp->nh_sz);
recycle_root_path(dp, ip_part + 1, common_tbl8 - 1, p);
}
tbl8_recycle(dp, prev, val >> 1);
j = i;
cur_tbl = dp->tbl8;
}
- *tbl = get_tbl_p_by_idx(cur_tbl, prev_idx * 256, dp->nh_sz);
+ *tbl = get_tbl_p_by_idx(cur_tbl, prev_idx * TRIE_TBL8_GRP_NUM_ENT,
+ dp->nh_sz);
return 0;
}
return tbl8_idx;
val = (tbl8_idx << 1)|TRIE_EXT_ENT;
}
- p = get_tbl_p_by_idx(dp->tbl8, (tbl8_idx * 256) + *ip_part,
- dp->nh_sz);
+ p = get_tbl_p_by_idx(dp->tbl8, (tbl8_idx *
+ TRIE_TBL8_GRP_NUM_ENT) + *ip_part, dp->nh_sz);
ret = write_edge(dp, ip_part + 1, next_hop, len - 1, edge, p);
if (ret < 0)
return ret;
write_to_dp((uint8_t *)p + (1 << dp->nh_sz),
next_hop << 1, dp->nh_sz, UINT8_MAX - *ip_part);
} else {
- write_to_dp(get_tbl_p_by_idx(dp->tbl8, tbl8_idx * 256,
- dp->nh_sz),
+ write_to_dp(get_tbl_p_by_idx(dp->tbl8, tbl8_idx *
+ TRIE_TBL8_GRP_NUM_ENT, dp->nh_sz),
next_hop << 1, dp->nh_sz, *ip_part);
}
tbl8_recycle(dp, &val, tbl8_idx);