/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <rte_debug.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_vect.h>

#include <rte_rib6.h>
#include <rte_fib6.h>

#include "trie.h"

#ifdef CC_TRIE_AVX512_SUPPORT

#include "trie_avx512.h"

#endif /* CC_TRIE_AVX512_SUPPORT */

#define TRIE_NAMESIZE		64

enum edge {
	LEDGE,
	REDGE
};
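
/*
 * Dataplane entry encoding used throughout this file: an entry keeps
 * the next hop shifted left by one bit, with the least significant bit
 * (TRIE_EXT_ENT) marking an extension into a tbl8 group. For example,
 * with RTE_FIB6_TRIE_2B a next hop of 5 is stored as 0x000a, while an
 * entry of 0x0007 means "extended, continue in tbl8 group 3" (0x7 >> 1).
 */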
static inline rte_fib6_lookup_fn_t
get_scalar_fn(enum rte_fib_trie_nh_sz nh_sz)
{
	switch (nh_sz) {
	case RTE_FIB6_TRIE_2B:
		return rte_trie_lookup_bulk_2b;
	case RTE_FIB6_TRIE_4B:
		return rte_trie_lookup_bulk_4b;
	case RTE_FIB6_TRIE_8B:
		return rte_trie_lookup_bulk_8b;
	default:
		return NULL;
	}
}
static inline rte_fib6_lookup_fn_t
get_vector_fn(enum rte_fib_trie_nh_sz nh_sz)
{
#ifdef CC_TRIE_AVX512_SUPPORT
	if ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) <= 0) ||
			(rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512))
		return NULL;
	switch (nh_sz) {
	case RTE_FIB6_TRIE_2B:
		return rte_trie_vec_lookup_bulk_2b;
	case RTE_FIB6_TRIE_4B:
		return rte_trie_vec_lookup_bulk_4b;
	case RTE_FIB6_TRIE_8B:
		return rte_trie_vec_lookup_bulk_8b;
	default:
		return NULL;
	}
#else
	RTE_SET_USED(nh_sz);
	return NULL;
#endif
}
rte_fib6_lookup_fn_t
trie_get_lookup_fn(void *p, enum rte_fib6_lookup_type type)
{
	enum rte_fib_trie_nh_sz nh_sz;
	rte_fib6_lookup_fn_t ret_fn;
	struct rte_trie_tbl *dp = p;

	if (dp == NULL)
		return NULL;
	nh_sz = dp->nh_sz;

	switch (type) {
	case RTE_FIB6_LOOKUP_TRIE_SCALAR:
		return get_scalar_fn(nh_sz);
	case RTE_FIB6_LOOKUP_TRIE_VECTOR_AVX512:
		return get_vector_fn(nh_sz);
	case RTE_FIB6_LOOKUP_DEFAULT:
		ret_fn = get_vector_fn(nh_sz);
		return (ret_fn != NULL) ? ret_fn : get_scalar_fn(nh_sz);
	default:
		return NULL;
	}
}
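
/*
 * Illustrative use of the selector above (a sketch, not part of this
 * file; "fib", "ips", "next_hops" and "n" are hypothetical caller
 * state). RTE_FIB6_LOOKUP_DEFAULT prefers the AVX512 path and falls
 * back to scalar when it is unavailable:
 *
 *	rte_fib6_lookup_fn_t fn =
 *		trie_get_lookup_fn(rte_fib6_get_dp(fib),
 *			RTE_FIB6_LOOKUP_DEFAULT);
 *	if (fn != NULL)
 *		fn(rte_fib6_get_dp(fib), ips, next_hops, n);
 */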
static void
write_to_dp(void *ptr, uint64_t val, enum rte_fib_trie_nh_sz size, int n)
{
	int i;
	uint16_t *ptr16 = (uint16_t *)ptr;
	uint32_t *ptr32 = (uint32_t *)ptr;
	uint64_t *ptr64 = (uint64_t *)ptr;

	switch (size) {
	case RTE_FIB6_TRIE_2B:
		for (i = 0; i < n; i++)
			ptr16[i] = (uint16_t)val;
		break;
	case RTE_FIB6_TRIE_4B:
		for (i = 0; i < n; i++)
			ptr32[i] = (uint32_t)val;
		break;
	case RTE_FIB6_TRIE_8B:
		for (i = 0; i < n; i++)
			ptr64[i] = (uint64_t)val;
		break;
	}
}
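
/*
 * write_to_dp() replicates one encoded value into n consecutive
 * entries, whatever the configured entry width. For instance,
 * initializing a whole tbl8 group with a covering next hop is a
 * single call (this is exactly what tbl8_alloc() does below):
 *
 *	write_to_dp(tbl8_ptr, nh, dp->nh_sz, TRIE_TBL8_GRP_NUM_ENT);
 */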
static void
tbl8_pool_init(struct rte_trie_tbl *dp)
{
	uint32_t i;

	/* put entire range of indexes to the tbl8 pool */
	for (i = 0; i < dp->number_tbl8s; i++)
		dp->tbl8_pool[i] = i;
	dp->tbl8_pool_pos = 0;
}
/*
 * Get an index of a free tbl8 from the pool
 */
static inline int32_t
tbl8_get(struct rte_trie_tbl *dp)
{
	if (dp->tbl8_pool_pos == dp->number_tbl8s)
		/* no more free tbl8 */
		return -ENOSPC;
	/* next free index */
	return dp->tbl8_pool[dp->tbl8_pool_pos++];
}
/*
 * Put an index of a free tbl8 back to the pool
 */
static inline void
tbl8_put(struct rte_trie_tbl *dp, uint32_t tbl8_ind)
{
	dp->tbl8_pool[--dp->tbl8_pool_pos] = tbl8_ind;
}
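
/*
 * The tbl8 pool behaves as a LIFO stack over the preinitialized index
 * array: tbl8_get() pops the index at tbl8_pool_pos, tbl8_put() pushes
 * one back. A get/put sequence therefore reuses the most recently
 * freed group first, which keeps the hot groups cache-resident.
 */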
static inline int
tbl8_alloc(struct rte_trie_tbl *dp, uint64_t nh)
{
	int64_t	tbl8_idx;
	uint8_t	*tbl8_ptr;

	tbl8_idx = tbl8_get(dp);
	if (tbl8_idx < 0)
		return tbl8_idx;
	tbl8_ptr = get_tbl_p_by_idx(dp->tbl8,
		tbl8_idx * TRIE_TBL8_GRP_NUM_ENT, dp->nh_sz);
	/* Init tbl8 entries with nexthop from tbl24 */
	write_to_dp((void *)tbl8_ptr, nh, dp->nh_sz,
		TRIE_TBL8_GRP_NUM_ENT);
	return tbl8_idx;
}
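
/*
 * tbl8_alloc() returns either a non-negative group index or the
 * negative errno propagated from tbl8_get(). Seeding every entry of
 * the new group with the covering next hop keeps lookups correct for
 * addresses that fall outside the more specific prefix being added.
 */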
static inline void
tbl8_recycle(struct rte_trie_tbl *dp, void *par, uint64_t tbl8_idx)
{
	uint32_t i;
	uint64_t nh;
	uint16_t *ptr16;
	uint32_t *ptr32;
	uint64_t *ptr64;

	switch (dp->nh_sz) {
	case RTE_FIB6_TRIE_2B:
		ptr16 = &((uint16_t *)dp->tbl8)[tbl8_idx *
				TRIE_TBL8_GRP_NUM_ENT];
		nh = *ptr16;
		if (nh & TRIE_EXT_ENT)
			return;
		for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) {
			if (nh != ptr16[i])
				return;
		}
		write_to_dp(par, nh, dp->nh_sz, 1);
		for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++)
			ptr16[i] = 0;
		break;
	case RTE_FIB6_TRIE_4B:
		ptr32 = &((uint32_t *)dp->tbl8)[tbl8_idx *
				TRIE_TBL8_GRP_NUM_ENT];
		nh = *ptr32;
		if (nh & TRIE_EXT_ENT)
			return;
		for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) {
			if (nh != ptr32[i])
				return;
		}
		write_to_dp(par, nh, dp->nh_sz, 1);
		for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++)
			ptr32[i] = 0;
		break;
	case RTE_FIB6_TRIE_8B:
		ptr64 = &((uint64_t *)dp->tbl8)[tbl8_idx *
				TRIE_TBL8_GRP_NUM_ENT];
		nh = *ptr64;
		if (nh & TRIE_EXT_ENT)
			return;
		for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) {
			if (nh != ptr64[i])
				return;
		}
		write_to_dp(par, nh, dp->nh_sz, 1);
		for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++)
			ptr64[i] = 0;
		break;
	}
	tbl8_put(dp, tbl8_idx);
}
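
/*
 * tbl8_recycle() is the inverse of tbl8_alloc(): once all
 * TRIE_TBL8_GRP_NUM_ENT entries of a group hold the same non-extended
 * value, that value is folded back into the parent entry (*par), the
 * group is zeroed and its index returned to the pool. A group that
 * still contains an extended entry, or two different next hops, is
 * left untouched.
 */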
static inline uint32_t
get_idx(const uint8_t *ip, uint32_t prev_idx, int bytes, int first_byte)
{
	int i;
	uint32_t idx = 0;
	uint8_t bitshift;

	for (i = first_byte; i < (first_byte + bytes); i++) {
		bitshift = (uint8_t)(((first_byte + bytes - 1) - i)*BYTE_SIZE);
		idx |= ip[i] << bitshift;
	}
	return (prev_idx * TRIE_TBL8_GRP_NUM_ENT) + idx;
}
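
/*
 * Worked example for get_idx(): for the tbl24 the first three address
 * bytes are combined, so ip = {0x20, 0x01, 0x0d, ...} with prev_idx 0,
 * bytes = 3 and first_byte = 0 yields idx = 0x20010d. For a tbl8 level
 * a single byte is added on top of prev_idx * TRIE_TBL8_GRP_NUM_ENT.
 */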
static inline uint64_t
get_val_by_p(void *p, uint8_t nh_sz)
{
	uint64_t val = 0;

	switch (nh_sz) {
	case RTE_FIB6_TRIE_2B:
		val = *(uint16_t *)p;
		break;
	case RTE_FIB6_TRIE_4B:
		val = *(uint32_t *)p;
		break;
	case RTE_FIB6_TRIE_8B:
		val = *(uint64_t *)p;
		break;
	}
	return val;
}
/*
 * recursively recycle tbl8's
 */
static void
recycle_root_path(struct rte_trie_tbl *dp, const uint8_t *ip_part,
	uint8_t common_tbl8, void *prev)
{
	void *p;
	uint64_t val;

	val = get_val_by_p(prev, dp->nh_sz);
	if (unlikely((val & TRIE_EXT_ENT) != TRIE_EXT_ENT))
		return;

	if (common_tbl8 != 0) {
		p = get_tbl_p_by_idx(dp->tbl8, (val >> 1) *
			TRIE_TBL8_GRP_NUM_ENT + *ip_part, dp->nh_sz);
		recycle_root_path(dp, ip_part + 1, common_tbl8 - 1, p);
	}
	tbl8_recycle(dp, prev, val >> 1);
}
static inline int
build_common_root(struct rte_trie_tbl *dp, const uint8_t *ip,
	int common_bytes, void **tbl)
{
	void *tbl_ptr = NULL;
	uint64_t *cur_tbl;
	uint64_t val;
	int i, j, idx, prev_idx = 0;

	cur_tbl = dp->tbl24;
	for (i = 3, j = 0; i <= common_bytes; i++) {
		idx = get_idx(ip, prev_idx, i - j, j);
		val = get_tbl_val_by_idx(cur_tbl, idx, dp->nh_sz);
		tbl_ptr = get_tbl_p_by_idx(cur_tbl, idx, dp->nh_sz);
		if ((val & TRIE_EXT_ENT) != TRIE_EXT_ENT) {
			idx = tbl8_alloc(dp, val);
			if (unlikely(idx < 0))
				return idx;
			write_to_dp(tbl_ptr, (idx << 1) |
				TRIE_EXT_ENT, dp->nh_sz, 1);
			prev_idx = idx;
		} else {
			prev_idx = val >> 1;
		}
		j = i;
		cur_tbl = dp->tbl8;
	}
	*tbl = get_tbl_p_by_idx(cur_tbl, prev_idx * TRIE_TBL8_GRP_NUM_ENT,
		dp->nh_sz);
	return 0;
}
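
/*
 * build_common_root() descends from the tbl24 through one tbl8 level
 * per address byte shared by both ends of the range being installed,
 * allocating extended entries on the way down as needed, and returns
 * (via *tbl) the first table that must be split between the two edges.
 */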
static int
write_edge(struct rte_trie_tbl *dp, const uint8_t *ip_part, uint64_t next_hop,
	int len, enum edge edge, void *ent)
{
	uint64_t val = next_hop << 1;
	int tbl8_idx;
	int ret = 0;
	void *p;

	if (len != 0) {
		val = get_val_by_p(ent, dp->nh_sz);
		if ((val & TRIE_EXT_ENT) == TRIE_EXT_ENT)
			tbl8_idx = val >> 1;
		else {
			tbl8_idx = tbl8_alloc(dp, val);
			if (tbl8_idx < 0)
				return tbl8_idx;
			val = (tbl8_idx << 1) | TRIE_EXT_ENT;
		}
		p = get_tbl_p_by_idx(dp->tbl8, (tbl8_idx *
			TRIE_TBL8_GRP_NUM_ENT) + *ip_part, dp->nh_sz);
		ret = write_edge(dp, ip_part + 1, next_hop, len - 1, edge, p);
		if (ret < 0)
			return ret;
		if (edge == LEDGE) {
			write_to_dp((uint8_t *)p + (1 << dp->nh_sz),
				next_hop << 1, dp->nh_sz, UINT8_MAX - *ip_part);
		} else {
			write_to_dp(get_tbl_p_by_idx(dp->tbl8, tbl8_idx *
				TRIE_TBL8_GRP_NUM_ENT, dp->nh_sz),
				next_hop << 1, dp->nh_sz, *ip_part);
		}
		tbl8_recycle(dp, &val, tbl8_idx);
	}

	write_to_dp(ent, val, dp->nh_sz, 1);
	return ret;
}
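
/*
 * Within one tbl8 group, the two edges fill opposite sides of the byte
 * they stop at: LEDGE writes next_hop into the entries above *ip_part
 * (UINT8_MAX - *ip_part of them), REDGE into the entries below it
 * (*ip_part of them). The entry at *ip_part itself is refined by the
 * recursive call one byte deeper.
 */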
#define IPV6_MAX_IDX	(RTE_FIB6_IPV6_ADDR_SIZE - 1)
#define TBL24_BYTES	3
#define TBL8_LEN	(RTE_FIB6_IPV6_ADDR_SIZE - TBL24_BYTES)
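
/*
 * The tbl24 resolves the first TBL24_BYTES (24 bits) of an address in
 * one step; each of the remaining TBL8_LEN (13) bytes may add one tbl8
 * level, so a /128 route needs at most 13 chained groups.
 */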
static int
install_to_dp(struct rte_trie_tbl *dp, const uint8_t *ledge, const uint8_t *r,
	uint64_t next_hop)
{
	void *common_root_tbl;
	void *ent;
	int ret;
	int i;
	int common_bytes;
	int llen, rlen;
	uint8_t redge[16];

	/* decrement redge by 1 */
	rte_rib6_copy_addr(redge, r);
	for (i = 15; i >= 0; i--) {
		redge[i]--;
		if (redge[i] != 0xff)
			break;
	}

	for (common_bytes = 0; common_bytes < 15; common_bytes++) {
		if (ledge[common_bytes] != redge[common_bytes])
			break;
	}

	ret = build_common_root(dp, ledge, common_bytes, &common_root_tbl);
	if (unlikely(ret != 0))
		return ret;
	/* first uncommon tbl8 byte idx */
	uint8_t first_tbl8_byte = RTE_MAX(common_bytes, TBL24_BYTES);

	for (i = IPV6_MAX_IDX; i > first_tbl8_byte; i--) {
		if (ledge[i] != 0)
			break;
	}

	llen = i - first_tbl8_byte + (common_bytes < 3);

	for (i = IPV6_MAX_IDX; i > first_tbl8_byte; i--) {
		if (redge[i] != UINT8_MAX)
			break;
	}
	rlen = i - first_tbl8_byte + (common_bytes < 3);

	/* first noncommon byte */
	uint8_t first_byte_idx = (common_bytes < 3) ? 0 : common_bytes;
	uint8_t first_idx_len = (common_bytes < 3) ? 3 : 1;

	uint32_t left_idx = get_idx(ledge, 0, first_idx_len, first_byte_idx);
	uint32_t right_idx = get_idx(redge, 0, first_idx_len, first_byte_idx);

	ent = get_tbl_p_by_idx(common_root_tbl, left_idx, dp->nh_sz);
	ret = write_edge(dp, &ledge[first_tbl8_byte + !(common_bytes < 3)],
		next_hop, llen, LEDGE, ent);
	if (ret < 0)
		return ret;

	if (right_idx > left_idx + 1) {
		ent = get_tbl_p_by_idx(common_root_tbl, left_idx + 1,
			dp->nh_sz);
		write_to_dp(ent, next_hop << 1, dp->nh_sz,
			right_idx - (left_idx + 1));
	}
	ent = get_tbl_p_by_idx(common_root_tbl, right_idx, dp->nh_sz);
	ret = write_edge(dp, &redge[first_tbl8_byte + !(common_bytes < 3)],
		next_hop, rlen, REDGE, ent);
	if (ret < 0)
		return ret;

	uint8_t common_tbl8 = (common_bytes < TBL24_BYTES) ?
			0 : common_bytes - (TBL24_BYTES - 1);
	ent = get_tbl24_p(dp, ledge, dp->nh_sz);
	recycle_root_path(dp, ledge + TBL24_BYTES, common_tbl8, ent);
	return 0;
}
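
/*
 * install_to_dp() writes one next hop over the half-open address range
 * [ledge, redge): redge is first decremented to make the range
 * inclusive, the shared prefix of both ends picks the common root
 * table, entries strictly between the two edge indexes are filled
 * directly, and write_edge() refines the two boundary entries byte by
 * byte.
 */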
static void
get_nxt_net(uint8_t *ip, uint8_t depth)
{
	int i;
	uint8_t part_depth;
	uint8_t prev_byte;

	for (i = 0, part_depth = depth; part_depth > 8; part_depth -= 8, i++)
		;

	prev_byte = ip[i];
	ip[i] += 1 << (8 - part_depth);
	if (ip[i] < prev_byte) {
		while (--i >= 0) {
			ip[i] += 1;
			if (ip[i] != 0)
				break;
		}
	}
}
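
/*
 * get_nxt_net() advances ip to the first address after the /depth
 * network, carrying overflow into the more significant bytes. E.g.
 * with depth = 20 and a masked address 2001:: the third byte grows by
 * 1 << 4, giving 2001:1000::.
 */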
static int
modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib,
	const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE],
	uint8_t depth, uint64_t next_hop)
{
	struct rte_rib6_node *tmp = NULL;
	uint8_t ledge[RTE_FIB6_IPV6_ADDR_SIZE];
	uint8_t redge[RTE_FIB6_IPV6_ADDR_SIZE];
	int ret;
	uint8_t tmp_depth;

	if (next_hop > get_max_nh(dp->nh_sz))
		return -EINVAL;

	rte_rib6_copy_addr(ledge, ip);
	do {
		tmp = rte_rib6_get_nxt(rib, ip, depth, tmp,
			RTE_RIB6_GET_NXT_COVER);
		if (tmp != NULL) {
			rte_rib6_get_depth(tmp, &tmp_depth);
			if (tmp_depth == depth)
				continue;
			rte_rib6_get_ip(tmp, redge);
			if (rte_rib6_is_equal(ledge, redge)) {
				get_nxt_net(ledge, tmp_depth);
				continue;
			}
			ret = install_to_dp(dp, ledge, redge,
				next_hop);
			if (ret != 0)
				return ret;
			get_nxt_net(redge, tmp_depth);
			rte_rib6_copy_addr(ledge, redge);
		} else {
			rte_rib6_copy_addr(redge, ip);
			get_nxt_net(redge, depth);
			if (rte_rib6_is_equal(ledge, redge))
				break;
			ret = install_to_dp(dp, ledge, redge,
				next_hop);
			if (ret != 0)
				return ret;
		}
	} while (tmp);

	return 0;
}
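
/*
 * modify_dp() walks the RIB for more specific prefixes covered by
 * ip/depth and installs next_hop only into the gaps between them, so
 * existing longer matches keep their own next hops. The running ledge
 * marks where the next uncovered gap starts; redge is either the next
 * covering subprefix or the end of the ip/depth network.
 */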
static int
trie_modify(struct rte_fib6 *fib, const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE],
	uint8_t depth, uint64_t next_hop, int op)
{
	struct rte_trie_tbl *dp;
	struct rte_rib6 *rib;
	struct rte_rib6_node *tmp = NULL;
	struct rte_rib6_node *node;
	struct rte_rib6_node *parent;
	uint8_t ip_masked[RTE_FIB6_IPV6_ADDR_SIZE];
	int i, ret = 0;
	uint64_t par_nh, node_nh;
	uint8_t tmp_depth, depth_diff = 0, parent_depth = 24;

	if ((fib == NULL) || (ip == NULL) || (depth > RTE_FIB6_MAXDEPTH))
		return -EINVAL;

	dp = rte_fib6_get_dp(fib);
	RTE_ASSERT(dp);
	rib = rte_fib6_get_rib(fib);
	RTE_ASSERT(rib);

	for (i = 0; i < RTE_FIB6_IPV6_ADDR_SIZE; i++)
		ip_masked[i] = ip[i] & get_msk_part(depth, i);

	if (depth > 24) {
		tmp = rte_rib6_get_nxt(rib, ip_masked,
			RTE_ALIGN_FLOOR(depth, 8), NULL,
			RTE_RIB6_GET_NXT_COVER);
		if (tmp == NULL) {
			tmp = rte_rib6_lookup(rib, ip);
			if (tmp != NULL) {
				rte_rib6_get_depth(tmp, &tmp_depth);
				parent_depth = RTE_MAX(tmp_depth, 24);
			}
			depth_diff = RTE_ALIGN_CEIL(depth, 8) -
				RTE_ALIGN_CEIL(parent_depth, 8);
			depth_diff = depth_diff >> 3;
		}
	}
	node = rte_rib6_lookup_exact(rib, ip_masked, depth);
	switch (op) {
	case RTE_FIB6_ADD:
		if (node != NULL) {
			rte_rib6_get_nh(node, &node_nh);
			if (node_nh == next_hop)
				return 0;
			ret = modify_dp(dp, rib, ip_masked, depth, next_hop);
			if (ret == 0)
				rte_rib6_set_nh(node, next_hop);
			return ret;
		}

		if ((depth > 24) && (dp->rsvd_tbl8s >=
				dp->number_tbl8s - depth_diff))
			return -ENOSPC;

		node = rte_rib6_insert(rib, ip_masked, depth);
		if (node == NULL)
			return -rte_errno;
		rte_rib6_set_nh(node, next_hop);
		parent = rte_rib6_lookup_parent(node);
		if (parent != NULL) {
			rte_rib6_get_nh(parent, &par_nh);
			if (par_nh == next_hop)
				return 0;
		}
		ret = modify_dp(dp, rib, ip_masked, depth, next_hop);
		if (ret != 0) {
			rte_rib6_remove(rib, ip_masked, depth);
			return ret;
		}

		dp->rsvd_tbl8s += depth_diff;
		return 0;
	case RTE_FIB6_DEL:
		if (node == NULL)
			return -ENOENT;

		parent = rte_rib6_lookup_parent(node);
		if (parent != NULL) {
			rte_rib6_get_nh(parent, &par_nh);
			rte_rib6_get_nh(node, &node_nh);
			if (par_nh != node_nh)
				ret = modify_dp(dp, rib, ip_masked, depth,
					par_nh);
		} else
			ret = modify_dp(dp, rib, ip_masked, depth, dp->def_nh);

		if (ret != 0)
			return ret;
		rte_rib6_remove(rib, ip, depth);

		dp->rsvd_tbl8s -= depth_diff;
		return 0;
	default:
		return -EINVAL;
	}
	return -EINVAL;
}
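
/*
 * The rsvd_tbl8s accounting above is conservative: before inserting a
 * route deeper than /24 it reserves the worst-case number of new tbl8
 * groups (roughly one per address byte past the deepest covering
 * prefix) and refuses the add early rather than failing halfway
 * through installation.
 */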
void *
trie_create(const char *name, int socket_id,
	struct rte_fib6_conf *conf)
{
	char mem_name[TRIE_NAMESIZE];
	struct rte_trie_tbl *dp = NULL;
	uint64_t def_nh;
	uint32_t num_tbl8;
	enum rte_fib_trie_nh_sz nh_sz;

	if ((name == NULL) || (conf == NULL) ||
			(conf->trie.nh_sz < RTE_FIB6_TRIE_2B) ||
			(conf->trie.nh_sz > RTE_FIB6_TRIE_8B) ||
			(conf->trie.num_tbl8 >
			get_max_nh(conf->trie.nh_sz)) ||
			(conf->trie.num_tbl8 == 0) ||
			(conf->default_nh >
			get_max_nh(conf->trie.nh_sz))) {
		rte_errno = EINVAL;
		return NULL;
	}

	def_nh = conf->default_nh;
	nh_sz = conf->trie.nh_sz;
	num_tbl8 = conf->trie.num_tbl8;

	snprintf(mem_name, sizeof(mem_name), "DP_%s", name);
	dp = rte_zmalloc_socket(name, sizeof(struct rte_trie_tbl) +
		TRIE_TBL24_NUM_ENT * (1 << nh_sz), RTE_CACHE_LINE_SIZE,
		socket_id);
	if (dp == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	write_to_dp(&dp->tbl24, (def_nh << 1), nh_sz, 1 << 24);

	snprintf(mem_name, sizeof(mem_name), "TBL8_%p", dp);
	dp->tbl8 = rte_zmalloc_socket(mem_name, TRIE_TBL8_GRP_NUM_ENT *
			(1ll << nh_sz) * (num_tbl8 + 1),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (dp->tbl8 == NULL) {
		rte_errno = ENOMEM;
		rte_free(dp);
		return NULL;
	}
	dp->def_nh = def_nh;
	dp->nh_sz = nh_sz;
	dp->number_tbl8s = num_tbl8;

	snprintf(mem_name, sizeof(mem_name), "TBL8_idxes_%p", dp);
	dp->tbl8_pool = rte_zmalloc_socket(mem_name,
			sizeof(uint32_t) * dp->number_tbl8s,
			RTE_CACHE_LINE_SIZE, socket_id);
	if (dp->tbl8_pool == NULL) {
		rte_errno = ENOMEM;
		rte_free(dp->tbl8);
		rte_free(dp);
		return NULL;
	}

	tbl8_pool_init(dp);

	return dp;
}
void
trie_free(void *p)
{
	struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p;

	rte_free(dp->tbl8_pool);
	rte_free(dp->tbl8);
	rte_free(dp);
}
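
/*
 * Lifecycle sketch (illustrative only; callers normally go through the
 * public rte_fib6 API rather than these functions directly, and the
 * field values below are arbitrary):
 *
 *	struct rte_fib6_conf conf = {
 *		.type = RTE_FIB6_TRIE,
 *		.default_nh = 0,
 *		.max_routes = 1 << 16,
 *		.trie = { .nh_sz = RTE_FIB6_TRIE_4B, .num_tbl8 = 1 << 15 },
 *	};
 *	struct rte_fib6 *fib = rte_fib6_create("example", SOCKET_ID_ANY,
 *		&conf);
 *	...
 *	rte_fib6_free(fib);
 */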