1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
3 * Copyright(c) 2019 Intel Corporation
12 #include <rte_debug.h>
13 #include <rte_malloc.h>
14 #include <rte_prefetch.h>
15 #include <rte_errno.h>
16 #include <rte_memory.h>
17 #include <rte_branch_prediction.h>
23 /* @internal Total number of tbl24 entries. */
24 #define TRIE_TBL24_NUM_ENT (1 << 24)
26 /* Maximum depth value possible for IPv6 LPM. */
27 #define TRIE_MAX_DEPTH 128
29 /* @internal Number of entries in a tbl8 group. */
30 #define TRIE_TBL8_GRP_NUM_ENT 256ULL
32 /* @internal Total number of tbl8 groups in the tbl8. */
33 #define TRIE_TBL8_NUM_GROUPS 65536
35 /* @internal bitmask with valid and valid_group fields set */
36 #define TRIE_EXT_ENT 1
38 #define TRIE_NAMESIZE 64
40 #define BITMAP_SLAB_BIT_SIZE_LOG2 6
41 #define BITMAP_SLAB_BIT_SIZE (1ULL << BITMAP_SLAB_BIT_SIZE_LOG2)
42 #define BITMAP_SLAB_BITMASK (BITMAP_SLAB_BIT_SIZE - 1)
45 uint32_t number_tbl8s; /**< Total number of tbl8s */
46 uint32_t rsvd_tbl8s; /**< Number of reserved tbl8s */
47 uint32_t cur_tbl8s; /**< Current cumber of tbl8s */
48 uint64_t def_nh; /**< Default next hop */
49 enum rte_fib_trie_nh_sz nh_sz; /**< Size of nexthop entry */
50 uint64_t *tbl8; /**< tbl8 table. */
51 uint32_t *tbl8_pool; /**< bitmap containing free tbl8 idxes*/
52 uint32_t tbl8_pool_pos;
54 __extension__ uint64_t tbl24[0] __rte_cache_aligned;
62 static inline uint32_t
63 get_tbl24_idx(const uint8_t *ip)
65 return ip[0] << 16|ip[1] << 8|ip[2];
69 get_tbl24_p(struct rte_trie_tbl *dp, const uint8_t *ip, uint8_t nh_sz)
73 tbl24_idx = get_tbl24_idx(ip);
74 return (void *)&((uint8_t *)dp->tbl24)[tbl24_idx << nh_sz];
78 bits_in_nh(uint8_t nh_sz)
80 return 8 * (1 << nh_sz);
83 static inline uint64_t
84 get_max_nh(uint8_t nh_sz)
86 return ((1ULL << (bits_in_nh(nh_sz) - 1)) - 1);
89 static inline uint64_t
90 lookup_msk(uint8_t nh_sz)
92 return ((1ULL << ((1 << (nh_sz + 3)) - 1)) << 1) - 1;
96 get_psd_idx(uint32_t val, uint8_t nh_sz)
98 return val & ((1 << (3 - nh_sz)) - 1);
101 static inline uint32_t
102 get_tbl_pos(uint32_t val, uint8_t nh_sz)
104 return val >> (3 - nh_sz);
107 static inline uint64_t
108 get_tbl_val_by_idx(uint64_t *tbl, uint32_t idx, uint8_t nh_sz)
110 return ((tbl[get_tbl_pos(idx, nh_sz)] >> (get_psd_idx(idx, nh_sz) *
111 bits_in_nh(nh_sz))) & lookup_msk(nh_sz));
115 get_tbl_p_by_idx(uint64_t *tbl, uint64_t idx, uint8_t nh_sz)
117 return (uint8_t *)tbl + (idx << nh_sz);
121 is_entry_extended(uint64_t ent)
123 return (ent & TRIE_EXT_ENT) == TRIE_EXT_ENT;
126 #define LOOKUP_FUNC(suffix, type, nh_sz) \
127 static void rte_trie_lookup_bulk_##suffix(void *p, \
128 uint8_t ips[][RTE_FIB6_IPV6_ADDR_SIZE], \
129 uint64_t *next_hops, const unsigned int n) \
131 struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p; \
135 for (i = 0; i < n; i++) { \
136 tmp = ((type *)dp->tbl24)[get_tbl24_idx(&ips[i][0])]; \
138 while (is_entry_extended(tmp)) { \
139 tmp = ((type *)dp->tbl8)[ips[i][j++] + \
140 ((tmp >> 1) * TRIE_TBL8_GRP_NUM_ENT)]; \
142 next_hops[i] = tmp >> 1; \
145 LOOKUP_FUNC(2b, uint16_t, 1)
146 LOOKUP_FUNC(4b, uint32_t, 2)
147 LOOKUP_FUNC(8b, uint64_t, 3)
149 static inline rte_fib6_lookup_fn_t
150 get_scalar_fn(enum rte_fib_trie_nh_sz nh_sz)
153 case RTE_FIB6_TRIE_2B:
154 return rte_trie_lookup_bulk_2b;
155 case RTE_FIB6_TRIE_4B:
156 return rte_trie_lookup_bulk_4b;
157 case RTE_FIB6_TRIE_8B:
158 return rte_trie_lookup_bulk_8b;
165 trie_get_lookup_fn(void *p, enum rte_fib6_lookup_type type)
167 enum rte_fib_trie_nh_sz nh_sz;
168 struct rte_trie_tbl *dp = p;
176 case RTE_FIB6_LOOKUP_TRIE_SCALAR:
177 return get_scalar_fn(nh_sz);
185 write_to_dp(void *ptr, uint64_t val, enum rte_fib_trie_nh_sz size, int n)
188 uint16_t *ptr16 = (uint16_t *)ptr;
189 uint32_t *ptr32 = (uint32_t *)ptr;
190 uint64_t *ptr64 = (uint64_t *)ptr;
193 case RTE_FIB6_TRIE_2B:
194 for (i = 0; i < n; i++)
195 ptr16[i] = (uint16_t)val;
197 case RTE_FIB6_TRIE_4B:
198 for (i = 0; i < n; i++)
199 ptr32[i] = (uint32_t)val;
201 case RTE_FIB6_TRIE_8B:
202 for (i = 0; i < n; i++)
203 ptr64[i] = (uint64_t)val;
209 tbl8_pool_init(struct rte_trie_tbl *dp)
213 /* put entire range of indexes to the tbl8 pool */
214 for (i = 0; i < dp->number_tbl8s; i++)
215 dp->tbl8_pool[i] = i;
217 dp->tbl8_pool_pos = 0;
221 * Get an index of a free tbl8 from the pool
223 static inline int32_t
224 tbl8_get(struct rte_trie_tbl *dp)
226 if (dp->tbl8_pool_pos == dp->number_tbl8s)
227 /* no more free tbl8 */
231 return dp->tbl8_pool[dp->tbl8_pool_pos++];
235 * Put an index of a free tbl8 back to the pool
238 tbl8_put(struct rte_trie_tbl *dp, uint32_t tbl8_ind)
240 dp->tbl8_pool[--dp->tbl8_pool_pos] = tbl8_ind;
244 tbl8_alloc(struct rte_trie_tbl *dp, uint64_t nh)
249 tbl8_idx = tbl8_get(dp);
252 tbl8_ptr = get_tbl_p_by_idx(dp->tbl8,
253 tbl8_idx * TRIE_TBL8_GRP_NUM_ENT, dp->nh_sz);
254 /*Init tbl8 entries with nexthop from tbl24*/
255 write_to_dp((void *)tbl8_ptr, nh, dp->nh_sz,
256 TRIE_TBL8_GRP_NUM_ENT);
261 tbl8_recycle(struct rte_trie_tbl *dp, void *par, uint64_t tbl8_idx)
270 case RTE_FIB6_TRIE_2B:
271 ptr16 = &((uint16_t *)dp->tbl8)[tbl8_idx *
272 TRIE_TBL8_GRP_NUM_ENT];
274 if (nh & TRIE_EXT_ENT)
276 for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) {
280 write_to_dp(par, nh, dp->nh_sz, 1);
281 for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++)
284 case RTE_FIB6_TRIE_4B:
285 ptr32 = &((uint32_t *)dp->tbl8)[tbl8_idx *
286 TRIE_TBL8_GRP_NUM_ENT];
288 if (nh & TRIE_EXT_ENT)
290 for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) {
294 write_to_dp(par, nh, dp->nh_sz, 1);
295 for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++)
298 case RTE_FIB6_TRIE_8B:
299 ptr64 = &((uint64_t *)dp->tbl8)[tbl8_idx *
300 TRIE_TBL8_GRP_NUM_ENT];
302 if (nh & TRIE_EXT_ENT)
304 for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) {
308 write_to_dp(par, nh, dp->nh_sz, 1);
309 for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++)
313 tbl8_put(dp, tbl8_idx);
317 static inline uint32_t
318 get_idx(const uint8_t *ip, uint32_t prev_idx, int bytes, int first_byte)
324 for (i = first_byte; i < (first_byte + bytes); i++) {
325 bitshift = (int8_t)(((first_byte + bytes - 1) - i)*BYTE_SIZE);
326 idx |= ip[i] << bitshift;
328 return (prev_idx * TRIE_TBL8_GRP_NUM_ENT) + idx;
331 static inline uint64_t
332 get_val_by_p(void *p, uint8_t nh_sz)
337 case RTE_FIB6_TRIE_2B:
338 val = *(uint16_t *)p;
340 case RTE_FIB6_TRIE_4B:
341 val = *(uint32_t *)p;
343 case RTE_FIB6_TRIE_8B:
344 val = *(uint64_t *)p;
351 * recursively recycle tbl8's
354 recycle_root_path(struct rte_trie_tbl *dp, const uint8_t *ip_part,
355 uint8_t common_tbl8, void *prev)
360 val = get_val_by_p(prev, dp->nh_sz);
361 if (unlikely((val & TRIE_EXT_ENT) != TRIE_EXT_ENT))
364 if (common_tbl8 != 0) {
365 p = get_tbl_p_by_idx(dp->tbl8, (val >> 1) *
366 TRIE_TBL8_GRP_NUM_ENT + *ip_part, dp->nh_sz);
367 recycle_root_path(dp, ip_part + 1, common_tbl8 - 1, p);
369 tbl8_recycle(dp, prev, val >> 1);
373 build_common_root(struct rte_trie_tbl *dp, const uint8_t *ip,
374 int common_bytes, void **tbl)
376 void *tbl_ptr = NULL;
379 int i, j, idx, prev_idx = 0;
382 for (i = 3, j = 0; i <= common_bytes; i++) {
383 idx = get_idx(ip, prev_idx, i - j, j);
384 val = get_tbl_val_by_idx(cur_tbl, idx, dp->nh_sz);
385 tbl_ptr = get_tbl_p_by_idx(cur_tbl, idx, dp->nh_sz);
386 if ((val & TRIE_EXT_ENT) != TRIE_EXT_ENT) {
387 idx = tbl8_alloc(dp, val);
388 if (unlikely(idx < 0))
390 write_to_dp(tbl_ptr, (idx << 1) |
391 TRIE_EXT_ENT, dp->nh_sz, 1);
399 *tbl = get_tbl_p_by_idx(cur_tbl, prev_idx * TRIE_TBL8_GRP_NUM_ENT,
405 write_edge(struct rte_trie_tbl *dp, const uint8_t *ip_part, uint64_t next_hop,
406 int len, enum edge edge, void *ent)
408 uint64_t val = next_hop << 1;
414 val = get_val_by_p(ent, dp->nh_sz);
415 if ((val & TRIE_EXT_ENT) == TRIE_EXT_ENT)
418 tbl8_idx = tbl8_alloc(dp, val);
421 val = (tbl8_idx << 1)|TRIE_EXT_ENT;
423 p = get_tbl_p_by_idx(dp->tbl8, (tbl8_idx *
424 TRIE_TBL8_GRP_NUM_ENT) + *ip_part, dp->nh_sz);
425 ret = write_edge(dp, ip_part + 1, next_hop, len - 1, edge, p);
429 write_to_dp((uint8_t *)p + (1 << dp->nh_sz),
430 next_hop << 1, dp->nh_sz, UINT8_MAX - *ip_part);
432 write_to_dp(get_tbl_p_by_idx(dp->tbl8, tbl8_idx *
433 TRIE_TBL8_GRP_NUM_ENT, dp->nh_sz),
434 next_hop << 1, dp->nh_sz, *ip_part);
436 tbl8_recycle(dp, &val, tbl8_idx);
439 write_to_dp(ent, val, dp->nh_sz, 1);
443 #define IPV6_MAX_IDX (RTE_FIB6_IPV6_ADDR_SIZE - 1)
444 #define TBL24_BYTES 3
445 #define TBL8_LEN (RTE_FIB6_IPV6_ADDR_SIZE - TBL24_BYTES)
448 install_to_dp(struct rte_trie_tbl *dp, const uint8_t *ledge, const uint8_t *r,
451 void *common_root_tbl;
459 /* decrement redge by 1*/
460 rte_rib6_copy_addr(redge, r);
461 for (i = 15; i >= 0; i--) {
463 if (redge[i] != 0xff)
467 for (common_bytes = 0; common_bytes < 15; common_bytes++) {
468 if (ledge[common_bytes] != redge[common_bytes])
472 ret = build_common_root(dp, ledge, common_bytes, &common_root_tbl);
473 if (unlikely(ret != 0))
475 /*first uncommon tbl8 byte idx*/
476 uint8_t first_tbl8_byte = RTE_MAX(common_bytes, TBL24_BYTES);
478 for (i = IPV6_MAX_IDX; i > first_tbl8_byte; i--) {
483 llen = i - first_tbl8_byte + (common_bytes < 3);
485 for (i = IPV6_MAX_IDX; i > first_tbl8_byte; i--) {
486 if (redge[i] != UINT8_MAX)
489 rlen = i - first_tbl8_byte + (common_bytes < 3);
491 /*first noncommon byte*/
492 uint8_t first_byte_idx = (common_bytes < 3) ? 0 : common_bytes;
493 uint8_t first_idx_len = (common_bytes < 3) ? 3 : 1;
495 uint32_t left_idx = get_idx(ledge, 0, first_idx_len, first_byte_idx);
496 uint32_t right_idx = get_idx(redge, 0, first_idx_len, first_byte_idx);
498 ent = get_tbl_p_by_idx(common_root_tbl, left_idx, dp->nh_sz);
499 ret = write_edge(dp, &ledge[first_tbl8_byte + !(common_bytes < 3)],
500 next_hop, llen, LEDGE, ent);
504 if (right_idx > left_idx + 1) {
505 ent = get_tbl_p_by_idx(common_root_tbl, left_idx + 1,
507 write_to_dp(ent, next_hop << 1, dp->nh_sz,
508 right_idx - (left_idx + 1));
510 ent = get_tbl_p_by_idx(common_root_tbl, right_idx, dp->nh_sz);
511 ret = write_edge(dp, &redge[first_tbl8_byte + !((common_bytes < 3))],
512 next_hop, rlen, REDGE, ent);
516 uint8_t common_tbl8 = (common_bytes < TBL24_BYTES) ?
517 0 : common_bytes - (TBL24_BYTES - 1);
518 ent = get_tbl24_p(dp, ledge, dp->nh_sz);
519 recycle_root_path(dp, ledge + TBL24_BYTES, common_tbl8, ent);
524 get_nxt_net(uint8_t *ip, uint8_t depth)
530 for (i = 0, part_depth = depth; part_depth > 8; part_depth -= 8, i++)
534 ip[i] += 1 << (8 - part_depth);
535 if (ip[i] < prev_byte) {
545 modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib,
546 const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE],
547 uint8_t depth, uint64_t next_hop)
549 struct rte_rib6_node *tmp = NULL;
550 uint8_t ledge[RTE_FIB6_IPV6_ADDR_SIZE];
551 uint8_t redge[RTE_FIB6_IPV6_ADDR_SIZE];
555 if (next_hop > get_max_nh(dp->nh_sz))
558 rte_rib6_copy_addr(ledge, ip);
560 tmp = rte_rib6_get_nxt(rib, ip, depth, tmp,
561 RTE_RIB6_GET_NXT_COVER);
563 rte_rib6_get_depth(tmp, &tmp_depth);
564 if (tmp_depth == depth)
566 rte_rib6_get_ip(tmp, redge);
567 if (rte_rib6_is_equal(ledge, redge)) {
568 get_nxt_net(ledge, tmp_depth);
571 ret = install_to_dp(dp, ledge, redge,
575 get_nxt_net(redge, tmp_depth);
576 rte_rib6_copy_addr(ledge, redge);
578 rte_rib6_copy_addr(redge, ip);
579 get_nxt_net(redge, depth);
580 if (rte_rib6_is_equal(ledge, redge))
582 ret = install_to_dp(dp, ledge, redge,
593 trie_modify(struct rte_fib6 *fib, const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE],
594 uint8_t depth, uint64_t next_hop, int op)
596 struct rte_trie_tbl *dp;
597 struct rte_rib6 *rib;
598 struct rte_rib6_node *tmp = NULL;
599 struct rte_rib6_node *node;
600 struct rte_rib6_node *parent;
601 uint8_t ip_masked[RTE_FIB6_IPV6_ADDR_SIZE];
603 uint64_t par_nh, node_nh;
604 uint8_t tmp_depth, depth_diff = 0, parent_depth = 24;
606 if ((fib == NULL) || (ip == NULL) || (depth > RTE_FIB6_MAXDEPTH))
609 dp = rte_fib6_get_dp(fib);
611 rib = rte_fib6_get_rib(fib);
614 for (i = 0; i < RTE_FIB6_IPV6_ADDR_SIZE; i++)
615 ip_masked[i] = ip[i] & get_msk_part(depth, i);
618 tmp = rte_rib6_get_nxt(rib, ip_masked,
619 RTE_ALIGN_FLOOR(depth, 8), NULL,
620 RTE_RIB6_GET_NXT_COVER);
622 tmp = rte_rib6_lookup(rib, ip);
624 rte_rib6_get_depth(tmp, &tmp_depth);
625 parent_depth = RTE_MAX(tmp_depth, 24);
627 depth_diff = RTE_ALIGN_CEIL(depth, 8) -
628 RTE_ALIGN_CEIL(parent_depth, 8);
629 depth_diff = depth_diff >> 3;
632 node = rte_rib6_lookup_exact(rib, ip_masked, depth);
636 rte_rib6_get_nh(node, &node_nh);
637 if (node_nh == next_hop)
639 ret = modify_dp(dp, rib, ip_masked, depth, next_hop);
641 rte_rib6_set_nh(node, next_hop);
645 if ((depth > 24) && (dp->rsvd_tbl8s >=
646 dp->number_tbl8s - depth_diff))
649 node = rte_rib6_insert(rib, ip_masked, depth);
652 rte_rib6_set_nh(node, next_hop);
653 parent = rte_rib6_lookup_parent(node);
654 if (parent != NULL) {
655 rte_rib6_get_nh(parent, &par_nh);
656 if (par_nh == next_hop)
659 ret = modify_dp(dp, rib, ip_masked, depth, next_hop);
661 rte_rib6_remove(rib, ip_masked, depth);
665 dp->rsvd_tbl8s += depth_diff;
671 parent = rte_rib6_lookup_parent(node);
672 if (parent != NULL) {
673 rte_rib6_get_nh(parent, &par_nh);
674 rte_rib6_get_nh(node, &node_nh);
675 if (par_nh != node_nh)
676 ret = modify_dp(dp, rib, ip_masked, depth,
679 ret = modify_dp(dp, rib, ip_masked, depth, dp->def_nh);
683 rte_rib6_remove(rib, ip, depth);
685 dp->rsvd_tbl8s -= depth_diff;
694 trie_create(const char *name, int socket_id,
695 struct rte_fib6_conf *conf)
697 char mem_name[TRIE_NAMESIZE];
698 struct rte_trie_tbl *dp = NULL;
701 enum rte_fib_trie_nh_sz nh_sz;
703 if ((name == NULL) || (conf == NULL) ||
704 (conf->trie.nh_sz < RTE_FIB6_TRIE_2B) ||
705 (conf->trie.nh_sz > RTE_FIB6_TRIE_8B) ||
706 (conf->trie.num_tbl8 >
707 get_max_nh(conf->trie.nh_sz)) ||
708 (conf->trie.num_tbl8 == 0) ||
710 get_max_nh(conf->trie.nh_sz))) {
716 def_nh = conf->default_nh;
717 nh_sz = conf->trie.nh_sz;
718 num_tbl8 = conf->trie.num_tbl8;
720 snprintf(mem_name, sizeof(mem_name), "DP_%s", name);
721 dp = rte_zmalloc_socket(name, sizeof(struct rte_trie_tbl) +
722 TRIE_TBL24_NUM_ENT * (1 << nh_sz), RTE_CACHE_LINE_SIZE,
729 write_to_dp(&dp->tbl24, (def_nh << 1), nh_sz, 1 << 24);
731 snprintf(mem_name, sizeof(mem_name), "TBL8_%p", dp);
732 dp->tbl8 = rte_zmalloc_socket(mem_name, TRIE_TBL8_GRP_NUM_ENT *
733 (1ll << nh_sz) * (num_tbl8 + 1),
734 RTE_CACHE_LINE_SIZE, socket_id);
735 if (dp->tbl8 == NULL) {
742 dp->number_tbl8s = num_tbl8;
744 snprintf(mem_name, sizeof(mem_name), "TBL8_idxes_%p", dp);
745 dp->tbl8_pool = rte_zmalloc_socket(mem_name,
746 sizeof(uint32_t) * dp->number_tbl8s,
747 RTE_CACHE_LINE_SIZE, socket_id);
748 if (dp->tbl8_pool == NULL) {
763 struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p;
765 rte_free(dp->tbl8_pool);