/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
 * Copyright(c) 2019 Intel Corporation
 */

#include <stdbool.h>
#include <stdint.h>
#include <limits.h>

#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include <rte_rib6.h>
#define RTE_RIB_VALID_NODE	1
#define RIB6_MAXDEPTH		128
/* Maximum length of a RIB6 name. */
#define RTE_RIB6_NAMESIZE	64
TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
static struct rte_tailq_elem rte_rib6_tailq = {
	.name = "RTE_RIB6",
};
EAL_REGISTER_TAILQ(rte_rib6_tailq)
struct rte_rib6_node {
	struct rte_rib6_node	*left;
	struct rte_rib6_node	*right;
	struct rte_rib6_node	*parent;
	uint64_t		nh;
	uint8_t			ip[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t			depth;
	uint8_t			flag;
	/* Per-node user data; sized by conf->ext_sz at pool creation. */
	__extension__ uint64_t	ext[];
};
struct rte_rib6 {
	char		name[RTE_RIB6_NAMESIZE];
	struct rte_rib6_node	*tree;
	struct rte_mempool	*node_pool;
	uint64_t	max_nodes;
};
static inline bool
is_valid_node(const struct rte_rib6_node *node)
{
	return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
}
static inline bool
is_right_node(const struct rte_rib6_node *node)
{
	return node->parent->right == node;
}
/*
 * Check if ip1 is covered by the ip2/depth prefix.
 */
static inline bool
is_covered(const uint8_t ip1[RTE_RIB6_IPV6_ADDR_SIZE],
		const uint8_t ip2[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	int i;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		if ((ip1[i] ^ ip2[i]) & get_msk_part(depth, i))
			return false;

	return true;
}
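/*
 * Worked example (illustrative): for ip2 = 2001:db8:: with depth 32,
 * get_msk_part() yields 0xff for bytes 0-3 and 0x00 for the remaining
 * bytes, so only the first four bytes are compared. ip1 = 2001:db8::1
 * is therefore covered, while ip1 = 2001:db9::1 differs in byte 3
 * (0xb8 ^ 0xb9 = 0x01) and is not. For a depth that is not a multiple
 * of 8, e.g. 20, the mask bytes are 0xff, 0xff, 0xf0, 0x00, ...
 */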
static inline int
get_dir(const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	uint8_t index, msk;

	/*
	 * depth & 127 clamps depth to values that will not
	 * read off the end of ip.
	 * depth is the number of bits deep into ip to traverse, and
	 * is incremented in blocks of 8 (1 byte). This means the last
	 * 3 bits are irrelevant to what the index of ip should be.
	 */
	index = (depth & INT8_MAX) / CHAR_BIT;

	/*
	 * msk is the bitmask used to extract the bit used to decide the
	 * direction of the next step of the binary search.
	 */
	msk = 1 << (7 - (depth & 7));

	return (ip[index] & msk) != 0;
}
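/*
 * Worked example (illustrative): for ip = 2001:db8:: and depth = 2,
 * index = (2 & 127) / 8 = 0 and msk = 1 << (7 - 2) = 0x20, so
 * ip[0] & msk = 0x20 & 0x20 != 0 and the search goes right. For
 * depth = 3, msk = 0x10 and 0x20 & 0x10 == 0, so it goes left.
 */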
static inline struct rte_rib6_node *
get_nxt_node(struct rte_rib6_node *node,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if (node->depth == RIB6_MAXDEPTH)
		return NULL;

	return (get_dir(ip, node->depth)) ? node->right : node->left;
}
static struct rte_rib6_node *
node_alloc(struct rte_rib6 *rib)
{
	struct rte_rib6_node *ent;
	int ret;

	ret = rte_mempool_get(rib->node_pool, (void *)&ent);
	if (unlikely(ret != 0))
		return NULL;

	return ent;
}

static void
node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
{
	rte_mempool_put(rib->node_pool, ent);
}
struct rte_rib6_node *
rte_rib6_lookup(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	struct rte_rib6_node *cur;
	struct rte_rib6_node *prev = NULL;

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	cur = rib->tree;
	while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
		if (is_valid_node(cur))
			prev = cur;
		cur = get_nxt_node(cur, ip);
	}

	return prev;
}
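/*
 * Usage sketch (illustrative; the variable names are hypothetical):
 *
 *	uint8_t dst[RTE_RIB6_IPV6_ADDR_SIZE] = {
 *		0x20, 0x01, 0x0d, 0xb8, 0, 0, 0, 0,
 *		0, 0, 0, 0, 0, 0, 0, 0x01};
 *	uint64_t next_hop;
 *	struct rte_rib6_node *node = rte_rib6_lookup(rib, dst);
 *
 *	if (node != NULL && rte_rib6_get_nh(node, &next_hop) == 0)
 *		use_next_hop(next_hop);
 *
 * use_next_hop() stands in for application code. The lookup follows a
 * single path down from the root, so its cost is bounded by the depth
 * of the matched branch.
 */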
struct rte_rib6_node *
rte_rib6_lookup_parent(struct rte_rib6_node *ent)
{
	struct rte_rib6_node *tmp;

	if (ent == NULL)
		return NULL;

	tmp = ent->parent;
	while ((tmp != NULL) && (!is_valid_node(tmp)))
		tmp = tmp->parent;

	return tmp;
}
struct rte_rib6_node *
rte_rib6_lookup_exact(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if (unlikely(rib == NULL || ip == NULL || depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}
	cur = rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	while (cur != NULL) {
		if (rte_rib6_is_equal(cur->ip, tmp_ip) &&
				(cur->depth == depth) &&
				is_valid_node(cur))
			return cur;

		if (!(is_covered(tmp_ip, cur->ip, cur->depth)) ||
				(cur->depth >= depth))
			break;

		cur = get_nxt_node(cur, tmp_ip);
	}

	return NULL;
}
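/*
 * Note: unlike rte_rib6_lookup(), which returns the longest covering
 * prefix, this only matches a route with exactly this ip/depth pair.
 * Illustrative example: after inserting 2001:db8::/32, an exact lookup
 * of 2001:db8::/32 returns the node, while 2001:db8::/48 returns NULL.
 */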
/*
 * Traverse a subtree and retrieve the routes that are more specific
 * than the given ip/depth prefix.
 * last == NULL means this is the first invocation.
 */
struct rte_rib6_node *
rte_rib6_get_nxt(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE],
	uint8_t depth, struct rte_rib6_node *last, int flag)
{
	struct rte_rib6_node *tmp, *prev = NULL;
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i;

	if (unlikely(rib == NULL || ip == NULL || depth > RIB6_MAXDEPTH)) {
		rte_errno = EINVAL;
		return NULL;
	}

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	if (last == NULL) {
		/* Descend from the root to the top of the subtree. */
		tmp = rib->tree;
		while ((tmp) && (tmp->depth < depth))
			tmp = get_nxt_node(tmp, tmp_ip);
	} else {
		/* Resume: climb up until an unvisited right branch exists. */
		tmp = last;
		while ((tmp->parent != NULL) && (is_right_node(tmp) ||
				(tmp->parent->right == NULL))) {
			tmp = tmp->parent;
			if (is_valid_node(tmp) &&
					(is_covered(tmp->ip, tmp_ip, depth) &&
					(tmp->depth > depth)))
				return tmp;
		}
		tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
	}
	while (tmp) {
		if (is_valid_node(tmp) &&
				(is_covered(tmp->ip, tmp_ip, depth) &&
				(tmp->depth > depth))) {
			prev = tmp;
			if (flag == RTE_RIB6_GET_NXT_COVER)
				return prev;
		}
		tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
	}

	return prev;
}
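/*
 * Usage sketch (illustrative): enumerate every route under 2001:db8::/32.
 *
 *	uint8_t prefix[RTE_RIB6_IPV6_ADDR_SIZE] = {0x20, 0x01, 0x0d, 0xb8};
 *	struct rte_rib6_node *route = NULL;
 *
 *	while ((route = rte_rib6_get_nxt(rib, prefix, 32, route,
 *			RTE_RIB6_GET_NXT_ALL)) != NULL)
 *		process(route);
 *
 * process() is a hypothetical callback. With RTE_RIB6_GET_NXT_COVER the
 * traversal returns a covered route without descending below it, so only
 * the most general of the more specific routes are reported.
 */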
void
rte_rib6_remove(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node *cur, *prev, *child;

	cur = rte_rib6_lookup_exact(rib, ip, depth);
	if (cur == NULL)
		return;

	cur->flag &= ~RTE_RIB_VALID_NODE;
	while (!is_valid_node(cur)) {
		if ((cur->left != NULL) && (cur->right != NULL))
			return;
		child = (cur->left == NULL) ? cur->right : cur->left;
		if (child != NULL)
			child->parent = cur->parent;
		if (cur->parent == NULL) {
			rib->tree = child;
			node_free(rib, cur);
			return;
		}
		if (cur->parent->left == cur)
			cur->parent->left = child;
		else
			cur->parent->right = child;
		prev = cur;
		cur = cur->parent;
		node_free(rib, prev);
	}
}
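/*
 * Note (illustrative): removal only clears the VALID flag and then prunes
 * upward while nodes are invalid and have at most one child, re-linking
 * the remaining child to the grandparent. An invalid node with two
 * children is kept, since it still acts as a split point of the trie.
 */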
struct rte_rib6_node *
rte_rib6_insert(struct rte_rib6 *rib,
	const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
{
	struct rte_rib6_node **tmp;
	struct rte_rib6_node *prev = NULL;
	struct rte_rib6_node *new_node = NULL;
	struct rte_rib6_node *common_node = NULL;
	uint8_t common_prefix[RTE_RIB6_IPV6_ADDR_SIZE];
	uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
	int i, d;
	uint8_t common_depth, ip_xor;

	if (unlikely((rib == NULL || ip == NULL || depth > RIB6_MAXDEPTH))) {
		rte_errno = EINVAL;
		return NULL;
	}

	tmp = &rib->tree;

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		tmp_ip[i] = ip[i] & get_msk_part(depth, i);

	new_node = rte_rib6_lookup_exact(rib, tmp_ip, depth);
	if (new_node != NULL) {
		rte_errno = EEXIST;
		return NULL;
	}

	new_node = node_alloc(rib);
	if (new_node == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}
	new_node->left = NULL;
	new_node->right = NULL;
	new_node->parent = NULL;
	rte_rib6_copy_addr(new_node->ip, tmp_ip);
	new_node->depth = depth;
	new_node->flag = RTE_RIB_VALID_NODE;

	/* traverse down the tree to find matching node or closest matching */
	while (1) {
		/* insert as the last node in the branch */
		if (*tmp == NULL) {
			*tmp = new_node;
			new_node->parent = prev;
			return *tmp;
		}
		/*
		 * Intermediate node found.
		 * The previous rte_rib6_lookup_exact() returned NULL
		 * because the node was not marked valid, but a node with
		 * the proper search criteria exists.
		 * Validate the intermediate node and return.
		 */
		if (rte_rib6_is_equal(tmp_ip, (*tmp)->ip) &&
				(depth == (*tmp)->depth)) {
			node_free(rib, new_node);
			(*tmp)->flag |= RTE_RIB_VALID_NODE;
			return *tmp;
		}

		if (!is_covered(tmp_ip, (*tmp)->ip, (*tmp)->depth) ||
				((*tmp)->depth >= depth))
			break;

		prev = *tmp;
		tmp = (get_dir(tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
				&(*tmp)->left;
	}

	/* closest node found, new_node should be inserted in the middle */
	common_depth = RTE_MIN(depth, (*tmp)->depth);
	for (i = 0, d = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++) {
		ip_xor = tmp_ip[i] ^ (*tmp)->ip[i];
		if (ip_xor == 0)
			d += 8;
		else {
			/*
			 * ip_xor is promoted to a 32-bit int; shifting it
			 * into the top byte lets __builtin_clz() count the
			 * leading bits this byte has in common.
			 */
			d += __builtin_clz(ip_xor << 24);
			break;
		}
	}

	common_depth = RTE_MIN(d, common_depth);

	for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
		common_prefix[i] = tmp_ip[i] & get_msk_part(common_depth, i);

	if (rte_rib6_is_equal(common_prefix, tmp_ip) &&
			(common_depth == depth)) {
		/* insert as a parent */
		if (get_dir((*tmp)->ip, depth))
			new_node->right = *tmp;
		else
			new_node->left = *tmp;
		new_node->parent = (*tmp)->parent;
		(*tmp)->parent = new_node;
		*tmp = new_node;
	} else {
		/* create an intermediate node common to both prefixes */
		common_node = node_alloc(rib);
		if (common_node == NULL) {
			node_free(rib, new_node);
			rte_errno = ENOMEM;
			return NULL;
		}
		rte_rib6_copy_addr(common_node->ip, common_prefix);
		common_node->depth = common_depth;
		common_node->flag = 0;
		common_node->parent = (*tmp)->parent;
		new_node->parent = common_node;
		(*tmp)->parent = common_node;
		if (get_dir((*tmp)->ip, common_depth) == 1) {
			common_node->left = new_node;
			common_node->right = *tmp;
		} else {
			common_node->left = *tmp;
			common_node->right = new_node;
		}
		*tmp = common_node;
	}

	return new_node;
}
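/*
 * Worked example (illustrative) of the intermediate-node case: if the
 * trie holds 2001:db8:a::/48 and 2001:db8:b::/48 is inserted, the
 * addresses first differ in byte 5 (0x0a ^ 0x0b = 0x01), so
 * d = 5 * 8 + __builtin_clz(0x01 << 24) = 40 + 7 = 47. A non-valid
 * split node for 2001:db8:a::/47 (flag = 0) is allocated with the two
 * /48 routes as its children.
 */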
int
rte_rib6_get_ip(const struct rte_rib6_node *node,
	uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
{
	if (unlikely(node == NULL || ip == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	rte_rib6_copy_addr(ip, node->ip);
	return 0;
}

int
rte_rib6_get_depth(const struct rte_rib6_node *node, uint8_t *depth)
{
	if (unlikely(node == NULL || depth == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*depth = node->depth;
	return 0;
}

void *
rte_rib6_get_ext(struct rte_rib6_node *node)
{
	return (node == NULL) ? NULL : &node->ext[0];
}

int
rte_rib6_get_nh(const struct rte_rib6_node *node, uint64_t *nh)
{
	if (unlikely(node == NULL || nh == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	*nh = node->nh;
	return 0;
}

int
rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
{
	if (unlikely(node == NULL)) {
		rte_errno = EINVAL;
		return -1;
	}
	node->nh = nh;
	return 0;
}
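/*
 * Usage sketch (illustrative): add a route and attach a next hop.
 *
 *	uint8_t prefix[RTE_RIB6_IPV6_ADDR_SIZE] = {0x20, 0x01, 0x0d, 0xb8};
 *	struct rte_rib6_node *node = rte_rib6_insert(rib, prefix, 32);
 *
 *	if (node != NULL)
 *		rte_rib6_set_nh(node, 5);
 *
 * The next hop is an opaque uint64_t chosen by the application;
 * additional per-route data of conf->ext_sz bytes is reachable through
 * rte_rib6_get_ext().
 */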
struct rte_rib6 *
rte_rib6_create(const char *name, int socket_id,
	const struct rte_rib6_conf *conf)
{
	char mem_name[RTE_RIB6_NAMESIZE];
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_mempool *node_pool;

	/* Check user arguments. */
	if (unlikely(name == NULL || conf == NULL || conf->max_nodes <= 0)) {
		rte_errno = EINVAL;
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
	node_pool = rte_mempool_create(mem_name, conf->max_nodes,
		sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
		NULL, NULL, NULL, NULL, socket_id, 0);
	if (node_pool == NULL) {
		RTE_LOG(ERR, LPM,
			"Can not allocate mempool for RIB6 %s\n", name);
		return NULL;
	}

	snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* guarantee there's no existing RIB6 with this name */
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *)te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rib = NULL;
	if (te != NULL) {
		rte_errno = EEXIST;
		goto exit;
	}

	/* allocate tailq entry */
	te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
	if (unlikely(te == NULL)) {
		RTE_LOG(ERR, LPM,
			"Can not allocate tailq entry for RIB6 %s\n", name);
		rte_errno = ENOMEM;
		goto exit;
	}

	/* Allocate memory to store the RIB6 data structures. */
	rib = rte_zmalloc_socket(mem_name,
		sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
	if (unlikely(rib == NULL)) {
		RTE_LOG(ERR, LPM, "RIB6 %s memory allocation failed\n", name);
		rte_errno = ENOMEM;
		goto free_te;
	}

	rte_strlcpy(rib->name, name, sizeof(rib->name));
	rib->tree = NULL;
	rib->max_nodes = conf->max_nodes;
	rib->node_pool = node_pool;
	te->data = (void *)rib;
	TAILQ_INSERT_TAIL(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();
	return rib;

free_te:
	rte_free(te);
exit:
	rte_mcfg_tailq_write_unlock();
	rte_mempool_free(node_pool);
	return NULL;
}
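/*
 * Usage sketch (illustrative; handle_error() is hypothetical):
 *
 *	struct rte_rib6_conf conf = {
 *		.max_nodes = 1 << 16,
 *		.ext_sz = 0,
 *	};
 *	struct rte_rib6 *rib = rte_rib6_create("example_rib6",
 *		SOCKET_ID_ANY, &conf);
 *
 *	if (rib == NULL)
 *		handle_error(rte_errno);
 *
 * max_nodes bounds the mempool, so it must account for internal split
 * nodes as well as inserted routes.
 */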
struct rte_rib6 *
rte_rib6_find_existing(const char *name)
{
	struct rte_rib6 *rib = NULL;
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;

	if (unlikely(name == NULL)) {
		rte_errno = EINVAL;
		return NULL;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_read_lock();
	TAILQ_FOREACH(te, rib6_list, next) {
		rib = (struct rte_rib6 *) te->data;
		if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
			break;
	}
	rte_mcfg_tailq_read_unlock();

	if (te == NULL) {
		rte_errno = ENOENT;
		return NULL;
	}

	return rib;
}
void
rte_rib6_free(struct rte_rib6 *rib)
{
	struct rte_tailq_entry *te;
	struct rte_rib6_list *rib6_list;
	struct rte_rib6_node *tmp = NULL;
	uint8_t zero_ip[RTE_RIB6_IPV6_ADDR_SIZE] = {0};

	if (unlikely(rib == NULL)) {
		rte_errno = EINVAL;
		return;
	}

	rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);

	rte_mcfg_tailq_write_lock();

	/* find our tailq entry */
	TAILQ_FOREACH(te, rib6_list, next) {
		if (te->data == (void *)rib)
			break;
	}
	if (te != NULL)
		TAILQ_REMOVE(rib6_list, te, next);

	rte_mcfg_tailq_write_unlock();

	/*
	 * Remove every remaining route; a zeroed address is passed
	 * because rte_rib6_get_nxt() sets EINVAL and returns NULL
	 * when given a NULL ip.
	 */
	while ((tmp = rte_rib6_get_nxt(rib, zero_ip, 0, tmp,
			RTE_RIB6_GET_NXT_ALL)) != NULL)
		rte_rib6_remove(rib, tmp->ip, tmp->depth);

	rte_mempool_free(rib->node_pool);
	rte_free(rib);
	rte_free(te);
}
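/*
 * Usage sketch (illustrative): a previously created RIB6 can be fetched
 * by name and destroyed once it is no longer needed.
 *
 *	struct rte_rib6 *rib = rte_rib6_find_existing("example_rib6");
 *
 *	if (rib != NULL)
 *		rte_rib6_free(rib);
 *
 * rte_rib6_free() walks the trie via rte_rib6_get_nxt() to return every
 * node to the mempool before the pool itself is freed.
 */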