--- /dev/null
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_mempool.h>
+#include <rte_rwlock.h>
+#include <rte_string_fns.h>
+#include <rte_tailq.h>
+
+#include <rte_rib6.h>
+
+#define RTE_RIB_VALID_NODE 1
+#define RIB6_MAXDEPTH 128
+/* Maximum length of a RIB6 name. */
+#define RTE_RIB6_NAMESIZE 64
+
+TAILQ_HEAD(rte_rib6_list, rte_tailq_entry);
+static struct rte_tailq_elem rte_rib6_tailq = {
+ .name = "RTE_RIB6",
+};
+EAL_REGISTER_TAILQ(rte_rib6_tailq)
+
+struct rte_rib6_node {
+ struct rte_rib6_node *left;
+ struct rte_rib6_node *right;
+ struct rte_rib6_node *parent;
+ uint64_t nh;
+ uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE];
+ uint8_t depth;
+ uint8_t flag;
+ __extension__ uint64_t ext[0];
+};
+
+struct rte_rib6 {
+ char name[RTE_RIB6_NAMESIZE];
+ struct rte_rib6_node *tree;
+ struct rte_mempool *node_pool;
+ uint32_t cur_nodes;
+ uint32_t cur_routes;
+ int max_nodes;
+};
+
+static inline bool
+is_valid_node(struct rte_rib6_node *node)
+{
+ return (node->flag & RTE_RIB_VALID_NODE) == RTE_RIB_VALID_NODE;
+}
+
+static inline bool
+is_right_node(struct rte_rib6_node *node)
+{
+ return node->parent->right == node;
+}
+
+/*
+ * Check if ip1 is covered by ip2/depth prefix
+ */
+static inline bool
+is_covered(const uint8_t ip1[RTE_RIB6_IPV6_ADDR_SIZE],
+ const uint8_t ip2[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
+{
+ int i;
+
+ for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
+ if ((ip1[i] ^ ip2[i]) & get_msk_part(depth, i))
+ return false;
+
+ return true;
+}
+
+static inline int
+get_dir(const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
+{
+ int i = 0;
+ uint8_t p_depth, msk;
+
+ for (p_depth = depth; p_depth >= 8; p_depth -= 8)
+ i++;
+
+ msk = 1 << (7 - p_depth);
+ return (ip[i] & msk) != 0;
+}
+
+static inline struct rte_rib6_node *
+get_nxt_node(struct rte_rib6_node *node,
+ const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
+{
+ return (get_dir(ip, node->depth)) ? node->right : node->left;
+}
+
+static struct rte_rib6_node *
+node_alloc(struct rte_rib6 *rib)
+{
+ struct rte_rib6_node *ent;
+ int ret;
+
+ ret = rte_mempool_get(rib->node_pool, (void *)&ent);
+ if (unlikely(ret != 0))
+ return NULL;
+ ++rib->cur_nodes;
+ return ent;
+}
+
+static void
+node_free(struct rte_rib6 *rib, struct rte_rib6_node *ent)
+{
+ --rib->cur_nodes;
+ rte_mempool_put(rib->node_pool, ent);
+}
+
+struct rte_rib6_node *
+rte_rib6_lookup(struct rte_rib6 *rib,
+ const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
+{
+ struct rte_rib6_node *cur;
+ struct rte_rib6_node *prev = NULL;
+
+ if (unlikely(rib == NULL)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ cur = rib->tree;
+
+ while ((cur != NULL) && is_covered(ip, cur->ip, cur->depth)) {
+ if (is_valid_node(cur))
+ prev = cur;
+ cur = get_nxt_node(cur, ip);
+ }
+ return prev;
+}
+
+struct rte_rib6_node *
+rte_rib6_lookup_parent(struct rte_rib6_node *ent)
+{
+ struct rte_rib6_node *tmp;
+
+ if (ent == NULL)
+ return NULL;
+
+ tmp = ent->parent;
+ while ((tmp != NULL) && (!is_valid_node(tmp)))
+ tmp = tmp->parent;
+
+ return tmp;
+}
+
+struct rte_rib6_node *
+rte_rib6_lookup_exact(struct rte_rib6 *rib,
+ const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
+{
+ struct rte_rib6_node *cur;
+ uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
+ int i;
+
+ if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ cur = rib->tree;
+
+ for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
+ tmp_ip[i] = ip[i] & get_msk_part(depth, i);
+
+ while (cur != NULL) {
+ if (rte_rib6_is_equal(cur->ip, tmp_ip) &&
+ (cur->depth == depth) &&
+ is_valid_node(cur))
+ return cur;
+
+ if (!(is_covered(tmp_ip, cur->ip, cur->depth)) ||
+ (cur->depth >= depth))
+ break;
+
+ cur = get_nxt_node(cur, tmp_ip);
+ }
+
+ return NULL;
+}
+
+/*
+ * Traverses on subtree and retreeves more specific routes
+ * for a given in args ip/depth prefix
+ * last = NULL means the first invocation
+ */
+struct rte_rib6_node *
+rte_rib6_get_nxt(struct rte_rib6 *rib,
+ const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE],
+ uint8_t depth, struct rte_rib6_node *last, int flag)
+{
+ struct rte_rib6_node *tmp, *prev = NULL;
+ uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
+ int i;
+
+ if ((rib == NULL) || (ip == NULL) || (depth > RIB6_MAXDEPTH)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
+ tmp_ip[i] = ip[i] & get_msk_part(depth, i);
+
+ if (last == NULL) {
+ tmp = rib->tree;
+ while ((tmp) && (tmp->depth < depth))
+ tmp = get_nxt_node(tmp, tmp_ip);
+ } else {
+ tmp = last;
+ while ((tmp->parent != NULL) && (is_right_node(tmp) ||
+ (tmp->parent->right == NULL))) {
+ tmp = tmp->parent;
+ if (is_valid_node(tmp) &&
+ (is_covered(tmp->ip, tmp_ip, depth) &&
+ (tmp->depth > depth)))
+ return tmp;
+ }
+ tmp = (tmp->parent != NULL) ? tmp->parent->right : NULL;
+ }
+ while (tmp) {
+ if (is_valid_node(tmp) &&
+ (is_covered(tmp->ip, tmp_ip, depth) &&
+ (tmp->depth > depth))) {
+ prev = tmp;
+ if (flag == RTE_RIB6_GET_NXT_COVER)
+ return prev;
+ }
+ tmp = (tmp->left != NULL) ? tmp->left : tmp->right;
+ }
+ return prev;
+}
+
+void
+rte_rib6_remove(struct rte_rib6 *rib,
+ const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
+{
+ struct rte_rib6_node *cur, *prev, *child;
+
+ cur = rte_rib6_lookup_exact(rib, ip, depth);
+ if (cur == NULL)
+ return;
+
+ --rib->cur_routes;
+ cur->flag &= ~RTE_RIB_VALID_NODE;
+ while (!is_valid_node(cur)) {
+ if ((cur->left != NULL) && (cur->right != NULL))
+ return;
+ child = (cur->left == NULL) ? cur->right : cur->left;
+ if (child != NULL)
+ child->parent = cur->parent;
+ if (cur->parent == NULL) {
+ rib->tree = child;
+ node_free(rib, cur);
+ return;
+ }
+ if (cur->parent->left == cur)
+ cur->parent->left = child;
+ else
+ cur->parent->right = child;
+ prev = cur;
+ cur = cur->parent;
+ node_free(rib, prev);
+ }
+}
+
+struct rte_rib6_node *
+rte_rib6_insert(struct rte_rib6 *rib,
+ const uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE], uint8_t depth)
+{
+ struct rte_rib6_node **tmp;
+ struct rte_rib6_node *prev = NULL;
+ struct rte_rib6_node *new_node = NULL;
+ struct rte_rib6_node *common_node = NULL;
+ uint8_t common_prefix[RTE_RIB6_IPV6_ADDR_SIZE];
+ uint8_t tmp_ip[RTE_RIB6_IPV6_ADDR_SIZE];
+ int i, d;
+ uint8_t common_depth, ip_xor;
+
+ if (unlikely((rib == NULL) || (ip == NULL) ||
+ (depth > RIB6_MAXDEPTH))) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ tmp = &rib->tree;
+
+ for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
+ tmp_ip[i] = ip[i] & get_msk_part(depth, i);
+
+ new_node = rte_rib6_lookup_exact(rib, tmp_ip, depth);
+ if (new_node != NULL) {
+ rte_errno = EEXIST;
+ return NULL;
+ }
+
+ new_node = node_alloc(rib);
+ if (new_node == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ new_node->left = NULL;
+ new_node->right = NULL;
+ new_node->parent = NULL;
+ rte_rib6_copy_addr(new_node->ip, tmp_ip);
+ new_node->depth = depth;
+ new_node->flag = RTE_RIB_VALID_NODE;
+
+ /* traverse down the tree to find matching node or closest matching */
+ while (1) {
+ /* insert as the last node in the branch */
+ if (*tmp == NULL) {
+ *tmp = new_node;
+ new_node->parent = prev;
+ ++rib->cur_routes;
+ return *tmp;
+ }
+ /*
+ * Intermediate node found.
+ * Previous rte_rib6_lookup_exact() returned NULL
+ * but node with proper search criteria is found.
+ * Validate intermediate node and return.
+ */
+ if (rte_rib6_is_equal(tmp_ip, (*tmp)->ip) &&
+ (depth == (*tmp)->depth)) {
+ node_free(rib, new_node);
+ (*tmp)->flag |= RTE_RIB_VALID_NODE;
+ ++rib->cur_routes;
+ return *tmp;
+ }
+
+ if (!is_covered(tmp_ip, (*tmp)->ip, (*tmp)->depth) ||
+ ((*tmp)->depth >= depth)) {
+ break;
+ }
+ prev = *tmp;
+
+ tmp = (get_dir(tmp_ip, (*tmp)->depth)) ? &(*tmp)->right :
+ &(*tmp)->left;
+ }
+
+ /* closest node found, new_node should be inserted in the middle */
+ common_depth = RTE_MIN(depth, (*tmp)->depth);
+ for (i = 0, d = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++) {
+ ip_xor = tmp_ip[i] ^ (*tmp)->ip[i];
+ if (ip_xor == 0)
+ d += 8;
+ else {
+ d += __builtin_clz(ip_xor << 24);
+ break;
+ }
+ }
+
+ common_depth = RTE_MIN(d, common_depth);
+
+ for (i = 0; i < RTE_RIB6_IPV6_ADDR_SIZE; i++)
+ common_prefix[i] = tmp_ip[i] & get_msk_part(common_depth, i);
+
+ if (rte_rib6_is_equal(common_prefix, tmp_ip) &&
+ (common_depth == depth)) {
+ /* insert as a parent */
+ if (get_dir((*tmp)->ip, depth))
+ new_node->right = *tmp;
+ else
+ new_node->left = *tmp;
+ new_node->parent = (*tmp)->parent;
+ (*tmp)->parent = new_node;
+ *tmp = new_node;
+ } else {
+ /* create intermediate node */
+ common_node = node_alloc(rib);
+ if (common_node == NULL) {
+ node_free(rib, new_node);
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ rte_rib6_copy_addr(common_node->ip, common_prefix);
+ common_node->depth = common_depth;
+ common_node->flag = 0;
+ common_node->parent = (*tmp)->parent;
+ new_node->parent = common_node;
+ (*tmp)->parent = common_node;
+ if (get_dir((*tmp)->ip, common_depth) == 1) {
+ common_node->left = new_node;
+ common_node->right = *tmp;
+ } else {
+ common_node->left = *tmp;
+ common_node->right = new_node;
+ }
+ *tmp = common_node;
+ }
+ ++rib->cur_routes;
+ return new_node;
+}
+
+int
+rte_rib6_get_ip(struct rte_rib6_node *node, uint8_t ip[RTE_RIB6_IPV6_ADDR_SIZE])
+{
+ if ((node == NULL) || (ip == NULL)) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_rib6_copy_addr(ip, node->ip);
+ return 0;
+}
+
+int
+rte_rib6_get_depth(struct rte_rib6_node *node, uint8_t *depth)
+{
+ if ((node == NULL) || (depth == NULL)) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ *depth = node->depth;
+ return 0;
+}
+
+void *
+rte_rib6_get_ext(struct rte_rib6_node *node)
+{
+ return (node == NULL) ? NULL : &node->ext[0];
+}
+
+int
+rte_rib6_get_nh(struct rte_rib6_node *node, uint64_t *nh)
+{
+ if ((node == NULL) || (nh == NULL)) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ *nh = node->nh;
+ return 0;
+}
+
+int
+rte_rib6_set_nh(struct rte_rib6_node *node, uint64_t nh)
+{
+ if (node == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ node->nh = nh;
+ return 0;
+}
+
+struct rte_rib6 *
+rte_rib6_create(const char *name, int socket_id, struct rte_rib6_conf *conf)
+{
+ char mem_name[RTE_RIB6_NAMESIZE];
+ struct rte_rib6 *rib = NULL;
+ struct rte_tailq_entry *te;
+ struct rte_rib6_list *rib6_list;
+ struct rte_mempool *node_pool;
+
+ /* Check user arguments. */
+ if ((name == NULL) || (conf == NULL) ||
+ (conf->max_nodes == 0)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ snprintf(mem_name, sizeof(mem_name), "MP_%s", name);
+ node_pool = rte_mempool_create(mem_name, conf->max_nodes,
+ sizeof(struct rte_rib6_node) + conf->ext_sz, 0, 0,
+ NULL, NULL, NULL, NULL, socket_id, 0);
+
+ if (node_pool == NULL) {
+ RTE_LOG(ERR, LPM,
+ "Can not allocate mempool for RIB6 %s\n", name);
+ return NULL;
+ }
+
+ snprintf(mem_name, sizeof(mem_name), "RIB6_%s", name);
+ rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);
+
+ rte_mcfg_tailq_write_lock();
+
+ /* guarantee there's no existing */
+ TAILQ_FOREACH(te, rib6_list, next) {
+ rib = (struct rte_rib6 *)te->data;
+ if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
+ break;
+ }
+ rib = NULL;
+ if (te != NULL) {
+ rte_errno = EEXIST;
+ goto exit;
+ }
+
+ /* allocate tailq entry */
+ te = rte_zmalloc("RIB6_TAILQ_ENTRY", sizeof(*te), 0);
+ if (te == NULL) {
+ RTE_LOG(ERR, LPM,
+ "Can not allocate tailq entry for RIB6 %s\n", name);
+ rte_errno = ENOMEM;
+ goto exit;
+ }
+
+ /* Allocate memory to store the RIB6 data structures. */
+ rib = rte_zmalloc_socket(mem_name,
+ sizeof(struct rte_rib6), RTE_CACHE_LINE_SIZE, socket_id);
+ if (rib == NULL) {
+ RTE_LOG(ERR, LPM, "RIB6 %s memory allocation failed\n", name);
+ rte_errno = ENOMEM;
+ goto free_te;
+ }
+
+ rte_strlcpy(rib->name, name, sizeof(rib->name));
+ rib->tree = NULL;
+ rib->max_nodes = conf->max_nodes;
+ rib->node_pool = node_pool;
+
+ te->data = (void *)rib;
+ TAILQ_INSERT_TAIL(rib6_list, te, next);
+
+ rte_mcfg_tailq_write_unlock();
+
+ return rib;
+
+free_te:
+ rte_free(te);
+exit:
+ rte_mcfg_tailq_write_unlock();
+ rte_mempool_free(node_pool);
+
+ return NULL;
+}
+
+struct rte_rib6 *
+rte_rib6_find_existing(const char *name)
+{
+ struct rte_rib6 *rib = NULL;
+ struct rte_tailq_entry *te;
+ struct rte_rib6_list *rib6_list;
+
+ if (unlikely(name == NULL)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);
+
+ rte_mcfg_tailq_read_lock();
+ TAILQ_FOREACH(te, rib6_list, next) {
+ rib = (struct rte_rib6 *) te->data;
+ if (strncmp(name, rib->name, RTE_RIB6_NAMESIZE) == 0)
+ break;
+ }
+ rte_mcfg_tailq_read_unlock();
+
+ if (te == NULL) {
+ rte_errno = ENOENT;
+ return NULL;
+ }
+
+ return rib;
+}
+
+void
+rte_rib6_free(struct rte_rib6 *rib)
+{
+ struct rte_tailq_entry *te;
+ struct rte_rib6_list *rib6_list;
+ struct rte_rib6_node *tmp = NULL;
+
+ if (unlikely(rib == NULL)) {
+ rte_errno = EINVAL;
+ return;
+ }
+
+ rib6_list = RTE_TAILQ_CAST(rte_rib6_tailq.head, rte_rib6_list);
+
+ rte_mcfg_tailq_write_lock();
+
+ /* find our tailq entry */
+ TAILQ_FOREACH(te, rib6_list, next) {
+ if (te->data == (void *)rib)
+ break;
+ }
+ if (te != NULL)
+ TAILQ_REMOVE(rib6_list, te, next);
+
+ rte_mcfg_tailq_write_unlock();
+
+ while ((tmp = rte_rib6_get_nxt(rib, 0, 0, tmp,
+ RTE_RIB6_GET_NXT_ALL)) != NULL)
+ rte_rib6_remove(rib, tmp->ip, tmp->depth);
+
+ rte_mempool_free(rib->node_pool);
+
+ rte_free(rib);
+ rte_free(te);
+}