fib: add dataplane algorithm for IPv6
authorVladimir Medvedkin <vladimir.medvedkin@intel.com>
Fri, 1 Nov 2019 15:21:41 +0000 (15:21 +0000)
committerThomas Monjalon <thomas@monjalon.net>
Tue, 5 Nov 2019 23:11:44 +0000 (00:11 +0100)
Add fib implementation for ipv6 using modified DIR24_8 algorithm.
Implementation is similar to current LPM6 implementation but has
few enhancements:
faster control plane operations
more bits for userdata in table entries
configurable userdata size

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
lib/librte_fib/Makefile
lib/librte_fib/meson.build
lib/librte_fib/rte_fib6.c
lib/librte_fib/rte_fib6.h
lib/librte_fib/trie.c [new file with mode: 0644]
lib/librte_fib/trie.h [new file with mode: 0644]

index 67fde9a..4abd80b 100644 (file)
@@ -17,7 +17,7 @@ EXPORT_MAP := rte_fib_version.map
 LIBABIVER := 1
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_FIB) := rte_fib.c rte_fib6.c dir24_8.c
+SRCS-$(CONFIG_RTE_LIBRTE_FIB) := rte_fib.c rte_fib6.c dir24_8.c trie.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_FIB)-include := rte_fib.h rte_fib6.h
index c63cac8..e2c6f44 100644 (file)
@@ -3,6 +3,6 @@
 # Copyright(c) 2019 Intel Corporation
 
 allow_experimental_apis = true
-sources = files('rte_fib.c', 'rte_fib6.c', 'dir24_8.c')
+sources = files('rte_fib.c', 'rte_fib6.c', 'dir24_8.c', 'trie.c')
 headers = files('rte_fib.h', 'rte_fib6.h')
 deps += ['rib']
index 9f00a80..354227d 100644 (file)
@@ -17,6 +17,8 @@
 #include <rte_rib6.h>
 #include <rte_fib6.h>
 
+#include "trie.h"
+
 TAILQ_HEAD(rte_fib6_list, rte_tailq_entry);
 static struct rte_tailq_elem rte_fib6_tailq = {
        .name = "RTE_FIB6",
@@ -92,12 +94,22 @@ static int
 init_dataplane(struct rte_fib6 *fib, __rte_unused int socket_id,
        struct rte_fib6_conf *conf)
 {
+       char dp_name[sizeof(void *)];
+
+       snprintf(dp_name, sizeof(dp_name), "%p", fib);
        switch (conf->type) {
        case RTE_FIB6_DUMMY:
                fib->dp = fib;
                fib->lookup = dummy_lookup;
                fib->modify = dummy_modify;
                return 0;
+       case RTE_FIB6_TRIE:
+               fib->dp = trie_create(dp_name, socket_id, conf);
+               if (fib->dp == NULL)
+                       return -rte_errno;
+               fib->lookup = rte_trie_get_lookup_fn(conf);
+               fib->modify = trie_modify;
+               return 0;
        default:
                return -EINVAL;
        }
@@ -260,6 +272,8 @@ free_dataplane(struct rte_fib6 *fib)
        switch (fib->type) {
        case RTE_FIB6_DUMMY:
                return;
+       case RTE_FIB6_TRIE:
+               trie_free(fib->dp);
        default:
                return;
        }
index 3322123..4268704 100644 (file)
 #define RTE_FIB6_MAXDEPTH       128
 
 struct rte_fib6;
+struct rte_rib6;
 
 /** Type of FIB struct */
 enum rte_fib6_type {
        RTE_FIB6_DUMMY,         /**< RIB6 tree based FIB */
+       RTE_FIB6_TRIE,          /**< TRIE based fib  */
        RTE_FIB6_TYPE_MAX
 };
 
@@ -40,12 +42,24 @@ enum rte_fib6_op {
        RTE_FIB6_DEL,
 };
 
+enum rte_fib_trie_nh_sz {
+       RTE_FIB6_TRIE_2B = 1,
+       RTE_FIB6_TRIE_4B,
+       RTE_FIB6_TRIE_8B
+};
+
 /** FIB configuration structure */
 struct rte_fib6_conf {
        enum rte_fib6_type type; /**< Type of FIB struct */
        /** Default value returned on lookup if there is no route */
        uint64_t default_nh;
        int     max_routes;
+       union {
+               struct {
+                       enum rte_fib_trie_nh_sz nh_sz;
+                       uint32_t        num_tbl8;
+               } trie;
+       };
 };
 
 /**
diff --git a/lib/librte_fib/trie.c b/lib/librte_fib/trie.c
new file mode 100644 (file)
index 0000000..198e815
--- /dev/null
@@ -0,0 +1,760 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <rte_debug.h>
+#include <rte_malloc.h>
+#include <rte_prefetch.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_branch_prediction.h>
+
+#include <rte_rib6.h>
+#include <rte_fib6.h>
+#include "trie.h"
+
+/* @internal Total number of tbl24 entries. */
+#define TRIE_TBL24_NUM_ENT     (1 << 24)
+
+/* Maximum depth value possible for IPv6 LPM. */
+#define TRIE_MAX_DEPTH         128
+
+/* @internal Number of entries in a tbl8 group. */
+#define TRIE_TBL8_GRP_NUM_ENT  256ULL
+
+/* @internal Total number of tbl8 groups in the tbl8. */
+#define TRIE_TBL8_NUM_GROUPS   65536
+
+/* @internal bitmask with valid and valid_group fields set */
+#define TRIE_EXT_ENT           1
+
+#define TRIE_NAMESIZE          64
+
+#define BITMAP_SLAB_BIT_SIZE_LOG2      6
+#define BITMAP_SLAB_BIT_SIZE           (1ULL << BITMAP_SLAB_BIT_SIZE_LOG2)
+#define BITMAP_SLAB_BITMASK            (BITMAP_SLAB_BIT_SIZE - 1)
+
+struct rte_trie_tbl {
+       uint32_t        number_tbl8s;   /**< Total number of tbl8s */
+       uint32_t        rsvd_tbl8s;     /**< Number of reserved tbl8s */
+       uint32_t        cur_tbl8s;      /**< Current cumber of tbl8s */
+       uint64_t        def_nh;         /**< Default next hop */
+       enum rte_fib_trie_nh_sz nh_sz;  /**< Size of nexthop entry */
+       uint64_t        *tbl8;          /**< tbl8 table. */
+       uint32_t        *tbl8_pool;     /**< bitmap containing free tbl8 idxes*/
+       uint32_t        tbl8_pool_pos;
+       /* tbl24 table. */
+       __extension__ uint64_t  tbl24[0] __rte_cache_aligned;
+};
+
+enum edge {
+       LEDGE,
+       REDGE
+};
+
+enum lookup_type {
+       MACRO,
+       INLINE,
+       UNI
+};
+static enum lookup_type test_lookup = MACRO;
+
+static inline uint32_t
+get_tbl24_idx(const uint8_t *ip)
+{
+       return ip[0] << 16|ip[1] << 8|ip[2];
+}
+
+static inline void *
+get_tbl24_p(struct rte_trie_tbl *dp, const uint8_t *ip, uint8_t nh_sz)
+{
+       uint32_t tbl24_idx;
+
+       tbl24_idx = get_tbl24_idx(ip);
+       return (void *)&((uint8_t *)dp->tbl24)[tbl24_idx << nh_sz];
+}
+
+static inline uint8_t
+bits_in_nh(uint8_t nh_sz)
+{
+       return 8 * (1 << nh_sz);
+}
+
+static inline uint64_t
+get_max_nh(uint8_t nh_sz)
+{
+       return ((1ULL << (bits_in_nh(nh_sz) - 1)) - 1);
+}
+
+static inline uint64_t
+lookup_msk(uint8_t nh_sz)
+{
+       return ((1ULL << ((1 << (nh_sz + 3)) - 1)) << 1) - 1;
+}
+
+static inline uint8_t
+get_psd_idx(uint32_t val, uint8_t nh_sz)
+{
+       return val & ((1 << (3 - nh_sz)) - 1);
+}
+
+static inline uint32_t
+get_tbl_pos(uint32_t val, uint8_t nh_sz)
+{
+       return val >> (3 - nh_sz);
+}
+
+static inline uint64_t
+get_tbl_val_by_idx(uint64_t *tbl, uint32_t idx, uint8_t nh_sz)
+{
+       return ((tbl[get_tbl_pos(idx, nh_sz)] >> (get_psd_idx(idx, nh_sz) *
+               bits_in_nh(nh_sz))) & lookup_msk(nh_sz));
+}
+
+static inline void *
+get_tbl_p_by_idx(uint64_t *tbl, uint64_t idx, uint8_t nh_sz)
+{
+       return (uint8_t *)tbl + (idx << nh_sz);
+}
+
+static inline int
+is_entry_extended(uint64_t ent)
+{
+       return (ent & TRIE_EXT_ENT) == TRIE_EXT_ENT;
+}
+
+#define LOOKUP_FUNC(suffix, type, nh_sz)                               \
+static void rte_trie_lookup_bulk_##suffix(void *p,                     \
+       uint8_t ips[][RTE_FIB6_IPV6_ADDR_SIZE],                 \
+       uint64_t *next_hops, const unsigned int n)                      \
+{                                                                      \
+       struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p;             \
+       uint64_t tmp;                                                   \
+       uint32_t i, j;                                                  \
+                                                                       \
+       for (i = 0; i < n; i++) {                                       \
+               tmp = ((type *)dp->tbl24)[get_tbl24_idx(&ips[i][0])];   \
+               j = 3;                                                  \
+               while (is_entry_extended(tmp)) {                        \
+                       tmp = ((type *)dp->tbl8)[ips[i][j++] +          \
+                               ((tmp >> 1) * TRIE_TBL8_GRP_NUM_ENT)];  \
+               }                                                       \
+               next_hops[i] = tmp >> 1;                                \
+       }                                                               \
+}
+LOOKUP_FUNC(2b, uint16_t, 1)
+LOOKUP_FUNC(4b, uint32_t, 2)
+LOOKUP_FUNC(8b, uint64_t, 3)
+
+rte_fib6_lookup_fn_t
+rte_trie_get_lookup_fn(struct rte_fib6_conf *conf)
+{
+       enum rte_fib_trie_nh_sz nh_sz = conf->trie.nh_sz;
+
+       if (test_lookup == MACRO) {
+               switch (nh_sz) {
+               case RTE_FIB6_TRIE_2B:
+                       return rte_trie_lookup_bulk_2b;
+               case RTE_FIB6_TRIE_4B:
+                       return rte_trie_lookup_bulk_4b;
+               case RTE_FIB6_TRIE_8B:
+                       return rte_trie_lookup_bulk_8b;
+               }
+       }
+
+       return NULL;
+}
+
+static void
+write_to_dp(void *ptr, uint64_t val, enum rte_fib_trie_nh_sz size, int n)
+{
+       int i;
+       uint16_t *ptr16 = (uint16_t *)ptr;
+       uint32_t *ptr32 = (uint32_t *)ptr;
+       uint64_t *ptr64 = (uint64_t *)ptr;
+
+       switch (size) {
+       case RTE_FIB6_TRIE_2B:
+               for (i = 0; i < n; i++)
+                       ptr16[i] = (uint16_t)val;
+               break;
+       case RTE_FIB6_TRIE_4B:
+               for (i = 0; i < n; i++)
+                       ptr32[i] = (uint32_t)val;
+               break;
+       case RTE_FIB6_TRIE_8B:
+               for (i = 0; i < n; i++)
+                       ptr64[i] = (uint64_t)val;
+               break;
+       }
+}
+
+static void
+tbl8_pool_init(struct rte_trie_tbl *dp)
+{
+       uint32_t i;
+
+       /* put entire range of indexes to the tbl8 pool */
+       for (i = 0; i < dp->number_tbl8s; i++)
+               dp->tbl8_pool[i] = i;
+
+       dp->tbl8_pool_pos = 0;
+}
+
+/*
+ * Get an index of a free tbl8 from the pool
+ */
+static inline int32_t
+tbl8_get(struct rte_trie_tbl *dp)
+{
+       if (dp->tbl8_pool_pos == dp->number_tbl8s)
+               /* no more free tbl8 */
+               return -ENOSPC;
+
+       /* next index */
+       return dp->tbl8_pool[dp->tbl8_pool_pos++];
+}
+
+/*
+ * Put an index of a free tbl8 back to the pool
+ */
+static inline void
+tbl8_put(struct rte_trie_tbl *dp, uint32_t tbl8_ind)
+{
+       dp->tbl8_pool[--dp->tbl8_pool_pos] = tbl8_ind;
+}
+
+static int
+tbl8_alloc(struct rte_trie_tbl *dp, uint64_t nh)
+{
+       int64_t         tbl8_idx;
+       uint8_t         *tbl8_ptr;
+
+       tbl8_idx = tbl8_get(dp);
+       if (tbl8_idx < 0)
+               return tbl8_idx;
+       tbl8_ptr = (uint8_t *)dp->tbl8 +
+               ((tbl8_idx * TRIE_TBL8_GRP_NUM_ENT) <<
+               dp->nh_sz);
+       /*Init tbl8 entries with nexthop from tbl24*/
+       write_to_dp((void *)tbl8_ptr, nh, dp->nh_sz,
+               TRIE_TBL8_GRP_NUM_ENT);
+       return tbl8_idx;
+}
+
+static void
+tbl8_recycle(struct rte_trie_tbl *dp, void *par, uint64_t tbl8_idx)
+{
+       uint32_t i;
+       uint64_t nh;
+       uint16_t *ptr16;
+       uint32_t *ptr32;
+       uint64_t *ptr64;
+
+       switch (dp->nh_sz) {
+       case RTE_FIB6_TRIE_2B:
+               ptr16 = &((uint16_t *)dp->tbl8)[tbl8_idx *
+                               TRIE_TBL8_GRP_NUM_ENT];
+               nh = *ptr16;
+               if (nh & TRIE_EXT_ENT)
+                       return;
+               for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) {
+                       if (nh != ptr16[i])
+                               return;
+               }
+               write_to_dp(par, nh, dp->nh_sz, 1);
+               for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++)
+                       ptr16[i] = 0;
+               break;
+       case RTE_FIB6_TRIE_4B:
+               ptr32 = &((uint32_t *)dp->tbl8)[tbl8_idx *
+                               TRIE_TBL8_GRP_NUM_ENT];
+               nh = *ptr32;
+               if (nh & TRIE_EXT_ENT)
+                       return;
+               for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) {
+                       if (nh != ptr32[i])
+                               return;
+               }
+               write_to_dp(par, nh, dp->nh_sz, 1);
+               for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++)
+                       ptr32[i] = 0;
+               break;
+       case RTE_FIB6_TRIE_8B:
+               ptr64 = &((uint64_t *)dp->tbl8)[tbl8_idx *
+                               TRIE_TBL8_GRP_NUM_ENT];
+               nh = *ptr64;
+               if (nh & TRIE_EXT_ENT)
+                       return;
+               for (i = 1; i < TRIE_TBL8_GRP_NUM_ENT; i++) {
+                       if (nh != ptr64[i])
+                               return;
+               }
+               write_to_dp(par, nh, dp->nh_sz, 1);
+               for (i = 0; i < TRIE_TBL8_GRP_NUM_ENT; i++)
+                       ptr64[i] = 0;
+               break;
+       }
+       tbl8_put(dp, tbl8_idx);
+}
+
+#define BYTE_SIZE      8
+static inline uint32_t
+get_idx(const uint8_t *ip, uint32_t prev_idx, int bytes, int first_byte)
+{
+       int i;
+       uint32_t idx = 0;
+       uint8_t bitshift;
+
+       for (i = first_byte; i < (first_byte + bytes); i++) {
+               bitshift = (int8_t)(((first_byte + bytes - 1) - i)*BYTE_SIZE);
+               idx |= ip[i] <<  bitshift;
+       }
+       return (prev_idx * 256) + idx;
+}
+
+static inline uint64_t
+get_val_by_p(void *p, uint8_t nh_sz)
+{
+       uint64_t val = 0;
+
+       switch (nh_sz) {
+       case RTE_FIB6_TRIE_2B:
+               val = *(uint16_t *)p;
+               break;
+       case RTE_FIB6_TRIE_4B:
+               val = *(uint32_t *)p;
+               break;
+       case RTE_FIB6_TRIE_8B:
+               val = *(uint64_t *)p;
+               break;
+       }
+       return val;
+}
+
+/*
+ * recursively recycle tbl8's
+ */
+static void
+recycle_root_path(struct rte_trie_tbl *dp, const uint8_t *ip_part,
+       uint8_t common_tbl8, void *prev)
+{
+       void *p;
+       uint64_t val;
+
+       val = get_val_by_p(prev, dp->nh_sz);
+       if (unlikely((val & TRIE_EXT_ENT) != TRIE_EXT_ENT))
+               return;
+
+       if (common_tbl8 != 0) {
+               p = get_tbl_p_by_idx(dp->tbl8, (val >> 1) * 256 + *ip_part,
+                       dp->nh_sz);
+               recycle_root_path(dp, ip_part + 1, common_tbl8 - 1, p);
+       }
+       tbl8_recycle(dp, prev, val >> 1);
+}
+
+static inline int
+build_common_root(struct rte_trie_tbl *dp, const uint8_t *ip,
+       int common_bytes, void **tbl)
+{
+       void *tbl_ptr = NULL;
+       uint64_t *cur_tbl;
+       uint64_t val;
+       int i, j, idx, prev_idx = 0;
+
+       cur_tbl = dp->tbl24;
+       for (i = 3, j = 0; i <= common_bytes; i++) {
+               idx = get_idx(ip, prev_idx, i - j, j);
+               val = get_tbl_val_by_idx(cur_tbl, idx, dp->nh_sz);
+               tbl_ptr = get_tbl_p_by_idx(cur_tbl, idx, dp->nh_sz);
+               if ((val & TRIE_EXT_ENT) != TRIE_EXT_ENT) {
+                       idx = tbl8_alloc(dp, val);
+                       if (unlikely(idx < 0))
+                               return idx;
+                       write_to_dp(tbl_ptr, (idx << 1) |
+                               TRIE_EXT_ENT, dp->nh_sz, 1);
+                       prev_idx = idx;
+               } else
+                       prev_idx = val >> 1;
+
+               j = i;
+               cur_tbl = dp->tbl8;
+       }
+       *tbl = get_tbl_p_by_idx(cur_tbl, prev_idx * 256, dp->nh_sz);
+       return 0;
+}
+
+static int
+write_edge(struct rte_trie_tbl *dp, const uint8_t *ip_part, uint64_t next_hop,
+       int len, enum edge edge, void *ent)
+{
+       uint64_t val = next_hop << 1;
+       int tbl8_idx;
+       int ret = 0;
+       void *p;
+
+       if (len != 0) {
+               val = get_val_by_p(ent, dp->nh_sz);
+               if ((val & TRIE_EXT_ENT) == TRIE_EXT_ENT)
+                       tbl8_idx = val >> 1;
+               else {
+                       tbl8_idx = tbl8_alloc(dp, val);
+                       if (tbl8_idx < 0)
+                               return tbl8_idx;
+                       val = (tbl8_idx << 1)|TRIE_EXT_ENT;
+               }
+               p = get_tbl_p_by_idx(dp->tbl8, (tbl8_idx * 256) + *ip_part,
+                       dp->nh_sz);
+               ret = write_edge(dp, ip_part + 1, next_hop, len - 1, edge, p);
+               if (ret < 0)
+                       return ret;
+               if (edge == LEDGE) {
+                       write_to_dp((uint8_t *)p + (1 << dp->nh_sz),
+                               next_hop << 1, dp->nh_sz, UINT8_MAX - *ip_part);
+               } else {
+                       write_to_dp(get_tbl_p_by_idx(dp->tbl8, tbl8_idx * 256,
+                               dp->nh_sz),
+                               next_hop << 1, dp->nh_sz, *ip_part);
+               }
+               tbl8_recycle(dp, &val, tbl8_idx);
+       }
+
+       write_to_dp(ent, val, dp->nh_sz, 1);
+       return ret;
+}
+
+#define IPV6_MAX_IDX   (RTE_FIB6_IPV6_ADDR_SIZE - 1)
+#define TBL24_BYTES    3
+#define TBL8_LEN       (RTE_FIB6_IPV6_ADDR_SIZE - TBL24_BYTES)
+
+static int
+install_to_dp(struct rte_trie_tbl *dp, const uint8_t *ledge, const uint8_t *r,
+       uint64_t next_hop)
+{
+       void *common_root_tbl;
+       void *ent;
+       int ret;
+       int i;
+       int common_bytes;
+       int llen, rlen;
+       uint8_t redge[16];
+
+       /* decrement redge by 1*/
+       rte_rib6_copy_addr(redge, r);
+       for (i = 15; i >= 0; i--) {
+               redge[i]--;
+               if (redge[i] != 0xff)
+                       break;
+       }
+
+       for (common_bytes = 0; common_bytes < 15; common_bytes++) {
+               if (ledge[common_bytes] != redge[common_bytes])
+                       break;
+       }
+
+       ret = build_common_root(dp, ledge, common_bytes, &common_root_tbl);
+       if (unlikely(ret != 0))
+               return ret;
+       /*first uncommon tbl8 byte idx*/
+       uint8_t first_tbl8_byte = RTE_MAX(common_bytes, TBL24_BYTES);
+
+       for (i = IPV6_MAX_IDX; i > first_tbl8_byte; i--) {
+               if (ledge[i] != 0)
+                       break;
+       }
+
+       llen = i - first_tbl8_byte + (common_bytes < 3);
+
+       for (i = IPV6_MAX_IDX; i > first_tbl8_byte; i--) {
+               if (redge[i] != UINT8_MAX)
+                       break;
+       }
+       rlen = i - first_tbl8_byte + (common_bytes < 3);
+
+       /*first noncommon byte*/
+       uint8_t first_byte_idx = (common_bytes < 3) ? 0 : common_bytes;
+       uint8_t first_idx_len = (common_bytes < 3) ? 3 : 1;
+
+       uint32_t left_idx = get_idx(ledge, 0, first_idx_len, first_byte_idx);
+       uint32_t right_idx = get_idx(redge, 0, first_idx_len, first_byte_idx);
+
+       ent = get_tbl_p_by_idx(common_root_tbl, left_idx, dp->nh_sz);
+       ret = write_edge(dp, &ledge[first_tbl8_byte + !(common_bytes < 3)],
+               next_hop, llen, LEDGE, ent);
+       if (ret < 0)
+               return ret;
+
+       if (right_idx > left_idx + 1) {
+               ent = get_tbl_p_by_idx(common_root_tbl, left_idx + 1,
+                       dp->nh_sz);
+               write_to_dp(ent, next_hop << 1, dp->nh_sz,
+                       right_idx - (left_idx + 1));
+       }
+       ent = get_tbl_p_by_idx(common_root_tbl, right_idx, dp->nh_sz);
+       ret = write_edge(dp, &redge[first_tbl8_byte + !((common_bytes < 3))],
+               next_hop, rlen, REDGE, ent);
+       if (ret < 0)
+               return ret;
+
+       uint8_t common_tbl8 = (common_bytes < TBL24_BYTES) ?
+                       0 : common_bytes - (TBL24_BYTES - 1);
+       ent = get_tbl24_p(dp, ledge, dp->nh_sz);
+       recycle_root_path(dp, ledge + TBL24_BYTES, common_tbl8, ent);
+       return 0;
+}
+
+static void
+get_nxt_net(uint8_t *ip, uint8_t depth)
+{
+       int i;
+       uint8_t part_depth;
+       uint8_t prev_byte;
+
+       for (i = 0, part_depth = depth; part_depth > 8; part_depth -= 8, i++)
+               ;
+
+       prev_byte = ip[i];
+       ip[i] += 1 << (8 - part_depth);
+       if (ip[i] < prev_byte) {
+               while (i > 0) {
+                       ip[--i] += 1;
+                       if (ip[i] != 0)
+                               break;
+               }
+       }
+}
+
+static int
+modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib,
+       const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE],
+       uint8_t depth, uint64_t next_hop)
+{
+       struct rte_rib6_node *tmp = NULL;
+       uint8_t ledge[RTE_FIB6_IPV6_ADDR_SIZE];
+       uint8_t redge[RTE_FIB6_IPV6_ADDR_SIZE];
+       int ret;
+       uint8_t tmp_depth;
+
+       if (next_hop > get_max_nh(dp->nh_sz))
+               return -EINVAL;
+
+       rte_rib6_copy_addr(ledge, ip);
+       do {
+               tmp = rte_rib6_get_nxt(rib, ip, depth, tmp,
+                       RTE_RIB6_GET_NXT_COVER);
+               if (tmp != NULL) {
+                       rte_rib6_get_depth(tmp, &tmp_depth);
+                       if (tmp_depth == depth)
+                               continue;
+                       rte_rib6_get_ip(tmp, redge);
+                       if (rte_rib6_is_equal(ledge, redge)) {
+                               get_nxt_net(ledge, tmp_depth);
+                               continue;
+                       }
+                       ret = install_to_dp(dp, ledge, redge,
+                               next_hop);
+                       if (ret != 0)
+                               return ret;
+                       get_nxt_net(redge, tmp_depth);
+                       rte_rib6_copy_addr(ledge, redge);
+               } else {
+                       rte_rib6_copy_addr(redge, ip);
+                       get_nxt_net(redge, depth);
+                       if (rte_rib6_is_equal(ledge, redge))
+                               break;
+                       ret = install_to_dp(dp, ledge, redge,
+                               next_hop);
+                       if (ret != 0)
+                               return ret;
+               }
+       } while (tmp);
+
+       return 0;
+}
+
+int
+trie_modify(struct rte_fib6 *fib, const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE],
+       uint8_t depth, uint64_t next_hop, int op)
+{
+       struct rte_trie_tbl *dp;
+       struct rte_rib6 *rib;
+       struct rte_rib6_node *tmp = NULL;
+       struct rte_rib6_node *node;
+       struct rte_rib6_node *parent;
+       uint8_t ip_masked[RTE_FIB6_IPV6_ADDR_SIZE];
+       int i, ret = 0;
+       uint64_t par_nh, node_nh;
+       uint8_t tmp_depth, depth_diff = 0, parent_depth = 24;
+
+       if ((fib == NULL) || (ip == NULL) || (depth > RTE_FIB6_MAXDEPTH))
+               return -EINVAL;
+
+       dp = rte_fib6_get_dp(fib);
+       RTE_ASSERT(dp);
+       rib = rte_fib6_get_rib(fib);
+       RTE_ASSERT(rib);
+
+       for (i = 0; i < RTE_FIB6_IPV6_ADDR_SIZE; i++)
+               ip_masked[i] = ip[i] & get_msk_part(depth, i);
+
+       if (depth > 24) {
+               tmp = rte_rib6_get_nxt(rib, ip_masked,
+                       RTE_ALIGN_FLOOR(depth, 8), NULL,
+                       RTE_RIB6_GET_NXT_COVER);
+               if (tmp == NULL) {
+                       tmp = rte_rib6_lookup(rib, ip);
+                       if (tmp != NULL) {
+                               rte_rib6_get_depth(tmp, &tmp_depth);
+                               parent_depth = RTE_MAX(tmp_depth, 24);
+                       }
+                       depth_diff = RTE_ALIGN_CEIL(depth, 8) -
+                               RTE_ALIGN_CEIL(parent_depth, 8);
+                       depth_diff = depth_diff >> 3;
+               }
+       }
+       node = rte_rib6_lookup_exact(rib, ip_masked, depth);
+       switch (op) {
+       case RTE_FIB6_ADD:
+               if (node != NULL) {
+                       rte_rib6_get_nh(node, &node_nh);
+                       if (node_nh == next_hop)
+                               return 0;
+                       ret = modify_dp(dp, rib, ip_masked, depth, next_hop);
+                       if (ret == 0)
+                               rte_rib6_set_nh(node, next_hop);
+                       return 0;
+               }
+
+               if ((depth > 24) && (dp->rsvd_tbl8s >=
+                               dp->number_tbl8s - depth_diff))
+                       return -ENOSPC;
+
+               node = rte_rib6_insert(rib, ip_masked, depth);
+               if (node == NULL)
+                       return -rte_errno;
+               rte_rib6_set_nh(node, next_hop);
+               parent = rte_rib6_lookup_parent(node);
+               if (parent != NULL) {
+                       rte_rib6_get_nh(parent, &par_nh);
+                       if (par_nh == next_hop)
+                               return 0;
+               }
+               ret = modify_dp(dp, rib, ip_masked, depth, next_hop);
+               if (ret != 0) {
+                       rte_rib6_remove(rib, ip_masked, depth);
+                       return ret;
+               }
+
+               dp->rsvd_tbl8s += depth_diff;
+               return 0;
+       case RTE_FIB6_DEL:
+               if (node == NULL)
+                       return -ENOENT;
+
+               parent = rte_rib6_lookup_parent(node);
+               if (parent != NULL) {
+                       rte_rib6_get_nh(parent, &par_nh);
+                       rte_rib6_get_nh(node, &node_nh);
+                       if (par_nh != node_nh)
+                               ret = modify_dp(dp, rib, ip_masked, depth,
+                                       par_nh);
+               } else
+                       ret = modify_dp(dp, rib, ip_masked, depth, dp->def_nh);
+
+               if (ret != 0)
+                       return ret;
+               rte_rib6_remove(rib, ip, depth);
+
+               dp->rsvd_tbl8s -= depth_diff;
+               return 0;
+       default:
+               break;
+       }
+       return -EINVAL;
+}
+
+void *
+trie_create(const char *name, int socket_id,
+       struct rte_fib6_conf *conf)
+{
+       char mem_name[TRIE_NAMESIZE];
+       struct rte_trie_tbl *dp = NULL;
+       uint64_t        def_nh;
+       uint32_t        num_tbl8;
+       enum rte_fib_trie_nh_sz nh_sz;
+
+       if ((name == NULL) || (conf == NULL) ||
+                       (conf->trie.nh_sz < RTE_FIB6_TRIE_2B) ||
+                       (conf->trie.nh_sz > RTE_FIB6_TRIE_8B) ||
+                       (conf->trie.num_tbl8 >
+                       get_max_nh(conf->trie.nh_sz)) ||
+                       (conf->trie.num_tbl8 == 0) ||
+                       (conf->default_nh >
+                       get_max_nh(conf->trie.nh_sz))) {
+
+               rte_errno = EINVAL;
+               return NULL;
+       }
+
+       def_nh = conf->default_nh;
+       nh_sz = conf->trie.nh_sz;
+       num_tbl8 = conf->trie.num_tbl8;
+
+       snprintf(mem_name, sizeof(mem_name), "DP_%s", name);
+       dp = rte_zmalloc_socket(name, sizeof(struct rte_trie_tbl) +
+               TRIE_TBL24_NUM_ENT * (1 << nh_sz), RTE_CACHE_LINE_SIZE,
+               socket_id);
+       if (dp == NULL) {
+               rte_errno = ENOMEM;
+               return dp;
+       }
+
+       write_to_dp(&dp->tbl24, (def_nh << 1), nh_sz, 1 << 24);
+
+       snprintf(mem_name, sizeof(mem_name), "TBL8_%p", dp);
+       dp->tbl8 = rte_zmalloc_socket(mem_name, TRIE_TBL8_GRP_NUM_ENT *
+                       (1ll << nh_sz) * (num_tbl8 + 1),
+                       RTE_CACHE_LINE_SIZE, socket_id);
+       if (dp->tbl8 == NULL) {
+               rte_errno = ENOMEM;
+               rte_free(dp);
+               return NULL;
+       }
+       dp->def_nh = def_nh;
+       dp->nh_sz = nh_sz;
+       dp->number_tbl8s = num_tbl8;
+
+       snprintf(mem_name, sizeof(mem_name), "TBL8_idxes_%p", dp);
+       dp->tbl8_pool = rte_zmalloc_socket(mem_name,
+                       sizeof(uint32_t) * dp->number_tbl8s,
+                       RTE_CACHE_LINE_SIZE, socket_id);
+       if (dp->tbl8_pool == NULL) {
+               rte_errno = ENOMEM;
+               rte_free(dp->tbl8);
+               rte_free(dp);
+               return NULL;
+       }
+
+       tbl8_pool_init(dp);
+
+       return dp;
+}
+
+void
+trie_free(void *p)
+{
+       struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p;
+
+       rte_free(dp->tbl8_pool);
+       rte_free(dp->tbl8);
+       rte_free(dp);
+}
+
diff --git a/lib/librte_fib/trie.h b/lib/librte_fib/trie.h
new file mode 100644 (file)
index 0000000..7762fb9
--- /dev/null
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv@gmail.com>
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _TRIE_H_
+#define _TRIE_H_
+
+/**
+ * @file
+ * RTE IPv6 Longest Prefix Match (LPM)
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *
+trie_create(const char *name, int socket_id, struct rte_fib6_conf *conf);
+
+void
+trie_free(void *p);
+
+rte_fib6_lookup_fn_t
+rte_trie_get_lookup_fn(struct rte_fib6_conf *fib_conf);
+
+int
+trie_modify(struct rte_fib6 *fib, const uint8_t ip[RTE_FIB6_IPV6_ADDR_SIZE],
+       uint8_t depth, uint64_t next_hop, int op);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TRIE_H_ */
+