acl: new library
author Konstantin Ananyev <konstantin.ananyev@intel.com>
Fri, 13 Jun 2014 11:26:50 +0000 (12:26 +0100)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Fri, 13 Jun 2014 23:29:45 +0000 (01:29 +0200)
The ACL library is used to perform an N-tuple search over a set of rules with
multiple categories and find the best match for each category.
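
For reference, a minimal classify flow against this API might look as follows. This is a rough sketch only: the one-byte field layout, the helper name classify_one, the rule values, and the SOCKET_ID_ANY/error handling are illustrative assumptions; rte_acl.h below has the definitive definitions.

#include <stdint.h>
#include <string.h>
#include <rte_acl.h>

/* rule format with a single one-byte field (the first input byte) */
RTE_ACL_RULE_DEF(acl_demo_rule, 1);

static const struct rte_acl_field_def demo_defs[1] = {
	{
		.type = RTE_ACL_FIELD_TYPE_BITMASK,
		.size = sizeof(uint8_t),
		.field_index = 0,
		.input_index = 0,
		.offset = 0,
	},
};

static int
classify_one(const uint8_t *buf)
{
	struct rte_acl_param prm = {
		.name = "acl_demo",
		.socket_id = SOCKET_ID_ANY,
		.rule_size = RTE_ACL_RULE_SZ(1),
		.max_rule_num = 8,
	};
	struct acl_demo_rule rule = {
		.data = {.category_mask = 1, .priority = 1, .userdata = 100},
		/* match inputs whose first byte is 6 (e.g. IPPROTO_TCP) */
		.field[0] = {.value.u8 = 6, .mask_range.u8 = UINT8_MAX},
	};
	struct rte_acl_config cfg = {
		.num_categories = 1,
		.num_fields = 1,
	};
	struct rte_acl_ctx *ctx;
	const uint8_t *data[1] = {buf};
	uint32_t results[1] = {0};

	memcpy(cfg.defs, demo_defs, sizeof(demo_defs));

	ctx = rte_acl_create(&prm);
	if (ctx == NULL)
		return -1;

	if (rte_acl_add_rules(ctx, (const struct rte_acl_rule *)&rule, 1) != 0 ||
			rte_acl_build(ctx, &cfg) != 0) {
		rte_acl_free(ctx);
		return -1;
	}

	/* one input buffer, searched across one result category */
	rte_acl_classify(ctx, data, results, 1, 1);
	rte_acl_free(ctx);

	return results[0];	/* 0 == no match, otherwise the rule's userdata */
}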

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Tested-by: Waterman Cao <waterman.cao@intel.com>
Acked-by: Pablo de Lara Guarch <pablo.de.lara.guarch@intel.com>
[Thomas: some code-style changes]

17 files changed:
config/common_bsdapp
config/common_linuxapp
doc/doxy-api-index.md
doc/doxy-api.conf
lib/Makefile
lib/librte_acl/Makefile [new file with mode: 0644]
lib/librte_acl/acl.h [new file with mode: 0644]
lib/librte_acl/acl_bld.c [new file with mode: 0644]
lib/librte_acl/acl_gen.c [new file with mode: 0644]
lib/librte_acl/acl_run.c [new file with mode: 0644]
lib/librte_acl/acl_vect.h [new file with mode: 0644]
lib/librte_acl/rte_acl.c [new file with mode: 0644]
lib/librte_acl/rte_acl.h [new file with mode: 0644]
lib/librte_acl/rte_acl_osdep.h [new file with mode: 0644]
lib/librte_acl/rte_acl_osdep_alone.h [new file with mode: 0644]
lib/librte_acl/tb_mem.c [new file with mode: 0644]
lib/librte_acl/tb_mem.h [new file with mode: 0644]

diff --git a/config/common_bsdapp b/config/common_bsdapp
index ec7b159..568d64b 100644
@@ -245,6 +245,13 @@ CONFIG_RTE_LIBRTE_HASH_DEBUG=n
 CONFIG_RTE_LIBRTE_LPM=y
 CONFIG_RTE_LIBRTE_LPM_DEBUG=n
 
+#
+# Compile librte_acl
+#
+CONFIG_RTE_LIBRTE_ACL=y
+CONFIG_RTE_LIBRTE_ACL_DEBUG=n
+CONFIG_RTE_LIBRTE_ACL_STANDALONE=n
+
 #
 # Compile librte_power
 #
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 7c143eb..8979828 100644
@@ -279,6 +279,13 @@ CONFIG_RTE_LIBRTE_HASH_DEBUG=n
 CONFIG_RTE_LIBRTE_LPM=y
 CONFIG_RTE_LIBRTE_LPM_DEBUG=n
 
+#
+# Compile librte_acl
+#
+CONFIG_RTE_LIBRTE_ACL=y
+CONFIG_RTE_LIBRTE_ACL_DEBUG=n
+CONFIG_RTE_LIBRTE_ACL_STANDALONE=n
+
 #
 # Compile librte_power
 #
diff --git a/doc/doxy-api-index.md b/doc/doxy-api-index.md
index 6e75a6e..83303a1 100644
@@ -78,7 +78,8 @@ There are many libraries, so their headers may be grouped by topics:
   [SCTP]               (@ref rte_sctp.h),
   [TCP]                (@ref rte_tcp.h),
   [UDP]                (@ref rte_udp.h),
-  [LPM route]          (@ref rte_lpm.h)
+  [LPM route]          (@ref rte_lpm.h),
+  [ACL]                (@ref rte_acl.h)
 
 - **QoS**:
   [metering]           (@ref rte_meter.h),
diff --git a/doc/doxy-api.conf b/doc/doxy-api.conf
index 9df7356..e5a8520 100644
@@ -31,6 +31,7 @@
 PROJECT_NAME            = DPDK
 INPUT                   = doc/doxy-api-index.md \
                           lib/librte_eal/common/include \
+                          lib/librte_acl \
                           lib/librte_distributor \
                           lib/librte_ether \
                           lib/librte_hash \
diff --git a/lib/Makefile b/lib/Makefile
index 8cdfa7d..a9f94b4 100644
@@ -49,11 +49,11 @@ DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt
 DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
+DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
 DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power
 DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
 DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
-DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
 DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
 DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor
 
diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile
new file mode 100644
index 0000000..4fe4593
--- /dev/null
@@ -0,0 +1,60 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_acl.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += tb_mem.c
+
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += rte_acl.c
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_bld.c
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_gen.c
+SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run.c
+
+# install these header files
+SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include := rte_acl_osdep.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include += rte_acl.h
+
+ifeq ($(CONFIG_RTE_LIBRTE_ACL_STANDALONE),y)
+# standalone build
+SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include += rte_acl_osdep_alone.h
+else
+# this lib needs eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_ACL) += lib/librte_eal lib/librte_malloc
+endif
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h
new file mode 100644
index 0000000..e6d7985
--- /dev/null
@@ -0,0 +1,182 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef        _ACL_H_
+#define        _ACL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#define RTE_ACL_QUAD_MAX       5
+#define RTE_ACL_QUAD_SIZE      4
+#define RTE_ACL_QUAD_SINGLE    UINT64_C(0x7f7f7f7f00000000)
+
+#define RTE_ACL_SINGLE_TRIE_SIZE       2000
+
+#define RTE_ACL_DFA_MAX                UINT8_MAX
+#define RTE_ACL_DFA_SIZE       (UINT8_MAX + 1)
+
+typedef int bits_t;
+
+#define        RTE_ACL_BIT_SET_SIZE    ((UINT8_MAX + 1) / (sizeof(bits_t) * CHAR_BIT))
+
+struct rte_acl_bitset {
+       bits_t             bits[RTE_ACL_BIT_SET_SIZE];
+};
+
+#define        RTE_ACL_NODE_DFA        (0 << RTE_ACL_TYPE_SHIFT)
+#define        RTE_ACL_NODE_SINGLE     (1U << RTE_ACL_TYPE_SHIFT)
+#define        RTE_ACL_NODE_QEXACT     (2U << RTE_ACL_TYPE_SHIFT)
+#define        RTE_ACL_NODE_QRANGE     (3U << RTE_ACL_TYPE_SHIFT)
+#define        RTE_ACL_NODE_MATCH      (4U << RTE_ACL_TYPE_SHIFT)
+#define        RTE_ACL_NODE_TYPE       (7U << RTE_ACL_TYPE_SHIFT)
+#define        RTE_ACL_NODE_UNDEFINED  UINT32_MAX
+
+/*
+ * A node is structured as a set of ptrs (transitions); each ptr carries
+ * a bit map of the input values associated with that transition.
+ */
+struct rte_acl_ptr_set {
+       struct rte_acl_bitset values;   /* input values associated with ptr */
+       struct rte_acl_node  *ptr;      /* transition to next node */
+};
+
+struct rte_acl_classifier_results {
+       int results[RTE_ACL_MAX_CATEGORIES];
+};
+
+struct rte_acl_match_results {
+       uint32_t results[RTE_ACL_MAX_CATEGORIES];
+       int32_t priority[RTE_ACL_MAX_CATEGORIES];
+};
+
+struct rte_acl_node {
+       uint64_t node_index;  /* index for this node */
+       uint32_t level;       /* level 0-n in the trie */
+       uint32_t ref_count;   /* ref count for this node */
+       struct rte_acl_bitset  values;
+       /* set of all values that map to another node
+        * (union of bits in each transition).
+        */
+       uint32_t                num_ptrs; /* number of ptr_set in use */
+       uint32_t                max_ptrs; /* number of allocated ptr_set */
+       uint32_t                min_add;  /* number of ptr_set per allocation */
+       struct rte_acl_ptr_set *ptrs;     /* transitions array for this node */
+       int32_t                 match_flag;
+       int32_t                 match_index; /* index to match data */
+       uint32_t                node_type;
+       int32_t                 fanout;
+       /* number of ranges (transitions w/ consecutive bits) */
+       int32_t                 id;
+       struct rte_acl_match_results *mrt; /* only valid when match_flag != 0 */
+       char                         transitions[RTE_ACL_QUAD_SIZE];
+       /* boundaries for ranged node */
+       struct rte_acl_node     *next;
+       /* free list link or pointer to duplicate node during merge */
+       struct rte_acl_node     *prev;
+       /* points to node from which this node was duplicated */
+
+       uint32_t                subtree_id;
+       uint32_t                subtree_ref_count;
+
+};
+enum {
+       RTE_ACL_SUBTREE_NODE = 0x80000000
+};
+
+/*
+ * Types of tries used to generate runtime structure(s)
+ */
+enum {
+       RTE_ACL_FULL_TRIE = 0,
+       RTE_ACL_NOSRC_TRIE = 1,
+       RTE_ACL_NODST_TRIE = 2,
+       RTE_ACL_NOPORTS_TRIE = 4,
+       RTE_ACL_NOVLAN_TRIE = 8,
+       RTE_ACL_UNUSED_TRIE = 0x80000000
+};
+
+
+/** MAX number of tries per one ACL context.*/
+#define RTE_ACL_MAX_TRIES      8
+
+/** Max number of characters in ACL context name. */
+#define RTE_ACL_NAMESIZE       32
+
+
+struct rte_acl_trie {
+       uint32_t        type;
+       uint32_t        count;
+       int32_t         smallest;  /* smallest rule in this trie */
+       uint32_t        root_index;
+       const uint32_t *data_index;
+       uint32_t        num_data_indexes;
+};
+
+struct rte_acl_bld_trie {
+       struct rte_acl_node *trie;
+};
+
+struct rte_acl_ctx {
+       TAILQ_ENTRY(rte_acl_ctx) next;    /**< Next in list. */
+       char                name[RTE_ACL_NAMESIZE];
+       /** Name of the ACL context. */
+       int32_t             socket_id;
+       /** Socket ID to allocate memory from. */
+       void               *rules;
+       uint32_t            max_rules;
+       uint32_t            rule_sz;
+       uint32_t            num_rules;
+       uint32_t            num_categories;
+       uint32_t            num_tries;
+       uint32_t            match_index;
+       uint64_t            no_match;
+       uint64_t            idle;
+       uint64_t           *trans_table;
+       uint32_t           *data_indexes;
+       struct rte_acl_trie trie[RTE_ACL_MAX_TRIES];
+       void               *mem;
+       size_t              mem_sz;
+       struct rte_acl_config config; /* copy of build config. */
+};
+
+int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
+       struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+       uint32_t num_categories, uint32_t data_index_sz, int match_num);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ACL_H_ */
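
acl.h above models every build-time transition of a trie node as one bit per possible input byte value (RTE_ACL_BIT_SET_SIZE words of bits_t per rte_acl_bitset). Below is a rough, self-contained sketch of that folding, loosely mirroring what acl_add_ptr_range() in acl_bld.c further down does; plain C for illustration only, and names such as bitset_fill_range are made up here, not DPDK API.

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* same shape as struct rte_acl_bitset: one bit per possible input byte */
typedef int bits_t;
#define BIT_SET_SIZE	((UINT8_MAX + 1) / (sizeof(bits_t) * CHAR_BIT))

struct bitset {
	bits_t bits[BIT_SET_SIZE];
};

/* set the bits for every byte value in [low, high] */
static void
bitset_fill_range(struct bitset *bs, uint8_t low, uint8_t high)
{
	uint32_t n;

	memset(bs, 0, sizeof(*bs));
	for (n = low; n <= (uint32_t)high; n++)
		bs->bits[n / (sizeof(bits_t) * CHAR_BIT)] |=
			1 << (n % (sizeof(bits_t) * CHAR_BIT));
}

int
main(void)
{
	struct bitset bs;
	uint32_t n, cnt = 0;

	/* e.g. the transition that accepts byte values 0x20..0x7e */
	bitset_fill_range(&bs, 0x20, 0x7e);

	for (n = 0; n < UINT8_MAX + 1; n++)
		cnt += (bs.bits[n / (sizeof(bits_t) * CHAR_BIT)] >>
			(n % (sizeof(bits_t) * CHAR_BIT))) & 1;

	printf("%u byte values covered by this transition\n", cnt);
	return 0;
}
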
diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
new file mode 100644
index 0000000..fe7b824
--- /dev/null
@@ -0,0 +1,2008 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_acl.h>
+#include "tb_mem.h"
+#include "acl.h"
+
+#define        ACL_POOL_ALIGN          8
+#define        ACL_POOL_ALLOC_MIN      0x800000
+
+/* number of pointers per alloc */
+#define ACL_PTR_ALLOC  32
+
+/* variable for dividing rule sets */
+#define NODE_MAX       2500
+#define NODE_PERCENTAGE        (0.40)
+#define RULE_PERCENTAGE        (0.40)
+
+/* TALLY_* values are per-field statistics */
+enum {
+       TALLY_0 = 0,        /* number of rules that are 0% or more wild. */
+       TALLY_25,           /* number of rules that are 25% or more wild. */
+       TALLY_50,
+       TALLY_75,
+       TALLY_100,
+       TALLY_DEACTIVATED, /* deactivated fields (100% wild in all rules). */
+       TALLY_DEPTH,
+       /* number of rules that are 100% wild for this field and higher. */
+       TALLY_NUM
+};
+
+static const uint32_t wild_limits[TALLY_DEACTIVATED] = {0, 25, 50, 75, 100};
+
+enum {
+       ACL_INTERSECT_NONE = 0,
+       ACL_INTERSECT_A = 1,    /* set A is a superset of the A/B intersection */
+       ACL_INTERSECT_B = 2,    /* set B is a superset of the A/B intersection */
+       ACL_INTERSECT = 4,      /* sets A and B intersect */
+};
+
+enum {
+       ACL_PRIORITY_EQUAL = 0,
+       ACL_PRIORITY_NODE_A = 1,
+       ACL_PRIORITY_NODE_B = 2,
+       ACL_PRIORITY_MIXED = 3
+};
+
+
+struct acl_mem_block {
+       uint32_t block_size;
+       void     *mem_ptr;
+};
+
+#define        MEM_BLOCK_NUM   16
+
+/* Single ACL rule, build representation.*/
+struct rte_acl_build_rule {
+       struct rte_acl_build_rule   *next;
+       struct rte_acl_config       *config;
+       /**< configuration for each field in the rule. */
+       const struct rte_acl_rule   *f;
+       uint32_t                    *wildness;
+};
+
+/* Context for build phase */
+struct acl_build_context {
+       const struct rte_acl_ctx *acx;
+       struct rte_acl_build_rule *build_rules;
+       struct rte_acl_config     cfg;
+       uint32_t                  node;
+       uint32_t                  num_nodes;
+       uint32_t                  category_mask;
+       uint32_t                  num_rules;
+       uint32_t                  node_id;
+       uint32_t                  src_mask;
+       uint32_t                  num_build_rules;
+       uint32_t                  num_tries;
+       struct tb_mem_pool        pool;
+       struct rte_acl_trie       tries[RTE_ACL_MAX_TRIES];
+       struct rte_acl_bld_trie   bld_tries[RTE_ACL_MAX_TRIES];
+       uint32_t            data_indexes[RTE_ACL_MAX_TRIES][RTE_ACL_MAX_FIELDS];
+
+       /* memory free lists for nodes and blocks used for node ptrs */
+       struct acl_mem_block      blocks[MEM_BLOCK_NUM];
+       struct rte_acl_node       *node_free_list;
+};
+
+static int acl_merge_trie(struct acl_build_context *context,
+       struct rte_acl_node *node_a, struct rte_acl_node *node_b,
+       uint32_t level, uint32_t subtree_id, struct rte_acl_node **node_c);
+
+static int acl_merge(struct acl_build_context *context,
+       struct rte_acl_node *node_a, struct rte_acl_node *node_b,
+       int move, int a_subset, int level);
+
+static void
+acl_deref_ptr(struct acl_build_context *context,
+       struct rte_acl_node *node, int index);
+
+static void *
+acl_build_alloc(struct acl_build_context *context, size_t n, size_t s)
+{
+       uint32_t m;
+       void *p;
+       size_t alloc_size = n * s;
+
+       /*
+        * look for memory in free lists
+        */
+       for (m = 0; m < RTE_DIM(context->blocks); m++) {
+               if (context->blocks[m].block_size ==
+                  alloc_size && context->blocks[m].mem_ptr != NULL) {
+                       p = context->blocks[m].mem_ptr;
+                       context->blocks[m].mem_ptr = *((void **)p);
+                       memset(p, 0, alloc_size);
+                       return p;
+               }
+       }
+
+       /*
+        * return allocation from memory pool
+        */
+       p = tb_alloc(&context->pool, alloc_size);
+       return p;
+}
+
+/*
+ * Free memory blocks (kept in context for reuse).
+ */
+static void
+acl_build_free(struct acl_build_context *context, size_t s, void *p)
+{
+       uint32_t n;
+
+       for (n = 0; n < RTE_DIM(context->blocks); n++) {
+               if (context->blocks[n].block_size == s) {
+                       *((void **)p) = context->blocks[n].mem_ptr;
+                       context->blocks[n].mem_ptr = p;
+                       return;
+               }
+       }
+       for (n = 0; n < RTE_DIM(context->blocks); n++) {
+               if (context->blocks[n].block_size == 0) {
+                       context->blocks[n].block_size = s;
+                       *((void **)p) = NULL;
+                       context->blocks[n].mem_ptr = p;
+                       return;
+               }
+       }
+}
+
+/*
+ * Allocate and initialize a new node.
+ */
+static struct rte_acl_node *
+acl_alloc_node(struct acl_build_context *context, int level)
+{
+       struct rte_acl_node *node;
+
+       if (context->node_free_list != NULL) {
+               node = context->node_free_list;
+               context->node_free_list = node->next;
+               memset(node, 0, sizeof(struct rte_acl_node));
+       } else {
+               node = acl_build_alloc(context, sizeof(struct rte_acl_node), 1);
+       }
+
+       if (node != NULL) {
+               node->num_ptrs = 0;
+               node->level = level;
+               node->node_type = RTE_ACL_NODE_UNDEFINED;
+               node->node_index = RTE_ACL_NODE_UNDEFINED;
+               context->num_nodes++;
+               node->id = context->node_id++;
+       }
+       return node;
+}
+
+/*
+ * Dereference all nodes to which this node points
+ */
+static void
+acl_free_node(struct acl_build_context *context,
+       struct rte_acl_node *node)
+{
+       uint32_t n;
+
+       if (node->prev != NULL)
+               node->prev->next = NULL;
+       for (n = 0; n < node->num_ptrs; n++)
+               acl_deref_ptr(context, node, n);
+
+       /* free mrt if this is a match node */
+       if (node->mrt != NULL) {
+               acl_build_free(context, sizeof(struct rte_acl_match_results),
+                       node->mrt);
+               node->mrt = NULL;
+       }
+
+       /* free transitions to other nodes */
+       if (node->ptrs != NULL) {
+               acl_build_free(context,
+                       node->max_ptrs * sizeof(struct rte_acl_ptr_set),
+                       node->ptrs);
+               node->ptrs = NULL;
+       }
+
+       /* put it on the free list */
+       context->num_nodes--;
+       node->next = context->node_free_list;
+       context->node_free_list = node;
+}
+
+
+/*
+ * Include src bitset in dst bitset
+ */
+static void
+acl_include(struct rte_acl_bitset *dst, struct rte_acl_bitset *src, bits_t mask)
+{
+       uint32_t n;
+
+       for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++)
+               dst->bits[n] = (dst->bits[n] & mask) | src->bits[n];
+}
+
+/*
+ * Set dst to bits of src1 that are not in src2
+ */
+static int
+acl_exclude(struct rte_acl_bitset *dst,
+       struct rte_acl_bitset *src1,
+       struct rte_acl_bitset *src2)
+{
+       uint32_t n;
+       bits_t all_bits = 0;
+
+       for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) {
+               dst->bits[n] = src1->bits[n] & ~src2->bits[n];
+               all_bits |= dst->bits[n];
+       }
+       return all_bits != 0;
+}
+
+/*
+ * Add a pointer (ptr) to a node.
+ */
+static int
+acl_add_ptr(struct acl_build_context *context,
+       struct rte_acl_node *node,
+       struct rte_acl_node *ptr,
+       struct rte_acl_bitset *bits)
+{
+       uint32_t n, num_ptrs;
+       struct rte_acl_ptr_set *ptrs = NULL;
+
+       /*
+        * If there's already a pointer to the same node, just add to the bitset
+        */
+       for (n = 0; n < node->num_ptrs; n++) {
+               if (node->ptrs[n].ptr != NULL) {
+                       if (node->ptrs[n].ptr == ptr) {
+                               acl_include(&node->ptrs[n].values, bits, -1);
+                               acl_include(&node->values, bits, -1);
+                               return 0;
+                       }
+               }
+       }
+
+       /* if there's no room for another pointer, make room */
+       if (node->num_ptrs >= node->max_ptrs) {
+               /* add room for more pointers */
+               num_ptrs = node->max_ptrs + ACL_PTR_ALLOC;
+               ptrs = acl_build_alloc(context, num_ptrs, sizeof(*ptrs));
+               if (ptrs == NULL)
+                       return -ENOMEM;
+
+               /* copy current pointers to new memory allocation */
+               if (node->ptrs != NULL) {
+                       memcpy(ptrs, node->ptrs,
+                               node->num_ptrs * sizeof(*ptrs));
+                       acl_build_free(context, node->max_ptrs * sizeof(*ptrs),
+                               node->ptrs);
+               }
+               node->ptrs = ptrs;
+               node->max_ptrs = num_ptrs;
+       }
+
+       /* Find available ptr and add a new pointer to this node */
+       for (n = node->min_add; n < node->max_ptrs; n++) {
+               if (node->ptrs[n].ptr == NULL) {
+                       node->ptrs[n].ptr = ptr;
+                       acl_include(&node->ptrs[n].values, bits, 0);
+                       acl_include(&node->values, bits, -1);
+                       if (ptr != NULL)
+                               ptr->ref_count++;
+                       if (node->num_ptrs <= n)
+                               node->num_ptrs = n + 1;
+                       return 0;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Add a pointer for a range of values
+ */
+static int
+acl_add_ptr_range(struct acl_build_context *context,
+       struct rte_acl_node *root,
+       struct rte_acl_node *node,
+       uint8_t low,
+       uint8_t high)
+{
+       uint32_t n;
+       struct rte_acl_bitset bitset;
+
+       /* clear the bitset values */
+       for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++)
+               bitset.bits[n] = 0;
+
+       /* for each bit in range, add bit to set */
+       for (n = 0; n < UINT8_MAX + 1; n++)
+               if (n >= low && n <= high)
+                       bitset.bits[n / (sizeof(bits_t) * 8)] |=
+                               1 << (n % (sizeof(bits_t) * 8));
+
+       return acl_add_ptr(context, root, node, &bitset);
+}
+
+/*
+ * Generate a bitset from a byte value and mask.
+ */
+static int
+acl_gen_mask(struct rte_acl_bitset *bitset, uint32_t value, uint32_t mask)
+{
+       int range = 0;
+       uint32_t n;
+
+       /* clear the bitset values */
+       for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++)
+               bitset->bits[n] = 0;
+
+       /* for each bit in value/mask, add bit to set */
+       for (n = 0; n < UINT8_MAX + 1; n++) {
+               if ((n & mask) == value) {
+                       range++;
+                       bitset->bits[n / (sizeof(bits_t) * 8)] |=
+                               1 << (n % (sizeof(bits_t) * 8));
+               }
+       }
+       return range;
+}
+
+/*
+ * Determine how A and B intersect.
+ * Determine if A and/or B are supersets of the intersection.
+ */
+static int
+acl_intersect_type(struct rte_acl_bitset *a_bits,
+       struct rte_acl_bitset *b_bits,
+       struct rte_acl_bitset *intersect)
+{
+       uint32_t n;
+       bits_t intersect_bits = 0;
+       bits_t a_superset = 0;
+       bits_t b_superset = 0;
+
+       /*
+        * calculate and store intersection and check if A and/or B have
+        * bits outside the intersection (superset)
+        */
+       for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) {
+               intersect->bits[n] = a_bits->bits[n] & b_bits->bits[n];
+               a_superset |= a_bits->bits[n] ^ intersect->bits[n];
+               b_superset |= b_bits->bits[n] ^ intersect->bits[n];
+               intersect_bits |= intersect->bits[n];
+       }
+
+       n = (intersect_bits == 0 ? ACL_INTERSECT_NONE : ACL_INTERSECT) |
+               (b_superset == 0 ? 0 : ACL_INTERSECT_B) |
+               (a_superset == 0 ? 0 : ACL_INTERSECT_A);
+
+       return n;
+}
+
+/*
+ * Check if all bits in the bitset are on
+ */
+static int
+acl_full(struct rte_acl_node *node)
+{
+       uint32_t n;
+       bits_t all_bits = -1;
+
+       for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++)
+               all_bits &= node->values.bits[n];
+       return all_bits == -1;
+}
+
+/*
+ * Check if all bits in the bitset are off
+ */
+static int
+acl_empty(struct rte_acl_node *node)
+{
+       uint32_t n;
+
+       if (node->ref_count == 0) {
+               for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) {
+                       if (0 != node->values.bits[n])
+                               return 0;
+               }
+               return 1;
+       } else {
+               return 0;
+       }
+}
+
+/*
+ * Compute intersection of A and B
+ * return 1 if there is an intersection else 0.
+ */
+static int
+acl_intersect(struct rte_acl_bitset *a_bits,
+       struct rte_acl_bitset *b_bits,
+       struct rte_acl_bitset *intersect)
+{
+       uint32_t n;
+       bits_t all_bits = 0;
+
+       for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) {
+               intersect->bits[n] = a_bits->bits[n] & b_bits->bits[n];
+               all_bits |= intersect->bits[n];
+       }
+       return all_bits != 0;
+}
+
+/*
+ * Duplicate a node
+ */
+static struct rte_acl_node *
+acl_dup_node(struct acl_build_context *context, struct rte_acl_node *node)
+{
+       uint32_t n;
+       struct rte_acl_node *next;
+
+       next = acl_alloc_node(context, node->level);
+       if (next == NULL)
+               return NULL;
+
+       /* allocate the pointers */
+       if (node->num_ptrs > 0) {
+               next->ptrs = acl_build_alloc(context,
+                       node->max_ptrs,
+                       sizeof(struct rte_acl_ptr_set));
+               if (next->ptrs == NULL)
+                       return NULL;
+               next->max_ptrs = node->max_ptrs;
+       }
+
+       /* copy over the pointers */
+       for (n = 0; n < node->num_ptrs; n++) {
+               if (node->ptrs[n].ptr != NULL) {
+                       next->ptrs[n].ptr = node->ptrs[n].ptr;
+                       next->ptrs[n].ptr->ref_count++;
+                       acl_include(&next->ptrs[n].values,
+                               &node->ptrs[n].values, -1);
+               }
+       }
+
+       next->num_ptrs = node->num_ptrs;
+
+       /* copy over node's match results */
+       if (node->match_flag == 0)
+               next->match_flag = 0;
+       else {
+               next->match_flag = -1;
+               next->mrt = acl_build_alloc(context, 1, sizeof(*next->mrt));
+               memcpy(next->mrt, node->mrt, sizeof(*next->mrt));
+       }
+
+       /* copy over node's bitset */
+       acl_include(&next->values, &node->values, -1);
+
+       node->next = next;
+       next->prev = node;
+
+       return next;
+}
+
+/*
+ * Dereference a pointer from a node
+ */
+static void
+acl_deref_ptr(struct acl_build_context *context,
+       struct rte_acl_node *node, int index)
+{
+       struct rte_acl_node *ref_node;
+
+       /* De-reference the node at the specified pointer */
+       if (node != NULL && node->ptrs[index].ptr != NULL) {
+               ref_node = node->ptrs[index].ptr;
+               ref_node->ref_count--;
+               if (ref_node->ref_count == 0)
+                       acl_free_node(context, ref_node);
+       }
+}
+
+/*
+ * Exclude bitset from a node pointer
+ * returns  0 if pointer was deref'd
+ *          1 otherwise.
+ */
+static int
+acl_exclude_ptr(struct acl_build_context *context,
+       struct rte_acl_node *node,
+       int index,
+       struct rte_acl_bitset *b_bits)
+{
+       int retval = 1;
+
+       /*
+        * remove bitset from node pointer and deref
+        * if the bitset becomes empty.
+        */
+       if (!acl_exclude(&node->ptrs[index].values,
+                       &node->ptrs[index].values,
+                       b_bits)) {
+               acl_deref_ptr(context, node, index);
+               node->ptrs[index].ptr = NULL;
+               retval = 0;
+       }
+
+       /* exclude bits from the composite bits for the node */
+       acl_exclude(&node->values, &node->values, b_bits);
+       return retval;
+}
+
+/*
+ * Remove a bitset from src ptr and move remaining ptr to dst
+ */
+static int
+acl_move_ptr(struct acl_build_context *context,
+       struct rte_acl_node *dst,
+       struct rte_acl_node *src,
+       int index,
+       struct rte_acl_bitset *b_bits)
+{
+       int rc;
+
+       if (b_bits != NULL)
+               if (!acl_exclude_ptr(context, src, index, b_bits))
+                       return 0;
+
+       /* add src pointer to dst node */
+       rc = acl_add_ptr(context, dst, src->ptrs[index].ptr,
+                       &src->ptrs[index].values);
+       if (rc < 0)
+               return rc;
+
+       /* remove ptr from src */
+       acl_exclude_ptr(context, src, index, &src->ptrs[index].values);
+       return 1;
+}
+
+/*
+ * Exclude bitset b_bits from the src ptr's values and copy the remaining pointer to dst
+ */
+static int
+acl_copy_ptr(struct acl_build_context *context,
+       struct rte_acl_node *dst,
+       struct rte_acl_node *src,
+       int index,
+       struct rte_acl_bitset *b_bits)
+{
+       int rc;
+       struct rte_acl_bitset bits;
+
+       if (b_bits != NULL)
+               if (!acl_exclude(&bits, &src->ptrs[index].values, b_bits))
+                       return 0;
+
+       rc = acl_add_ptr(context, dst, src->ptrs[index].ptr, &bits);
+       if (rc < 0)
+               return rc;
+       return 1;
+}
+
+/*
+ * Fill in gaps in ptrs list with the ptr at the end of the list
+ */
+static void
+acl_compact_node_ptrs(struct rte_acl_node *node_a)
+{
+       uint32_t n;
+       int min_add = node_a->min_add;
+
+       while (node_a->num_ptrs > 0  &&
+                       node_a->ptrs[node_a->num_ptrs - 1].ptr == NULL)
+               node_a->num_ptrs--;
+
+       for (n = min_add; n + 1 < node_a->num_ptrs; n++) {
+
+               /* if this entry is empty */
+               if (node_a->ptrs[n].ptr == NULL) {
+
+                       /* move the last pointer to this entry */
+                       acl_include(&node_a->ptrs[n].values,
+                               &node_a->ptrs[node_a->num_ptrs - 1].values,
+                               0);
+                       node_a->ptrs[n].ptr =
+                               node_a->ptrs[node_a->num_ptrs - 1].ptr;
+
+                       /*
+                        * mark the end as empty and adjust the number
+                        * of used pointer entries
+                        */
+                       node_a->ptrs[node_a->num_ptrs - 1].ptr = NULL;
+                       while (node_a->num_ptrs > 0  &&
+                               node_a->ptrs[node_a->num_ptrs - 1].ptr == NULL)
+                               node_a->num_ptrs--;
+               }
+       }
+}
+
+/*
+ * acl_merge helper routine.
+ */
+static int
+acl_merge_intersect(struct acl_build_context *context,
+       struct rte_acl_node *node_a, uint32_t idx_a,
+       struct rte_acl_node *node_b, uint32_t idx_b,
+       int next_move, int level,
+       struct rte_acl_bitset *intersect_ptr)
+{
+       struct rte_acl_node *node_c;
+
+       /* Duplicate A for intersection */
+       node_c = acl_dup_node(context, node_a->ptrs[idx_a].ptr);
+       if (node_c == NULL)
+               return -1;
+
+       /* Remove intersection from A */
+       acl_exclude_ptr(context, node_a, idx_a, intersect_ptr);
+
+       /*
+        * Add a link from A to C for all transitions
+        * in the intersection.
+        */
+       if (acl_add_ptr(context, node_a, node_c, intersect_ptr) < 0)
+               return -1;
+
+       /* merge B->node into C */
+       return acl_merge(context, node_c, node_b->ptrs[idx_b].ptr, next_move,
+               0, level + 1);
+}
+
+
+/*
+ * Merge the children of nodes A and B together.
+ *
+ * if match node
+ *     For each category
+ *             node A result = highest priority result
+ * if any pointers in A intersect with any in B
+ *     For each intersection
+ *             C = copy of node that A points to
+ *             remove intersection from A pointer
+ *             add a pointer to A that points to C for the intersection
+ *             Merge C and node that B points to
+ * Compact the pointers in A and B
+ * if move flag
+ *     If B has only one reference
+ *             Move B pointers to A
+ *     else
+ *             Copy B pointers to A
+ */
+static int
+acl_merge(struct acl_build_context *context,
+       struct rte_acl_node *node_a, struct rte_acl_node *node_b,
+       int move, int a_subset, int level)
+{
+       uint32_t n, m, ptrs_a, ptrs_b;
+       uint32_t min_add_a, min_add_b;
+       int intersect_type;
+       int node_intersect_type;
+       int b_full, next_move, rc;
+       struct rte_acl_bitset intersect_values;
+       struct rte_acl_bitset intersect_ptr;
+
+       min_add_a = 0;
+       min_add_b = 0;
+       intersect_type = 0;
+       node_intersect_type = 0;
+
+       if (level == 0)
+               a_subset = 1;
+
+       /*
+        *  Resolve match priorities
+        */
+       if (node_a->match_flag != 0 || node_b->match_flag != 0) {
+
+               if (node_a->match_flag == 0 || node_b->match_flag == 0)
+                       RTE_LOG(ERR, ACL, "Not both matches\n");
+
+               if (node_b->match_flag < node_a->match_flag)
+                       RTE_LOG(ERR, ACL, "Not same match\n");
+
+               for (n = 0; n < context->cfg.num_categories; n++) {
+                       if (node_a->mrt->priority[n] <
+                                       node_b->mrt->priority[n]) {
+                               node_a->mrt->priority[n] =
+                                       node_b->mrt->priority[n];
+                               node_a->mrt->results[n] =
+                                       node_b->mrt->results[n];
+                       }
+               }
+       }
+
+       /*
+        * If the two node transitions intersect then merge the transitions.
+        * Check intersection for entire node (all pointers)
+        */
+       node_intersect_type = acl_intersect_type(&node_a->values,
+               &node_b->values,
+               &intersect_values);
+
+       if (node_intersect_type & ACL_INTERSECT) {
+
+               b_full = acl_full(node_b);
+
+               min_add_b = node_b->min_add;
+               node_b->min_add = node_b->num_ptrs;
+               ptrs_b = node_b->num_ptrs;
+
+               min_add_a = node_a->min_add;
+               node_a->min_add = node_a->num_ptrs;
+               ptrs_a = node_a->num_ptrs;
+
+               for (n = 0; n < ptrs_a; n++) {
+                       for (m = 0; m < ptrs_b; m++) {
+
+                               if (node_a->ptrs[n].ptr == NULL ||
+                                               node_b->ptrs[m].ptr == NULL ||
+                                               node_a->ptrs[n].ptr ==
+                                               node_b->ptrs[m].ptr)
+                                               continue;
+
+                               intersect_type = acl_intersect_type(
+                                       &node_a->ptrs[n].values,
+                                       &node_b->ptrs[m].values,
+                                       &intersect_ptr);
+
+                               /* If this node is not a 'match' node */
+                               if ((intersect_type & ACL_INTERSECT) &&
+                                       (context->cfg.num_categories != 1 ||
+                                       !(node_a->ptrs[n].ptr->match_flag))) {
+
+                                       /*
+                                        * the next merge is a 'move' if this
+                                        * one is a move and B is a subset of
+                                        * the intersection.
+                                        */
+                                       next_move = move &&
+                                               (intersect_type &
+                                               ACL_INTERSECT_B) == 0;
+
+                                       if (a_subset && b_full) {
+                                               rc = acl_merge(context,
+                                                       node_a->ptrs[n].ptr,
+                                                       node_b->ptrs[m].ptr,
+                                                       next_move,
+                                                       1, level + 1);
+                                               if (rc != 0)
+                                                       return rc;
+                                       } else {
+                                               rc = acl_merge_intersect(
+                                                       context, node_a, n,
+                                                       node_b, m, next_move,
+                                                       level, &intersect_ptr);
+                                               if (rc != 0)
+                                                       return rc;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /* Compact pointers */
+       node_a->min_add = min_add_a;
+       acl_compact_node_ptrs(node_a);
+       node_b->min_add = min_add_b;
+       acl_compact_node_ptrs(node_b);
+
+       /*
+        *  Either COPY or MOVE pointers from B to A
+        */
+       acl_intersect(&node_a->values, &node_b->values, &intersect_values);
+
+       if (move && node_b->ref_count == 1) {
+               for (m = 0; m < node_b->num_ptrs; m++) {
+                       if (node_b->ptrs[m].ptr != NULL &&
+                                       acl_move_ptr(context, node_a, node_b, m,
+                                       &intersect_values) < 0)
+                               return -1;
+               }
+       } else {
+               for (m = 0; m < node_b->num_ptrs; m++) {
+                       if (node_b->ptrs[m].ptr != NULL &&
+                                       acl_copy_ptr(context, node_a, node_b, m,
+                                       &intersect_values) < 0)
+                               return -1;
+               }
+       }
+
+       /*
+        *  Free node B if it is empty (no longer used)
+        */
+       if (acl_empty(node_b))
+               acl_free_node(context, node_b);
+       return 0;
+}
+
+static int
+acl_resolve_leaf(struct acl_build_context *context,
+       struct rte_acl_node *node_a,
+       struct rte_acl_node *node_b,
+       struct rte_acl_node **node_c)
+{
+       uint32_t n;
+       int combined_priority = ACL_PRIORITY_EQUAL;
+
+       for (n = 0; n < context->cfg.num_categories; n++) {
+               if (node_a->mrt->priority[n] != node_b->mrt->priority[n]) {
+                       combined_priority |= (node_a->mrt->priority[n] >
+                               node_b->mrt->priority[n]) ?
+                               ACL_PRIORITY_NODE_A : ACL_PRIORITY_NODE_B;
+               }
+       }
+
+       /*
+        * if node a is higher or equal priority for all categories,
+        * then return node_a.
+        */
+       if (combined_priority == ACL_PRIORITY_NODE_A ||
+                       combined_priority == ACL_PRIORITY_EQUAL) {
+               *node_c = node_a;
+               return 0;
+       }
+
+       /*
+        * if node b is higher or equal priority for all categories,
+        * then return node_b.
+        */
+       if (combined_priority == ACL_PRIORITY_NODE_B) {
+               *node_c = node_b;
+               return 0;
+       }
+
+       /*
+        * mixed priorities - create a new node with the highest priority
+        * for each category.
+        */
+
+       /* force new duplication. */
+       node_a->next = NULL;
+
+       *node_c = acl_dup_node(context, node_a);
+       for (n = 0; n < context->cfg.num_categories; n++) {
+               if ((*node_c)->mrt->priority[n] < node_b->mrt->priority[n]) {
+                       (*node_c)->mrt->priority[n] = node_b->mrt->priority[n];
+                       (*node_c)->mrt->results[n] = node_b->mrt->results[n];
+               }
+       }
+       return 0;
+}
+
+/*
+ * Within the existing trie structure, determine which nodes are
+ * part of the subtree of the trie to be merged.
+ *
+ * For these purposes, a subtree is defined as the set of nodes that
+ * are 1) not a superset of the intersection with the same level of
+ * the merging tree, and 2) do not have any references from a node
+ * outside of the subtree.
+ */
+static void
+mark_subtree(struct rte_acl_node *node,
+       struct rte_acl_bitset *level_bits,
+       uint32_t level,
+       uint32_t id)
+{
+       uint32_t n;
+
+       /* mark this node as part of the subtree */
+       node->subtree_id = id | RTE_ACL_SUBTREE_NODE;
+
+       for (n = 0; n < node->num_ptrs; n++) {
+
+               if (node->ptrs[n].ptr != NULL) {
+
+                       struct rte_acl_bitset intersect_bits;
+                       int intersect;
+
+                       /*
+                        * Item 1):
+                        * check if this child pointer is not a superset of
+                        * the same level of the merging tree.
+                        */
+                       intersect = acl_intersect_type(&node->ptrs[n].values,
+                               &level_bits[level],
+                               &intersect_bits);
+
+                       if ((intersect & ACL_INTERSECT_A) == 0) {
+
+                               struct rte_acl_node *child = node->ptrs[n].ptr;
+
+                               /*
+                                * reset subtree reference if this is
+                                * the first visit by this subtree.
+                                */
+                               if (child->subtree_id != id) {
+                                       child->subtree_id = id;
+                                       child->subtree_ref_count = 0;
+                               }
+
+                               /*
+                                * Item 2):
+                                * increment the subtree reference count and, if
+                                * all references are from this subtree, then
+                                * recurse into that child.
+                                */
+                               child->subtree_ref_count++;
+                               if (child->subtree_ref_count ==
+                                               child->ref_count)
+                                       mark_subtree(child, level_bits,
+                                               level + 1, id);
+                       }
+               }
+       }
+}
+
+/*
+ * Build the set of bits that define the set of transitions
+ * for each level of a trie.
+ */
+static void
+build_subset_mask(struct rte_acl_node *node,
+       struct rte_acl_bitset *level_bits,
+       int level)
+{
+       uint32_t n;
+
+       /* Add this node's transitions to the set for this level */
+       for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++)
+               level_bits[level].bits[n] &= node->values.bits[n];
+
+       /* For each child, add the transitions for the next level */
+       for (n = 0; n < node->num_ptrs; n++)
+               if (node->ptrs[n].ptr != NULL)
+                       build_subset_mask(node->ptrs[n].ptr, level_bits,
+                               level + 1);
+}
+
+
+/*
+ * Merge nodes A and B together,
+ *   returns a node that is the path for the intersection
+ *
+ * If match node (leaf on trie)
+ *     For each category
+ *             return node = highest priority result
+ *
+ * Create C as a duplicate of A to point to child intersections
+ * If any pointers in C intersect with any in B
+ *     For each intersection
+ *             merge children
+ *             remove intersection from C pointer
+ *             add a pointer from C to child intersection node
+ * Compact the pointers in A and B
+ * Copy any B pointers that are outside of the intersection to C
+ * If C has no references to the B trie
+ *   free C and return A
+ * Else If C has no references to the A trie
+ *   free C and return B
+ * Else
+ *   return C
+ */
+static int
+acl_merge_trie(struct acl_build_context *context,
+       struct rte_acl_node *node_a, struct rte_acl_node *node_b,
+       uint32_t level, uint32_t subtree_id, struct rte_acl_node **return_c)
+{
+       uint32_t n, m, ptrs_c, ptrs_b;
+       uint32_t min_add_c, min_add_b;
+       int node_intersect_type;
+       struct rte_acl_bitset node_intersect;
+       struct rte_acl_node *node_c;
+       struct rte_acl_node *node_a_next;
+       int node_b_refs;
+       int node_a_refs;
+
+       node_c = node_a;
+       node_a_next = node_a->next;
+       min_add_c = 0;
+       min_add_b = 0;
+       node_a_refs = node_a->num_ptrs;
+       node_b_refs = 0;
+       node_intersect_type = 0;
+
+       /* Resolve leaf nodes (matches) */
+       if (node_a->match_flag != 0) {
+               acl_resolve_leaf(context, node_a, node_b, return_c);
+               return 0;
+       }
+
+       /*
+        * Create node C as a copy of node A if node A is not part of
+        * a subtree of the merging tree (node B side). Otherwise,
+        * just use node A.
+        */
+       if (level > 0 &&
+                       node_a->subtree_id !=
+                       (subtree_id | RTE_ACL_SUBTREE_NODE)) {
+               node_c = acl_dup_node(context, node_a);
+               node_c->subtree_id = subtree_id | RTE_ACL_SUBTREE_NODE;
+       }
+
+       /*
+        * If the two node transitions intersect then merge the transitions.
+        * Check intersection for entire node (all pointers)
+        */
+       node_intersect_type = acl_intersect_type(&node_c->values,
+               &node_b->values,
+               &node_intersect);
+
+       if (node_intersect_type & ACL_INTERSECT) {
+
+               min_add_b = node_b->min_add;
+               node_b->min_add = node_b->num_ptrs;
+               ptrs_b = node_b->num_ptrs;
+
+               min_add_c = node_c->min_add;
+               node_c->min_add = node_c->num_ptrs;
+               ptrs_c = node_c->num_ptrs;
+
+               for (n = 0; n < ptrs_c; n++) {
+                       if (node_c->ptrs[n].ptr == NULL) {
+                               node_a_refs--;
+                               continue;
+                       }
+                       node_c->ptrs[n].ptr->next = NULL;
+                       for (m = 0; m < ptrs_b; m++) {
+
+                               struct rte_acl_bitset child_intersect;
+                               int child_intersect_type;
+                               struct rte_acl_node *child_node_c = NULL;
+
+                               if (node_b->ptrs[m].ptr == NULL ||
+                                               node_c->ptrs[n].ptr ==
+                                               node_b->ptrs[m].ptr)
+                                               continue;
+
+                               child_intersect_type = acl_intersect_type(
+                                       &node_c->ptrs[n].values,
+                                       &node_b->ptrs[m].values,
+                                       &child_intersect);
+
+                               if ((child_intersect_type & ACL_INTERSECT) !=
+                                               0) {
+                                       if (acl_merge_trie(context,
+                                                       node_c->ptrs[n].ptr,
+                                                       node_b->ptrs[m].ptr,
+                                                       level + 1, subtree_id,
+                                                       &child_node_c))
+                                               return 1;
+
+                                       if (child_node_c != NULL &&
+                                                       child_node_c !=
+                                                       node_c->ptrs[n].ptr) {
+
+                                               node_b_refs++;
+
+                                               /*
+                                                * Add a link from C to
+                                                * child_C for all transitions
+                                                * in the intersection.
+                                                */
+                                               acl_add_ptr(context, node_c,
+                                                       child_node_c,
+                                                       &child_intersect);
+
+                                               /*
+                                                * inc refs if pointer is not
+                                                * to node b.
+                                                */
+                                               node_a_refs += (child_node_c !=
+                                                       node_b->ptrs[m].ptr);
+
+                                               /*
+                                                * Remove intersection from C
+                                                * pointer.
+                                                */
+                                               if (!acl_exclude(
+                                                       &node_c->ptrs[n].values,
+                                                       &node_c->ptrs[n].values,
+                                                       &child_intersect)) {
+                                                       acl_deref_ptr(context,
+                                                               node_c, n);
+                                                       node_c->ptrs[n].ptr =
+                                                               NULL;
+                                                       node_a_refs--;
+                                               }
+                                       }
+                               }
+                       }
+               }
+
+               /* Compact pointers */
+               node_c->min_add = min_add_c;
+               acl_compact_node_ptrs(node_c);
+               node_b->min_add = min_add_b;
+               acl_compact_node_ptrs(node_b);
+       }
+
+       /*
+        *  Copy pointers outside of the intersection from B to C
+        */
+       if ((node_intersect_type & ACL_INTERSECT_B) != 0) {
+               node_b_refs++;
+               for (m = 0; m < node_b->num_ptrs; m++)
+                       if (node_b->ptrs[m].ptr != NULL)
+                               acl_copy_ptr(context, node_c,
+                                       node_b, m, &node_intersect);
+       }
+
+       /*
+        * Free node C if top of trie is contained in A or B
+        *  if node C is a duplicate of node A &&
+        *     node C was not an existing duplicate
+        */
+       if (node_c != node_a && node_c != node_a_next) {
+
+               /*
+                * if the intersection has no references to the
+                * B side, then it is contained in A
+                */
+               if (node_b_refs == 0) {
+                       acl_free_node(context, node_c);
+                       node_c = node_a;
+               } else {
+                       /*
+                        * if the intersection has no references to the
+                        * A side, then it is contained in B.
+                        */
+                       if (node_a_refs == 0) {
+                               acl_free_node(context, node_c);
+                               node_c = node_b;
+                       }
+               }
+       }
+
+       if (return_c != NULL)
+               *return_c = node_c;
+
+       if (level == 0)
+               acl_free_node(context, node_b);
+
+       return 0;
+}
+
+/*
+ * Reset current runtime fields before next build:
+ *  - free allocated RT memory.
+ *  - reset all RT related fields to zero.
+ */
+static void
+acl_build_reset(struct rte_acl_ctx *ctx)
+{
+       rte_free(ctx->mem);
+       memset(&ctx->num_categories, 0,
+               sizeof(*ctx) - offsetof(struct rte_acl_ctx, num_categories));
+}
+
+static void
+acl_gen_range(struct acl_build_context *context,
+       const uint8_t *hi, const uint8_t *lo, int size, int level,
+       struct rte_acl_node *root, struct rte_acl_node *end)
+{
+       struct rte_acl_node *node, *prev;
+       uint32_t n;
+
+       prev = root;
+       for (n = size - 1; n > 0; n--) {
+               node = acl_alloc_node(context, level++);
+               acl_add_ptr_range(context, prev, node, lo[n], hi[n]);
+               prev = node;
+       }
+       acl_add_ptr_range(context, prev, end, lo[0], hi[0]);
+}
+
+static struct rte_acl_node *
+acl_gen_range_trie(struct acl_build_context *context,
+       const void *min, const void *max,
+       int size, int level, struct rte_acl_node **pend)
+{
+       int32_t n;
+       struct rte_acl_node *root;
+       const uint8_t *lo = (const uint8_t *)min;
+       const uint8_t *hi = (const uint8_t *)max;
+
+       *pend = acl_alloc_node(context, level+size);
+       root = acl_alloc_node(context, level++);
+
+       if (lo[size - 1] == hi[size - 1]) {
+               acl_gen_range(context, hi, lo, size, level, root, *pend);
+       } else {
+               uint8_t limit_lo[64];
+               uint8_t limit_hi[64];
+               uint8_t hi_ff = UINT8_MAX;
+               uint8_t lo_00 = 0;
+
+               memset(limit_lo, 0, RTE_DIM(limit_lo));
+               memset(limit_hi, UINT8_MAX, RTE_DIM(limit_hi));
+
+               for (n = size - 2; n >= 0; n--) {
+                       hi_ff = (uint8_t)(hi_ff & hi[n]);
+                       lo_00 = (uint8_t)(lo_00 | lo[n]);
+               }
+
+               if (hi_ff != UINT8_MAX) {
+                       limit_lo[size - 1] = hi[size - 1];
+                       acl_gen_range(context, hi, limit_lo, size, level,
+                               root, *pend);
+               }
+
+               if (lo_00 != 0) {
+                       limit_hi[size - 1] = lo[size - 1];
+                       acl_gen_range(context, limit_hi, lo, size, level,
+                               root, *pend);
+               }
+
+               if (hi[size - 1] - lo[size - 1] > 1 ||
+                               lo_00 == 0 ||
+                               hi_ff == UINT8_MAX) {
+                       limit_lo[size-1] = (uint8_t)(lo[size-1] + (lo_00 != 0));
+                       limit_hi[size-1] = (uint8_t)(hi[size-1] -
+                               (hi_ff != UINT8_MAX));
+                       acl_gen_range(context, limit_hi, limit_lo, size,
+                               level, root, *pend);
+               }
+       }
+       return root;
+}
+
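+/*
+ * Build a chain of nodes that matches (value & mask) one byte at a time,
+ * from byte [size - 1] down to byte [0]; *pend is set to the last node.
+ */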
+static struct rte_acl_node *
+acl_gen_mask_trie(struct acl_build_context *context,
+       const void *value, const void *mask,
+       int size, int level, struct rte_acl_node **pend)
+{
+       int32_t n;
+       struct rte_acl_node *root;
+       struct rte_acl_node *node, *prev;
+       struct rte_acl_bitset bits;
+       const uint8_t *val = (const uint8_t *)value;
+       const uint8_t *msk = (const uint8_t *)mask;
+
+       root = acl_alloc_node(context, level++);
+       prev = root;
+
+       for (n = size - 1; n >= 0; n--) {
+               node = acl_alloc_node(context, level++);
+               acl_gen_mask(&bits, val[n] & msk[n], msk[n]);
+               acl_add_ptr(context, prev, node, &bits);
+               prev = node;
+       }
+
+       *pend = prev;
+       return root;
+}
+
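+/*
+ * Build a trie for the given list of rules: for each rule, generate a
+ * mini-trie per field, chain them together, attach the per-category match
+ * results, and merge the result into the main trie. If merging a single
+ * rule adds more than NODE_MAX nodes, stop early and report the last rule
+ * kept in this trie through *last.
+ */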
+static struct rte_acl_node *
+build_trie(struct acl_build_context *context, struct rte_acl_build_rule *head,
+       struct rte_acl_build_rule **last, uint32_t *count)
+{
+       uint32_t n, m;
+       int field_index, node_count;
+       struct rte_acl_node *trie;
+       struct rte_acl_build_rule *prev, *rule;
+       struct rte_acl_node *end, *merge, *root, *end_prev;
+       const struct rte_acl_field *fld;
+       struct rte_acl_bitset level_bits[RTE_ACL_MAX_LEVELS];
+
+       prev = head;
+       rule = head;
+
+       trie = acl_alloc_node(context, 0);
+       if (trie == NULL)
+               return NULL;
+
+       while (rule != NULL) {
+
+               root = acl_alloc_node(context, 0);
+               if (root == NULL)
+                       return NULL;
+
+               root->ref_count = 1;
+               end = root;
+
+               for (n = 0; n < rule->config->num_fields; n++) {
+
+                       field_index = rule->config->defs[n].field_index;
+                       fld = rule->f->field + field_index;
+                       end_prev = end;
+
+                       /* build a mini-trie for this field */
+                       switch (rule->config->defs[n].type) {
+
+                       case RTE_ACL_FIELD_TYPE_BITMASK:
+                               merge = acl_gen_mask_trie(context,
+                                       &fld->value,
+                                       &fld->mask_range,
+                                       rule->config->defs[n].size,
+                                       end->level + 1,
+                                       &end);
+                               break;
+
+                       case RTE_ACL_FIELD_TYPE_MASK:
+                       {
+                               /*
+                                * set msb for the size of the field and
+                                * all higher bits.
+                                */
+                               uint64_t mask;
+
+                               if (fld->mask_range.u32 == 0) {
+                                       mask = 0;
+
+                               /*
+                                * Expand the prefix length into a mask:
+                                * set the top mask_range.u32 bits of
+                                * the field.
+                                */
+                               } else {
+                                       mask = -1 <<
+                                               (rule->config->defs[n].size *
+                                               CHAR_BIT - fld->mask_range.u32);
+                               }
+
+                               /* gen a mini-trie for this field */
+                               merge = acl_gen_mask_trie(context,
+                                       &fld->value,
+                                       (char *)&mask,
+                                       rule->config->defs[n].size,
+                                       end->level + 1,
+                                       &end);
+                       }
+                       break;
+
+                       case RTE_ACL_FIELD_TYPE_RANGE:
+                               merge = acl_gen_range_trie(context,
+                                       &rule->f->field[field_index].value,
+                                       &rule->f->field[field_index].mask_range,
+                                       rule->config->defs[n].size,
+                                       end->level + 1,
+                                       &end);
+                               break;
+
+                       default:
+                               RTE_LOG(ERR, ACL,
+                                       "Error in rule[%u] type - %hhu\n",
+                                       rule->f->data.userdata,
+                                       rule->config->defs[n].type);
+                               return NULL;
+                       }
+
+                       /* merge this field on to the end of the rule */
+                       if (acl_merge_trie(context, end_prev, merge, 0,
+                                       0, NULL) != 0) {
+                               return NULL;
+                       }
+               }
+
+               end->match_flag = ++context->num_build_rules;
+
+               /*
+                * Setup the results for this rule.
+                * The result and priority of each category.
+                */
+               if (end->mrt == NULL &&
+                               (end->mrt = acl_build_alloc(context, 1,
+                               sizeof(*end->mrt))) == NULL)
+                       return NULL;
+
+               for (m = 0; m < context->cfg.num_categories; m++) {
+                       if (rule->f->data.category_mask & (1 << m)) {
+                               end->mrt->results[m] = rule->f->data.userdata;
+                               end->mrt->priority[m] = rule->f->data.priority;
+                       } else {
+                               end->mrt->results[m] = 0;
+                               end->mrt->priority[m] = 0;
+                       }
+               }
+
+               node_count = context->num_nodes;
+
+               memset(&level_bits[0], UINT8_MAX, sizeof(level_bits));
+               build_subset_mask(root, &level_bits[0], 0);
+               mark_subtree(trie, &level_bits[0], 0, end->match_flag);
+               (*count)++;
+
+               /* merge this rule into the trie */
+               if (acl_merge_trie(context, trie, root, 0, end->match_flag,
+                       NULL))
+                       return NULL;
+
+               node_count = context->num_nodes - node_count;
+               if (node_count > NODE_MAX) {
+                       *last = prev;
+                       return trie;
+               }
+
+               prev = rule;
+               rule = rule->next;
+       }
+
+       *last = NULL;
+       return trie;
+}
+
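+/*
+ * For every rule and field, compute a "wildness" value in the range 0..100:
+ * the percentage of the field's value space that the rule matches.
+ */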
+static int
+acl_calc_wildness(struct rte_acl_build_rule *head,
+       const struct rte_acl_config *config)
+{
+       uint32_t n;
+       struct rte_acl_build_rule *rule;
+
+       for (rule = head; rule != NULL; rule = rule->next) {
+
+               for (n = 0; n < config->num_fields; n++) {
+
+                       double wild = 0;
+                       double size = CHAR_BIT * config->defs[n].size;
+                       int field_index = config->defs[n].field_index;
+                       const struct rte_acl_field *fld = rule->f->field +
+                               field_index;
+
+                       switch (rule->config->defs[n].type) {
+                       case RTE_ACL_FIELD_TYPE_BITMASK:
+                               wild = (size -
+                                       _mm_popcnt_u32(fld->mask_range.u8)) /
+                                       size;
+                               break;
+
+                       case RTE_ACL_FIELD_TYPE_MASK:
+                               wild = (size - fld->mask_range.u32) / size;
+                               break;
+
+                       case RTE_ACL_FIELD_TYPE_RANGE:
+                               switch (rule->config->defs[n].size) {
+                               case sizeof(uint8_t):
+                                       wild = ((double)fld->mask_range.u8 -
+                                               fld->value.u8) / UINT8_MAX;
+                                       break;
+                               case sizeof(uint16_t):
+                                       wild = ((double)fld->mask_range.u16 -
+                                               fld->value.u16) / UINT16_MAX;
+                                       break;
+                               case sizeof(uint32_t):
+                                       wild = ((double)fld->mask_range.u32 -
+                                               fld->value.u32) / UINT32_MAX;
+                                       break;
+                               case sizeof(uint64_t):
+                                       wild = ((double)fld->mask_range.u64 -
+                                               fld->value.u64) / UINT64_MAX;
+                                       break;
+                               default:
+                                       RTE_LOG(ERR, ACL,
+                                               "%s(rule: %u) invalid %u-th "
+                                               "field, type: %hhu, "
+                                               "unknown size: %hhu\n",
+                                               __func__,
+                                               rule->f->data.userdata,
+                                               n,
+                                               rule->config->defs[n].type,
+                                               rule->config->defs[n].size);
+                                       return -EINVAL;
+                               }
+                               break;
+
+                       default:
+                               RTE_LOG(ERR, ACL,
+                                       "%s(rule: %u) invalid %u-th "
+                                       "field, unknown type: %hhu\n",
+                                       __func__,
+                                       rule->f->data.userdata,
+                                       n,
+                                       rule->config->defs[n].type);
+                               return -EINVAL;
+
+                       }
+
+                       rule->wildness[field_index] = (uint32_t)(wild * 100);
+               }
+       }
+
+       return 0;
+}
+
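+/*
+ * Gather per-field wildness statistics for the rule set, drop fields that
+ * are always completely wild from the config, and decide whether a subset
+ * of rules should be split off into a separate trie. Returns non-zero
+ * (and fills wild_limit[]) when a split is recommended.
+ */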
+static int
+acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config,
+       uint32_t *wild_limit)
+{
+       int min;
+       struct rte_acl_build_rule *rule;
+       uint32_t n, m, fields_deactivated = 0;
+       uint32_t start = 0, deactivate = 0;
+       int tally[RTE_ACL_MAX_LEVELS][TALLY_NUM];
+
+       memset(tally, 0, sizeof(tally));
+
+       for (rule = head; rule != NULL; rule = rule->next) {
+
+               for (n = 0; n < config->num_fields; n++) {
+                       uint32_t field_index = config->defs[n].field_index;
+
+                       tally[n][TALLY_0]++;
+                       for (m = 1; m < RTE_DIM(wild_limits); m++) {
+                               if (rule->wildness[field_index] >=
+                                               wild_limits[m])
+                                       tally[n][m]++;
+                       }
+               }
+
+               for (n = config->num_fields - 1; n > 0; n--) {
+                       uint32_t field_index = config->defs[n].field_index;
+
+                       if (rule->wildness[field_index] == 100)
+                               tally[n][TALLY_DEPTH]++;
+                       else
+                               break;
+               }
+       }
+
+       /*
+        * Look for any field that is always wild and drop it from the config.
+        * Only deactivate if all fields sharing an input index are deactivated.
+        */
+       for (n = 1; n < config->num_fields; n++) {
+               if (config->defs[n].input_index !=
+                               config->defs[n - 1].input_index) {
+                       for (m = start; m < n; m++)
+                               tally[m][TALLY_DEACTIVATED] = deactivate;
+                       fields_deactivated += deactivate;
+                       start = n;
+                       deactivate = 1;
+               }
+
+               /* if the field is not always completely wild */
+               if (tally[n][TALLY_100] != tally[n][TALLY_0])
+                       deactivate = 0;
+       }
+
+       for (m = start; m < n; m++)
+               tally[m][TALLY_DEACTIVATED] = deactivate;
+
+       fields_deactivated += deactivate;
+
+       /* remove deactivated fields */
+       if (fields_deactivated) {
+               uint32_t k, l = 0;
+
+               for (k = 0; k < config->num_fields; k++) {
+                       if (tally[k][TALLY_DEACTIVATED] == 0) {
+                               memcpy(&tally[l][0], &tally[k][0],
+                                       TALLY_NUM * sizeof(tally[0][0]));
+                               memcpy(&config->defs[l++],
+                                       &config->defs[k],
+                                       sizeof(struct rte_acl_field_def));
+                       }
+               }
+               config->num_fields = l;
+       }
+
+       min = RTE_ACL_SINGLE_TRIE_SIZE;
+       if (config->num_fields == 2)
+               min *= 4;
+       else if (config->num_fields == 3)
+               min *= 3;
+       else if (config->num_fields == 4)
+               min *= 2;
+
+       if (tally[0][TALLY_0] < min)
+               return 0;
+       for (n = 0; n < config->num_fields; n++)
+               wild_limit[n] = 0;
+
+       /*
+        * If trailing fields are 100% wild, group those together.
+        * This allows the search length of the trie to be shortened.
+        */
+       for (n = 1; n < config->num_fields; n++) {
+
+               double rule_percentage = (double)tally[n][TALLY_DEPTH] /
+                       tally[n][0];
+
+               if (rule_percentage > RULE_PERCENTAGE) {
+                       /* if it crosses an input boundary then round up */
+                       while (config->defs[n - 1].input_index ==
+                                       config->defs[n].input_index)
+                               n++;
+
+                       /* set the limit for selecting rules */
+                       while (n < config->num_fields)
+                               wild_limit[n++] = 100;
+
+                       if (wild_limit[n - 1] == 100)
+                               return 1;
+               }
+       }
+
+       /*
+        * look for the most wild limit that covers more than 40%
+        * but less than 80% of the rules
+        */
+       for (n = 1; n < config->num_fields; n++) {
+               for (m = TALLY_100; m > 0; m--) {
+
+                       double rule_percentage = (double)tally[n][m] /
+                               tally[n][0];
+
+                       if (tally[n][TALLY_DEACTIVATED] == 0 &&
+                                       tally[n][TALLY_0] >
+                                       RTE_ACL_SINGLE_TRIE_SIZE &&
+                                       rule_percentage > NODE_PERCENTAGE &&
+                                       rule_percentage < 0.80) {
+                               wild_limit[n] = wild_limits[m];
+                               return 1;
+                       }
+               }
+       }
+       return 0;
+}
+
+static int
+order(struct rte_acl_build_rule **insert, struct rte_acl_build_rule *rule)
+{
+       uint32_t n;
+       struct rte_acl_build_rule *left = *insert;
+
+       if (left == NULL)
+               return 0;
+
+       for (n = 1; n < left->config->num_fields; n++) {
+               int field_index = left->config->defs[n].field_index;
+
+               if (left->wildness[field_index] != rule->wildness[field_index])
+                       return (left->wildness[field_index] >=
+                               rule->wildness[field_index]);
+       }
+       return 0;
+}
+
+static struct rte_acl_build_rule *
+ordered_insert_rule(struct rte_acl_build_rule *head,
+       struct rte_acl_build_rule *rule)
+{
+       struct rte_acl_build_rule **insert;
+
+       if (rule == NULL)
+               return head;
+
+       rule->next = head;
+       if (head == NULL)
+               return rule;
+
+       insert = &head;
+       while (order(insert, rule))
+               insert = &(*insert)->next;
+
+       rule->next = *insert;
+       *insert = rule;
+       return head;
+}
+
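+/* Re-order the rule list by per-field wildness (insertion sort). */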
+static struct rte_acl_build_rule *
+sort_rules(struct rte_acl_build_rule *head)
+{
+       struct rte_acl_build_rule *rule, *reordered_head = NULL;
+       struct rte_acl_build_rule *last_rule = NULL;
+
+       for (rule = head; rule != NULL; rule = rule->next) {
+               reordered_head = ordered_insert_rule(reordered_head, last_rule);
+               last_rule = rule;
+       }
+
+       if (last_rule != reordered_head)
+               reordered_head = ordered_insert_rule(reordered_head, last_rule);
+
+       return reordered_head;
+}
+
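+/*
+ * Build the data_index array for a trie: one input byte offset per
+ * distinct input_index in the field definitions.
+ */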
+static uint32_t
+acl_build_index(const struct rte_acl_config *config, uint32_t *data_index)
+{
+       uint32_t n, m;
+       int32_t last_header;
+
+       m = 0;
+       last_header = -1;
+
+       for (n = 0; n < config->num_fields; n++) {
+               if (last_header != config->defs[n].input_index) {
+                       last_header = config->defs[n].input_index;
+                       data_index[m++] = config->defs[n].offset;
+               }
+       }
+
+       return m;
+}
+
+static int
+acl_build_tries(struct acl_build_context *context,
+       struct rte_acl_build_rule *head)
+{
+       int32_t rc;
+       uint32_t n, m, num_tries;
+       struct rte_acl_config *config;
+       struct rte_acl_build_rule *last, *rule;
+       uint32_t wild_limit[RTE_ACL_MAX_LEVELS];
+       struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES];
+
+       config = head->config;
+       rule = head;
+       rule_sets[0] = head;
+       num_tries = 1;
+
+       /* initialize tries */
+       for (n = 0; n < RTE_DIM(context->tries); n++) {
+               context->tries[n].type = RTE_ACL_UNUSED_TRIE;
+               context->bld_tries[n].trie = NULL;
+               context->tries[n].count = 0;
+               context->tries[n].smallest = INT32_MAX;
+       }
+
+       context->tries[0].type = RTE_ACL_FULL_TRIE;
+
+       /* calc wildness of each field of each rule */
+       rc = acl_calc_wildness(head, config);
+       if (rc != 0)
+               return rc;
+
+       n = acl_rule_stats(head, config, &wild_limit[0]);
+
+       /* put all rules that fit the wildness criteria into a separate trie */
+       while (n > 0 && num_tries < RTE_ACL_MAX_TRIES) {
+
+               struct rte_acl_config *new_config;
+               struct rte_acl_build_rule **prev = &rule_sets[num_tries - 1];
+               struct rte_acl_build_rule *next = head->next;
+
+               new_config = acl_build_alloc(context, 1, sizeof(*new_config));
+               if (new_config == NULL) {
+                       RTE_LOG(ERR, ACL,
+                               "Failed to get space for new config\n");
+                       return -ENOMEM;
+               }
+
+               memcpy(new_config, config, sizeof(*new_config));
+               config = new_config;
+               rule_sets[num_tries] = NULL;
+
+               for (rule = head; rule != NULL; rule = next) {
+
+                       int move = 1;
+
+                       next = rule->next;
+                       for (m = 0; m < config->num_fields; m++) {
+                               int x = config->defs[m].field_index;
+                               if (rule->wildness[x] < wild_limit[m]) {
+                                       move = 0;
+                                       break;
+                               }
+                       }
+
+                       if (move) {
+                               rule->config = new_config;
+                               rule->next = rule_sets[num_tries];
+                               rule_sets[num_tries] = rule;
+                               *prev = next;
+                       } else
+                               prev = &rule->next;
+               }
+
+               head = rule_sets[num_tries];
+               n = acl_rule_stats(rule_sets[num_tries], config,
+                       &wild_limit[0]);
+               num_tries++;
+       }
+
+       if (n > 0)
+               RTE_LOG(DEBUG, ACL,
+                       "Number of tries(%d) exceeded.\n", RTE_ACL_MAX_TRIES);
+
+       for (n = 0; n < num_tries; n++) {
+
+               rule_sets[n] = sort_rules(rule_sets[n]);
+               context->tries[n].type = RTE_ACL_FULL_TRIE;
+               context->tries[n].count = 0;
+               context->tries[n].num_data_indexes =
+                       acl_build_index(rule_sets[n]->config,
+                       context->data_indexes[n]);
+               context->tries[n].data_index = context->data_indexes[n];
+
+               context->bld_tries[n].trie =
+                               build_trie(context, rule_sets[n],
+                               &last, &context->tries[n].count);
+               if (context->bld_tries[n].trie == NULL) {
+                       RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n);
+                       return -ENOMEM;
+               }
+
+               if (last != NULL) {
+                       rule_sets[num_tries++] = last->next;
+                       last->next = NULL;
+                       acl_free_node(context, context->bld_tries[n].trie);
+                       context->tries[n].count = 0;
+
+                       context->bld_tries[n].trie =
+                                       build_trie(context, rule_sets[n],
+                                       &last, &context->tries[n].count);
+                       if (context->bld_tries[n].trie == NULL) {
+                               RTE_LOG(ERR, ACL,
+                                       "Build of %u-th trie failed\n", n);
+                               return -ENOMEM;
+                       }
+               }
+       }
+
+       context->num_tries = num_tries;
+       return 0;
+}
+
+static void
+acl_build_log(const struct acl_build_context *ctx)
+{
+       uint32_t n;
+
+       RTE_LOG(DEBUG, ACL, "Build phase for ACL \"%s\":\n"
+               "memory consumed: %zu\n",
+               ctx->acx->name,
+               ctx->pool.alloc);
+
+       for (n = 0; n < RTE_DIM(ctx->tries); n++) {
+               if (ctx->tries[n].count != 0)
+                       RTE_LOG(DEBUG, ACL,
+                               "trie %u: number of rules: %u\n",
+                               n, ctx->tries[n].count);
+       }
+}
+
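+/*
+ * Create the internal copy of the rules: one rte_acl_build_rule plus a
+ * per-field wildness array for every rule whose category mask overlaps
+ * the build category mask.
+ */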
+static int
+acl_build_rules(struct acl_build_context *bcx)
+{
+       struct rte_acl_build_rule *br, *head;
+       const struct rte_acl_rule *rule;
+       uint32_t *wp;
+       uint32_t fn, i, n, num;
+       size_t ofs, sz;
+
+       fn = bcx->cfg.num_fields;
+       n = bcx->acx->num_rules;
+       ofs = n * sizeof(*br);
+       sz = ofs + n * fn * sizeof(*wp);
+
+       br = tb_alloc(&bcx->pool, sz);
+       if (br == NULL) {
+               RTE_LOG(ERR, ACL, "ACL context %s: failed to create a copy "
+                       "of %u build rules (%zu bytes)\n",
+                       bcx->acx->name, n, sz);
+               return -ENOMEM;
+       }
+
+       wp = (uint32_t *)((uintptr_t)br + ofs);
+       num = 0;
+       head = NULL;
+
+       for (i = 0; i != n; i++) {
+               rule = (const struct rte_acl_rule *)
+                       ((uintptr_t)bcx->acx->rules + bcx->acx->rule_sz * i);
+               if ((rule->data.category_mask & bcx->category_mask) != 0) {
+                       br[num].next = head;
+                       br[num].config = &bcx->cfg;
+                       br[num].f = rule;
+                       br[num].wildness = wp;
+                       wp += fn;
+                       head = br + num;
+                       num++;
+               }
+       }
+
+       bcx->num_rules = num;
+       bcx->build_rules = head;
+
+       return 0;
+}
+
+/*
+ * Copy data_indexes for each trie into RT location.
+ */
+static void
+acl_set_data_indexes(struct rte_acl_ctx *ctx)
+{
+       uint32_t i, n, ofs;
+
+       ofs = 0;
+       for (i = 0; i != ctx->num_tries; i++) {
+               n = ctx->trie[i].num_data_indexes;
+               memcpy(ctx->data_indexes + ofs, ctx->trie[i].data_index,
+                       n * sizeof(ctx->data_indexes[0]));
+               ctx->trie[i].data_index = ctx->data_indexes + ofs;
+               ofs += n;
+       }
+}
+
+
+int
+rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
+{
+       int rc;
+       struct acl_build_context bcx;
+
+       if (ctx == NULL || cfg == NULL || cfg->num_categories == 0 ||
+                       cfg->num_categories > RTE_ACL_MAX_CATEGORIES)
+               return -EINVAL;
+
+       acl_build_reset(ctx);
+
+       memset(&bcx, 0, sizeof(bcx));
+       bcx.acx = ctx;
+       bcx.pool.alignment = ACL_POOL_ALIGN;
+       bcx.pool.min_alloc = ACL_POOL_ALLOC_MIN;
+       bcx.cfg = *cfg;
+       bcx.category_mask = LEN2MASK(bcx.cfg.num_categories);
+
+       /* Create a build rules copy. */
+       rc = acl_build_rules(&bcx);
+       if (rc != 0)
+               return rc;
+
+       /* No rules to build for that context+config */
+       if (bcx.build_rules == NULL) {
+               rc = -EINVAL;
+
+       /* build internal trie representation. */
+       } else if ((rc = acl_build_tries(&bcx, bcx.build_rules)) == 0) {
+
+               /* allocate and fill run-time structures. */
+               rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries,
+                               bcx.num_tries, bcx.cfg.num_categories,
+                               RTE_ACL_IPV4VLAN_NUM * RTE_DIM(bcx.tries),
+                               bcx.num_build_rules);
+               if (rc == 0) {
+
+                       /* set data indexes. */
+                       acl_set_data_indexes(ctx);
+
+                       /* copy in build config. */
+                       ctx->config = *cfg;
+               }
+       }
+
+       acl_build_log(&bcx);
+
+       /* cleanup after build. */
+       tb_free_pool(&bcx.pool);
+       return rc;
+}
diff --git a/lib/librte_acl/acl_gen.c b/lib/librte_acl/acl_gen.c
new file mode 100644 (file)
index 0000000..65cbaf3
--- /dev/null
@@ -0,0 +1,475 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_acl.h>
+#include "acl_vect.h"
+#include "acl.h"
+
+#define        QRANGE_MIN      ((uint8_t)INT8_MIN)
+
+#define        RTE_ACL_VERIFY(exp)     do {                                          \
+       if (!(exp))                                                           \
+               rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
+} while (0)
+
+struct acl_node_counters {
+       int                match;
+       int                match_used;
+       int                single;
+       int                quad;
+       int                quad_vectors;
+       int                dfa;
+       int                smallest_match;
+};
+
+struct rte_acl_indices {
+       int                dfa_index;
+       int                quad_index;
+       int                single_index;
+       int                match_index;
+};
+
+static void
+acl_gen_log_stats(const struct rte_acl_ctx *ctx,
+       const struct acl_node_counters *counts)
+{
+       RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
+               "runtime memory footprint on socket %d:\n"
+               "single nodes/bytes used: %d/%zu\n"
+               "quad nodes/bytes used: %d/%zu\n"
+               "DFA nodes/bytes used: %d/%zu\n"
+               "match nodes/bytes used: %d/%zu\n"
+               "total: %zu bytes\n",
+               ctx->name, ctx->socket_id,
+               counts->single, counts->single * sizeof(uint64_t),
+               counts->quad, counts->quad_vectors * sizeof(uint64_t),
+               counts->dfa, counts->dfa * RTE_ACL_DFA_SIZE * sizeof(uint64_t),
+               counts->match,
+               counts->match * sizeof(struct rte_acl_match_results),
+               ctx->mem_sz);
+}
+
+/*
+ * Counts the number of groups of sequential bits that are
+ * either 0 or 1, as specified by the zero_one parameter. This is used to
+ * calculate the number of ranges in a node to see if it fits in a quad range
+ * node.
+ */
+static int
+acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
+{
+       int n, ranges, last_bit;
+
+       ranges = 0;
+       last_bit = zero_one ^ 1;
+
+       for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
+               if (bits->bits[n / (sizeof(bits_t) * 8)] &
+                               (1 << (n % (sizeof(bits_t) * 8)))) {
+                       if (zero_one == 1 && last_bit != 1)
+                               ranges++;
+                       last_bit = 1;
+               } else {
+                       if (zero_one == 0 && last_bit != 0)
+                               ranges++;
+                       last_bit = 0;
+               }
+       }
+       for (n = 0; n < QRANGE_MIN; n++) {
+               if (bits->bits[n / (sizeof(bits_t) * 8)] &
+                               (1 << (n % (sizeof(bits_t) * 8)))) {
+                       if (zero_one == 1 && last_bit != 1)
+                               ranges++;
+                       last_bit = 1;
+               } else {
+                       if (zero_one == 0 && last_bit != 0)
+                               ranges++;
+                       last_bit = 0;
+               }
+       }
+
+       return ranges;
+}
+
+/*
+ * Count number of ranges spanned by the node's pointers
+ */
+static int
+acl_count_fanout(struct rte_acl_node *node)
+{
+       uint32_t n;
+       int ranges;
+
+       if (node->fanout != 0)
+               return node->fanout;
+
+       ranges = acl_count_sequential_groups(&node->values, 0);
+
+       for (n = 0; n < node->num_ptrs; n++) {
+               if (node->ptrs[n].ptr != NULL)
+                       ranges += acl_count_sequential_groups(
+                               &node->ptrs[n].values, 1);
+       }
+
+       node->fanout = ranges;
+       return node->fanout;
+}
+
+/*
+ * Determine the type of nodes and count each type
+ */
+static int
+acl_count_trie_types(struct acl_node_counters *counts,
+       struct rte_acl_node *node, int match, int force_dfa)
+{
+       uint32_t n;
+       int num_ptrs;
+
+       /* skip if this node has been counted */
+       if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
+               return match;
+
+       if (node->match_flag != 0 || node->num_ptrs == 0) {
+               counts->match++;
+               if (node->match_flag == -1)
+                       node->match_flag = match++;
+               node->node_type = RTE_ACL_NODE_MATCH;
+               if (counts->smallest_match > node->match_flag)
+                       counts->smallest_match = node->match_flag;
+               return match;
+       }
+
+       num_ptrs = acl_count_fanout(node);
+
+       /* Force type to dfa */
+       if (force_dfa)
+               num_ptrs = RTE_ACL_DFA_SIZE;
+
+       /* determine node type based on number of ranges */
+       if (num_ptrs == 1) {
+               counts->single++;
+               node->node_type = RTE_ACL_NODE_SINGLE;
+       } else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
+               counts->quad++;
+               counts->quad_vectors += node->fanout;
+               node->node_type = RTE_ACL_NODE_QRANGE;
+       } else {
+               counts->dfa++;
+               node->node_type = RTE_ACL_NODE_DFA;
+       }
+
+       /*
+        * recursively count the types of all children
+        */
+       for (n = 0; n < node->num_ptrs; n++) {
+               if (node->ptrs[n].ptr != NULL)
+                       match = acl_count_trie_types(counts, node->ptrs[n].ptr,
+                               match, 0);
+       }
+
+       return match;
+}
+
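+/*
+ * Expand a node's pointers into a full 256-entry transition array.
+ * For QRANGE nodes, compress the array into per-range transitions and
+ * record the range boundaries in node->transitions; for DFA nodes, copy
+ * the array into the runtime node array once children are resolved.
+ */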
+static void
+acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
+       int resolved)
+{
+       uint32_t n, x;
+       int m, ranges, last_bit;
+       struct rte_acl_node *child;
+       struct rte_acl_bitset *bits;
+       uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];
+
+       ranges = 0;
+       last_bit = 0;
+
+       for (n = 0; n < RTE_DIM(dfa); n++)
+               dfa[n] = no_match;
+
+       for (x = 0; x < node->num_ptrs; x++) {
+
+               child = node->ptrs[x].ptr;
+               if (child == NULL)
+                       continue;
+
+               bits = &node->ptrs[x].values;
+               for (n = 0; n < RTE_DIM(dfa); n++) {
+
+                       if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
+                               (1 << (n % (sizeof(bits_t) * CHAR_BIT)))) {
+
+                               dfa[n] = resolved ? child->node_index : x;
+                               ranges += (last_bit == 0);
+                               last_bit = 1;
+                       } else {
+                               last_bit = 0;
+                       }
+               }
+       }
+
+       /*
+        * Rather than going from 0 to 256, the range count and
+        * the layout are from 80-ff then 0-7f due to signed compare
+        * for SSE (cmpgt).
+        */
+       if (node->node_type == RTE_ACL_NODE_QRANGE) {
+
+               m = 0;
+               node_a = node_array;
+               index = dfa[QRANGE_MIN];
+               *node_a++ = index;
+
+               for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
+                       if (dfa[x] != index) {
+                               index = dfa[x];
+                               *node_a++ = index;
+                               node->transitions[m++] = (uint8_t)(x - 1);
+                       }
+               }
+
+               for (x = 0; x < INT8_MAX + 1; x++) {
+                       if (dfa[x] != index) {
+                               index = dfa[x];
+                               *node_a++ = index;
+                               node->transitions[m++] = (uint8_t)(x - 1);
+                       }
+               }
+
+               /* fill unused locations with max value - nothing is greater */
+               for (; m < RTE_ACL_QUAD_SIZE; m++)
+                       node->transitions[m] = INT8_MAX;
+
+               RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);
+
+       } else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
+               for (n = 0; n < RTE_DIM(dfa); n++)
+                       node_array[n] = dfa[n];
+       }
+}
+
+/*
+ * Routine that allocates space for this node and recursively calls
+ * to allocate space for each child. Once all the children are allocated,
+ * then resolve all transitions for this node.
+ */
+static void
+acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
+       uint64_t no_match, struct rte_acl_indices *index, int num_categories)
+{
+       uint32_t n, *qtrp;
+       uint64_t *array_ptr;
+       struct rte_acl_match_results *match;
+
+       if (node->node_index != RTE_ACL_NODE_UNDEFINED)
+               return;
+
+       array_ptr = NULL;
+
+       switch (node->node_type) {
+       case RTE_ACL_NODE_DFA:
+               node->node_index = index->dfa_index | node->node_type;
+               array_ptr = &node_array[index->dfa_index];
+               index->dfa_index += RTE_ACL_DFA_SIZE;
+               for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
+                       array_ptr[n] = no_match;
+               break;
+       case RTE_ACL_NODE_SINGLE:
+               node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
+                       node->node_type;
+               array_ptr = &node_array[index->single_index];
+               index->single_index += 1;
+               array_ptr[0] = no_match;
+               break;
+       case RTE_ACL_NODE_QRANGE:
+               array_ptr = &node_array[index->quad_index];
+               acl_add_ptrs(node, array_ptr, no_match,  0);
+               qtrp = (uint32_t *)node->transitions;
+               node->node_index = qtrp[0];
+               node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
+               node->node_index |= index->quad_index | node->node_type;
+               index->quad_index += node->fanout;
+               break;
+       case RTE_ACL_NODE_MATCH:
+               match = ((struct rte_acl_match_results *)
+                       (node_array + index->match_index));
+               memcpy(match + node->match_flag, node->mrt, sizeof(*node->mrt));
+               node->node_index = node->match_flag | node->node_type;
+               break;
+       case RTE_ACL_NODE_UNDEFINED:
+               RTE_ACL_VERIFY(node->node_type !=
+                       (uint32_t)RTE_ACL_NODE_UNDEFINED);
+               break;
+       }
+
+       /* recursively allocate space for all children */
+       for (n = 0; n < node->num_ptrs; n++) {
+               if (node->ptrs[n].ptr != NULL)
+                       acl_gen_node(node->ptrs[n].ptr,
+                               node_array,
+                               no_match,
+                               index,
+                               num_categories);
+       }
+
+       /* All children are resolved, resolve this node's pointers */
+       switch (node->node_type) {
+       case RTE_ACL_NODE_DFA:
+               acl_add_ptrs(node, array_ptr, no_match, 1);
+               break;
+       case RTE_ACL_NODE_SINGLE:
+               for (n = 0; n < node->num_ptrs; n++) {
+                       if (node->ptrs[n].ptr != NULL)
+                               array_ptr[0] = node->ptrs[n].ptr->node_index;
+               }
+               break;
+       case RTE_ACL_NODE_QRANGE:
+               acl_add_ptrs(node, array_ptr, no_match, 1);
+               break;
+       case RTE_ACL_NODE_MATCH:
+               break;
+       case RTE_ACL_NODE_UNDEFINED:
+               RTE_ACL_VERIFY(node->node_type !=
+                       (uint32_t)RTE_ACL_NODE_UNDEFINED);
+               break;
+       }
+}
+
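+/*
+ * Count node types across all tries and compute the start index of each
+ * node type region (DFA, quad, single, match) in the runtime table.
+ */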
+static int
+acl_calc_counts_indicies(struct acl_node_counters *counts,
+       struct rte_acl_indices *indices, struct rte_acl_trie *trie,
+       struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+       int match_num)
+{
+       uint32_t n;
+
+       memset(indices, 0, sizeof(*indices));
+       memset(counts, 0, sizeof(*counts));
+
+       /* Get stats on nodes */
+       for (n = 0; n < num_tries; n++) {
+               counts->smallest_match = INT32_MAX;
+               match_num = acl_count_trie_types(counts, node_bld_trie[n].trie,
+                       match_num, 1);
+               trie[n].smallest = counts->smallest_match;
+       }
+
+       indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
+       indices->quad_index = indices->dfa_index +
+               counts->dfa * RTE_ACL_DFA_SIZE;
+       indices->single_index = indices->quad_index + counts->quad_vectors;
+       indices->match_index = indices->single_index + counts->single + 1;
+       indices->match_index = RTE_ALIGN(indices->match_index,
+               (XMM_SIZE / sizeof(uint64_t)));
+
+       return match_num;
+}
+
+/*
+ * Generate the runtime structure using build structure
+ */
+int
+rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
+       struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+       uint32_t num_categories, uint32_t data_index_sz, int match_num)
+{
+       void *mem;
+       size_t total_size;
+       uint64_t *node_array, no_match;
+       uint32_t n, match_index;
+       struct rte_acl_match_results *match;
+       struct acl_node_counters counts;
+       struct rte_acl_indices indices;
+
+       /* Fill counts and indices arrays from the nodes. */
+       match_num = acl_calc_counts_indicies(&counts, &indices, trie,
+               node_bld_trie, num_tries, match_num);
+
+       /* Allocate runtime memory (align to cache boundary) */
+       total_size = RTE_ALIGN(data_index_sz, CACHE_LINE_SIZE) +
+               indices.match_index * sizeof(uint64_t) +
+               (match_num + 2) * sizeof(struct rte_acl_match_results) +
+               XMM_SIZE;
+
+       mem = rte_zmalloc_socket(ctx->name, total_size, CACHE_LINE_SIZE,
+                       ctx->socket_id);
+       if (mem == NULL) {
+               RTE_LOG(ERR, ACL,
+                       "allocation of %zu bytes on socket %d for %s failed\n",
+                       total_size, ctx->socket_id, ctx->name);
+               return -ENOMEM;
+       }
+
+       /* Fill the runtime structure */
+       match_index = indices.match_index;
+       node_array = (uint64_t *)((uintptr_t)mem +
+               RTE_ALIGN(data_index_sz, CACHE_LINE_SIZE));
+
+       /*
+        * Setup the NOMATCH node (a SINGLE at the
+        * highest index, that points to itself)
+        */
+
+       node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_DFA_SIZE | RTE_ACL_NODE_SINGLE;
+       no_match = RTE_ACL_NODE_MATCH;
+
+       for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
+               node_array[n] = no_match;
+
+       match = ((struct rte_acl_match_results *)(node_array + match_index));
+       memset(match, 0, sizeof(*match));
+
+       for (n = 0; n < num_tries; n++) {
+
+               acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
+                       &indices, num_categories);
+
+               if (node_bld_trie[n].trie->node_index == no_match)
+                       trie[n].root_index = 0;
+               else
+                       trie[n].root_index = node_bld_trie[n].trie->node_index;
+       }
+
+       ctx->mem = mem;
+       ctx->mem_sz = total_size;
+       ctx->data_indexes = mem;
+       ctx->num_tries = num_tries;
+       ctx->num_categories = num_categories;
+       ctx->match_index = match_index;
+       ctx->no_match = no_match;
+       ctx->idle = node_array[RTE_ACL_DFA_SIZE];
+       ctx->trans_table = node_array;
+       memcpy(ctx->trie, trie, sizeof(ctx->trie));
+
+       acl_gen_log_stats(ctx, &counts);
+       return 0;
+}
diff --git a/lib/librte_acl/acl_run.c b/lib/librte_acl/acl_run.c
new file mode 100644 (file)
index 0000000..e3d9fc1
--- /dev/null
@@ -0,0 +1,944 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_acl.h>
+#include "acl_vect.h"
+#include "acl.h"
+
+#define MAX_SEARCHES_SSE8      8
+#define MAX_SEARCHES_SSE4      4
+#define MAX_SEARCHES_SSE2      2
+#define MAX_SEARCHES_SCALAR    2
+
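+/*
+ * Read the next 4 input bytes for engine slot 'idx', using the slot's
+ * data_index indirection into the packet data, then advance to the next
+ * index entry.
+ */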
+#define GET_NEXT_4BYTES(prm, idx)      \
+       (*((const int32_t *)((prm)[(idx)].data + *(prm)[idx].data_index++)))
+
+
+#define RTE_ACL_NODE_INDEX     ((uint32_t)~RTE_ACL_NODE_TYPE)
+
+#define        SCALAR_QRANGE_MULT      0x01010101
+#define        SCALAR_QRANGE_MASK      0x7f7f7f7f
+#define        SCALAR_QRANGE_MIN       0x80808080
+
+enum {
+       SHUFFLE32_SLOT1 = 0xe5,
+       SHUFFLE32_SLOT2 = 0xe6,
+       SHUFFLE32_SLOT3 = 0xe7,
+       SHUFFLE32_SWAP64 = 0x4e,
+};
+
+/*
+ * Structure to manage N parallel trie traversals.
+ * The runtime trie traversal routines can process 8, 4, or 2 tries
+ * in parallel. Each packet may require multiple trie traversals (up to 4).
+ * This structure is used to fill the slots (0 to n-1) for parallel processing
+ * with the trie traversals needed for each packet.
+ */
+struct acl_flow_data {
+       uint32_t            num_packets;
+       /* number of packets processed */
+       uint32_t            started;
+       /* number of trie traversals in progress */
+       uint32_t            trie;
+       /* current trie index (0 to N-1) */
+       uint32_t            cmplt_size;
+       uint32_t            total_packets;
+       /* maximum number of packets to process */
+       uint32_t            categories;
+       /* number of result categories per packet. */
+       const uint64_t     *trans;
+       const uint8_t     **data;
+       uint32_t           *results;
+       struct completion  *last_cmplt;
+       struct completion  *cmplt_array;
+};
+
+/*
+ * Structure to maintain running results for
+ * a single packet (up to 4 tries).
+ */
+struct completion {
+       uint32_t *results;                          /* running results. */
+       int32_t   priority[RTE_ACL_MAX_CATEGORIES]; /* running priorities. */
+       uint32_t  count;                            /* num of remaining tries */
+       /* a non-zero count marks the struct as allocated */
+} __attribute__((aligned(XMM_SIZE)));
+
+/*
+ * One parms structure for each slot in the search engine.
+ */
+struct parms {
+       const uint8_t              *data;
+       /* input data for this packet */
+       const uint32_t             *data_index;
+       /* data indirection for this trie */
+       struct completion          *cmplt;
+       /* completion data for this packet */
+};
+
+/*
+ * Define a global idle node for unused engine slots
+ */
+static const uint32_t idle[UINT8_MAX + 1];
+
+static const rte_xmm_t mm_type_quad_range = {
+       .u32 = {
+               RTE_ACL_NODE_QRANGE,
+               RTE_ACL_NODE_QRANGE,
+               RTE_ACL_NODE_QRANGE,
+               RTE_ACL_NODE_QRANGE,
+       },
+};
+
+static const rte_xmm_t mm_type_quad_range64 = {
+       .u32 = {
+               RTE_ACL_NODE_QRANGE,
+               RTE_ACL_NODE_QRANGE,
+               0,
+               0,
+       },
+};
+
+static const rte_xmm_t mm_shuffle_input = {
+       .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c},
+};
+
+static const rte_xmm_t mm_shuffle_input64 = {
+       .u32 = {0x00000000, 0x04040404, 0x80808080, 0x80808080},
+};
+
+static const rte_xmm_t mm_ones_16 = {
+       .u16 = {1, 1, 1, 1, 1, 1, 1, 1},
+};
+
+static const rte_xmm_t mm_bytes = {
+       .u32 = {UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX},
+};
+
+static const rte_xmm_t mm_bytes64 = {
+       .u32 = {UINT8_MAX, UINT8_MAX, 0, 0},
+};
+
+static const rte_xmm_t mm_match_mask = {
+       .u32 = {
+               RTE_ACL_NODE_MATCH,
+               RTE_ACL_NODE_MATCH,
+               RTE_ACL_NODE_MATCH,
+               RTE_ACL_NODE_MATCH,
+       },
+};
+
+static const rte_xmm_t mm_match_mask64 = {
+       .u32 = {
+               RTE_ACL_NODE_MATCH,
+               0,
+               RTE_ACL_NODE_MATCH,
+               0,
+       },
+};
+
+static const rte_xmm_t mm_index_mask = {
+       .u32 = {
+               RTE_ACL_NODE_INDEX,
+               RTE_ACL_NODE_INDEX,
+               RTE_ACL_NODE_INDEX,
+               RTE_ACL_NODE_INDEX,
+       },
+};
+
+static const rte_xmm_t mm_index_mask64 = {
+       .u32 = {
+               RTE_ACL_NODE_INDEX,
+               RTE_ACL_NODE_INDEX,
+               0,
+               0,
+       },
+};
+
+/*
+ * Allocate a completion structure to manage the tries for a packet.
+ */
+static inline struct completion *
+alloc_completion(struct completion *p, uint32_t size, uint32_t tries,
+       uint32_t *results)
+{
+       uint32_t n;
+
+       for (n = 0; n < size; n++) {
+
+               if (p[n].count == 0) {
+
+                       /* mark as allocated and set number of tries. */
+                       p[n].count = tries;
+                       p[n].results = results;
+                       return &(p[n]);
+               }
+       }
+
+       /* should never get here */
+       return NULL;
+}
+
+/*
+ * Resolve priority for a single result trie.
+ */
+static inline void
+resolve_single_priority(uint64_t transition, int n,
+       const struct rte_acl_ctx *ctx, struct parms *parms,
+       const struct rte_acl_match_results *p)
+{
+       if (parms[n].cmplt->count == ctx->num_tries ||
+                       parms[n].cmplt->priority[0] <=
+                       p[transition].priority[0]) {
+
+               parms[n].cmplt->priority[0] = p[transition].priority[0];
+               parms[n].cmplt->results[0] = p[transition].results[0];
+       }
+
+       parms[n].cmplt->count--;
+}
+
+/*
+ * Resolve priority for multiple results. This consists of comparing
+ * the priority of the current traversal with the running set of
+ * results for the packet. For each result, keep a running array of
+ * the result (rule number) and its priority for each category.
+ */
+static inline void
+resolve_priority(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+       struct parms *parms, const struct rte_acl_match_results *p,
+       uint32_t categories)
+{
+       uint32_t x;
+       xmm_t results, priority, results1, priority1, selector;
+       xmm_t *saved_results, *saved_priority;
+
+       for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+
+               saved_results = (xmm_t *)(&parms[n].cmplt->results[x]);
+               saved_priority =
+                       (xmm_t *)(&parms[n].cmplt->priority[x]);
+
+               /* get results and priorities for completed trie */
+               results = MM_LOADU((const xmm_t *)&p[transition].results[x]);
+               priority = MM_LOADU((const xmm_t *)&p[transition].priority[x]);
+
+               /* if this is not the first completed trie */
+               if (parms[n].cmplt->count != ctx->num_tries) {
+
+                       /* get running best results and their priorities */
+                       results1 = MM_LOADU(saved_results);
+                       priority1 = MM_LOADU(saved_priority);
+
+                       /* select results that are highest priority */
+                       selector = MM_CMPGT32(priority1, priority);
+                       results = MM_BLENDV8(results, results1, selector);
+                       priority = MM_BLENDV8(priority, priority1, selector);
+               }
+
+               /* save running best results and their priorities */
+               MM_STOREU(saved_results, results);
+               MM_STOREU(saved_priority, priority);
+       }
+
+       /* Count down completed tries for this search request */
+       parms[n].cmplt->count--;
+}
+
+/*
+ * Routine to fill a slot in the parallel trie traversal array (parms) from
+ * the list of packets (flows).
+ */
+static inline uint64_t
+acl_start_next_trie(struct acl_flow_data *flows, struct parms *parms, int n,
+       const struct rte_acl_ctx *ctx)
+{
+       uint64_t transition;
+
+       /* if there are any more packets to process */
+       if (flows->num_packets < flows->total_packets) {
+               parms[n].data = flows->data[flows->num_packets];
+               parms[n].data_index = ctx->trie[flows->trie].data_index;
+
+               /* if this is the first trie for this packet */
+               if (flows->trie == 0) {
+                       flows->last_cmplt = alloc_completion(flows->cmplt_array,
+                               flows->cmplt_size, ctx->num_tries,
+                               flows->results +
+                               flows->num_packets * flows->categories);
+               }
+
+               /* set completion parameters and starting index for this slot */
+               parms[n].cmplt = flows->last_cmplt;
+               transition =
+                       flows->trans[parms[n].data[*parms[n].data_index++] +
+                       ctx->trie[flows->trie].root_index];
+
+               /*
+                * if this is the last trie for this packet,
+                * then setup next packet.
+                */
+               flows->trie++;
+               if (flows->trie >= ctx->num_tries) {
+                       flows->trie = 0;
+                       flows->num_packets++;
+               }
+
+               /* keep track of number of active trie traversals */
+               flows->started++;
+
+       /* no more tries to process, set slot to an idle position */
+       } else {
+               transition = ctx->idle;
+               parms[n].data = (const uint8_t *)idle;
+               parms[n].data_index = idle;
+       }
+       return transition;
+}
+
+/*
+ * Detect matches. If a match node transition is found, this trie
+ * traversal is complete and the slot is refilled with the next trie
+ * to be processed.
+ */
+static inline uint64_t
+acl_match_check_transition(uint64_t transition, int slot,
+       const struct rte_acl_ctx *ctx, struct parms *parms,
+       struct acl_flow_data *flows)
+{
+       const struct rte_acl_match_results *p;
+
+       p = (const struct rte_acl_match_results *)
+               (flows->trans + ctx->match_index);
+
+       if (transition & RTE_ACL_NODE_MATCH) {
+
+               /* Remove flags from index and decrement active traversals */
+               transition &= RTE_ACL_NODE_INDEX;
+               flows->started--;
+
+               /* Resolve priorities for this trie and running results */
+               if (flows->categories == 1)
+                       resolve_single_priority(transition, slot, ctx,
+                               parms, p);
+               else
+                       resolve_priority(transition, slot, ctx, parms, p,
+                               flows->categories);
+
+               /* Fill the slot with the next trie or idle trie */
+               transition = acl_start_next_trie(flows, parms, slot, ctx);
+
+       } else if (transition == ctx->idle) {
+               /* reset indirection table for idle slots */
+               parms[slot].data_index = idle;
+       }
+
+       return transition;
+}
+
+/*
+ * Extract transitions from an XMM register and check for any matches
+ */
+static void
+acl_process_matches(xmm_t *indicies, int slot, const struct rte_acl_ctx *ctx,
+       struct parms *parms, struct acl_flow_data *flows)
+{
+       uint64_t transition1, transition2;
+
+       /* extract transition from low 64 bits. */
+       transition1 = MM_CVT64(*indicies);
+
+       /* extract transition from high 64 bits. */
+       *indicies = MM_SHUFFLE32(*indicies, SHUFFLE32_SWAP64);
+       transition2 = MM_CVT64(*indicies);
+
+       transition1 = acl_match_check_transition(transition1, slot, ctx,
+               parms, flows);
+       transition2 = acl_match_check_transition(transition2, slot + 1, ctx,
+               parms, flows);
+
+       /* update indicies with new transitions. */
+       *indicies = MM_SET64(transition2, transition1);
+}
+
+/*
+ * Check for a match in 2 transitions (contained in an SSE register)
+ */
+static inline void
+acl_match_check_x2(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+       struct acl_flow_data *flows, xmm_t *indicies, xmm_t match_mask)
+{
+       xmm_t temp;
+
+       temp = MM_AND(match_mask, *indicies);
+       while (!MM_TESTZ(temp, temp)) {
+               acl_process_matches(indicies, slot, ctx, parms, flows);
+               temp = MM_AND(match_mask, *indicies);
+       }
+}
+
+/*
+ * Check for any match in 4 transitions (contained in 2 SSE registers)
+ */
+static inline void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+       struct acl_flow_data *flows, xmm_t *indicies1, xmm_t *indicies2,
+       xmm_t match_mask)
+{
+       xmm_t temp;
+
+       /* put low 32 bits of each transition into one register */
+       temp = (xmm_t)MM_SHUFFLEPS((__m128)*indicies1, (__m128)*indicies2,
+               0x88);
+       /* test for match node */
+       temp = MM_AND(match_mask, temp);
+
+       while (!MM_TESTZ(temp, temp)) {
+               acl_process_matches(indicies1, slot, ctx, parms, flows);
+               acl_process_matches(indicies2, slot + 2, ctx, parms, flows);
+
+               temp = (xmm_t)MM_SHUFFLEPS((__m128)*indicies1,
+                                       (__m128)*indicies2,
+                                       0x88);
+               temp = MM_AND(match_mask, temp);
+       }
+}
+
+/*
+ * Calculate the address of the next transition for
+ * all types of nodes. Note that only DFA nodes and range
+ * nodes actually transition to another node. Match
+ * nodes don't move.
+ */
+static inline xmm_t
+acl_calc_addr(xmm_t index_mask, xmm_t next_input, xmm_t shuffle_input,
+       xmm_t ones_16, xmm_t bytes, xmm_t type_quad_range,
+       xmm_t *indicies1, xmm_t *indicies2)
+{
+       xmm_t addr, node_types, temp;
+
+       /*
+        * Note that no transition is done for a match
+        * node and therefore a stream freezes when
+        * it reaches a match.
+        */
+
+       /* Shuffle low 32 into temp and high 32 into indicies2 */
+       temp = (xmm_t)MM_SHUFFLEPS((__m128)*indicies1, (__m128)*indicies2,
+               0x88);
+       *indicies2 = (xmm_t)MM_SHUFFLEPS((__m128)*indicies1,
+               (__m128)*indicies2, 0xdd);
+
+       /* Calc node type and node addr */
+       node_types = MM_ANDNOT(index_mask, temp);
+       addr = MM_AND(index_mask, temp);
+
+       /*
+        * Calc addr for DFAs - addr = dfa_index + input_byte
+        */
+
+       /* mask for DFA type (0) nodes */
+       temp = MM_CMPEQ32(node_types, MM_XOR(node_types, node_types));
+
+       /* add input byte to DFA position */
+       temp = MM_AND(temp, bytes);
+       temp = MM_AND(temp, next_input);
+       addr = MM_ADD32(addr, temp);
+
+       /*
+        * Calc addr for Range nodes -> range_index + range(input)
+        */
+       node_types = MM_CMPEQ32(node_types, type_quad_range);
+
+       /*
+        * Calculate the number of range boundaries that are less than the
+        * input value. Range boundaries for each node are in signed 8 bit,
+        * ordered from -128 to 127 in the indicies2 register.
+        * This is effectively a popcnt of the boundary bytes that are less
+        * than the input byte.
+        */
+
+       /* shuffle input byte to all 4 positions of 32 bit value */
+       temp = MM_SHUFFLE8(next_input, shuffle_input);
+
+       /* check ranges */
+       temp = MM_CMPGT8(temp, *indicies2);
+
+       /* convert -1 to 1 (boundary bytes less than the input byte) */
+       temp = MM_SIGN8(temp, temp);
+
+       /* horizontal add pairs of bytes into words */
+       temp = MM_MADD8(temp, temp);
+
+       /* horizontal add pairs of words into dwords */
+       temp = MM_MADD16(temp, ones_16);
+
+       /* mask to range type nodes */
+       temp = MM_AND(temp, node_types);
+
+       /* add index into node position */
+       return MM_ADD32(addr, temp);
+}
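
Per lane, the address calculation above reduces to the scalar sketch below; it is illustrative only (node type 0 denotes a DFA node, as in the mask above, and the four signed 8-bit range boundaries are those stored in a quad-range node).

#include <stdint.h>

/* Illustrative sketch (not library code): scalar equivalent of one lane of
 * acl_calc_addr(). DFA nodes add the input byte; quad-range nodes add the
 * count of stored boundaries that compare (signed) less than the input. */
static uint32_t
calc_addr_sketch(uint32_t node_addr, uint32_t node_type, uint8_t input,
        const int8_t boundaries[4])
{
        uint32_t i, off;

        if (node_type == 0) {
                /* DFA node: addr = dfa_index + input byte */
                off = input;
        } else {
                /* quad-range node: addr = range_index + range(input) */
                off = 0;
                for (i = 0; i != 4; i++)
                        off += ((int8_t)input > boundaries[i]);
        }
        return node_addr + off;
}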
+
+/*
+ * Process 4 transitions (in 2 SIMD registers) in parallel
+ */
+static inline xmm_t
+transition4(xmm_t index_mask, xmm_t next_input, xmm_t shuffle_input,
+       xmm_t ones_16, xmm_t bytes, xmm_t type_quad_range,
+       const uint64_t *trans, xmm_t *indicies1, xmm_t *indicies2)
+{
+       xmm_t addr;
+       uint64_t trans0, trans2;
+
+        /* Calculate the address (array index) for all 4 transitions. */
+
+       addr = acl_calc_addr(index_mask, next_input, shuffle_input, ones_16,
+               bytes, type_quad_range, indicies1, indicies2);
+
+        /* Gather 64 bit transitions and pack back into 2 registers. */
+
+       trans0 = trans[MM_CVT32(addr)];
+
+       /* get slot 2 */
+
+       /* {x0, x1, x2, x3} -> {x2, x1, x2, x3} */
+       addr = MM_SHUFFLE32(addr, SHUFFLE32_SLOT2);
+       trans2 = trans[MM_CVT32(addr)];
+
+       /* get slot 1 */
+
+       /* {x2, x1, x2, x3} -> {x1, x1, x2, x3} */
+       addr = MM_SHUFFLE32(addr, SHUFFLE32_SLOT1);
+       *indicies1 = MM_SET64(trans[MM_CVT32(addr)], trans0);
+
+       /* get slot 3 */
+
+       /* {x1, x1, x2, x3} -> {x3, x1, x2, x3} */
+       addr = MM_SHUFFLE32(addr, SHUFFLE32_SLOT3);
+       *indicies2 = MM_SET64(trans[MM_CVT32(addr)], trans2);
+
+       return MM_SRL32(next_input, 8);
+}
+
+static inline void
+acl_set_flow(struct acl_flow_data *flows, struct completion *cmplt,
+       uint32_t cmplt_size, const uint8_t **data, uint32_t *results,
+       uint32_t data_num, uint32_t categories, const uint64_t *trans)
+{
+       flows->num_packets = 0;
+       flows->started = 0;
+       flows->trie = 0;
+       flows->last_cmplt = NULL;
+       flows->cmplt_array = cmplt;
+       flows->total_packets = data_num;
+       flows->categories = categories;
+       flows->cmplt_size = cmplt_size;
+       flows->data = data;
+       flows->results = results;
+       flows->trans = trans;
+}
+
+/*
+ * Execute trie traversal with 8 traversals in parallel
+ */
+static inline void
+search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+       uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+       int n;
+       struct acl_flow_data flows;
+       uint64_t index_array[MAX_SEARCHES_SSE8];
+       struct completion cmplt[MAX_SEARCHES_SSE8];
+       struct parms parms[MAX_SEARCHES_SSE8];
+       xmm_t input0, input1;
+       xmm_t indicies1, indicies2, indicies3, indicies4;
+
+       acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+               total_packets, categories, ctx->trans_table);
+
+       for (n = 0; n < MAX_SEARCHES_SSE8; n++) {
+               cmplt[n].count = 0;
+               index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+       }
+
+       /*
+        * indicies1 contains index_array[0,1]
+        * indicies2 contains index_array[2,3]
+        * indicies3 contains index_array[4,5]
+        * indicies4 contains index_array[6,7]
+        */
+
+       indicies1 = MM_LOADU((xmm_t *) &index_array[0]);
+       indicies2 = MM_LOADU((xmm_t *) &index_array[2]);
+
+       indicies3 = MM_LOADU((xmm_t *) &index_array[4]);
+       indicies4 = MM_LOADU((xmm_t *) &index_array[6]);
+
+        /* Check for any matches. */
+       acl_match_check_x4(0, ctx, parms, &flows,
+               &indicies1, &indicies2, mm_match_mask.m);
+       acl_match_check_x4(4, ctx, parms, &flows,
+               &indicies3, &indicies4, mm_match_mask.m);
+
+       while (flows.started > 0) {
+
+               /* Gather 4 bytes of input data for each stream. */
+               input0 = MM_INSERT32(mm_ones_16.m, GET_NEXT_4BYTES(parms, 0),
+                       0);
+               input1 = MM_INSERT32(mm_ones_16.m, GET_NEXT_4BYTES(parms, 4),
+                       0);
+
+               input0 = MM_INSERT32(input0, GET_NEXT_4BYTES(parms, 1), 1);
+               input1 = MM_INSERT32(input1, GET_NEXT_4BYTES(parms, 5), 1);
+
+               input0 = MM_INSERT32(input0, GET_NEXT_4BYTES(parms, 2), 2);
+               input1 = MM_INSERT32(input1, GET_NEXT_4BYTES(parms, 6), 2);
+
+               input0 = MM_INSERT32(input0, GET_NEXT_4BYTES(parms, 3), 3);
+               input1 = MM_INSERT32(input1, GET_NEXT_4BYTES(parms, 7), 3);
+
+                /* Process the 4 bytes of input on each stream. */
+
+               input0 = transition4(mm_index_mask.m, input0,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies1, &indicies2);
+
+               input1 = transition4(mm_index_mask.m, input1,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies3, &indicies4);
+
+               input0 = transition4(mm_index_mask.m, input0,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies1, &indicies2);
+
+               input1 = transition4(mm_index_mask.m, input1,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies3, &indicies4);
+
+               input0 = transition4(mm_index_mask.m, input0,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies1, &indicies2);
+
+               input1 = transition4(mm_index_mask.m, input1,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies3, &indicies4);
+
+               input0 = transition4(mm_index_mask.m, input0,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies1, &indicies2);
+
+               input1 = transition4(mm_index_mask.m, input1,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies3, &indicies4);
+
+                /* Check for any matches. */
+               acl_match_check_x4(0, ctx, parms, &flows,
+                       &indicies1, &indicies2, mm_match_mask.m);
+               acl_match_check_x4(4, ctx, parms, &flows,
+                       &indicies3, &indicies4, mm_match_mask.m);
+       }
+}
+
+/*
+ * Execute trie traversal with 4 traversals in parallel
+ */
+static inline void
+search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+        uint32_t *results, int total_packets, uint32_t categories)
+{
+       int n;
+       struct acl_flow_data flows;
+       uint64_t index_array[MAX_SEARCHES_SSE4];
+       struct completion cmplt[MAX_SEARCHES_SSE4];
+       struct parms parms[MAX_SEARCHES_SSE4];
+       xmm_t input, indicies1, indicies2;
+
+       acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+               total_packets, categories, ctx->trans_table);
+
+       for (n = 0; n < MAX_SEARCHES_SSE4; n++) {
+               cmplt[n].count = 0;
+               index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+       }
+
+       indicies1 = MM_LOADU((xmm_t *) &index_array[0]);
+       indicies2 = MM_LOADU((xmm_t *) &index_array[2]);
+
+       /* Check for any matches. */
+       acl_match_check_x4(0, ctx, parms, &flows,
+               &indicies1, &indicies2, mm_match_mask.m);
+
+       while (flows.started > 0) {
+
+               /* Gather 4 bytes of input data for each stream. */
+               input = MM_INSERT32(mm_ones_16.m, GET_NEXT_4BYTES(parms, 0), 0);
+               input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 1), 1);
+               input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 2), 2);
+               input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 3), 3);
+
+               /* Process the 4 bytes of input on each stream. */
+               input = transition4(mm_index_mask.m, input,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies1, &indicies2);
+
+                input = transition4(mm_index_mask.m, input,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies1, &indicies2);
+
+                input = transition4(mm_index_mask.m, input,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies1, &indicies2);
+
+                input = transition4(mm_index_mask.m, input,
+                       mm_shuffle_input.m, mm_ones_16.m,
+                       mm_bytes.m, mm_type_quad_range.m,
+                       flows.trans, &indicies1, &indicies2);
+
+               /* Check for any matches. */
+               acl_match_check_x4(0, ctx, parms, &flows,
+                       &indicies1, &indicies2, mm_match_mask.m);
+       }
+}
+
+static inline xmm_t
+transition2(xmm_t index_mask, xmm_t next_input, xmm_t shuffle_input,
+       xmm_t ones_16, xmm_t bytes, xmm_t type_quad_range,
+       const uint64_t *trans, xmm_t *indicies1)
+{
+       uint64_t t;
+       xmm_t addr, indicies2;
+
+       indicies2 = MM_XOR(ones_16, ones_16);
+
+       addr = acl_calc_addr(index_mask, next_input, shuffle_input, ones_16,
+               bytes, type_quad_range, indicies1, &indicies2);
+
+       /* Gather 64 bit transitions and pack 2 per register. */
+
+       t = trans[MM_CVT32(addr)];
+
+       /* get slot 1 */
+       addr = MM_SHUFFLE32(addr, SHUFFLE32_SLOT1);
+       *indicies1 = MM_SET64(trans[MM_CVT32(addr)], t);
+
+       return MM_SRL32(next_input, 8);
+}
+
+/*
+ * Execute trie traversal with 2 traversals in parallel.
+ */
+static inline void
+search_sse_2(const struct rte_acl_ctx *ctx, const uint8_t **data,
+       uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+       int n;
+       struct acl_flow_data flows;
+       uint64_t index_array[MAX_SEARCHES_SSE2];
+       struct completion cmplt[MAX_SEARCHES_SSE2];
+       struct parms parms[MAX_SEARCHES_SSE2];
+       xmm_t input, indicies;
+
+       acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+               total_packets, categories, ctx->trans_table);
+
+       for (n = 0; n < MAX_SEARCHES_SSE2; n++) {
+               cmplt[n].count = 0;
+               index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+       }
+
+       indicies = MM_LOADU((xmm_t *) &index_array[0]);
+
+       /* Check for any matches. */
+       acl_match_check_x2(0, ctx, parms, &flows, &indicies, mm_match_mask64.m);
+
+       while (flows.started > 0) {
+
+               /* Gather 4 bytes of input data for each stream. */
+               input = MM_INSERT32(mm_ones_16.m, GET_NEXT_4BYTES(parms, 0), 0);
+               input = MM_INSERT32(input, GET_NEXT_4BYTES(parms, 1), 1);
+
+               /* Process the 4 bytes of input on each stream. */
+
+               input = transition2(mm_index_mask64.m, input,
+                       mm_shuffle_input64.m, mm_ones_16.m,
+                       mm_bytes64.m, mm_type_quad_range64.m,
+                       flows.trans, &indicies);
+
+               input = transition2(mm_index_mask64.m, input,
+                       mm_shuffle_input64.m, mm_ones_16.m,
+                       mm_bytes64.m, mm_type_quad_range64.m,
+                       flows.trans, &indicies);
+
+               input = transition2(mm_index_mask64.m, input,
+                       mm_shuffle_input64.m, mm_ones_16.m,
+                       mm_bytes64.m, mm_type_quad_range64.m,
+                       flows.trans, &indicies);
+
+               input = transition2(mm_index_mask64.m, input,
+                       mm_shuffle_input64.m, mm_ones_16.m,
+                       mm_bytes64.m, mm_type_quad_range64.m,
+                       flows.trans, &indicies);
+
+               /* Check for any matches. */
+               acl_match_check_x2(0, ctx, parms, &flows, &indicies,
+                       mm_match_mask64.m);
+       }
+}
+
+/*
+ * When processing the transition, rather than using an if/else
+ * construct, the offset is calculated for both DFA and QRANGE nodes and
+ * then conditionally added to the address based on node type.
+ * This is done to avoid branch mis-predictions. Since the
+ * offset is a rather simple calculation, it is more efficient
+ * to do the calculation and use a conditional move rather than
+ * a conditional branch to determine which calculation to do.
+ */
+static inline uint32_t
+scan_forward(uint32_t input, uint32_t max)
+{
+       return (input == 0) ? max : rte_bsf32(input);
+}
+
+static inline uint64_t
+scalar_transition(const uint64_t *trans_table, uint64_t transition,
+       uint8_t input)
+{
+       uint32_t addr, index, ranges, x, a, b, c;
+
+       /* break transition into component parts */
+       ranges = transition >> (sizeof(index) * CHAR_BIT);
+
+       /* calc address for a QRANGE node */
+       c = input * SCALAR_QRANGE_MULT;
+       a = ranges | SCALAR_QRANGE_MIN;
+       index = transition & ~RTE_ACL_NODE_INDEX;
+       a -= (c & SCALAR_QRANGE_MASK);
+       b = c & SCALAR_QRANGE_MIN;
+       addr = transition ^ index;
+       a &= SCALAR_QRANGE_MIN;
+       a ^= (ranges ^ b) & (a ^ b);
+       x = scan_forward(a, 32) >> 3;
+       addr += (index == RTE_ACL_NODE_DFA) ? input : x;
+
+       /* pickup next transition */
+       transition = *(trans_table + addr);
+       return transition;
+}
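
The conditional-move idea described above comes down to the pattern in the sketch below: both candidate offsets are available before the selection, so the node type only decides which one is added, and compilers can typically lower the selection to a conditional move instead of a hard-to-predict branch. The function and parameter names are illustrative, not library code.

#include <stdint.h>

/* Illustrative sketch (not library code): branch-free selection as used in
 * scalar_transition(). The DFA offset is simply the input byte; the QRANGE
 * offset is assumed to have been computed already (as done above). */
static uint32_t
select_offset_sketch(uint32_t node_is_dfa, uint8_t input, uint32_t qrange_off)
{
        uint32_t dfa_off = input;

        return node_is_dfa ? dfa_off : qrange_off;
}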
+
+int
+rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
+       uint32_t *results, uint32_t num, uint32_t categories)
+{
+       int n;
+       uint64_t transition0, transition1;
+       uint32_t input0, input1;
+       struct acl_flow_data flows;
+       uint64_t index_array[MAX_SEARCHES_SCALAR];
+       struct completion cmplt[MAX_SEARCHES_SCALAR];
+       struct parms parms[MAX_SEARCHES_SCALAR];
+
+       if (categories != 1 &&
+               ((RTE_ACL_RESULTS_MULTIPLIER - 1) & categories) != 0)
+               return -EINVAL;
+
+       acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, num,
+               categories, ctx->trans_table);
+
+       for (n = 0; n < MAX_SEARCHES_SCALAR; n++) {
+               cmplt[n].count = 0;
+               index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+       }
+
+       transition0 = index_array[0];
+       transition1 = index_array[1];
+
+       while (flows.started > 0) {
+
+               input0 = GET_NEXT_4BYTES(parms, 0);
+               input1 = GET_NEXT_4BYTES(parms, 1);
+
+               for (n = 0; n < 4; n++) {
+                       if (likely((transition0 & RTE_ACL_NODE_MATCH) == 0))
+                               transition0 = scalar_transition(flows.trans,
+                                       transition0, (uint8_t)input0);
+
+                       input0 >>= CHAR_BIT;
+
+                       if (likely((transition1 & RTE_ACL_NODE_MATCH) == 0))
+                               transition1 = scalar_transition(flows.trans,
+                                       transition1, (uint8_t)input1);
+
+                       input1 >>= CHAR_BIT;
+
+               }
+               if ((transition0 | transition1) & RTE_ACL_NODE_MATCH) {
+                       transition0 = acl_match_check_transition(transition0,
+                               0, ctx, parms, &flows);
+                       transition1 = acl_match_check_transition(transition1,
+                               1, ctx, parms, &flows);
+
+               }
+       }
+       return 0;
+}
+
+int
+rte_acl_classify(const struct rte_acl_ctx *ctx, const uint8_t **data,
+       uint32_t *results, uint32_t num, uint32_t categories)
+{
+       if (categories != 1 &&
+               ((RTE_ACL_RESULTS_MULTIPLIER - 1) & categories) != 0)
+               return -EINVAL;
+
+       if (likely(num >= MAX_SEARCHES_SSE8))
+               search_sse_8(ctx, data, results, num, categories);
+       else if (num >= MAX_SEARCHES_SSE4)
+               search_sse_4(ctx, data, results, num, categories);
+       else
+               search_sse_2(ctx, data, results, num, categories);
+
+       return 0;
+}
diff --git a/lib/librte_acl/acl_vect.h b/lib/librte_acl/acl_vect.h
new file mode 100644 (file)
index 0000000..d813600
--- /dev/null
@@ -0,0 +1,132 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_VECT_H_
+#define _RTE_ACL_VECT_H_
+
+/**
+ * @file
+ *
+ * RTE ACL SSE/AVX related header.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define        MM_ADD16(a, b)          _mm_add_epi16(a, b)
+#define        MM_ADD32(a, b)          _mm_add_epi32(a, b)
+#define        MM_ALIGNR8(a, b, c)     _mm_alignr_epi8(a, b, c)
+#define        MM_AND(a, b)            _mm_and_si128(a, b)
+#define MM_ANDNOT(a, b)                _mm_andnot_si128(a, b)
+#define MM_BLENDV8(a, b, c)    _mm_blendv_epi8(a, b, c)
+#define MM_CMPEQ16(a, b)       _mm_cmpeq_epi16(a, b)
+#define MM_CMPEQ32(a, b)       _mm_cmpeq_epi32(a, b)
+#define        MM_CMPEQ8(a, b)         _mm_cmpeq_epi8(a, b)
+#define MM_CMPGT32(a, b)       _mm_cmpgt_epi32(a, b)
+#define MM_CMPGT8(a, b)                _mm_cmpgt_epi8(a, b)
+#define MM_CVT(a)              _mm_cvtsi32_si128(a)
+#define        MM_CVT32(a)             _mm_cvtsi128_si32(a)
+#define MM_CVTU32(a)           _mm_cvtsi32_si128(a)
+#define        MM_INSERT16(a, c, b)    _mm_insert_epi16(a, c, b)
+#define        MM_INSERT32(a, c, b)    _mm_insert_epi32(a, c, b)
+#define        MM_LOAD(a)              _mm_load_si128(a)
+#define        MM_LOADH_PI(a, b)       _mm_loadh_pi(a, b)
+#define        MM_LOADU(a)             _mm_loadu_si128(a)
+#define        MM_MADD16(a, b)         _mm_madd_epi16(a, b)
+#define        MM_MADD8(a, b)          _mm_maddubs_epi16(a, b)
+#define        MM_MOVEMASK8(a)         _mm_movemask_epi8(a)
+#define MM_OR(a, b)            _mm_or_si128(a, b)
+#define        MM_SET1_16(a)           _mm_set1_epi16(a)
+#define        MM_SET1_32(a)           _mm_set1_epi32(a)
+#define        MM_SET1_64(a)           _mm_set1_epi64(a)
+#define        MM_SET1_8(a)            _mm_set1_epi8(a)
+#define        MM_SET32(a, b, c, d)    _mm_set_epi32(a, b, c, d)
+#define        MM_SHUFFLE32(a, b)      _mm_shuffle_epi32(a, b)
+#define        MM_SHUFFLE8(a, b)       _mm_shuffle_epi8(a, b)
+#define        MM_SHUFFLEPS(a, b, c)   _mm_shuffle_ps(a, b, c)
+#define        MM_SIGN8(a, b)          _mm_sign_epi8(a, b)
+#define        MM_SLL64(a, b)          _mm_sll_epi64(a, b)
+#define        MM_SRL128(a, b)         _mm_srli_si128(a, b)
+#define MM_SRL16(a, b)         _mm_srli_epi16(a, b)
+#define        MM_SRL32(a, b)          _mm_srli_epi32(a, b)
+#define        MM_STORE(a, b)          _mm_store_si128(a, b)
+#define        MM_STOREU(a, b)         _mm_storeu_si128(a, b)
+#define        MM_TESTZ(a, b)          _mm_testz_si128(a, b)
+#define        MM_XOR(a, b)            _mm_xor_si128(a, b)
+
+#define        MM_SET16(a, b, c, d, e, f, g, h)        \
+       _mm_set_epi16(a, b, c, d, e, f, g, h)
+
+#define        MM_SET8(c0, c1, c2, c3, c4, c5, c6, c7, \
+               c8, c9, cA, cB, cC, cD, cE, cF) \
+       _mm_set_epi8(c0, c1, c2, c3, c4, c5, c6, c7,    \
+               c8, c9, cA, cB, cC, cD, cE, cF)
+
+#ifdef RTE_ARCH_X86_64
+
+#define        MM_CVT64(a)             _mm_cvtsi128_si64(a)
+
+#else
+
+#define        MM_CVT64(a)     ({ \
+       rte_xmm_t m;       \
+       m.m = (a);         \
+       (m.u64[0]);        \
+})
+
+#endif /* RTE_ARCH_X86_64 */
+
+/*
+ * Prior to version 12.1 icc doesn't support _mm_set_epi64x.
+ */
+#if (defined(__ICC) && __ICC < 1210)
+
+#define        MM_SET64(a, b)  ({ \
+       rte_xmm_t m;       \
+       m.u64[0] = b;      \
+       m.u64[1] = a;      \
+       (m.m);             \
+})
+
+#else
+
+#define        MM_SET64(a, b)          _mm_set_epi64x(a, b)
+
+#endif /* (defined(__ICC) && __ICC < 1210) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ACL_VECT_H_ */
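
As a quick illustration of how these wrappers compose (assuming the SSE intrinsics headers this file relies on are already included), two 64-bit transition words can be packed into one xmm_t and read back, which is the same pattern acl_run.c uses in acl_process_matches() and transition2(); the function name below is illustrative only.

/* Illustrative sketch (not library code): pack two 64-bit transitions into
 * one xmm_t and extract them again. SHUFFLE32_SWAP64 is the swap-halves
 * shuffle immediate that acl_run.c uses for the same purpose. */
static inline void
pack_unpack_sketch(uint64_t t0, uint64_t t1)
{
        xmm_t v;
        uint64_t lo, hi;

        v = MM_SET64(t1, t0);                  /* t0 -> low 64 bits, t1 -> high */
        lo = MM_CVT64(v);                      /* extracts t0 */
        v = MM_SHUFFLE32(v, SHUFFLE32_SWAP64); /* swap 64-bit halves */
        hi = MM_CVT64(v);                      /* extracts t1 */
        (void)lo;
        (void)hi;
}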
diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c
new file mode 100644 (file)
index 0000000..129a41f
--- /dev/null
@@ -0,0 +1,415 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_acl.h>
+#include "acl.h"
+
+#define        BIT_SIZEOF(x)   (sizeof(x) * CHAR_BIT)
+
+TAILQ_HEAD(rte_acl_list, rte_acl_ctx);
+
+struct rte_acl_ctx *
+rte_acl_find_existing(const char *name)
+{
+       struct rte_acl_ctx *ctx;
+       struct rte_acl_list *acl_list;
+
+       /* check that we have an initialised tail queue */
+       acl_list = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_ACL, rte_acl_list);
+       if (acl_list == NULL) {
+               rte_errno = E_RTE_NO_TAILQ;
+               return NULL;
+       }
+
+       rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+       TAILQ_FOREACH(ctx, acl_list, next) {
+               if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
+                       break;
+       }
+       rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+       if (ctx == NULL)
+               rte_errno = ENOENT;
+       return ctx;
+}
+
+void
+rte_acl_free(struct rte_acl_ctx *ctx)
+{
+       if (ctx == NULL)
+               return;
+
+       RTE_EAL_TAILQ_REMOVE(RTE_TAILQ_ACL, rte_acl_list, ctx);
+
+       rte_free(ctx->mem);
+       rte_free(ctx);
+}
+
+struct rte_acl_ctx *
+rte_acl_create(const struct rte_acl_param *param)
+{
+       size_t sz;
+       struct rte_acl_ctx *ctx;
+       struct rte_acl_list *acl_list;
+       char name[sizeof(ctx->name)];
+
+       /* check that we have an initialised tail queue */
+       acl_list = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_ACL, rte_acl_list);
+       if (acl_list == NULL) {
+               rte_errno = E_RTE_NO_TAILQ;
+               return NULL;
+       }
+
+       /* check that input parameters are valid. */
+       if (param == NULL || param->name == NULL) {
+               rte_errno = EINVAL;
+               return NULL;
+       }
+
+       rte_snprintf(name, sizeof(name), "ACL_%s", param->name);
+
+       /* calculate amount of memory required for pattern set. */
+       sz = sizeof(*ctx) + param->max_rule_num * param->rule_size;
+
+       /* get EAL TAILQ lock. */
+       rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+       /* if we already have one with that name */
+       TAILQ_FOREACH(ctx, acl_list, next) {
+               if (strncmp(param->name, ctx->name, sizeof(ctx->name)) == 0)
+                       break;
+       }
+
+       /* if an ACL context with that name doesn't exist, create a new one. */
+       if (ctx == NULL && (ctx = rte_zmalloc_socket(name, sz, CACHE_LINE_SIZE,
+                       param->socket_id)) != NULL) {
+
+               /* init new allocated context. */
+               ctx->rules = ctx + 1;
+               ctx->max_rules = param->max_rule_num;
+               ctx->rule_sz = param->rule_size;
+               ctx->socket_id = param->socket_id;
+               rte_snprintf(ctx->name, sizeof(ctx->name), "%s", param->name);
+
+               TAILQ_INSERT_TAIL(acl_list, ctx, next);
+
+       } else if (ctx == NULL) {
+               RTE_LOG(ERR, ACL,
+                       "allocation of %zu bytes on socket %d for %s failed\n",
+                       sz, param->socket_id, name);
+       }
+
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+       return ctx;
+}
+
+static int
+acl_add_rules(struct rte_acl_ctx *ctx, const void *rules, uint32_t num)
+{
+       uint8_t *pos;
+
+       if (num + ctx->num_rules > ctx->max_rules)
+               return -ENOMEM;
+
+       pos = ctx->rules;
+       pos += ctx->rule_sz * ctx->num_rules;
+       memcpy(pos, rules, num * ctx->rule_sz);
+       ctx->num_rules += num;
+
+       return 0;
+}
+
+static int
+acl_check_rule(const struct rte_acl_rule_data *rd)
+{
+       if ((rd->category_mask & LEN2MASK(RTE_ACL_MAX_CATEGORIES)) == 0 ||
+                       rd->priority > RTE_ACL_MAX_PRIORITY ||
+                       rd->priority < RTE_ACL_MIN_PRIORITY ||
+                       rd->userdata == RTE_ACL_INVALID_USERDATA)
+               return -EINVAL;
+       return 0;
+}
+
+int
+rte_acl_add_rules(struct rte_acl_ctx *ctx, const struct rte_acl_rule *rules,
+       uint32_t num)
+{
+       const struct rte_acl_rule *rv;
+       uint32_t i;
+       int32_t rc;
+
+       if (ctx == NULL || rules == NULL || 0 == ctx->rule_sz)
+               return -EINVAL;
+
+       for (i = 0; i != num; i++) {
+               rv = (const struct rte_acl_rule *)
+                       ((uintptr_t)rules + i * ctx->rule_sz);
+               rc = acl_check_rule(&rv->data);
+               if (rc != 0) {
+                       RTE_LOG(ERR, ACL, "%s(%s): rule #%u is invalid\n",
+                               __func__, ctx->name, i + 1);
+                       return rc;
+               }
+       }
+
+       return acl_add_rules(ctx, rules, num);
+}
+
+/*
+ * Reset all rules.
+ * Note that RT structures are not affected.
+ */
+void
+rte_acl_reset_rules(struct rte_acl_ctx *ctx)
+{
+       if (ctx != NULL)
+               ctx->num_rules = 0;
+}
+
+/*
+ * Reset all rules and destroy RT structures.
+ */
+void
+rte_acl_reset(struct rte_acl_ctx *ctx)
+{
+       if (ctx != NULL) {
+               rte_acl_reset_rules(ctx);
+               rte_acl_build(ctx, &ctx->config);
+       }
+}
+
+/*
+ * Dump an ACL context to stdout.
+ */
+void
+rte_acl_dump(const struct rte_acl_ctx *ctx)
+{
+       if (!ctx)
+               return;
+       printf("acl context <%s>@%p\n", ctx->name, ctx);
+       printf("  max_rules=%"PRIu32"\n", ctx->max_rules);
+       printf("  rule_size=%"PRIu32"\n", ctx->rule_sz);
+       printf("  num_rules=%"PRIu32"\n", ctx->num_rules);
+       printf("  num_categories=%"PRIu32"\n", ctx->num_categories);
+       printf("  num_tries=%"PRIu32"\n", ctx->num_tries);
+}
+
+/*
+ * Dump all ACL contexts to stdout.
+ */
+void
+rte_acl_list_dump(void)
+{
+       struct rte_acl_ctx *ctx;
+       struct rte_acl_list *acl_list;
+
+       /* check that we have an initialised tail queue */
+       acl_list = RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_ACL, rte_acl_list);
+       if (acl_list == NULL) {
+               rte_errno = E_RTE_NO_TAILQ;
+               return;
+       }
+
+       rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+       TAILQ_FOREACH(ctx, acl_list, next) {
+               rte_acl_dump(ctx);
+       }
+       rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+}
+
+/*
+ * Support for legacy ipv4vlan rules.
+ */
+
+RTE_ACL_RULE_DEF(acl_ipv4vlan_rule, RTE_ACL_IPV4VLAN_NUM_FIELDS);
+
+static int
+acl_ipv4vlan_check_rule(const struct rte_acl_ipv4vlan_rule *rule)
+{
+       if (rule->src_port_low > rule->src_port_high ||
+                       rule->dst_port_low > rule->dst_port_high ||
+                       rule->src_mask_len > BIT_SIZEOF(rule->src_addr) ||
+                       rule->dst_mask_len > BIT_SIZEOF(rule->dst_addr))
+               return -EINVAL;
+
+       return acl_check_rule(&rule->data);
+}
+
+static void
+acl_ipv4vlan_convert_rule(const struct rte_acl_ipv4vlan_rule *ri,
+       struct acl_ipv4vlan_rule *ro)
+{
+       ro->data = ri->data;
+
+       ro->field[RTE_ACL_IPV4VLAN_PROTO_FIELD].value.u8 = ri->proto;
+       ro->field[RTE_ACL_IPV4VLAN_VLAN1_FIELD].value.u16 = ri->vlan;
+       ro->field[RTE_ACL_IPV4VLAN_VLAN2_FIELD].value.u16 = ri->domain;
+       ro->field[RTE_ACL_IPV4VLAN_SRC_FIELD].value.u32 = ri->src_addr;
+       ro->field[RTE_ACL_IPV4VLAN_DST_FIELD].value.u32 = ri->dst_addr;
+       ro->field[RTE_ACL_IPV4VLAN_SRCP_FIELD].value.u16 = ri->src_port_low;
+       ro->field[RTE_ACL_IPV4VLAN_DSTP_FIELD].value.u16 = ri->dst_port_low;
+
+       ro->field[RTE_ACL_IPV4VLAN_PROTO_FIELD].mask_range.u8 = ri->proto_mask;
+       ro->field[RTE_ACL_IPV4VLAN_VLAN1_FIELD].mask_range.u16 = ri->vlan_mask;
+       ro->field[RTE_ACL_IPV4VLAN_VLAN2_FIELD].mask_range.u16 =
+               ri->domain_mask;
+       ro->field[RTE_ACL_IPV4VLAN_SRC_FIELD].mask_range.u32 =
+               ri->src_mask_len;
+       ro->field[RTE_ACL_IPV4VLAN_DST_FIELD].mask_range.u32 = ri->dst_mask_len;
+       ro->field[RTE_ACL_IPV4VLAN_SRCP_FIELD].mask_range.u16 =
+               ri->src_port_high;
+       ro->field[RTE_ACL_IPV4VLAN_DSTP_FIELD].mask_range.u16 =
+               ri->dst_port_high;
+}
+
+int
+rte_acl_ipv4vlan_add_rules(struct rte_acl_ctx *ctx,
+       const struct rte_acl_ipv4vlan_rule *rules,
+       uint32_t num)
+{
+       int32_t rc;
+       uint32_t i;
+       struct acl_ipv4vlan_rule rv;
+
+       if (ctx == NULL || rules == NULL || ctx->rule_sz != sizeof(rv))
+               return -EINVAL;
+
+       /* check input rules. */
+       for (i = 0; i != num; i++) {
+               rc = acl_ipv4vlan_check_rule(rules + i);
+               if (rc != 0) {
+                       RTE_LOG(ERR, ACL, "%s(%s): rule #%u is invalid\n",
+                               __func__, ctx->name, i + 1);
+                       return rc;
+               }
+       }
+
+       if (num + ctx->num_rules > ctx->max_rules)
+               return -ENOMEM;
+
+       /* perform conversion to the internal format and add to the context. */
+       for (i = 0, rc = 0; i != num && rc == 0; i++) {
+               acl_ipv4vlan_convert_rule(rules + i, &rv);
+               rc = acl_add_rules(ctx, &rv, 1);
+       }
+
+       return rc;
+}
+
+static void
+acl_ipv4vlan_config(struct rte_acl_config *cfg,
+       const uint32_t layout[RTE_ACL_IPV4VLAN_NUM],
+       uint32_t num_categories)
+{
+       static const struct rte_acl_field_def
+               ipv4_defs[RTE_ACL_IPV4VLAN_NUM_FIELDS] = {
+               {
+                       .type = RTE_ACL_FIELD_TYPE_BITMASK,
+                       .size = sizeof(uint8_t),
+                       .field_index = RTE_ACL_IPV4VLAN_PROTO_FIELD,
+                       .input_index = RTE_ACL_IPV4VLAN_PROTO,
+               },
+               {
+                       .type = RTE_ACL_FIELD_TYPE_BITMASK,
+                       .size = sizeof(uint16_t),
+                       .field_index = RTE_ACL_IPV4VLAN_VLAN1_FIELD,
+                       .input_index = RTE_ACL_IPV4VLAN_VLAN,
+               },
+               {
+                       .type = RTE_ACL_FIELD_TYPE_BITMASK,
+                       .size = sizeof(uint16_t),
+                       .field_index = RTE_ACL_IPV4VLAN_VLAN2_FIELD,
+                       .input_index = RTE_ACL_IPV4VLAN_VLAN,
+               },
+               {
+                       .type = RTE_ACL_FIELD_TYPE_MASK,
+                       .size = sizeof(uint32_t),
+                       .field_index = RTE_ACL_IPV4VLAN_SRC_FIELD,
+                       .input_index = RTE_ACL_IPV4VLAN_SRC,
+               },
+               {
+                       .type = RTE_ACL_FIELD_TYPE_MASK,
+                       .size = sizeof(uint32_t),
+                       .field_index = RTE_ACL_IPV4VLAN_DST_FIELD,
+                       .input_index = RTE_ACL_IPV4VLAN_DST,
+               },
+               {
+                       .type = RTE_ACL_FIELD_TYPE_RANGE,
+                       .size = sizeof(uint16_t),
+                       .field_index = RTE_ACL_IPV4VLAN_SRCP_FIELD,
+                       .input_index = RTE_ACL_IPV4VLAN_PORTS,
+               },
+               {
+                       .type = RTE_ACL_FIELD_TYPE_RANGE,
+                       .size = sizeof(uint16_t),
+                       .field_index = RTE_ACL_IPV4VLAN_DSTP_FIELD,
+                       .input_index = RTE_ACL_IPV4VLAN_PORTS,
+               },
+       };
+
+       memcpy(&cfg->defs, ipv4_defs, sizeof(ipv4_defs));
+       cfg->num_fields = RTE_DIM(ipv4_defs);
+
+       cfg->defs[RTE_ACL_IPV4VLAN_PROTO_FIELD].offset =
+               layout[RTE_ACL_IPV4VLAN_PROTO];
+       cfg->defs[RTE_ACL_IPV4VLAN_VLAN1_FIELD].offset =
+               layout[RTE_ACL_IPV4VLAN_VLAN];
+       cfg->defs[RTE_ACL_IPV4VLAN_VLAN2_FIELD].offset =
+               layout[RTE_ACL_IPV4VLAN_VLAN] +
+               cfg->defs[RTE_ACL_IPV4VLAN_VLAN1_FIELD].size;
+       cfg->defs[RTE_ACL_IPV4VLAN_SRC_FIELD].offset =
+               layout[RTE_ACL_IPV4VLAN_SRC];
+       cfg->defs[RTE_ACL_IPV4VLAN_DST_FIELD].offset =
+               layout[RTE_ACL_IPV4VLAN_DST];
+       cfg->defs[RTE_ACL_IPV4VLAN_SRCP_FIELD].offset =
+               layout[RTE_ACL_IPV4VLAN_PORTS];
+       cfg->defs[RTE_ACL_IPV4VLAN_DSTP_FIELD].offset =
+               layout[RTE_ACL_IPV4VLAN_PORTS] +
+               cfg->defs[RTE_ACL_IPV4VLAN_SRCP_FIELD].size;
+
+       cfg->num_categories = num_categories;
+}
+
+int
+rte_acl_ipv4vlan_build(struct rte_acl_ctx *ctx,
+       const uint32_t layout[RTE_ACL_IPV4VLAN_NUM],
+       uint32_t num_categories)
+{
+       struct rte_acl_config cfg;
+
+       if (ctx == NULL || layout == NULL)
+               return -EINVAL;
+
+       acl_ipv4vlan_config(&cfg, layout, num_categories);
+       return rte_acl_build(ctx, &cfg);
+}
diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h
new file mode 100644 (file)
index 0000000..afc0f69
--- /dev/null
@@ -0,0 +1,453 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_H_
+#define _RTE_ACL_H_
+
+/**
+ * @file
+ *
+ * RTE Classifier.
+ */
+
+#include <rte_acl_osdep.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define        RTE_ACL_MAX_CATEGORIES  16
+
+#define        RTE_ACL_RESULTS_MULTIPLIER      (XMM_SIZE / sizeof(uint32_t))
+
+#define RTE_ACL_MAX_LEVELS 64
+#define RTE_ACL_MAX_FIELDS 64
+
+union rte_acl_field_types {
+       uint8_t  u8;
+       uint16_t u16;
+       uint32_t u32;
+       uint64_t u64;
+};
+
+enum {
+       RTE_ACL_FIELD_TYPE_MASK = 0,
+       RTE_ACL_FIELD_TYPE_RANGE,
+       RTE_ACL_FIELD_TYPE_BITMASK
+};
+
+/**
+ * ACL Field definition.
+ * Each field in the ACL rule has an associate definition.
+ * It defines the type of field, its size, its offset in the input buffer,
+ * the field index, and the input index.
+ * For performance reasons, the inner loop of the search function is unrolled
+ * to process four input bytes at a time. This requires the input to be grouped
+ * into sets of 4 consecutive bytes. The loop processes the first input byte as
+ * part of the setup and then subsequent bytes must be in groups of 4
+ * consecutive bytes.
+ */
+struct rte_acl_field_def {
+       uint8_t  type;        /**< type - RTE_ACL_FIELD_TYPE_*. */
+       uint8_t  size;        /**< size of field: 1, 2, 4, or 8 bytes. */
+       uint8_t  field_index; /**< index of field inside the rule. */
+       uint8_t  input_index; /**< 0-N input index. */
+       uint32_t offset;      /**< offset to start of field. */
+};
+
+/**
+ * ACL build configuration.
+ * Defines the fields of an ACL trie and number of categories to build with.
+ */
+struct rte_acl_config {
+       uint32_t num_categories; /**< Number of categories to build with. */
+       uint32_t num_fields;     /**< Number of field definitions. */
+       struct rte_acl_field_def defs[RTE_ACL_MAX_FIELDS];
+       /**< array of field definitions. */
+};
+
+/**
+ * Defines the value of a field for a rule.
+ */
+struct rte_acl_field {
+       union rte_acl_field_types value;
+       /**< a 1,2,4, or 8 byte value of the field. */
+       union rte_acl_field_types mask_range;
+       /**<
+        * depending on field type:
+        * mask -> 1.2.3.4/32 value=0x1020304, mask_range=32,
+        * range -> 0 : 65535 value=0, mask_range=65535,
+        * bitmask -> 0x06/0xff value=6, mask_range=0xff.
+        */
+};
+
+enum {
+       RTE_ACL_TYPE_SHIFT = 29,
+       RTE_ACL_MAX_INDEX = LEN2MASK(RTE_ACL_TYPE_SHIFT),
+       RTE_ACL_MAX_PRIORITY = RTE_ACL_MAX_INDEX,
+       RTE_ACL_MIN_PRIORITY = 0,
+};
+
+#define        RTE_ACL_INVALID_USERDATA        0
+
+/**
+ * Miscellaneous data for ACL rule.
+ */
+struct rte_acl_rule_data {
+       uint32_t category_mask; /**< Mask of categories for that rule. */
+       int32_t  priority;      /**< Priority for that rule. */
+       uint32_t userdata;      /**< Associated with the rule user data. */
+};
+
+/**
+ * Defines single ACL rule.
+ * data - miscellaneous data for the rule.
+ * field[] - value and mask or range for each field.
+ */
+#define        RTE_ACL_RULE_DEF(name, fld_num) struct name {\
+       struct rte_acl_rule_data data;               \
+       struct rte_acl_field field[fld_num];         \
+}
+
+RTE_ACL_RULE_DEF(rte_acl_rule, 0);
+
+#define        RTE_ACL_RULE_SZ(fld_num)        \
+       (sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num))
+
+
+/** Max number of characters in name.*/
+#define        RTE_ACL_NAMESIZE                32
+
+/**
+ * Parameters used when creating the ACL context.
+ */
+struct rte_acl_param {
+       const char *name;         /**< Name of the ACL context. */
+       int         socket_id;    /**< Socket ID to allocate memory for. */
+       uint32_t    rule_size;    /**< Size of each rule. */
+       uint32_t    max_rule_num; /**< Maximum number of rules. */
+};
+
+
+/**
+ * Create a new ACL context.
+ *
+ * @param param
+ *   Parameters used to create and initialise the ACL context.
+ * @return
+ *   Pointer to ACL context structure that is used in future ACL
+ *   operations, or NULL on error, with error code set in rte_errno.
+ *   Possible rte_errno errors include:
+ *   - E_RTE_NO_TAILQ - no tailq list could be obtained for the ACL context list
+ *   - EINVAL - invalid parameter passed to function
+ */
+struct rte_acl_ctx *
+rte_acl_create(const struct rte_acl_param *param);
+
+/**
+ * Find an existing ACL context object and return a pointer to it.
+ *
+ * @param name
+ *   Name of the ACL context as passed to rte_acl_create()
+ * @return
+ *   Pointer to ACL context or NULL if object not found
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - ENOENT - value not available for return
+ */
+struct rte_acl_ctx *
+rte_acl_find_existing(const char *name);
+
+/**
+ * De-allocate all memory used by ACL context.
+ *
+ * @param ctx
+ *   ACL context to free
+ */
+void
+rte_acl_free(struct rte_acl_ctx *ctx);
+
+/**
+ * Add rules to an existing ACL context.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to add patterns to.
+ * @param rules
+ *   Array of rules to add to the ACL context.
+ *   Note that all fields in rte_acl_rule structures are expected
+ *   to be in host byte order.
+ *   Each rule is expected to be in the same format and must not exceed the
+ *   size specified at ACL context creation time.
+ * @param num
+ *   Number of elements in the input array of rules.
+ * @return
+ *   - -ENOMEM if there is no space in the ACL context for these rules.
+ *   - -EINVAL if the parameters are invalid.
+ *   - Zero if operation completed successfully.
+ */
+int
+rte_acl_add_rules(struct rte_acl_ctx *ctx, const struct rte_acl_rule *rules,
+       uint32_t num);
+
+/**
+ * Delete all rules from the ACL context.
+ * This function is not multi-thread safe.
+ * Note that internal run-time structures are not affected.
+ *
+ * @param ctx
+ *   ACL context to delete rules from.
+ */
+void
+rte_acl_reset_rules(struct rte_acl_ctx *ctx);
+
+/**
+ * Analyze set of rules and build required internal run-time structures.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to build.
+ * @param cfg
+ *   Pointer to struct rte_acl_config - defines build parameters.
+ * @return
+ *   - -ENOMEM if couldn't allocate enough memory.
+ *   - -EINVAL if the parameters are invalid.
+ *   - Negative error code if operation failed.
+ *   - Zero if operation completed successfully.
+ */
+int
+rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg);
+
+/**
+ * Delete all rules from the ACL context and
+ * destroy all internal run-time structures.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to reset.
+ */
+void
+rte_acl_reset(struct rte_acl_ctx *ctx);
+
+/**
+ * Search for a matching ACL rule for each input data buffer.
+ * Each input data buffer can have up to *categories* matches.
+ * That implies that the results array should be big enough to hold
+ * (categories * num) elements.
+ * Also, the categories parameter should be either one or a multiple of
+ * RTE_ACL_RESULTS_MULTIPLIER and cannot be bigger than RTE_ACL_MAX_CATEGORIES.
+ * If more than one rule is applicable for a given input buffer and
+ * a given category, then the rule with the highest priority is returned
+ * as the match.
+ * Note that it is the caller's responsibility to ensure that input parameters
+ * are valid and point to correct memory locations.
+ *
+ * @param ctx
+ *   ACL context to search with.
+ * @param data
+ *   Array of pointers to input data buffers to perform search.
+ *   Note that all fields in the input data buffers are supposed to be in
+ *   network byte order (MSB).
+ * @param results
+ *   Array of search results, *categories* results per each input data buffer.
+ * @param num
+ *   Number of elements in the input data buffers array.
+ * @param categories
+ *   Number of maximum possible matches for each input buffer, one possible
+ *   match per category.
+ * @return
+ *   zero on successful completion.
+ *   -EINVAL for incorrect arguments.
+ */
+int
+rte_acl_classify(const struct rte_acl_ctx *ctx, const uint8_t **data,
+       uint32_t *results, uint32_t num, uint32_t categories);
+
+/**
+ * Perform scalar search for a matching ACL rule for each input data buffer.
+ * Note that while the search itself will avoid explicit use of SSE/AVX
+ * intrinsics, the code for comparing matching results/priorities still might
+ * use vector intrinsics (for categories > 1).
+ * Each input data buffer can have up to *categories* matches.
+ * That implies that the results array should be big enough to hold
+ * (categories * num) elements.
+ * Also, the categories parameter should be either one or a multiple of
+ * RTE_ACL_RESULTS_MULTIPLIER and cannot be bigger than RTE_ACL_MAX_CATEGORIES.
+ * If more than one rule is applicable for a given input buffer and
+ * a given category, then the rule with the highest priority is returned
+ * as the match.
+ * Note that it is the caller's responsibility to ensure that input parameters
+ * are valid and point to correct memory locations.
+ *
+ * @param ctx
+ *   ACL context to search with.
+ * @param data
+ *   Array of pointers to input data buffers to perform search.
+ *   Note that all fields in the input data buffers are supposed to be in
+ *   network byte order (MSB).
+ * @param results
+ *   Array of search results, *categories* results per each input data buffer.
+ * @param num
+ *   Number of elements in the input data buffers array.
+ * @param categories
+ *   Number of maximum possible matches for each input buffer, one possible
+ *   match per category.
+ * @return
+ *   zero on successful completion.
+ *   -EINVAL for incorrect arguments.
+ */
+int
+rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
+       uint32_t *results, uint32_t num, uint32_t categories);
+
+/**
+ * Dump an ACL context structure to the console.
+ *
+ * @param ctx
+ *   ACL context to dump.
+ */
+void
+rte_acl_dump(const struct rte_acl_ctx *ctx);
+
+/**
+ * Dump all ACL context structures to the console.
+ */
+void
+rte_acl_list_dump(void);
+
+/**
+ * Legacy support for 7-tuple IPv4 and VLAN rule.
+ * This structure and the corresponding API are deprecated.
+ */
+struct rte_acl_ipv4vlan_rule {
+       struct rte_acl_rule_data data; /**< Miscellaneous data for the rule. */
+       uint8_t proto;                 /**< IPv4 protocol ID. */
+       uint8_t proto_mask;            /**< IPv4 protocol ID mask. */
+       uint16_t vlan;                 /**< VLAN ID. */
+       uint16_t vlan_mask;            /**< VLAN ID mask. */
+       uint16_t domain;               /**< VLAN domain. */
+       uint16_t domain_mask;          /**< VLAN domain mask. */
+       uint32_t src_addr;             /**< IPv4 source address. */
+       uint32_t src_mask_len;         /**< IPv4 source address mask. */
+       uint32_t dst_addr;             /**< IPv4 destination address. */
+       uint32_t dst_mask_len;         /**< IPv4 destination address mask. */
+       uint16_t src_port_low;         /**< L4 source port low. */
+       uint16_t src_port_high;        /**< L4 source port high. */
+       uint16_t dst_port_low;         /**< L4 destination port low. */
+       uint16_t dst_port_high;        /**< L4 destination port high. */
+};
+
+/**
+ * Specifies fields layout inside rte_acl_rule for rte_acl_ipv4vlan_rule.
+ */
+enum {
+       RTE_ACL_IPV4VLAN_PROTO_FIELD,
+       RTE_ACL_IPV4VLAN_VLAN1_FIELD,
+       RTE_ACL_IPV4VLAN_VLAN2_FIELD,
+       RTE_ACL_IPV4VLAN_SRC_FIELD,
+       RTE_ACL_IPV4VLAN_DST_FIELD,
+       RTE_ACL_IPV4VLAN_SRCP_FIELD,
+       RTE_ACL_IPV4VLAN_DSTP_FIELD,
+       RTE_ACL_IPV4VLAN_NUM_FIELDS
+};
+
+/**
+ * Macro to define rule size for rte_acl_ipv4vlan_rule.
+ */
+#define        RTE_ACL_IPV4VLAN_RULE_SZ        \
+       RTE_ACL_RULE_SZ(RTE_ACL_IPV4VLAN_NUM_FIELDS)
+
+/*
+ * This effectively defines the order of IPV4VLAN classifications:
+ *  - PROTO
+ *  - VLAN (TAG and DOMAIN)
+ *  - SRC IP ADDRESS
+ *  - DST IP ADDRESS
+ *  - PORTS (SRC and DST)
+ */
+enum {
+       RTE_ACL_IPV4VLAN_PROTO,
+       RTE_ACL_IPV4VLAN_VLAN,
+       RTE_ACL_IPV4VLAN_SRC,
+       RTE_ACL_IPV4VLAN_DST,
+       RTE_ACL_IPV4VLAN_PORTS,
+       RTE_ACL_IPV4VLAN_NUM
+};
+
+/**
+ * Add ipv4vlan rules to an existing ACL context.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to add patterns to.
+ * @param rules
+ *   Array of rules to add to the ACL context.
+ *   Note that all fields in rte_acl_ipv4vlan_rule structures are expected
+ *   to be in host byte order.
+ * @param num
+ *   Number of elements in the input array of rules.
+ * @return
+ *   - -ENOMEM if there is no space in the ACL context for these rules.
+ *   - -EINVAL if the parameters are invalid.
+ *   - Zero if operation completed successfully.
+ */
+int
+rte_acl_ipv4vlan_add_rules(struct rte_acl_ctx *ctx,
+       const struct rte_acl_ipv4vlan_rule *rules,
+       uint32_t num);
+
+/**
+ * Analyze set of ipv4vlan rules and build required internal
+ * run-time structures.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to build.
+ * @param layout
+ *   Layout of input data to search through.
+ * @param num_categories
+ *   Maximum number of categories to use in this build.
+ * @return
+ *   - -ENOMEM if enough memory could not be allocated.
+ *   - -EINVAL if the parameters are invalid.
+ *   - Negative error code if operation failed.
+ *   - Zero if operation completed successfully.
+ */
+int
+rte_acl_ipv4vlan_build(struct rte_acl_ctx *ctx,
+       const uint32_t layout[RTE_ACL_IPV4VLAN_NUM],
+       uint32_t num_categories);
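Taken together, a hedged sketch of how the two legacy helpers above can be driven; the rte_acl_rule_data values and the layout byte offsets below are illustrative assumptions, not values required by the API:

        /* Sketch: add one "any TCP" rule, then build with a single category. */
        struct rte_acl_ipv4vlan_rule rule = {
                .data = { .userdata = 1, .category_mask = 1, .priority = 1 },
                .proto = 6, .proto_mask = 0xff,         /* TCP */
                .src_addr = 0, .src_mask_len = 0,       /* any source */
                .dst_addr = 0, .dst_mask_len = 0,       /* any destination */
                .src_port_low = 0, .src_port_high = UINT16_MAX,
                .dst_port_low = 0, .dst_port_high = UINT16_MAX,
        };
        /* Byte offsets of each input field inside the search buffer
         * (illustrative values). */
        uint32_t layout[RTE_ACL_IPV4VLAN_NUM] = {
                [RTE_ACL_IPV4VLAN_PROTO] = 0,
                [RTE_ACL_IPV4VLAN_VLAN]  = 1,
                [RTE_ACL_IPV4VLAN_SRC]   = 5,
                [RTE_ACL_IPV4VLAN_DST]   = 9,
                [RTE_ACL_IPV4VLAN_PORTS] = 13,
        };

        if (rte_acl_ipv4vlan_add_rules(ctx, &rule, 1) != 0 ||
                        rte_acl_ipv4vlan_build(ctx, layout, 1) != 0)
                return -1;                      /* -ENOMEM or -EINVAL */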
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ACL_H_ */
diff --git a/lib/librte_acl/rte_acl_osdep.h b/lib/librte_acl/rte_acl_osdep.h
new file mode 100644 (file)
index 0000000..046b22d
--- /dev/null
@@ -0,0 +1,92 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_OSDEP_H_
+#define _RTE_ACL_OSDEP_H_
+
+/**
+ * @file
+ *
+ * RTE ACL DPDK/OS dependent file.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <sys/queue.h>
+
+/*
+ * Common defines.
+ */
+
+#define        LEN2MASK(ln)    ((uint32_t)(((uint64_t)1 << (ln)) - 1))
+
+#define DIM(x) RTE_DIM(x)
+
+/*
+ * To build ACL standalone.
+ */
+#ifdef RTE_LIBRTE_ACL_STANDALONE
+#include <rte_acl_osdep_alone.h>
+#else
+
+#include <rte_common.h>
+#include <rte_common_vect.h>
+#include <rte_memory.h>
+#include <rte_log.h>
+#include <rte_memcpy.h>
+#include <rte_prefetch.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+
+#endif /* RTE_LIBRTE_ACL_STANDALONE */
+
+#endif /* _RTE_ACL_OSDEP_H_ */
diff --git a/lib/librte_acl/rte_acl_osdep_alone.h b/lib/librte_acl/rte_acl_osdep_alone.h
new file mode 100644 (file)
index 0000000..a46d71e
--- /dev/null
@@ -0,0 +1,278 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_OSDEP_ALONE_H_
+#define _RTE_ACL_OSDEP_ALONE_H_
+
+/**
+ * @file
+ *
+ * RTE ACL OS dependent file.
+ * An example of how to build/use the ACL library standalone
+ * (without the rest of DPDK).
+ * Do not include this file on its own; use <rte_acl_osdep.h> instead.
+ */
+
+#if (defined(__ICC) || (__GNUC__ == 4 &&  __GNUC_MINOR__ < 4))
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#if defined(__SSE4_2__) || defined(__SSE4_1__)
+#include <smmintrin.h>
+#endif
+
+#else
+
+#include <x86intrin.h>
+
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define        DUMMY_MACRO     do {} while (0)
+
+/*
+ * rte_common related.
+ */
+#define        __rte_unused    __attribute__((__unused__))
+
+#define RTE_PTR_ADD(ptr, x)    ((typeof(ptr))((uintptr_t)(ptr) + (x)))
+
+#define        RTE_PTR_ALIGN_FLOOR(ptr, align) \
+       (typeof(ptr))((uintptr_t)(ptr) & ~((uintptr_t)(align) - 1))
+
+#define        RTE_PTR_ALIGN_CEIL(ptr, align) \
+       RTE_PTR_ALIGN_FLOOR(RTE_PTR_ADD(ptr, (align) - 1), align)
+
+#define        RTE_PTR_ALIGN(ptr, align)       RTE_PTR_ALIGN_CEIL(ptr, align)
+
+#define        RTE_ALIGN_FLOOR(val, align) \
+       (typeof(val))((val) & (~((typeof(val))((align) - 1))))
+
+#define        RTE_ALIGN_CEIL(val, align) \
+       RTE_ALIGN_FLOOR(((val) + ((typeof(val))(align) - 1)), align)
+
+#define        RTE_ALIGN(ptr, align)   RTE_ALIGN_CEIL(ptr, align)
+
+#define        RTE_MIN(a, b)   ({ \
+               typeof(a) _a = (a); \
+               typeof(b) _b = (b); \
+               _a < _b ? _a : _b;   \
+       })
+
+#define        RTE_DIM(a)              (sizeof(a) / sizeof((a)[0]))
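A short worked example of the alignment helpers above, with illustrative values:

        /* With align == 64:
         *   RTE_ALIGN_FLOOR(100, 64) == 64     - round down to the boundary
         *   RTE_ALIGN_CEIL(100, 64)  == 128    - round up to the boundary
         * The RTE_PTR_* variants apply the same arithmetic to a pointer
         * while preserving its type via typeof().
         */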
+
+/**
+ * Searches the input parameter for the least significant set bit
+ * (starting from zero).
+ * If a least significant 1 bit is found, its bit index is returned.
+ * If the content of the input parameter is zero, then the content of the
+ * return value is undefined.
+ * @param v
+ *     input parameter, should not be zero.
+ * @return
+ *     least significant set bit in the input parameter.
+ */
+static inline uint32_t
+rte_bsf32(uint32_t v)
+{
+       asm("bsf %1,%0"
+               : "=r" (v)
+               : "rm" (v));
+       return v;
+}
+
+/*
+ * rte_common_vect related.
+ */
+typedef __m128i xmm_t;
+
+#define        XMM_SIZE        (sizeof(xmm_t))
+#define        XMM_MASK        (XMM_SIZE - 1)
+
+typedef union rte_mmsse {
+       xmm_t    m;
+       uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
+       uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
+       uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
+       uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
+       double   pd[XMM_SIZE / sizeof(double)];
+} rte_xmm_t;
+
+/*
+ * rte_cycles related.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+       union {
+               uint64_t tsc_64;
+               struct {
+                       uint32_t lo_32;
+                       uint32_t hi_32;
+               };
+       } tsc;
+
+       asm volatile("rdtsc" :
+               "=a" (tsc.lo_32),
+               "=d" (tsc.hi_32));
+       return tsc.tsc_64;
+}
+
+/*
+ * rte_lcore related.
+ */
+#define rte_lcore_id() (0)
+
+/*
+ * rte_errno related.
+ */
+#define        rte_errno       errno
+#define        E_RTE_NO_TAILQ  (-1)
+
+/*
+ * rte_rwlock related.
+ */
+#define        rte_rwlock_read_lock(x)         DUMMY_MACRO
+#define        rte_rwlock_read_unlock(x)       DUMMY_MACRO
+#define        rte_rwlock_write_lock(x)        DUMMY_MACRO
+#define        rte_rwlock_write_unlock(x)      DUMMY_MACRO
+
+/*
+ * rte_memory related.
+ */
+#define        SOCKET_ID_ANY   -1                  /**< Any NUMA socket. */
+#define        CACHE_LINE_SIZE 64                  /**< Cache line size. */
+#define        CACHE_LINE_MASK (CACHE_LINE_SIZE-1) /**< Cache line mask. */
+
+/**
+ * Force alignment to cache line.
+ */
+#define        __rte_cache_aligned     __attribute__((__aligned__(CACHE_LINE_SIZE)))
+
+
+/*
+ * rte_byteorder related.
+ */
+#define        rte_le_to_cpu_16(x)     (x)
+#define        rte_le_to_cpu_32(x)     (x)
+
+#define rte_cpu_to_be_16(x)    \
+       (((x) & UINT8_MAX) << CHAR_BIT | ((x) >> CHAR_BIT & UINT8_MAX))
+#define rte_cpu_to_be_32(x)    __builtin_bswap32(x)
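A worked trace of the 16-bit swap above (the value is only an example):

        /* rte_cpu_to_be_16(0x1234) on a little-endian host:
         *   (0x1234 & UINT8_MAX) << CHAR_BIT   == 0x3400
         *   (0x1234 >> CHAR_BIT) & UINT8_MAX   == 0x0012
         *   result                             == 0x3412
         */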
+
+/*
+ * rte_branch_prediction related.
+ */
+#ifndef        likely
+#define        likely(x)       __builtin_expect((x), 1)
+#endif /* likely */
+
+#ifndef        unlikely
+#define        unlikely(x)     __builtin_expect((x), 0)
+#endif /* unlikely */
+
+
+/*
+ * rte_tailq related.
+ */
+static inline void *
+rte_dummy_tailq(void)
+{
+       static __thread TAILQ_HEAD(rte_dummy_head, rte_dummy) dummy_head;
+       TAILQ_INIT(&dummy_head);
+       return &dummy_head;
+}
+
+#define        RTE_TAILQ_LOOKUP_BY_IDX(idx, struct_name)       rte_dummy_tailq()
+
+#define RTE_EAL_TAILQ_REMOVE(idx, type, elm)   DUMMY_MACRO
+
+/*
+ * rte_string related
+ */
+#define        rte_snprintf(str, len, frmt, args...)   snprintf(str, len, frmt, ##args)
+
+/*
+ * rte_log related
+ */
+#define RTE_LOG(l, t, fmt, args...)    printf(fmt, ##args)
+
+/*
+ * rte_malloc related
+ */
+#define        rte_free(x)     free(x)
+
+static inline void *
+rte_zmalloc_socket(__rte_unused const char *type, size_t size, unsigned align,
+       __rte_unused int socket)
+{
+       void *ptr;
+       int rc;
+
+       rc = posix_memalign(&ptr, align, size);
+       if (rc != 0) {
+               rte_errno = rc;
+               return NULL;
+       }
+
+       memset(ptr, 0, size);
+       return ptr;
+}
+
+/*
+ * rte_debug related
+ */
+#define        rte_panic(fmt, args...) do {         \
+       RTE_LOG(CRIT, EAL, fmt, ##args);     \
+       abort();                             \
+} while (0)
+
+#define        rte_exit(err, fmt, args...)     do { \
+       RTE_LOG(CRIT, EAL, fmt, ##args);     \
+       exit(err);                           \
+} while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ACL_OSDEP_ALONE_H_ */
diff --git a/lib/librte_acl/tb_mem.c b/lib/librte_acl/tb_mem.c
new file mode 100644 (file)
index 0000000..b303d15
--- /dev/null
@@ -0,0 +1,104 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "tb_mem.h"
+
+/*
+ *  Memory management routines for temporary memory.
+ *  This memory is used only during the build phase and is released after
+ *  the build is finished.
+ */
+
+static struct tb_mem_block *
+tb_pool(struct tb_mem_pool *pool, size_t sz)
+{
+       struct tb_mem_block *block;
+       uint8_t *ptr;
+       size_t size;
+
+       size = sz + pool->alignment - 1;
+       block = calloc(1, size + sizeof(*pool->block));
+       if (block == NULL) {
+               RTE_LOG(ERR, MALLOC, "%s(%zu) failed, currently allocated "
+                       "by pool: %zu bytes\n", __func__, sz, pool->alloc);
+               return NULL;
+       }
+
+       block->pool = pool;
+
+       block->next = pool->block;
+       pool->block = block;
+
+       pool->alloc += size;
+
+       ptr = (uint8_t *)(block + 1);
+       block->mem = RTE_PTR_ALIGN_CEIL(ptr, pool->alignment);
+       block->size = size - (block->mem - ptr);
+
+       return block;
+}
+
+void *
+tb_alloc(struct tb_mem_pool *pool, size_t size)
+{
+       struct tb_mem_block *block;
+       void *ptr;
+       size_t new_sz;
+
+       size = RTE_ALIGN_CEIL(size, pool->alignment);
+
+       block = pool->block;
+       if (block == NULL || block->size < size) {
+               new_sz = (size > pool->min_alloc) ? size : pool->min_alloc;
+               block = tb_pool(pool, new_sz);
+               if (block == NULL)
+                       return NULL;
+       }
+       ptr = block->mem;
+       block->size -= size;
+       block->mem += size;
+       return ptr;
+}
+
+void
+tb_free_pool(struct tb_mem_pool *pool)
+{
+       struct tb_mem_block *next, *block;
+
+       for (block = pool->block; block != NULL; block = next) {
+               next = block->next;
+               free(block);
+       }
+       pool->block = NULL;
+       pool->alloc = 0;
+}
diff --git a/lib/librte_acl/tb_mem.h b/lib/librte_acl/tb_mem.h
new file mode 100644 (file)
index 0000000..a3ed795
--- /dev/null
@@ -0,0 +1,73 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TB_MEM_H_
+#define _TB_MEM_H_
+
+/**
+ * @file
+ *
+ * RTE ACL temporary (build phase) memory management.
+ * Contains structures and functions to manage temporary (build-only) memory.
+ * Memory is allocated in large blocks to speed up freeing when the trie is
+ * destroyed (at the end of the build phase).
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_acl_osdep.h>
+
+struct tb_mem_block {
+       struct tb_mem_block *next;
+       struct tb_mem_pool  *pool;
+       size_t               size;
+       uint8_t             *mem;
+};
+
+struct tb_mem_pool {
+       struct tb_mem_block *block;
+       size_t               alignment;
+       size_t               min_alloc;
+       size_t               alloc;
+};
+
+void *tb_alloc(struct tb_mem_pool *pool, size_t size);
+void tb_free_pool(struct tb_mem_pool *pool);
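A hedged usage sketch of the pool above, roughly as build code is expected to drive it; the alignment and minimum block size below are illustrative choices, not requirements:

        /* Inside a build routine: zero-initialize the pool, set its
         * parameters, allocate freely while building, then release every
         * block with a single call once the trie is finished. */
        struct tb_mem_pool pool = {
                .block = NULL,
                .alignment = 32,                /* illustrative */
                .min_alloc = 1024 * 1024,       /* illustrative: 1 MB blocks */
                .alloc = 0,
        };

        void *node = tb_alloc(&pool, 128);      /* carved from the current block */
        if (node == NULL)
                return -ENOMEM;                 /* calloc() failed in tb_pool() */

        /* ... more tb_alloc() calls while the trie is constructed ... */

        tb_free_pool(&pool);                    /* frees all blocks at once */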
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TB_MEM_H_ */