/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <rte_acl.h>
#include "acl.h"

#define	QRANGE_MIN	((uint8_t)INT8_MIN)

#define	RTE_ACL_VERIFY(exp)	do {                                          \
	if (!(exp))                                                           \
		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
} while (0)
/* Per-type node counts gathered during the counting pass. */
struct acl_node_counters {
	int	match;
	int	match_used;
	int	single;
	int	quad;
	int	quad_vectors;
	int	dfa;
	int	smallest_match;
};
/* Running allocation offsets into the runtime node array. */
struct rte_acl_indices {
	int	dfa_index;
	int	quad_index;
	int	single_index;
	int	match_index;
};
static void
acl_gen_log_stats(const struct rte_acl_ctx *ctx,
	const struct acl_node_counters *counts)
{
	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
		"runtime memory footprint on socket %d:\n"
		"single nodes/bytes used: %d/%zu\n"
		"quad nodes/bytes used: %d/%zu\n"
		"DFA nodes/bytes used: %d/%zu\n"
		"match nodes/bytes used: %d/%zu\n"
		"total: %zu bytes\n",
		ctx->name, ctx->socket_id,
		counts->single, counts->single * sizeof(uint64_t),
		counts->quad, counts->quad_vectors * sizeof(uint64_t),
		counts->dfa, counts->dfa * RTE_ACL_DFA_SIZE * sizeof(uint64_t),
		counts->match,
		counts->match * sizeof(struct rte_acl_match_results),
		ctx->mem_sz);
}
/*
 * Counts the number of groups of sequential bits that are
 * either 0 or 1, as specified by the zero_one parameter. This is used to
 * calculate the number of ranges in a node to see if it fits in a quad range
 * node.
 */
static int
acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
{
	int n, ranges, last_bit;

	ranges = 0;
	last_bit = zero_one ^ 1;

	/* Scan the upper half (0x80-0xff) first, then the lower half,
	 * matching the signed-byte layout used by the quad range nodes. */
	for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
		if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1 << (n % (sizeof(bits_t) * CHAR_BIT)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}
	for (n = 0; n < QRANGE_MIN; n++) {
		if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1 << (n % (sizeof(bits_t) * CHAR_BIT)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}

	return ranges;
}
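/*
 * Illustrative example (not from the original comments): with bits set
 * for byte values 0x00-0x0f and 0x20-0x2f, the scan order is 0x80-0xff
 * followed by 0x00-0x7f, so zero_one == 1 finds two groups of ones
 * (0x00-0x0f and 0x20-0x2f), while zero_one == 0 finds three groups of
 * zeroes (0x80-0xff, 0x10-0x1f and 0x30-0x7f).
 */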
/*
 * Count number of ranges spanned by the node's pointers.
 */
static int
acl_count_fanout(struct rte_acl_node *node)
{
	uint32_t n;
	int ranges;

	if (node->fanout != 0)
		return node->fanout;

	ranges = acl_count_sequential_groups(&node->values, 0);

	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			ranges += acl_count_sequential_groups(
				&node->ptrs[n].values, 1);
	}

	node->fanout = ranges;
	return node->fanout;
}
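/*
 * Note: the result is memoized in node->fanout, so the later passes that
 * need it (type counting and node generation) only pay for the bit scan
 * once per node.
 */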
/*
 * Determine the type of nodes and count each type.
 */
static int
acl_count_trie_types(struct acl_node_counters *counts,
	struct rte_acl_node *node, int match, int force_dfa)
{
	uint32_t n;
	int num_ptrs;

	/* skip if this node has been counted */
	if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
		return match;

	if (node->match_flag != 0 || node->num_ptrs == 0) {
		counts->match++;
		if (node->match_flag == -1)
			node->match_flag = match++;
		node->node_type = RTE_ACL_NODE_MATCH;
		if (counts->smallest_match > node->match_flag)
			counts->smallest_match = node->match_flag;
		return match;
	}

	num_ptrs = acl_count_fanout(node);

	/* Force type to DFA */
	if (force_dfa)
		num_ptrs = RTE_ACL_DFA_SIZE;

	/* determine node type based on number of ranges */
	if (num_ptrs == 1) {
		counts->single++;
		node->node_type = RTE_ACL_NODE_SINGLE;
	} else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
		counts->quad++;
		counts->quad_vectors += node->fanout;
		node->node_type = RTE_ACL_NODE_QRANGE;
	} else {
		counts->dfa++;
		node->node_type = RTE_ACL_NODE_DFA;
	}

	/*
	 * recursively count the types of all children
	 */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			match = acl_count_trie_types(counts, node->ptrs[n].ptr,
				match, 0);
	}

	return match;
}
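/*
 * Classification summary: a fanout of 1 yields a SINGLE node, up to
 * RTE_ACL_QUAD_MAX ranges yield a QRANGE node, and anything wider becomes
 * a full 256-entry DFA node. Trie roots are counted with force_dfa set
 * (see acl_calc_counts_indices()), so the first transition of every
 * lookup is a plain table index rather than a range search.
 */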
static void
acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
	int resolved)
{
	uint32_t n, x;
	int m, ranges, last_bit;
	struct rte_acl_node *child;
	struct rte_acl_bitset *bits;
	uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];

	ranges = 0;
	last_bit = 0;

	/* Stage a full 256-entry transition table, compressed below. */
	for (n = 0; n < RTE_DIM(dfa); n++)
		dfa[n] = no_match;

	for (x = 0; x < node->num_ptrs; x++) {

		child = node->ptrs[x].ptr;
		if (child == NULL)
			continue;

		bits = &node->ptrs[x].values;
		for (n = 0; n < RTE_DIM(dfa); n++) {

			if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1 << (n % (sizeof(bits_t) * CHAR_BIT)))) {

				dfa[n] = resolved ? child->node_index : x;
				ranges += (last_bit == 0);
				last_bit = 1;
			} else {
				last_bit = 0;
			}
		}
	}
	/*
	 * Rather than going from 0 to 256, the range count and
	 * the layout are from 80-ff then 0-7f due to signed compare
	 * for SSE (cmpgt).
	 */
	if (node->node_type == RTE_ACL_NODE_QRANGE) {

		m = 0;
		node_a = node_array;
		index = dfa[QRANGE_MIN];
		*node_a++ = index;

		for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		for (x = 0; x < INT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		/* fill unused locations with max value - nothing is greater */
		for (; m < RTE_ACL_QUAD_SIZE; m++)
			node->transitions[m] = INT8_MAX;

		RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);
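		/*
		 * Worked example (illustrative): a staged table mapping
		 * 0x80-0xbf -> A, 0xc0-0xff -> B and 0x00-0x7f -> C stores
		 * the targets A, B, C and transitions[] = {0xbf, 0xff, 0x7f,
		 * 0x7f}. Read as signed bytes (-65, -1, 127, 127) these are
		 * the range upper bounds that the classify stage compares
		 * the input byte against to pick a target.
		 */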
	} else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
		for (n = 0; n < RTE_DIM(dfa); n++)
			node_array[n] = dfa[n];
	}
}
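/*
 * acl_add_ptrs() runs twice over a QRANGE node: once with resolved == 0
 * merely to lay out the range boundaries in transitions[], and again with
 * resolved == 1, after every child has a node_index, to store the final
 * 64-bit transition words. DFA nodes only need the second pass.
 */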
/*
 * Routine that allocates space for this node and recursively calls
 * itself to allocate space for each child. Once all the children are
 * allocated, resolve all transitions for this node.
 */
static void
acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
	uint64_t no_match, struct rte_acl_indices *index, int num_categories)
{
	uint32_t n, *qtrp;
	uint64_t *array_ptr;
	struct rte_acl_match_results *match;

	if (node->node_index != RTE_ACL_NODE_UNDEFINED)
		return;

	array_ptr = NULL;
	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		node->node_index = index->dfa_index | node->node_type;
		array_ptr = &node_array[index->dfa_index];
		index->dfa_index += RTE_ACL_DFA_SIZE;
		for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
			array_ptr[n] = no_match;
		break;
	case RTE_ACL_NODE_SINGLE:
		node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
			node->node_type;
		array_ptr = &node_array[index->single_index];
		index->single_index += 1;
		array_ptr[0] = no_match;
		break;
	case RTE_ACL_NODE_QRANGE:
		array_ptr = &node_array[index->quad_index];
		acl_add_ptrs(node, array_ptr, no_match, 0);
		qtrp = (uint32_t *)node->transitions;
		node->node_index = qtrp[0];
		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
		node->node_index |= index->quad_index | node->node_type;
		index->quad_index += node->fanout;
		break;
	case RTE_ACL_NODE_MATCH:
		match = ((struct rte_acl_match_results *)
			(node_array + index->match_index));
		memcpy(match + node->match_flag, node->mrt, sizeof(*node->mrt));
		node->node_index = node->match_flag | node->node_type;
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}
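	/*
	 * Note on the QRANGE case above: the first four boundary bytes of
	 * transitions[] (qtrp[0]) end up in the upper 32 bits of the 64-bit
	 * node_index, so the classify stage fetches the range boundaries
	 * together with the array offset.
	 */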
	/* recursively allocate space for all children */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_gen_node(node->ptrs[n].ptr, node_array, no_match,
				index, num_categories);
	}
	/* All children are resolved, resolve this node's pointers */
	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_SINGLE:
		for (n = 0; n < node->num_ptrs; n++) {
			if (node->ptrs[n].ptr != NULL)
				array_ptr[0] = node->ptrs[n].ptr->node_index;
		}
		break;
	case RTE_ACL_NODE_QRANGE:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_MATCH:
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}
}
static int
acl_calc_counts_indices(struct acl_node_counters *counts,
	struct rte_acl_indices *indices, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	int match_num)
{
	uint32_t n;

	memset(indices, 0, sizeof(*indices));
	memset(counts, 0, sizeof(*counts));

	/* Get stats on nodes */
	for (n = 0; n < num_tries; n++) {
		counts->smallest_match = INT32_MAX;
		match_num = acl_count_trie_types(counts, node_bld_trie[n].trie,
			match_num, 1);
		trie[n].smallest = counts->smallest_match;
	}

	indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
	indices->quad_index = indices->dfa_index +
		counts->dfa * RTE_ACL_DFA_SIZE;
	indices->single_index = indices->quad_index + counts->quad_vectors;
	indices->match_index = indices->single_index + counts->single + 1;
	indices->match_index = RTE_ALIGN(indices->match_index,
		(XMM_SIZE / sizeof(uint64_t)));

	return match_num;
}
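/*
 * Resulting node_array layout, sketched: entries 0 to RTE_ACL_DFA_SIZE - 1
 * hold the built-in no-match DFA page, entry RTE_ACL_DFA_SIZE holds the
 * idle single node, followed by all DFA pages, all quad vectors, all
 * single nodes and, XMM-aligned at match_index, the
 * rte_acl_match_results array.
 */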
/*
 * Generate the runtime structure using the build structure.
 */
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint32_t num_categories, uint32_t data_index_sz, int match_num)
{
	void *mem;
	size_t total_size;
	uint64_t *node_array, no_match;
	uint32_t n, match_index;
	struct rte_acl_match_results *match;
	struct acl_node_counters counts;
	struct rte_acl_indices indices;

	/* Fill counts and indices arrays from the nodes. */
	match_num = acl_calc_counts_indices(&counts, &indices, trie,
		node_bld_trie, num_tries, match_num);

	/* Allocate runtime memory (align to cache boundary) */
	total_size = RTE_ALIGN(data_index_sz, CACHE_LINE_SIZE) +
		indices.match_index * sizeof(uint64_t) +
		(match_num + 2) * sizeof(struct rte_acl_match_results) +
		XMM_SIZE;

	mem = rte_zmalloc_socket(ctx->name, total_size, CACHE_LINE_SIZE,
		ctx->socket_id);
	if (mem == NULL) {
		RTE_LOG(ERR, ACL,
			"allocation of %zu bytes on socket %d for %s failed\n",
			total_size, ctx->socket_id, ctx->name);
		return -ENOMEM;
	}
	/* Fill the runtime structure */
	match_index = indices.match_index;
	node_array = (uint64_t *)((uintptr_t)mem +
		RTE_ALIGN(data_index_sz, CACHE_LINE_SIZE));

	/*
	 * Setup the NOMATCH node (a SINGLE at the
	 * highest index, that points to itself)
	 */
	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_DFA_SIZE | RTE_ACL_NODE_SINGLE;

	no_match = RTE_ACL_NODE_MATCH;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		node_array[n] = no_match;

	match = ((struct rte_acl_match_results *)(node_array + match_index));
	memset(match, 0, sizeof(*match));
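	/*
	 * Entries 0 .. RTE_ACL_DFA_SIZE - 1 now form an always-fail DFA
	 * page: every input byte leads to match entry 0, which was just
	 * zeroed, i.e. "no match". Empty tries can therefore use
	 * root_index 0 below.
	 */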
	for (n = 0; n < num_tries; n++) {

		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
			&indices, num_categories);

		if (node_bld_trie[n].trie->node_index == no_match)
			trie[n].root_index = 0;
		else
			trie[n].root_index = node_bld_trie[n].trie->node_index;
	}
	ctx->mem_sz = total_size;
	ctx->data_indexes = mem;
	ctx->num_tries = num_tries;
	ctx->num_categories = num_categories;
	ctx->match_index = match_index;
	ctx->no_match = no_match;
	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
	ctx->trans_table = node_array;
	memcpy(ctx->trie, trie, sizeof(ctx->trie));

	acl_gen_log_stats(ctx, &counts);
	return 0;
}