/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_acl.h>
#include "acl.h"

#define	QRANGE_MIN	((uint8_t)INT8_MIN)

#define	RTE_ACL_VERIFY(exp)	do {					\
	if (!(exp))							\
		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
} while (0)

struct acl_node_counters {
	int32_t match;
	int32_t match_used;
	int32_t single;
	int32_t quad;
	int32_t quad_vectors;
	int32_t dfa;
	int32_t dfa_gr64;
	int32_t smallest_match;
};

struct rte_acl_indices {
	int32_t dfa_index;
	int32_t quad_index;
	int32_t single_index;
	int32_t match_index;
};

static void
acl_gen_log_stats(const struct rte_acl_ctx *ctx,
	const struct acl_node_counters *counts,
	const struct rte_acl_indices *indices)
{
	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
		"runtime memory footprint on socket %d:\n"
		"single nodes/bytes used: %d/%zu\n"
		"quad nodes/vectors/bytes used: %d/%d/%zu\n"
		"DFA nodes/group64/bytes used: %d/%d/%zu\n"
		"match nodes/bytes used: %d/%zu\n"
		"total: %zu bytes\n",
		ctx->name, ctx->socket_id,
		counts->single, counts->single * sizeof(uint64_t),
		counts->quad, counts->quad_vectors,
		(indices->quad_index - indices->dfa_index) * sizeof(uint64_t),
		counts->dfa, counts->dfa_gr64,
		indices->dfa_index * sizeof(uint64_t),
		counts->match,
		counts->match * sizeof(struct rte_acl_match_results),
		ctx->mem_sz);
}

static uint64_t
acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index)
{
	uint64_t idx;
	uint32_t i;

	idx = 0;
	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM);
		RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout);
		idx |= (i - node->dfa_gr64[i]) <<
			(6 + RTE_ACL_DFA_GR64_BIT * i);
	}

	return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type;
}

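/*
 * Illustrative example (not compiled): how acl_dfa_gen_idx() packs a
 * compressed DFA reference, assuming RTE_ACL_DFA_GR64_NUM == 4 input
 * groups of 64 bytes and RTE_ACL_DFA_GR64_BIT == 8 bits per group, the
 * values defined in acl.h at the time of writing. Suppose groups 0 and 2
 * of a node are identical:
 *
 *	dfa_gr64[] = {0, 1, 0, 2}, node->fanout == 3
 *	per-group deltas (i - dfa_gr64[i]) = {0, 0, 2, 1}
 *	idx = 0 << 6 | 0 << 14 | 2 << 22 | 1 << 30 = 0x40800000
 *	return 0x40800000 << 32 | index | RTE_ACL_NODE_DFA;
 *
 * Each 8-bit field in the upper word thus holds the entry offset
 * (delta * 64) that the runtime subtracts from the input byte to land
 * in the de-duplicated group stored for this node.
 */
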
static void
acl_dfa_fill_gr64(const struct rte_acl_node *node,
	const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE])
{
	uint32_t i;

	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE,
			src + i * RTE_ACL_DFA_GR64_SIZE,
			RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0]));
	}
}

static uint32_t
acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE],
	uint8_t gr64[RTE_ACL_DFA_GR64_NUM])
{
	uint32_t i, j, k;

	k = 0;
	for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) {
		gr64[i] = i;
		for (j = 0; j != i; j++) {
			if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE,
					array_ptr + j * RTE_ACL_DFA_GR64_SIZE,
					RTE_ACL_DFA_GR64_SIZE *
					sizeof(array_ptr[0])) == 0)
				break;
		}
		gr64[i] = (j != i) ? gr64[j] : k++;
	}

	return k;
}

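/*
 * Illustrative trace (not compiled): acl_dfa_count_gr64() gives each
 * 64-entry group the id of the first identical group before it,
 * otherwise a fresh id. For the dfa_gr64[] = {0, 1, 0, 2} case above:
 *
 *	i = 0: no earlier group		-> gr64[0] = 0, k = 1
 *	i = 1: differs from group 0	-> gr64[1] = 1, k = 2
 *	i = 2: equals group 0		-> gr64[2] = gr64[0] = 0
 *	i = 3: no earlier match		-> gr64[3] = 2, k = 3
 *
 * The return value k (3) becomes node->fanout: only three distinct
 * groups need storage instead of four.
 */
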
static uint32_t
acl_node_fill_dfa(const struct rte_acl_node *node,
	uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved)
{
	uint32_t n, x;
	uint32_t ranges, last_bit;
	struct rte_acl_node *child;
	struct rte_acl_bitset *bits;

	ranges = 0;
	last_bit = 0;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		dfa[n] = no_match;

	for (x = 0; x < node->num_ptrs; x++) {

		child = node->ptrs[x].ptr;
		if (child == NULL)
			continue;

		bits = &node->ptrs[x].values;
		for (n = 0; n < RTE_ACL_DFA_SIZE; n++) {

			if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1 << (n % (sizeof(bits_t) * CHAR_BIT)))) {

				dfa[n] = resolved ? child->node_index : x;
				ranges += (last_bit == 0);
				last_bit = 1;
			} else {
				last_bit = 0;
			}
		}
	}

	return ranges;
}

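/*
 * Illustrative example (not compiled): for a node with two children
 * where ptrs[0] covers bytes 0x00-0x0f and ptrs[1] covers 0x10-0xff,
 * the table produced with resolved == 0 is:
 *
 *	dfa[0x00..0x0f] = 0	(slot of ptrs[0])
 *	dfa[0x10..0xff] = 1	(slot of ptrs[1])
 *
 * and the function returns 2, one contiguous run per child. With
 * resolved != 0 the children's node_index values are stored instead,
 * ready to be copied into the runtime transition table.
 */
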
/*
 *  Counts the number of groups of sequential bits that are
 *  either 0 or 1, as specified by the zero_one parameter. This is used to
 *  calculate the number of ranges in a node to see if it fits in a quad
 *  range node (see the worked example following acl_count_fanout() below).
 */
static int
acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
{
	int n, ranges, last_bit;

	ranges = 0;
	last_bit = zero_one ^ 1;

	/* scan the upper half (0x80-0xff) first, then the lower half
	 * (0x00-0x7f), matching the signed-byte layout of QRANGE nodes.
	 */
	for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
		if (bits->bits[n / (sizeof(bits_t) * 8)] &
				(1 << (n % (sizeof(bits_t) * 8)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}
	for (n = 0; n < QRANGE_MIN; n++) {
		if (bits->bits[n / (sizeof(bits_t) * 8)] &
				(1 << (n % (sizeof(bits_t) * 8)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}

	return ranges;
}

/*
 * Count number of ranges spanned by the node's pointers.
 */
static int
acl_count_fanout(struct rte_acl_node *node)
{
	uint32_t n;
	int ranges;

	if (node->fanout != 0)
		return node->fanout;

	ranges = acl_count_sequential_groups(&node->values, 0);

	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			ranges += acl_count_sequential_groups(
				&node->ptrs[n].values, 1);
	}

	node->fanout = ranges;
	return node->fanout;
}

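/*
 * Worked example (not compiled): a node with one child matching bytes
 * 0x20-0x7f. In the wrapped scan order (0x80-0xff, then 0x00-0x7f) the
 * byte space reads:
 *
 *	0x80-0xff: no match, 0x00-0x1f: no match, 0x20-0x7f: child 0
 *
 * acl_count_sequential_groups(&child->values, 1) -> 1 (one run of ones)
 * acl_count_sequential_groups(&node->values, 0)  -> 1 (the unmatched
 *	bytes 0x80-0x1f form a single run in this wrapped order)
 *
 * acl_count_fanout() therefore reports 2 ranges, comfortably within
 * RTE_ACL_QUAD_MAX, so the node can be stored as a QRANGE node.
 */
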
/*
 * Determine the type of nodes and count each type.
 */
static int
acl_count_trie_types(struct acl_node_counters *counts,
	struct rte_acl_node *node, uint64_t no_match, int match, int force_dfa)
{
	uint32_t n;
	int num_ptrs;
	uint64_t dfa[RTE_ACL_DFA_SIZE];

	/* skip if this node has been counted */
	if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
		return match;

	if (node->match_flag != 0 || node->num_ptrs == 0) {
		counts->match++;
		if (node->match_flag == -1)
			node->match_flag = match++;
		node->node_type = RTE_ACL_NODE_MATCH;
		if (counts->smallest_match > node->match_flag)
			counts->smallest_match = node->match_flag;
		return match;
	}

	num_ptrs = acl_count_fanout(node);

	/* Force type to dfa */
	if (force_dfa)
		num_ptrs = RTE_ACL_DFA_SIZE;

	/* determine node type based on number of ranges */
	if (num_ptrs == 1) {
		counts->single++;
		node->node_type = RTE_ACL_NODE_SINGLE;
	} else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
		counts->quad++;
		counts->quad_vectors += node->fanout;
		node->node_type = RTE_ACL_NODE_QRANGE;
	} else {
		counts->dfa++;
		node->node_type = RTE_ACL_NODE_DFA;
		if (force_dfa != 0) {
			/* always expand to a max number of nodes. */
			for (n = 0; n != RTE_DIM(node->dfa_gr64); n++)
				node->dfa_gr64[n] = n;
			node->fanout = n;
		} else {
			acl_node_fill_dfa(node, dfa, no_match, 0);
			node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64);
		}
		counts->dfa_gr64 += node->fanout;
	}

	/*
	 * recursively count the types of all children
	 */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			match = acl_count_trie_types(counts, node->ptrs[n].ptr,
				no_match, match, 0);
	}

	return match;
}

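/*
 * Illustrative summary (not compiled): node classification by fanout,
 * assuming RTE_ACL_QUAD_MAX == 5 as defined in acl.h at the time of
 * writing:
 *
 *	fanout == 1		-> RTE_ACL_NODE_SINGLE (one 64-bit entry)
 *	fanout in [2, 5]	-> RTE_ACL_NODE_QRANGE (fanout entries)
 *	fanout > 5		-> RTE_ACL_NODE_DFA (fanout * 64 entries
 *				   after group64 de-duplication)
 *
 * The one-child node from the example above (fanout == 2) would thus
 * become a QRANGE node occupying two transition entries.
 */
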
static void
acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
	int resolved)
{
	uint32_t x;
	int32_t m;
	uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];

	acl_node_fill_dfa(node, dfa, no_match, resolved);

	/*
	 * Rather than going from 0 to 256, the range count and
	 * the layout are from 80-ff then 0-7f due to signed compare
	 * order (see the worked example after this function).
	 */
	if (node->node_type == RTE_ACL_NODE_QRANGE) {

		m = 0;
		node_a = node_array;
		index = dfa[QRANGE_MIN];
		*node_a++ = index;

		for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		for (x = 0; x < INT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		/* fill unused locations with max value - nothing is greater */
		for (; m < RTE_ACL_QUAD_SIZE; m++)
			node->transitions[m] = INT8_MAX;

		RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);

	} else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
		acl_dfa_fill_gr64(node, dfa, node_array);
	}
}

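/*
 * Worked example (not compiled): QRANGE layout for the node above whose
 * only child covers 0x20-0x7f (fanout == 2). Scanning 0x80-0xff and
 * then 0x00-0x7f:
 *
 *	node_array[0] = no_match	(covers 0x80-0xff and 0x00-0x1f)
 *	node_array[1] = child index	(covers 0x20-0x7f)
 *	transitions[] = {0x1f, 0x7f, 0x7f, 0x7f}
 *
 * The boundary bytes are stored so that a signed compare against the
 * input byte selects the right range at lookup time: 0x90 (negative as
 * a signed byte) exceeds no boundary and picks entry 0, while 0x30
 * (48 > 0x1f) picks entry 1. Unused slots hold INT8_MAX, which no
 * signed byte can exceed.
 */
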
/*
 * Routine that allocates space for this node and recursively calls
 * to allocate space for each child. Once all the children are allocated,
 * then resolve all transitions for this node.
 */
static void
acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
	uint64_t no_match, struct rte_acl_indices *index, int num_categories)
{
	uint32_t n, sz, *qtrp;
	uint64_t *array_ptr;
	struct rte_acl_match_results *match;

	if (node->node_index != RTE_ACL_NODE_UNDEFINED)
		return;

	array_ptr = NULL;

	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		array_ptr = &node_array[index->dfa_index];
		node->node_index = acl_dfa_gen_idx(node, index->dfa_index);
		sz = node->fanout * RTE_ACL_DFA_GR64_SIZE;
		index->dfa_index += sz;
		for (n = 0; n < sz; n++)
			array_ptr[n] = no_match;
		break;
	case RTE_ACL_NODE_SINGLE:
		node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
			node->node_type;
		array_ptr = &node_array[index->single_index];
		index->single_index += 1;
		array_ptr[0] = no_match;
		break;
	case RTE_ACL_NODE_QRANGE:
		array_ptr = &node_array[index->quad_index];
		acl_add_ptrs(node, array_ptr, no_match, 0);
		qtrp = (uint32_t *)node->transitions;
		node->node_index = qtrp[0];
		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
		node->node_index |= index->quad_index | node->node_type;
		index->quad_index += node->fanout;
		break;
	case RTE_ACL_NODE_MATCH:
		match = ((struct rte_acl_match_results *)
			(node_array + index->match_index));
		memcpy(match + node->match_flag, node->mrt, sizeof(*node->mrt));
		node->node_index = node->match_flag | node->node_type;
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}

	/* recursively allocate space for all children */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_gen_node(node->ptrs[n].ptr,
				node_array,
				no_match,
				index,
				num_categories);
	}

	/* All children are resolved, resolve this node's pointers */
	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_SINGLE:
		for (n = 0; n < node->num_ptrs; n++) {
			if (node->ptrs[n].ptr != NULL)
				array_ptr[0] = node->ptrs[n].ptr->node_index;
		}
		break;
	case RTE_ACL_NODE_QRANGE:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_MATCH:
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}
}

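/*
 * Illustrative trace (not compiled): for a two-node trie
 * root (QRANGE) -> leaf (MATCH), acl_gen_node(root, ...) proceeds as:
 *
 *	1. root: reserve quad_index entries; its vector still holds
 *	   unresolved pointer slots
 *	2. recurse: leaf gets node_index = match_flag | RTE_ACL_NODE_MATCH
 *	3. root: acl_add_ptrs(..., resolved = 1) rewrites the reserved
 *	   entries with leaf->node_index
 *
 * A node reached through several parents is generated only once; the
 * node_index != RTE_ACL_NODE_UNDEFINED check makes later visits no-ops.
 */
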
static int
acl_calc_counts_indices(struct acl_node_counters *counts,
	struct rte_acl_indices *indices, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	int match_num, uint64_t no_match)
{
	uint32_t n;

	memset(indices, 0, sizeof(*indices));
	memset(counts, 0, sizeof(*counts));

	/* Get stats on nodes */
	for (n = 0; n < num_tries; n++) {
		counts->smallest_match = INT32_MAX;
		match_num = acl_count_trie_types(counts, node_bld_trie[n].trie,
			no_match, match_num, 1);
		trie[n].smallest = counts->smallest_match;
	}

	indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
	indices->quad_index = indices->dfa_index +
		counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE;
	indices->single_index = indices->quad_index + counts->quad_vectors;
	indices->match_index = indices->single_index + counts->single + 1;
	indices->match_index = RTE_ALIGN(indices->match_index,
		(XMM_SIZE / sizeof(uint64_t)));

	return match_num;
}

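/*
 * Illustrative sketch (not compiled) of the node_array layout that the
 * indices above describe. The leading RTE_ACL_DFA_SIZE + 1 entries are
 * reserved for the bootstrap NOMATCH region, then each node class gets
 * a contiguous region:
 *
 *	[0 .. RTE_ACL_DFA_SIZE]		no_match DFA + idle SINGLE node
 *	[dfa_index .. quad_index)	DFA group64 vectors
 *	[quad_index .. single_index)	QRANGE vectors
 *	[single_index .. match_index)	SINGLE entries (plus one spare)
 *	[match_index .. ]		rte_acl_match_results array,
 *					XMM-aligned
 */
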
/*
 * Generate the runtime structure using build structure.
 */
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint32_t num_categories, uint32_t data_index_sz, int match_num)
{
	void *mem;
	size_t total_size;
	uint64_t *node_array, no_match;
	uint32_t n, match_index;
	struct rte_acl_match_results *match;
	struct acl_node_counters counts;
	struct rte_acl_indices indices;

	no_match = RTE_ACL_NODE_MATCH;

	/* Fill counts and indices arrays from the nodes. */
	match_num = acl_calc_counts_indices(&counts, &indices, trie,
		node_bld_trie, num_tries, match_num, no_match);

	/* Allocate runtime memory (align to cache boundary) */
	total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
		indices.match_index * sizeof(uint64_t) +
		(match_num + 2) * sizeof(struct rte_acl_match_results) +
		XMM_SIZE;

	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
			ctx->socket_id);
	if (mem == NULL) {
		RTE_LOG(ERR, ACL,
			"allocation of %zu bytes on socket %d for %s failed\n",
			total_size, ctx->socket_id, ctx->name);
		return -ENOMEM;
	}

	/* Fill the runtime structure */
	match_index = indices.match_index;
	node_array = (uint64_t *)((uintptr_t)mem +
		RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));

	/*
	 * Setup the NOMATCH node (a SINGLE at the
	 * highest index, that points to itself)
	 */
	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_DFA_SIZE | RTE_ACL_NODE_SINGLE;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		node_array[n] = no_match;

	/* NOMATCH result at index 0 */
	match = ((struct rte_acl_match_results *)(node_array + match_index));
	memset(match, 0, sizeof(*match));

	for (n = 0; n < num_tries; n++) {

		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
			&indices, num_categories);

		if (node_bld_trie[n].trie->node_index == no_match)
			trie[n].root_index = 0;
		else
			trie[n].root_index = node_bld_trie[n].trie->node_index;
	}

	ctx->mem_sz = total_size;
	ctx->data_indexes = mem;
	ctx->num_tries = num_tries;
	ctx->num_categories = num_categories;
	ctx->match_index = match_index;
	ctx->no_match = no_match;
	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
	ctx->trans_table = node_array;
	memcpy(ctx->trie, trie, sizeof(ctx->trie));

	acl_gen_log_stats(ctx, &counts, &indices);
	return 0;
}

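/*
 * Usage sketch (illustrative, with assumptions): rte_acl_gen() is an
 * internal entry point reached through the public rte_acl_build() once
 * the build-time tries exist. A minimal application-side flow would be:
 *
 *	struct rte_acl_config cfg;
 *
 *	memset(&cfg, 0, sizeof(cfg));
 *	cfg.num_categories = 1;
 *	cfg.num_fields = ...;		set up cfg.defs[] for the ruleset
 *	if (rte_acl_build(ctx, &cfg) != 0)
 *		rte_panic("ACL build/gen failed\n");
 *
 * On success, the transition table and match result area populated here
 * are what rte_acl_classify() walks at lookup time.
 */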