1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
5 #include "acl_run_sse.h"
7 /*sizeof(uint32_t) << ACL_MATCH_LOG == sizeof(struct rte_acl_match_results)*/
8 #define ACL_MATCH_LOG 5
/*
 * Search state for one pass over a single trie. acl_set_flow_avx512()
 * fills it in from an rte_acl_ctx, a trie index and its other arguments.
 */
10 struct acl_flow_avx512 {
11 uint32_t num_packets; /* number of packets processed */
12 uint32_t total_packets; /* max number of packets to process */
13 uint32_t root_index; /* current root index (ctx->trie[trie].root_index) */
14 uint32_t first_load_sz; /* first load size for new packet (ctx->first_load_sz) */
15 const uint64_t *trans; /* transition table (ctx->trans_table) */
16 const uint32_t *data_index; /* input data indexes (ctx->trie[trie].data_index) */
17 const uint8_t **idata; /* input data */
18 uint32_t *matches; /* match indexes */
22 acl_set_flow_avx512(struct acl_flow_avx512 *flow, const struct rte_acl_ctx *ctx,
23 uint32_t trie, const uint8_t *data[], uint32_t *matches,
24 uint32_t total_packets)
/* start of a new pass: no packet processed yet, total_packets to go */
26 flow->num_packets = 0;
27 flow->total_packets = total_packets;
/* lookup parameters come from the context and from the selected trie */
28 flow->first_load_sz = ctx->first_load_sz;
29 flow->root_index = ctx->trie[trie].root_index;
30 flow->trans = ctx->trans_table;
31 flow->data_index = ctx->trie[trie].data_index;
/* caller-supplied array that receives the match indexes */
33 flow->matches = matches;
37 * Update flow and result masks based on the number of unprocessed flows.
39 static inline uint32_t
40 update_flow_mask(const struct acl_flow_avx512 *flow, uint32_t *fmsk,
43 uint32_t i, j, k, m, n;
/* k: number of bits currently set in m (m is assigned on a line not shown) */
48 k = __builtin_popcount(m);
/* n: packets that have not been processed yet */
49 n = flow->total_packets - flow->num_packets;
/*
 * Runs k - n times, i.e. once per set bit in excess of the remaining packets.
 * NOTE(review): k - n is unsigned, so this is only meaningful for k > n;
 * presumably a guard on a line not shown here handles k <= n - confirm.
 */
53 for (i = k - n; i != 0; i--) {
/*
 * j: bit position of the most significant set bit of m.
 * m has to be non-zero here, __builtin_clz(0) is undefined.
 */
54 j = sizeof(m) * CHAR_BIT - 1 - __builtin_clz(m);
67 * Resolve matches for multiple categories (LE 8, use 128b instructions/regs)
70 resolve_mcle8_avx512x1(uint32_t result[],
71 const struct rte_acl_match_results pr[], const uint32_t match[],
72 uint32_t nb_pkt, uint32_t nb_cat, uint32_t nb_trie)
75 const uint32_t *pm, *res;
76 uint32_t i, j, k, mi, mn;
/* one iteration per packet; result moves on by nb_cat entries each time */
83 for (k = 0; k != nb_pkt; k++, result += nb_cat) {
/*
 * Offset of packet k's first-trie match inside res/pri: match[k] scaled
 * by the size of one rte_acl_match_results entry counted in uint32_t
 * units (see ACL_MATCH_LOG).
 * NOTE(review): res and pri are set up on lines not shown here.
 */
85 mi = match[k] << ACL_MATCH_LOG;
/*
 * Walk the categories RTE_ACL_RESULTS_MULTIPLIER at a time. The '!=' test
 * means nb_cat has to be a multiple of that step for the loop to end.
 */
87 for (j = 0; j != nb_cat; j += RTE_ACL_RESULTS_MULTIPLIER) {
/* cr/cp: current best results and their priorities, 128 bits each */
89 cr = _mm_loadu_si128((const xmm_t *)(res + mi + j));
90 cp = _mm_loadu_si128((const xmm_t *)(pri + mi + j));
/*
 * Fold in the remaining tries; their match indexes start at
 * match + nb_pkt.
 * NOTE(review): pm is presumably advanced per trie on the part of the
 * loop header that is not shown here - confirm.
 */
92 for (i = 1, pm = match + nb_pkt; i != nb_trie;
95 mn = j + (pm[k] << ACL_MATCH_LOG);
/* nr/np: results and priorities of packet k's match in this trie */
97 nr = _mm_loadu_si128((const xmm_t *)(res + mn));
98 np = _mm_loadu_si128((const xmm_t *)(pri + mn));
/*
 * msk marks the lanes where the current priority is strictly greater
 * (signed 32-bit compare). Those lanes keep cr/cp, every other lane
 * takes nr/np, so on equal priority the later trie's result wins.
 */
100 msk = _mm_cmpgt_epi32_mask(cp, np);
101 cr = _mm_mask_mov_epi32(nr, msk, cr);
102 cp = _mm_mask_mov_epi32(np, msk, cp);
/* unaligned 128-bit store of the winning results for this category group */
105 _mm_storeu_si128((xmm_t *)(result + j), cr);
110 #include "acl_run_avx512x8.h"
113 rte_acl_classify_avx512x16(const struct rte_acl_ctx *ctx, const uint8_t **data,
114 uint32_t *results, uint32_t num, uint32_t categories)
/* number of requests handed to one search call inside the loop below */
116 const uint32_t max_iter = MAX_SEARCHES_AVX16 * MAX_SEARCHES_AVX16;
118 /* split huge lookup (gt 256) into series of fixed size ones */
119 while (num > max_iter) {
/* the return value of the per-batch search is not checked */
120 search_avx512x8x2(ctx, data, results, max_iter, categories);
/*
 * Move past the max_iter * categories result entries of this batch.
 * NOTE(review): data and num are presumably advanced on lines not shown
 * here - confirm.
 */
122 results += max_iter * categories;
126 /* select classify method based on number of remaining requests */
/* thresholds are tested from largest to smallest; scalar code is the fallback */
127 if (num >= MAX_SEARCHES_AVX16)
128 return search_avx512x8x2(ctx, data, results, num, categories);
129 if (num >= MAX_SEARCHES_SSE8)
130 return search_sse_8(ctx, data, results, num, categories);
131 if (num >= MAX_SEARCHES_SSE4)
132 return search_sse_4(ctx, data, results, num, categories);
134 return rte_acl_classify_scalar(ctx, data, results, num, categories);
137 #include "acl_run_avx512x16.h"
140 rte_acl_classify_avx512x32(const struct rte_acl_ctx *ctx, const uint8_t **data,
141 uint32_t *results, uint32_t num, uint32_t categories)
/* number of requests handed to one search call inside the loop below */
143 const uint32_t max_iter = MAX_SEARCHES_AVX16 * MAX_SEARCHES_AVX16;
145 /* split huge lookup (gt 256) into series of fixed size ones */
146 while (num > max_iter) {
/* the return value of the per-batch search is not checked */
147 search_avx512x16x2(ctx, data, results, max_iter, categories);
/*
 * Move past the max_iter * categories result entries of this batch.
 * NOTE(review): data and num are presumably advanced on lines not shown
 * here - confirm.
 */
149 results += max_iter * categories;
153 /* select classify method based on number of remaining requests */
/*
 * Thresholds are tested from largest to smallest: the x16x2 search needs at
 * least 2 * MAX_SEARCHES_AVX16 requests, then x8x2, SSE8 and SSE4 follow,
 * with the scalar code as the fallback.
 */
154 if (num >= 2 * MAX_SEARCHES_AVX16)
155 return search_avx512x16x2(ctx, data, results, num, categories);
156 if (num >= MAX_SEARCHES_AVX16)
157 return search_avx512x8x2(ctx, data, results, num, categories);
158 if (num >= MAX_SEARCHES_SSE8)
159 return search_sse_8(ctx, data, results, num, categories);
160 if (num >= MAX_SEARCHES_SSE4)
161 return search_sse_4(ctx, data, results, num, categories);
163 return rte_acl_classify_scalar(ctx, data, results, num, categories);