X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_acl%2Facl_run_avx512.c;h=f5bc628b7c49453b4fab1e7921928d014cf51a06;hb=b64c2295f7fcfae9273c1501819826e59449e185;hp=1817f88b29f738a81622198792e77625cec7d30f;hpb=7c6cca6b604d59fe1ab5eb4639507c4dfdbc4592;p=dpdk.git diff --git a/lib/librte_acl/acl_run_avx512.c b/lib/librte_acl/acl_run_avx512.c index 1817f88b29..f5bc628b7c 100644 --- a/lib/librte_acl/acl_run_avx512.c +++ b/lib/librte_acl/acl_run_avx512.c @@ -4,10 +4,126 @@ #include "acl_run_sse.h" +/*sizeof(uint32_t) << match_log == sizeof(struct rte_acl_match_results)*/ +static const uint32_t match_log = 5; + +struct acl_flow_avx512 { + uint32_t num_packets; /* number of packets processed */ + uint32_t total_packets; /* max number of packets to process */ + uint32_t root_index; /* current root index */ + const uint64_t *trans; /* transition table */ + const uint32_t *data_index; /* input data indexes */ + const uint8_t **idata; /* input data */ + uint32_t *matches; /* match indexes */ +}; + +static inline void +acl_set_flow_avx512(struct acl_flow_avx512 *flow, const struct rte_acl_ctx *ctx, + uint32_t trie, const uint8_t *data[], uint32_t *matches, + uint32_t total_packets) +{ + flow->num_packets = 0; + flow->total_packets = total_packets; + flow->root_index = ctx->trie[trie].root_index; + flow->trans = ctx->trans_table; + flow->data_index = ctx->trie[trie].data_index; + flow->idata = data; + flow->matches = matches; +} + +/* + * Update flow and result masks based on the number of unprocessed flows. + */ +static inline uint32_t +update_flow_mask(const struct acl_flow_avx512 *flow, uint32_t *fmsk, + uint32_t *rmsk) +{ + uint32_t i, j, k, m, n; + + fmsk[0] ^= rmsk[0]; + m = rmsk[0]; + + k = __builtin_popcount(m); + n = flow->total_packets - flow->num_packets; + + if (n < k) { + /* reduce mask */ + for (i = k - n; i != 0; i--) { + j = sizeof(m) * CHAR_BIT - 1 - __builtin_clz(m); + m ^= 1 << j; + } + } else + n = k; + + rmsk[0] = m; + fmsk[0] |= rmsk[0]; + + return n; +} + +/* + * Resolve matches for multiple categories (LE 8, use 128b instuctions/regs) + */ +static inline void +resolve_mcle8_avx512x1(uint32_t result[], + const struct rte_acl_match_results pr[], const uint32_t match[], + uint32_t nb_pkt, uint32_t nb_cat, uint32_t nb_trie) +{ + const int32_t *pri; + const uint32_t *pm, *res; + uint32_t i, j, k, mi, mn; + __mmask8 msk; + xmm_t cp, cr, np, nr; + + res = pr->results; + pri = pr->priority; + + for (k = 0; k != nb_pkt; k++, result += nb_cat) { + + mi = match[k] << match_log; + + for (j = 0; j != nb_cat; j += RTE_ACL_RESULTS_MULTIPLIER) { + + cr = _mm_loadu_si128((const xmm_t *)(res + mi + j)); + cp = _mm_loadu_si128((const xmm_t *)(pri + mi + j)); + + for (i = 1, pm = match + nb_pkt; i != nb_trie; + i++, pm += nb_pkt) { + + mn = j + (pm[k] << match_log); + + nr = _mm_loadu_si128((const xmm_t *)(res + mn)); + np = _mm_loadu_si128((const xmm_t *)(pri + mn)); + + msk = _mm_cmpgt_epi32_mask(cp, np); + cr = _mm_mask_mov_epi32(nr, msk, cr); + cp = _mm_mask_mov_epi32(np, msk, cp); + } + + _mm_storeu_si128((xmm_t *)(result + j), cr); + } + } +} + +#include "acl_run_avx512x8.h" + int rte_acl_classify_avx512x16(const struct rte_acl_ctx *ctx, const uint8_t **data, uint32_t *results, uint32_t num, uint32_t categories) { + const uint32_t max_iter = MAX_SEARCHES_AVX16 * MAX_SEARCHES_AVX16; + + /* split huge lookup (gt 256) into series of fixed size ones */ + while (num > max_iter) { + search_avx512x8x2(ctx, data, results, max_iter, categories); + data += max_iter; + results += max_iter * categories; + num -= max_iter; + } + + /* select classify method based on number of remaining requests */ + if (num >= MAX_SEARCHES_AVX16) + return search_avx512x8x2(ctx, data, results, num, categories); if (num >= MAX_SEARCHES_SSE8) return search_sse_8(ctx, data, results, num, categories); if (num >= MAX_SEARCHES_SSE4) @@ -20,6 +136,19 @@ int rte_acl_classify_avx512x32(const struct rte_acl_ctx *ctx, const uint8_t **data, uint32_t *results, uint32_t num, uint32_t categories) { + const uint32_t max_iter = MAX_SEARCHES_AVX16 * MAX_SEARCHES_AVX16; + + /* split huge lookup (gt 256) into series of fixed size ones */ + while (num > max_iter) { + search_avx512x8x2(ctx, data, results, max_iter, categories); + data += max_iter; + results += max_iter * categories; + num -= max_iter; + } + + /* select classify method based on number of remaining requests */ + if (num >= MAX_SEARCHES_AVX16) + return search_avx512x8x2(ctx, data, results, num, categories); if (num >= MAX_SEARCHES_SSE8) return search_sse_8(ctx, data, results, num, categories); if (num >= MAX_SEARCHES_SSE4)