int32_t socket_id;
/** Socket ID to allocate memory from. */
enum rte_acl_classify_alg alg;
+ uint32_t first_load_sz;
void *rules;
uint32_t max_rules;
uint32_t rule_sz;
return 0;
}
+/*
+ * With current ACL implementation first field in the rule definition
+ * has always to be one byte long. Though for optimising *classify*
+ * implementation it might be useful to be able to use 4B reads
+ * (as we do for rest of the fields).
+ * This function checks input config to determine is it safe to do 4B
+ * loads for first ACL field. For that we need to make sure that
+ * first field in our rule definition doesn't have the biggest offset,
+ * i.e. we still do have other fields located after the first one.
+ * Contrary if first field has the largest offset, then it means
+ * first field can occupy the very last byte in the input data buffer,
+ * and we have to do single byte load for it.
+ */
+static uint32_t
+get_first_load_size(const struct rte_acl_config *cfg)
+{
+ uint32_t i, max_ofs, ofs;
+
+ ofs = 0;
+ max_ofs = 0;
+
+ for (i = 0; i != cfg->num_fields; i++) {
+ if (cfg->defs[i].field_index == 0)
+ ofs = cfg->defs[i].offset;
+ else if (max_ofs < cfg->defs[i].offset)
+ max_ofs = cfg->defs[i].offset;
+ }
+
+ return (ofs < max_ofs) ? sizeof(uint32_t) : sizeof(uint8_t);
+}
+
int
rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg)
{
/* set data indexes. */
acl_set_data_indexes(ctx);
+ /* determine can we always do 4B load */
+ ctx->first_load_sz = get_first_load_size(cfg);
+
/* copy in build config. */
ctx->config = *cfg;
}
uint32_t num_packets; /* number of packets processed */
uint32_t total_packets; /* max number of packets to process */
uint32_t root_index; /* current root index */
+ uint32_t first_load_sz; /* first load size for new packet */
const uint64_t *trans; /* transition table */
const uint32_t *data_index; /* input data indexes */
const uint8_t **idata; /* input data */
{
flow->num_packets = 0;
flow->total_packets = total_packets;
+ flow->first_load_sz = ctx->first_load_sz;
flow->root_index = ctx->trie[trie].root_index;
flow->trans = ctx->trans_table;
flow->data_index = ctx->trie[trie].data_index;
if (n[0] != 0) {
inp[0] = get_next_bytes_avx512x16(flow, &pdata[0],
- rm[0], &di[0], sizeof(uint8_t));
+ rm[0], &di[0], flow->first_load_sz);
first_trans16(flow, inp[0], rm[0], &tr_lo[0],
&tr_hi[0]);
rm[0] = _mm512_test_epi32_mask(tr_lo[0],
if (n[1] != 0) {
inp[1] = get_next_bytes_avx512x16(flow, &pdata[2],
- rm[1], &di[1], sizeof(uint8_t));
+ rm[1], &di[1], flow->first_load_sz);
first_trans16(flow, inp[1], rm[1], &tr_lo[1],
&tr_hi[1]);
rm[1] = _mm512_test_epi32_mask(tr_lo[1],
start_flow16(flow, MASK16_BIT, UINT16_MAX, &pdata[2], &idx[1], &di[1]);
in[0] = get_next_bytes_avx512x16(flow, &pdata[0], UINT16_MAX, &di[0],
- sizeof(uint8_t));
+ flow->first_load_sz);
in[1] = get_next_bytes_avx512x16(flow, &pdata[2], UINT16_MAX, &di[1],
- sizeof(uint8_t));
+ flow->first_load_sz);
first_trans16(flow, in[0], UINT16_MAX, &tr_lo[0], &tr_hi[0]);
first_trans16(flow, in[1], UINT16_MAX, &tr_lo[1], &tr_hi[1]);
if (n[0] != 0) {
inp[0] = get_next_bytes_avx512x8(flow, &pdata[0],
- rm[0], &di[0], sizeof(uint8_t));
+ rm[0], &di[0], flow->first_load_sz);
first_trans8(flow, inp[0], rm[0], &tr_lo[0],
&tr_hi[0]);
rm[0] = _mm256_test_epi32_mask(tr_lo[0],
if (n[1] != 0) {
inp[1] = get_next_bytes_avx512x8(flow, &pdata[2],
- rm[1], &di[1], sizeof(uint8_t));
+ rm[1], &di[1], flow->first_load_sz);
first_trans8(flow, inp[1], rm[1], &tr_lo[1],
&tr_hi[1]);
rm[1] = _mm256_test_epi32_mask(tr_lo[1],
start_flow8(flow, MASK8_BIT, UINT8_MAX, &pdata[2], &idx[1], &di[1]);
in[0] = get_next_bytes_avx512x8(flow, &pdata[0], UINT8_MAX, &di[0],
- sizeof(uint8_t));
+ flow->first_load_sz);
in[1] = get_next_bytes_avx512x8(flow, &pdata[2], UINT8_MAX, &di[1],
- sizeof(uint8_t));
+ flow->first_load_sz);
first_trans8(flow, in[0], UINT8_MAX, &tr_lo[0], &tr_hi[0]);
first_trans8(flow, in[1], UINT8_MAX, &tr_lo[1], &tr_hi[1]);
printf("acl context <%s>@%p\n", ctx->name, ctx);
printf(" socket_id=%"PRId32"\n", ctx->socket_id);
printf(" alg=%"PRId32"\n", ctx->alg);
+ printf(" first_load_sz=%"PRIu32"\n", ctx->first_load_sz);
printf(" max_rules=%"PRIu32"\n", ctx->max_rules);
printf(" rule_size=%"PRIu32"\n", ctx->rule_sz);
printf(" num_rules=%"PRIu32"\n", ctx->num_rules);