-static inline __attribute__((always_inline)) ymm_t
-calc_addr_avx2(ymm_t index_mask, ymm_t next_input, ymm_t shuffle_input,
- ymm_t ones_16, ymm_t tr_lo, ymm_t tr_hi)
-{
- ymm_t in, node_type, r, t;
- ymm_t dfa_msk, dfa_ofs, quad_ofs;
- ymm_t addr;
-
- const ymm_t range_base = _mm256_set_epi32(
- 0xffffff0c, 0xffffff08, 0xffffff04, 0xffffff00,
- 0xffffff0c, 0xffffff08, 0xffffff04, 0xffffff00);
-
- t = _mm256_xor_si256(index_mask, index_mask);
- in = _mm256_shuffle_epi8(next_input, shuffle_input);
-
- /* Calc node type and node addr */
- node_type = _mm256_andnot_si256(index_mask, tr_lo);
- addr = _mm256_and_si256(index_mask, tr_lo);
-
- /* DFA calculations. */
-
- dfa_msk = _mm256_cmpeq_epi32(node_type, t);
-
- r = _mm256_srli_epi32(in, 30);
- r = _mm256_add_epi8(r, range_base);
-
- t = _mm256_srli_epi32(in, 24);
- r = _mm256_shuffle_epi8(tr_hi, r);
-
- dfa_ofs = _mm256_sub_epi32(t, r);
-
- /* QUAD/SINGLE caluclations. */
-
- t = _mm256_cmpgt_epi8(in, tr_hi);
- t = _mm256_sign_epi8(t, t);
- t = _mm256_maddubs_epi16(t, t);
- quad_ofs = _mm256_madd_epi16(t, ones_16);
-
- /* blend DFA and QUAD/SINGLE. */
- t = _mm256_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk);
-
- addr = _mm256_add_epi32(addr, t);
- return addr;
-}