4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #ifndef _RTE_ACL_VECT_H_
35 #define _RTE_ACL_VECT_H_
40 * RTE ACL SSE/AVX related header.
49 * Takes 2 SIMD registers containing N transitions each (tr0, tr1).
50 * Shuffles them into a different representation:
51 * lo - contains low 32 bits of given N transitions.
52 * hi - contains high 32 bits of given N transitions.
/*
 * ACL_TR_HILO: split two transition registers into separate lo/hi registers.
 *   P        - intrinsic family prefix, token-pasted into _<P>_shuffle_ps.
 *   TC       - type that tr0 and tr1 are cast to before the shuffle.
 *   tr0, tr1 - input registers; each is expanded twice, so pass plain
 *              variables, not expressions with side effects.
 *   lo, hi   - output lvalues; the shuffle result is cast back to their own
 *              types via typeof().
 */
54 #define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi) do { \
/* imm 0x88: 32-bit elements 0 and 2 of tr0, then elements 0 and 2 of tr1. */ \
55 lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88); \
/* imm 0xdd: 32-bit elements 1 and 3 of tr0, then elements 1 and 3 of tr1. */ \
56 hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd); \
61 * Calculate the address of the next transition for
62 * all types of nodes. Note that only DFA nodes and range
63 * nodes actually transition to another node. Match
64 * nodes are not supposed to be encountered here.
65 * For quad range nodes:
66 * Calculate number of range boundaries that are less than the
67 * input value. Range boundaries for each node are in signed 8 bit,
68 * ordered from -128 to 127.
69 * This is effectively a popcnt of the boundary bytes that the input byte is greater than.
71 * Single nodes are processed in the same way as quad range nodes.
/*
 * ACL_TR_CALC_ADDR: compute, per 32-bit lane, the address of the next
 * transition and store it in addr.
 *   P             - intrinsic family prefix, token-pasted into _<P>_<op>.
 *   S             - suffix token-pasted into the _si<S> bitwise intrinsics.
 *   addr          - output lvalue; its type (via typeof) is also used for
 *                   all temporaries declared inside the macro.
 *   index_mask    - bit mask separating the address bits of tr_lo (inside
 *                   the mask) from the node-type bits (outside the mask).
 *   next_input    - source bytes, rearranged by shuffle_input before use.
 *   shuffle_input - byte-shuffle control applied to next_input.
 *   ones_16       - multiplier for the final 16-bit multiply-add; presumably
 *                   holds 1 in every 16-bit lane - TODO confirm at call sites.
 *   range_base    - per-byte value added to the DFA byte selector.
 *   tr_lo, tr_hi  - low and high halves of the current transitions.
 *
 * index_mask, tr_lo and tr_hi are expanded more than once, so pass plain
 * variables. The macro declares locals named in, node_type, r, t, dfa_msk,
 * dfa_ofs and quad_ofs; arguments using those names would be shadowed.
 */
73 #define ACL_TR_CALC_ADDR(P, S, \
74 addr, index_mask, next_input, shuffle_input, \
75 ones_16, range_base, tr_lo, tr_hi) do { \
77 typeof(addr) in, node_type, r, t; \
78 typeof(addr) dfa_msk, dfa_ofs, quad_ofs; \
/* x ^ x: builds an all-zero vector without needing a zero constant. */ \
80 t = _##P##_xor_si##S(index_mask, index_mask); \
/* Rearrange the bytes of next_input as directed by shuffle_input. */ \
81 in = _##P##_shuffle_epi8(next_input, shuffle_input); \
83 /* Calc node type and node addr */ \
/* node_type = ~index_mask & tr_lo; addr = index_mask & tr_lo. */ \
84 node_type = _##P##_andnot_si##S(index_mask, tr_lo); \
85 addr = _##P##_and_si##S(index_mask, tr_lo); \
87 /* mask for DFA type(0) nodes */ \
/* All-ones in each 32-bit lane whose node_type equals zero (t is zero). */ \
88 dfa_msk = _##P##_cmpeq_epi32(node_type, t); \
90 /* DFA calculations. */ \
/* r: top two bits of each 32-bit input lane, offset bytewise by range_base. */ \
91 r = _##P##_srli_epi32(in, 30); \
92 r = _##P##_add_epi8(r, range_base); \
/* t: top byte of each 32-bit input lane. */ \
93 t = _##P##_srli_epi32(in, 24); \
/* Use r as per-byte indices to pick bytes out of tr_hi. */ \
94 r = _##P##_shuffle_epi8(tr_hi, r); \
/* DFA offset = input byte minus the byte selected from tr_hi. */ \
96 dfa_ofs = _##P##_sub_epi32(t, r); \
98 /* QUAD/SINGLE calculations. */ \
/* 0xFF in every byte where the input byte is greater (signed) than tr_hi. */ \
99 t = _##P##_cmpgt_epi8(in, tr_hi); \
/* sign(t, t) negates the negative bytes: 0xFF (-1) becomes 1, 0 stays 0. */ \
100 t = _##P##_sign_epi8(t, t); \
/* Multiply bytes pairwise and sum adjacent pairs into 16-bit lanes. */ \
101 t = _##P##_maddubs_epi16(t, t); \
/* Multiply by ones_16 and sum adjacent 16-bit pairs into 32-bit lanes. */ \
102 quad_ofs = _##P##_madd_epi16(t, ones_16); \
104 /* blend DFA and QUAD/SINGLE. */ \
/* Take dfa_ofs where dfa_msk is set, quad_ofs elsewhere. */ \
105 t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk); \
107 /* calculate address for next transitions. */ \
108 addr = _##P##_add_epi32(addr, t); \
116 #endif /* _RTE_ACL_VECT_H_ */