X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_acl%2Facl_vect.h;h=194fca902a3e60d917ac41e871cdc046c56c4fdc;hb=816c924e9e26ae82c3d7186d3206f0ee72e78213;hp=d8136003f62bc7650d14160ac8c8ffd737e60e52;hpb=dc276b5780c29a86c537774b6f7b91379ee0690d;p=dpdk.git diff --git a/lib/librte_acl/acl_vect.h b/lib/librte_acl/acl_vect.h index d8136003f6..194fca902a 100644 --- a/lib/librte_acl/acl_vect.h +++ b/lib/librte_acl/acl_vect.h @@ -1,34 +1,5 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation */ #ifndef _RTE_ACL_VECT_H_ @@ -44,86 +15,70 @@ extern "C" { #endif -#define MM_ADD16(a, b) _mm_add_epi16(a, b) -#define MM_ADD32(a, b) _mm_add_epi32(a, b) -#define MM_ALIGNR8(a, b, c) _mm_alignr_epi8(a, b, c) -#define MM_AND(a, b) _mm_and_si128(a, b) -#define MM_ANDNOT(a, b) _mm_andnot_si128(a, b) -#define MM_BLENDV8(a, b, c) _mm_blendv_epi8(a, b, c) -#define MM_CMPEQ16(a, b) _mm_cmpeq_epi16(a, b) -#define MM_CMPEQ32(a, b) _mm_cmpeq_epi32(a, b) -#define MM_CMPEQ8(a, b) _mm_cmpeq_epi8(a, b) -#define MM_CMPGT32(a, b) _mm_cmpgt_epi32(a, b) -#define MM_CMPGT8(a, b) _mm_cmpgt_epi8(a, b) -#define MM_CVT(a) _mm_cvtsi32_si128(a) -#define MM_CVT32(a) _mm_cvtsi128_si32(a) -#define MM_CVTU32(a) _mm_cvtsi32_si128(a) -#define MM_INSERT16(a, c, b) _mm_insert_epi16(a, c, b) -#define MM_INSERT32(a, c, b) _mm_insert_epi32(a, c, b) -#define MM_LOAD(a) _mm_load_si128(a) -#define MM_LOADH_PI(a, b) _mm_loadh_pi(a, b) -#define MM_LOADU(a) _mm_loadu_si128(a) -#define MM_MADD16(a, b) _mm_madd_epi16(a, b) -#define MM_MADD8(a, b) _mm_maddubs_epi16(a, b) -#define MM_MOVEMASK8(a) _mm_movemask_epi8(a) -#define MM_OR(a, b) _mm_or_si128(a, b) -#define MM_SET1_16(a) _mm_set1_epi16(a) -#define MM_SET1_32(a) _mm_set1_epi32(a) -#define MM_SET1_64(a) _mm_set1_epi64(a) -#define MM_SET1_8(a) _mm_set1_epi8(a) -#define MM_SET32(a, b, c, d) _mm_set_epi32(a, b, c, d) -#define 
MM_SHUFFLE32(a, b) _mm_shuffle_epi32(a, b) -#define MM_SHUFFLE8(a, b) _mm_shuffle_epi8(a, b) -#define MM_SHUFFLEPS(a, b, c) _mm_shuffle_ps(a, b, c) -#define MM_SIGN8(a, b) _mm_sign_epi8(a, b) -#define MM_SLL64(a, b) _mm_sll_epi64(a, b) -#define MM_SRL128(a, b) _mm_srli_si128(a, b) -#define MM_SRL16(a, b) _mm_srli_epi16(a, b) -#define MM_SRL32(a, b) _mm_srli_epi32(a, b) -#define MM_STORE(a, b) _mm_store_si128(a, b) -#define MM_STOREU(a, b) _mm_storeu_si128(a, b) -#define MM_TESTZ(a, b) _mm_testz_si128(a, b) -#define MM_XOR(a, b) _mm_xor_si128(a, b) - -#define MM_SET16(a, b, c, d, e, f, g, h) \ - _mm_set_epi16(a, b, c, d, e, f, g, h) - -#define MM_SET8(c0, c1, c2, c3, c4, c5, c6, c7, \ - c8, c9, cA, cB, cC, cD, cE, cF) \ - _mm_set_epi8(c0, c1, c2, c3, c4, c5, c6, c7, \ - c8, c9, cA, cB, cC, cD, cE, cF) - -#ifdef RTE_ARCH_X86_64 - -#define MM_CVT64(a) _mm_cvtsi128_si64(a) - -#else - -#define MM_CVT64(a) ({ \ - rte_xmm_t m; \ - m.m = (a); \ - (m.u64[0]); \ -}) - -#endif /*RTE_ARCH_X86_64 */ /* - * Prior to version 12.1 icc doesn't support _mm_set_epi64x. + * Takes 2 SIMD registers containing N transitions each (tr0, tr1). + * Shuffles them into a different representation: + * lo - contains low 32 bits of given N transitions. + * hi - contains high 32 bits of given N transitions. */ -#if (defined(__ICC) && __ICC < 1210) - -#define MM_SET64(a, b) ({ \ - rte_xmm_t m; \ - m.u64[0] = b; \ - m.u64[1] = a; \ - (m.m); \ -}) +#define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi) do { \ + lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88); \ + hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd); \ +} while (0) -#else -#define MM_SET64(a, b) _mm_set_epi64x(a, b) +/* + * Calculate the address of the next transition for + * all types of nodes. Note that only DFA nodes and range + * nodes actually transition to another node. Match + * nodes are not supposed to be encountered here. 
+ * For quad range nodes: + * Calculate number of range boundaries that are less than the + * input value. Range boundaries for each node are in signed 8 bit, + * ordered from -128 to 127. + * This is effectively a popcnt of bytes that are less than the + * input byte. + * Single nodes are processed in the same way as quad range nodes. +*/ +#define ACL_TR_CALC_ADDR(P, S, \ + addr, index_mask, next_input, shuffle_input, \ + ones_16, range_base, tr_lo, tr_hi) do { \ + \ + typeof(addr) in, node_type, r, t; \ + typeof(addr) dfa_msk, dfa_ofs, quad_ofs; \ + \ + t = _##P##_xor_si##S(index_mask, index_mask); \ + in = _##P##_shuffle_epi8(next_input, shuffle_input); \ + \ + /* Calc node type and node addr */ \ + node_type = _##P##_andnot_si##S(index_mask, tr_lo); \ + addr = _##P##_and_si##S(index_mask, tr_lo); \ + \ + /* mask for DFA type(0) nodes */ \ + dfa_msk = _##P##_cmpeq_epi32(node_type, t); \ + \ + /* DFA calculations. */ \ + r = _##P##_srli_epi32(in, 30); \ + r = _##P##_add_epi8(r, range_base); \ + t = _##P##_srli_epi32(in, 24); \ + r = _##P##_shuffle_epi8(tr_hi, r); \ + \ + dfa_ofs = _##P##_sub_epi32(t, r); \ + \ + /* QUAD/SINGLE calculations. */ \ + t = _##P##_cmpgt_epi8(in, tr_hi); \ + t = _##P##_sign_epi8(t, t); \ + t = _##P##_maddubs_epi16(t, t); \ + quad_ofs = _##P##_madd_epi16(t, ones_16); \ + \ + /* blend DFA and QUAD/SINGLE. */ \ + t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk); \ + \ + /* calculate address for next transitions. */ \ + addr = _##P##_add_epi32(addr, t); \ +} while (0) -#endif /* (defined(__ICC) && __ICC < 1210) */ #ifdef __cplusplus }