lib/librte_acl/acl_vect.h

   1 /*-
   2  *   BSD LICENSE
   3  *
   4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
   5  *   All rights reserved.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following conditions
   9  *   are met:
  10  *
  11  *     * Redistributions of source code must retain the above copyright
  12  *       notice, this list of conditions and the following disclaimer.
  13  *     * Redistributions in binary form must reproduce the above copyright
  14  *       notice, this list of conditions and the following disclaimer in
  15  *       the documentation and/or other materials provided with the
  16  *       distribution.
  17  *     * Neither the name of Intel Corporation nor the names of its
  18  *       contributors may be used to endorse or promote products derived
  19  *       from this software without specific prior written permission.
  20  *
  21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32  */
  33
  34 #ifndef _RTE_ACL_VECT_H_
  35 #define _RTE_ACL_VECT_H_
  36
  37 /**
  38  * @file
  39  *
  40  * RTE ACL SSE/AVX related header.
  41  */
  42
  43 #ifdef __cplusplus
  44 extern "C" {
  45 #endif
  46
  47
  48 /*
  49  * Takes 2 SIMD registers containing N transitions eachi (tr0, tr1).
  50  * Shuffles it into different representation:
  51  * lo - contains low 32 bits of given N transitions.
  52  * hi - contains high 32 bits of given N transitions.
  53  */
  54 #define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi)                        do { \
  55         lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88);  \
  56         hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd);  \
  57 } while (0)
  58
  59
  60 /*
  61  * Calculate the address of the next transition for
  62  * all types of nodes. Note that only DFA nodes and range
  63  * nodes actually transition to another node. Match
  64  * nodes not supposed to be encountered here.
  65  * For quad range nodes:
  66  * Calculate number of range boundaries that are less than the
  67  * input value. Range boundaries for each node are in signed 8 bit,
  68  * ordered from -128 to 127.
  69  * This is effectively a popcnt of bytes that are greater than the
  70  * input byte.
  71  * Single nodes are processed in the same ways as quad range nodes.
  72 */
  73 #define ACL_TR_CALC_ADDR(P, S,                                  \
  74         addr, index_mask, next_input, shuffle_input,            \
  75         ones_16, range_base, tr_lo, tr_hi)               do {   \
  76                                                                 \
  77         typeof(addr) in, node_type, r, t;                       \
  78         typeof(addr) dfa_msk, dfa_ofs, quad_ofs;                \
  79                                                                 \
  80         t = _##P##_xor_si##S(index_mask, index_mask);           \
  81         in = _##P##_shuffle_epi8(next_input, shuffle_input);    \
  82                                                                 \
  83         /* Calc node type and node addr */                      \
  84         node_type = _##P##_andnot_si##S(index_mask, tr_lo);     \
  85         addr = _##P##_and_si##S(index_mask, tr_lo);             \
  86                                                                 \
  87         /* mask for DFA type(0) nodes */                        \
  88         dfa_msk = _##P##_cmpeq_epi32(node_type, t);             \
  89                                                                 \
  90         /* DFA calculations. */                                 \
  91         r = _##P##_srli_epi32(in, 30);                          \
  92         r = _##P##_add_epi8(r, range_base);                     \
  93         t = _##P##_srli_epi32(in, 24);                          \
  94         r = _##P##_shuffle_epi8(tr_hi, r);                      \
  95                                                                 \
  96         dfa_ofs = _##P##_sub_epi32(t, r);                       \
  97                                                                 \
  98         /* QUAD/SINGLE caluclations. */                         \
  99         t = _##P##_cmpgt_epi8(in, tr_hi);                       \
 100         t = _##P##_sign_epi8(t, t);                             \
 101         t = _##P##_maddubs_epi16(t, t);                         \
 102         quad_ofs = _##P##_madd_epi16(t, ones_16);               \
 103                                                                 \
 104         /* blend DFA and QUAD/SINGLE. */                        \
 105         t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk);     \
 106                                                                 \
 107         /* calculate address for next transitions. */           \
 108         addr = _##P##_add_epi32(addr, t);                       \
 109 } while (0)
 110
 111
 112 #ifdef __cplusplus
 113 }
 114 #endif
 115
 116 #endif /* _RTE_ACL_VECT_H_ */