X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_acl%2Facl_vect.h;h=6cc19997382ec7bb3743e192a75be2a6a5ef92ac;hb=a0e3310e7a4f92622f7d86369c8f8e3c389edf18;hp=d8136003f62bc7650d14160ac8c8ffd737e60e52;hpb=cf59b29bb9d908ea8bbc5d4597db9ddd0febef47;p=dpdk.git

diff --git a/lib/librte_acl/acl_vect.h b/lib/librte_acl/acl_vect.h
index d8136003f6..6cc1999738 100644
--- a/lib/librte_acl/acl_vect.h
+++ b/lib/librte_acl/acl_vect.h
@@ -44,86 +44,70 @@
 extern "C" {
 #endif
 
-#define	MM_ADD16(a, b)		_mm_add_epi16(a, b)
-#define	MM_ADD32(a, b)		_mm_add_epi32(a, b)
-#define	MM_ALIGNR8(a, b, c)	_mm_alignr_epi8(a, b, c)
-#define	MM_AND(a, b)		_mm_and_si128(a, b)
-#define MM_ANDNOT(a, b)		_mm_andnot_si128(a, b)
-#define MM_BLENDV8(a, b, c)	_mm_blendv_epi8(a, b, c)
-#define MM_CMPEQ16(a, b)	_mm_cmpeq_epi16(a, b)
-#define MM_CMPEQ32(a, b)	_mm_cmpeq_epi32(a, b)
-#define	MM_CMPEQ8(a, b)		_mm_cmpeq_epi8(a, b)
-#define MM_CMPGT32(a, b)	_mm_cmpgt_epi32(a, b)
-#define MM_CMPGT8(a, b)		_mm_cmpgt_epi8(a, b)
-#define MM_CVT(a)		_mm_cvtsi32_si128(a)
-#define	MM_CVT32(a)		_mm_cvtsi128_si32(a)
-#define MM_CVTU32(a)		_mm_cvtsi32_si128(a)
-#define	MM_INSERT16(a, c, b)	_mm_insert_epi16(a, c, b)
-#define	MM_INSERT32(a, c, b)	_mm_insert_epi32(a, c, b)
-#define	MM_LOAD(a)		_mm_load_si128(a)
-#define	MM_LOADH_PI(a, b)	_mm_loadh_pi(a, b)
-#define	MM_LOADU(a)		_mm_loadu_si128(a)
-#define	MM_MADD16(a, b)		_mm_madd_epi16(a, b)
-#define	MM_MADD8(a, b)		_mm_maddubs_epi16(a, b)
-#define	MM_MOVEMASK8(a)		_mm_movemask_epi8(a)
-#define MM_OR(a, b)		_mm_or_si128(a, b)
-#define	MM_SET1_16(a)		_mm_set1_epi16(a)
-#define	MM_SET1_32(a)		_mm_set1_epi32(a)
-#define	MM_SET1_64(a)		_mm_set1_epi64(a)
-#define	MM_SET1_8(a)		_mm_set1_epi8(a)
-#define	MM_SET32(a, b, c, d)	_mm_set_epi32(a, b, c, d)
-#define	MM_SHUFFLE32(a, b)	_mm_shuffle_epi32(a, b)
-#define	MM_SHUFFLE8(a, b)	_mm_shuffle_epi8(a, b)
-#define	MM_SHUFFLEPS(a, b, c)	_mm_shuffle_ps(a, b, c)
-#define	MM_SIGN8(a, b)		_mm_sign_epi8(a, b)
-#define	MM_SLL64(a, b)		_mm_sll_epi64(a, b)
-#define	MM_SRL128(a, b)		_mm_srli_si128(a, b)
-#define MM_SRL16(a, b)		_mm_srli_epi16(a, b)
-#define	MM_SRL32(a, b)		_mm_srli_epi32(a, b)
-#define	MM_STORE(a, b)		_mm_store_si128(a, b)
-#define	MM_STOREU(a, b)		_mm_storeu_si128(a, b)
-#define	MM_TESTZ(a, b)		_mm_testz_si128(a, b)
-#define	MM_XOR(a, b)		_mm_xor_si128(a, b)
-
-#define	MM_SET16(a, b, c, d, e, f, g, h)	\
-	_mm_set_epi16(a, b, c, d, e, f, g, h)
-
-#define	MM_SET8(c0, c1, c2, c3, c4, c5, c6, c7,	\
-		c8, c9, cA, cB, cC, cD, cE, cF)	\
-	_mm_set_epi8(c0, c1, c2, c3, c4, c5, c6, c7,	\
-		c8, c9, cA, cB, cC, cD, cE, cF)
-
-#ifdef RTE_ARCH_X86_64
-
-#define	MM_CVT64(a)		_mm_cvtsi128_si64(a)
-
-#else
-
-#define	MM_CVT64(a)	({ \
-	rte_xmm_t m;       \
-	m.m = (a);         \
-	(m.u64[0]);        \
-})
-
-#endif /*RTE_ARCH_X86_64 */
 
 /*
- * Prior to version 12.1 icc doesn't support _mm_set_epi64x.
+ * Takes 2 SIMD registers containing N transitions each (tr0, tr1).
+ * Shuffles them into a different representation:
+ * lo - contains low 32 bits of given N transitions (shuffle imm 0x88).
+ * hi - contains high 32 bits of given N transitions (shuffle imm 0xdd).
  */
-#if (defined(__ICC) && __ICC < 1210)
+#define	ACL_TR_HILO(P, TC, tr0, tr1, lo, hi)                        do { \
+	lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88);  \
+	hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd);  \
+} while (0)
 
-#define	MM_SET64(a, b)	({ \
-	rte_xmm_t m;       \
-	m.u64[0] = b;      \
-	m.u64[1] = a;      \
-	(m.m);             \
-})
 
-#else
-
-#define	MM_SET64(a, b)		_mm_set_epi64x(a, b)
+/*
+ * Calculate the address of the next transition for
+ * all types of nodes. Note that only DFA nodes and range
+ * nodes actually transition to another node. Match
+ * nodes are not supposed to be encountered here.
+ * For quad range nodes:
+ * Calculate number of range boundaries that are less than the
+ * input value. Range boundaries for each node are in signed 8 bit,
+ * ordered from -128 to 127.
+ * This is effectively a popcnt of the boundary bytes that are less
+ * than the input byte.
+ * Single nodes are processed in the same way as quad range nodes.
+ */
+#define ACL_TR_CALC_ADDR(P, S,					\
+	addr, index_mask, next_input, shuffle_input,		\
+	ones_16, range_base, tr_lo, tr_hi)               do {	\
+								\
+	typeof(addr) in, node_type, r, t;			\
+	typeof(addr) dfa_msk, dfa_ofs, quad_ofs;		\
+								\
+	t = _##P##_xor_si##S(index_mask, index_mask);		\
+	in = _##P##_shuffle_epi8(next_input, shuffle_input);	\
+								\
+	/* Split tr_lo into node type and node addr */		\
+	node_type = _##P##_andnot_si##S(index_mask, tr_lo);	\
+	addr = _##P##_and_si##S(index_mask, tr_lo);		\
+								\
+	/* mask for DFA type(0) nodes */			\
+	dfa_msk = _##P##_cmpeq_epi32(node_type, t);		\
+								\
+	/* DFA calculations. */					\
+	r = _##P##_srli_epi32(in, 30);				\
+	r = _##P##_add_epi8(r, range_base);			\
+	t = _##P##_srli_epi32(in, 24);				\
+	r = _##P##_shuffle_epi8(tr_hi, r);			\
+								\
+	dfa_ofs = _##P##_sub_epi32(t, r);			\
+								\
+	/* QUAD/SINGLE calculations: count in > boundary. */	\
+	t = _##P##_cmpgt_epi8(in, tr_hi);			\
+	t = _##P##_sign_epi8(t, t);				\
+	t = _##P##_maddubs_epi16(t, t);				\
+	quad_ofs = _##P##_madd_epi16(t, ones_16);		\
+								\
+	/* blend: take dfa_ofs where dfa_msk is set. */		\
+	t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk);	\
+								\
+	/* calculate address for next transitions. */		\
+	addr = _##P##_add_epi32(addr, t);			\
+} while (0)
 
-#endif /* (defined(__ICC) && __ICC < 1210) */
 
 #ifdef __cplusplus
 }