#endif
#include <stdint.h>
-#include <nmmintrin.h>
+#include <rte_cpuflags.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
/* Lookup tables for software implementation of CRC32C */
static const uint32_t crc32c_tables[8][256] = {{
return crc;
}
+#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)
+static inline uint32_t
+crc32c_sse42_u32(uint32_t data, uint32_t init_val)
+{
+ __asm__ volatile(
+ "crc32l %[data], %[init_val];"
+ : [init_val] "+r" (init_val)
+ : [data] "rm" (data));
+ return init_val;
+}
+
+static inline uint32_t
+crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val)
+{
+ union {
+ uint32_t u32[2];
+ uint64_t u64;
+ } d;
+
+ d.u64 = data;
+ init_val = crc32c_sse42_u32(d.u32[0], init_val);
+ init_val = crc32c_sse42_u32(d.u32[1], init_val);
+ return init_val;
+}
+#endif
+
+#ifdef RTE_ARCH_X86_64
+static inline uint32_t
+crc32c_sse42_u64(uint64_t data, uint64_t init_val)
+{
+ __asm__ volatile(
+ "crc32q %[data], %[init_val];"
+ : [init_val] "+r" (init_val)
+ : [data] "rm" (data));
+ return init_val;
+}
+#endif
+
+#define CRC32_SW (1U << 0)
+#define CRC32_SSE42 (1U << 1)
+#define CRC32_x64 (1U << 2)
+#define CRC32_SSE42_x64 (CRC32_x64|CRC32_SSE42)
+#define CRC32_ARM64 (1U << 3)
+
+static uint8_t crc32_alg = CRC32_SW;
+
+#if defined(RTE_ARCH_ARM64)
+#include "rte_crc_arm64.h"
+#else
+
+/**
+ * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
+ * calculation.
+ *
+ * @param alg
+ * An OR of following flags:
+ * - (CRC32_SW) Don't use SSE4.2 intrinsics
+ * - (CRC32_SSE42) Use SSE4.2 intrinsics if available
+ * - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
+ *
+ */
+static inline void
+rte_hash_crc_set_alg(uint8_t alg)
+{
+ switch (alg) {
+#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64)
+ case CRC32_SSE42_x64:
+ if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
+ alg = CRC32_SSE42;
+ case CRC32_SSE42:
+ if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2))
+ alg = CRC32_SW;
+#endif
+ case CRC32_SW:
+ crc32_alg = alg;
+ default:
+ break;
+ }
+}
+
+/* Setting the best available algorithm */
+static inline void __attribute__((constructor))
+rte_hash_crc_init_alg(void)
+{
+ rte_hash_crc_set_alg(CRC32_SSE42_x64);
+}
+
/**
* Use single crc32 instruction to perform a hash on a 4 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
*
* @param data
* Data to perform hash on.
static inline uint32_t
rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
{
- return _mm_crc32_u32(init_val, data);
+#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+ if (likely(crc32_alg & CRC32_SSE42))
+ return crc32c_sse42_u32(data, init_val);
+#endif
+
+ return crc32c_1word(data, init_val);
}
/**
- * Use crc32 instruction to perform a hash.
+ * Use single crc32 instruction to perform a hash on a 8 byte value.
+ * Fall back to software crc32 implementation in case SSE4.2 is
+ * not supported
+ *
+ * @param data
+ * Data to perform hash on.
+ * @param init_val
+ * Value to initialise hash generator.
+ * @return
+ * 32bit calculated hash value.
+ */
+static inline uint32_t
+rte_hash_crc_8byte(uint64_t data, uint32_t init_val)
+{
+#ifdef RTE_ARCH_X86_64
+ if (likely(crc32_alg == CRC32_SSE42_x64))
+ return crc32c_sse42_u64(data, init_val);
+#endif
+
+#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64
+ if (likely(crc32_alg & CRC32_SSE42))
+ return crc32c_sse42_u64_mimic(data, init_val);
+#endif
+
+ return crc32c_2words(data, init_val);
+}
+
+#endif
+
+/**
+ * Calculate CRC32 hash on user-supplied byte array.
*
* @param data
* Data to perform hash on.
rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val)
{
unsigned i;
- uint32_t temp = 0;
- const uint32_t *p32 = (const uint32_t *)data;
+ uint64_t temp = 0;
+ uintptr_t pd = (uintptr_t) data;
- for (i = 0; i < data_len / 4; i++) {
- init_val = rte_hash_crc_4byte(*p32++, init_val);
+ for (i = 0; i < data_len / 8; i++) {
+ init_val = rte_hash_crc_8byte(*(const uint64_t *)pd, init_val);
+ pd += 8;
}
- switch (3 - (data_len & 0x03)) {
+ switch (7 - (data_len & 0x07)) {
case 0:
- temp |= *((const uint8_t *)p32 + 2) << 16;
+ temp |= (uint64_t) *((const uint8_t *)pd + 6) << 48;
/* Fallthrough */
case 1:
- temp |= *((const uint8_t *)p32 + 1) << 8;
+ temp |= (uint64_t) *((const uint8_t *)pd + 5) << 40;
/* Fallthrough */
case 2:
- temp |= *((const uint8_t *)p32);
+ temp |= (uint64_t) *((const uint8_t *)pd + 4) << 32;
+ temp |= *(const uint32_t *)pd;
+ init_val = rte_hash_crc_8byte(temp, init_val);
+ break;
+ case 3:
+ init_val = rte_hash_crc_4byte(*(const uint32_t *)pd, init_val);
+ break;
+ case 4:
+ temp |= *((const uint8_t *)pd + 2) << 16;
+ /* Fallthrough */
+ case 5:
+ temp |= *((const uint8_t *)pd + 1) << 8;
+ /* Fallthrough */
+ case 6:
+ temp |= *(const uint8_t *)pd;
init_val = rte_hash_crc_4byte(temp, init_val);
+ /* Fallthrough */
default:
break;
}