X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Fcommon%2Finclude%2Frte_memcpy.h;h=beb95b8173bcd0a0bb51fda3aa209e5574c4bafe;hb=73a2bc5dba483a09802ce2af90548672238d3c88;hp=fd2a296dac8342a6bdb1b9453a22ad78e89686ba;hpb=af75078fece3615088e561357c1e97603e43a5fe;p=dpdk.git diff --git a/lib/librte_eal/common/include/rte_memcpy.h b/lib/librte_eal/common/include/rte_memcpy.h index fd2a296dac..beb95b8173 100644 --- a/lib/librte_eal/common/include/rte_memcpy.h +++ b/lib/librte_eal/common/include/rte_memcpy.h @@ -1,36 +1,34 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2012 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. * All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions * are met: * - * * Redistributions of source code must retain the above copyright + * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * version: DPDK.L.1.2.3-3 */ #ifndef _RTE_MEMCPY_H_ @@ -44,11 +42,16 @@ #include #include +#include #ifdef __cplusplus extern "C" { #endif +#ifdef __INTEL_COMPILER +#pragma warning(disable:593) /* Stop unused variable warning (reg_a etc). */ +#endif + /** * Copy 16 bytes from one location to another using optimised SSE * instructions. The locations should not overlap. 
@@ -61,12 +64,15 @@ extern "C" { static inline void rte_mov16(uint8_t *dst, const uint8_t *src) { - asm volatile ("movdqu (%[src]), %%xmm0\n\t" - "movdqu %%xmm0, (%[dst])\n\t" - : - : [src] "r" (src), - [dst] "r"(dst) - : "xmm0", "memory"); + __m128i reg_a; + asm volatile ( + "movdqu (%[src]), %[reg_a]\n\t" + "movdqu %[reg_a], (%[dst])\n\t" + : [reg_a] "=x" (reg_a) + : [src] "r" (src), + [dst] "r"(dst) + : "memory" + ); } /** @@ -81,14 +87,18 @@ rte_mov16(uint8_t *dst, const uint8_t *src) static inline void rte_mov32(uint8_t *dst, const uint8_t *src) { - asm volatile ("movdqu (%[src]), %%xmm0\n\t" - "movdqu 16(%[src]), %%xmm1\n\t" - "movdqu %%xmm0, (%[dst])\n\t" - "movdqu %%xmm1, 16(%[dst])" - : - : [src] "r" (src), - [dst] "r"(dst) - : "xmm0", "xmm1", "memory"); + __m128i reg_a, reg_b; + asm volatile ( + "movdqu (%[src]), %[reg_a]\n\t" + "movdqu 16(%[src]), %[reg_b]\n\t" + "movdqu %[reg_a], (%[dst])\n\t" + "movdqu %[reg_b], 16(%[dst])\n\t" + : [reg_a] "=x" (reg_a), + [reg_b] "=x" (reg_b) + : [src] "r" (src), + [dst] "r"(dst) + : "memory" + ); } /** @@ -103,16 +113,21 @@ rte_mov32(uint8_t *dst, const uint8_t *src) static inline void rte_mov48(uint8_t *dst, const uint8_t *src) { - asm volatile ("movdqu (%[src]), %%xmm0\n\t" - "movdqu 16(%[src]), %%xmm1\n\t" - "movdqu 32(%[src]), %%xmm2\n\t" - "movdqu %%xmm0, (%[dst])\n\t" - "movdqu %%xmm1, 16(%[dst])\n\t" - "movdqu %%xmm2, 32(%[dst])" - : - : [src] "r" (src), - [dst] "r"(dst) - : "xmm0", "xmm1", "memory"); + __m128i reg_a, reg_b, reg_c; + asm volatile ( + "movdqu (%[src]), %[reg_a]\n\t" + "movdqu 16(%[src]), %[reg_b]\n\t" + "movdqu 32(%[src]), %[reg_c]\n\t" + "movdqu %[reg_a], (%[dst])\n\t" + "movdqu %[reg_b], 16(%[dst])\n\t" + "movdqu %[reg_c], 32(%[dst])\n\t" + : [reg_a] "=x" (reg_a), + [reg_b] "=x" (reg_b), + [reg_c] "=x" (reg_c) + : [src] "r" (src), + [dst] "r"(dst) + : "memory" + ); } /** @@ -127,18 +142,24 @@ rte_mov48(uint8_t *dst, const uint8_t *src) static inline void rte_mov64(uint8_t *dst, const uint8_t 
*src) { - asm volatile ("movdqu (%[src]), %%xmm0\n\t" - "movdqu 16(%[src]), %%xmm1\n\t" - "movdqu 32(%[src]), %%xmm2\n\t" - "movdqu 48(%[src]), %%xmm3\n\t" - "movdqu %%xmm0, (%[dst])\n\t" - "movdqu %%xmm1, 16(%[dst])\n\t" - "movdqu %%xmm2, 32(%[dst])\n\t" - "movdqu %%xmm3, 48(%[dst])" - : - : [src] "r" (src), - [dst] "r"(dst) - : "xmm0", "xmm1", "xmm2", "xmm3","memory"); + __m128i reg_a, reg_b, reg_c, reg_d; + asm volatile ( + "movdqu (%[src]), %[reg_a]\n\t" + "movdqu 16(%[src]), %[reg_b]\n\t" + "movdqu 32(%[src]), %[reg_c]\n\t" + "movdqu 48(%[src]), %[reg_d]\n\t" + "movdqu %[reg_a], (%[dst])\n\t" + "movdqu %[reg_b], 16(%[dst])\n\t" + "movdqu %[reg_c], 32(%[dst])\n\t" + "movdqu %[reg_d], 48(%[dst])\n\t" + : [reg_a] "=x" (reg_a), + [reg_b] "=x" (reg_b), + [reg_c] "=x" (reg_c), + [reg_d] "=x" (reg_d) + : [src] "r" (src), + [dst] "r"(dst) + : "memory" + ); } /** @@ -153,29 +174,42 @@ rte_mov64(uint8_t *dst, const uint8_t *src) static inline void rte_mov128(uint8_t *dst, const uint8_t *src) { - asm volatile ("movdqu (%[src]), %%xmm0\n\t" - "movdqu 16(%[src]), %%xmm1\n\t" - "movdqu 32(%[src]), %%xmm2\n\t" - "movdqu 48(%[src]), %%xmm3\n\t" - "movdqu 64(%[src]), %%xmm4\n\t" - "movdqu 80(%[src]), %%xmm5\n\t" - "movdqu 96(%[src]), %%xmm6\n\t" - "movdqu 112(%[src]), %%xmm7\n\t" - "movdqu %%xmm0, (%[dst])\n\t" - "movdqu %%xmm1, 16(%[dst])\n\t" - "movdqu %%xmm2, 32(%[dst])\n\t" - "movdqu %%xmm3, 48(%[dst])\n\t" - "movdqu %%xmm4, 64(%[dst])\n\t" - "movdqu %%xmm5, 80(%[dst])\n\t" - "movdqu %%xmm6, 96(%[dst])\n\t" - "movdqu %%xmm7, 112(%[dst])" - : - : [src] "r" (src), - [dst] "r"(dst) - : "xmm0", "xmm1", "xmm2", "xmm3", - "xmm4", "xmm5", "xmm6", "xmm7", "memory"); + __m128i reg_a, reg_b, reg_c, reg_d, reg_e, reg_f, reg_g, reg_h; + asm volatile ( + "movdqu (%[src]), %[reg_a]\n\t" + "movdqu 16(%[src]), %[reg_b]\n\t" + "movdqu 32(%[src]), %[reg_c]\n\t" + "movdqu 48(%[src]), %[reg_d]\n\t" + "movdqu 64(%[src]), %[reg_e]\n\t" + "movdqu 80(%[src]), %[reg_f]\n\t" + "movdqu 96(%[src]), 
%[reg_g]\n\t" + "movdqu 112(%[src]), %[reg_h]\n\t" + "movdqu %[reg_a], (%[dst])\n\t" + "movdqu %[reg_b], 16(%[dst])\n\t" + "movdqu %[reg_c], 32(%[dst])\n\t" + "movdqu %[reg_d], 48(%[dst])\n\t" + "movdqu %[reg_e], 64(%[dst])\n\t" + "movdqu %[reg_f], 80(%[dst])\n\t" + "movdqu %[reg_g], 96(%[dst])\n\t" + "movdqu %[reg_h], 112(%[dst])\n\t" + : [reg_a] "=x" (reg_a), + [reg_b] "=x" (reg_b), + [reg_c] "=x" (reg_c), + [reg_d] "=x" (reg_d), + [reg_e] "=x" (reg_e), + [reg_f] "=x" (reg_f), + [reg_g] "=x" (reg_g), + [reg_h] "=x" (reg_h) + : [src] "r" (src), + [dst] "r"(dst) + : "memory" + ); } +#ifdef __INTEL_COMPILER +#pragma warning(enable:593) +#endif + /** * Copy 256 bytes from one location to another using optimised SSE * instructions. The locations should not overlap. @@ -188,34 +222,16 @@ rte_mov128(uint8_t *dst, const uint8_t *src) static inline void rte_mov256(uint8_t *dst, const uint8_t *src) { - /* - * There are 16XMM registers, but this function does not use - * them all so that it can still be compiled as 32bit - * code. The performance increase was neglible if all 16 - * registers were used. - */ rte_mov128(dst, src); rte_mov128(dst + 128, src + 128); } -#ifdef RTE_MEMCPY_BUILTIN_CONSTANT_P -/** - * Choose between compiler built-in implementation of memcpy or DPDK - * implementation depending if size is a compile-time constant - */ -#define rte_memcpy(dst, src, n) \ - (__builtin_constant_p (n) ? \ - memcpy(dst, src, n) : rte_memcpy_func(dst, src, n)) -#else -/** - * Always use DPDK implementation. - */ -#define rte_memcpy rte_memcpy_func -#endif - /** * Copy bytes from one location to another. The locations must not overlap. * + * @note This is implemented as a macro, so its address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * * @param dst * Pointer to the destination of the data. * @param src @@ -225,6 +241,17 @@ rte_mov256(uint8_t *dst, const uint8_t *src) * @return * Pointer to the destination data. 
*/ +#define rte_memcpy(dst, src, n) \ + ((__builtin_constant_p(n)) ? \ + memcpy((dst), (src), (n)) : \ + rte_memcpy_func((dst), (src), (n))) + +/* + * memcpy() function used by rte_memcpy macro + */ +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) __attribute__((always_inline)); + static inline void * rte_memcpy_func(void *dst, const void *src, size_t n) { @@ -243,13 +270,7 @@ rte_memcpy_func(void *dst, const void *src, size_t n) src = (const uint16_t *)src + 1; } if (n & 0x04) { - /* - * NOTE: doing this as a 32bit copy causes "strict - * aliasing" compile errors, but worked fine for 64bit - * copy below, for unknown reasons. - */ - *(uint16_t *)dst = *(const uint16_t *)src; - *((uint16_t *)dst + 1) = *((const uint16_t *)src + 1); + *(uint32_t *)dst = *(const uint32_t *)src; dst = (uint32_t *)dst + 1; src = (const uint32_t *)src + 1; }