GCC sometimes does not inline a function despite the *inline* keyword.
During performance profiling we observed that the rte_movX functions were
not being inlined, so use the *always_inline* attribute to force GCC to inline them.
Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
__m256i ymm0;
rte_mov32(uint8_t *dst, const uint8_t *src)
{
__m256i ymm0;
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
__m512i zmm0;
rte_mov64(uint8_t *dst, const uint8_t *src)
{
__m512i zmm0;
* Copy 128 bytes from one location to another,
* locations should not overlap.
*/
* Copy 128 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov128(uint8_t *dst, const uint8_t *src)
{
rte_mov64(dst + 0 * 64, src + 0 * 64);
rte_mov128(uint8_t *dst, const uint8_t *src)
{
rte_mov64(dst + 0 * 64, src + 0 * 64);
* Copy 256 bytes from one location to another,
* locations should not overlap.
*/
* Copy 256 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov256(uint8_t *dst, const uint8_t *src)
{
rte_mov64(dst + 0 * 64, src + 0 * 64);
rte_mov256(uint8_t *dst, const uint8_t *src)
{
rte_mov64(dst + 0 * 64, src + 0 * 64);
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
__m256i ymm0;
rte_mov32(uint8_t *dst, const uint8_t *src)
{
__m256i ymm0;
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
rte_mov64(uint8_t *dst, const uint8_t *src)
{
rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
rte_mov32(uint8_t *dst, const uint8_t *src)
{
rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
+static __rte_always_inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
rte_mov64(uint8_t *dst, const uint8_t *src)
{
rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);