- asm volatile ("movdqu (%[src]), %%xmm0\n\t"
- "movdqu 16(%[src]), %%xmm1\n\t"
- "movdqu 32(%[src]), %%xmm2\n\t"
- "movdqu 48(%[src]), %%xmm3\n\t"
- "movdqu 64(%[src]), %%xmm4\n\t"
- "movdqu 80(%[src]), %%xmm5\n\t"
- "movdqu 96(%[src]), %%xmm6\n\t"
- "movdqu 112(%[src]), %%xmm7\n\t"
- "movdqu %%xmm0, (%[dst])\n\t"
- "movdqu %%xmm1, 16(%[dst])\n\t"
- "movdqu %%xmm2, 32(%[dst])\n\t"
- "movdqu %%xmm3, 48(%[dst])\n\t"
- "movdqu %%xmm4, 64(%[dst])\n\t"
- "movdqu %%xmm5, 80(%[dst])\n\t"
- "movdqu %%xmm6, 96(%[dst])\n\t"
- "movdqu %%xmm7, 112(%[dst])"
- :
- : [src] "r" (src),
- [dst] "r"(dst)
- : "xmm0", "xmm1", "xmm2", "xmm3",
- "xmm4", "xmm5", "xmm6", "xmm7", "memory");
+ __m128i reg_a, reg_b, reg_c, reg_d, reg_e, reg_f, reg_g, reg_h;
+ asm volatile (
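+ /* Load 128 bytes from src into the eight temporaries; movdqu
+  * tolerates unaligned addresses. */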
+ "movdqu (%[src]), %[reg_a]\n\t"
+ "movdqu 16(%[src]), %[reg_b]\n\t"
+ "movdqu 32(%[src]), %[reg_c]\n\t"
+ "movdqu 48(%[src]), %[reg_d]\n\t"
+ "movdqu 64(%[src]), %[reg_e]\n\t"
+ "movdqu 80(%[src]), %[reg_f]\n\t"
+ "movdqu 96(%[src]), %[reg_g]\n\t"
+ "movdqu 112(%[src]), %[reg_h]\n\t"
+ "movdqu %[reg_a], (%[dst])\n\t"
+ "movdqu %[reg_b], 16(%[dst])\n\t"
+ "movdqu %[reg_c], 32(%[dst])\n\t"
+ "movdqu %[reg_d], 48(%[dst])\n\t"
+ "movdqu %[reg_e], 64(%[dst])\n\t"
+ "movdqu %[reg_f], 80(%[dst])\n\t"
+ "movdqu %[reg_g], 96(%[dst])\n\t"
+ "movdqu %[reg_h], 112(%[dst])\n\t"
+ : [reg_a] "=x" (reg_a),
+ [reg_b] "=x" (reg_b),
+ [reg_c] "=x" (reg_c),
+ [reg_d] "=x" (reg_d),
+ [reg_e] "=x" (reg_e),
+ [reg_f] "=x" (reg_f),
+ [reg_g] "=x" (reg_g),
+ [reg_h] "=x" (reg_h)
+ : [src] "r" (src),
+ [dst] "r"(dst)
+ : "memory"
+ );
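For reference, the same 128-byte unaligned copy can be expressed with SSE2 intrinsics, which hands register allocation and instruction scheduling to the compiler with no inline asm at all. This is only a sketch: copy_block_128 is a hypothetical name, since the hunk does not show the enclosing function, and it assumes <emmintrin.h> is available (the patched code needs that header for __m128i as well).

    #include <emmintrin.h> /* SSE2: __m128i, _mm_loadu_si128, _mm_storeu_si128 */

    /* Copy exactly 128 bytes from src to dst; neither pointer needs to be
     * 16-byte aligned, because the unaligned load/store intrinsics are used. */
    static void copy_block_128(void *dst, const void *src)
    {
        const __m128i *s = (const __m128i *)src;
        __m128i       *d = (__m128i *)dst;

        /* Mirror the asm: eight 16-byte loads first, then eight stores,
         * so the copy stays correct for overlaps of fewer than 128 bytes. */
        __m128i r0 = _mm_loadu_si128(s + 0);
        __m128i r1 = _mm_loadu_si128(s + 1);
        __m128i r2 = _mm_loadu_si128(s + 2);
        __m128i r3 = _mm_loadu_si128(s + 3);
        __m128i r4 = _mm_loadu_si128(s + 4);
        __m128i r5 = _mm_loadu_si128(s + 5);
        __m128i r6 = _mm_loadu_si128(s + 6);
        __m128i r7 = _mm_loadu_si128(s + 7);

        _mm_storeu_si128(d + 0, r0);
        _mm_storeu_si128(d + 1, r1);
        _mm_storeu_si128(d + 2, r2);
        _mm_storeu_si128(d + 3, r3);
        _mm_storeu_si128(d + 4, r4);
        _mm_storeu_si128(d + 5, r5);
        _mm_storeu_si128(d + 6, r6);
        _mm_storeu_si128(d + 7, r7);
    }

At -O2, GCC and Clang typically compile this to the same eight movdqu load/store pairs, but remain free to reschedule or merge the accesses, which the hand-written asm block prevents.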