- temp = (xmm_t)MM_SHUFFLEPS((__m128)*indices1, (__m128)*indices2,
- 0x88);
- *indices2 = (xmm_t)MM_SHUFFLEPS((__m128)*indices1,
- (__m128)*indices2, 0xdd);
+ temp = (xmm_t)MM_SHUFFLEPS((__m128)indices1, (__m128)indices2, 0x88);
+ range = (xmm_t)MM_SHUFFLEPS((__m128)indices1, (__m128)indices2, 0xdd);
+
+ t = MM_XOR(index_mask, index_mask);
+
+ /* shuffle input byte to all 4 positions of 32 bit value */
+ in = MM_SHUFFLE8(next_input, shuffle_input);