volatile uint8x16_t *wqe_be = (volatile uint8x16_t *)src_be32;
uint8x16_t *wqe_le = (uint8x16_t *)dst_le32;
const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
volatile uint8x16_t *wqe_be = (volatile uint8x16_t *)src_be32;
uint8x16_t *wqe_le = (uint8x16_t *)dst_le32;
const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,