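+ /*
+ * At this point, we have the 4 sets of flags in the low 64 bits
+ * of vtag1 (4x16).
+ * We want to extract these, and merge them with the mbuf init data
+ * so we can do a single 16-byte write to the mbuf to set the flags
+ * and all the other initialization fields. Extracting the
+ * appropriate flags means that, for each mbuf, we have to move its
+ * 16-bit flags into word 4 (bytes 8-9) of the 16-byte value before
+ * we do the write: a byte shift and blend when SSE4 is available,
+ * otherwise two shifts and an OR. The OR only matches the blend if
+ * that word of mbuf_init is zero.
+ */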
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+
+ rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtag1, 8), 0x10);
+ rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtag1, 6), 0x10);
+ rearm2 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtag1, 4), 0x10);
+ rearm3 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtag1, 2), 0x10);
+
+#else
+ rearm0 = _mm_slli_si128(vtag1, 14);
+ rearm1 = _mm_slli_si128(vtag1, 12);
+ rearm2 = _mm_slli_si128(vtag1, 10);
+ rearm3 = _mm_slli_si128(vtag1, 8);
+
+ rearm0 = _mm_or_si128(mbuf_init, _mm_srli_epi64(rearm0, 48));
+ rearm1 = _mm_or_si128(mbuf_init, _mm_srli_epi64(rearm1, 48));
+ rearm2 = _mm_or_si128(mbuf_init, _mm_srli_epi64(rearm2, 48));
+ rearm3 = _mm_or_si128(mbuf_init, _mm_srli_epi64(rearm3, 48));
+
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+ _mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
+ _mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
+ _mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
+ _mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);