+ __m128i v_qid_done = {0}; /* all-zero; bytes 2/6/10/14 filled below */
+ int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2); /* byte 2 of each u32 */
+ int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
+ int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
+ int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
+
+ int ev_qid0 = qm_port->qid_mappings[hw_qid0]; /* hw_qid -> ev_qid table */
+ int ev_qid1 = qm_port->qid_mappings[hw_qid1];
+ int ev_qid2 = qm_port->qid_mappings[hw_qid2];
+ int ev_qid3 = qm_port->qid_mappings[hw_qid3];
+
+ int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul; /* byte 3, bits 1:0 */
+ int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
+ int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
+ int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
+
+ v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2); /* same byte slots */
+ v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
+ v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
+ v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
+
+ /* Schedule field remapping using byte shuffle
+ * - Full byte containing sched field handled here (op, rsvd are zero)
+ * - Note sanitizing the register requires two masking ANDs:
+ * 1) before the shuffle: keep only the 2 sched bits in byte 3 of each
+ *    u32 (strips prio/msg_type) so the byte is a valid lookup index
+ * 2) after the shuffle: clear bytes 0-2 of each u32; their index byte
+ *    is zero, so the shuffle fills them with sched_type_map[0], which
+ *    must not leak into the results that are ORed together later
+ * - The masked result is shifted right by 10 bits within each u32,
+ *   moving the field from bits 25:24 (byte 3) to bits 15:14 (byte 1).
+ *   This makes the final combination OR easier to make the rte_event.
+ * - NOTE(review): reusing the 0x03 mask after the shuffle assumes every
+ *   remapped RTE_SCHED_TYPE_* value fits in 2 bits - confirm.
+ */
+ __m128i v_sched_done;
+ __m128i v_sched_bits; /* outer scope: pre-shuffle sched bits stay visible */
+ {
+ static const uint8_t sched_type_map[16] = {
+ [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
+ [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
+ [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
+ [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
+ };
+ static const uint8_t sched_and_mask[16] = {
+ 0x00, 0x00, 0x00, 0x03,
+ 0x00, 0x00, 0x00, 0x03,
+ 0x00, 0x00, 0x00, 0x03,
+ 0x00, 0x00, 0x00, 0x03,
+ };
+ const __m128i v_sched_map = _mm_loadu_si128(
+ (const __m128i *)sched_type_map);
+ __m128i v_sched_mask = _mm_loadu_si128(
+ (const __m128i *)&sched_and_mask);
+ v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask); /* AND #1 */
+ __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
+ v_sched_bits);
+ __m128i v_preshift = _mm_and_si128(v_sched_remapped,
+ v_sched_mask); /* AND #2 */
+ v_sched_done = _mm_srli_epi32(v_preshift, 10);
+ }
+
+ /* Priority handling
+ * - QE provides 3 bits of priority
+ * - Shift each u32 left by 3 so those bits (bits 4:2 of byte 3 before
+ *   the shift) become the 3 MSBs of byte 3, for byte-prio in rte_event
+ * - Mask with 0xE0 in byte 3 to avoid pollution: this drops every other
+ *   byte and the bits shifted up from byte 2, leaving only the 3 prio
+ *   MSBs in reg
+ */
+ __m128i v_prio_done;
+ {
+ static const uint8_t prio_mask[16] = {
+ 0x00, 0x00, 0x00, 0x07 << 5,
+ 0x00, 0x00, 0x00, 0x07 << 5,
+ 0x00, 0x00, 0x00, 0x07 << 5,
+ 0x00, 0x00, 0x00, 0x07 << 5,
+ };
+ __m128i v_prio_mask = _mm_loadu_si128(
+ (const __m128i *)prio_mask);
+ __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
+ v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
+ }