- * IO barriers are used to ensure consistent state.
+ * A single acquire fence is used to ensure consistent state.
*/
rxcmp1[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 7]);
- rte_io_rmb();
+ rxcmp1[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 5]);
+ rxcmp1[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 3]);
+ rxcmp1[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 1]);
+
+ /*
+  * A single acquire fence orders the valid-bit loads above against
+  * the descriptor reloads and payload loads below.
+  */
+ rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
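+ /*
+  * Note: on aarch64, rte_io_rmb() expands to the heavier "dsb ld"
+  * barrier, while the acquire fence emits a lighter "dmb ishld",
+  * so one fence here replaces four per-descriptor barriers at
+  * lower cost.
+  */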
/* Reload lower 64b of descriptors to make it ordered after info3_v. */
rxcmp1[3] = vreinterpretq_u32_u64(vld1q_lane_u64
((void *)&cpr->cp_desc_ring[cons + 7],
vreinterpretq_u64_u32(rxcmp1[3]), 0));
- rxcmp[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 6]);
-
- rxcmp1[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 5]);
- rte_io_rmb();
rxcmp1[2] = vreinterpretq_u32_u64(vld1q_lane_u64
((void *)&cpr->cp_desc_ring[cons + 5],
vreinterpretq_u64_u32(rxcmp1[2]), 0));
- rxcmp[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 4]);
-
- t1 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[2], rxcmp1[3]));
-
- rxcmp1[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 3]);
- rte_io_rmb();
rxcmp1[1] = vreinterpretq_u32_u64(vld1q_lane_u64
((void *)&cpr->cp_desc_ring[cons + 3],
vreinterpretq_u64_u32(rxcmp1[1]), 0));
- rxcmp[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 2]);
-
- rxcmp1[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 1]);
- rte_io_rmb();
rxcmp1[0] = vreinterpretq_u32_u64(vld1q_lane_u64
((void *)&cpr->cp_desc_ring[cons + 1],
vreinterpretq_u64_u32(rxcmp1[0]), 0));
+
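+ /*
+  * The low 16B halves are also loaded after the fence, so no word
+  * of a completion can be observed older than the valid bit that
+  * qualified it.
+  */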
+ rxcmp[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 6]);
+ rxcmp[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 4]);
+
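+ /*
+  * vzip2q_u32 interleaves the upper halves of two high-16B
+  * vectors, gathering words 2 and 3 of each completion; the word-2
+  * lanes (errors_v2, which carry the valid bits) feed the info3_v
+  * extraction via t1 and t0 below.
+  */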
+ t1 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[2], rxcmp1[3]));
+
+ rxcmp[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 2]);
rxcmp[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 0]);
t0 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[0], rxcmp1[1]));
- * bits and count the number of set bits in order to determine
- * the number of valid descriptors.
+ * bits and count the number of consecutive set fields in order
+ * to determine the number of valid descriptors.
*/
- valid = vget_lane_u64(vreinterpret_u64_u16(vqmovn_u32(info3_v)),
- 0);
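+ /*
+  * Widen each narrowed lane to all-ones/all-zero: e.g. 0x0001 << 15
+  * = 0x8000, then arithmetic >> 15 = 0xffff, while 0x0000 stays
+  * 0x0000.
+  */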
+ valid = vget_lane_u64(vreinterpret_u64_s16(vshr_n_s16
+ (vreinterpret_s16_u16(vshl_n_u16
+ (vqmovn_u32(info3_v), 15)), 15)), 0);
+
/*
* At this point, 'valid' is a 64-bit value containing four
- * 16-bit fields, each of which is either 0x0001 or 0x0000.
- * Compute number of valid descriptors from the index of
- * the highest non-zero field.
+ * 16-bit fields, each of which is either 0xffff or 0x0000.
+ * Count the number of consecutive 1s from the LSB in order to
+ * determine the number of valid descriptors.
*/
- num_valid = (sizeof(uint64_t) / sizeof(uint16_t)) -
- (__builtin_clzl(valid & desc_valid_mask) / 16);
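+ /*
+  * E.g. fields of ffff, ffff, 0000, ffff (LSB first) give
+  * valid = ~0xffff0000ffffffff = 0x0000ffff00000000 and
+  * __builtin_ctzl(valid) / 16 = 2, so counting stops at the first
+  * invalid descriptor (assuming desc_valid_mask preserves whole
+  * 16-bit fields). The valid == 0 case avoids __builtin_ctzl(0),
+  * which is undefined.
+  */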
+ valid = ~(valid & desc_valid_mask);
+ if (valid == 0)
+ num_valid = 4;
+ else
+ num_valid = __builtin_ctzl(valid) / 16;
if (num_valid == 0)
break;