descs[1] = vld1q_u64((uint64_t *)(rxdp + 1));
descs[0] = vld1q_u64((uint64_t *)(rxdp));
+ /* Use acquire fence to order loads of descriptor qwords */
+ rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+ /* A.2 reload qword0 to make it ordered after qword1 load */
+ descs[3] = vld1q_lane_u64((uint64_t *)(rxdp + 3), descs[3], 0);
+ descs[2] = vld1q_lane_u64((uint64_t *)(rxdp + 2), descs[2], 0);
+ descs[1] = vld1q_lane_u64((uint64_t *)(rxdp + 1), descs[1], 0);
+ descs[0] = vld1q_lane_u64((uint64_t *)(rxdp), descs[0], 0);
+
/* B.1 load 4 mbuf pointers */
mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);