virtqueue_notify(vq);
- rte_rmb();
- while (VIRTQUEUE_NUSED(vq) == 0) {
- rte_rmb();
+ while (virtqueue_nused(vq) == 0)
usleep(100);
- }
- while (VIRTQUEUE_NUSED(vq)) {
+ while (virtqueue_nused(vq)) {
uint32_t idx, desc_idx, used_idx;
struct vring_used_elem *uep;
struct vring_used {
uint16_t flags;
- volatile uint16_t idx;
+ uint16_t idx;
struct vring_used_elem ring[0];
};
struct virtnet_rx *rxvq = rxq;
struct virtqueue *vq = rxvq->vq;
- return VIRTQUEUE_NUSED(vq) >= offset;
+ return virtqueue_nused(vq) >= offset;
}
void
if (unlikely(hw->started == 0))
return nb_rx;
- nb_used = VIRTQUEUE_NUSED(vq);
-
- virtio_rmb(hw->weak_barriers);
+ nb_used = virtqueue_nused(vq);
num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
if (unlikely(hw->started == 0))
return nb_rx;
- nb_used = VIRTQUEUE_NUSED(vq);
+ nb_used = virtqueue_nused(vq);
nb_used = RTE_MIN(nb_used, nb_pkts);
nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
- virtio_rmb(hw->weak_barriers);
-
PMD_RX_LOG(DEBUG, "used:%d", nb_used);
nb_enqueued = 0;
uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
VIRTIO_MBUF_BURST_SZ);
- if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
- virtio_rmb(hw->weak_barriers);
+ if (likely(virtqueue_nused(vq) >= rcv_cnt)) {
num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
rcv_cnt);
uint16_t extra_idx = 0;
if (unlikely(hw->started == 0))
return nb_rx;
- nb_used = VIRTQUEUE_NUSED(vq);
-
- virtio_rmb(hw->weak_barriers);
+ nb_used = virtqueue_nused(vq);
PMD_RX_LOG(DEBUG, "used:%d", nb_used);
uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
VIRTIO_MBUF_BURST_SZ);
- if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
- virtio_rmb(hw->weak_barriers);
+ if (likely(virtqueue_nused(vq) >= rcv_cnt)) {
num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len,
rcv_cnt);
uint16_t extra_idx = 0;
return nb_pkts;
PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
- nb_used = VIRTQUEUE_NUSED(vq);
- virtio_rmb(hw->weak_barriers);
+ nb_used = virtqueue_nused(vq);
+
if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
virtio_xmit_cleanup(vq, nb_used);
/* A positive value indicates free vring descriptors are needed */
if (unlikely(need > 0)) {
- nb_used = VIRTQUEUE_NUSED(vq);
- virtio_rmb(hw->weak_barriers);
+ nb_used = virtqueue_nused(vq);
+
need = RTE_MIN(need, (int)nb_used);
virtio_xmit_cleanup(vq, need);
virtio_xmit_try_cleanup_inorder(struct virtqueue *vq, uint16_t need)
{
uint16_t nb_used, nb_clean, nb_descs;
- struct virtio_hw *hw = vq->hw;
nb_descs = vq->vq_free_cnt + need;
- nb_used = VIRTQUEUE_NUSED(vq);
- virtio_rmb(hw->weak_barriers);
+ nb_used = virtqueue_nused(vq);
nb_clean = RTE_MIN(need, (int)nb_used);
virtio_xmit_cleanup_inorder(vq, nb_clean);
VIRTQUEUE_DUMP(vq);
PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
- nb_used = VIRTQUEUE_NUSED(vq);
- virtio_rmb(hw->weak_barriers);
+ nb_used = virtqueue_nused(vq);
if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
virtio_xmit_cleanup_inorder(vq, nb_used);
if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
return 0;
- nb_used = VIRTQUEUE_NUSED(vq);
+ nb_used = virtqueue_nused(vq);
rte_compiler_barrier();
if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
return 0;
- nb_used = VIRTQUEUE_NUSED(vq);
-
- rte_rmb();
+ /* virtqueue_nused has a load-acquire or rte_cio_rmb inside */
+ nb_used = virtqueue_nused(vq);
if (unlikely(nb_used == 0))
return 0;
if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
return 0;
- nb_used = VIRTQUEUE_NUSED(vq);
-
- rte_compiler_barrier();
+ nb_used = virtqueue_nused(vq);
if (unlikely(nb_used == 0))
return 0;
struct vring *vring = &dev->vrings[queue_idx];
/* Consume avail ring, using used ring idx as first one */
- while (vring->used->idx != vring->avail->idx) {
- avail_idx = (vring->used->idx) & (vring->num - 1);
+ while (__atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
+ != vring->avail->idx) {
+ avail_idx = __atomic_load_n(&vring->used->idx, __ATOMIC_RELAXED)
+ & (vring->num - 1);
desc_idx = vring->avail->ring[avail_idx];
n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx);
uep->id = desc_idx;
uep->len = n_descs;
- vring->used->idx++;
+ __atomic_add_fetch(&vring->used->idx, 1, __ATOMIC_RELAXED);
}
}
uint16_t used_idx, desc_idx;
uint16_t nb_used, i;
- nb_used = VIRTQUEUE_NUSED(vq);
+ nb_used = virtqueue_nused(vq);
for (i = 0; i < nb_used; i++) {
used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
return VTNET_TQ;
}
-#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_split.ring.used->idx - \
- (vq)->vq_used_cons_idx))
+/* virtqueue_nused has a load-acquire or rte_cio_rmb inside */
+static inline uint16_t
+virtqueue_nused(const struct virtqueue *vq)
+{
+ uint16_t idx;
+
+ if (vq->hw->weak_barriers) {
+ /**
+ * x86 prefers using rte_smp_rmb over __atomic_load_n as it reports
+ * slightly better performance, which comes from the branch saved
+ * by the compiler.
+ * The if and else branches are identical on x86, as the smp and
+ * cio barriers are both defined as compiler barriers there.
+ */
+#ifdef RTE_ARCH_X86_64
+ idx = vq->vq_split.ring.used->idx;
+ rte_smp_rmb();
+#else
+ idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx,
+ __ATOMIC_ACQUIRE);
+#endif
+ } else {
+ idx = vq->vq_split.ring.used->idx;
+ rte_cio_rmb();
+ }
+ return idx - vq->vq_used_cons_idx;
+}
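For reference, a minimal sketch of the calling convention this introduces (drain_used_sketch and its arguments are illustrative, not part of the patch): the load-acquire or rte_cio_rmb now lives inside virtqueue_nused(), so callers simply read the count and dequeue, without pairing the read with an explicit virtio_rmb()/rte_rmb() as they did with the VIRTQUEUE_NUSED() macro.

/*
 * Illustrative helper only, assuming the driver's struct virtqueue,
 * VIRTIO_MBUF_BURST_SZ and virtqueue_dequeue_burst_rx() shown in the
 * hunks above.
 */
static uint16_t
drain_used_sketch(struct virtqueue *vq, struct rte_mbuf **pkts,
		uint32_t *lens, uint16_t nb_pkts)
{
	uint16_t nb_used, num;

	nb_used = virtqueue_nused(vq);	/* barrier is inside */
	num = RTE_MIN(nb_used, nb_pkts);
	num = RTE_MIN(num, VIRTIO_MBUF_BURST_SZ);

	return virtqueue_dequeue_burst_rx(vq, pkts, lens, num);
}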
void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx);
#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTQUEUE_DUMP(vq) do { \
uint16_t used_idx, nused; \
- used_idx = (vq)->vq_split.ring.used->idx; \
+ used_idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx, \
+ __ATOMIC_RELAXED); \
nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
if (vtpci_packed_queue((vq)->hw)) { \
PMD_INIT_LOG(DEBUG, \
"VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
" avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
" avail.flags=0x%x; used.flags=0x%x", \
- (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \
- (vq)->vq_desc_head_idx, (vq)->vq_split.ring.avail->idx, \
- (vq)->vq_used_cons_idx, (vq)->vq_split.ring.used->idx, \
+ (vq)->vq_nentries, (vq)->vq_free_cnt, nused, (vq)->vq_desc_head_idx, \
+ (vq)->vq_split.ring.avail->idx, (vq)->vq_used_cons_idx, \
+ __atomic_load_n(&(vq)->vq_split.ring.used->idx, __ATOMIC_RELAXED), \
(vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \
} while (0)
#else
}
vq->last_used_idx += vq->shadow_used_idx;
- rte_smp_wmb();
-
vhost_log_cache_sync(dev, vq);
- *(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
+ __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx,
+ __ATOMIC_RELEASE);
vq->shadow_used_idx = 0;
vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
sizeof(vq->used->idx));
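Taken together with the driver-side change, the release add on vq->used->idx pairs with the acquire load inside virtqueue_nused(): the entries written into the used ring become visible to the driver side no later than the new index, which is what the removed rte_smp_wmb()/virtio_rmb() pair used to guarantee. A stand-alone sketch of that release/acquire pattern (the demo_* names are hypothetical, not vhost code):

/*
 * Stand-alone illustration of the release/acquire pairing: the
 * producer publishes entries with a release add on the index, the
 * consumer observes them with an acquire load, so no separate
 * wmb/rmb pair is needed.
 */
#include <stdint.h>

#define DEMO_RING_SZ 256	/* power of two, mirrors vring sizing */

struct demo_ring {
	uint16_t idx;			/* producer-owned "used" index */
	uint32_t ring[DEMO_RING_SZ];
};

static void
demo_publish(struct demo_ring *r, uint32_t val)
{
	/* only the producer writes idx, so a plain read is fine here */
	uint16_t slot = r->idx & (DEMO_RING_SZ - 1);

	r->ring[slot] = val;		/* fill the entry first */
	/* release: the entry is visible before the new index */
	__atomic_add_fetch(&r->idx, 1, __ATOMIC_RELEASE);
}

static int
demo_consume(struct demo_ring *r, uint16_t *cons, uint32_t *val)
{
	/* acquire: pairs with the release add in demo_publish() */
	uint16_t prod = __atomic_load_n(&r->idx, __ATOMIC_ACQUIRE);

	if (prod == *cons)
		return 0;		/* nothing new */

	*val = r->ring[*cons & (DEMO_RING_SZ - 1)];
	(*cons)++;
	return 1;
}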