vhost: optimize broadcast RARP sync with C11 atomic
author Phil Yang <phil.yang@arm.com>
Thu, 23 Apr 2020 16:54:49 +0000 (00:54 +0800)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Tue, 5 May 2020 13:54:26 +0000 (15:54 +0200)
The RARP packet broadcast flag is synchronized with the rte_atomic_XX
APIs, which imply a full barrier (DMB) on aarch64. This patch optimizes
it by using C11 atomics with one-way barriers.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
lib/librte_vhost/vhost.h
lib/librte_vhost/vhost_user.c
lib/librte_vhost/virtio_net.c

index 507dbf2..d2b5dc4 100644 (file)
@@ -350,7 +350,7 @@ struct virtio_net {
        uint32_t                flags;
        uint16_t                vhost_hlen;
        /* to tell if we need broadcast rarp packet */
-       rte_atomic16_t          broadcast_rarp;
+       int16_t                 broadcast_rarp;
        uint32_t                nr_vring;
        int                     dequeue_zero_copy;
        int                     extbuf;
index 971ccdb..6f85084 100644 (file)
@@ -2145,11 +2145,10 @@ vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg,
         * Set the flag to inject a RARP broadcast packet at
         * rte_vhost_dequeue_burst().
         *
-        * rte_smp_wmb() is for making sure the mac is copied
-        * before the flag is set.
+        * __ATOMIC_RELEASE ordering is for making sure the mac is
+        * copied before the flag is set.
         */
-       rte_smp_wmb();
-       rte_atomic16_set(&dev->broadcast_rarp, 1);
+       __atomic_store_n(&dev->broadcast_rarp, 1, __ATOMIC_RELEASE);
        did = dev->vdpa_dev_id;
        vdpa_dev = rte_vdpa_get_device(did);
        if (vdpa_dev && vdpa_dev->ops->migration_done)
index 1fc30c6..62f37da 100644 (file)
@@ -2166,6 +2166,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
        struct virtio_net *dev;
        struct rte_mbuf *rarp_mbuf = NULL;
        struct vhost_virtqueue *vq;
+       int16_t success = 1;
 
        dev = get_device(vid);
        if (!dev)
@@ -2212,16 +2213,17 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
         *
         * broadcast_rarp shares a cacheline in the virtio_net structure
         * with some fields that are accessed during enqueue and
-        * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
-        * result in false sharing between enqueue and dequeue.
+        * __atomic_compare_exchange_n causes a write when the compare
+        * and exchange is performed. This could result in false sharing
+        * between enqueue and dequeue.
         *
         * Prevent unnecessary false sharing by reading broadcast_rarp first
-        * and only performing cmpset if the read indicates it is likely to
-        * be set.
+        * and only performing compare and exchange if the read indicates it
+        * is likely to be set.
         */
-       if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
-                       rte_atomic16_cmpset((volatile uint16_t *)
-                               &dev->broadcast_rarp.cnt, 1, 0))) {
+       if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
+                       __atomic_compare_exchange_n(&dev->broadcast_rarp,
+                       &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
 
                rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
                if (rarp_mbuf == NULL) {