eal: introduce SMP memory barriers

author Jerin Jacob <jerin.jacob@caviumnetworks.com>

Fri, 6 Nov 2015 09:40:32 +0000 (15:10 +0530)

committer Thomas Monjalon <thomas.monjalon@6wind.com>

Wed, 18 Nov 2015 21:44:01 +0000 (22:44 +0100)
author Jerin Jacob <jerin.jacob@caviumnetworks.com>
Fri, 6 Nov 2015 09:40:32 +0000 (15:10 +0530)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Wed, 18 Nov 2015 21:44:01 +0000 (22:44 +0100)
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h

index 689c3213496bad2489de75ce6fc027a193b35500..61b3137034cb9450b5b749400b35cdebe3ccd19b 100644 (file)
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -53,12 +53,10 @@ struct rte_mbuf;
   *     accesses through relaxed memory I/O windows, so smp_mb() et al are
   *     sufficient.
   *
- * This driver is for virtio_pci on SMP and therefore can assume
- * weaker (compiler barriers)
   */
-#define virtio_mb()    rte_mb()
-#define virtio_rmb()   rte_compiler_barrier()
-#define virtio_wmb()   rte_compiler_barrier()
+#define virtio_mb()    rte_smp_mb()
+#define virtio_rmb()   rte_smp_rmb()
+#define virtio_wmb()   rte_smp_wmb()
  
  #ifdef RTE_PMD_PACKET_PREFETCH
  #define rte_packet_prefetch(p)  rte_prefetch1(p)
diff --git a/drivers/net/xenvirt/rte_eth_xenvirt.c b/drivers/net/xenvirt/rte_eth_xenvirt.c

index 264aedae29c5f8960f6af5def4261952ae7fa4d8..e83c08c3835ab43f6a1c815958013d22dc6b58a0 100644 (file)
--- a/drivers/net/xenvirt/rte_eth_xenvirt.c
+++ b/drivers/net/xenvirt/rte_eth_xenvirt.c
@@ -102,7 +102,7 @@ eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
  
         nb_used = VIRTQUEUE_NUSED(rxvq);
  
-       rte_compiler_barrier(); /* rmb */
+       rte_smp_rmb();
         num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
         num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
         if (unlikely(num == 0)) return 0;
@@ -153,7 +153,7 @@ eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
         nb_used = VIRTQUEUE_NUSED(txvq);
  
-       rte_compiler_barrier();   /* rmb */
+       rte_smp_rmb();
  
         num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
         num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num);
diff --git a/drivers/net/xenvirt/virtqueue.h b/drivers/net/xenvirt/virtqueue.h

index eff620862367aca41fa2caf3dc5d8c1c6a64c87e..6dcb0efb94eddda6dd54ab9e9b135e7ce82acb39 100644 (file)
--- a/drivers/net/xenvirt/virtqueue.h
+++ b/drivers/net/xenvirt/virtqueue.h
@@ -151,7 +151,7 @@ vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
          */
         avail_idx = (uint16_t)(vq->vq_ring.avail->idx & (vq->vq_nentries - 1));
         vq->vq_ring.avail->ring[avail_idx] = desc_idx;
-       rte_compiler_barrier();  /* wmb , for IA memory model barrier is enough*/
+       rte_smp_wmb();
         vq->vq_ring.avail->idx++;
  }
  
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h

index f3f3b6e3163edd137f13e49e6ef9e0ce44ebdc1c..454a12b0a8201405a90d8e8f810fdb7f93385bed 100644 (file)
--- a/lib/librte_eal/common/include/arch/arm/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
@@ -39,4 +39,10 @@
  #include <rte_atomic_32.h>
  #endif
  
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_wmb()
+
+#define rte_smp_rmb() rte_rmb()
+
  #endif /* _RTE_ATOMIC_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h

index fb7af2bdea3ba05cdda0620a7d82daad020a8c4c..b8bc2c0b67408ab35d947ed1f777ac01a9ee8cff 100644 (file)
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -72,6 +72,12 @@ extern "C" {
   */
  #define        rte_rmb() {asm volatile("sync" : : : "memory"); }
  
+#define rte_smp_mb() rte_mb()
+/* POWER is weakly ordered: the CPU may reorder stores, so a compiler barrier is not enough between lcores. */
+#define rte_smp_wmb() rte_wmb()
+/* Loads may likewise be reordered by the CPU; use the hardware read barrier rather than a compiler barrier. */
+#define rte_smp_rmb() rte_rmb()
+
  /*------------------------- 16 bit atomic operations -------------------------*/
  /* To be compatible with Power7, use GCC built-in functions for 16 bit
   * operations */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_atomic.h b/lib/librte_eal/common/include/arch/tile/rte_atomic.h

index 3dc8eb825a2e75edf38ad53eaaff64154b4cfa78..28825ff65b195c08b66e37e825bb0496d28a16f1 100644 (file)
--- a/lib/librte_eal/common/include/arch/tile/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/tile/rte_atomic.h
@@ -79,6 +79,12 @@ static inline void rte_rmb(void)
         __sync_synchronize();
  }
  
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_compiler_barrier()
+
+#define rte_smp_rmb() rte_compiler_barrier()
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h

index e93e8eef24db87fbe9477503ae9de3a99c8c0aaf..41178c7ba4ac8360af7126d6dceed13824e9a9b3 100644 (file)
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
@@ -53,6 +53,12 @@ extern "C" {
  
  #define        rte_rmb() _mm_lfence()
  
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_compiler_barrier()
+
+#define rte_smp_rmb() rte_compiler_barrier()
+
  /*------------------------- 16 bit atomic operations -------------------------*/
  
  #ifndef RTE_FORCE_INTRINSICS
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h

index 6c7581adac265f4a6d44aa53c3d18c3f37dabaac..26d1f56de25213d8763af8ba6844f3b20bb29112 100644 (file)
--- a/lib/librte_eal/common/include/generic/rte_atomic.h
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -72,6 +72,33 @@ static inline void rte_wmb(void);
   */
  static inline void rte_rmb(void);
  
+/**
+ * General memory barrier between lcores
+ *
+ * Guarantees that the LOAD and STORE operations that precede the
+ * rte_smp_mb() call are globally visible across the lcores
+ * before the LOAD and STORE operations that follow it.
+ */
+static inline void rte_smp_mb(void);
+
+/**
+ * Write memory barrier between lcores
+ *
+ * Guarantees that the STORE operations that precede the
+ * rte_smp_wmb() call are globally visible across the lcores
+ * before the STORE operations that follow it.
+ */
+static inline void rte_smp_wmb(void);
+
+/**
+ * Read memory barrier between lcores
+ *
+ * Guarantees that the LOAD operations that precede the
+ * rte_smp_rmb() call are globally visible across the lcores
+ * before the LOAD operations that follow it.
+ */
+static inline void rte_smp_rmb(void);
+
  #endif /* __DOXYGEN__ */
  
  /**
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h

index fb5a6263a1f9e9bb178518daac78fd0a75ffa248..de036cefc8fafdffd3aa594b4d4ed418395b8e6a 100644 (file)
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -468,7 +468,7 @@ __rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table,
  
         /* write entries in ring */
         ENQUEUE_PTRS();
-       rte_compiler_barrier();
+       rte_smp_wmb();
  
         /* if we exceed the watermark */
         if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
@@ -563,7 +563,7 @@ __rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table,
  
         /* write entries in ring */
         ENQUEUE_PTRS();
-       rte_compiler_barrier();
+       rte_smp_wmb();
  
         /* if we exceed the watermark */
         if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
@@ -654,7 +654,7 @@ __rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table,
  
         /* copy in table */
         DEQUEUE_PTRS();
-       rte_compiler_barrier();
+       rte_smp_rmb();
  
         /*
          * If there are other dequeues in progress that preceded us,
@@ -738,7 +738,7 @@ __rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table,
  
         /* copy in table */
         DEQUEUE_PTRS();
-       rte_compiler_barrier();
+       rte_smp_rmb();
  
         __RING_STAT_ADD(r, deq_success, n);
         r->cons.tail = cons_next;
author	Jerin Jacob <jerin.jacob@caviumnetworks.com>
	Fri, 6 Nov 2015 09:40:32 +0000 (15:10 +0530)
committer	Thomas Monjalon <thomas.monjalon@6wind.com>
	Wed, 18 Nov 2015 21:44:01 +0000 (22:44 +0100)
drivers/net/virtio/virtqueue.h		patch \| blob \| history
drivers/net/xenvirt/rte_eth_xenvirt.c		patch \| blob \| history
drivers/net/xenvirt/virtqueue.h		patch \| blob \| history
lib/librte_eal/common/include/arch/arm/rte_atomic.h		patch \| blob \| history
lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h		patch \| blob \| history
lib/librte_eal/common/include/arch/tile/rte_atomic.h		patch \| blob \| history
lib/librte_eal/common/include/arch/x86/rte_atomic.h		patch \| blob \| history
lib/librte_eal/common/include/generic/rte_atomic.h		patch \| blob \| history
lib/librte_ring/rte_ring.h		patch \| blob \| history