From 4c02e453cc6272e20c3a901b7afb6f49ec08f1b3 Mon Sep 17 00:00:00 2001 From: Jerin Jacob Date: Fri, 6 Nov 2015 15:10:32 +0530 Subject: [PATCH] eal: introduce SMP memory barriers This commit introduces rte_smp_mb(), rte_smp_wmb() and rte_smp_rmb(), in order to enable memory barriers between lcores. The patch does not provide any functional change for IA; the goal is to have infrastructure for weakly ordered machines like ARM to work on DPDK. Signed-off-by: Jerin Jacob Acked-by: Konstantin Ananyev --- drivers/net/virtio/virtqueue.h | 8 +++--- drivers/net/xenvirt/rte_eth_xenvirt.c | 4 +-- drivers/net/xenvirt/virtqueue.h | 2 +- .../common/include/arch/arm/rte_atomic.h | 6 +++++ .../common/include/arch/ppc_64/rte_atomic.h | 6 +++++ .../common/include/arch/tile/rte_atomic.h | 6 +++++ .../common/include/arch/x86/rte_atomic.h | 6 +++++ .../common/include/generic/rte_atomic.h | 27 +++++++++++++++++++ lib/librte_ring/rte_ring.h | 8 +++--- 9 files changed, 61 insertions(+), 12 deletions(-) diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index 689c321349..61b3137034 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -53,12 +53,10 @@ struct rte_mbuf; * accesses through relaxed memory I/O windows, so smp_mb() et al are * sufficient. 
* - * This driver is for virtio_pci on SMP and therefore can assume - * weaker (compiler barriers) */ -#define virtio_mb() rte_mb() -#define virtio_rmb() rte_compiler_barrier() -#define virtio_wmb() rte_compiler_barrier() +#define virtio_mb() rte_smp_mb() +#define virtio_rmb() rte_smp_rmb() +#define virtio_wmb() rte_smp_wmb() #ifdef RTE_PMD_PACKET_PREFETCH #define rte_packet_prefetch(p) rte_prefetch1(p) diff --git a/drivers/net/xenvirt/rte_eth_xenvirt.c b/drivers/net/xenvirt/rte_eth_xenvirt.c index 264aedae29..e83c08c383 100644 --- a/drivers/net/xenvirt/rte_eth_xenvirt.c +++ b/drivers/net/xenvirt/rte_eth_xenvirt.c @@ -102,7 +102,7 @@ eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) nb_used = VIRTQUEUE_NUSED(rxvq); - rte_compiler_barrier(); /* rmb */ + rte_smp_rmb(); num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ); if (unlikely(num == 0)) return 0; @@ -153,7 +153,7 @@ eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); nb_used = VIRTQUEUE_NUSED(txvq); - rte_compiler_barrier(); /* rmb */ + rte_smp_rmb(); num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? 
nb_used : VIRTIO_MBUF_BURST_SZ); num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num); diff --git a/drivers/net/xenvirt/virtqueue.h b/drivers/net/xenvirt/virtqueue.h index eff6208623..6dcb0efb94 100644 --- a/drivers/net/xenvirt/virtqueue.h +++ b/drivers/net/xenvirt/virtqueue.h @@ -151,7 +151,7 @@ vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx) */ avail_idx = (uint16_t)(vq->vq_ring.avail->idx & (vq->vq_nentries - 1)); vq->vq_ring.avail->ring[avail_idx] = desc_idx; - rte_compiler_barrier(); /* wmb , for IA memory model barrier is enough*/ + rte_smp_wmb(); vq->vq_ring.avail->idx++; } diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h index f3f3b6e316..454a12b0a8 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h @@ -39,4 +39,10 @@ #include #endif +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_wmb() + +#define rte_smp_rmb() rte_rmb() + #endif /* _RTE_ATOMIC_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h index fb7af2bdea..b8bc2c0b67 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -72,6 +72,12 @@ extern "C" { */ #define rte_rmb() {asm volatile("sync" : : : "memory"); } +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + /*------------------------- 16 bit atomic operations -------------------------*/ /* To be compatible with Power7, use GCC built-in functions for 16 bit * operations */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_atomic.h b/lib/librte_eal/common/include/arch/tile/rte_atomic.h index 3dc8eb825a..28825ff65b 100644 --- a/lib/librte_eal/common/include/arch/tile/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/tile/rte_atomic.h @@ 
-79,6 +79,12 @@ static inline void rte_rmb(void) __sync_synchronize(); } +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h index e93e8eef24..41178c7ba4 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -53,6 +53,12 @@ extern "C" { #define rte_rmb() _mm_lfence() +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + /*------------------------- 16 bit atomic operations -------------------------*/ #ifndef RTE_FORCE_INTRINSICS diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h index 6c7581adac..26d1f56de2 100644 --- a/lib/librte_eal/common/include/generic/rte_atomic.h +++ b/lib/librte_eal/common/include/generic/rte_atomic.h @@ -72,6 +72,33 @@ static inline void rte_wmb(void); */ static inline void rte_rmb(void); +/** + * General memory barrier between lcores + * + * Guarantees that the LOAD and STORE operations that precede the + * rte_smp_mb() call are globally visible across the lcores + * before the LOAD and STORE operations that follow it. + */ +static inline void rte_smp_mb(void); + +/** + * Write memory barrier between lcores + * + * Guarantees that the STORE operations that precede the + * rte_smp_wmb() call are globally visible across the lcores + * before the STORE operations that follow it. + */ +static inline void rte_smp_wmb(void); + +/** + * Read memory barrier between lcores + * + * Guarantees that the LOAD operations that precede the + * rte_smp_rmb() call are globally visible across the lcores + * before the LOAD operations that follow it. 
+ */ +static inline void rte_smp_rmb(void); + #endif /* __DOXYGEN__ */ /** diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h index fb5a6263a1..de036cefc8 100644 --- a/lib/librte_ring/rte_ring.h +++ b/lib/librte_ring/rte_ring.h @@ -468,7 +468,7 @@ __rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table, /* write entries in ring */ ENQUEUE_PTRS(); - rte_compiler_barrier(); + rte_smp_wmb(); /* if we exceed the watermark */ if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) { @@ -563,7 +563,7 @@ __rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table, /* write entries in ring */ ENQUEUE_PTRS(); - rte_compiler_barrier(); + rte_smp_wmb(); /* if we exceed the watermark */ if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) { @@ -654,7 +654,7 @@ __rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table, /* copy in table */ DEQUEUE_PTRS(); - rte_compiler_barrier(); + rte_smp_rmb(); /* * If there are other dequeues in progress that preceded us, @@ -738,7 +738,7 @@ __rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table, /* copy in table */ DEQUEUE_PTRS(); - rte_compiler_barrier(); + rte_smp_rmb(); __RING_STAT_ADD(r, deq_success, n); r->cons.tail = cons_next; -- 2.20.1