lib/librte_eal/x86/include/rte_atomic.h

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(c) 2010-2014 Intel Corporation
   3  */
   4
   5 #ifndef _RTE_ATOMIC_X86_H_
   6 #define _RTE_ATOMIC_X86_H_
   7
   8 #ifdef __cplusplus
   9 extern "C" {
  10 #endif
  11
  12 #include <stdint.h>
  13 #include <rte_common.h>
  14 #include <rte_config.h>
  15 #include <emmintrin.h>
  16 #include "generic/rte_atomic.h"
  17
/*
 * Instruction prefix pasted in front of the read-modify-write instructions
 * in the inline assembly templates of this file.
 *
 * When the build is configured for exactly one lcore (RTE_MAX_LCORE == 1)
 * the macro expands to nothing; in every other configuration it expands to
 * the string "lock ; ", i.e. the x86 LOCK prefix followed by a separator.
 */
#if RTE_MAX_LCORE == 1
#define MPLOCKED                        /**< No need to insert MP lock prefix. */
#else
#define MPLOCKED        "lock ; "       /**< Insert MP lock prefix. */
#endif
  23
/* General memory barrier: maps to the _mm_mfence() intrinsic. */
#define rte_mb() _mm_mfence()

/* Write memory barrier: maps to the _mm_sfence() intrinsic. */
#define rte_wmb() _mm_sfence()

/* Read memory barrier: maps to the _mm_lfence() intrinsic. */
#define rte_rmb() _mm_lfence()

/*
 * SMP write barrier: expands only to rte_compiler_barrier(), which is
 * defined outside this file.
 * NOTE(review): presumably sufficient because of the "writes are not
 * reordered with other writes" rule quoted from the Intel SDM in the
 * comment that follows these macros - confirm.
 */
#define rte_smp_wmb() rte_compiler_barrier()

/*
 * SMP read barrier: expands only to rte_compiler_barrier(), which is
 * defined outside this file.
 * NOTE(review): presumably sufficient because of the "reads are not
 * reordered with other reads" rule quoted from the Intel SDM in the
 * comment that follows these macros - confirm.
 */
#define rte_smp_rmb() rte_compiler_barrier()
  33
  34 /*
  35  * From Intel Software Development Manual; Vol 3;
  36  * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
  37  * ...
  38  * . Reads are not reordered with other reads.
  39  * . Writes are not reordered with older reads.
  40  * . Writes to memory are not reordered with other writes,
  41  *   with the following exceptions:
  42  *   . streaming stores (writes) executed with the non-temporal move
  43  *     instructions (MOVNTI, MOVNTQ, MOVNTDQ, MOVNTPS, and MOVNTPD); and
  44  *   . string operations (see Section 8.2.4.1).
  45  *  ...
  46  * . Reads may be reordered with older writes to different locations but not
  47  * with older writes to the same location.
  48  * . Reads or writes cannot be reordered with I/O instructions,
  49  * locked instructions, or serializing instructions.
  50  * . Reads cannot pass earlier LFENCE and MFENCE instructions.
  51  * . Writes ... cannot pass earlier LFENCE, SFENCE, and MFENCE instructions.
  52  * . LFENCE instructions cannot pass earlier reads.
  53  * . SFENCE instructions cannot pass earlier writes ...
  54  * . MFENCE instructions cannot pass earlier reads, writes ...
  55  *
 * As pointed out by the Java developers, that makes it possible to use
 * lock-prefixed instructions to get the same effect as mfence, and on most
 * modern HW that gives better performance than using mfence:
 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
 * The basic idea is to use a lock-prefixed add with some dummy memory
 * location as the destination. From their experiments, 128B (2 cache lines)
 * below the current stack pointer looks like a good candidate.
 * So below we use that technique for the rte_smp_mb() implementation.
  64  */
  65
  66 static __rte_always_inline void
  67 rte_smp_mb(void)
  68 {
  69 #ifdef RTE_ARCH_I686
  70         asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
  71 #else
  72         asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
  73 #endif
  74 }
  75
/* I/O general memory barrier: same as rte_mb(). */
#define rte_io_mb() rte_mb()

/*
 * I/O write barrier: expands only to rte_compiler_barrier(), which is
 * defined outside this file.
 */
#define rte_io_wmb() rte_compiler_barrier()

/*
 * I/O read barrier: expands only to rte_compiler_barrier(), which is
 * defined outside this file.
 */
#define rte_io_rmb() rte_compiler_barrier()

/*
 * Coherent I/O write barrier: expands only to rte_compiler_barrier(),
 * which is defined outside this file.
 */
#define rte_cio_wmb() rte_compiler_barrier()

/*
 * Coherent I/O read barrier: expands only to rte_compiler_barrier(),
 * which is defined outside this file.
 */
#define rte_cio_rmb() rte_compiler_barrier()
  85
  86 /**
  87  * Synchronization fence between threads based on the specified memory order.
  88  *
  89  * On x86 the __atomic_thread_fence(__ATOMIC_SEQ_CST) generates full 'mfence'
  90  * which is quite expensive. The optimized implementation of rte_smp_mb is
  91  * used instead.
  92  */
  93 static __rte_always_inline void
  94 rte_atomic_thread_fence(int memory_order)
  95 {
  96         if (memory_order == __ATOMIC_SEQ_CST)
  97                 rte_smp_mb();
  98         else
  99                 __atomic_thread_fence(memory_order);
 100 }
 101
 102 /*------------------------- 16 bit atomic operations -------------------------*/
 103
 104 #ifndef RTE_FORCE_INTRINSICS
 105 static inline int
 106 rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
 107 {
 108         uint8_t res;
 109
 110         asm volatile(
 111                         MPLOCKED
 112                         "cmpxchgw %[src], %[dst];"
 113                         "sete %[res];"
 114                         : [res] "=a" (res),     /* output */
 115                           [dst] "=m" (*dst)
 116                         : [src] "r" (src),      /* input */
 117                           "a" (exp),
 118                           "m" (*dst)
 119                         : "memory");            /* no-clobber list */
 120         return res;
 121 }
 122
 123 static inline uint16_t
 124 rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
 125 {
 126         asm volatile(
 127                         MPLOCKED
 128                         "xchgw %0, %1;"
 129                         : "=r" (val), "=m" (*dst)
 130                         : "0" (val),  "m" (*dst)
 131                         : "memory");         /* no-clobber list */
 132         return val;
 133 }
 134
 135 static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
 136 {
 137         return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
 138 }
 139
 140 static inline void
 141 rte_atomic16_inc(rte_atomic16_t *v)
 142 {
 143         asm volatile(
 144                         MPLOCKED
 145                         "incw %[cnt]"
 146                         : [cnt] "=m" (v->cnt)   /* output */
 147                         : "m" (v->cnt)          /* input */
 148                         );
 149 }
 150
 151 static inline void
 152 rte_atomic16_dec(rte_atomic16_t *v)
 153 {
 154         asm volatile(
 155                         MPLOCKED
 156                         "decw %[cnt]"
 157                         : [cnt] "=m" (v->cnt)   /* output */
 158                         : "m" (v->cnt)          /* input */
 159                         );
 160 }
 161
 162 static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
 163 {
 164         uint8_t ret;
 165
 166         asm volatile(
 167                         MPLOCKED
 168                         "incw %[cnt] ; "
 169                         "sete %[ret]"
 170                         : [cnt] "+m" (v->cnt),  /* output */
 171                           [ret] "=qm" (ret)
 172                         );
 173         return ret != 0;
 174 }
 175
 176 static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
 177 {
 178         uint8_t ret;
 179
 180         asm volatile(MPLOCKED
 181                         "decw %[cnt] ; "
 182                         "sete %[ret]"
 183                         : [cnt] "+m" (v->cnt),  /* output */
 184                           [ret] "=qm" (ret)
 185                         );
 186         return ret != 0;
 187 }
 188
 189 /*------------------------- 32 bit atomic operations -------------------------*/
 190
 191 static inline int
 192 rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
 193 {
 194         uint8_t res;
 195
 196         asm volatile(
 197                         MPLOCKED
 198                         "cmpxchgl %[src], %[dst];"
 199                         "sete %[res];"
 200                         : [res] "=a" (res),     /* output */
 201                           [dst] "=m" (*dst)
 202                         : [src] "r" (src),      /* input */
 203                           "a" (exp),
 204                           "m" (*dst)
 205                         : "memory");            /* no-clobber list */
 206         return res;
 207 }
 208
 209 static inline uint32_t
 210 rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
 211 {
 212         asm volatile(
 213                         MPLOCKED
 214                         "xchgl %0, %1;"
 215                         : "=r" (val), "=m" (*dst)
 216                         : "0" (val),  "m" (*dst)
 217                         : "memory");         /* no-clobber list */
 218         return val;
 219 }
 220
 221 static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
 222 {
 223         return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
 224 }
 225
 226 static inline void
 227 rte_atomic32_inc(rte_atomic32_t *v)
 228 {
 229         asm volatile(
 230                         MPLOCKED
 231                         "incl %[cnt]"
 232                         : [cnt] "=m" (v->cnt)   /* output */
 233                         : "m" (v->cnt)          /* input */
 234                         );
 235 }
 236
 237 static inline void
 238 rte_atomic32_dec(rte_atomic32_t *v)
 239 {
 240         asm volatile(
 241                         MPLOCKED
 242                         "decl %[cnt]"
 243                         : [cnt] "=m" (v->cnt)   /* output */
 244                         : "m" (v->cnt)          /* input */
 245                         );
 246 }
 247
 248 static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
 249 {
 250         uint8_t ret;
 251
 252         asm volatile(
 253                         MPLOCKED
 254                         "incl %[cnt] ; "
 255                         "sete %[ret]"
 256                         : [cnt] "+m" (v->cnt),  /* output */
 257                           [ret] "=qm" (ret)
 258                         );
 259         return ret != 0;
 260 }
 261
 262 static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
 263 {
 264         uint8_t ret;
 265
 266         asm volatile(MPLOCKED
 267                         "decl %[cnt] ; "
 268                         "sete %[ret]"
 269                         : [cnt] "+m" (v->cnt),  /* output */
 270                           [ret] "=qm" (ret)
 271                         );
 272         return ret != 0;
 273 }
 274 #endif
 275
 276 #ifdef RTE_ARCH_I686
 277 #include "rte_atomic_32.h"
 278 #else
 279 #include "rte_atomic_64.h"
 280 #endif
 281
 282 #ifdef __cplusplus
 283 }
 284 #endif
 285
 286 #endif /* _RTE_ATOMIC_X86_H_ */