lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2015 Cavium, Inc
 * Copyright(c) 2019 Arm Limited
 */

#ifndef _RTE_ATOMIC_ARM64_H_
#define _RTE_ATOMIC_ARM64_H_

#ifndef RTE_FORCE_INTRINSICS
#  error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS
#endif

#ifdef __cplusplus
extern "C" {
#endif

#include "generic/rte_atomic.h"
#include <rte_branch_prediction.h>
#include <rte_compat.h>
#include <rte_debug.h>
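
/*
 * dsb/dmb emit the AArch64 Data Synchronization Barrier and Data Memory
 * Barrier instructions. The option selects direction and scope: sy (full
 * system), st (stores only), ld (loads only), and the ish/osh variants
 * (inner/outer shareable domain, i.e. other cores vs. cores plus I/O
 * observers).
 */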
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")

#define rte_mb() dsb(sy)

#define rte_wmb() dsb(st)

#define rte_rmb() dsb(ld)

#define rte_smp_mb() dmb(ish)

#define rte_smp_wmb() dmb(ishst)

#define rte_smp_rmb() dmb(ishld)

#define rte_io_mb() rte_mb()

#define rte_io_wmb() rte_wmb()

#define rte_io_rmb() rte_rmb()

#define rte_cio_wmb() dmb(oshst)

#define rte_cio_rmb() dmb(oshld)
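
/*
 * Typical pairing of the SMP barriers between two cores, as a minimal
 * single-producer/single-consumer sketch ('data', 'flag' and consume()
 * are illustrative placeholders, not part of this API):
 *
 *	producer:
 *		data = value;
 *		rte_smp_wmb();	// order the data store before the flag store
 *		flag = 1;
 *
 *	consumer:
 *		while (flag == 0)
 *			;
 *		rte_smp_rmb();	// order the flag load before the data loads
 *		consume(data);
 */
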
/*------------------------ 128 bit atomic operations -------------------------*/

#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
#define __ATOMIC128_CAS_OP(cas_op_name, op_string)                          \
static __rte_noinline rte_int128_t                                          \
cas_op_name(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated)      \
{                                                                           \
	/* The casp* instructions require the register pair at operand 1
	 * to start at an even-numbered register, so pin the local
	 * variables to specific registers here.
	 */                                                                 \
	register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];            \
	register uint64_t x1 __asm("x1") = (uint64_t)old.val[1];            \
	register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0];        \
	register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1];        \
	asm volatile(                                                       \
		op_string " %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"   \
		: [old0] "+r" (x0),                                         \
		[old1] "+r" (x1)                                            \
		: [upd0] "r" (x2),                                          \
		[upd1] "r" (x3),                                            \
		[dst] "r" (dst)                                             \
		: "memory");                                                \
	old.val[0] = x0;                                                    \
	old.val[1] = x1;                                                    \
	return old;                                                         \
}

__ATOMIC128_CAS_OP(__cas_128_relaxed, "casp")
__ATOMIC128_CAS_OP(__cas_128_acquire, "caspa")
__ATOMIC128_CAS_OP(__cas_128_release, "caspl")
__ATOMIC128_CAS_OP(__cas_128_acq_rel, "caspal")

#undef __ATOMIC128_CAS_OP

#endif

__rte_experimental
static inline int
rte_atomic128_cmp_exchange(rte_int128_t *dst, rte_int128_t *exp,
		const rte_int128_t *src, unsigned int weak, int success,
		int failure)
{
	/* Always do a strong CAS; 'weak' is ignored. */
	RTE_SET_USED(weak);
	/* The memory order for failure is ignored; the order for success
	 * must be at least as strong as the one requested for failure.
	 */
	RTE_SET_USED(failure);
	/* Catch invalid memory orders for success */
	RTE_ASSERT(success == __ATOMIC_RELAXED ||
		success == __ATOMIC_ACQUIRE ||
		success == __ATOMIC_RELEASE ||
		success == __ATOMIC_ACQ_REL ||
		success == __ATOMIC_SEQ_CST);

	rte_int128_t expected = *exp;
	rte_int128_t desired = *src;
	rte_int128_t old;

#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
	if (success == __ATOMIC_RELAXED)
		old = __cas_128_relaxed(dst, expected, desired);
	else if (success == __ATOMIC_ACQUIRE)
		old = __cas_128_acquire(dst, expected, desired);
	else if (success == __ATOMIC_RELEASE)
		old = __cas_128_release(dst, expected, desired);
	else
		old = __cas_128_acq_rel(dst, expected, desired);
#else
#define __HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
#define __HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || (mo) == __ATOMIC_ACQ_REL || \
		(mo) == __ATOMIC_SEQ_CST)

	int ldx_mo = __HAS_ACQ(success) ? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED;
	int stx_mo = __HAS_RLS(success) ? __ATOMIC_RELEASE : __ATOMIC_RELAXED;

#undef __HAS_ACQ
#undef __HAS_RLS

	uint32_t ret = 1;

	/* An ldxp load is not guaranteed to be single-copy atomic on its
	 * own; either the desired or the old value must be written back
	 * with stxp so that a successful store confirms the load was atomic.
	 */
	do {

#define __LOAD_128(op_string, src, dst) { \
	asm volatile(                     \
		op_string " %0, %1, %2"   \
		: "=&r" (dst.val[0]),     \
		  "=&r" (dst.val[1])      \
		: "Q" (src->val[0])       \
		: "memory"); }

		if (ldx_mo == __ATOMIC_RELAXED)
			__LOAD_128("ldxp", dst, old)
		else
			__LOAD_128("ldaxp", dst, old)

#undef __LOAD_128

#define __STORE_128(op_string, dst, src, ret) { \
	asm volatile(                           \
		op_string " %w0, %1, %2, %3"    \
		: "=&r" (ret)                   \
		: "r" (src.val[0]),             \
		  "r" (src.val[1]),             \
		  "Q" (dst->val[0])             \
		: "memory"); }

		if (likely(old.int128 == expected.int128)) {
			if (stx_mo == __ATOMIC_RELAXED)
				__STORE_128("stxp", dst, desired, ret)
			else
				__STORE_128("stlxp", dst, desired, ret)
		} else {
			/* In the failure case (since 'weak' is ignored and only
			 * weak == 0 is implemented), expected should contain
			 * the atomically read value of dst. This means, 'old'
			 * needs to be stored back to ensure it was read
			 * atomically.
			 */
			if (stx_mo == __ATOMIC_RELAXED)
				__STORE_128("stxp", dst, old, ret)
			else
				__STORE_128("stlxp", dst, old, ret)
		}

#undef __STORE_128

	} while (unlikely(ret));
#endif

	/* Unconditionally updating expected removes an 'if' statement.
	 * expected should already be in register if not in the cache.
	 */
	*exp = old;

	return (old.int128 == expected.int128);
}
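
/*
 * A minimal usage sketch (illustrative only; 'counters' and the increment
 * are placeholders, not part of this API): atomically update both halves
 * of a 128-bit pair with a CAS retry loop.
 *
 *	rte_int128_t old, desired;
 *
 *	old = *counters;	// plain snapshot to seed the loop
 *	do {
 *		desired.val[0] = old.val[0] + 1;
 *		desired.val[1] = old.val[1] + 1;
 *	} while (!rte_atomic128_cmp_exchange(counters, &old, &desired, 0,
 *			__ATOMIC_ACQ_REL, __ATOMIC_RELAXED));
 *	// on failure, 'old' is refreshed with the current value of *counters
 */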

#ifdef __cplusplus
}
#endif

#endif /* _RTE_ATOMIC_ARM64_H_ */