['RTE_CACHE_LINE_SIZE', 64],
['RTE_MAX_NUMA_NODES', 2],
['RTE_MAX_LCORE', 256],
+ ['RTE_ARM_FEATURE_ATOMICS', true],
['RTE_USE_C11_MEM_MODEL', true]]
flags_octeontx2_extra = [
['RTE_MACHINE', '"octeontx2"'],
['RTE_MAX_NUMA_NODES', 1],
['RTE_MAX_LCORE', 24],
+ ['RTE_ARM_FEATURE_ATOMICS', true],
['RTE_EAL_IGB_UIO', false],
['RTE_USE_C11_MEM_MODEL', true]]
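+# Note: RTE_ARM_FEATURE_ATOMICS enables the casp-based 128-bit atomics
+# path in rte_atomic_64.h on builds where the compiler does not
+# predefine __ARM_FEATURE_ATOMICS for the target CPU (e.g. when not
+# compiling with an -march that includes +lse).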
['0xa2', ['-mcpu=thunderxt81'], flags_thunderx_extra],
['0xa3', ['-mcpu=thunderxt83'], flags_thunderx_extra],
['0xaf', ['-march=armv8.1-a+crc+crypto','-mcpu=thunderx2t99'], flags_thunderx2_extra],
- ['0xb2', ['-mcpu=octeontx2'], flags_octeontx2_extra]]
+ ['0xb2', ['-march=armv8.2-a+crc+crypto+lse','-mcpu=octeontx2'], flags_octeontx2_extra]]
## Arm implementer ID (ARM DDI 0487C.a, Section G7.2.106, Page G7-5321)
impl_generic = ['Generic armv8', flags_generic, machine_args_generic]
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2015 Cavium, Inc
+ * Copyright(c) 2019 Arm Limited
*/
#ifndef _RTE_ATOMIC_ARM64_H_
#endif
#include "generic/rte_atomic.h"
+#include <rte_branch_prediction.h>
+#include <rte_compat.h>
+#include <rte_debug.h>
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
#define rte_cio_rmb() dmb(oshld)
+/*------------------------ 128 bit atomic operations -------------------------*/
+
+#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
+#define __ATOMIC128_CAS_OP(cas_op_name, op_string) \
+static __rte_noinline rte_int128_t \
+cas_op_name(rte_int128_t *dst, rte_int128_t old, rte_int128_t updated) \
+{ \
+	/* The caspX instructions require each register pair to start at
+	 * an even-numbered register, with the second half of the value
+	 * in the next consecutive register. Pin the local variables to
+	 * explicit registers to satisfy that constraint.
+	 */ \
+ register uint64_t x0 __asm("x0") = (uint64_t)old.val[0]; \
+ register uint64_t x1 __asm("x1") = (uint64_t)old.val[1]; \
+ register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0]; \
+ register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1]; \
+ asm volatile( \
+ op_string " %[old0], %[old1], %[upd0], %[upd1], [%[dst]]" \
+ : [old0] "+r" (x0), \
+ [old1] "+r" (x1) \
+ : [upd0] "r" (x2), \
+ [upd1] "r" (x3), \
+ [dst] "r" (dst) \
+ : "memory"); \
+ old.val[0] = x0; \
+ old.val[1] = x1; \
+ return old; \
+}
+
+__ATOMIC128_CAS_OP(__cas_128_relaxed, "casp")
+__ATOMIC128_CAS_OP(__cas_128_acquire, "caspa")
+__ATOMIC128_CAS_OP(__cas_128_release, "caspl")
+__ATOMIC128_CAS_OP(__cas_128_acq_rel, "caspal")
+
+#undef __ATOMIC128_CAS_OP
+
+#endif
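+
+/* For illustration: __ATOMIC128_CAS_OP(__cas_128_acquire, "caspa")
+ * above expands to roughly the following function (a sketch, not
+ * verbatim preprocessor output):
+ *
+ *	static __rte_noinline rte_int128_t
+ *	__cas_128_acquire(rte_int128_t *dst, rte_int128_t old,
+ *			rte_int128_t updated)
+ *	{
+ *		register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];
+ *		register uint64_t x1 __asm("x1") = (uint64_t)old.val[1];
+ *		register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0];
+ *		register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1];
+ *
+ *		asm volatile("caspa %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"
+ *			: [old0] "+r" (x0), [old1] "+r" (x1)
+ *			: [upd0] "r" (x2), [upd1] "r" (x3), [dst] "r" (dst)
+ *			: "memory");
+ *
+ *		old.val[0] = x0;
+ *		old.val[1] = x1;
+ *		return old;
+ *	}
+ */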
+
+__rte_experimental
+static inline int
+rte_atomic128_cmp_exchange(rte_int128_t *dst, rte_int128_t *exp,
+ const rte_int128_t *src, unsigned int weak, int success,
+ int failure)
+{
+	/* Only a strong CAS is implemented; 'weak' is ignored */
+	RTE_SET_USED(weak);
+	/* The failure ordering is ignored: the success ordering must be
+	 * at least as strong, so it is used for both outcomes.
+	 */
+	RTE_SET_USED(failure);
+	/* Catch invalid memory orders */
+ RTE_ASSERT(success == __ATOMIC_RELAXED ||
+ success == __ATOMIC_ACQUIRE ||
+ success == __ATOMIC_RELEASE ||
+ success == __ATOMIC_ACQ_REL ||
+ success == __ATOMIC_SEQ_CST);
+
+ rte_int128_t expected = *exp;
+ rte_int128_t desired = *src;
+ rte_int128_t old;
+
+#if defined(__ARM_FEATURE_ATOMICS) || defined(RTE_ARM_FEATURE_ATOMICS)
+ if (success == __ATOMIC_RELAXED)
+ old = __cas_128_relaxed(dst, expected, desired);
+ else if (success == __ATOMIC_ACQUIRE)
+ old = __cas_128_acquire(dst, expected, desired);
+ else if (success == __ATOMIC_RELEASE)
+ old = __cas_128_release(dst, expected, desired);
+ else
+ old = __cas_128_acq_rel(dst, expected, desired);
+#else
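+/* Split the requested ordering across the exclusive pair: any acquire
+ * component maps onto the exclusive load (ldaxp), any release component
+ * onto the exclusive store (stlxp).
+ */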
+#define __HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
+#define __HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || (mo) == __ATOMIC_ACQ_REL || \
+ (mo) == __ATOMIC_SEQ_CST)
+
+ int ldx_mo = __HAS_ACQ(success) ? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED;
+ int stx_mo = __HAS_RLS(success) ? __ATOMIC_RELEASE : __ATOMIC_RELAXED;
+
+#undef __HAS_ACQ
+#undef __HAS_RLS
+
+ uint32_t ret = 1;
+
+	/* An exclusive load pair (ldxp/ldaxp) does not by itself
+	 * guarantee a single-copy atomic 128-bit read; the read is only
+	 * known to have been atomic if the paired store-exclusive
+	 * succeeds. So always write back either the desired value or
+	 * the value just loaded.
+	 */
+ do {
+
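+/* Load the 128-bit value at 'src' into 'dst' with the given exclusive
+ * load-pair instruction (ldxp or ldaxp).
+ */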
+#define __LOAD_128(op_string, src, dst) { \
+ asm volatile( \
+ op_string " %0, %1, %2" \
+ : "=&r" (dst.val[0]), \
+ "=&r" (dst.val[1]) \
+ : "Q" (src->val[0]) \
+ : "memory"); }
+
+ if (ldx_mo == __ATOMIC_RELAXED)
+ __LOAD_128("ldxp", dst, old)
+ else
+ __LOAD_128("ldaxp", dst, old)
+
+#undef __LOAD_128
+
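+/* Store the 128-bit value 'src' to 'dst' with the given exclusive
+ * store-pair instruction (stxp or stlxp). 'ret' is set to 0 on
+ * success, or 1 if the exclusive monitor was lost and the loop must
+ * retry.
+ */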
+#define __STORE_128(op_string, dst, src, ret) { \
+ asm volatile( \
+ op_string " %w0, %1, %2, %3" \
+ : "=&r" (ret) \
+ : "r" (src.val[0]), \
+ "r" (src.val[1]), \
+ "Q" (dst->val[0]) \
+ : "memory"); }
+
+ if (likely(old.int128 == expected.int128)) {
+ if (stx_mo == __ATOMIC_RELAXED)
+ __STORE_128("stxp", dst, desired, ret)
+ else
+ __STORE_128("stlxp", dst, desired, ret)
+ } else {
+		/* On failure ('weak' is ignored, so this is always a
+		 * strong CAS), 'exp' must receive the value read
+		 * atomically from dst. Storing 'old' back is what
+		 * confirms that the load was atomic.
+		 */
+ if (stx_mo == __ATOMIC_RELAXED)
+ __STORE_128("stxp", dst, old, ret)
+ else
+ __STORE_128("stlxp", dst, old, ret)
+ }
+
+#undef __STORE_128
+
+ } while (unlikely(ret));
+#endif
+
+	/* Unconditionally writing 'old' back to *exp avoids a branch on
+	 * the comparison result; *exp is likely already in a register
+	 * or in cache, so the extra store is cheap.
+	 */
+ *exp = old;
+
+ return (old.int128 == expected.int128);
+}
+
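+/* Usage sketch: a minimal lock-free 128-bit counter update built on
+ * rte_atomic128_cmp_exchange(). The names 'val128' and 'inc128' are
+ * hypothetical, for illustration only.
+ *
+ *	static rte_int128_t val128;
+ *
+ *	static inline void
+ *	inc128(void)
+ *	{
+ *		rte_int128_t expected = val128;	// plain read; may be stale
+ *		rte_int128_t desired;
+ *
+ *		do {
+ *			desired.val[0] = expected.val[0] + 1;
+ *			desired.val[1] = expected.val[1];
+ *		} while (!rte_atomic128_cmp_exchange(&val128, &expected,
+ *				&desired, 0, __ATOMIC_RELAXED,
+ *				__ATOMIC_RELAXED));
+ *		// On failure, 'expected' is refreshed with the value read
+ *		// atomically from 'val128', so the loop retries correctly.
+ *	}
+ */
+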
#ifdef __cplusplus
}
#endif