From 7e6e609939a8d76bc5c0804d7cab91512dd607e4 Mon Sep 17 00:00:00 2001 From: Gage Eads Date: Wed, 3 Apr 2019 18:20:18 -0500 Subject: [PATCH] stack: add C11 atomic implementation This commit adds an implementation of the lock-free stack push, pop, and length functions that use __atomic builtins, for systems that benefit from the finer-grained memory ordering control. Signed-off-by: Gage Eads Reviewed-by: Olivier Matz Reviewed-by: Honnappa Nagarahalli --- lib/librte_stack/Makefile | 3 +- lib/librte_stack/meson.build | 3 +- lib/librte_stack/rte_stack_lf.h | 4 + lib/librte_stack/rte_stack_lf_c11.h | 175 ++++++++++++++++++++++++++++ 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 lib/librte_stack/rte_stack_lf_c11.h diff --git a/lib/librte_stack/Makefile b/lib/librte_stack/Makefile index 311edd9976..8d18ce5204 100644 --- a/lib/librte_stack/Makefile +++ b/lib/librte_stack/Makefile @@ -23,6 +23,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_STACK) := rte_stack.c \ SYMLINK-$(CONFIG_RTE_LIBRTE_STACK)-include := rte_stack.h \ rte_stack_std.h \ rte_stack_lf.h \ - rte_stack_lf_generic.h + rte_stack_lf_generic.h \ + rte_stack_lf_c11.h include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_stack/meson.build b/lib/librte_stack/meson.build index 7a09a5d666..46fce0c20f 100644 --- a/lib/librte_stack/meson.build +++ b/lib/librte_stack/meson.build @@ -8,4 +8,5 @@ sources = files('rte_stack.c', 'rte_stack_std.c', 'rte_stack_lf.c') headers = files('rte_stack.h', 'rte_stack_std.h', 'rte_stack_lf.h', - 'rte_stack_lf_generic.h') + 'rte_stack_lf_generic.h', + 'rte_stack_lf_c11.h') diff --git a/lib/librte_stack/rte_stack_lf.h b/lib/librte_stack/rte_stack_lf.h index bfd6801330..518889a05a 100644 --- a/lib/librte_stack/rte_stack_lf.h +++ b/lib/librte_stack/rte_stack_lf.h @@ -5,7 +5,11 @@ #ifndef _RTE_STACK_LF_H_ #define _RTE_STACK_LF_H_ +#ifdef RTE_USE_C11_MEM_MODEL +#include "rte_stack_lf_c11.h" +#else #include "rte_stack_lf_generic.h" +#endif /** * @internal Push several objects on the lock-free stack (MT-safe). diff --git a/lib/librte_stack/rte_stack_lf_c11.h b/lib/librte_stack/rte_stack_lf_c11.h new file mode 100644 index 0000000000..a316e9af50 --- /dev/null +++ b/lib/librte_stack/rte_stack_lf_c11.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef _RTE_STACK_LF_C11_H_ +#define _RTE_STACK_LF_C11_H_ + +#include +#include + +static __rte_always_inline unsigned int +__rte_stack_lf_count(struct rte_stack *s) +{ + /* stack_lf_push() and stack_lf_pop() do not update the list's contents + * and stack_lf->len atomically, which can cause the list to appear + * shorter than it actually is if this function is called while other + * threads are modifying the list. + * + * However, given the inherently approximate nature of the get_count + * callback -- even if the list and its size were updated atomically, + * the size could change between when get_count executes and when the + * value is returned to the caller -- this is acceptable. + * + * The stack_lf->len updates are placed such that the list may appear to + * have fewer elements than it does, but will never appear to have more + * elements. If the mempool is near-empty to the point that this is a + * concern, the user should consider increasing the mempool size. + */ + return (unsigned int)__atomic_load_n(&s->stack_lf.used.len.cnt, + __ATOMIC_RELAXED); +} + +static __rte_always_inline void +__rte_stack_lf_push_elems(struct rte_stack_lf_list *list, + struct rte_stack_lf_elem *first, + struct rte_stack_lf_elem *last, + unsigned int num) +{ +#ifndef RTE_ARCH_X86_64 + RTE_SET_USED(first); + RTE_SET_USED(last); + RTE_SET_USED(list); + RTE_SET_USED(num); +#else + struct rte_stack_lf_head old_head; + int success; + + old_head = list->head; + + do { + struct rte_stack_lf_head new_head; + + /* Use an acquire fence to establish a synchronized-with + * relationship between the list->head load and store-release + * operations (as part of the rte_atomic128_cmp_exchange()). + */ + __atomic_thread_fence(__ATOMIC_ACQUIRE); + + /* Swing the top pointer to the first element in the list and + * make the last element point to the old top. + */ + new_head.top = first; + new_head.cnt = old_head.cnt + 1; + + last->next = old_head.top; + + /* Use the release memmodel to ensure the writes to the LF LIFO + * elements are visible before the head pointer write. + */ + success = rte_atomic128_cmp_exchange( + (rte_int128_t *)&list->head, + (rte_int128_t *)&old_head, + (rte_int128_t *)&new_head, + 1, __ATOMIC_RELEASE, + __ATOMIC_RELAXED); + } while (success == 0); + + /* Ensure the stack modifications are not reordered with respect + * to the LIFO len update. + */ + __atomic_add_fetch(&list->len.cnt, num, __ATOMIC_RELEASE); +#endif +} + +static __rte_always_inline struct rte_stack_lf_elem * +__rte_stack_lf_pop_elems(struct rte_stack_lf_list *list, + unsigned int num, + void **obj_table, + struct rte_stack_lf_elem **last) +{ +#ifndef RTE_ARCH_X86_64 + RTE_SET_USED(obj_table); + RTE_SET_USED(last); + RTE_SET_USED(list); + RTE_SET_USED(num); + + return NULL; +#else + struct rte_stack_lf_head old_head; + uint64_t len; + int success; + + /* Reserve num elements, if available */ + len = __atomic_load_n(&list->len.cnt, __ATOMIC_ACQUIRE); + + while (1) { + /* Does the list contain enough elements? */ + if (unlikely(len < num)) + return NULL; + + /* len is updated on failure */ + if (__atomic_compare_exchange_n(&list->len.cnt, + &len, len - num, + 0, __ATOMIC_ACQUIRE, + __ATOMIC_ACQUIRE)) + break; + } + + /* If a torn read occurs, the CAS will fail and set old_head to the + * correct/latest value. + */ + old_head = list->head; + + /* Pop num elements */ + do { + struct rte_stack_lf_head new_head; + struct rte_stack_lf_elem *tmp; + unsigned int i; + + /* Use the acquire memmodel to ensure the reads to the LF LIFO + * elements are properly ordered with respect to the head + * pointer read. + */ + __atomic_thread_fence(__ATOMIC_ACQUIRE); + + rte_prefetch0(old_head.top); + + tmp = old_head.top; + + /* Traverse the list to find the new head. A next pointer will + * either point to another element or NULL; if a thread + * encounters a pointer that has already been popped, the CAS + * will fail. + */ + for (i = 0; i < num && tmp != NULL; i++) { + rte_prefetch0(tmp->next); + if (obj_table) + obj_table[i] = tmp->data; + if (last) + *last = tmp; + tmp = tmp->next; + } + + /* If NULL was encountered, the list was modified while + * traversing it. Retry. + */ + if (i != num) + continue; + + new_head.top = tmp; + new_head.cnt = old_head.cnt + 1; + + success = rte_atomic128_cmp_exchange( + (rte_int128_t *)&list->head, + (rte_int128_t *)&old_head, + (rte_int128_t *)&new_head, + 1, __ATOMIC_RELEASE, + __ATOMIC_RELAXED); + } while (success == 0); + + return old_head.top; +#endif +} + +#endif /* _RTE_STACK_LF_C11_H_ */ -- 2.20.1