X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Fx86%2Frte_power_intrinsics.c;h=af3ae3237c1e93e25bb87b2f6d47f255d1ac1242;hb=27ff8384deaca2f7727d0cedf2053aa13fbae1e2;hp=2a38440bec98daa6922c97c7bee5314d75b69c6a;hpb=68fbbb8369dbe0c38b4464886855d937ce21a443;p=dpdk.git diff --git a/lib/librte_eal/x86/rte_power_intrinsics.c b/lib/librte_eal/x86/rte_power_intrinsics.c index 2a38440bec..af3ae3237c 100644 --- a/lib/librte_eal/x86/rte_power_intrinsics.c +++ b/lib/librte_eal/x86/rte_power_intrinsics.c @@ -2,8 +2,31 @@ * Copyright(c) 2020 Intel Corporation */ +#include +#include +#include + #include "rte_power_intrinsics.h" +/* + * Per-lcore structure holding current status of C0.2 sleeps; the lock guards monitor_addr. + */ +static struct power_wait_status { + rte_spinlock_t lock; + volatile void *monitor_addr; /**< NULL if not currently sleeping */ +} __rte_cache_aligned wait_status[RTE_MAX_LCORE]; + +static inline void +__umwait_wakeup(volatile void *addr) +{ + uint64_t val; + + /* trigger a write but don't change the value: compare-and-swap the value just loaded with itself. NOTE(review): the access is always 64 bits wide here, whatever data size the sleeper is monitoring with - confirm this is safe for every monitored address */ + val = __atomic_load_n((volatile uint64_t *)addr, __ATOMIC_RELAXED); + __atomic_compare_exchange_n((volatile uint64_t *)addr, &val, val, 0, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); +} + static bool wait_supported; static inline uint64_t @@ -46,66 +69,33 @@ __check_val_size(const uint8_t sz) * Intel(R) 64 and IA-32 Architectures Software Developer's Manual. 
*/ int -rte_power_monitor(const volatile void *p, const uint64_t expected_value, - const uint64_t value_mask, const uint64_t tsc_timestamp, - const uint8_t data_sz) +rte_power_monitor(const struct rte_power_monitor_cond *pmc, + const uint64_t tsc_timestamp) { const uint32_t tsc_l = (uint32_t)tsc_timestamp; const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32); + const unsigned int lcore_id = rte_lcore_id(); + struct power_wait_status *s; /* prevent user from running this instruction if it's not supported */ if (!wait_supported) return -ENOTSUP; - if (__check_val_size(data_sz) < 0) + /* prevent non-EAL thread from using this API */ + if (lcore_id >= RTE_MAX_LCORE) return -EINVAL; - /* - * we're using raw byte codes for now as only the newest compiler - * versions support this instruction natively. - */ - - /* set address for UMONITOR */ - asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;" - : - : "D"(p)); - - if (value_mask) { - const uint64_t cur_value = __get_umwait_val(p, data_sz); - const uint64_t masked = cur_value & value_mask; - - /* if the masked value is already matching, abort */ - if (masked == expected_value) - return 0; - } - /* execute UMWAIT */ - asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;" - : /* ignore rflags */ - : "D"(0), /* enter C0.2 */ - "a"(tsc_l), "d"(tsc_h)); - - return 0; -} + if (pmc == NULL) + return -EINVAL; /* NOTE(review): pmc->addr is used below without a NULL check - confirm callers guarantee it is valid */ -/** - * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state. - * For more information about usage of these instructions, please refer to - * Intel(R) 64 and IA-32 Architectures Software Developer's Manual. 
- */ -int -rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value, - const uint64_t value_mask, const uint64_t tsc_timestamp, - const uint8_t data_sz, rte_spinlock_t *lck) -{ - const uint32_t tsc_l = (uint32_t)tsc_timestamp; - const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32); + if (__check_val_size(pmc->data_sz) < 0) + return -EINVAL; - /* prevent user from running this instruction if it's not supported */ - if (!wait_supported) - return -ENOTSUP; + s = &wait_status[lcore_id]; - if (__check_val_size(data_sz) < 0) - return -EINVAL; + /* update sleep address so that rte_power_monitor_wakeup() knows where to write */ + rte_spinlock_lock(&s->lock); + s->monitor_addr = pmc->addr; /* * we're using raw byte codes for now as only the newest compiler @@ -115,17 +105,21 @@ rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value, /* set address for UMONITOR */ asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;" : - : "D"(p)); + : "D"(pmc->addr)); - if (value_mask) { - const uint64_t cur_value = __get_umwait_val(p, data_sz); - const uint64_t masked = cur_value & value_mask; + /* now that we've put this address into monitor, we can unlock */ + rte_spinlock_unlock(&s->lock); + + /* if we have a comparison mask, we might not need to sleep at all */ + if (pmc->mask) { + const uint64_t cur_value = __get_umwait_val( + pmc->addr, pmc->data_sz); + const uint64_t masked = cur_value & pmc->mask; /* if the masked value is already matching, abort */ - if (masked == expected_value) - return 0; + if (masked == pmc->val) + goto end; } - rte_spinlock_unlock(lck); /* execute UMWAIT */ asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;" @@ -133,7 +127,11 @@ rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value, : "D"(0), /* enter C0.2 */ "a"(tsc_l), "d"(tsc_h)); - rte_spinlock_lock(lck); +end: + /* erase sleep address: we are done sleeping, so a later wakeup has nothing to write to */ + rte_spinlock_lock(&s->lock); + s->monitor_addr = NULL; + rte_spinlock_unlock(&s->lock); return 0; } @@ -170,3 +168,48 @@ RTE_INIT(rte_power_intrinsics_init) { if 
(i.power_monitor && i.power_pause) wait_supported = 1; } + +int +rte_power_monitor_wakeup(const unsigned int lcore_id) +{ + struct power_wait_status *s; + + /* refuse to operate if wait_supported was not set at init */ + if (!wait_supported) + return -ENOTSUP; + + /* prevent buffer overrun of wait_status[] */ + if (lcore_id >= RTE_MAX_LCORE) + return -EINVAL; + + s = &wait_status[lcore_id]; + + /* + * There is a race condition between sleep, wakeup and locking, but we + * don't need to handle it. + * + * Possible situations: + * + * 1. T1 locks, sets address, unlocks + * 2. T2 locks, triggers wakeup, unlocks + * 3. T1 sleeps + * + * In this case, because T1 has already set the address for monitoring, + * we will wake up immediately even if T2 triggers wakeup before T1 + * goes to sleep. + * + * 1. T1 locks, sets address, unlocks, goes to sleep, and wakes up + * 2. T2 locks, triggers wakeup, and unlocks + * 3. T1 locks, erases address, and unlocks + * + * In this case, since we've already woken up, the "wakeup" was + * unneeded, and since T1 is still waiting on T2 releasing the lock, the + * wakeup address is still valid so it's perfectly safe to write it. + */ + rte_spinlock_lock(&s->lock); + if (s->monitor_addr != NULL) + __umwait_wakeup(s->monitor_addr); + rte_spinlock_unlock(&s->lock); + + return 0; +}