1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
5 #include <rte_common.h>
7 #include <rte_spinlock.h>
9 #include "rte_power_intrinsics.h"
12 * Per-lcore structure holding current status of C0.2 sleeps.
14 static struct power_wait_status {
16 volatile void *monitor_addr; /**< NULL if not currently sleeping */
17 } __rte_cache_aligned wait_status[RTE_MAX_LCORE];
/*
 * Wake an lcore sleeping in UMWAIT by touching the monitored cache line.
 * A successful CAS of the current value with itself counts as a store to
 * the monitored address, which breaks the wait without changing the data.
 */
static void
__umwait_wakeup(volatile void *addr)
{
	uint64_t expected;

	/* trigger a write but don't change the value */
	expected = __atomic_load_n((volatile uint64_t *)addr, __ATOMIC_RELAXED);
	__atomic_compare_exchange_n((volatile uint64_t *)addr, &expected,
			expected, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
/* true when the CPU supports both UMWAIT and TPAUSE; set once at init
 * from rte_cpu_get_intrinsics_support() — see the RTE_INIT constructor.
 */
static bool wait_supported;
/*
 * Read the monitored location at its native width and zero-extend the
 * result to 64 bits, so it can be masked/compared uniformly.
 * The size is expected to have been validated by __check_val_size().
 */
static inline uint64_t
__get_umwait_val(const volatile void *p, const uint8_t sz)
{
	switch (sz) {
	case sizeof(uint8_t):
		return *(const volatile uint8_t *)p;
	case sizeof(uint16_t):
		return *(const volatile uint16_t *)p;
	case sizeof(uint32_t):
		return *(const volatile uint32_t *)p;
	case sizeof(uint64_t):
		return *(const volatile uint64_t *)p;
	default:
		/* shouldn't happen - size was validated earlier */
		return 0;
	}
}
/*
 * Validate a monitor-condition operand width.
 * Returns 0 for the supported widths (1, 2, 4 or 8 bytes), -1 otherwise.
 */
static inline int
__check_val_size(const uint8_t sz)
{
	/* only native integer widths can be read by __get_umwait_val() */
	if (sz == sizeof(uint8_t) || sz == sizeof(uint16_t) ||
			sz == sizeof(uint32_t) || sz == sizeof(uint64_t))
		return 0;

	/* unexpected size */
	return -1;
}
67 * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
68 * For more information about usage of these instructions, please refer to
69 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual.
72 rte_power_monitor(const struct rte_power_monitor_cond *pmc,
73 const uint64_t tsc_timestamp)
75 const uint32_t tsc_l = (uint32_t)tsc_timestamp;
76 const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
77 const unsigned int lcore_id = rte_lcore_id();
78 struct power_wait_status *s;
80 /* prevent user from running this instruction if it's not supported */
84 /* prevent non-EAL thread from using this API */
85 if (lcore_id >= RTE_MAX_LCORE)
91 if (__check_val_size(pmc->size) < 0)
94 s = &wait_status[lcore_id];
96 /* update sleep address */
97 rte_spinlock_lock(&s->lock);
98 s->monitor_addr = pmc->addr;
101 * we're using raw byte codes for now as only the newest compiler
102 * versions support this instruction natively.
105 /* set address for UMONITOR */
106 asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
110 /* now that we've put this address into monitor, we can unlock */
111 rte_spinlock_unlock(&s->lock);
113 /* if we have a comparison mask, we might not need to sleep at all */
115 const uint64_t cur_value = __get_umwait_val(
116 pmc->addr, pmc->size);
117 const uint64_t masked = cur_value & pmc->mask;
119 /* if the masked value is already matching, abort */
120 if (masked == pmc->val)
125 asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
126 : /* ignore rflags */
127 : "D"(0), /* enter C0.2 */
128 "a"(tsc_l), "d"(tsc_h));
131 /* erase sleep address */
132 rte_spinlock_lock(&s->lock);
133 s->monitor_addr = NULL;
134 rte_spinlock_unlock(&s->lock);
140 * This function uses TPAUSE instruction and will enter C0.2 state. For more
141 * information about usage of this instruction, please refer to Intel(R) 64 and
142 * IA-32 Architectures Software Developer's Manual.
145 rte_power_pause(const uint64_t tsc_timestamp)
147 const uint32_t tsc_l = (uint32_t)tsc_timestamp;
148 const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
150 /* prevent user from running this instruction if it's not supported */
155 asm volatile(".byte 0x66, 0x0f, 0xae, 0xf7;"
156 : /* ignore rflags */
157 : "D"(0), /* enter C0.2 */
158 "a"(tsc_l), "d"(tsc_h));
163 RTE_INIT(rte_power_intrinsics_init) {
164 struct rte_cpu_intrinsics i;
166 rte_cpu_get_intrinsics_support(&i);
168 if (i.power_monitor && i.power_pause)
173 rte_power_monitor_wakeup(const unsigned int lcore_id)
175 struct power_wait_status *s;
177 /* prevent user from running this instruction if it's not supported */
181 /* prevent buffer overrun */
182 if (lcore_id >= RTE_MAX_LCORE)
185 s = &wait_status[lcore_id];
188 * There is a race condition between sleep, wakeup and locking, but we
189 * don't need to handle it.
191 * Possible situations:
193 * 1. T1 locks, sets address, unlocks
194 * 2. T2 locks, triggers wakeup, unlocks
197 * In this case, because T1 has already set the address for monitoring,
198 * we will wake up immediately even if T2 triggers wakeup before T1
201 * 1. T1 locks, sets address, unlocks, goes to sleep, and wakes up
202 * 2. T2 locks, triggers wakeup, and unlocks
203 * 3. T1 locks, erases address, and unlocks
205 * In this case, since we've already woken up, the "wakeup" was
206 * unneeded, and since T1 is still waiting on T2 releasing the lock, the
207 * wakeup address is still valid so it's perfectly safe to write it.
209 rte_spinlock_lock(&s->lock);
210 if (s->monitor_addr != NULL)
211 __umwait_wakeup(s->monitor_addr);
212 rte_spinlock_unlock(&s->lock);