1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
5 #include <rte_common.h>
8 #include <rte_spinlock.h>
10 #include "rte_power_intrinsics.h"
13 * Per-lcore structure holding current status of C0.2 sleeps.
/*
 * Per-lcore sleep bookkeeping, one cache-aligned slot per lcore.
 * monitor_addr is published under the slot's spinlock by the sleeping
 * lcore and read by rte_power_monitor_wakeup() to decide whether a
 * wakeup write is needed.
 */
15 static struct power_wait_status {
17 	volatile void *monitor_addr; /**< NULL if not currently sleeping */
18 } __rte_cache_aligned wait_status[RTE_MAX_LCORE];
/*
 * Wake a core sleeping in UMWAIT on 'addr' by performing a store to the
 * monitored location.  The CAS writes back the exact value just read, so
 * the data is unchanged -- only the write itself matters, as it drops the
 * monitored cache line and ends the wait.
 */
21 __umwait_wakeup(volatile void *addr)
25 	/* trigger a write but don't change the value */
26 	val = __atomic_load_n((volatile uint64_t *)addr, __ATOMIC_RELAXED);
27 	__atomic_compare_exchange_n((volatile uint64_t *)addr, &val, val, 0,
28 			__ATOMIC_RELAXED, __ATOMIC_RELAXED);
/*
 * CPU support flags, probed once at startup by the RTE_INIT constructor
 * below; the API entry points consult these cached values instead of
 * re-querying CPUID on every call.
 */
31 static bool wait_supported;
32 static bool wait_multi_supported;
/*
 * Read the value at 'p' as an unsigned integer of width 'sz' (1, 2, 4 or
 * 8 bytes), zero-extended to 64 bits.  'sz' is expected to have been
 * validated via __check_val_size() before this is called.
 */
34 static inline uint64_t
35 __get_umwait_val(const volatile void *p, const uint8_t sz)
39 		return *(const volatile uint8_t *)p;
40 	case sizeof(uint16_t):
41 		return *(const volatile uint16_t *)p;
42 	case sizeof(uint32_t):
43 		return *(const volatile uint32_t *)p;
44 	case sizeof(uint64_t):
45 		return *(const volatile uint64_t *)p;
47 		/* shouldn't happen */
/*
 * Validate that 'sz' is one of the supported monitor widths (1, 2, 4 or
 * 8 bytes).  Callers treat a negative return as invalid input (see the
 * pmc->size check in rte_power_monitor()).
 */
54 __check_val_size(const uint8_t sz)
57 	case sizeof(uint8_t): /* fall-through */
58 	case sizeof(uint16_t): /* fall-through */
59 	case sizeof(uint32_t): /* fall-through */
60 	case sizeof(uint64_t): /* fall-through */
69  * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
70  * For more information about usage of these instructions, please refer to
71  * Intel(R) 64 and IA-32 Architectures Software Developer's Manual.
74 rte_power_monitor(const struct rte_power_monitor_cond *pmc,
75 		const uint64_t tsc_timestamp)
	/* UMWAIT takes its TSC deadline split across EDX:EAX */
77 	const uint32_t tsc_l = (uint32_t)tsc_timestamp;
78 	const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
79 	const unsigned int lcore_id = rte_lcore_id();
80 	struct power_wait_status *s;
83 	/* prevent user from running this instruction if it's not supported */
87 	/* prevent non-EAL thread from using this API */
88 	if (lcore_id >= RTE_MAX_LCORE)
94 	if (__check_val_size(pmc->size) < 0)
100 	s = &wait_status[lcore_id];
102 	/* update sleep address */
103 	rte_spinlock_lock(&s->lock);
	/* publish the address so rte_power_monitor_wakeup() knows where to write */
104 	s->monitor_addr = pmc->addr;
107 	 * we're using raw byte codes for now as only the newest compiler
108 	 * versions support this instruction natively.
111 	/* set address for UMONITOR */
112 	asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
116 	/* now that we've put this address into monitor, we can unlock */
117 	rte_spinlock_unlock(&s->lock);
	/* sample the value before sleeping, to catch an already-met condition */
119 	cur_value = __get_umwait_val(pmc->addr, pmc->size);
121 	/* check if callback indicates we should abort */
122 	if (pmc->fn(cur_value, pmc->opaque) != 0)
	/*
	 * execute UMWAIT: sleeps until a write hits the monitored address or
	 * the TSC deadline in EDX:EAX expires (or another wakeup event).
	 */
126 	asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
127 		: /* ignore rflags */
128 		: "D"(0), /* enter C0.2 */
129 		"a"(tsc_l), "d"(tsc_h));
	/* woke up (for whatever reason) -- no longer a wakeup target */
132 	/* erase sleep address */
133 	rte_spinlock_lock(&s->lock);
134 	s->monitor_addr = NULL;
135 	rte_spinlock_unlock(&s->lock);
141  * This function uses TPAUSE instruction and will enter C0.2 state. For more
142  * information about usage of this instruction, please refer to Intel(R) 64 and
143  * IA-32 Architectures Software Developer's Manual.
146 rte_power_pause(const uint64_t tsc_timestamp)
	/* TPAUSE takes its TSC deadline split across EDX:EAX */
148 	const uint32_t tsc_l = (uint32_t)tsc_timestamp;
149 	const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
151 	/* prevent user from running this instruction if it's not supported */
	/*
	 * execute TPAUSE via raw byte codes (same compiler-support caveat as
	 * noted in rte_power_monitor()); sleeps until the TSC deadline.
	 */
156 	asm volatile(".byte 0x66, 0x0f, 0xae, 0xf7;"
157 		: /* ignore rflags */
158 		: "D"(0), /* enter C0.2 */
159 		"a"(tsc_l), "d"(tsc_h));
/*
 * Init-time constructor: probe CPU support for the power intrinsics once
 * and cache the result in the static flags used by all entry points.
 */
164 RTE_INIT(rte_power_intrinsics_init) {
165 	struct rte_cpu_intrinsics i;
167 	rte_cpu_get_intrinsics_support(&i);
	/* the single-address API needs both monitor and pause support */
169 	if (i.power_monitor && i.power_pause)
171 	if (i.power_monitor_multi)
172 		wait_multi_supported = 1;
/*
 * Wake up lcore 'lcore_id' if it is currently sleeping in
 * rte_power_monitor() or rte_power_monitor_multi().  A no-op if the
 * target lcore has no monitor address published.
 */
176 rte_power_monitor_wakeup(const unsigned int lcore_id)
178 	struct power_wait_status *s;
180 	/* prevent user from running this instruction if it's not supported */
184 	/* prevent buffer overrun */
185 	if (lcore_id >= RTE_MAX_LCORE)
188 	s = &wait_status[lcore_id];
191 	 * There is a race condition between sleep, wakeup and locking, but we
192 	 * don't need to handle it.
194 	 * Possible situations:
196 	 * 1. T1 locks, sets address, unlocks
197 	 * 2. T2 locks, triggers wakeup, unlocks
200 	 * In this case, because T1 has already set the address for monitoring,
201 	 * we will wake up immediately even if T2 triggers wakeup before T1
204 	 * 1. T1 locks, sets address, unlocks, goes to sleep, and wakes up
205 	 * 2. T2 locks, triggers wakeup, and unlocks
206 	 * 3. T1 locks, erases address, and unlocks
208 	 * In this case, since we've already woken up, the "wakeup" was
209 	 * unneeded, and since T1 is still waiting on T2 releasing the lock, the
210 	 * wakeup address is still valid so it's perfectly safe to write it.
212 	 * For multi-monitor case, the act of locking will in itself trigger the
213 	 * wakeup, so no additional writes necessary.
	 */
	/* only write to the monitored address if someone is actually sleeping */
215 	rte_spinlock_lock(&s->lock);
216 	if (s->monitor_addr != NULL)
217 		__umwait_wakeup(s->monitor_addr);
218 	rte_spinlock_unlock(&s->lock);
/*
 * Wait on multiple addresses at once using RTM (TSX) transactions: every
 * monitored address (and our own spinlock) is pulled into the transaction
 * read-set, so an external write to any of them aborts the transaction
 * and ends the sleep.  rte_power_pause() provides the timed sleep inside
 * the transaction region.
 */
224 rte_power_monitor_multi(const struct rte_power_monitor_cond pmc[],
225 		const uint32_t num, const uint64_t tsc_timestamp)
227 	const unsigned int lcore_id = rte_lcore_id();
228 	struct power_wait_status *s = &wait_status[lcore_id];
231 	/* check if supported */
232 	if (!wait_multi_supported)
235 	if (pmc == NULL || num == 0)
238 	/* we are already inside transaction region, return */
239 	if (rte_xtest() != 0)
242 	/* start new transaction region */
245 	/* transaction abort, possible write to one of wait addresses */
246 	if (rc != RTE_XBEGIN_STARTED)
250 	 * the mere act of reading the lock status here adds the lock to
251 	 * the read set. This means that when we trigger a wakeup from another
252 	 * thread, even if we don't have a defined wakeup address and thus don't
253 	 * actually cause any writes, the act of locking our lock will itself
254 	 * trigger the wakeup and abort the transaction.
256 	rte_spinlock_is_locked(&s->lock);
259 	 * add all addresses to wait on into transaction read-set and check if
260 	 * any of wakeup conditions are already met.
263 	for (i = 0; i < num; i++) {
264 		const struct rte_power_monitor_cond *c = &pmc[i];
		/* the load itself adds c->addr to the transaction read-set */
272 		const uint64_t val = __get_umwait_val(c->addr, c->size);
274 		/* abort if callback indicates that we need to stop */
275 		if (c->fn(val, c->opaque) != 0)
279 	/* none of the conditions were met, sleep until timeout */
281 	rte_power_pause(tsc_timestamp);
283 	/* end transaction region */