#include <errno.h>
#include <inttypes.h>
-#include <rte_string_fns.h>
#include <rte_memcpy.h>
-#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_string_fns.h>
#include "power_pstate_cpufreq.h"
#include "power_common.h"
#define FOPEN_OR_ERR_RET(f, retval) do { \
if ((f) == NULL) { \
- RTE_LOG(ERR, POWER, "File not openned\n"); \
+ RTE_LOG(ERR, POWER, "File not opened\n"); \
return retval; \
} \
} while (0)
} \
} while (0)
+/* macros used for rounding frequency to nearest 100000 */
+#define FREQ_ROUNDING_DELTA 50000
+#define ROUND_FREQ_TO_N_100000 100000
#define POWER_CONVERT_TO_DECIMAL 10
#define BUS_FREQ 100000
"/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq"
#define POWER_SYSFILE_BASE_FREQ \
"/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency"
+#define POWER_PSTATE_DRIVER "intel_pstate"
#define POWER_MSR_PATH "/dev/cpu/%u/msr"
/*
uint32_t non_turbo_max_ratio; /**< Non Turbo Max ratio */
uint32_t sys_max_freq; /**< system wide max freq */
uint32_t core_base_freq; /**< core base freq */
- volatile uint32_t state; /**< Power in use state */
+ uint32_t state; /**< Power in use state */
uint16_t turbo_available; /**< Turbo Boost available */
uint16_t turbo_enable; /**< Turbo Boost enable/disable */
uint16_t priority_core; /**< High Performance core */
char *s_base;
uint32_t base_ratio = 0;
uint64_t max_non_turbo = 0;
+ int ret_val = 0;
snprintf(fullpath_min, sizeof(fullpath_min), POWER_SYSFILE_MIN_FREQ,
pi->lcore_id);
/* Add MSR read to detect turbo status */
- if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0)
- return -1;
+ if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0) {
+ ret_val = -1;
+ goto out;
+ }
max_non_turbo = (max_non_turbo&NON_TURBO_MASK)>>NON_TURBO_OFFSET;
pi->core_base_freq = base_ratio * BUS_FREQ;
out:
- return 0;
+ if (f_base != NULL)
+ fclose(f_base);
+ return ret_val;
}
static int
return -1;
}
- POWER_DEBUG_TRACE("Freqency '%u' to be set for lcore %u\n",
+ POWER_DEBUG_TRACE("Frequency '%u' to be set for lcore %u\n",
target_freq, pi->lcore_id);
fflush(pi->f_cur_min);
return -1;
}
- POWER_DEBUG_TRACE("Freqency '%u' to be set for lcore %u\n",
+ POWER_DEBUG_TRACE("Frequency '%u' to be set for lcore %u\n",
target_freq, pi->lcore_id);
fflush(pi->f_cur_max);
return ret;
}
+static int
+power_get_cur_idx(struct pstate_power_info *pi)
+{
+ FILE *f_cur;
+ int ret = -1;
+ char *p_cur;
+ char buf_cur[BUFSIZ];
+ char fullpath_cur[PATH_MAX];
+ char *s_cur;
+ uint32_t sys_cur_freq = 0;
+ unsigned int i;
+
+ snprintf(fullpath_cur, sizeof(fullpath_cur),
+ POWER_SYSFILE_CUR_FREQ,
+ pi->lcore_id);
+ f_cur = fopen(fullpath_cur, "r");
+ FOPEN_OR_ERR_RET(f_cur, ret);
+
+ /* Overview: this function sets pi->curr_idx to the index of the
+ * pi->freqs[] entry that matches the lcore's current frequency. The
+ * frequency is read as the first line of the file whose path is built
+ * from POWER_SYSFILE_CUR_FREQ and pi->lcore_id. If fopen() fails,
+ * FOPEN_OR_ERR_RET above logs an error and returns ret (-1).
+ *
+ * The fgets() result is handed to FOPS_OR_NULL_GOTO together with the
+ * 'fail' label, where the file is closed and ret (-1) is returned.
+ * NOTE(review): that macro is defined outside this view; presumably it
+ * jumps to 'fail' when s_cur is NULL - confirm.
+ */
+ s_cur = fgets(buf_cur, sizeof(buf_cur), f_cur);
+ FOPS_OR_NULL_GOTO(s_cur, fail);
+
+ p_cur = strchr(buf_cur, '\n');
+ if (p_cur != NULL)
+ *p_cur = 0;
+ sys_cur_freq = strtoul(buf_cur, &p_cur, POWER_CONVERT_TO_DECIMAL);
+
+ /* Round the frequency to the nearest multiple of 100000 by adding
+ * FREQ_ROUNDING_DELTA, dividing by ROUND_FREQ_TO_N_100000 (integer
+ * division) and multiplying back.
+ * Ex: if sys_cur_freq=1396789 then freq_conv=1400000
+ * Ex: if sys_cur_freq=800030 then freq_conv=800000
+ * Ex: if sys_cur_freq=849999 then freq_conv=800000
+ *
+ * The rounded value is then compared with each pi->freqs[] entry and
+ * the first match is stored in pi->curr_idx. If no entry matches,
+ * pi->curr_idx is left unchanged and the function still returns 0.
+ */
+ unsigned int freq_conv = 0;
+ freq_conv = (sys_cur_freq + FREQ_ROUNDING_DELTA)
+ / ROUND_FREQ_TO_N_100000;
+ freq_conv = freq_conv * ROUND_FREQ_TO_N_100000;
+
+ for (i = 0; i < pi->nb_freqs; i++) {
+ if (freq_conv == pi->freqs[i]) {
+ pi->curr_idx = i;
+ break;
+ }
+ }
+
+ fclose(f_cur);
+ return 0;
+fail:
+ fclose(f_cur);
+ return ret;
+}
+/* Call cpufreq_check_scaling_driver() with POWER_PSTATE_DRIVER and return
+ * its result unchanged.
+ * NOTE(review): the meaning of the return value is defined by that helper,
+ * which is outside this view - confirm against its declaration.
+ */
+int
+power_pstate_cpufreq_check_supported(void)
+{
+ return cpufreq_check_scaling_driver(POWER_PSTATE_DRIVER);
+}
+
int
power_pstate_cpufreq_init(unsigned int lcore_id)
{
struct pstate_power_info *pi;
+ uint32_t exp_state;
if (lcore_id >= RTE_MAX_LCORE) {
RTE_LOG(ERR, POWER, "Lcore id %u can not exceed %u\n",
}
pi = &lcore_power_info[lcore_id];
- if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
- == 0) {
+ exp_state = POWER_IDLE;
+ /* The power in use state works as a guard variable between
+ * the CPU frequency control initialization and exit process.
+ * The ACQUIRE memory ordering here pairs with the RELEASE
+ * ordering below as lock to make sure the frequency operations
+ * in the critical section are done under the correct state.
+ */
+ if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
+ POWER_ONGOING, 0,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
RTE_LOG(INFO, POWER, "Power management of lcore %u is "
"in use\n", lcore_id);
return -1;
goto fail;
}
+ if (power_get_cur_idx(pi) < 0) {
+ RTE_LOG(ERR, POWER, "Cannot get current frequency "
+ "index of lcore %u\n", lcore_id);
+ goto fail;
+ }
/* Set freq to max by default */
if (power_pstate_cpufreq_freq_max(lcore_id) < 0) {
RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
"power management\n", lcore_id);
- rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
+ exp_state = POWER_ONGOING;
+ __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_USED,
+ 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
return 0;
fail:
- rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
+ exp_state = POWER_ONGOING;
+ __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
+ 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
return -1;
}
power_pstate_cpufreq_exit(unsigned int lcore_id)
{
struct pstate_power_info *pi;
+ uint32_t exp_state;
if (lcore_id >= RTE_MAX_LCORE) {
RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
}
pi = &lcore_power_info[lcore_id];
- if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)
- == 0) {
+ exp_state = POWER_USED;
+ /* The power in use state works as a guard variable between
+ * the CPU frequency control initialization and exit process.
+ * The ACQUIRE memory ordering here pairs with the RELEASE
+ * ordering below as lock to make sure the frequency operations
+ * in the critical section are done under the correct state.
+ */
+ if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
+ POWER_ONGOING, 0,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
RTE_LOG(INFO, POWER, "Power management of lcore %u is "
"not used\n", lcore_id);
return -1;
RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
"'performance' mode and been set back to the "
"original\n", lcore_id);
- rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
+ exp_state = POWER_ONGOING;
+ __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_IDLE,
+ 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
return 0;
fail:
- rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
+ exp_state = POWER_ONGOING;
+ __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
+ 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
return -1;
}
}
pi = &lcore_power_info[lcore_id];
- if (pi->curr_idx == 0)
+ if (pi->curr_idx == 0 ||
+ (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
return 0;
/* Frequencies in the array are from high to low. */
pi->turbo_enable = 0;
+ if (pi->turbo_available && pi->curr_idx <= 1) {
+ /* Try to set freq to max by default coming out of turbo */
+ if (power_pstate_cpufreq_freq_max(lcore_id) < 0) {
+ RTE_LOG(ERR, POWER,
+ "Failed to set frequency of lcore %u to max\n",
+ lcore_id);
+ return -1;
+ }
+ }
return 0;
}