common/octeontx2: fix memory mapping API usage
[dpdk.git] / lib / librte_power / power_pstate_cpufreq.c
index 61ff8db..e3126d3 100644 (file)
@@ -15,7 +15,8 @@
 #include <inttypes.h>
 
 #include <rte_memcpy.h>
-#include <rte_atomic.h>
+#include <rte_memory.h>
+#include <rte_string_fns.h>
 
 #include "power_pstate_cpufreq.h"
 #include "power_common.h"
@@ -31,7 +32,7 @@
 
 #define FOPEN_OR_ERR_RET(f, retval) do { \
                if ((f) == NULL) { \
-                       RTE_LOG(ERR, POWER, "File not openned\n"); \
+                       RTE_LOG(ERR, POWER, "File not opened\n"); \
                        return retval; \
                } \
 } while (0)
@@ -69,6 +70,7 @@
                "/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq"
 #define POWER_SYSFILE_BASE_FREQ  \
                "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency"
+#define POWER_PSTATE_DRIVER "intel_pstate"
 #define POWER_MSR_PATH  "/dev/cpu/%u/msr"
 
 /*
@@ -97,7 +99,7 @@ struct pstate_power_info {
        uint32_t non_turbo_max_ratio;        /**< Non Turbo Max ratio  */
        uint32_t sys_max_freq;               /**< system wide max freq  */
        uint32_t core_base_freq;             /**< core base freq  */
-       volatile uint32_t state;             /**< Power in use state */
+       uint32_t state;                      /**< Power in use state */
        uint16_t turbo_available;            /**< Turbo Boost available */
        uint16_t turbo_enable;               /**< Turbo Boost enable/disable */
        uint16_t priority_core;              /**< High Performance core */
@@ -157,6 +159,7 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
        char *s_base;
        uint32_t base_ratio = 0;
        uint64_t max_non_turbo = 0;
+       int  ret_val = 0;
 
        snprintf(fullpath_min, sizeof(fullpath_min), POWER_SYSFILE_MIN_FREQ,
                        pi->lcore_id);
@@ -198,8 +201,10 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
 
        /* Add MSR read to detect turbo status */
 
-       if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0)
-               return -1;
+       if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0) {
+               ret_val = -1;
+               goto out;
+       }
 
        max_non_turbo = (max_non_turbo&NON_TURBO_MASK)>>NON_TURBO_OFFSET;
 
@@ -218,7 +223,9 @@ power_init_for_setting_freq(struct pstate_power_info *pi)
        pi->core_base_freq = base_ratio * BUS_FREQ;
 
 out:
-       return 0;
+       if (f_base != NULL)
+               fclose(f_base);
+       return ret_val;
 }
 
 static int
@@ -280,7 +287,7 @@ set_freq_internal(struct pstate_power_info *pi, uint32_t idx)
                        return -1;
                }
 
-               POWER_DEBUG_TRACE("Freqency '%u' to be set for lcore %u\n",
+               POWER_DEBUG_TRACE("Frequency '%u' to be set for lcore %u\n",
                                  target_freq, pi->lcore_id);
 
                fflush(pi->f_cur_min);
@@ -303,7 +310,7 @@ set_freq_internal(struct pstate_power_info *pi, uint32_t idx)
                        return -1;
                }
 
-               POWER_DEBUG_TRACE("Freqency '%u' to be set for lcore %u\n",
+               POWER_DEBUG_TRACE("Frequency '%u' to be set for lcore %u\n",
                                  target_freq, pi->lcore_id);
 
                fflush(pi->f_cur_max);
@@ -349,7 +356,7 @@ power_set_governor_performance(struct pstate_power_info *pi)
                goto out;
        }
        /* Save the original governor */
-       snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf);
+       strlcpy(pi->governor_ori, buf, sizeof(pi->governor_ori));
 
        /* Write 'performance' to the governor */
        val = fseek(f, 0, SEEK_SET);
@@ -524,10 +531,17 @@ out:
        return ret;
 }
 
+int
+power_pstate_cpufreq_check_supported(void)
+{
+       return cpufreq_check_scaling_driver(POWER_PSTATE_DRIVER);
+}
+
 int
 power_pstate_cpufreq_init(unsigned int lcore_id)
 {
        struct pstate_power_info *pi;
+       uint32_t exp_state;
 
        if (lcore_id >= RTE_MAX_LCORE) {
                RTE_LOG(ERR, POWER, "Lcore id %u can not exceed %u\n",
@@ -536,8 +550,16 @@ power_pstate_cpufreq_init(unsigned int lcore_id)
        }
 
        pi = &lcore_power_info[lcore_id];
-       if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
-                       == 0) {
+       exp_state = POWER_IDLE;
+       /* The power in use state works as a guard variable between
+        * the CPU frequency control initialization and exit process.
+        * The ACQUIRE memory ordering here pairs with the RELEASE
+        * ordering below as a lock to make sure the frequency operations
+        * in the critical section are done under the correct state.
+        */
+       if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
+                                       POWER_ONGOING, 0,
+                                       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
                RTE_LOG(INFO, POWER, "Power management of lcore %u is "
                                "in use\n", lcore_id);
                return -1;
@@ -574,12 +596,16 @@ power_pstate_cpufreq_init(unsigned int lcore_id)
 
        RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
                        "power management\n", lcore_id);
-       rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
+       exp_state = POWER_ONGOING;
+       __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_USED,
+                                   0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
 
        return 0;
 
 fail:
-       rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
+       exp_state = POWER_ONGOING;
+       __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
+                                   0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
 
        return -1;
 }
@@ -588,6 +614,7 @@ int
 power_pstate_cpufreq_exit(unsigned int lcore_id)
 {
        struct pstate_power_info *pi;
+       uint32_t exp_state;
 
        if (lcore_id >= RTE_MAX_LCORE) {
                RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
@@ -596,8 +623,16 @@ power_pstate_cpufreq_exit(unsigned int lcore_id)
        }
        pi = &lcore_power_info[lcore_id];
 
-       if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)
-                       == 0) {
+       exp_state = POWER_USED;
+       /* The power in use state works as a guard variable between
+        * the CPU frequency control initialization and exit process.
+        * The ACQUIRE memory ordering here pairs with the RELEASE
+        * ordering below as a lock to make sure the frequency operations
+        * in the critical section are done under the correct state.
+        */
+       if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
+                                       POWER_ONGOING, 0,
+                                       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
                RTE_LOG(INFO, POWER, "Power management of lcore %u is "
                                "not used\n", lcore_id);
                return -1;
@@ -619,12 +654,16 @@ power_pstate_cpufreq_exit(unsigned int lcore_id)
        RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
                        "'performance' mode and been set back to the "
                        "original\n", lcore_id);
-       rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
+       exp_state = POWER_ONGOING;
+       __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_IDLE,
+                                   0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
 
        return 0;
 
 fail:
-       rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
+       exp_state = POWER_ONGOING;
+       __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
+                                   0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
 
        return -1;
 }
@@ -689,7 +728,8 @@ power_pstate_cpufreq_freq_up(unsigned int lcore_id)
        }
 
        pi = &lcore_power_info[lcore_id];
-       if (pi->curr_idx == 0)
+       if (pi->curr_idx == 0 ||
+           (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
                return 0;
 
        /* Frequencies in the array are from high to low. */
@@ -809,6 +849,15 @@ power_pstate_disable_turbo(unsigned int lcore_id)
 
        pi->turbo_enable = 0;
 
+       if (pi->turbo_available && pi->curr_idx <= 1) {
+               /* Try to set freq to max by default coming out of turbo */
+               if (power_pstate_cpufreq_freq_max(lcore_id) < 0) {
+                       RTE_LOG(ERR, POWER,
+                               "Failed to set frequency of lcore %u to max\n",
+                               lcore_id);
+                       return -1;
+               }
+       }
 
        return 0;
 }