net/cnxk: remove restriction on VF for PFC config
[dpdk.git] / lib / power / power_pstate_cpufreq.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <fcntl.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <limits.h>
10 #include <errno.h>
11 #include <inttypes.h>
12
13 #include <rte_memcpy.h>
14
15 #include "rte_power_pmd_mgmt.h"
16 #include "power_pstate_cpufreq.h"
17 #include "power_common.h"
18
/* macros used for rounding frequency to nearest 100000 */
#define FREQ_ROUNDING_DELTA 50000
#define ROUND_FREQ_TO_N_100000 100000

/* Spacing between adjacent frequency buckets; also the factor used to
 * convert between a ratio and a frequency value in this file.
 */
#define BUS_FREQ     100000

/* Governor requested for an lcore while it is managed by this backend. */
#define POWER_GOVERNOR_PERF "performance"
/* Per-core cpufreq sysfs files; %u is filled in with the lcore id. */
#define POWER_SYSFILE_MAX_FREQ \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq"
#define POWER_SYSFILE_MIN_FREQ  \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_min_freq"
#define POWER_SYSFILE_CUR_FREQ  \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq"
#define POWER_SYSFILE_BASE_MAX_FREQ \
		"/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_max_freq"
#define POWER_SYSFILE_BASE_MIN_FREQ  \
		"/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq"
#define POWER_SYSFILE_BASE_FREQ  \
		"/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency"
/* Scaling driver name passed to cpufreq_check_scaling_driver(). */
#define POWER_PSTATE_DRIVER "intel_pstate"
/* Per-core MSR device node; %u is filled in with the lcore id. */
#define POWER_MSR_PATH  "/dev/cpu/%u/msr"

/*
 * MSR related
 *
 * PLATFORM_INFO is the offset read from the MSR device; the non-turbo
 * ratio is extracted from the value as (val & NON_TURBO_MASK) >>
 * NON_TURBO_OFFSET.
 */
#define PLATFORM_INFO     0x0CE
#define NON_TURBO_MASK    0xFF00
#define NON_TURBO_OFFSET  0x8
47
48
/*
 * Guard state of one lcore, stored in pstate_power_info.state and changed
 * only through compare-and-swap in the init and exit functions.
 */
enum power_state {
	POWER_IDLE = 0,   /* not managed; init may claim the lcore */
	POWER_ONGOING,    /* an init or exit call is in progress */
	POWER_USED,       /* init completed; exit may release the lcore */
	POWER_UNKNOWN     /* init or exit failed part-way */
};
55
/**
 * Per-lcore bookkeeping of the pstate frequency-scaling backend.
 */
struct pstate_power_info {
	unsigned int lcore_id;               /**< Logical core id */
	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array, high to low */
	uint32_t nb_freqs;                   /**< number of available freqs */
	FILE *f_cur_min;                     /**< Open stream on scaling_min_freq */
	FILE *f_cur_max;                     /**< Open stream on scaling_max_freq */
	char governor_ori[32];               /**< Original governor name */
	uint32_t curr_idx;                   /**< Freq index in freqs array */
	uint32_t non_turbo_max_ratio;        /**< Non Turbo Max ratio  */
	uint32_t sys_max_freq;               /**< system wide max freq  */
	uint32_t core_base_freq;             /**< core base freq  */
	uint32_t state;                      /**< enum power_state, updated atomically */
	uint16_t turbo_available;            /**< Turbo Boost available */
	uint16_t turbo_enable;               /**< Turbo Boost enable/disable */
	uint16_t priority_core;              /**< High Performance core */
} __rte_cache_aligned;


/* One entry per possible lcore, indexed by lcore id. */
static struct pstate_power_info lcore_power_info[RTE_MAX_LCORE];
75
76 /**
77  * It is to read the specific MSR.
78  */
79
80 static int32_t
81 power_rdmsr(int msr, uint64_t *val, unsigned int lcore_id)
82 {
83         int fd, ret;
84         char fullpath[PATH_MAX];
85
86         snprintf(fullpath, sizeof(fullpath), POWER_MSR_PATH, lcore_id);
87
88         fd = open(fullpath, O_RDONLY);
89
90         if (fd < 0) {
91                 RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", fullpath,
92                                  strerror(errno));
93                 return fd;
94         }
95
96         ret = pread(fd, val, sizeof(uint64_t), msr);
97
98         if (ret < 0) {
99                 RTE_LOG(ERR, POWER, "Error reading '%s': %s\n", fullpath,
100                                  strerror(errno));
101                 goto out;
102         }
103
104         POWER_DEBUG_TRACE("MSR Path %s, offset 0x%X for lcore %u\n",
105                         fullpath, msr, lcore_id);
106
107         POWER_DEBUG_TRACE("Ret value %d, content is 0x%"PRIx64"\n", ret, *val);
108
109 out:    close(fd);
110         return ret;
111 }
112
113 /**
114  * It is to fopen the sys file for the future setting the lcore frequency.
115  */
116 static int
117 power_init_for_setting_freq(struct pstate_power_info *pi)
118 {
119         FILE *f_base = NULL, *f_base_max = NULL, *f_min = NULL, *f_max = NULL;
120         uint32_t base_ratio, base_max_ratio;
121         uint64_t max_non_turbo;
122         int ret;
123
124         /* open all files we expect to have open */
125         open_core_sysfs_file(&f_base_max, "r", POWER_SYSFILE_BASE_MAX_FREQ,
126                         pi->lcore_id);
127         if (f_base_max == NULL) {
128                 RTE_LOG(ERR, POWER, "failed to open %s\n",
129                                 POWER_SYSFILE_BASE_MAX_FREQ);
130                 goto err;
131         }
132
133         open_core_sysfs_file(&f_min, "rw+", POWER_SYSFILE_MIN_FREQ,
134                         pi->lcore_id);
135         if (f_min == NULL) {
136                 RTE_LOG(ERR, POWER, "failed to open %s\n",
137                                 POWER_SYSFILE_MIN_FREQ);
138                 goto err;
139         }
140
141         open_core_sysfs_file(&f_max, "rw+", POWER_SYSFILE_MAX_FREQ,
142                         pi->lcore_id);
143         if (f_max == NULL) {
144                 RTE_LOG(ERR, POWER, "failed to open %s\n",
145                                 POWER_SYSFILE_MAX_FREQ);
146                 goto err;
147         }
148
149         open_core_sysfs_file(&f_base, "r", POWER_SYSFILE_BASE_FREQ,
150                         pi->lcore_id);
151         /* base ratio file may not exist in some kernels, so no error check */
152
153         /* read base max ratio */
154         ret = read_core_sysfs_u32(f_base_max, &base_max_ratio);
155         if (ret < 0) {
156                 RTE_LOG(ERR, POWER, "Failed to read %s\n",
157                                 POWER_SYSFILE_BASE_MAX_FREQ);
158                 goto err;
159         }
160
161         /* base ratio may not exist */
162         if (f_base != NULL) {
163                 ret = read_core_sysfs_u32(f_base, &base_ratio);
164                 if (ret < 0) {
165                         RTE_LOG(ERR, POWER, "Failed to read %s\n",
166                                         POWER_SYSFILE_BASE_FREQ);
167                         goto err;
168                 }
169         } else {
170                 base_ratio = 0;
171         }
172
173         /* Add MSR read to detect turbo status */
174         if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0)
175                 goto err;
176         /* no errors after this point */
177
178         /* convert ratios to bins */
179         base_max_ratio /= BUS_FREQ;
180         base_ratio /= BUS_FREQ;
181
182         /* assign file handles */
183         pi->f_cur_min = f_min;
184         pi->f_cur_max = f_max;
185
186         max_non_turbo = (max_non_turbo&NON_TURBO_MASK)>>NON_TURBO_OFFSET;
187
188         POWER_DEBUG_TRACE("no turbo perf %"PRIu64"\n", max_non_turbo);
189
190         pi->non_turbo_max_ratio = (uint32_t)max_non_turbo;
191
192         /*
193          * If base_frequency is reported as greater than the maximum
194          * turbo frequency, that's a known issue with some kernels.
195          * Set base_frequency to max_non_turbo as a workaround.
196          */
197         if (base_ratio > base_max_ratio) {
198                 /* base_ratio is greater than max turbo. Kernel bug. */
199                 pi->priority_core = 0;
200                 goto out;
201         }
202
203         /*
204          * If base_frequency is reported as greater than the maximum
205          * non-turbo frequency, then mark it as a high priority core.
206          */
207         if (base_ratio > max_non_turbo)
208                 pi->priority_core = 1;
209         else
210                 pi->priority_core = 0;
211         pi->core_base_freq = base_ratio * BUS_FREQ;
212
213 out:
214         if (f_base != NULL)
215                 fclose(f_base);
216         fclose(f_base_max);
217         /* f_min and f_max are stored, no need to close */
218         return 0;
219
220 err:
221         if (f_base != NULL)
222                 fclose(f_base);
223         if (f_base_max != NULL)
224                 fclose(f_base_max);
225         if (f_min != NULL)
226                 fclose(f_min);
227         if (f_max != NULL)
228                 fclose(f_max);
229         return -1;
230 }
231
232 static int
233 set_freq_internal(struct pstate_power_info *pi, uint32_t idx)
234 {
235         uint32_t target_freq = 0;
236
237         if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
238                 RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
239                                 "should be less than %u\n", idx, pi->nb_freqs);
240                 return -1;
241         }
242
243         /* Check if it is the same as current */
244         if (idx == pi->curr_idx)
245                 return 0;
246
247         /* Because Intel Pstate Driver only allow user change min/max hint
248          * User need change the min/max as same value.
249          */
250         if (fseek(pi->f_cur_min, 0, SEEK_SET) < 0) {
251                 RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
252                                 "for setting frequency for lcore %u\n",
253                                 pi->lcore_id);
254                 return -1;
255         }
256
257         if (fseek(pi->f_cur_max, 0, SEEK_SET) < 0) {
258                 RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
259                                 "for setting frequency for lcore %u\n",
260                                 pi->lcore_id);
261                 return -1;
262         }
263
264         /* Turbo is available and enabled, first freq bucket is sys max freq */
265         if (pi->turbo_available && idx == 0) {
266                 if (pi->turbo_enable)
267                         target_freq = pi->sys_max_freq;
268                 else {
269                         RTE_LOG(ERR, POWER, "Turbo is off, frequency can't be scaled up more %u\n",
270                                         pi->lcore_id);
271                         return -1;
272                 }
273         } else
274                 target_freq = pi->freqs[idx];
275
276         /* Decrease freq, the min freq should be updated first */
277         if (idx  >  pi->curr_idx) {
278
279                 if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) {
280                         RTE_LOG(ERR, POWER, "Fail to write new frequency for "
281                                         "lcore %u\n", pi->lcore_id);
282                         return -1;
283                 }
284
285                 if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) {
286                         RTE_LOG(ERR, POWER, "Fail to write new frequency for "
287                                         "lcore %u\n", pi->lcore_id);
288                         return -1;
289                 }
290
291                 POWER_DEBUG_TRACE("Frequency '%u' to be set for lcore %u\n",
292                                   target_freq, pi->lcore_id);
293
294                 fflush(pi->f_cur_min);
295                 fflush(pi->f_cur_max);
296
297         }
298
299         /* Increase freq, the max freq should be updated first */
300         if (idx  <  pi->curr_idx) {
301
302                 if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) {
303                         RTE_LOG(ERR, POWER, "Fail to write new frequency for "
304                                         "lcore %u\n", pi->lcore_id);
305                         return -1;
306                 }
307
308                 if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) {
309                         RTE_LOG(ERR, POWER, "Fail to write new frequency for "
310                                         "lcore %u\n", pi->lcore_id);
311                         return -1;
312                 }
313
314                 POWER_DEBUG_TRACE("Frequency '%u' to be set for lcore %u\n",
315                                   target_freq, pi->lcore_id);
316
317                 fflush(pi->f_cur_max);
318                 fflush(pi->f_cur_min);
319         }
320
321         pi->curr_idx = idx;
322
323         return 1;
324 }
325
326 /**
327  * It is to check the current scaling governor by reading sys file, and then
328  * set it into 'performance' if it is not by writing the sys file. The original
329  * governor will be saved for rolling back.
330  */
331 static int
332 power_set_governor_performance(struct pstate_power_info *pi)
333 {
334         return power_set_governor(pi->lcore_id, POWER_GOVERNOR_PERF,
335                         pi->governor_ori, sizeof(pi->governor_ori));
336 }
337
338 /**
339  * It is to check the governor and then set the original governor back if
340  * needed by writing the sys file.
341  */
342 static int
343 power_set_governor_original(struct pstate_power_info *pi)
344 {
345         return power_set_governor(pi->lcore_id, pi->governor_ori, NULL, 0);
346 }
347
348 /**
349  * It is to get the available frequencies of the specific lcore by reading the
350  * sys file.
351  */
352 static int
353 power_get_available_freqs(struct pstate_power_info *pi)
354 {
355         FILE *f_min = NULL, *f_max = NULL;
356         int ret = -1;
357         uint32_t sys_min_freq = 0, sys_max_freq = 0, base_max_freq = 0;
358         int config_min_freq, config_max_freq;
359         uint32_t i, num_freqs = 0;
360
361         /* open all files */
362         open_core_sysfs_file(&f_max, "r", POWER_SYSFILE_BASE_MAX_FREQ,
363                         pi->lcore_id);
364         if (f_max == NULL) {
365                 RTE_LOG(ERR, POWER, "failed to open %s\n",
366                                 POWER_SYSFILE_BASE_MAX_FREQ);
367                 goto out;
368         }
369
370         open_core_sysfs_file(&f_min, "r", POWER_SYSFILE_BASE_MIN_FREQ,
371                         pi->lcore_id);
372         if (f_min == NULL) {
373                 RTE_LOG(ERR, POWER, "failed to open %s\n",
374                                 POWER_SYSFILE_BASE_MIN_FREQ);
375                 goto out;
376         }
377
378         /* read base ratios */
379         ret = read_core_sysfs_u32(f_max, &sys_max_freq);
380         if (ret < 0) {
381                 RTE_LOG(ERR, POWER, "Failed to read %s\n",
382                                 POWER_SYSFILE_BASE_MAX_FREQ);
383                 goto out;
384         }
385
386         ret = read_core_sysfs_u32(f_min, &sys_min_freq);
387         if (ret < 0) {
388                 RTE_LOG(ERR, POWER, "Failed to read %s\n",
389                                 POWER_SYSFILE_BASE_MIN_FREQ);
390                 goto out;
391         }
392
393         /* check for config set by user or application to limit frequency range */
394         config_min_freq = rte_power_pmd_mgmt_get_scaling_freq_min(pi->lcore_id);
395         if (config_min_freq < 0)
396                 goto out;
397         config_max_freq = rte_power_pmd_mgmt_get_scaling_freq_max(pi->lcore_id);
398         if (config_max_freq < 0)
399                 goto out;
400
401         sys_min_freq = RTE_MAX(sys_min_freq, (uint32_t)config_min_freq);
402         if (config_max_freq > 0) /* Only use config_max_freq if a value has been set */
403                 sys_max_freq = RTE_MIN(sys_max_freq, (uint32_t)config_max_freq);
404
405         if (sys_max_freq < sys_min_freq)
406                 goto out;
407
408         pi->sys_max_freq = sys_max_freq;
409
410         if (pi->priority_core == 1)
411                 base_max_freq = pi->core_base_freq;
412         else
413                 base_max_freq = pi->non_turbo_max_ratio * BUS_FREQ;
414
415         POWER_DEBUG_TRACE("sys min %u, sys max %u, base_max %u\n",
416                         sys_min_freq,
417                         sys_max_freq,
418                         base_max_freq);
419
420         if (base_max_freq < sys_max_freq)
421                 pi->turbo_available = 1;
422         else
423                 pi->turbo_available = 0;
424
425         /* If turbo is available then there is one extra freq bucket
426          * to store the sys max freq which value is base_max +1
427          */
428         num_freqs = (RTE_MIN(base_max_freq, sys_max_freq) - sys_min_freq) / BUS_FREQ
429                         + 1 + pi->turbo_available;
430         if (num_freqs >= RTE_MAX_LCORE_FREQS) {
431                 RTE_LOG(ERR, POWER, "Too many available frequencies: %d\n",
432                                 num_freqs);
433                 goto out;
434         }
435
436         /* Generate the freq bucket array.
437          * If turbo is available the freq bucket[0] value is base_max +1
438          * the bucket[1] is base_max, bucket[2] is base_max - BUS_FREQ
439          * and so on.
440          * If turbo is not available bucket[0] is base_max and so on
441          */
442         for (i = 0, pi->nb_freqs = 0; i < num_freqs; i++) {
443                 if ((i == 0) && pi->turbo_available)
444                         pi->freqs[pi->nb_freqs++] = RTE_MIN(base_max_freq, sys_max_freq) + 1;
445                 else
446                         pi->freqs[pi->nb_freqs++] = RTE_MIN(base_max_freq, sys_max_freq) -
447                                         (i - pi->turbo_available) * BUS_FREQ;
448         }
449
450         ret = 0;
451
452         POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
453                         num_freqs, pi->lcore_id);
454
455 out:
456         if (f_min != NULL)
457                 fclose(f_min);
458         if (f_max != NULL)
459                 fclose(f_max);
460
461         return ret;
462 }
463
464 static int
465 power_get_cur_idx(struct pstate_power_info *pi)
466 {
467         FILE *f_cur;
468         int ret = -1;
469         uint32_t sys_cur_freq = 0;
470         unsigned int i;
471
472         open_core_sysfs_file(&f_cur, "r", POWER_SYSFILE_CUR_FREQ,
473                         pi->lcore_id);
474         if (f_cur == NULL) {
475                 RTE_LOG(ERR, POWER, "failed to open %s\n",
476                                 POWER_SYSFILE_CUR_FREQ);
477                 goto fail;
478         }
479
480         ret = read_core_sysfs_u32(f_cur, &sys_cur_freq);
481         if (ret < 0) {
482                 RTE_LOG(ERR, POWER, "Failed to read %s\n",
483                                 POWER_SYSFILE_CUR_FREQ);
484                 goto fail;
485         }
486
487         /* convert the frequency to nearest 100000 value
488          * Ex: if sys_cur_freq=1396789 then freq_conv=1400000
489          * Ex: if sys_cur_freq=800030 then freq_conv=800000
490          * Ex: if sys_cur_freq=800030 then freq_conv=800000
491          */
492         unsigned int freq_conv = 0;
493         freq_conv = (sys_cur_freq + FREQ_ROUNDING_DELTA)
494                                 / ROUND_FREQ_TO_N_100000;
495         freq_conv = freq_conv * ROUND_FREQ_TO_N_100000;
496
497         for (i = 0; i < pi->nb_freqs; i++) {
498                 if (freq_conv == pi->freqs[i]) {
499                         pi->curr_idx = i;
500                         break;
501                 }
502         }
503
504         ret = 0;
505 fail:
506         if (f_cur != NULL)
507                 fclose(f_cur);
508         return ret;
509 }
510
511 int
512 power_pstate_cpufreq_check_supported(void)
513 {
514         return cpufreq_check_scaling_driver(POWER_PSTATE_DRIVER);
515 }
516
517 int
518 power_pstate_cpufreq_init(unsigned int lcore_id)
519 {
520         struct pstate_power_info *pi;
521         uint32_t exp_state;
522
523         if (lcore_id >= RTE_MAX_LCORE) {
524                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceed %u\n",
525                                 lcore_id, RTE_MAX_LCORE - 1U);
526                 return -1;
527         }
528
529         pi = &lcore_power_info[lcore_id];
530         exp_state = POWER_IDLE;
531         /* The power in use state works as a guard variable between
532          * the CPU frequency control initialization and exit process.
533          * The ACQUIRE memory ordering here pairs with the RELEASE
534          * ordering below as lock to make sure the frequency operations
535          * in the critical section are done under the correct state.
536          */
537         if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
538                                         POWER_ONGOING, 0,
539                                         __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
540                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
541                                 "in use\n", lcore_id);
542                 return -1;
543         }
544
545         pi->lcore_id = lcore_id;
546         /* Check and set the governor */
547         if (power_set_governor_performance(pi) < 0) {
548                 RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
549                                 "performance\n", lcore_id);
550                 goto fail;
551         }
552         /* Init for setting lcore frequency */
553         if (power_init_for_setting_freq(pi) < 0) {
554                 RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
555                                 "lcore %u\n", lcore_id);
556                 goto fail;
557         }
558
559         /* Get the available frequencies */
560         if (power_get_available_freqs(pi) < 0) {
561                 RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
562                                 "lcore %u\n", lcore_id);
563                 goto fail;
564         }
565
566         if (power_get_cur_idx(pi) < 0) {
567                 RTE_LOG(ERR, POWER, "Cannot get current frequency "
568                                 "index of lcore %u\n", lcore_id);
569                 goto fail;
570         }
571
572         /* Set freq to max by default */
573         if (power_pstate_cpufreq_freq_max(lcore_id) < 0) {
574                 RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
575                                 "to max\n", lcore_id);
576                 goto fail;
577         }
578
579         RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
580                         "power management\n", lcore_id);
581         exp_state = POWER_ONGOING;
582         __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_USED,
583                                     0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
584
585         return 0;
586
587 fail:
588         exp_state = POWER_ONGOING;
589         __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
590                                     0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
591
592         return -1;
593 }
594
595 int
596 power_pstate_cpufreq_exit(unsigned int lcore_id)
597 {
598         struct pstate_power_info *pi;
599         uint32_t exp_state;
600
601         if (lcore_id >= RTE_MAX_LCORE) {
602                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
603                                 lcore_id, RTE_MAX_LCORE - 1U);
604                 return -1;
605         }
606         pi = &lcore_power_info[lcore_id];
607
608         exp_state = POWER_USED;
609         /* The power in use state works as a guard variable between
610          * the CPU frequency control initialization and exit process.
611          * The ACQUIRE memory ordering here pairs with the RELEASE
612          * ordering below as lock to make sure the frequency operations
613          * in the critical section are under done the correct state.
614          */
615         if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
616                                         POWER_ONGOING, 0,
617                                         __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
618                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
619                                 "not used\n", lcore_id);
620                 return -1;
621         }
622
623         /* Close FD of setting freq */
624         fclose(pi->f_cur_min);
625         fclose(pi->f_cur_max);
626         pi->f_cur_min = NULL;
627         pi->f_cur_max = NULL;
628
629         /* Set the governor back to the original */
630         if (power_set_governor_original(pi) < 0) {
631                 RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
632                                 "to the original\n", lcore_id);
633                 goto fail;
634         }
635
636         RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
637                         "'performance' mode and been set back to the "
638                         "original\n", lcore_id);
639         exp_state = POWER_ONGOING;
640         __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_IDLE,
641                                     0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
642
643         return 0;
644
645 fail:
646         exp_state = POWER_ONGOING;
647         __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
648                                     0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
649
650         return -1;
651 }
652
653
654 uint32_t
655 power_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
656 {
657         struct pstate_power_info *pi;
658
659         if (lcore_id >= RTE_MAX_LCORE) {
660                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
661                 return 0;
662         }
663
664         if (freqs == NULL) {
665                 RTE_LOG(ERR, POWER, "NULL buffer supplied\n");
666                 return 0;
667         }
668
669         pi = &lcore_power_info[lcore_id];
670         if (num < pi->nb_freqs) {
671                 RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
672                 return 0;
673         }
674         rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
675
676         return pi->nb_freqs;
677 }
678
679 uint32_t
680 power_pstate_cpufreq_get_freq(unsigned int lcore_id)
681 {
682         if (lcore_id >= RTE_MAX_LCORE) {
683                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
684                 return RTE_POWER_INVALID_FREQ_INDEX;
685         }
686
687         return lcore_power_info[lcore_id].curr_idx;
688 }
689
690
691 int
692 power_pstate_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
693 {
694         if (lcore_id >= RTE_MAX_LCORE) {
695                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
696                 return -1;
697         }
698
699         return set_freq_internal(&(lcore_power_info[lcore_id]), index);
700 }
701
702 int
703 power_pstate_cpufreq_freq_up(unsigned int lcore_id)
704 {
705         struct pstate_power_info *pi;
706
707         if (lcore_id >= RTE_MAX_LCORE) {
708                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
709                 return -1;
710         }
711
712         pi = &lcore_power_info[lcore_id];
713         if (pi->curr_idx == 0 ||
714             (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
715                 return 0;
716
717         /* Frequencies in the array are from high to low. */
718         return set_freq_internal(pi, pi->curr_idx - 1);
719 }
720
721 int
722 power_pstate_cpufreq_freq_down(unsigned int lcore_id)
723 {
724         struct pstate_power_info *pi;
725
726         if (lcore_id >= RTE_MAX_LCORE) {
727                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
728                 return -1;
729         }
730
731         pi = &lcore_power_info[lcore_id];
732         if (pi->curr_idx + 1 == pi->nb_freqs)
733                 return 0;
734
735         /* Frequencies in the array are from high to low. */
736         return set_freq_internal(pi, pi->curr_idx + 1);
737 }
738
739 int
740 power_pstate_cpufreq_freq_max(unsigned int lcore_id)
741 {
742         if (lcore_id >= RTE_MAX_LCORE) {
743                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
744                 return -1;
745         }
746
747         /* Frequencies in the array are from high to low. */
748         if (lcore_power_info[lcore_id].turbo_available) {
749                 if (lcore_power_info[lcore_id].turbo_enable)
750                         /* Set to Turbo */
751                         return set_freq_internal(
752                                         &lcore_power_info[lcore_id], 0);
753                 else
754                         /* Set to max non-turbo */
755                         return set_freq_internal(
756                                         &lcore_power_info[lcore_id], 1);
757         } else
758                 return set_freq_internal(&lcore_power_info[lcore_id], 0);
759 }
760
761
762 int
763 power_pstate_cpufreq_freq_min(unsigned int lcore_id)
764 {
765         struct pstate_power_info *pi;
766
767         if (lcore_id >= RTE_MAX_LCORE) {
768                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
769                 return -1;
770         }
771
772         pi = &lcore_power_info[lcore_id];
773
774         /* Frequencies in the array are from high to low. */
775         return set_freq_internal(pi, pi->nb_freqs - 1);
776 }
777
778
779 int
780 power_pstate_turbo_status(unsigned int lcore_id)
781 {
782         struct pstate_power_info *pi;
783
784         if (lcore_id >= RTE_MAX_LCORE) {
785                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
786                 return -1;
787         }
788
789         pi = &lcore_power_info[lcore_id];
790
791         return pi->turbo_enable;
792 }
793
794 int
795 power_pstate_enable_turbo(unsigned int lcore_id)
796 {
797         struct pstate_power_info *pi;
798
799         if (lcore_id >= RTE_MAX_LCORE) {
800                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
801                 return -1;
802         }
803
804         pi = &lcore_power_info[lcore_id];
805
806         if (pi->turbo_available)
807                 pi->turbo_enable = 1;
808         else {
809                 pi->turbo_enable = 0;
810                 RTE_LOG(ERR, POWER,
811                         "Failed to enable turbo on lcore %u\n",
812                         lcore_id);
813                         return -1;
814         }
815
816         return 0;
817 }
818
819
820 int
821 power_pstate_disable_turbo(unsigned int lcore_id)
822 {
823         struct pstate_power_info *pi;
824
825         if (lcore_id >= RTE_MAX_LCORE) {
826                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
827                 return -1;
828         }
829
830         pi = &lcore_power_info[lcore_id];
831
832         pi->turbo_enable = 0;
833
834         if (pi->turbo_available && pi->curr_idx <= 1) {
835                 /* Try to set freq to max by default coming out of turbo */
836                 if (power_pstate_cpufreq_freq_max(lcore_id) < 0) {
837                         RTE_LOG(ERR, POWER,
838                                 "Failed to set frequency of lcore %u to max\n",
839                                 lcore_id);
840                         return -1;
841                 }
842         }
843
844         return 0;
845 }
846
847
848 int power_pstate_get_capabilities(unsigned int lcore_id,
849                 struct rte_power_core_capabilities *caps)
850 {
851         struct pstate_power_info *pi;
852
853         if (lcore_id >= RTE_MAX_LCORE) {
854                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
855                 return -1;
856         }
857         if (caps == NULL) {
858                 RTE_LOG(ERR, POWER, "Invalid argument\n");
859                 return -1;
860         }
861
862         pi = &lcore_power_info[lcore_id];
863         caps->capabilities = 0;
864         caps->turbo = !!(pi->turbo_available);
865         caps->priority = pi->priority_core;
866
867         return 0;
868 }