6e57aca53513903cfb1c6fb8572525ae7c819b39
[dpdk.git] / power_acpi_cpufreq.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <fcntl.h>
7 #include <stdlib.h>
8
9 #include <rte_memcpy.h>
10 #include <rte_string_fns.h>
11
12 #include "power_acpi_cpufreq.h"
13 #include "power_common.h"
14
/* Generic string buffer size (not referenced in the visible part of this file). */
#define STR_SIZE                 1024
/* Numeric base passed to strtoul() when parsing values read from sysfs. */
#define POWER_CONVERT_TO_DECIMAL 10

/* Governor name requested when an lcore is initialised. */
#define POWER_GOVERNOR_USERSPACE "userspace"
/* Per-core sysfs path templates; the "%u" receives the lcore id. */
#define POWER_SYSFILE_AVAIL_FREQ \
                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
#define POWER_SYSFILE_SETSPEED \
                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
/* Scaling driver name checked by power_acpi_cpufreq_check_supported(). */
#define POWER_ACPI_DRIVER        "acpi-cpufreq"

/*
 * MSR related
 */
#define PLATFORM_INFO            0x0CE
#define TURBO_RATIO_LIMIT        0x1AD
#define IA32_PERF_CTL            0x199
/* Bit 32, as a 64-bit mask. */
#define CORE_TURBO_DISABLE_BIT   ((uint64_t)1 << 32)
32
/*
 * Per-lcore guard state used by the init/exit functions of this file.
 */
enum power_state {
        POWER_IDLE    = 0, /* state required by init before it starts */
        POWER_ONGOING = 1, /* init or exit currently in progress */
        POWER_USED    = 2, /* init completed; state required by exit */
        POWER_UNKNOWN = 3  /* init or exit failed */
};
39
40 /**
41  * Power info per lcore.
42  */
43 struct acpi_power_info {
44         unsigned int lcore_id;                   /**< Logical core id */
45         uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
46         uint32_t nb_freqs;                   /**< number of available freqs */
47         FILE *f;                             /**< FD of scaling_setspeed */
48         char governor_ori[32];               /**< Original governor name */
49         uint32_t curr_idx;                   /**< Freq index in freqs array */
50         uint32_t state;                      /**< Power in use state */
51         uint16_t turbo_available;            /**< Turbo Boost available */
52         uint16_t turbo_enable;               /**< Turbo Boost enable/disable */
53 } __rte_cache_aligned;
54
55 static struct acpi_power_info lcore_power_info[RTE_MAX_LCORE];
56
57 /**
58  * It is to set specific freq for specific logical core, according to the index
59  * of supported frequencies.
60  */
61 static int
62 set_freq_internal(struct acpi_power_info *pi, uint32_t idx)
63 {
64         if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
65                 RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
66                                 "should be less than %u\n", idx, pi->nb_freqs);
67                 return -1;
68         }
69
70         /* Check if it is the same as current */
71         if (idx == pi->curr_idx)
72                 return 0;
73
74         POWER_DEBUG_TRACE("Frequency[%u] %u to be set for lcore %u\n",
75                         idx, pi->freqs[idx], pi->lcore_id);
76         if (fseek(pi->f, 0, SEEK_SET) < 0) {
77                 RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
78                                 "for setting frequency for lcore %u\n", pi->lcore_id);
79                 return -1;
80         }
81         if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
82                 RTE_LOG(ERR, POWER, "Fail to write new frequency for "
83                                 "lcore %u\n", pi->lcore_id);
84                 return -1;
85         }
86         fflush(pi->f);
87         pi->curr_idx = idx;
88
89         return 1;
90 }
91
92 /**
93  * It is to check the current scaling governor by reading sys file, and then
94  * set it into 'userspace' if it is not by writing the sys file. The original
95  * governor will be saved for rolling back.
96  */
97 static int
98 power_set_governor_userspace(struct acpi_power_info *pi)
99 {
100         return power_set_governor(pi->lcore_id, POWER_GOVERNOR_USERSPACE,
101                         pi->governor_ori, sizeof(pi->governor_ori));
102 }
103
104 /**
105  * It is to check the governor and then set the original governor back if
106  * needed by writing the sys file.
107  */
108 static int
109 power_set_governor_original(struct acpi_power_info *pi)
110 {
111         return power_set_governor(pi->lcore_id, pi->governor_ori, NULL, 0);
112 }
113
114 /**
115  * It is to get the available frequencies of the specific lcore by reading the
116  * sys file.
117  */
118 static int
119 power_get_available_freqs(struct acpi_power_info *pi)
120 {
121         FILE *f;
122         int ret = -1, i, count;
123         char *p;
124         char buf[BUFSIZ];
125         char *freqs[RTE_MAX_LCORE_FREQS];
126
127         open_core_sysfs_file(&f, "r", POWER_SYSFILE_AVAIL_FREQ, pi->lcore_id);
128         if (f == NULL) {
129                 RTE_LOG(ERR, POWER, "failed to open %s\n",
130                                 POWER_SYSFILE_AVAIL_FREQ);
131                 goto out;
132         }
133
134         ret = read_core_sysfs_s(f, buf, sizeof(buf));
135         if ((ret) < 0) {
136                 RTE_LOG(ERR, POWER, "Failed to read %s\n",
137                                 POWER_SYSFILE_AVAIL_FREQ);
138                 goto out;
139         }
140
141         /* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
142         count = rte_strsplit(buf, sizeof(buf), freqs,
143                         RTE_MAX_LCORE_FREQS, ' ');
144         if (count <= 0) {
145                 RTE_LOG(ERR, POWER, "No available frequency in "
146                                 ""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
147                 goto out;
148         }
149         if (count >= RTE_MAX_LCORE_FREQS) {
150                 RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
151                                 count);
152                 goto out;
153         }
154
155         /* Store the available frequencies into power context */
156         for (i = 0, pi->nb_freqs = 0; i < count; i++) {
157                 POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
158                                 i, freqs[i]);
159                 pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
160                                 POWER_CONVERT_TO_DECIMAL);
161         }
162
163         if ((pi->freqs[0]-1000) == pi->freqs[1]) {
164                 pi->turbo_available = 1;
165                 pi->turbo_enable = 1;
166                 POWER_DEBUG_TRACE("Lcore %u Can do Turbo Boost\n",
167                                 pi->lcore_id);
168         } else {
169                 pi->turbo_available = 0;
170                 pi->turbo_enable = 0;
171                 POWER_DEBUG_TRACE("Turbo Boost not available on Lcore %u\n",
172                                 pi->lcore_id);
173         }
174
175         ret = 0;
176         POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
177                         count, pi->lcore_id);
178 out:
179         if (f != NULL)
180                 fclose(f);
181
182         return ret;
183 }
184
185 /**
186  * It is to fopen the sys file for the future setting the lcore frequency.
187  */
188 static int
189 power_init_for_setting_freq(struct acpi_power_info *pi)
190 {
191         FILE *f;
192         char buf[BUFSIZ];
193         uint32_t i, freq;
194         int ret;
195
196         open_core_sysfs_file(&f, "rw+", POWER_SYSFILE_SETSPEED, pi->lcore_id);
197         if (f == NULL) {
198                 RTE_LOG(ERR, POWER, "Failed to open %s\n",
199                                 POWER_SYSFILE_SETSPEED);
200                 goto err;
201         }
202
203         ret = read_core_sysfs_s(f, buf, sizeof(buf));
204         if ((ret) < 0) {
205                 RTE_LOG(ERR, POWER, "Failed to read %s\n",
206                                 POWER_SYSFILE_SETSPEED);
207                 goto err;
208         }
209
210         freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
211         for (i = 0; i < pi->nb_freqs; i++) {
212                 if (freq == pi->freqs[i]) {
213                         pi->curr_idx = i;
214                         pi->f = f;
215                         return 0;
216                 }
217         }
218
219 err:
220         if (f != NULL)
221                 fclose(f);
222
223         return -1;
224 }
225
226 int
227 power_acpi_cpufreq_check_supported(void)
228 {
229         return cpufreq_check_scaling_driver(POWER_ACPI_DRIVER);
230 }
231
232 int
233 power_acpi_cpufreq_init(unsigned int lcore_id)
234 {
235         struct acpi_power_info *pi;
236         uint32_t exp_state;
237
238         if (lcore_id >= RTE_MAX_LCORE) {
239                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
240                                 lcore_id, RTE_MAX_LCORE - 1U);
241                 return -1;
242         }
243
244         pi = &lcore_power_info[lcore_id];
245         exp_state = POWER_IDLE;
246         /* The power in use state works as a guard variable between
247          * the CPU frequency control initialization and exit process.
248          * The ACQUIRE memory ordering here pairs with the RELEASE
249          * ordering below as lock to make sure the frequency operations
250          * in the critical section are done under the correct state.
251          */
252         if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
253                                         POWER_ONGOING, 0,
254                                         __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
255                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
256                                 "in use\n", lcore_id);
257                 return -1;
258         }
259
260         pi->lcore_id = lcore_id;
261         /* Check and set the governor */
262         if (power_set_governor_userspace(pi) < 0) {
263                 RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
264                                 "userspace\n", lcore_id);
265                 goto fail;
266         }
267
268         /* Get the available frequencies */
269         if (power_get_available_freqs(pi) < 0) {
270                 RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
271                                 "lcore %u\n", lcore_id);
272                 goto fail;
273         }
274
275         /* Init for setting lcore frequency */
276         if (power_init_for_setting_freq(pi) < 0) {
277                 RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
278                                 "lcore %u\n", lcore_id);
279                 goto fail;
280         }
281
282         /* Set freq to max by default */
283         if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
284                 RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
285                                 "to max\n", lcore_id);
286                 goto fail;
287         }
288
289         RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
290                         "power management\n", lcore_id);
291         exp_state = POWER_ONGOING;
292         __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_USED,
293                                     0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
294
295         return 0;
296
297 fail:
298         exp_state = POWER_ONGOING;
299         __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
300                                     0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
301
302         return -1;
303 }
304
305 int
306 power_acpi_cpufreq_exit(unsigned int lcore_id)
307 {
308         struct acpi_power_info *pi;
309         uint32_t exp_state;
310
311         if (lcore_id >= RTE_MAX_LCORE) {
312                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
313                                 lcore_id, RTE_MAX_LCORE - 1U);
314                 return -1;
315         }
316         pi = &lcore_power_info[lcore_id];
317         exp_state = POWER_USED;
318         /* The power in use state works as a guard variable between
319          * the CPU frequency control initialization and exit process.
320          * The ACQUIRE memory ordering here pairs with the RELEASE
321          * ordering below as lock to make sure the frequency operations
322          * in the critical section are done under the correct state.
323          */
324         if (!__atomic_compare_exchange_n(&(pi->state), &exp_state,
325                                         POWER_ONGOING, 0,
326                                         __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
327                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
328                                 "not used\n", lcore_id);
329                 return -1;
330         }
331
332         /* Close FD of setting freq */
333         fclose(pi->f);
334         pi->f = NULL;
335
336         /* Set the governor back to the original */
337         if (power_set_governor_original(pi) < 0) {
338                 RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
339                                 "to the original\n", lcore_id);
340                 goto fail;
341         }
342
343         RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
344                         "'userspace' mode and been set back to the "
345                         "original\n", lcore_id);
346         exp_state = POWER_ONGOING;
347         __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_IDLE,
348                                     0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
349
350         return 0;
351
352 fail:
353         exp_state = POWER_ONGOING;
354         __atomic_compare_exchange_n(&(pi->state), &exp_state, POWER_UNKNOWN,
355                                     0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
356
357         return -1;
358 }
359
360 uint32_t
361 power_acpi_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
362 {
363         struct acpi_power_info *pi;
364
365         if (lcore_id >= RTE_MAX_LCORE) {
366                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
367                 return 0;
368         }
369
370         if (freqs == NULL) {
371                 RTE_LOG(ERR, POWER, "NULL buffer supplied\n");
372                 return 0;
373         }
374
375         pi = &lcore_power_info[lcore_id];
376         if (num < pi->nb_freqs) {
377                 RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
378                 return 0;
379         }
380         rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
381
382         return pi->nb_freqs;
383 }
384
385 uint32_t
386 power_acpi_cpufreq_get_freq(unsigned int lcore_id)
387 {
388         if (lcore_id >= RTE_MAX_LCORE) {
389                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
390                 return RTE_POWER_INVALID_FREQ_INDEX;
391         }
392
393         return lcore_power_info[lcore_id].curr_idx;
394 }
395
396 int
397 power_acpi_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
398 {
399         if (lcore_id >= RTE_MAX_LCORE) {
400                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
401                 return -1;
402         }
403
404         return set_freq_internal(&(lcore_power_info[lcore_id]), index);
405 }
406
407 int
408 power_acpi_cpufreq_freq_down(unsigned int lcore_id)
409 {
410         struct acpi_power_info *pi;
411
412         if (lcore_id >= RTE_MAX_LCORE) {
413                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
414                 return -1;
415         }
416
417         pi = &lcore_power_info[lcore_id];
418         if (pi->curr_idx + 1 == pi->nb_freqs)
419                 return 0;
420
421         /* Frequencies in the array are from high to low. */
422         return set_freq_internal(pi, pi->curr_idx + 1);
423 }
424
425 int
426 power_acpi_cpufreq_freq_up(unsigned int lcore_id)
427 {
428         struct acpi_power_info *pi;
429
430         if (lcore_id >= RTE_MAX_LCORE) {
431                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
432                 return -1;
433         }
434
435         pi = &lcore_power_info[lcore_id];
436         if (pi->curr_idx == 0 ||
437             (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
438                 return 0;
439
440         /* Frequencies in the array are from high to low. */
441         return set_freq_internal(pi, pi->curr_idx - 1);
442 }
443
444 int
445 power_acpi_cpufreq_freq_max(unsigned int lcore_id)
446 {
447         if (lcore_id >= RTE_MAX_LCORE) {
448                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
449                 return -1;
450         }
451
452         /* Frequencies in the array are from high to low. */
453         if (lcore_power_info[lcore_id].turbo_available) {
454                 if (lcore_power_info[lcore_id].turbo_enable)
455                         /* Set to Turbo */
456                         return set_freq_internal(
457                                         &lcore_power_info[lcore_id], 0);
458                 else
459                         /* Set to max non-turbo */
460                         return set_freq_internal(
461                                         &lcore_power_info[lcore_id], 1);
462         } else
463                 return set_freq_internal(&lcore_power_info[lcore_id], 0);
464 }
465
466 int
467 power_acpi_cpufreq_freq_min(unsigned int lcore_id)
468 {
469         struct acpi_power_info *pi;
470
471         if (lcore_id >= RTE_MAX_LCORE) {
472                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
473                 return -1;
474         }
475
476         pi = &lcore_power_info[lcore_id];
477
478         /* Frequencies in the array are from high to low. */
479         return set_freq_internal(pi, pi->nb_freqs - 1);
480 }
481
482
483 int
484 power_acpi_turbo_status(unsigned int lcore_id)
485 {
486         struct acpi_power_info *pi;
487
488         if (lcore_id >= RTE_MAX_LCORE) {
489                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
490                 return -1;
491         }
492
493         pi = &lcore_power_info[lcore_id];
494
495         return pi->turbo_enable;
496 }
497
498
499 int
500 power_acpi_enable_turbo(unsigned int lcore_id)
501 {
502         struct acpi_power_info *pi;
503
504         if (lcore_id >= RTE_MAX_LCORE) {
505                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
506                 return -1;
507         }
508
509         pi = &lcore_power_info[lcore_id];
510
511         if (pi->turbo_available)
512                 pi->turbo_enable = 1;
513         else {
514                 pi->turbo_enable = 0;
515                 RTE_LOG(ERR, POWER,
516                         "Failed to enable turbo on lcore %u\n",
517                         lcore_id);
518                         return -1;
519         }
520
521         /* Max may have changed, so call to max function */
522         if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
523                 RTE_LOG(ERR, POWER,
524                         "Failed to set frequency of lcore %u to max\n",
525                         lcore_id);
526                         return -1;
527         }
528
529         return 0;
530 }
531
532 int
533 power_acpi_disable_turbo(unsigned int lcore_id)
534 {
535         struct acpi_power_info *pi;
536
537         if (lcore_id >= RTE_MAX_LCORE) {
538                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
539                 return -1;
540         }
541
542         pi = &lcore_power_info[lcore_id];
543
544          pi->turbo_enable = 0;
545
546         if ((pi->turbo_available) && (pi->curr_idx <= 1)) {
547                 /* Try to set freq to max by default coming out of turbo */
548                 if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
549                         RTE_LOG(ERR, POWER,
550                                 "Failed to set frequency of lcore %u to max\n",
551                                 lcore_id);
552                         return -1;
553                 }
554         }
555
556         return 0;
557 }
558
559 int power_acpi_get_capabilities(unsigned int lcore_id,
560                 struct rte_power_core_capabilities *caps)
561 {
562         struct acpi_power_info *pi;
563
564         if (lcore_id >= RTE_MAX_LCORE) {
565                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
566                 return -1;
567         }
568         if (caps == NULL) {
569                 RTE_LOG(ERR, POWER, "Invalid argument\n");
570                 return -1;
571         }
572
573         pi = &lcore_power_info[lcore_id];
574         caps->capabilities = 0;
575         caps->turbo = !!(pi->turbo_available);
576
577         return 0;
578 }