test/cycles: restore default delay callback
[dpdk.git] / lib / librte_power / power_acpi_cpufreq.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <sys/types.h>
7 #include <sys/stat.h>
8 #include <fcntl.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <signal.h>
13 #include <limits.h>
14
15 #include <rte_atomic.h>
16 #include <rte_memcpy.h>
17 #include <rte_memory.h>
18 #include <rte_string_fns.h>
19
20 #include "power_acpi_cpufreq.h"
21 #include "power_common.h"
22
/*
 * Debug trace helper: logs at ERR level, prefixed with the calling function
 * name, when RTE_LIBRTE_POWER_DEBUG is defined; expands to nothing otherwise.
 */
#ifndef RTE_LIBRTE_POWER_DEBUG
#define POWER_DEBUG_TRACE(fmt, args...)
#else
#define POWER_DEBUG_TRACE(fmt, args...) do { \
		RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
} while (0)
#endif

/* Log and return 'rv' from the enclosing function when 'stream' is NULL. */
#define FOPEN_OR_ERR_RET(stream, rv) do { \
		if (!(stream)) { \
			RTE_LOG(ERR, POWER, "File not opened\n"); \
			return rv; \
		} \
} while (0)

/* Log and jump to 'lbl' when the pointer result 'res' is NULL. */
#define FOPS_OR_NULL_GOTO(res, lbl) do { \
		if (!(res)) { \
			RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
			goto lbl; \
		} \
} while (0)

/* Log and jump to 'lbl' when the integer result 'res' is negative. */
#define FOPS_OR_ERR_GOTO(res, lbl) do { \
		if (0 > (res)) { \
			RTE_LOG(ERR, POWER, "File operations failed\n"); \
			goto lbl; \
		} \
} while (0)
51
#define STR_SIZE 1024
/* Numeric base passed to strtoul() when parsing sysfs values */
#define POWER_CONVERT_TO_DECIMAL 10

/* Governor name written to scaling_governor during initialisation */
#define POWER_GOVERNOR_USERSPACE "userspace"

/* Per-cpu cpufreq sysfs files; '%u' is replaced with the lcore id */
#define POWER_SYSFILE_GOVERNOR \
	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor"
#define POWER_SYSFILE_AVAIL_FREQ \
	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
#define POWER_SYSFILE_SETSPEED \
	"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"

/* Scaling driver name handed to cpufreq_check_scaling_driver() */
#define POWER_ACPI_DRIVER "acpi-cpufreq"

/*
 * MSR related
 */
#define PLATFORM_INFO          0x0CE
#define TURBO_RATIO_LIMIT      0x1AD
#define IA32_PERF_CTL          0x199
#define CORE_TURBO_DISABLE_BIT ((uint64_t)1 << 32)
71
/* Values stored in rte_power_info.state to track per-lcore initialisation. */
enum power_state {
	POWER_IDLE    = 0, /* not initialised; the only state init accepts */
	POWER_ONGOING = 1, /* an init or exit call is in progress */
	POWER_USED    = 2, /* init completed; the only state exit accepts */
	POWER_UNKNOWN = 3  /* an init or exit call failed */
};
78
79 /**
80  * Power info per lcore.
81  */
82 struct rte_power_info {
83         unsigned int lcore_id;                   /**< Logical core id */
84         uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
85         uint32_t nb_freqs;                   /**< number of available freqs */
86         FILE *f;                             /**< FD of scaling_setspeed */
87         char governor_ori[32];               /**< Original governor name */
88         uint32_t curr_idx;                   /**< Freq index in freqs array */
89         volatile uint32_t state;             /**< Power in use state */
90         uint16_t turbo_available;            /**< Turbo Boost available */
91         uint16_t turbo_enable;               /**< Turbo Boost enable/disable */
92 } __rte_cache_aligned;
93
94 static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
95
96 /**
97  * It is to set specific freq for specific logical core, according to the index
98  * of supported frequencies.
99  */
100 static int
101 set_freq_internal(struct rte_power_info *pi, uint32_t idx)
102 {
103         if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
104                 RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
105                                 "should be less than %u\n", idx, pi->nb_freqs);
106                 return -1;
107         }
108
109         /* Check if it is the same as current */
110         if (idx == pi->curr_idx)
111                 return 0;
112
113         POWER_DEBUG_TRACE("Frequency[%u] %u to be set for lcore %u\n",
114                         idx, pi->freqs[idx], pi->lcore_id);
115         if (fseek(pi->f, 0, SEEK_SET) < 0) {
116                 RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
117                                 "for setting frequency for lcore %u\n", pi->lcore_id);
118                 return -1;
119         }
120         if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
121                 RTE_LOG(ERR, POWER, "Fail to write new frequency for "
122                                 "lcore %u\n", pi->lcore_id);
123                 return -1;
124         }
125         fflush(pi->f);
126         pi->curr_idx = idx;
127
128         return 1;
129 }
130
131 /**
132  * It is to check the current scaling governor by reading sys file, and then
133  * set it into 'userspace' if it is not by writing the sys file. The original
134  * governor will be saved for rolling back.
135  */
136 static int
137 power_set_governor_userspace(struct rte_power_info *pi)
138 {
139         FILE *f;
140         int ret = -1;
141         char buf[BUFSIZ];
142         char fullpath[PATH_MAX];
143         char *s;
144         int val;
145
146         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
147                         pi->lcore_id);
148         f = fopen(fullpath, "rw+");
149         FOPEN_OR_ERR_RET(f, ret);
150
151         s = fgets(buf, sizeof(buf), f);
152         FOPS_OR_NULL_GOTO(s, out);
153         /* Strip off terminating '\n' */
154         strtok(buf, "\n");
155
156         /* Check if current governor is userspace */
157         if (strncmp(buf, POWER_GOVERNOR_USERSPACE,
158                         sizeof(POWER_GOVERNOR_USERSPACE)) == 0) {
159                 ret = 0;
160                 POWER_DEBUG_TRACE("Power management governor of lcore %u is "
161                                 "already userspace\n", pi->lcore_id);
162                 goto out;
163         }
164         /* Save the original governor */
165         strlcpy(pi->governor_ori, buf, sizeof(pi->governor_ori));
166
167         /* Write 'userspace' to the governor */
168         val = fseek(f, 0, SEEK_SET);
169         FOPS_OR_ERR_GOTO(val, out);
170
171         val = fputs(POWER_GOVERNOR_USERSPACE, f);
172         FOPS_OR_ERR_GOTO(val, out);
173
174         /* We need to flush to see if the fputs succeeds */
175         val = fflush(f);
176         FOPS_OR_ERR_GOTO(val, out);
177
178         ret = 0;
179         RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
180                         "set to user space successfully\n", pi->lcore_id);
181 out:
182         fclose(f);
183
184         return ret;
185 }
186
187 /**
188  * It is to get the available frequencies of the specific lcore by reading the
189  * sys file.
190  */
191 static int
192 power_get_available_freqs(struct rte_power_info *pi)
193 {
194         FILE *f;
195         int ret = -1, i, count;
196         char *p;
197         char buf[BUFSIZ];
198         char fullpath[PATH_MAX];
199         char *freqs[RTE_MAX_LCORE_FREQS];
200         char *s;
201
202         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_AVAIL_FREQ,
203                         pi->lcore_id);
204         f = fopen(fullpath, "r");
205         FOPEN_OR_ERR_RET(f, ret);
206
207         s = fgets(buf, sizeof(buf), f);
208         FOPS_OR_NULL_GOTO(s, out);
209
210         /* Strip the line break if there is */
211         p = strchr(buf, '\n');
212         if (p != NULL)
213                 *p = 0;
214
215         /* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
216         count = rte_strsplit(buf, sizeof(buf), freqs,
217                         RTE_MAX_LCORE_FREQS, ' ');
218         if (count <= 0) {
219                 RTE_LOG(ERR, POWER, "No available frequency in "
220                                 ""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
221                 goto out;
222         }
223         if (count >= RTE_MAX_LCORE_FREQS) {
224                 RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
225                                 count);
226                 goto out;
227         }
228
229         /* Store the available frequncies into power context */
230         for (i = 0, pi->nb_freqs = 0; i < count; i++) {
231                 POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
232                                 i, freqs[i]);
233                 pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
234                                 POWER_CONVERT_TO_DECIMAL);
235         }
236
237         if ((pi->freqs[0]-1000) == pi->freqs[1]) {
238                 pi->turbo_available = 1;
239                 pi->turbo_enable = 1;
240                 POWER_DEBUG_TRACE("Lcore %u Can do Turbo Boost\n",
241                                 pi->lcore_id);
242         } else {
243                 pi->turbo_available = 0;
244                 pi->turbo_enable = 0;
245                 POWER_DEBUG_TRACE("Turbo Boost not available on Lcore %u\n",
246                                 pi->lcore_id);
247         }
248
249         ret = 0;
250         POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
251                         count, pi->lcore_id);
252 out:
253         fclose(f);
254
255         return ret;
256 }
257
258 /**
259  * It is to fopen the sys file for the future setting the lcore frequency.
260  */
261 static int
262 power_init_for_setting_freq(struct rte_power_info *pi)
263 {
264         FILE *f;
265         char fullpath[PATH_MAX];
266         char buf[BUFSIZ];
267         uint32_t i, freq;
268         char *s;
269
270         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_SETSPEED,
271                         pi->lcore_id);
272         f = fopen(fullpath, "rw+");
273         FOPEN_OR_ERR_RET(f, -1);
274
275         s = fgets(buf, sizeof(buf), f);
276         FOPS_OR_NULL_GOTO(s, out);
277
278         freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
279         for (i = 0; i < pi->nb_freqs; i++) {
280                 if (freq == pi->freqs[i]) {
281                         pi->curr_idx = i;
282                         pi->f = f;
283                         return 0;
284                 }
285         }
286
287 out:
288         fclose(f);
289
290         return -1;
291 }
292
293 int
294 power_acpi_cpufreq_check_supported(void)
295 {
296         return cpufreq_check_scaling_driver(POWER_ACPI_DRIVER);
297 }
298
299 int
300 power_acpi_cpufreq_init(unsigned int lcore_id)
301 {
302         struct rte_power_info *pi;
303
304         if (lcore_id >= RTE_MAX_LCORE) {
305                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
306                                 lcore_id, RTE_MAX_LCORE - 1U);
307                 return -1;
308         }
309
310         pi = &lcore_power_info[lcore_id];
311         if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
312                         == 0) {
313                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
314                                 "in use\n", lcore_id);
315                 return -1;
316         }
317
318         pi->lcore_id = lcore_id;
319         /* Check and set the governor */
320         if (power_set_governor_userspace(pi) < 0) {
321                 RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
322                                 "userspace\n", lcore_id);
323                 goto fail;
324         }
325
326         /* Get the available frequencies */
327         if (power_get_available_freqs(pi) < 0) {
328                 RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
329                                 "lcore %u\n", lcore_id);
330                 goto fail;
331         }
332
333         /* Init for setting lcore frequency */
334         if (power_init_for_setting_freq(pi) < 0) {
335                 RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
336                                 "lcore %u\n", lcore_id);
337                 goto fail;
338         }
339
340         /* Set freq to max by default */
341         if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
342                 RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
343                                 "to max\n", lcore_id);
344                 goto fail;
345         }
346
347         RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
348                         "power management\n", lcore_id);
349         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
350
351         return 0;
352
353 fail:
354         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
355
356         return -1;
357 }
358
359 /**
360  * It is to check the governor and then set the original governor back if
361  * needed by writing the sys file.
362  */
363 static int
364 power_set_governor_original(struct rte_power_info *pi)
365 {
366         FILE *f;
367         int ret = -1;
368         char buf[BUFSIZ];
369         char fullpath[PATH_MAX];
370         char *s;
371         int val;
372
373         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
374                         pi->lcore_id);
375         f = fopen(fullpath, "rw+");
376         FOPEN_OR_ERR_RET(f, ret);
377
378         s = fgets(buf, sizeof(buf), f);
379         FOPS_OR_NULL_GOTO(s, out);
380
381         /* Check if the governor to be set is the same as current */
382         if (strncmp(buf, pi->governor_ori, sizeof(pi->governor_ori)) == 0) {
383                 ret = 0;
384                 POWER_DEBUG_TRACE("Power management governor of lcore %u "
385                                 "has already been set to %s\n",
386                                 pi->lcore_id, pi->governor_ori);
387                 goto out;
388         }
389
390         /* Write back the original governor */
391         val = fseek(f, 0, SEEK_SET);
392         FOPS_OR_ERR_GOTO(val, out);
393
394         val = fputs(pi->governor_ori, f);
395         FOPS_OR_ERR_GOTO(val, out);
396
397         ret = 0;
398         RTE_LOG(INFO, POWER, "Power management governor of lcore %u "
399                         "has been set back to %s successfully\n",
400                         pi->lcore_id, pi->governor_ori);
401 out:
402         fclose(f);
403
404         return ret;
405 }
406
407 int
408 power_acpi_cpufreq_exit(unsigned int lcore_id)
409 {
410         struct rte_power_info *pi;
411
412         if (lcore_id >= RTE_MAX_LCORE) {
413                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
414                                 lcore_id, RTE_MAX_LCORE - 1U);
415                 return -1;
416         }
417         pi = &lcore_power_info[lcore_id];
418         if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)
419                         == 0) {
420                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
421                                 "not used\n", lcore_id);
422                 return -1;
423         }
424
425         /* Close FD of setting freq */
426         fclose(pi->f);
427         pi->f = NULL;
428
429         /* Set the governor back to the original */
430         if (power_set_governor_original(pi) < 0) {
431                 RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
432                                 "to the original\n", lcore_id);
433                 goto fail;
434         }
435
436         RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
437                         "'userspace' mode and been set back to the "
438                         "original\n", lcore_id);
439         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
440
441         return 0;
442
443 fail:
444         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
445
446         return -1;
447 }
448
449 uint32_t
450 power_acpi_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
451 {
452         struct rte_power_info *pi;
453
454         if (lcore_id >= RTE_MAX_LCORE) {
455                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
456                 return 0;
457         }
458
459         if (freqs == NULL) {
460                 RTE_LOG(ERR, POWER, "NULL buffer supplied\n");
461                 return 0;
462         }
463
464         pi = &lcore_power_info[lcore_id];
465         if (num < pi->nb_freqs) {
466                 RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
467                 return 0;
468         }
469         rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
470
471         return pi->nb_freqs;
472 }
473
474 uint32_t
475 power_acpi_cpufreq_get_freq(unsigned int lcore_id)
476 {
477         if (lcore_id >= RTE_MAX_LCORE) {
478                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
479                 return RTE_POWER_INVALID_FREQ_INDEX;
480         }
481
482         return lcore_power_info[lcore_id].curr_idx;
483 }
484
485 int
486 power_acpi_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
487 {
488         if (lcore_id >= RTE_MAX_LCORE) {
489                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
490                 return -1;
491         }
492
493         return set_freq_internal(&(lcore_power_info[lcore_id]), index);
494 }
495
496 int
497 power_acpi_cpufreq_freq_down(unsigned int lcore_id)
498 {
499         struct rte_power_info *pi;
500
501         if (lcore_id >= RTE_MAX_LCORE) {
502                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
503                 return -1;
504         }
505
506         pi = &lcore_power_info[lcore_id];
507         if (pi->curr_idx + 1 == pi->nb_freqs)
508                 return 0;
509
510         /* Frequencies in the array are from high to low. */
511         return set_freq_internal(pi, pi->curr_idx + 1);
512 }
513
514 int
515 power_acpi_cpufreq_freq_up(unsigned int lcore_id)
516 {
517         struct rte_power_info *pi;
518
519         if (lcore_id >= RTE_MAX_LCORE) {
520                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
521                 return -1;
522         }
523
524         pi = &lcore_power_info[lcore_id];
525         if (pi->curr_idx == 0 ||
526             (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable))
527                 return 0;
528
529         /* Frequencies in the array are from high to low. */
530         return set_freq_internal(pi, pi->curr_idx - 1);
531 }
532
533 int
534 power_acpi_cpufreq_freq_max(unsigned int lcore_id)
535 {
536         if (lcore_id >= RTE_MAX_LCORE) {
537                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
538                 return -1;
539         }
540
541         /* Frequencies in the array are from high to low. */
542         if (lcore_power_info[lcore_id].turbo_available) {
543                 if (lcore_power_info[lcore_id].turbo_enable)
544                         /* Set to Turbo */
545                         return set_freq_internal(
546                                         &lcore_power_info[lcore_id], 0);
547                 else
548                         /* Set to max non-turbo */
549                         return set_freq_internal(
550                                         &lcore_power_info[lcore_id], 1);
551         } else
552                 return set_freq_internal(&lcore_power_info[lcore_id], 0);
553 }
554
555 int
556 power_acpi_cpufreq_freq_min(unsigned int lcore_id)
557 {
558         struct rte_power_info *pi;
559
560         if (lcore_id >= RTE_MAX_LCORE) {
561                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
562                 return -1;
563         }
564
565         pi = &lcore_power_info[lcore_id];
566
567         /* Frequencies in the array are from high to low. */
568         return set_freq_internal(pi, pi->nb_freqs - 1);
569 }
570
571
572 int
573 power_acpi_turbo_status(unsigned int lcore_id)
574 {
575         struct rte_power_info *pi;
576
577         if (lcore_id >= RTE_MAX_LCORE) {
578                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
579                 return -1;
580         }
581
582         pi = &lcore_power_info[lcore_id];
583
584         return pi->turbo_enable;
585 }
586
587
588 int
589 power_acpi_enable_turbo(unsigned int lcore_id)
590 {
591         struct rte_power_info *pi;
592
593         if (lcore_id >= RTE_MAX_LCORE) {
594                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
595                 return -1;
596         }
597
598         pi = &lcore_power_info[lcore_id];
599
600         if (pi->turbo_available)
601                 pi->turbo_enable = 1;
602         else {
603                 pi->turbo_enable = 0;
604                 RTE_LOG(ERR, POWER,
605                         "Failed to enable turbo on lcore %u\n",
606                         lcore_id);
607                         return -1;
608         }
609
610         /* Max may have changed, so call to max function */
611         if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
612                 RTE_LOG(ERR, POWER,
613                         "Failed to set frequency of lcore %u to max\n",
614                         lcore_id);
615                         return -1;
616         }
617
618         return 0;
619 }
620
621 int
622 power_acpi_disable_turbo(unsigned int lcore_id)
623 {
624         struct rte_power_info *pi;
625
626         if (lcore_id >= RTE_MAX_LCORE) {
627                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
628                 return -1;
629         }
630
631         pi = &lcore_power_info[lcore_id];
632
633          pi->turbo_enable = 0;
634
635         if ((pi->turbo_available) && (pi->curr_idx <= 1)) {
636                 /* Try to set freq to max by default coming out of turbo */
637                 if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
638                         RTE_LOG(ERR, POWER,
639                                 "Failed to set frequency of lcore %u to max\n",
640                                 lcore_id);
641                         return -1;
642                 }
643         }
644
645         return 0;
646 }
647
648 int power_acpi_get_capabilities(unsigned int lcore_id,
649                 struct rte_power_core_capabilities *caps)
650 {
651         struct rte_power_info *pi;
652
653         if (lcore_id >= RTE_MAX_LCORE) {
654                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
655                 return -1;
656         }
657         if (caps == NULL) {
658                 RTE_LOG(ERR, POWER, "Invalid argument\n");
659                 return -1;
660         }
661
662         pi = &lcore_power_info[lcore_id];
663         caps->capabilities = 0;
664         caps->turbo = !!(pi->turbo_available);
665
666         return 0;
667 }