01ac5acb8b52d8ab82e994833e69cad4cbab50b6
[dpdk.git] / lib / librte_power / rte_power_acpi_cpufreq.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdio.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <fcntl.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <signal.h>
42 #include <limits.h>
43
44 #include <rte_memcpy.h>
45 #include <rte_atomic.h>
46
47 #include "rte_power_acpi_cpufreq.h"
48 #include "rte_power_common.h"
49
/*
 * Debug trace helper. When RTE_LIBRTE_POWER_DEBUG is defined the message is
 * logged through RTE_LOG() prefixed with the calling function's name;
 * otherwise the macro expands to nothing.
 */
#ifdef RTE_LIBRTE_POWER_DEBUG
#define POWER_DEBUG_TRACE(fmt, args...) do { \
                RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
} while (0)
#else
#define POWER_DEBUG_TRACE(fmt, args...)
#endif

/*
 * If the FILE pointer 'f' is NULL, log an error and make the *calling*
 * function return 'retval'. Note the hidden return statement.
 */
#define FOPEN_OR_ERR_RET(f, retval) do { \
                if ((f) == NULL) { \
                        RTE_LOG(ERR, POWER, "File not openned\n"); \
                        return retval; \
                } \
} while (0)

/*
 * If 'ret' (a pointer result, used here for fgets()) is NULL, log an error
 * and jump to 'label' in the calling function.
 */
#define FOPS_OR_NULL_GOTO(ret, label) do { \
                if ((ret) == NULL) { \
                        RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
                        goto label; \
                } \
} while (0)

/*
 * If 'ret' (an integer result of a file operation) is negative, log an error
 * and jump to 'label' in the calling function.
 */
#define FOPS_OR_ERR_GOTO(ret, label) do { \
                if ((ret) < 0) { \
                        RTE_LOG(ERR, POWER, "File operations failed\n"); \
                        goto label; \
                } \
} while (0)
78
/* NOTE(review): STR_SIZE is not referenced in the visible part of this file */
#define STR_SIZE     1024
/* Numeric base handed to strtoul() when parsing frequencies */
#define POWER_CONVERT_TO_DECIMAL 10

/* Governor name written to scaling_governor while this library is in use */
#define POWER_GOVERNOR_USERSPACE "userspace"
/* Per-CPU sysfs path templates; %u is replaced by the lcore id */
#define POWER_SYSFILE_GOVERNOR   \
                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor"
#define POWER_SYSFILE_AVAIL_FREQ \
                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
#define POWER_SYSFILE_SETSPEED   \
                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"

/*
 * MSR related
 *
 * NOTE(review): none of these are referenced in the visible part of this
 * file; presumably they are model-specific register addresses and a mask
 * for bit 32 - confirm before relying on them.
 */
#define PLATFORM_INFO     0x0CE
#define TURBO_RATIO_LIMIT 0x1AD
#define IA32_PERF_CTL     0x199
#define CORE_TURBO_DISABLE_BIT ((uint64_t)1<<32)
97
/*
 * Life-cycle state of one lcore's power context, stored in
 * rte_power_info.state and moved with atomic compare-and-set by the
 * init/exit functions.
 */
enum power_state {
        POWER_IDLE = 0,  /* not initialised, or cleanly exited */
        POWER_ONGOING,   /* init or exit currently in progress */
        POWER_USED,      /* successfully initialised */
        POWER_UNKNOWN    /* init or exit failed part-way through */
};
104
/**
 * Power info per lcore.
 *
 * One instance exists for every possible lcore id (see lcore_power_info
 * below); it is filled in by rte_power_acpi_cpufreq_init().
 */
struct rte_power_info {
        unsigned lcore_id;                   /**< Logical core id */
        uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequencies as read from sysfs */
        uint32_t nb_freqs;                   /**< Number of valid entries in freqs */
        FILE *f;                             /**< Open stream on scaling_setspeed */
        char governor_ori[32];               /**< Governor name saved for restore */
        uint32_t curr_idx;                   /**< Index in freqs of current freq */
        volatile uint32_t state;             /**< An enum power_state value */
        uint16_t turbo_available;            /**< Non-zero if turbo was detected */
        uint16_t turbo_enable;               /**< Non-zero if turbo may be used */
} __rte_cache_aligned;

/* Per-lcore power contexts, indexed by lcore id */
static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
121
122 /**
123  * It is to set specific freq for specific logical core, according to the index
124  * of supported frequencies.
125  */
126 static int
127 set_freq_internal(struct rte_power_info *pi, uint32_t idx)
128 {
129         if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
130                 RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
131                                 "should be less than %u\n", idx, pi->nb_freqs);
132                 return -1;
133         }
134
135         /* Check if it is the same as current */
136         if (idx == pi->curr_idx)
137                 return 0;
138
139         POWER_DEBUG_TRACE("Freqency[%u] %u to be set for lcore %u\n",
140                         idx, pi->freqs[idx], pi->lcore_id);
141         if (fseek(pi->f, 0, SEEK_SET) < 0) {
142                 RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
143                                 "for setting frequency for lcore %u\n", pi->lcore_id);
144                 return -1;
145         }
146         if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
147                 RTE_LOG(ERR, POWER, "Fail to write new frequency for "
148                                 "lcore %u\n", pi->lcore_id);
149                 return -1;
150         }
151         fflush(pi->f);
152         pi->curr_idx = idx;
153
154         return 1;
155 }
156
157 /**
158  * It is to check the current scaling governor by reading sys file, and then
159  * set it into 'userspace' if it is not by writing the sys file. The original
160  * governor will be saved for rolling back.
161  */
162 static int
163 power_set_governor_userspace(struct rte_power_info *pi)
164 {
165         FILE *f;
166         int ret = -1;
167         char buf[BUFSIZ];
168         char fullpath[PATH_MAX];
169         char *s;
170         int val;
171
172         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
173                         pi->lcore_id);
174         f = fopen(fullpath, "rw+");
175         FOPEN_OR_ERR_RET(f, ret);
176
177         s = fgets(buf, sizeof(buf), f);
178         FOPS_OR_NULL_GOTO(s, out);
179
180         /* Check if current governor is userspace */
181         if (strncmp(buf, POWER_GOVERNOR_USERSPACE,
182                         sizeof(POWER_GOVERNOR_USERSPACE)) == 0) {
183                 ret = 0;
184                 POWER_DEBUG_TRACE("Power management governor of lcore %u is "
185                                 "already userspace\n", pi->lcore_id);
186                 goto out;
187         }
188         /* Save the original governor */
189         snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf);
190
191         /* Write 'userspace' to the governor */
192         val = fseek(f, 0, SEEK_SET);
193         FOPS_OR_ERR_GOTO(val, out);
194
195         val = fputs(POWER_GOVERNOR_USERSPACE, f);
196         FOPS_OR_ERR_GOTO(val, out);
197
198         ret = 0;
199         RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
200                         "set to user space successfully\n", pi->lcore_id);
201 out:
202         fclose(f);
203
204         return ret;
205 }
206
207 /**
208  * It is to get the available frequencies of the specific lcore by reading the
209  * sys file.
210  */
211 static int
212 power_get_available_freqs(struct rte_power_info *pi)
213 {
214         FILE *f;
215         int ret = -1, i, count;
216         char *p;
217         char buf[BUFSIZ];
218         char fullpath[PATH_MAX];
219         char *freqs[RTE_MAX_LCORE_FREQS];
220         char *s;
221
222         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_AVAIL_FREQ,
223                         pi->lcore_id);
224         f = fopen(fullpath, "r");
225         FOPEN_OR_ERR_RET(f, ret);
226
227         s = fgets(buf, sizeof(buf), f);
228         FOPS_OR_NULL_GOTO(s, out);
229
230         /* Strip the line break if there is */
231         p = strchr(buf, '\n');
232         if (p != NULL)
233                 *p = 0;
234
235         /* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
236         count = rte_strsplit(buf, sizeof(buf), freqs,
237                         RTE_MAX_LCORE_FREQS, ' ');
238         if (count <= 0) {
239                 RTE_LOG(ERR, POWER, "No available frequency in "
240                                 ""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
241                 goto out;
242         }
243         if (count >= RTE_MAX_LCORE_FREQS) {
244                 RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
245                                 count);
246                 goto out;
247         }
248
249         /* Store the available frequncies into power context */
250         for (i = 0, pi->nb_freqs = 0; i < count; i++) {
251                 POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
252                                 i, freqs[i]);
253                 pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
254                                 POWER_CONVERT_TO_DECIMAL);
255         }
256
257         if ((pi->freqs[0]-1000) == pi->freqs[1]) {
258                 pi->turbo_available = 1;
259                 pi->turbo_enable = 1;
260                 POWER_DEBUG_TRACE("Lcore %u Can do Turbo Boost\n",
261                                 pi->lcore_id);
262         } else {
263                 pi->turbo_available = 0;
264                 pi->turbo_enable = 0;
265                 POWER_DEBUG_TRACE("Turbo Boost not available on Lcore %u\n",
266                                 pi->lcore_id);
267         }
268
269         ret = 0;
270         POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n",
271                         count, pi->lcore_id);
272 out:
273         fclose(f);
274
275         return ret;
276 }
277
278 /**
279  * It is to fopen the sys file for the future setting the lcore frequency.
280  */
281 static int
282 power_init_for_setting_freq(struct rte_power_info *pi)
283 {
284         FILE *f;
285         char fullpath[PATH_MAX];
286         char buf[BUFSIZ];
287         uint32_t i, freq;
288         char *s;
289
290         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_SETSPEED,
291                         pi->lcore_id);
292         f = fopen(fullpath, "rw+");
293         FOPEN_OR_ERR_RET(f, -1);
294
295         s = fgets(buf, sizeof(buf), f);
296         FOPS_OR_NULL_GOTO(s, out);
297
298         freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
299         for (i = 0; i < pi->nb_freqs; i++) {
300                 if (freq == pi->freqs[i]) {
301                         pi->curr_idx = i;
302                         pi->f = f;
303                         return 0;
304                 }
305         }
306
307 out:
308         fclose(f);
309
310         return -1;
311 }
312
313 int
314 rte_power_acpi_cpufreq_init(unsigned lcore_id)
315 {
316         struct rte_power_info *pi;
317
318         if (lcore_id >= RTE_MAX_LCORE) {
319                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
320                                 lcore_id, RTE_MAX_LCORE - 1U);
321                 return -1;
322         }
323
324         pi = &lcore_power_info[lcore_id];
325         if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
326                         == 0) {
327                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
328                                 "in use\n", lcore_id);
329                 return -1;
330         }
331
332         pi->lcore_id = lcore_id;
333         /* Check and set the governor */
334         if (power_set_governor_userspace(pi) < 0) {
335                 RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
336                                 "userspace\n", lcore_id);
337                 goto fail;
338         }
339
340         /* Get the available frequencies */
341         if (power_get_available_freqs(pi) < 0) {
342                 RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
343                                 "lcore %u\n", lcore_id);
344                 goto fail;
345         }
346
347         /* Init for setting lcore frequency */
348         if (power_init_for_setting_freq(pi) < 0) {
349                 RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
350                                 "lcore %u\n", lcore_id);
351                 goto fail;
352         }
353
354         /* Set freq to max by default */
355         if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) {
356                 RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
357                                 "to max\n", lcore_id);
358                 goto fail;
359         }
360
361         RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
362                         "power manamgement\n", lcore_id);
363         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
364
365         return 0;
366
367 fail:
368         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
369
370         return -1;
371 }
372
373 /**
374  * It is to check the governor and then set the original governor back if
375  * needed by writing the the sys file.
376  */
377 static int
378 power_set_governor_original(struct rte_power_info *pi)
379 {
380         FILE *f;
381         int ret = -1;
382         char buf[BUFSIZ];
383         char fullpath[PATH_MAX];
384         char *s;
385         int val;
386
387         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
388                         pi->lcore_id);
389         f = fopen(fullpath, "rw+");
390         FOPEN_OR_ERR_RET(f, ret);
391
392         s = fgets(buf, sizeof(buf), f);
393         FOPS_OR_NULL_GOTO(s, out);
394
395         /* Check if the governor to be set is the same as current */
396         if (strncmp(buf, pi->governor_ori, sizeof(pi->governor_ori)) == 0) {
397                 ret = 0;
398                 POWER_DEBUG_TRACE("Power management governor of lcore %u "
399                                 "has already been set to %s\n",
400                                 pi->lcore_id, pi->governor_ori);
401                 goto out;
402         }
403
404         /* Write back the original governor */
405         val = fseek(f, 0, SEEK_SET);
406         FOPS_OR_ERR_GOTO(val, out);
407
408         val = fputs(pi->governor_ori, f);
409         FOPS_OR_ERR_GOTO(val, out);
410
411         ret = 0;
412         RTE_LOG(INFO, POWER, "Power management governor of lcore %u "
413                         "has been set back to %s successfully\n",
414                         pi->lcore_id, pi->governor_ori);
415 out:
416         fclose(f);
417
418         return ret;
419 }
420
421 int
422 rte_power_acpi_cpufreq_exit(unsigned lcore_id)
423 {
424         struct rte_power_info *pi;
425
426         if (lcore_id >= RTE_MAX_LCORE) {
427                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
428                                 lcore_id, RTE_MAX_LCORE - 1U);
429                 return -1;
430         }
431         pi = &lcore_power_info[lcore_id];
432         if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)
433                         == 0) {
434                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
435                                 "not used\n", lcore_id);
436                 return -1;
437         }
438
439         /* Close FD of setting freq */
440         fclose(pi->f);
441         pi->f = NULL;
442
443         /* Set the governor back to the original */
444         if (power_set_governor_original(pi) < 0) {
445                 RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
446                                 "to the original\n", lcore_id);
447                 goto fail;
448         }
449
450         RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
451                         "'userspace' mode and been set back to the "
452                         "original\n", lcore_id);
453         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
454
455         return 0;
456
457 fail:
458         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
459
460         return -1;
461 }
462
463 uint32_t
464 rte_power_acpi_cpufreq_freqs(unsigned lcore_id, uint32_t *freqs, uint32_t num)
465 {
466         struct rte_power_info *pi;
467
468         if (lcore_id >= RTE_MAX_LCORE || !freqs) {
469                 RTE_LOG(ERR, POWER, "Invalid input parameter\n");
470                 return 0;
471         }
472
473         pi = &lcore_power_info[lcore_id];
474         if (num < pi->nb_freqs) {
475                 RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
476                 return 0;
477         }
478         rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
479
480         return pi->nb_freqs;
481 }
482
483 uint32_t
484 rte_power_acpi_cpufreq_get_freq(unsigned lcore_id)
485 {
486         if (lcore_id >= RTE_MAX_LCORE) {
487                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
488                 return RTE_POWER_INVALID_FREQ_INDEX;
489         }
490
491         return lcore_power_info[lcore_id].curr_idx;
492 }
493
494 int
495 rte_power_acpi_cpufreq_set_freq(unsigned lcore_id, uint32_t index)
496 {
497         if (lcore_id >= RTE_MAX_LCORE) {
498                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
499                 return -1;
500         }
501
502         return set_freq_internal(&(lcore_power_info[lcore_id]), index);
503 }
504
505 int
506 rte_power_acpi_cpufreq_freq_down(unsigned lcore_id)
507 {
508         struct rte_power_info *pi;
509
510         if (lcore_id >= RTE_MAX_LCORE) {
511                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
512                 return -1;
513         }
514
515         pi = &lcore_power_info[lcore_id];
516         if (pi->curr_idx + 1 == pi->nb_freqs)
517                 return 0;
518
519         /* Frequencies in the array are from high to low. */
520         return set_freq_internal(pi, pi->curr_idx + 1);
521 }
522
523 int
524 rte_power_acpi_cpufreq_freq_up(unsigned lcore_id)
525 {
526         struct rte_power_info *pi;
527
528         if (lcore_id >= RTE_MAX_LCORE) {
529                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
530                 return -1;
531         }
532
533         pi = &lcore_power_info[lcore_id];
534         if (pi->curr_idx == 0)
535                 return 0;
536
537         /* Frequencies in the array are from high to low. */
538         return set_freq_internal(pi, pi->curr_idx - 1);
539 }
540
541 int
542 rte_power_acpi_cpufreq_freq_max(unsigned lcore_id)
543 {
544         if (lcore_id >= RTE_MAX_LCORE) {
545                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
546                 return -1;
547         }
548
549         /* Frequencies in the array are from high to low. */
550         if (lcore_power_info[lcore_id].turbo_available) {
551                 if (lcore_power_info[lcore_id].turbo_enable)
552                         /* Set to Turbo */
553                         return set_freq_internal(
554                                         &lcore_power_info[lcore_id], 0);
555                 else
556                         /* Set to max non-turbo */
557                         return set_freq_internal(
558                                         &lcore_power_info[lcore_id], 1);
559         } else
560                 return set_freq_internal(&lcore_power_info[lcore_id], 0);
561 }
562
563 int
564 rte_power_acpi_cpufreq_freq_min(unsigned lcore_id)
565 {
566         struct rte_power_info *pi;
567
568         if (lcore_id >= RTE_MAX_LCORE) {
569                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
570                 return -1;
571         }
572
573         pi = &lcore_power_info[lcore_id];
574
575         /* Frequencies in the array are from high to low. */
576         return set_freq_internal(pi, pi->nb_freqs - 1);
577 }
578
579
580 int
581 rte_power_acpi_turbo_status(unsigned int lcore_id)
582 {
583         struct rte_power_info *pi;
584
585         if (lcore_id >= RTE_MAX_LCORE) {
586                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
587                 return -1;
588         }
589
590         pi = &lcore_power_info[lcore_id];
591
592         return pi->turbo_enable;
593 }
594
595
596 int
597 rte_power_acpi_enable_turbo(unsigned int lcore_id)
598 {
599         struct rte_power_info *pi;
600
601         if (lcore_id >= RTE_MAX_LCORE) {
602                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
603                 return -1;
604         }
605
606         pi = &lcore_power_info[lcore_id];
607
608         if (pi->turbo_available)
609                 pi->turbo_enable = 1;
610         else {
611                 pi->turbo_enable = 0;
612                 RTE_LOG(ERR, POWER,
613                         "Failed to enable turbo on lcore %u\n",
614                         lcore_id);
615                         return -1;
616         }
617
618         /* Max may have changed, so call to max function */
619         if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) {
620                 RTE_LOG(ERR, POWER,
621                         "Failed to set frequency of lcore %u to max\n",
622                         lcore_id);
623                         return -1;
624         }
625
626         return 0;
627 }
628
629 int
630 rte_power_acpi_disable_turbo(unsigned int lcore_id)
631 {
632         struct rte_power_info *pi;
633
634         if (lcore_id >= RTE_MAX_LCORE) {
635                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
636                 return -1;
637         }
638
639         pi = &lcore_power_info[lcore_id];
640
641          pi->turbo_enable = 0;
642
643         if ((pi->turbo_available) && (pi->curr_idx <= 1)) {
644                 /* Try to set freq to max by default coming out of turbo */
645                 if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) {
646                         RTE_LOG(ERR, POWER,
647                                 "Failed to set frequency of lcore %u to max\n",
648                                 lcore_id);
649                         return -1;
650                 }
651         }
652
653         return 0;
654 }