net/bnxt: fix ping with MTU change
[dpdk.git] / lib / librte_power / power_acpi_cpufreq.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <sys/types.h>
7 #include <sys/stat.h>
8 #include <fcntl.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <signal.h>
13 #include <limits.h>
14
15 #include <rte_atomic.h>
16 #include <rte_memcpy.h>
17 #include <rte_memory.h>
18 #include <rte_string_fns.h>
19
20 #include "power_acpi_cpufreq.h"
21 #include "power_common.h"
22
/*
 * Debug trace helper. When RTE_LIBRTE_POWER_DEBUG is defined the message is
 * emitted through RTE_LOG (ERR level, POWER log type) prefixed with the name
 * of the calling function; otherwise the macro expands to nothing.
 */
#ifdef RTE_LIBRTE_POWER_DEBUG
#define POWER_DEBUG_TRACE(fmt, args...) do { \
                RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
} while (0)
#else
#define POWER_DEBUG_TRACE(fmt, args...)
#endif
30
/*
 * Check the result of fopen(): if the stream 'f' is NULL, log an error and
 * return 'retval' from the enclosing function.
 */
#define FOPEN_OR_ERR_RET(f, retval) do { \
                if ((f) == NULL) { \
                        RTE_LOG(ERR, POWER, "File not openned\n"); \
                        return retval; \
                } \
} while (0)
37
/*
 * Check a pointer result (used for fgets() in this file): if 'ret' is NULL,
 * log an error and jump to 'label' in the enclosing function.
 */
#define FOPS_OR_NULL_GOTO(ret, label) do { \
                if ((ret) == NULL) { \
                        RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
                        goto label; \
                } \
} while (0)
44
/*
 * Check an integer result of a stdio call: if 'ret' is negative, log an
 * error and jump to 'label' in the enclosing function.
 */
#define FOPS_OR_ERR_GOTO(ret, label) do { \
                if ((ret) < 0) { \
                        RTE_LOG(ERR, POWER, "File operations failed\n"); \
                        goto label; \
                } \
} while (0)
51
/* NOTE(review): STR_SIZE is not referenced in the visible part of this file */
#define STR_SIZE     1024
/* Numeric base passed to strtoul() when parsing values read from sysfs */
#define POWER_CONVERT_TO_DECIMAL 10

/* Governor name written to scaling_governor during initialisation */
#define POWER_GOVERNOR_USERSPACE "userspace"
/* Per-CPU sysfs path templates; %u is filled in with the lcore id */
#define POWER_SYSFILE_GOVERNOR   \
                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor"
#define POWER_SYSFILE_AVAIL_FREQ \
                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
#define POWER_SYSFILE_SETSPEED   \
                "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"

/*
 * MSR related
 *
 * NOTE(review): none of the constants below is referenced in the visible
 * part of this file.
 */
#define PLATFORM_INFO     0x0CE
#define TURBO_RATIO_LIMIT 0x1AD
#define IA32_PERF_CTL     0x199
#define CORE_TURBO_DISABLE_BIT ((uint64_t)1<<32)
70
/*
 * Life-cycle state of one lcore's power management context, stored in
 * rte_power_info.state and changed with atomic compare-and-set.
 */
enum power_state {
        POWER_IDLE = 0,         /* Not initialised; init may claim the lcore */
        POWER_ONGOING,          /* An init or exit call is in progress */
        POWER_USED,             /* Init succeeded; exit may release the lcore */
        POWER_UNKNOWN           /* An init or exit call failed */
};
77
/**
 * Power info per lcore.
 *
 * One instance exists for every possible lcore. The frequency table is
 * filled from the scaling_available_frequencies sys file and is treated as
 * ordered from high to low by the frequency up/down/max/min helpers.
 */
struct rte_power_info {
        unsigned int lcore_id;                   /**< Logical core id */
        uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
        uint32_t nb_freqs;                   /**< number of available freqs */
        FILE *f;                             /**< Stream of scaling_setspeed */
        char governor_ori[32];               /**< Original governor name */
        uint32_t curr_idx;                   /**< Freq index in freqs array */
        volatile uint32_t state;             /**< Power in use state */
        uint16_t turbo_available;            /**< Turbo Boost available */
        uint16_t turbo_enable;               /**< Turbo Boost enable/disable */
} __rte_cache_aligned;
92
/* Power management context of every lcore, indexed by lcore id */
static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
94
95 /**
96  * It is to set specific freq for specific logical core, according to the index
97  * of supported frequencies.
98  */
99 static int
100 set_freq_internal(struct rte_power_info *pi, uint32_t idx)
101 {
102         if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
103                 RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
104                                 "should be less than %u\n", idx, pi->nb_freqs);
105                 return -1;
106         }
107
108         /* Check if it is the same as current */
109         if (idx == pi->curr_idx)
110                 return 0;
111
112         POWER_DEBUG_TRACE("Freqency[%u] %u to be set for lcore %u\n",
113                         idx, pi->freqs[idx], pi->lcore_id);
114         if (fseek(pi->f, 0, SEEK_SET) < 0) {
115                 RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
116                                 "for setting frequency for lcore %u\n", pi->lcore_id);
117                 return -1;
118         }
119         if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
120                 RTE_LOG(ERR, POWER, "Fail to write new frequency for "
121                                 "lcore %u\n", pi->lcore_id);
122                 return -1;
123         }
124         fflush(pi->f);
125         pi->curr_idx = idx;
126
127         return 1;
128 }
129
130 /**
131  * It is to check the current scaling governor by reading sys file, and then
132  * set it into 'userspace' if it is not by writing the sys file. The original
133  * governor will be saved for rolling back.
134  */
135 static int
136 power_set_governor_userspace(struct rte_power_info *pi)
137 {
138         FILE *f;
139         int ret = -1;
140         char buf[BUFSIZ];
141         char fullpath[PATH_MAX];
142         char *s;
143         int val;
144
145         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
146                         pi->lcore_id);
147         f = fopen(fullpath, "rw+");
148         FOPEN_OR_ERR_RET(f, ret);
149
150         s = fgets(buf, sizeof(buf), f);
151         FOPS_OR_NULL_GOTO(s, out);
152         /* Strip off terminating '\n' */
153         strtok(buf, "\n");
154
155         /* Check if current governor is userspace */
156         if (strncmp(buf, POWER_GOVERNOR_USERSPACE,
157                         sizeof(POWER_GOVERNOR_USERSPACE)) == 0) {
158                 ret = 0;
159                 POWER_DEBUG_TRACE("Power management governor of lcore %u is "
160                                 "already userspace\n", pi->lcore_id);
161                 goto out;
162         }
163         /* Save the original governor */
164         strlcpy(pi->governor_ori, buf, sizeof(pi->governor_ori));
165
166         /* Write 'userspace' to the governor */
167         val = fseek(f, 0, SEEK_SET);
168         FOPS_OR_ERR_GOTO(val, out);
169
170         val = fputs(POWER_GOVERNOR_USERSPACE, f);
171         FOPS_OR_ERR_GOTO(val, out);
172
173         /* We need to flush to see if the fputs succeeds */
174         val = fflush(f);
175         FOPS_OR_ERR_GOTO(val, out);
176
177         ret = 0;
178         RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
179                         "set to user space successfully\n", pi->lcore_id);
180 out:
181         fclose(f);
182
183         return ret;
184 }
185
186 /**
187  * It is to get the available frequencies of the specific lcore by reading the
188  * sys file.
189  */
190 static int
191 power_get_available_freqs(struct rte_power_info *pi)
192 {
193         FILE *f;
194         int ret = -1, i, count;
195         char *p;
196         char buf[BUFSIZ];
197         char fullpath[PATH_MAX];
198         char *freqs[RTE_MAX_LCORE_FREQS];
199         char *s;
200
201         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_AVAIL_FREQ,
202                         pi->lcore_id);
203         f = fopen(fullpath, "r");
204         FOPEN_OR_ERR_RET(f, ret);
205
206         s = fgets(buf, sizeof(buf), f);
207         FOPS_OR_NULL_GOTO(s, out);
208
209         /* Strip the line break if there is */
210         p = strchr(buf, '\n');
211         if (p != NULL)
212                 *p = 0;
213
214         /* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
215         count = rte_strsplit(buf, sizeof(buf), freqs,
216                         RTE_MAX_LCORE_FREQS, ' ');
217         if (count <= 0) {
218                 RTE_LOG(ERR, POWER, "No available frequency in "
219                                 ""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
220                 goto out;
221         }
222         if (count >= RTE_MAX_LCORE_FREQS) {
223                 RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
224                                 count);
225                 goto out;
226         }
227
228         /* Store the available frequncies into power context */
229         for (i = 0, pi->nb_freqs = 0; i < count; i++) {
230                 POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
231                                 i, freqs[i]);
232                 pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
233                                 POWER_CONVERT_TO_DECIMAL);
234         }
235
236         if ((pi->freqs[0]-1000) == pi->freqs[1]) {
237                 pi->turbo_available = 1;
238                 pi->turbo_enable = 1;
239                 POWER_DEBUG_TRACE("Lcore %u Can do Turbo Boost\n",
240                                 pi->lcore_id);
241         } else {
242                 pi->turbo_available = 0;
243                 pi->turbo_enable = 0;
244                 POWER_DEBUG_TRACE("Turbo Boost not available on Lcore %u\n",
245                                 pi->lcore_id);
246         }
247
248         ret = 0;
249         POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
250                         count, pi->lcore_id);
251 out:
252         fclose(f);
253
254         return ret;
255 }
256
257 /**
258  * It is to fopen the sys file for the future setting the lcore frequency.
259  */
260 static int
261 power_init_for_setting_freq(struct rte_power_info *pi)
262 {
263         FILE *f;
264         char fullpath[PATH_MAX];
265         char buf[BUFSIZ];
266         uint32_t i, freq;
267         char *s;
268
269         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_SETSPEED,
270                         pi->lcore_id);
271         f = fopen(fullpath, "rw+");
272         FOPEN_OR_ERR_RET(f, -1);
273
274         s = fgets(buf, sizeof(buf), f);
275         FOPS_OR_NULL_GOTO(s, out);
276
277         freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
278         for (i = 0; i < pi->nb_freqs; i++) {
279                 if (freq == pi->freqs[i]) {
280                         pi->curr_idx = i;
281                         pi->f = f;
282                         return 0;
283                 }
284         }
285
286 out:
287         fclose(f);
288
289         return -1;
290 }
291
292 int
293 power_acpi_cpufreq_init(unsigned int lcore_id)
294 {
295         struct rte_power_info *pi;
296
297         if (lcore_id >= RTE_MAX_LCORE) {
298                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
299                                 lcore_id, RTE_MAX_LCORE - 1U);
300                 return -1;
301         }
302
303         pi = &lcore_power_info[lcore_id];
304         if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
305                         == 0) {
306                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
307                                 "in use\n", lcore_id);
308                 return -1;
309         }
310
311         pi->lcore_id = lcore_id;
312         /* Check and set the governor */
313         if (power_set_governor_userspace(pi) < 0) {
314                 RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
315                                 "userspace\n", lcore_id);
316                 goto fail;
317         }
318
319         /* Get the available frequencies */
320         if (power_get_available_freqs(pi) < 0) {
321                 RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
322                                 "lcore %u\n", lcore_id);
323                 goto fail;
324         }
325
326         /* Init for setting lcore frequency */
327         if (power_init_for_setting_freq(pi) < 0) {
328                 RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
329                                 "lcore %u\n", lcore_id);
330                 goto fail;
331         }
332
333         /* Set freq to max by default */
334         if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
335                 RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
336                                 "to max\n", lcore_id);
337                 goto fail;
338         }
339
340         RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
341                         "power management\n", lcore_id);
342         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
343
344         return 0;
345
346 fail:
347         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
348
349         return -1;
350 }
351
352 /**
353  * It is to check the governor and then set the original governor back if
354  * needed by writing the sys file.
355  */
356 static int
357 power_set_governor_original(struct rte_power_info *pi)
358 {
359         FILE *f;
360         int ret = -1;
361         char buf[BUFSIZ];
362         char fullpath[PATH_MAX];
363         char *s;
364         int val;
365
366         snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
367                         pi->lcore_id);
368         f = fopen(fullpath, "rw+");
369         FOPEN_OR_ERR_RET(f, ret);
370
371         s = fgets(buf, sizeof(buf), f);
372         FOPS_OR_NULL_GOTO(s, out);
373
374         /* Check if the governor to be set is the same as current */
375         if (strncmp(buf, pi->governor_ori, sizeof(pi->governor_ori)) == 0) {
376                 ret = 0;
377                 POWER_DEBUG_TRACE("Power management governor of lcore %u "
378                                 "has already been set to %s\n",
379                                 pi->lcore_id, pi->governor_ori);
380                 goto out;
381         }
382
383         /* Write back the original governor */
384         val = fseek(f, 0, SEEK_SET);
385         FOPS_OR_ERR_GOTO(val, out);
386
387         val = fputs(pi->governor_ori, f);
388         FOPS_OR_ERR_GOTO(val, out);
389
390         ret = 0;
391         RTE_LOG(INFO, POWER, "Power management governor of lcore %u "
392                         "has been set back to %s successfully\n",
393                         pi->lcore_id, pi->governor_ori);
394 out:
395         fclose(f);
396
397         return ret;
398 }
399
400 int
401 power_acpi_cpufreq_exit(unsigned int lcore_id)
402 {
403         struct rte_power_info *pi;
404
405         if (lcore_id >= RTE_MAX_LCORE) {
406                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
407                                 lcore_id, RTE_MAX_LCORE - 1U);
408                 return -1;
409         }
410         pi = &lcore_power_info[lcore_id];
411         if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)
412                         == 0) {
413                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
414                                 "not used\n", lcore_id);
415                 return -1;
416         }
417
418         /* Close FD of setting freq */
419         fclose(pi->f);
420         pi->f = NULL;
421
422         /* Set the governor back to the original */
423         if (power_set_governor_original(pi) < 0) {
424                 RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
425                                 "to the original\n", lcore_id);
426                 goto fail;
427         }
428
429         RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
430                         "'userspace' mode and been set back to the "
431                         "original\n", lcore_id);
432         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
433
434         return 0;
435
436 fail:
437         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
438
439         return -1;
440 }
441
442 uint32_t
443 power_acpi_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
444 {
445         struct rte_power_info *pi;
446
447         if (lcore_id >= RTE_MAX_LCORE) {
448                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
449                 return 0;
450         }
451
452         if (freqs == NULL) {
453                 RTE_LOG(ERR, POWER, "NULL buffer supplied\n");
454                 return 0;
455         }
456
457         pi = &lcore_power_info[lcore_id];
458         if (num < pi->nb_freqs) {
459                 RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
460                 return 0;
461         }
462         rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
463
464         return pi->nb_freqs;
465 }
466
467 uint32_t
468 power_acpi_cpufreq_get_freq(unsigned int lcore_id)
469 {
470         if (lcore_id >= RTE_MAX_LCORE) {
471                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
472                 return RTE_POWER_INVALID_FREQ_INDEX;
473         }
474
475         return lcore_power_info[lcore_id].curr_idx;
476 }
477
478 int
479 power_acpi_cpufreq_set_freq(unsigned int lcore_id, uint32_t index)
480 {
481         if (lcore_id >= RTE_MAX_LCORE) {
482                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
483                 return -1;
484         }
485
486         return set_freq_internal(&(lcore_power_info[lcore_id]), index);
487 }
488
489 int
490 power_acpi_cpufreq_freq_down(unsigned int lcore_id)
491 {
492         struct rte_power_info *pi;
493
494         if (lcore_id >= RTE_MAX_LCORE) {
495                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
496                 return -1;
497         }
498
499         pi = &lcore_power_info[lcore_id];
500         if (pi->curr_idx + 1 == pi->nb_freqs)
501                 return 0;
502
503         /* Frequencies in the array are from high to low. */
504         return set_freq_internal(pi, pi->curr_idx + 1);
505 }
506
507 int
508 power_acpi_cpufreq_freq_up(unsigned int lcore_id)
509 {
510         struct rte_power_info *pi;
511
512         if (lcore_id >= RTE_MAX_LCORE) {
513                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
514                 return -1;
515         }
516
517         pi = &lcore_power_info[lcore_id];
518         if (pi->curr_idx == 0)
519                 return 0;
520
521         /* Frequencies in the array are from high to low. */
522         return set_freq_internal(pi, pi->curr_idx - 1);
523 }
524
525 int
526 power_acpi_cpufreq_freq_max(unsigned int lcore_id)
527 {
528         if (lcore_id >= RTE_MAX_LCORE) {
529                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
530                 return -1;
531         }
532
533         /* Frequencies in the array are from high to low. */
534         if (lcore_power_info[lcore_id].turbo_available) {
535                 if (lcore_power_info[lcore_id].turbo_enable)
536                         /* Set to Turbo */
537                         return set_freq_internal(
538                                         &lcore_power_info[lcore_id], 0);
539                 else
540                         /* Set to max non-turbo */
541                         return set_freq_internal(
542                                         &lcore_power_info[lcore_id], 1);
543         } else
544                 return set_freq_internal(&lcore_power_info[lcore_id], 0);
545 }
546
547 int
548 power_acpi_cpufreq_freq_min(unsigned int lcore_id)
549 {
550         struct rte_power_info *pi;
551
552         if (lcore_id >= RTE_MAX_LCORE) {
553                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
554                 return -1;
555         }
556
557         pi = &lcore_power_info[lcore_id];
558
559         /* Frequencies in the array are from high to low. */
560         return set_freq_internal(pi, pi->nb_freqs - 1);
561 }
562
563
564 int
565 power_acpi_turbo_status(unsigned int lcore_id)
566 {
567         struct rte_power_info *pi;
568
569         if (lcore_id >= RTE_MAX_LCORE) {
570                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
571                 return -1;
572         }
573
574         pi = &lcore_power_info[lcore_id];
575
576         return pi->turbo_enable;
577 }
578
579
580 int
581 power_acpi_enable_turbo(unsigned int lcore_id)
582 {
583         struct rte_power_info *pi;
584
585         if (lcore_id >= RTE_MAX_LCORE) {
586                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
587                 return -1;
588         }
589
590         pi = &lcore_power_info[lcore_id];
591
592         if (pi->turbo_available)
593                 pi->turbo_enable = 1;
594         else {
595                 pi->turbo_enable = 0;
596                 RTE_LOG(ERR, POWER,
597                         "Failed to enable turbo on lcore %u\n",
598                         lcore_id);
599                         return -1;
600         }
601
602         /* Max may have changed, so call to max function */
603         if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
604                 RTE_LOG(ERR, POWER,
605                         "Failed to set frequency of lcore %u to max\n",
606                         lcore_id);
607                         return -1;
608         }
609
610         return 0;
611 }
612
613 int
614 power_acpi_disable_turbo(unsigned int lcore_id)
615 {
616         struct rte_power_info *pi;
617
618         if (lcore_id >= RTE_MAX_LCORE) {
619                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
620                 return -1;
621         }
622
623         pi = &lcore_power_info[lcore_id];
624
625          pi->turbo_enable = 0;
626
627         if ((pi->turbo_available) && (pi->curr_idx <= 1)) {
628                 /* Try to set freq to max by default coming out of turbo */
629                 if (power_acpi_cpufreq_freq_max(lcore_id) < 0) {
630                         RTE_LOG(ERR, POWER,
631                                 "Failed to set frequency of lcore %u to max\n",
632                                 lcore_id);
633                         return -1;
634                 }
635         }
636
637         return 0;
638 }
639
640 int power_acpi_get_capabilities(unsigned int lcore_id,
641                 struct rte_power_core_capabilities *caps)
642 {
643         struct rte_power_info *pi;
644
645         if (lcore_id >= RTE_MAX_LCORE) {
646                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
647                 return -1;
648         }
649         if (caps == NULL) {
650                 RTE_LOG(ERR, POWER, "Invalid argument\n");
651                 return -1;
652         }
653
654         pi = &lcore_power_info[lcore_id];
655         caps->capabilities = 0;
656         caps->turbo = !!(pi->turbo_available);
657
658         return 0;
659 }