8370b96fcf7692df0b6e5dfc03edbd9eefd2c947
[dpdk.git] / lib / librte_power / rte_power.c
1 /*-
2  *   BSD LICENSE
3  * 
4  *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  * 
7  *   Redistribution and use in source and binary forms, with or without 
8  *   modification, are permitted provided that the following conditions 
9  *   are met:
10  * 
11  *     * Redistributions of source code must retain the above copyright 
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright 
14  *       notice, this list of conditions and the following disclaimer in 
15  *       the documentation and/or other materials provided with the 
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its 
18  *       contributors may be used to endorse or promote products derived 
19  *       from this software without specific prior written permission.
20  * 
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  * 
33  */
34
35 #include <stdio.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <fcntl.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <signal.h>
43 #include <limits.h>
44
45 #include <rte_memcpy.h>
46 #include <rte_atomic.h>
47
48 #include "rte_power.h"
49
/*
 * Debug trace helper. When RTE_LIBRTE_POWER_DEBUG is defined the message is
 * emitted through RTE_LOG (ERR level, POWER log type) and prefixed with the
 * name of the calling function; otherwise the macro expands to nothing.
 */
#if defined(RTE_LIBRTE_POWER_DEBUG)
#define POWER_DEBUG_TRACE(fmt, ...) \
        do { \
                RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ##__VA_ARGS__); \
        } while (0)
#else
#define POWER_DEBUG_TRACE(fmt, ...)
#endif
57
/*
 * Guard for the result of fopen(): when the stream 'f' is NULL, log an error
 * and make the calling function return 'retval'.
 */
#define FOPEN_OR_ERR_RET(f, retval) \
        do { \
                if (!(f)) { \
                        RTE_LOG(ERR, POWER, "File not openned\n"); \
                        return (retval); \
                } \
        } while (0)
64
/*
 * Guard for calls that report failure with a NULL pointer (used on the result
 * of fgets()): when 'ret' is NULL, log an error and jump to 'label'.
 */
#define FOPS_OR_NULL_GOTO(ret, label) \
        do { \
                if (!(ret)) { \
                        RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
                        goto label; \
                } \
        } while (0)
71
/*
 * Guard for calls that report failure with a negative value (used on the
 * results of fseek() and fputs()): when 'ret' is below zero, log an error and
 * jump to 'label'.
 */
#define FOPS_OR_ERR_GOTO(ret, label) \
        do { \
                if ((ret) < 0) { \
                        RTE_LOG(ERR, POWER, "File operations failed\n"); \
                        goto label; \
                } \
        } while (0)
78
/* Generic string size; not referenced in the visible part of this file. */
#define STR_SIZE                 1024
/* Numeric base passed to strtoul() when parsing values read from sysfs. */
#define POWER_CONVERT_TO_DECIMAL 10

/* Governor name written to scaling_governor while the library is in use. */
#define POWER_GOVERNOR_USERSPACE "userspace"

/*
 * Per-CPU cpufreq sysfs files. Each path is a printf-style format whose %u
 * is filled in with the lcore id.
 */
#define POWER_SYSFILE_CPUFREQ_DIR "/sys/devices/system/cpu/cpu%u/cpufreq/"
#define POWER_SYSFILE_GOVERNOR \
        POWER_SYSFILE_CPUFREQ_DIR "scaling_governor"
#define POWER_SYSFILE_AVAIL_FREQ \
        POWER_SYSFILE_CPUFREQ_DIR "scaling_available_frequencies"
#define POWER_SYSFILE_SETSPEED \
        POWER_SYSFILE_CPUFREQ_DIR "scaling_setspeed"
89
/**
 * Life-cycle states stored in the 'state' field of the per-lcore power info.
 */
enum power_state {
        POWER_IDLE    = 0, /**< Not initialised; rte_power_init() may claim it */
        POWER_ONGOING = 1, /**< rte_power_init()/rte_power_exit() in progress */
        POWER_USED    = 2, /**< rte_power_init() completed successfully */
        POWER_UNKNOWN = 3  /**< Set when init or exit failed part-way */
};
96
97 /**
98  * Power info per lcore.
99  */
100 struct rte_power_info {
101         unsigned lcore_id;                   /**< Logical core id */
102         uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
103         uint32_t nb_freqs;                   /**< number of available freqs */
104         FILE *f;                             /**< FD of scaling_setspeed */
105         char governor_ori[32];               /**< Original governor name */
106         uint32_t curr_idx;                   /**< Freq index in freqs array */
107         volatile uint32_t state;             /**< Power in use state */
108 } __rte_cache_aligned;
109
110 static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
111
112 /**
113  * It is to set specific freq for specific logical core, according to the index
114  * of supported frequencies.
115  */
116 static int
117 set_freq_internal(struct rte_power_info *pi, uint32_t idx)
118 {
119         if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
120                 RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
121                         "should be less than %u\n", idx, pi->nb_freqs);
122                 return -1;
123         }
124
125         /* Check if it is the same as current */
126         if (idx == pi->curr_idx)
127                 return 0;
128
129         POWER_DEBUG_TRACE("Freqency[%u] %u to be set for lcore %u\n",
130                                 idx, pi->freqs[idx], pi->lcore_id);
131         if (fseek(pi->f, 0, SEEK_SET) < 0) {
132                 RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
133                         "for setting frequency for lcore %u\n", pi->lcore_id);
134                 return -1;
135         }
136         if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
137                 RTE_LOG(ERR, POWER, "Fail to write new frequency for "
138                                         "lcore %u\n", pi->lcore_id);
139                 return -1;
140         }
141         fflush(pi->f);
142         pi->curr_idx = idx;
143
144         return 1;
145 }
146
147 /**
148  * It is to check the current scaling governor by reading sys file, and then
149  * set it into 'userspace' if it is not by writing the sys file. The original
150  * governor will be saved for rolling back.
151  */
152 static int
153 power_set_governor_userspace(struct rte_power_info *pi)
154 {
155         FILE *f;
156         int ret = -1;
157         char buf[BUFSIZ];
158         char fullpath[PATH_MAX];
159         char *s;
160         int val;
161
162         rte_snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
163                                                         pi->lcore_id);
164         f = fopen(fullpath, "rw+");
165         FOPEN_OR_ERR_RET(f, ret);
166
167         s = fgets(buf, sizeof(buf), f);
168         FOPS_OR_NULL_GOTO(s, out);
169
170         /* Check if current governor is userspace */
171         if (strncmp(buf, POWER_GOVERNOR_USERSPACE,
172                 sizeof(POWER_GOVERNOR_USERSPACE)) == 0) {
173                 ret = 0;
174                 POWER_DEBUG_TRACE("Power management governor of lcore %u is "
175                                         "already userspace\n", pi->lcore_id);
176                 goto out;
177         }
178         /* Save the original governor */
179         rte_snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf);
180
181         /* Write 'userspace' to the governor */
182         val = fseek(f, 0, SEEK_SET);
183         FOPS_OR_ERR_GOTO(val, out);
184
185         val = fputs(POWER_GOVERNOR_USERSPACE, f);
186         FOPS_OR_ERR_GOTO(val, out);
187
188         ret = 0;
189         RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
190                         "set to user space successfully\n", pi->lcore_id);
191 out:
192         fclose(f);
193
194         return ret;
195 }
196
197 /**
198  * It is to get the available frequencies of the specific lcore by reading the
199  * sys file.
200  */
201 static int
202 power_get_available_freqs(struct rte_power_info *pi)
203 {
204         FILE *f;
205         int ret = -1, i, count;
206         char *p;
207         char buf[BUFSIZ];
208         char fullpath[PATH_MAX];
209         char *freqs[RTE_MAX_LCORE_FREQS];
210         char *s;
211
212         rte_snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_AVAIL_FREQ,
213                                                                 pi->lcore_id);
214         f = fopen(fullpath, "r");
215         FOPEN_OR_ERR_RET(f, ret);
216
217         s = fgets(buf, sizeof(buf), f);
218         FOPS_OR_NULL_GOTO(s, out);
219
220         /* Strip the line break if there is */
221         p = strchr(buf, '\n');
222         if (p != NULL)
223                 *p = 0;
224
225         /* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
226         count = rte_strsplit(buf, sizeof(buf), freqs,
227                                 RTE_MAX_LCORE_FREQS, ' ');
228         if (count <= 0) {
229                 RTE_LOG(ERR, POWER, "No available frequency in "
230                         ""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
231                 goto out;
232         }
233         if (count >= RTE_MAX_LCORE_FREQS) {
234                 RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
235                                                                 count);
236                 goto out;
237         }
238
239         /* Store the available frequncies into power context */
240         for (i = 0, pi->nb_freqs = 0; i < count; i++) {
241                 POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
242                                                                 i, freqs[i]);
243                 pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
244                                         POWER_CONVERT_TO_DECIMAL);
245         }
246
247         ret = 0;
248         POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n",
249                                                 count, pi->lcore_id);
250 out:
251         fclose(f);
252
253         return ret;
254 }
255
256 /**
257  * It is to fopen the sys file for the future setting the lcore frequency.
258  */
259 static int
260 power_init_for_setting_freq(struct rte_power_info *pi)
261 {
262         FILE *f;
263         char fullpath[PATH_MAX];
264         char buf[BUFSIZ];
265         uint32_t i, freq;
266         char *s;
267
268         rte_snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_SETSPEED,
269                                                         pi->lcore_id);
270         f = fopen(fullpath, "rw+");
271         FOPEN_OR_ERR_RET(f, -1);
272
273         s = fgets(buf, sizeof(buf), f);
274         FOPS_OR_NULL_GOTO(s, out);
275
276         freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
277         for (i = 0; i < pi->nb_freqs; i++) {
278                 if (freq == pi->freqs[i]) {
279                         pi->curr_idx = i;
280                         pi->f = f;
281                         return 0;
282                 }
283         }
284
285 out:
286         fclose(f);
287
288         return -1;
289 }
290
291 int
292 rte_power_init(unsigned lcore_id)
293 {
294         struct rte_power_info *pi;
295
296         if (lcore_id >= RTE_MAX_LCORE) {
297                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
298                                         lcore_id, RTE_MAX_LCORE - 1U);
299                 return -1;
300         }
301
302         pi = &lcore_power_info[lcore_id];
303         if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
304                                                                 == 0) {
305                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
306                                                 "in use\n", lcore_id);
307                 return -1;
308         }
309
310         pi->lcore_id = lcore_id;
311         /* Check and set the governor */
312         if (power_set_governor_userspace(pi) < 0) {
313                 RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
314                                                 "userspace\n", lcore_id);
315                 goto fail;
316         }
317
318         /* Get the available frequencies */
319         if (power_get_available_freqs(pi) < 0) {
320                 RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
321                                                 "lcore %u\n", lcore_id);
322                 goto fail;
323         }
324
325         /* Init for setting lcore frequency */
326         if (power_init_for_setting_freq(pi) < 0) {
327                 RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
328                                                 "lcore %u\n", lcore_id);
329                 goto fail;
330         }
331
332         /* Set freq to max by default */
333         if (rte_power_freq_max(lcore_id) < 0) {
334                 RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
335                                                 "to max\n", lcore_id);
336                 goto fail;
337         }
338
339         RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
340                                         "power manamgement\n", lcore_id);
341         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
342
343         return 0;
344
345 fail:
346         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
347
348         return -1;
349 }
350
351 /**
352  * It is to check the governor and then set the original governor back if
353  * needed by writing the the sys file.
354  */
355 static int
356 power_set_governor_original(struct rte_power_info *pi)
357 {
358         FILE *f;
359         int ret = -1;
360         char buf[BUFSIZ];
361         char fullpath[PATH_MAX];
362         char *s;
363         int val;
364
365         rte_snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
366                                                         pi->lcore_id);
367         f = fopen(fullpath, "rw+");
368         FOPEN_OR_ERR_RET(f, ret);
369
370         s = fgets(buf, sizeof(buf), f);
371         FOPS_OR_NULL_GOTO(s, out);
372
373         /* Check if the governor to be set is the same as current */
374         if (strncmp(buf, pi->governor_ori, sizeof(pi->governor_ori)) == 0) {
375                 ret = 0;
376                 POWER_DEBUG_TRACE("Power management governor of lcore %u "
377                                         "has already been set to %s\n",
378                                         pi->lcore_id, pi->governor_ori);
379                 goto out;
380         }
381
382         /* Write back the original governor */
383         val = fseek(f, 0, SEEK_SET);
384         FOPS_OR_ERR_GOTO(val, out);
385
386         val = fputs(pi->governor_ori, f);
387         FOPS_OR_ERR_GOTO(val, out);
388
389         ret = 0;
390         RTE_LOG(INFO, POWER, "Power manamgement governor of lcore %u "
391                                 "has been set back to %s successfully\n",
392                                         pi->lcore_id, pi->governor_ori);
393 out:
394         fclose(f);
395
396         return ret;
397 }
398
399 int
400 rte_power_exit(unsigned lcore_id)
401 {
402         struct rte_power_info *pi;
403
404         if (lcore_id >= RTE_MAX_LCORE) {
405                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
406                                         lcore_id, RTE_MAX_LCORE - 1U);
407                 return -1;
408         }
409         pi = &lcore_power_info[lcore_id];
410         if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)
411                                                                 == 0) {
412                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
413                                                 "not used\n", lcore_id);
414                 return -1;
415         }
416
417         /* Close FD of setting freq */
418         fclose(pi->f);
419         pi->f = NULL;
420
421         /* Set the governor back to the original */
422         if (power_set_governor_original(pi) < 0) {
423                 RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
424                                         "to the original\n", lcore_id);
425                 goto fail;
426         }
427
428         RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
429                                 "'userspace' mode and been set back to the "
430                                                 "original\n", lcore_id);
431         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
432
433         return 0;
434
435 fail:
436         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
437
438         return -1;
439 }
440  
441 uint32_t
442 rte_power_freqs(unsigned lcore_id, uint32_t *freqs, uint32_t num)
443 {
444         struct rte_power_info *pi;
445
446         if (lcore_id >= RTE_MAX_LCORE || !freqs) {
447                 RTE_LOG(ERR, POWER, "Invalid input parameter\n");
448                 return 0;
449         }
450
451         pi = &lcore_power_info[lcore_id];
452         if (num < pi->nb_freqs) {
453                 RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
454                 return 0;
455         }
456         rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
457
458         return pi->nb_freqs;
459 }
460  
461 uint32_t
462 rte_power_get_freq(unsigned lcore_id)
463 {
464         if (lcore_id >= RTE_MAX_LCORE) {
465                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
466                 return RTE_POWER_INVALID_FREQ_INDEX;
467         }
468
469         return lcore_power_info[lcore_id].curr_idx;
470 }
471
472 int
473 rte_power_set_freq(unsigned lcore_id, uint32_t index)
474 {
475         if (lcore_id >= RTE_MAX_LCORE) {
476                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
477                 return -1;
478         }
479
480         return set_freq_internal(&(lcore_power_info[lcore_id]), index);
481 }
482  
483 int
484 rte_power_freq_down(unsigned lcore_id)
485 {
486         struct rte_power_info *pi;
487
488         if (lcore_id >= RTE_MAX_LCORE) {
489                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
490                 return -1;
491         }
492
493         pi = &lcore_power_info[lcore_id];
494         if (pi->curr_idx + 1 == pi->nb_freqs)
495                 return 0;
496
497         /* Frequencies in the array are from high to low. */
498         return set_freq_internal(pi, pi->curr_idx + 1);
499 }
500  
501 int
502 rte_power_freq_up(unsigned lcore_id)
503 {
504         struct rte_power_info *pi;
505
506         if (lcore_id >= RTE_MAX_LCORE) {
507                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
508                 return -1;
509         }
510
511         pi = &lcore_power_info[lcore_id];
512         if (pi->curr_idx == 0)
513                 return 0;
514
515         /* Frequencies in the array are from high to low. */
516         return set_freq_internal(pi, pi->curr_idx - 1);
517 }
518  
519 int
520 rte_power_freq_max(unsigned lcore_id)
521 {
522         if (lcore_id >= RTE_MAX_LCORE) {
523                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
524                 return -1;
525         }
526
527         /* Frequencies in the array are from high to low. */
528         return set_freq_internal(&lcore_power_info[lcore_id], 0);
529 }
530  
531 int
532 rte_power_freq_min(unsigned lcore_id)
533 {
534         struct rte_power_info *pi;
535
536         if (lcore_id >= RTE_MAX_LCORE) {
537                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
538                 return -1;
539         }
540
541         pi = &lcore_power_info[lcore_id];
542
543         /* Frequencies in the array are from high to low. */
544         return set_freq_internal(pi, pi->nb_freqs - 1);
545 }
546