update Intel copyright years to 2014
[dpdk.git] / lib / librte_power / rte_power.c
1 /*-
2  *   BSD LICENSE
3  * 
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  * 
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  * 
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  * 
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdio.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <fcntl.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <signal.h>
42 #include <limits.h>
43
44 #include <rte_memcpy.h>
45 #include <rte_atomic.h>
46
47 #include "rte_power.h"
48
/*
 * Debug trace helper.
 *
 * When RTE_LIBRTE_POWER_DEBUG is defined the message is emitted through
 * RTE_LOG (ERR level, POWER log type) prefixed with the name of the calling
 * function; otherwise the macro expands to nothing.
 */
#ifndef RTE_LIBRTE_POWER_DEBUG
#define POWER_DEBUG_TRACE(fmt, args...)
#else
#define POWER_DEBUG_TRACE(fmt, args...) do { \
		RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \
	} while (0)
#endif
56
/*
 * Guard for a preceding fopen(): if the stream pointer 'fp' is NULL, log an
 * error and return 'rv' from the enclosing function.
 */
#define FOPEN_OR_ERR_RET(fp, rv) do { \
	if (!(fp)) { \
		RTE_LOG(ERR, POWER, "File not openned\n"); \
		return (rv); \
	} \
} while (0)
63
/*
 * Guard for a call that reports failure with a NULL pointer (used with
 * fgets() in this file): log an error and jump to 'target' when 'ptr' is NULL.
 */
#define FOPS_OR_NULL_GOTO(ptr, target) do { \
	if (!(ptr)) { \
		RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \
		goto target; \
	} \
} while (0)
70
/*
 * Guard for a file operation that reports failure with a negative value
 * (used with fseek() and fputs() in this file): log an error and jump to
 * 'target' when 'rc' is below zero.
 */
#define FOPS_OR_ERR_GOTO(rc, target) do { \
	if (0 > (rc)) { \
		RTE_LOG(ERR, POWER, "File operations failed\n"); \
		goto target; \
	} \
} while (0)
77
/* Generic string buffer size; not referenced in the visible part of this file. */
#define STR_SIZE 1024

/* Numeric base handed to strtoul() when parsing frequency values. */
#define POWER_CONVERT_TO_DECIMAL 10

/* Governor name written to scaling_governor to take over frequency control. */
#define POWER_GOVERNOR_USERSPACE "userspace"

/*
 * Per-CPU cpufreq sysfs files. Each path is a printf-style format whose
 * single %u is filled with the lcore id.
 */
#define POWER_SYSFILE_CPUFREQ_DIR "/sys/devices/system/cpu/cpu%u/cpufreq/"
#define POWER_SYSFILE_GOVERNOR \
	POWER_SYSFILE_CPUFREQ_DIR "scaling_governor"
#define POWER_SYSFILE_AVAIL_FREQ \
	POWER_SYSFILE_CPUFREQ_DIR "scaling_available_frequencies"
#define POWER_SYSFILE_SETSPEED \
	POWER_SYSFILE_CPUFREQ_DIR "scaling_setspeed"
88
/*
 * Life-cycle state of the power management context of one lcore, stored in
 * rte_power_info.state and changed with atomic compare-and-set.
 */
enum power_state {
	POWER_IDLE    = 0, /* not initialised (or cleanly exited) */
	POWER_ONGOING = 1, /* rte_power_init()/rte_power_exit() in progress */
	POWER_USED    = 2, /* initialised and usable */
	POWER_UNKNOWN = 3  /* an init or exit attempt failed */
};
95
96 /**
97  * Power info per lcore.
98  */
99 struct rte_power_info {
100         unsigned lcore_id;                   /**< Logical core id */
101         uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */
102         uint32_t nb_freqs;                   /**< number of available freqs */
103         FILE *f;                             /**< FD of scaling_setspeed */
104         char governor_ori[32];               /**< Original governor name */
105         uint32_t curr_idx;                   /**< Freq index in freqs array */
106         volatile uint32_t state;             /**< Power in use state */
107 } __rte_cache_aligned;
108
109 static struct rte_power_info lcore_power_info[RTE_MAX_LCORE];
110
111 /**
112  * It is to set specific freq for specific logical core, according to the index
113  * of supported frequencies.
114  */
115 static int
116 set_freq_internal(struct rte_power_info *pi, uint32_t idx)
117 {
118         if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) {
119                 RTE_LOG(ERR, POWER, "Invalid frequency index %u, which "
120                         "should be less than %u\n", idx, pi->nb_freqs);
121                 return -1;
122         }
123
124         /* Check if it is the same as current */
125         if (idx == pi->curr_idx)
126                 return 0;
127
128         POWER_DEBUG_TRACE("Freqency[%u] %u to be set for lcore %u\n",
129                                 idx, pi->freqs[idx], pi->lcore_id);
130         if (fseek(pi->f, 0, SEEK_SET) < 0) {
131                 RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
132                         "for setting frequency for lcore %u\n", pi->lcore_id);
133                 return -1;
134         }
135         if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) {
136                 RTE_LOG(ERR, POWER, "Fail to write new frequency for "
137                                         "lcore %u\n", pi->lcore_id);
138                 return -1;
139         }
140         fflush(pi->f);
141         pi->curr_idx = idx;
142
143         return 1;
144 }
145
146 /**
147  * It is to check the current scaling governor by reading sys file, and then
148  * set it into 'userspace' if it is not by writing the sys file. The original
149  * governor will be saved for rolling back.
150  */
151 static int
152 power_set_governor_userspace(struct rte_power_info *pi)
153 {
154         FILE *f;
155         int ret = -1;
156         char buf[BUFSIZ];
157         char fullpath[PATH_MAX];
158         char *s;
159         int val;
160
161         rte_snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
162                                                         pi->lcore_id);
163         f = fopen(fullpath, "rw+");
164         FOPEN_OR_ERR_RET(f, ret);
165
166         s = fgets(buf, sizeof(buf), f);
167         FOPS_OR_NULL_GOTO(s, out);
168
169         /* Check if current governor is userspace */
170         if (strncmp(buf, POWER_GOVERNOR_USERSPACE,
171                 sizeof(POWER_GOVERNOR_USERSPACE)) == 0) {
172                 ret = 0;
173                 POWER_DEBUG_TRACE("Power management governor of lcore %u is "
174                                         "already userspace\n", pi->lcore_id);
175                 goto out;
176         }
177         /* Save the original governor */
178         rte_snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf);
179
180         /* Write 'userspace' to the governor */
181         val = fseek(f, 0, SEEK_SET);
182         FOPS_OR_ERR_GOTO(val, out);
183
184         val = fputs(POWER_GOVERNOR_USERSPACE, f);
185         FOPS_OR_ERR_GOTO(val, out);
186
187         ret = 0;
188         RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
189                         "set to user space successfully\n", pi->lcore_id);
190 out:
191         fclose(f);
192
193         return ret;
194 }
195
196 /**
197  * It is to get the available frequencies of the specific lcore by reading the
198  * sys file.
199  */
200 static int
201 power_get_available_freqs(struct rte_power_info *pi)
202 {
203         FILE *f;
204         int ret = -1, i, count;
205         char *p;
206         char buf[BUFSIZ];
207         char fullpath[PATH_MAX];
208         char *freqs[RTE_MAX_LCORE_FREQS];
209         char *s;
210
211         rte_snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_AVAIL_FREQ,
212                                                                 pi->lcore_id);
213         f = fopen(fullpath, "r");
214         FOPEN_OR_ERR_RET(f, ret);
215
216         s = fgets(buf, sizeof(buf), f);
217         FOPS_OR_NULL_GOTO(s, out);
218
219         /* Strip the line break if there is */
220         p = strchr(buf, '\n');
221         if (p != NULL)
222                 *p = 0;
223
224         /* Split string into at most RTE_MAX_LCORE_FREQS frequencies */
225         count = rte_strsplit(buf, sizeof(buf), freqs,
226                                 RTE_MAX_LCORE_FREQS, ' ');
227         if (count <= 0) {
228                 RTE_LOG(ERR, POWER, "No available frequency in "
229                         ""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id);
230                 goto out;
231         }
232         if (count >= RTE_MAX_LCORE_FREQS) {
233                 RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n",
234                                                                 count);
235                 goto out;
236         }
237
238         /* Store the available frequncies into power context */
239         for (i = 0, pi->nb_freqs = 0; i < count; i++) {
240                 POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id,
241                                                                 i, freqs[i]);
242                 pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p,
243                                         POWER_CONVERT_TO_DECIMAL);
244         }
245
246         ret = 0;
247         POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n",
248                                                 count, pi->lcore_id);
249 out:
250         fclose(f);
251
252         return ret;
253 }
254
255 /**
256  * It is to fopen the sys file for the future setting the lcore frequency.
257  */
258 static int
259 power_init_for_setting_freq(struct rte_power_info *pi)
260 {
261         FILE *f;
262         char fullpath[PATH_MAX];
263         char buf[BUFSIZ];
264         uint32_t i, freq;
265         char *s;
266
267         rte_snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_SETSPEED,
268                                                         pi->lcore_id);
269         f = fopen(fullpath, "rw+");
270         FOPEN_OR_ERR_RET(f, -1);
271
272         s = fgets(buf, sizeof(buf), f);
273         FOPS_OR_NULL_GOTO(s, out);
274
275         freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL);
276         for (i = 0; i < pi->nb_freqs; i++) {
277                 if (freq == pi->freqs[i]) {
278                         pi->curr_idx = i;
279                         pi->f = f;
280                         return 0;
281                 }
282         }
283
284 out:
285         fclose(f);
286
287         return -1;
288 }
289
290 int
291 rte_power_init(unsigned lcore_id)
292 {
293         struct rte_power_info *pi;
294
295         if (lcore_id >= RTE_MAX_LCORE) {
296                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
297                                         lcore_id, RTE_MAX_LCORE - 1U);
298                 return -1;
299         }
300
301         pi = &lcore_power_info[lcore_id];
302         if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
303                                                                 == 0) {
304                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
305                                                 "in use\n", lcore_id);
306                 return -1;
307         }
308
309         pi->lcore_id = lcore_id;
310         /* Check and set the governor */
311         if (power_set_governor_userspace(pi) < 0) {
312                 RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
313                                                 "userspace\n", lcore_id);
314                 goto fail;
315         }
316
317         /* Get the available frequencies */
318         if (power_get_available_freqs(pi) < 0) {
319                 RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
320                                                 "lcore %u\n", lcore_id);
321                 goto fail;
322         }
323
324         /* Init for setting lcore frequency */
325         if (power_init_for_setting_freq(pi) < 0) {
326                 RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
327                                                 "lcore %u\n", lcore_id);
328                 goto fail;
329         }
330
331         /* Set freq to max by default */
332         if (rte_power_freq_max(lcore_id) < 0) {
333                 RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
334                                                 "to max\n", lcore_id);
335                 goto fail;
336         }
337
338         RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
339                                         "power manamgement\n", lcore_id);
340         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
341
342         return 0;
343
344 fail:
345         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
346
347         return -1;
348 }
349
350 /**
351  * It is to check the governor and then set the original governor back if
352  * needed by writing the the sys file.
353  */
354 static int
355 power_set_governor_original(struct rte_power_info *pi)
356 {
357         FILE *f;
358         int ret = -1;
359         char buf[BUFSIZ];
360         char fullpath[PATH_MAX];
361         char *s;
362         int val;
363
364         rte_snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
365                                                         pi->lcore_id);
366         f = fopen(fullpath, "rw+");
367         FOPEN_OR_ERR_RET(f, ret);
368
369         s = fgets(buf, sizeof(buf), f);
370         FOPS_OR_NULL_GOTO(s, out);
371
372         /* Check if the governor to be set is the same as current */
373         if (strncmp(buf, pi->governor_ori, sizeof(pi->governor_ori)) == 0) {
374                 ret = 0;
375                 POWER_DEBUG_TRACE("Power management governor of lcore %u "
376                                         "has already been set to %s\n",
377                                         pi->lcore_id, pi->governor_ori);
378                 goto out;
379         }
380
381         /* Write back the original governor */
382         val = fseek(f, 0, SEEK_SET);
383         FOPS_OR_ERR_GOTO(val, out);
384
385         val = fputs(pi->governor_ori, f);
386         FOPS_OR_ERR_GOTO(val, out);
387
388         ret = 0;
389         RTE_LOG(INFO, POWER, "Power manamgement governor of lcore %u "
390                                 "has been set back to %s successfully\n",
391                                         pi->lcore_id, pi->governor_ori);
392 out:
393         fclose(f);
394
395         return ret;
396 }
397
398 int
399 rte_power_exit(unsigned lcore_id)
400 {
401         struct rte_power_info *pi;
402
403         if (lcore_id >= RTE_MAX_LCORE) {
404                 RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
405                                         lcore_id, RTE_MAX_LCORE - 1U);
406                 return -1;
407         }
408         pi = &lcore_power_info[lcore_id];
409         if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING)
410                                                                 == 0) {
411                 RTE_LOG(INFO, POWER, "Power management of lcore %u is "
412                                                 "not used\n", lcore_id);
413                 return -1;
414         }
415
416         /* Close FD of setting freq */
417         fclose(pi->f);
418         pi->f = NULL;
419
420         /* Set the governor back to the original */
421         if (power_set_governor_original(pi) < 0) {
422                 RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
423                                         "to the original\n", lcore_id);
424                 goto fail;
425         }
426
427         RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
428                                 "'userspace' mode and been set back to the "
429                                                 "original\n", lcore_id);
430         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
431
432         return 0;
433
434 fail:
435         rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
436
437         return -1;
438 }
439  
440 uint32_t
441 rte_power_freqs(unsigned lcore_id, uint32_t *freqs, uint32_t num)
442 {
443         struct rte_power_info *pi;
444
445         if (lcore_id >= RTE_MAX_LCORE || !freqs) {
446                 RTE_LOG(ERR, POWER, "Invalid input parameter\n");
447                 return 0;
448         }
449
450         pi = &lcore_power_info[lcore_id];
451         if (num < pi->nb_freqs) {
452                 RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
453                 return 0;
454         }
455         rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
456
457         return pi->nb_freqs;
458 }
459  
460 uint32_t
461 rte_power_get_freq(unsigned lcore_id)
462 {
463         if (lcore_id >= RTE_MAX_LCORE) {
464                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
465                 return RTE_POWER_INVALID_FREQ_INDEX;
466         }
467
468         return lcore_power_info[lcore_id].curr_idx;
469 }
470
471 int
472 rte_power_set_freq(unsigned lcore_id, uint32_t index)
473 {
474         if (lcore_id >= RTE_MAX_LCORE) {
475                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
476                 return -1;
477         }
478
479         return set_freq_internal(&(lcore_power_info[lcore_id]), index);
480 }
481  
482 int
483 rte_power_freq_down(unsigned lcore_id)
484 {
485         struct rte_power_info *pi;
486
487         if (lcore_id >= RTE_MAX_LCORE) {
488                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
489                 return -1;
490         }
491
492         pi = &lcore_power_info[lcore_id];
493         if (pi->curr_idx + 1 == pi->nb_freqs)
494                 return 0;
495
496         /* Frequencies in the array are from high to low. */
497         return set_freq_internal(pi, pi->curr_idx + 1);
498 }
499  
500 int
501 rte_power_freq_up(unsigned lcore_id)
502 {
503         struct rte_power_info *pi;
504
505         if (lcore_id >= RTE_MAX_LCORE) {
506                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
507                 return -1;
508         }
509
510         pi = &lcore_power_info[lcore_id];
511         if (pi->curr_idx == 0)
512                 return 0;
513
514         /* Frequencies in the array are from high to low. */
515         return set_freq_internal(pi, pi->curr_idx - 1);
516 }
517  
518 int
519 rte_power_freq_max(unsigned lcore_id)
520 {
521         if (lcore_id >= RTE_MAX_LCORE) {
522                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
523                 return -1;
524         }
525
526         /* Frequencies in the array are from high to low. */
527         return set_freq_internal(&lcore_power_info[lcore_id], 0);
528 }
529  
530 int
531 rte_power_freq_min(unsigned lcore_id)
532 {
533         struct rte_power_info *pi;
534
535         if (lcore_id >= RTE_MAX_LCORE) {
536                 RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
537                 return -1;
538         }
539
540         pi = &lcore_power_info[lcore_id];
541
542         /* Frequencies in the array are from high to low. */
543         return set_freq_internal(pi, pi->nb_freqs - 1);
544 }
545