3c514475f17c8163900ecce7f4fd5b16b875cdfc
[dpdk.git] / examples / vm_power_manager / oob_monitor_x86.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4
5 #include <unistd.h>
6 #include <fcntl.h>
7 #include <rte_log.h>
8
9 #include "oob_monitor.h"
10 #include "power_manager.h"
11 #include "channel_manager.h"
12
/* Loop-control flag for run_branch_monitor(); cleared by branch_monitor_exit(). */
static volatile unsigned int run_loop = 1;

/* Counter deltas recorded by the most recent apply_policy() call that got that far. */
static uint64_t g_branches;
static uint64_t g_branch_misses;

/* Reset to 0 on entry to apply_policy(), set to 1 once a policy decision was made. */
static int g_active;
16
17 void branch_monitor_exit(void)
18 {
19         run_loop = 0;
20 }
21
/* Polling period of the monitor loop, in microseconds. */
#define INTERVAL 100
/* Polling iterations between status prints (nominally one second's worth). */
#define PRINT_LOOP_COUNT (1000000 / INTERVAL)

/* MSR offsets of the two event-select registers programmed by this file. */
#define IA32_PERFEVTSEL0 0x186
#define IA32_PERFEVTSEL1 0x187

/* MSR offsets of the two counters read back on every poll. */
#define IA32_PERFCTR0 0xc1
#define IA32_PERFCTR1 0xc2

/* Event-select values written to PERFEVTSEL0 and PERFEVTSEL1 respectively. */
#define IA32_PERFEVT_BRANCH_HITS 0x05300c4
#define IA32_PERFEVT_BRANCH_MISS 0x05300c5
31
32 static float
33 apply_policy(int core)
34 {
35         struct core_info *ci;
36         uint64_t counter = 0;
37         uint64_t branches, branch_misses;
38         uint64_t last_branches, last_branch_misses;
39         int64_t hits_diff, miss_diff;
40         float ratio;
41         int ret;
42         int freq_window_idx, up_count = 0, i;
43
44         g_active = 0;
45         ci = get_core_info();
46
47         last_branches = ci->cd[core].last_branches;
48         last_branch_misses = ci->cd[core].last_branch_misses;
49
50         ret = pread(ci->cd[core].msr_fd, &counter,
51                         sizeof(counter), IA32_PERFCTR0);
52         if (ret < 0)
53                 RTE_LOG(ERR, POWER_MANAGER,
54                                 "unable to read counter for core %u\n",
55                                 core);
56         branches = counter;
57
58         counter = 0;
59         ret = pread(ci->cd[core].msr_fd, &counter,
60                         sizeof(counter), IA32_PERFCTR1);
61         if (ret < 0)
62                 RTE_LOG(ERR, POWER_MANAGER,
63                                 "unable to read counter for core %u\n",
64                                 core);
65         branch_misses = counter;
66
67
68         ci->cd[core].last_branches = branches;
69         ci->cd[core].last_branch_misses = branch_misses;
70
71         /*
72          * Intentional right shift to make MSB 0 to avoid
73          * possible signed overflow or truncation.
74          */
75         branches >>= 1;
76         last_branches >>= 1;
77         hits_diff = (int64_t)branches - (int64_t)last_branches;
78         if (hits_diff <= 0) {
79                 /* Likely a counter overflow condition, skip this round */
80                 return -1.0;
81         }
82
83         /*
84          * Intentional right shift to make MSB 0 to avoid
85          * possible signed overflow or truncation.
86          */
87         branch_misses >>= 1;
88         last_branch_misses >>= 1;
89         miss_diff = (int64_t)branch_misses - (int64_t)last_branch_misses;
90         if (miss_diff <= 0) {
91                 /* Likely a counter overflow condition, skip this round */
92                 return -1.0;
93         }
94
95         g_branches = hits_diff;
96         g_branch_misses = miss_diff;
97
98         if (hits_diff < (INTERVAL*100)) {
99                 /* Likely no workload running on this core. Skip. */
100                 return -1.0;
101         }
102
103         ratio = (float)miss_diff * (float)100 / (float)hits_diff;
104
105         /*
106          * Store the last few directions that the ratio indicates
107          * we should take. If there's on 'up', then we scale up
108          * quickly. If all indicate 'down', only then do we scale
109          * down. Each core_details struct has it's own array.
110          */
111         freq_window_idx = ci->cd[core].freq_window_idx;
112         if (ratio > ci->cd[core].branch_ratio_threshold)
113                 ci->cd[core].freq_directions[freq_window_idx] = 1;
114         else
115                 ci->cd[core].freq_directions[freq_window_idx] = 0;
116
117         freq_window_idx++;
118         freq_window_idx = freq_window_idx & (FREQ_WINDOW_SIZE-1);
119         ci->cd[core].freq_window_idx = freq_window_idx;
120
121         up_count = 0;
122         for (i = 0; i < FREQ_WINDOW_SIZE; i++)
123                 up_count +=  ci->cd[core].freq_directions[i];
124
125         if (up_count == 0) {
126                 if (ci->cd[core].freq_state != FREQ_MIN) {
127                         power_manager_scale_core_min(core);
128                         ci->cd[core].freq_state = FREQ_MIN;
129                 }
130         } else {
131                 if (ci->cd[core].freq_state != FREQ_MAX) {
132                         power_manager_scale_core_max(core);
133                         ci->cd[core].freq_state = FREQ_MAX;
134                 }
135         }
136
137         g_active = 1;
138         return ratio;
139 }
140
141 int
142 add_core_to_monitor(int core)
143 {
144         struct core_info *ci;
145         char proc_file[UNIX_PATH_MAX];
146         int ret;
147
148         ci = get_core_info();
149
150         if (core < ci->core_count) {
151                 long setup;
152
153                 snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
154                 ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
155                 if (ci->cd[core].msr_fd < 0) {
156                         RTE_LOG(ERR, POWER_MANAGER,
157                                         "Error opening MSR file for core %d "
158                                         "(is msr kernel module loaded?)\n",
159                                         core);
160                         return -1;
161                 }
162                 /*
163                  * Set up branch counters
164                  */
165                 setup = IA32_PERFEVT_BRANCH_HITS;
166                 ret = pwrite(ci->cd[core].msr_fd, &setup,
167                                 sizeof(setup), IA32_PERFEVTSEL0);
168                 if (ret < 0) {
169                         RTE_LOG(ERR, POWER_MANAGER,
170                                         "unable to set counter for core %u\n",
171                                         core);
172                         return ret;
173                 }
174                 setup = IA32_PERFEVT_BRANCH_MISS;
175                 ret = pwrite(ci->cd[core].msr_fd, &setup,
176                                 sizeof(setup), IA32_PERFEVTSEL1);
177                 if (ret < 0) {
178                         RTE_LOG(ERR, POWER_MANAGER,
179                                         "unable to set counter for core %u\n",
180                                         core);
181                         return ret;
182                 }
183                 /*
184                  * Close the file and re-open as read only so
185                  * as not to hog the resource
186                  */
187                 close(ci->cd[core].msr_fd);
188                 ci->cd[core].msr_fd = open(proc_file, O_RDONLY);
189                 if (ci->cd[core].msr_fd < 0) {
190                         RTE_LOG(ERR, POWER_MANAGER,
191                                         "Error opening MSR file for core %d "
192                                         "(is msr kernel module loaded?)\n",
193                                         core);
194                         return -1;
195                 }
196                 ci->cd[core].oob_enabled = 1;
197         }
198         return 0;
199 }
200
201 int
202 remove_core_from_monitor(int core)
203 {
204         struct core_info *ci;
205         char proc_file[UNIX_PATH_MAX];
206         int ret;
207
208         ci = get_core_info();
209
210         if (ci->cd[core].oob_enabled) {
211                 long setup;
212
213                 /*
214                  * close the msr file, then reopen rw so we can
215                  * disable the counters
216                  */
217                 if (ci->cd[core].msr_fd != 0)
218                         close(ci->cd[core].msr_fd);
219                 snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
220                 ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
221                 if (ci->cd[core].msr_fd < 0) {
222                         RTE_LOG(ERR, POWER_MANAGER,
223                                         "Error opening MSR file for core %d "
224                                         "(is msr kernel module loaded?)\n",
225                                         core);
226                         return -1;
227                 }
228                 setup = 0x0; /* clear event */
229                 ret = pwrite(ci->cd[core].msr_fd, &setup,
230                                 sizeof(setup), IA32_PERFEVTSEL0);
231                 if (ret < 0) {
232                         RTE_LOG(ERR, POWER_MANAGER,
233                                         "unable to set counter for core %u\n",
234                                         core);
235                         return ret;
236                 }
237                 setup = 0x0; /* clear event */
238                 ret = pwrite(ci->cd[core].msr_fd, &setup,
239                                 sizeof(setup), IA32_PERFEVTSEL1);
240                 if (ret < 0) {
241                         RTE_LOG(ERR, POWER_MANAGER,
242                                         "unable to set counter for core %u\n",
243                                         core);
244                         return ret;
245                 }
246
247                 close(ci->cd[core].msr_fd);
248                 ci->cd[core].msr_fd = 0;
249                 ci->cd[core].oob_enabled = 0;
250         }
251         return 0;
252 }
253
/*
 * Initialisation hook for the branch monitor.
 * This implementation performs no setup and always returns 0.
 */
int
branch_monitor_init(void)
{
        return 0;
}
259
260 void
261 run_branch_monitor(void)
262 {
263         struct core_info *ci;
264         int print = 0;
265         float ratio;
266         int printed;
267         int reads = 0;
268
269         ci = get_core_info();
270
271         while (run_loop) {
272
273                 if (!run_loop)
274                         break;
275                 usleep(INTERVAL);
276                 int j;
277                 print++;
278                 printed = 0;
279                 for (j = 0; j < ci->core_count; j++) {
280                         if (ci->cd[j].oob_enabled) {
281                                 ratio = apply_policy(j);
282                                 if ((print > PRINT_LOOP_COUNT) && (g_active)) {
283                                         printf("  %d: %.4f {%lu} {%d}", j,
284                                                         ratio, g_branches,
285                                                         reads);
286                                         printed = 1;
287                                         reads = 0;
288                                 } else {
289                                         reads++;
290                                 }
291                         }
292                 }
293                 if (print > PRINT_LOOP_COUNT) {
294                         if (printed)
295                                 printf("\n");
296                         print = 0;
297                 }
298         }
299 }