examples/vm_power_mgr: add policy to channels
[dpdk.git] / examples / vm_power_manager / channel_monitor.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <unistd.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdint.h>
38 #include <signal.h>
39 #include <errno.h>
40 #include <string.h>
41 #include <sys/types.h>
42 #include <sys/epoll.h>
43 #include <sys/queue.h>
44 #include <sys/time.h>
45
46 #include <rte_log.h>
47 #include <rte_memory.h>
48 #include <rte_malloc.h>
49 #include <rte_atomic.h>
50 #include <rte_cycles.h>
51 #include <rte_ethdev.h>
52 #include <rte_pmd_i40e.h>
53
54 #include <libvirt/libvirt.h>
55 #include "channel_monitor.h"
56 #include "channel_commands.h"
57 #include "channel_manager.h"
58 #include "power_manager.h"
59
60 #define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1
61
62 #define MAX_EVENTS 256
63
64 uint64_t vsi_pkt_count_prev[384];
65 uint64_t rdtsc_prev[384];
66
67 double time_period_s = 1;
68 static volatile unsigned run_loop = 1;
69 static int global_event_fd;
70 static unsigned int policy_is_set;
71 static struct epoll_event *global_events_list;
72 static struct policy policies[MAX_VMS];
73
74 void channel_monitor_exit(void)
75 {
76         run_loop = 0;
77         rte_free(global_events_list);
78 }
79
80 static void
81 core_share(int pNo, int z, int x, int t)
82 {
83         if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
84                 if (strcmp(policies[pNo].pkt.vm_name,
85                                 lvm_info[x].vm_name) != 0) {
86                         policies[pNo].core_share[z].status = 1;
87                         power_manager_scale_core_max(
88                                         policies[pNo].core_share[z].pcpu);
89                 }
90         }
91 }
92
93 static void
94 core_share_status(int pNo)
95 {
96
97         int noVms, noVcpus, z, x, t;
98
99         get_all_vm(&noVms, &noVcpus);
100
101         /* Reset Core Share Status. */
102         for (z = 0; z < noVcpus; z++)
103                 policies[pNo].core_share[z].status = 0;
104
105         /* Foreach vcpu in a policy. */
106         for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
107                 /* Foreach VM on the platform. */
108                 for (x = 0; x < noVms; x++) {
109                         /* Foreach vcpu of VMs on platform. */
110                         for (t = 0; t < lvm_info[x].num_cpus; t++)
111                                 core_share(pNo, z, x, t);
112                 }
113         }
114 }
115
116 static void
117 get_pcpu_to_control(struct policy *pol)
118 {
119
120         /* Convert vcpu to pcpu. */
121         struct vm_info info;
122         int pcpu, count;
123         uint64_t mask_u64b;
124
125         RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
126                         pol->pkt.vm_name);
127         get_info_vm(pol->pkt.vm_name, &info);
128
129         for (count = 0; count < pol->pkt.num_vcpu; count++) {
130                 mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
131                 for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
132                         if ((mask_u64b >> pcpu) & 1)
133                                 pol->core_share[count].pcpu = pcpu;
134                 }
135         }
136 }
137
138 static int
139 get_pfid(struct policy *pol)
140 {
141
142         int i, x, ret = 0, nb_ports;
143
144         nb_ports = rte_eth_dev_count();
145         for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {
146
147                 for (x = 0; x < nb_ports; x++) {
148                         ret = rte_pmd_i40e_query_vfid_by_mac(x,
149                                 (struct ether_addr *)&(pol->pkt.vfid[i]));
150                         if (ret != -EINVAL) {
151                                 pol->port[i] = x;
152                                 break;
153                         }
154                 }
155                 if (ret == -EINVAL || ret == -ENOTSUP || ret == ENODEV) {
156                         RTE_LOG(INFO, CHANNEL_MONITOR,
157                                 "Error with Policy. MAC not found on "
158                                 "attached ports ");
159                         pol->enabled = 0;
160                         return ret;
161                 }
162                 pol->pfid[i] = ret;
163         }
164         return 1;
165 }
166
167 static int
168 update_policy(struct channel_packet *pkt)
169 {
170
171         unsigned int updated = 0;
172
173         for (int i = 0; i < MAX_VMS; i++) {
174                 if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
175                         policies[i].pkt = *pkt;
176                         get_pcpu_to_control(&policies[i]);
177                         if (get_pfid(&policies[i]) == -1) {
178                                 updated = 1;
179                                 break;
180                         }
181                         core_share_status(i);
182                         policies[i].enabled = 1;
183                         updated = 1;
184                 }
185         }
186         if (!updated) {
187                 for (int i = 0; i < MAX_VMS; i++) {
188                         if (policies[i].enabled == 0) {
189                                 policies[i].pkt = *pkt;
190                                 get_pcpu_to_control(&policies[i]);
191                                 if (get_pfid(&policies[i]) == -1)
192                                         break;
193                                 core_share_status(i);
194                                 policies[i].enabled = 1;
195                                 break;
196                         }
197                 }
198         }
199         return 0;
200 }
201
202 static uint64_t
203 get_pkt_diff(struct policy *pol)
204 {
205
206         uint64_t vsi_pkt_count,
207                 vsi_pkt_total = 0,
208                 vsi_pkt_count_prev_total = 0;
209         double rdtsc_curr, rdtsc_diff, diff;
210         int x;
211         struct rte_eth_stats vf_stats;
212
213         for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {
214
215                 /*Read vsi stats*/
216                 if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
217                         vsi_pkt_count = vf_stats.ipackets;
218                 else
219                         vsi_pkt_count = -1;
220
221                 vsi_pkt_total += vsi_pkt_count;
222
223                 vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
224                 vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
225         }
226
227         rdtsc_curr = rte_rdtsc_precise();
228         rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
229         rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;
230
231         diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
232                         ((double)rte_get_tsc_hz() / rdtsc_diff);
233
234         return diff;
235 }
236
237 static void
238 apply_traffic_profile(struct policy *pol)
239 {
240
241         int count;
242         uint64_t diff = 0;
243
244         diff = get_pkt_diff(pol);
245
246         RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");
247
248         if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
249                 for (count = 0; count < pol->pkt.num_vcpu; count++) {
250                         if (pol->core_share[count].status != 1)
251                                 power_manager_scale_core_max(
252                                                 pol->core_share[count].pcpu);
253                 }
254         } else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
255                 for (count = 0; count < pol->pkt.num_vcpu; count++) {
256                         if (pol->core_share[count].status != 1)
257                                 power_manager_scale_core_med(
258                                                 pol->core_share[count].pcpu);
259                 }
260         } else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
261                 for (count = 0; count < pol->pkt.num_vcpu; count++) {
262                         if (pol->core_share[count].status != 1)
263                                 power_manager_scale_core_min(
264                                                 pol->core_share[count].pcpu);
265                 }
266         }
267 }
268
269 static void
270 apply_time_profile(struct policy *pol)
271 {
272
273         int count, x;
274         struct timeval tv;
275         struct tm *ptm;
276         char time_string[40];
277
278         /* Obtain the time of day, and convert it to a tm struct. */
279         gettimeofday(&tv, NULL);
280         ptm = localtime(&tv.tv_sec);
281         /* Format the date and time, down to a single second. */
282         strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);
283
284         for (x = 0; x < HOURS; x++) {
285
286                 if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
287                         for (count = 0; count < pol->pkt.num_vcpu; count++) {
288                                 if (pol->core_share[count].status != 1) {
289                                         power_manager_scale_core_max(
290                                                 pol->core_share[count].pcpu);
291                                 RTE_LOG(INFO, CHANNEL_MONITOR,
292                                         "Scaling up core %d to max\n",
293                                         pol->core_share[count].pcpu);
294                                 }
295                         }
296                         break;
297                 } else if (ptm->tm_hour ==
298                                 pol->pkt.timer_policy.quiet_hours[x]) {
299                         for (count = 0; count < pol->pkt.num_vcpu; count++) {
300                                 if (pol->core_share[count].status != 1) {
301                                         power_manager_scale_core_min(
302                                                 pol->core_share[count].pcpu);
303                                 RTE_LOG(INFO, CHANNEL_MONITOR,
304                                         "Scaling down core %d to min\n",
305                                         pol->core_share[count].pcpu);
306                         }
307                 }
308                         break;
309                 } else if (ptm->tm_hour ==
310                         pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
311                         apply_traffic_profile(pol);
312                         break;
313                 }
314         }
315 }
316
317 static void
318 apply_workload_profile(struct policy *pol)
319 {
320
321         int count;
322
323         if (pol->pkt.workload == HIGH) {
324                 for (count = 0; count < pol->pkt.num_vcpu; count++) {
325                         if (pol->core_share[count].status != 1)
326                                 power_manager_scale_core_max(
327                                                 pol->core_share[count].pcpu);
328                 }
329         } else if (pol->pkt.workload == MEDIUM) {
330                 for (count = 0; count < pol->pkt.num_vcpu; count++) {
331                         if (pol->core_share[count].status != 1)
332                                 power_manager_scale_core_med(
333                                                 pol->core_share[count].pcpu);
334                 }
335         } else if (pol->pkt.workload == LOW) {
336                 for (count = 0; count < pol->pkt.num_vcpu; count++) {
337                         if (pol->core_share[count].status != 1)
338                                 power_manager_scale_core_min(
339                                                 pol->core_share[count].pcpu);
340                 }
341         }
342 }
343
344 static void
345 apply_policy(struct policy *pol)
346 {
347
348         struct channel_packet *pkt = &pol->pkt;
349
350         /*Check policy to use*/
351         if (pkt->policy_to_use == TRAFFIC)
352                 apply_traffic_profile(pol);
353         else if (pkt->policy_to_use == TIME)
354                 apply_time_profile(pol);
355         else if (pkt->policy_to_use == WORKLOAD)
356                 apply_workload_profile(pol);
357 }
358
359
360 static int
361 process_request(struct channel_packet *pkt, struct channel_info *chan_info)
362 {
363         uint64_t core_mask;
364
365         if (chan_info == NULL)
366                 return -1;
367
368         if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
369                         CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
370                 return -1;
371
372         if (pkt->command == CPU_POWER) {
373                 core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
374                 if (core_mask == 0) {
375                         RTE_LOG(ERR, CHANNEL_MONITOR, "Error get physical CPU mask for "
376                                 "channel '%s' using vCPU(%u)\n", chan_info->channel_path,
377                                 (unsigned)pkt->unit);
378                         return -1;
379                 }
380                 if (__builtin_popcountll(core_mask) == 1) {
381
382                         unsigned core_num = __builtin_ffsll(core_mask) - 1;
383
384                         switch (pkt->unit) {
385                         case(CPU_POWER_SCALE_MIN):
386                                         power_manager_scale_core_min(core_num);
387                         break;
388                         case(CPU_POWER_SCALE_MAX):
389                                         power_manager_scale_core_max(core_num);
390                         break;
391                         case(CPU_POWER_SCALE_DOWN):
392                                         power_manager_scale_core_down(core_num);
393                         break;
394                         case(CPU_POWER_SCALE_UP):
395                                         power_manager_scale_core_up(core_num);
396                         break;
397                         case(CPU_POWER_ENABLE_TURBO):
398                                 power_manager_enable_turbo_core(core_num);
399                         break;
400                         case(CPU_POWER_DISABLE_TURBO):
401                                 power_manager_disable_turbo_core(core_num);
402                         break;
403                         default:
404                                 break;
405                         }
406                 } else {
407                         switch (pkt->unit) {
408                         case(CPU_POWER_SCALE_MIN):
409                                         power_manager_scale_mask_min(core_mask);
410                         break;
411                         case(CPU_POWER_SCALE_MAX):
412                                         power_manager_scale_mask_max(core_mask);
413                         break;
414                         case(CPU_POWER_SCALE_DOWN):
415                                         power_manager_scale_mask_down(core_mask);
416                         break;
417                         case(CPU_POWER_SCALE_UP):
418                                         power_manager_scale_mask_up(core_mask);
419                         break;
420                         case(CPU_POWER_ENABLE_TURBO):
421                                 power_manager_enable_turbo_mask(core_mask);
422                         break;
423                         case(CPU_POWER_DISABLE_TURBO):
424                                 power_manager_disable_turbo_mask(core_mask);
425                         break;
426                         default:
427                                 break;
428                         }
429
430                 }
431         }
432
433         if (pkt->command == PKT_POLICY) {
434                 RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n");
435                 update_policy(pkt);
436                 policy_is_set = 1;
437         }
438
439         /* Return is not checked as channel status may have been set to DISABLED
440          * from management thread
441          */
442         rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
443                         CHANNEL_MGR_CHANNEL_CONNECTED);
444         return 0;
445
446 }
447
448 int
449 add_channel_to_monitor(struct channel_info **chan_info)
450 {
451         struct channel_info *info = *chan_info;
452         struct epoll_event event;
453
454         event.events = EPOLLIN;
455         event.data.ptr = info;
456         if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
457                 RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
458                                 "to epoll\n", info->channel_path);
459                 return -1;
460         }
461         return 0;
462 }
463
464 int
465 remove_channel_from_monitor(struct channel_info *chan_info)
466 {
467         if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
468                 RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
469                                 "from epoll\n", chan_info->channel_path);
470                 return -1;
471         }
472         return 0;
473 }
474
475 int
476 channel_monitor_init(void)
477 {
478         global_event_fd = epoll_create1(0);
479         if (global_event_fd == 0) {
480                 RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
481                                 "error %s\n", strerror(errno));
482                 return -1;
483         }
484         global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
485                         * MAX_EVENTS, RTE_CACHE_LINE_SIZE);
486         if (global_events_list == NULL) {
487                 RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
488                                 "epoll events\n");
489                 return -1;
490         }
491         return 0;
492 }
493
494 void
495 run_channel_monitor(void)
496 {
497         while (run_loop) {
498                 int n_events, i;
499
500                 n_events = epoll_wait(global_event_fd, global_events_list,
501                                 MAX_EVENTS, 1);
502                 if (!run_loop)
503                         break;
504                 for (i = 0; i < n_events; i++) {
505                         struct channel_info *chan_info = (struct channel_info *)
506                                         global_events_list[i].data.ptr;
507                         if ((global_events_list[i].events & EPOLLERR) ||
508                                 (global_events_list[i].events & EPOLLHUP)) {
509                                 RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
510                                                 "channel '%s'\n",
511                                                 chan_info->channel_path);
512                                 remove_channel(&chan_info);
513                                 continue;
514                         }
515                         if (global_events_list[i].events & EPOLLIN) {
516
517                                 int n_bytes, err = 0;
518                                 struct channel_packet pkt;
519                                 void *buffer = &pkt;
520                                 int buffer_len = sizeof(pkt);
521
522                                 while (buffer_len > 0) {
523                                         n_bytes = read(chan_info->fd,
524                                                         buffer, buffer_len);
525                                         if (n_bytes == buffer_len)
526                                                 break;
527                                         if (n_bytes == -1) {
528                                                 err = errno;
529                                                 RTE_LOG(DEBUG, CHANNEL_MONITOR,
530                                                         "Received error on "
531                                                         "channel '%s' read: %s\n",
532                                                         chan_info->channel_path,
533                                                         strerror(err));
534                                                 remove_channel(&chan_info);
535                                                 break;
536                                         }
537                                         buffer = (char *)buffer + n_bytes;
538                                         buffer_len -= n_bytes;
539                                 }
540                                 if (!err)
541                                         process_request(&pkt, chan_info);
542                         }
543                 }
544                 rte_delay_us(time_period_s*1000000);
545                 if (policy_is_set) {
546                         for (int j = 0; j < MAX_VMS; j++) {
547                                 if (policies[j].enabled == 1)
548                                         apply_policy(&policies[j]);
549                         }
550                 }
551         }
552 }