examples/power: increase allowed number of clients
examples/vm_power_manager/channel_monitor.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/epoll.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>

#include <libvirt/libvirt.h>
#include "channel_monitor.h"
#include "channel_commands.h"
#include "channel_manager.h"
#include "power_manager.h"
#include "oob_monitor.h"

#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

#define MAX_EVENTS 256

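/*
 * Previous per-VF packet counts and TSC timestamps, indexed by VF id.
 * get_pkt_diff() uses these to turn cumulative counters into a
 * packets-per-second estimate between two polls.
 */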
uint64_t vsi_pkt_count_prev[384];
uint64_t rdtsc_prev[384];

double time_period_ms = 1;
static volatile unsigned run_loop = 1;
static int global_event_fd;
static unsigned int policy_is_set;
static struct epoll_event *global_events_list;
static struct policy policies[MAX_CLIENTS];

void channel_monitor_exit(void)
{
        run_loop = 0;
        rte_free(global_events_list);
}

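/*
 * Mark vcpu slot z of policy pNo as shared when its physical core is also
 * used by another VM (lvm_info[x], vcpu t), and scale that core to the
 * maximum frequency so neither VM is starved.
 */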
static void
core_share(int pNo, int z, int x, int t)
{
        if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
                if (strcmp(policies[pNo].pkt.vm_name,
                                lvm_info[x].vm_name) != 0) {
                        policies[pNo].core_share[z].status = 1;
                        power_manager_scale_core_max(
                                        policies[pNo].core_share[z].pcpu);
                }
        }
}

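/*
 * Recompute the core-share status of every vcpu in policy pNo by checking
 * it against every vcpu of every VM currently known to the channel manager.
 */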
static void
core_share_status(int pNo)
{

        int noVms = 0, noVcpus = 0, z, x, t;

        get_all_vm(&noVms, &noVcpus);

        /* Reset Core Share Status. */
        for (z = 0; z < noVcpus; z++)
                policies[pNo].core_share[z].status = 0;

        /* Foreach vcpu in a policy. */
        for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
                /* Foreach VM on the platform. */
                for (x = 0; x < noVms; x++) {
                        /* Foreach vcpu of VMs on platform. */
                        for (t = 0; t < lvm_info[x].num_cpus; t++)
                                core_share(pNo, z, x, t);
                }
        }
}

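/*
 * Register a physical core for monitoring. For BRANCH_RATIO policies the
 * core is handed to the out-of-band monitor; for all other policies it is
 * simply recorded in the policy's core_share table at index 'count'.
 */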
static int
pcpu_monitor(struct policy *pol, struct core_info *ci, int pcpu, int count)
{
        int ret = 0;

        if (pol->pkt.policy_to_use == BRANCH_RATIO) {
                ci->cd[pcpu].oob_enabled = 1;
                ret = add_core_to_monitor(pcpu);
                if (ret == 0)
                        RTE_LOG(INFO, CHANNEL_MONITOR,
                                        "Monitoring pcpu %d OOB for %s\n",
                                        pcpu, pol->pkt.vm_name);
                else
                        RTE_LOG(ERR, CHANNEL_MONITOR,
                                        "Error monitoring pcpu %d OOB for %s\n",
                                        pcpu, pol->pkt.vm_name);

        } else {
                pol->core_share[count].pcpu = pcpu;
                RTE_LOG(INFO, CHANNEL_MONITOR,
                                "Monitoring pcpu %d for %s\n",
                                pcpu, pol->pkt.vm_name);
        }
        return ret;
}

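/*
 * Resolve the cores named in a policy to physical core ids and register
 * each one with pcpu_monitor(). Virtual core ids are translated through
 * the VM's vcpu-to-pcpu affinity masks; physical core ids are used
 * directly.
 */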
static void
get_pcpu_to_control(struct policy *pol)
{

        /* Convert vcpu to pcpu. */
        struct vm_info info;
        int pcpu, count;
        uint64_t mask_u64b;
        struct core_info *ci;

        ci = get_core_info();

        RTE_LOG(INFO, CHANNEL_MONITOR,
                        "Looking for pcpu for %s\n", pol->pkt.vm_name);

        /*
         * Now that we're handling virtual and physical cores, we need to
         * differentiate between them when adding them to the branch monitor.
         * Virtual cores need to be converted to physical cores.
         */
        if (pol->pkt.core_type == CORE_TYPE_VIRTUAL) {
                /*
                 * If the cores in the policy are virtual, we need to map them
                 * to physical cores. We look up the vm info and use that for
                 * the mapping.
                 */
                get_info_vm(pol->pkt.vm_name, &info);
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        mask_u64b =
                                info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
                        for (pcpu = 0; mask_u64b;
                                        mask_u64b &= ~(1ULL << pcpu++)) {
                                if ((mask_u64b >> pcpu) & 1)
                                        pcpu_monitor(pol, ci, pcpu, count);
                        }
                }
        } else {
                /*
                 * If the cores in the policy are physical, we just use
                 * those core ids directly.
                 */
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        pcpu = pol->pkt.vcpu_to_control[count];
                        pcpu_monitor(pol, ci, pcpu, count);
                }
        }
}

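/*
 * For every MAC address listed in the policy, find the attached i40e port
 * that owns it and record the matching VF id in pol->pfid[] (and the port
 * in pol->port[]). The policy is disabled if a MAC cannot be found.
 */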
static int
get_pfid(struct policy *pol)
{

        int i, x, ret = 0;

        for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {

                RTE_ETH_FOREACH_DEV(x) {
                        ret = rte_pmd_i40e_query_vfid_by_mac(x,
                                (struct ether_addr *)&(pol->pkt.vfid[i]));
                        if (ret != -EINVAL) {
                                pol->port[i] = x;
                                break;
                        }
                }
                if (ret == -EINVAL || ret == -ENOTSUP || ret == -ENODEV) {
                        RTE_LOG(INFO, CHANNEL_MONITOR,
                                "Error with Policy. MAC not found on "
                                "attached ports\n");
                        pol->enabled = 0;
                        return ret;
                }
                pol->pfid[i] = ret;
        }
        return 1;
}

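/*
 * Install or refresh a policy received over a channel. An existing entry
 * for the same VM name is overwritten; otherwise the packet is stored in
 * the first free (disabled) policy slot, up to MAX_CLIENTS entries.
 */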
static int
update_policy(struct channel_packet *pkt)
{

        unsigned int updated = 0;
        int i;


        RTE_LOG(INFO, CHANNEL_MONITOR,
                        "Applying policy for %s\n", pkt->vm_name);

        for (i = 0; i < MAX_CLIENTS; i++) {
                if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
                        /* Copy the contents of *pkt into the policy.pkt */
                        policies[i].pkt = *pkt;
                        get_pcpu_to_control(&policies[i]);
                        if (get_pfid(&policies[i]) < 0) {
                                updated = 1;
                                break;
                        }
                        core_share_status(i);
                        policies[i].enabled = 1;
                        updated = 1;
                }
        }
        if (!updated) {
                for (i = 0; i < MAX_CLIENTS; i++) {
                        if (policies[i].enabled == 0) {
                                policies[i].pkt = *pkt;
                                get_pcpu_to_control(&policies[i]);
                                if (get_pfid(&policies[i]) < 0)
                                        break;
                                core_share_status(i);
                                policies[i].enabled = 1;
                                break;
                        }
                }
        }
        return 0;
}

static int
remove_policy(struct channel_packet *pkt __rte_unused)
{
        int i;

        /*
         * Disabling the policy is simply a case of setting
         * enabled to 0.
         */
        for (i = 0; i < MAX_CLIENTS; i++) {
                if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
                        policies[i].enabled = 0;
                        return 0;
                }
        }
        return -1;
}

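/*
 * Estimate the receive rate for a policy. Per-VF ipackets counters are
 * summed and compared with the totals from the previous call, and the
 * difference is scaled by the TSC delta, giving an approximate
 * packets-per-second figure:
 *
 *   rate = (pkts_now - pkts_prev) * rte_get_tsc_hz() / (tsc_now - tsc_prev)
 */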
static uint64_t
get_pkt_diff(struct policy *pol)
{

        uint64_t vsi_pkt_count,
                vsi_pkt_total = 0,
                vsi_pkt_count_prev_total = 0;
        double rdtsc_curr, rdtsc_diff, diff;
        int x;
        struct rte_eth_stats vf_stats;

        for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {

                /* Read VSI stats. */
                if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
                        vsi_pkt_count = vf_stats.ipackets;
                else
                        vsi_pkt_count = -1;

                vsi_pkt_total += vsi_pkt_count;

                vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
                vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
        }

        rdtsc_curr = rte_rdtsc_precise();
        rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
        rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;

        diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
                        ((double)rte_get_tsc_hz() / rdtsc_diff);

        return diff;
}

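/*
 * TRAFFIC policy: scale the policy's cores to max, medium or min frequency
 * depending on where the measured packet rate falls relative to the
 * max/avg thresholds carried in the policy packet. Cores flagged as shared
 * (status == 1) are left alone.
 */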
static void
apply_traffic_profile(struct policy *pol)
{

        int count;
        uint64_t diff = 0;

        diff = get_pkt_diff(pol);

        RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");

        if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_max(
                                                pol->core_share[count].pcpu);
                }
        } else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_med(
                                                pol->core_share[count].pcpu);
                }
        } else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_min(
                                                pol->core_share[count].pcpu);
                }
        }
}

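/*
 * TIME policy: compare the current hour against the policy's busy, quiet
 * and traffic-profile hour lists and scale the cores up, down, or fall
 * back to the traffic profile accordingly.
 */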
static void
apply_time_profile(struct policy *pol)
{

        int count, x;
        struct timeval tv;
        struct tm *ptm;
        char time_string[40];

        /* Obtain the time of day, and convert it to a tm struct. */
        gettimeofday(&tv, NULL);
        ptm = localtime(&tv.tv_sec);
        /* Format the date and time, down to a single second. */
        strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);

        for (x = 0; x < HOURS; x++) {

                if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
                        for (count = 0; count < pol->pkt.num_vcpu; count++) {
                                if (pol->core_share[count].status != 1) {
                                        power_manager_scale_core_max(
                                                pol->core_share[count].pcpu);
                                        RTE_LOG(INFO, CHANNEL_MONITOR,
                                                "Scaling up core %d to max\n",
                                                pol->core_share[count].pcpu);
                                }
                        }
                        break;
                } else if (ptm->tm_hour ==
                                pol->pkt.timer_policy.quiet_hours[x]) {
                        for (count = 0; count < pol->pkt.num_vcpu; count++) {
                                if (pol->core_share[count].status != 1) {
                                        power_manager_scale_core_min(
                                                pol->core_share[count].pcpu);
                                        RTE_LOG(INFO, CHANNEL_MONITOR,
                                                "Scaling down core %d to min\n",
                                                pol->core_share[count].pcpu);
                                }
                        }
                        break;
                } else if (ptm->tm_hour ==
                        pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
                        apply_traffic_profile(pol);
                        break;
                }
        }
}

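/*
 * WORKLOAD policy: scale the policy's cores to max, medium or min frequency
 * based on the workload level (HIGH/MEDIUM/LOW) declared in the policy
 * packet.
 */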
static void
apply_workload_profile(struct policy *pol)
{

        int count;

        if (pol->pkt.workload == HIGH) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_max(
                                                pol->core_share[count].pcpu);
                }
        } else if (pol->pkt.workload == MEDIUM) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_med(
                                                pol->core_share[count].pcpu);
                }
        } else if (pol->pkt.workload == LOW) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_min(
                                                pol->core_share[count].pcpu);
                }
        }
}

static void
apply_policy(struct policy *pol)
{

        struct channel_packet *pkt = &pol->pkt;

        /* Check which policy type to use. */
        if (pkt->policy_to_use == TRAFFIC)
                apply_traffic_profile(pol);
        else if (pkt->policy_to_use == TIME)
                apply_time_profile(pol);
        else if (pkt->policy_to_use == WORKLOAD)
                apply_workload_profile(pol);
}

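/*
 * Handle one packet received on a channel. CPU_POWER requests scale a
 * single core or a core mask directly; PKT_POLICY installs a policy via
 * update_policy(); PKT_POLICY_REMOVE disables it. The channel status is
 * moved to PROCESSING for the duration of the request.
 */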
static int
process_request(struct channel_packet *pkt, struct channel_info *chan_info)
{
        uint64_t core_mask;

        if (chan_info == NULL)
                return -1;

        RTE_LOG(INFO, CHANNEL_MONITOR, "Processing Request %s\n", pkt->vm_name);

        if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
                        CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
                return -1;

        if (pkt->command == CPU_POWER) {
                core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
                if (core_mask == 0) {
                        /*
                         * Core mask will be 0 in the case where the
                         * hypervisor is not available and we're working in
                         * the host, so use the core id as the mask.
                         */
                        core_mask = 1ULL << pkt->resource_id;
                }
                if (__builtin_popcountll(core_mask) == 1) {

                        unsigned core_num = __builtin_ffsll(core_mask) - 1;

                        switch (pkt->unit) {
                        case(CPU_POWER_SCALE_MIN):
                                power_manager_scale_core_min(core_num);
                                break;
                        case(CPU_POWER_SCALE_MAX):
                                power_manager_scale_core_max(core_num);
                                break;
                        case(CPU_POWER_SCALE_DOWN):
                                power_manager_scale_core_down(core_num);
                                break;
                        case(CPU_POWER_SCALE_UP):
                                power_manager_scale_core_up(core_num);
                                break;
                        case(CPU_POWER_ENABLE_TURBO):
                                power_manager_enable_turbo_core(core_num);
                                break;
                        case(CPU_POWER_DISABLE_TURBO):
                                power_manager_disable_turbo_core(core_num);
                                break;
                        default:
                                break;
                        }
                } else {
                        switch (pkt->unit) {
                        case(CPU_POWER_SCALE_MIN):
                                power_manager_scale_mask_min(core_mask);
                                break;
                        case(CPU_POWER_SCALE_MAX):
                                power_manager_scale_mask_max(core_mask);
                                break;
                        case(CPU_POWER_SCALE_DOWN):
                                power_manager_scale_mask_down(core_mask);
                                break;
                        case(CPU_POWER_SCALE_UP):
                                power_manager_scale_mask_up(core_mask);
                                break;
                        case(CPU_POWER_ENABLE_TURBO):
                                power_manager_enable_turbo_mask(core_mask);
                                break;
                        case(CPU_POWER_DISABLE_TURBO):
                                power_manager_disable_turbo_mask(core_mask);
                                break;
                        default:
                                break;
                        }

                }
        }

        if (pkt->command == PKT_POLICY) {
                RTE_LOG(INFO, CHANNEL_MONITOR,
                                "\nProcessing Policy request\n");
                update_policy(pkt);
                policy_is_set = 1;
        }

        if (pkt->command == PKT_POLICY_REMOVE) {
                RTE_LOG(INFO, CHANNEL_MONITOR,
                                "Removing policy %s\n", pkt->vm_name);
                remove_policy(pkt);
        }

        /*
         * Return is not checked as channel status may have been set to
         * DISABLED from the management thread.
         */
        rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
                        CHANNEL_MGR_CHANNEL_CONNECTED);
        return 0;

}

int
add_channel_to_monitor(struct channel_info **chan_info)
{
        struct channel_info *info = *chan_info;
        struct epoll_event event;

        event.events = EPOLLIN;
        event.data.ptr = info;
        if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
                RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
                                "to epoll\n", info->channel_path);
                return -1;
        }
        RTE_LOG(INFO, CHANNEL_MONITOR, "Added channel '%s' "
                        "to monitor\n", info->channel_path);
        return 0;
}

int
remove_channel_from_monitor(struct channel_info *chan_info)
{
        if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL,
                        chan_info->fd, NULL) < 0) {
                RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
                                "from epoll\n", chan_info->channel_path);
                return -1;
        }
        return 0;
}

int
channel_monitor_init(void)
{
        global_event_fd = epoll_create1(0);
        if (global_event_fd < 0) {
                RTE_LOG(ERR, CHANNEL_MONITOR,
                                "Error creating epoll context with error %s\n",
                                strerror(errno));
                return -1;
        }
        global_events_list = rte_malloc("epoll_events",
                        sizeof(*global_events_list)
                        * MAX_EVENTS, RTE_CACHE_LINE_SIZE);
        if (global_events_list == NULL) {
                RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
                                "epoll events\n");
                return -1;
        }
        return 0;
}

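/*
 * Main monitoring loop: wait (with a short timeout) for channel fds to
 * become readable, read one channel_packet per ready channel and pass it
 * to process_request(), then periodically re-apply any enabled policies.
 * Channels that report EPOLLERR/EPOLLHUP or a read error are removed.
 */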
void
run_channel_monitor(void)
{
        while (run_loop) {
                int n_events, i;

                n_events = epoll_wait(global_event_fd, global_events_list,
                                MAX_EVENTS, 1);
                if (!run_loop)
                        break;
                for (i = 0; i < n_events; i++) {
                        struct channel_info *chan_info = (struct channel_info *)
                                        global_events_list[i].data.ptr;
                        if ((global_events_list[i].events & EPOLLERR) ||
                                (global_events_list[i].events & EPOLLHUP)) {
                                RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
                                                "channel '%s'\n",
                                                chan_info->channel_path);
                                remove_channel(&chan_info);
                                continue;
                        }
                        if (global_events_list[i].events & EPOLLIN) {

                                int n_bytes, err = 0;
                                struct channel_packet pkt;
                                void *buffer = &pkt;
                                int buffer_len = sizeof(pkt);

                                while (buffer_len > 0) {
                                        n_bytes = read(chan_info->fd,
                                                        buffer, buffer_len);
                                        if (n_bytes == buffer_len)
                                                break;
                                        if (n_bytes == -1) {
                                                err = errno;
                                                RTE_LOG(DEBUG, CHANNEL_MONITOR,
                                                        "Received error on "
                                                        "channel '%s' read: %s\n",
                                                        chan_info->channel_path,
                                                        strerror(err));
                                                remove_channel(&chan_info);
                                                break;
                                        }
                                        buffer = (char *)buffer + n_bytes;
                                        buffer_len -= n_bytes;
                                }
                                if (!err)
                                        process_request(&pkt, chan_info);
                        }
                }
                rte_delay_us(time_period_ms*1000);
                if (policy_is_set) {
                        int j;

                        for (j = 0; j < MAX_CLIENTS; j++) {
                                if (policies[j].enabled == 1)
                                        apply_policy(&policies[j]);
                        }
                }
        }
}