/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/types.h>
#include <sys/epoll.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>

#include <libvirt/libvirt.h>

#include "channel_monitor.h"
#include "channel_commands.h"
#include "channel_manager.h"
#include "power_manager.h"
#include "oob_monitor.h"

#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

#define MAX_EVENTS 256
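
/*
 * Per-VF history used by get_pkt_diff(): the packet count and TSC timestamp
 * recorded on the previous poll, indexed by VF id.
 */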
uint64_t vsi_pkt_count_prev[384];
uint64_t rdtsc_prev[384];

double time_period_ms = 1;
static volatile unsigned run_loop = 1;
static int global_event_fd;
static unsigned int policy_is_set;
static struct epoll_event *global_events_list;
static struct policy policies[MAX_CLIENTS];
void channel_monitor_exit(void)
    rte_free(global_events_list);
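
/*
 * Mark a vcpu in policy pNo as shared when another VM on the platform is
 * pinned to the same physical core, and keep that core at maximum frequency.
 */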
core_share(int pNo, int z, int x, int t)
    if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
        if (strcmp(policies[pNo].pkt.vm_name,
                lvm_info[x].vm_name) != 0) {
            policies[pNo].core_share[z].status = 1;
            power_manager_scale_core_max(
                    policies[pNo].core_share[z].pcpu);
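
/*
 * Refresh the share status of every core in policy pNo by checking each
 * vcpu of every VM currently present on the platform.
 */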
core_share_status(int pNo)
    int noVms = 0, noVcpus = 0, z, x, t;

    get_all_vm(&noVms, &noVcpus);

    /* Reset Core Share Status. */
    for (z = 0; z < noVcpus; z++)
        policies[pNo].core_share[z].status = 0;

    /* Foreach vcpu in a policy. */
    for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
        /* Foreach VM on the platform. */
        for (x = 0; x < noVms; x++) {
            /* Foreach vcpu of VMs on platform. */
            for (t = 0; t < lvm_info[x].num_cpus; t++)
                core_share(pNo, z, x, t);
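
/*
 * For BRANCH_RATIO policies, enable out-of-band monitoring of pcpu;
 * otherwise record pcpu in the policy's core_share list for frequency scaling.
 */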
pcpu_monitor(struct policy *pol, struct core_info *ci, int pcpu, int count)
    if (pol->pkt.policy_to_use == BRANCH_RATIO) {
        ci->cd[pcpu].oob_enabled = 1;
        ret = add_core_to_monitor(pcpu);

            RTE_LOG(INFO, CHANNEL_MONITOR,
                    "Monitoring pcpu %d OOB for %s\n",
                    pcpu, pol->pkt.vm_name);

            RTE_LOG(ERR, CHANNEL_MONITOR,
                    "Error monitoring pcpu %d OOB for %s\n",
                    pcpu, pol->pkt.vm_name);

        pol->core_share[count].pcpu = pcpu;
        RTE_LOG(INFO, CHANNEL_MONITOR,
                "Monitoring pcpu %d for %s\n",
                pcpu, pol->pkt.vm_name);
get_pcpu_to_control(struct policy *pol)
    /* Convert vcpu to pcpu. */
    struct core_info *ci;

    ci = get_core_info();

    RTE_LOG(INFO, CHANNEL_MONITOR,
            "Looking for pcpu for %s\n", pol->pkt.vm_name);
     * So now that we're handling virtual and physical cores, we need to
     * differentiate between them when adding them to the branch monitor.
     * Virtual cores need to be converted to physical cores.
    if (pol->pkt.core_type == CORE_TYPE_VIRTUAL) {
         * If the cores in the policy are virtual, we need to map them
         * to physical cores. We look up the vm info and use that for
        get_info_vm(pol->pkt.vm_name, &info);
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            mask_u64b =
                info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
            for (pcpu = 0; mask_u64b;
                    mask_u64b &= ~(1ULL << pcpu++)) {
                if ((mask_u64b >> pcpu) & 1)
                    pcpu_monitor(pol, ci, pcpu, count);

         * If the cores in the policy are physical, we just use
         * those core IDs directly.
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            pcpu = pol->pkt.vcpu_to_control[count];
            pcpu_monitor(pol, ci, pcpu, count);
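
/*
 * Resolve each MAC address the policy wants monitored to an i40e VF id by
 * querying every available ethdev port.
 */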
get_pfid(struct policy *pol)
    for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {

        RTE_ETH_FOREACH_DEV(x) {
            ret = rte_pmd_i40e_query_vfid_by_mac(x,
                (struct ether_addr *)&(pol->pkt.vfid[i]));
            if (ret != -EINVAL) {

        if (ret == -EINVAL || ret == -ENOTSUP || ret == -ENODEV) {
            RTE_LOG(INFO, CHANNEL_MONITOR,
                "Error with Policy. MAC not found on "
update_policy(struct channel_packet *pkt)
    unsigned int updated = 0;

    RTE_LOG(INFO, CHANNEL_MONITOR,
            "Applying policy for %s\n", pkt->vm_name);

    for (i = 0; i < MAX_CLIENTS; i++) {
        if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
            /* Copy the contents of *pkt into the policy.pkt */
            policies[i].pkt = *pkt;
            get_pcpu_to_control(&policies[i]);
            if (get_pfid(&policies[i]) == -1) {

            core_share_status(i);
            policies[i].enabled = 1;

    for (i = 0; i < MAX_CLIENTS; i++) {
        if (policies[i].enabled == 0) {
            policies[i].pkt = *pkt;
            get_pcpu_to_control(&policies[i]);
            if (get_pfid(&policies[i]) == -1)

            core_share_status(i);
            policies[i].enabled = 1;
remove_policy(struct channel_packet *pkt)

     * Disabling the policy is simply a case of setting
     * enabled to 0.
    for (i = 0; i < MAX_CLIENTS; i++) {
        if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
            policies[i].enabled = 0;
get_pkt_diff(struct policy *pol)
    uint64_t vsi_pkt_count,
        vsi_pkt_total = 0,
        vsi_pkt_count_prev_total = 0;
    double rdtsc_curr, rdtsc_diff, diff;

    struct rte_eth_stats vf_stats;

    for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {

        if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
            vsi_pkt_count = vf_stats.ipackets;

        vsi_pkt_total += vsi_pkt_count;

        vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
        vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
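
    /*
     * Convert the packet delta into a packets-per-second figure by scaling
     * it with the TSC frequency over the number of TSC cycles elapsed since
     * the previous poll.
     */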
    rdtsc_curr = rte_rdtsc_precise();
    rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
    rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;

    diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
            ((double)rte_get_tsc_hz() / rdtsc_diff);
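
/*
 * Scale a policy's cores based on the measured packet rate: maximum frequency
 * above the max threshold, medium above the average threshold, and minimum
 * otherwise. Cores marked as shared are left alone.
 */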
apply_traffic_profile(struct policy *pol)
    diff = get_pkt_diff(pol);

    RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");

    if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_max(
                        pol->core_share[count].pcpu);
    } else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_med(
                        pol->core_share[count].pcpu);
    } else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_min(
                        pol->core_share[count].pcpu);
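
/*
 * Scale a policy's cores based on the current hour: maximum frequency during
 * the configured busy hours, minimum during quiet hours, and the traffic
 * profile for the hours set to use it.
 */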
apply_time_profile(struct policy *pol)
    char time_string[40];

    /* Obtain the time of day, and convert it to a tm struct. */
    gettimeofday(&tv, NULL);
    ptm = localtime(&tv.tv_sec);
    /* Format the date and time, down to a single second. */
    strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);

    for (x = 0; x < HOURS; x++) {

        if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
            for (count = 0; count < pol->pkt.num_vcpu; count++) {
                if (pol->core_share[count].status != 1) {
                    power_manager_scale_core_max(
                        pol->core_share[count].pcpu);
                    RTE_LOG(INFO, CHANNEL_MONITOR,
                        "Scaling up core %d to max\n",
                        pol->core_share[count].pcpu);
        } else if (ptm->tm_hour ==
                pol->pkt.timer_policy.quiet_hours[x]) {
            for (count = 0; count < pol->pkt.num_vcpu; count++) {
                if (pol->core_share[count].status != 1) {
                    power_manager_scale_core_min(
                        pol->core_share[count].pcpu);
                    RTE_LOG(INFO, CHANNEL_MONITOR,
                        "Scaling down core %d to min\n",
                        pol->core_share[count].pcpu);
        } else if (ptm->tm_hour ==
                pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
            apply_traffic_profile(pol);
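
/*
 * Scale a policy's cores according to the workload level carried in the
 * policy packet: HIGH, MEDIUM or LOW.
 */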
apply_workload_profile(struct policy *pol)

    if (pol->pkt.workload == HIGH) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_max(
                        pol->core_share[count].pcpu);
    } else if (pol->pkt.workload == MEDIUM) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_med(
                        pol->core_share[count].pcpu);
    } else if (pol->pkt.workload == LOW) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_min(
                        pol->core_share[count].pcpu);
apply_policy(struct policy *pol)
    struct channel_packet *pkt = &pol->pkt;

    /* Check policy to use. */
    if (pkt->policy_to_use == TRAFFIC)
        apply_traffic_profile(pol);
    else if (pkt->policy_to_use == TIME)
        apply_time_profile(pol);
    else if (pkt->policy_to_use == WORKLOAD)
        apply_workload_profile(pol);
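
/*
 * Handle a packet received on a channel: CPU_POWER commands scale a single
 * core or a whole core mask, PKT_POLICY installs or updates a policy, and
 * PKT_POLICY_REMOVE disables one.
 */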
process_request(struct channel_packet *pkt, struct channel_info *chan_info)
    if (chan_info == NULL)
        return -1;

    RTE_LOG(INFO, CHANNEL_MONITOR, "Processing Request %s\n", pkt->vm_name);

    if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
            CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
        return -1;

    if (pkt->command == CPU_POWER) {
        core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
        if (core_mask == 0) {
             * Core mask will be 0 when the hypervisor is not available
             * and we are working directly on the host, so use the core
             * ID itself as the mask.
            core_mask = 1ULL << pkt->resource_id;
        if (__builtin_popcountll(core_mask) == 1) {

            unsigned core_num = __builtin_ffsll(core_mask) - 1;

            case(CPU_POWER_SCALE_MIN):
                power_manager_scale_core_min(core_num);
            case(CPU_POWER_SCALE_MAX):
                power_manager_scale_core_max(core_num);
            case(CPU_POWER_SCALE_DOWN):
                power_manager_scale_core_down(core_num);
            case(CPU_POWER_SCALE_UP):
                power_manager_scale_core_up(core_num);
            case(CPU_POWER_ENABLE_TURBO):
                power_manager_enable_turbo_core(core_num);
            case(CPU_POWER_DISABLE_TURBO):
                power_manager_disable_turbo_core(core_num);

            case(CPU_POWER_SCALE_MIN):
                power_manager_scale_mask_min(core_mask);
            case(CPU_POWER_SCALE_MAX):
                power_manager_scale_mask_max(core_mask);
            case(CPU_POWER_SCALE_DOWN):
                power_manager_scale_mask_down(core_mask);
            case(CPU_POWER_SCALE_UP):
                power_manager_scale_mask_up(core_mask);
            case(CPU_POWER_ENABLE_TURBO):
                power_manager_enable_turbo_mask(core_mask);
            case(CPU_POWER_DISABLE_TURBO):
                power_manager_disable_turbo_mask(core_mask);

    if (pkt->command == PKT_POLICY) {
        RTE_LOG(INFO, CHANNEL_MONITOR,
                "\nProcessing Policy request\n");

    if (pkt->command == PKT_POLICY_REMOVE) {
        RTE_LOG(INFO, CHANNEL_MONITOR,
                "Removing policy %s\n", pkt->vm_name);

     * Return is not checked as channel status may have been set to DISABLED
     * from management thread
    rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
            CHANNEL_MGR_CHANNEL_CONNECTED);
add_channel_to_monitor(struct channel_info **chan_info)
    struct channel_info *info = *chan_info;
    struct epoll_event event;

    event.events = EPOLLIN;
    event.data.ptr = info;
    if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
        RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
                "to epoll\n", info->channel_path);

    RTE_LOG(INFO, CHANNEL_MONITOR, "Added channel '%s' "
            "to monitor\n", info->channel_path);
remove_channel_from_monitor(struct channel_info *chan_info)
    if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL,
            chan_info->fd, NULL) < 0) {
        RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
                "from epoll\n", chan_info->channel_path);
channel_monitor_init(void)
    global_event_fd = epoll_create1(0);
    if (global_event_fd < 0) {
        RTE_LOG(ERR, CHANNEL_MONITOR,
                "Error creating epoll context with error %s\n",

    global_events_list = rte_malloc("epoll_events",
            sizeof(*global_events_list)
            * MAX_EVENTS, RTE_CACHE_LINE_SIZE);
    if (global_events_list == NULL) {
        RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
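
/*
 * Main monitoring loop: wait for activity on the registered channels, read a
 * full channel_packet from any readable channel and process the request, then
 * delay for time_period_ms and apply every enabled policy.
 */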
run_channel_monitor(void)
    n_events = epoll_wait(global_event_fd, global_events_list,

    for (i = 0; i < n_events; i++) {
        struct channel_info *chan_info = (struct channel_info *)
                global_events_list[i].data.ptr;
        if ((global_events_list[i].events & EPOLLERR) ||
                (global_events_list[i].events & EPOLLHUP)) {
            RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
                    chan_info->channel_path);
            remove_channel(&chan_info);

        if (global_events_list[i].events & EPOLLIN) {

            int n_bytes, err = 0;
            struct channel_packet pkt;
            void *buffer = &pkt;
            int buffer_len = sizeof(pkt);

            while (buffer_len > 0) {
                n_bytes = read(chan_info->fd,
                        buffer, buffer_len);
                if (n_bytes == buffer_len)

                    RTE_LOG(DEBUG, CHANNEL_MONITOR,
                            "channel '%s' read: %s\n",
                            chan_info->channel_path,
                    remove_channel(&chan_info);

                buffer = (char *)buffer + n_bytes;
                buffer_len -= n_bytes;

            process_request(&pkt, chan_info);

    rte_delay_us(time_period_ms * 1000);

    for (j = 0; j < MAX_CLIENTS; j++) {
        if (policies[j].enabled == 1)
            apply_policy(&policies[j]);