/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/types.h>
#include <sys/epoll.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>

#include <libvirt/libvirt.h>

#include "channel_monitor.h"
#include "channel_commands.h"
#include "channel_manager.h"
#include "power_manager.h"
#include "oob_monitor.h"

#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

#define MAX_EVENTS 256
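
/*
 * Per-VF history used by get_pkt_diff(): the packet count and TSC timestamp
 * recorded on the previous poll, indexed by VF id.
 */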
uint64_t vsi_pkt_count_prev[384];
uint64_t rdtsc_prev[384];

double time_period_ms = 1;
static volatile unsigned run_loop = 1;
static int global_event_fd;
static unsigned int policy_is_set;
static struct epoll_event *global_events_list;
static struct policy policies[MAX_CLIENTS];
void channel_monitor_exit(void)
    rte_free(global_events_list);
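
/*
 * Mark a vcpu in policy pNo as shared when another VM on the platform is
 * pinned to the same physical core, and keep that core at maximum frequency.
 */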
core_share(int pNo, int z, int x, int t)
    if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
        if (strcmp(policies[pNo].pkt.vm_name,
                lvm_info[x].vm_name) != 0) {
            policies[pNo].core_share[z].status = 1;
            power_manager_scale_core_max(
                    policies[pNo].core_share[z].pcpu);
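
/*
 * Refresh the share status of every core in policy pNo by checking each
 * vcpu of every VM currently present on the platform.
 */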
core_share_status(int pNo)
    int noVms = 0, noVcpus = 0, z, x, t;

    get_all_vm(&noVms, &noVcpus);

    /* Reset Core Share Status. */
    for (z = 0; z < noVcpus; z++)
        policies[pNo].core_share[z].status = 0;

    /* Foreach vcpu in a policy. */
    for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
        /* Foreach VM on the platform. */
        for (x = 0; x < noVms; x++) {
            /* Foreach vcpu of VMs on platform. */
            for (t = 0; t < lvm_info[x].num_cpus; t++)
                core_share(pNo, z, x, t);
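
/*
 * For BRANCH_RATIO policies, enable out-of-band monitoring of pcpu;
 * otherwise record pcpu in the policy's core_share list for frequency scaling.
 */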
pcpu_monitor(struct policy *pol, struct core_info *ci, int pcpu, int count)
    if (pol->pkt.policy_to_use == BRANCH_RATIO) {
        ci->cd[pcpu].oob_enabled = 1;
        ret = add_core_to_monitor(pcpu);

            RTE_LOG(INFO, CHANNEL_MONITOR,
                    "Monitoring pcpu %d OOB for %s\n",
                    pcpu, pol->pkt.vm_name);

            RTE_LOG(ERR, CHANNEL_MONITOR,
                    "Error monitoring pcpu %d OOB for %s\n",
                    pcpu, pol->pkt.vm_name);

        pol->core_share[count].pcpu = pcpu;
        RTE_LOG(INFO, CHANNEL_MONITOR,
                "Monitoring pcpu %d for %s\n",
                pcpu, pol->pkt.vm_name);
get_pcpu_to_control(struct policy *pol)
    /* Convert vcpu to pcpu. */
    struct core_info *ci;

    ci = get_core_info();

    RTE_LOG(INFO, CHANNEL_MONITOR,
            "Looking for pcpu for %s\n", pol->pkt.vm_name);
     * So now that we're handling virtual and physical cores, we need to
     * differentiate between them when adding them to the branch monitor.
     * Virtual cores need to be converted to physical cores.
    if (pol->pkt.core_type == CORE_TYPE_VIRTUAL) {
         * If the cores in the policy are virtual, we need to map them
         * to physical cores. We look up the vm info and use that for
        get_info_vm(pol->pkt.vm_name, &info);
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            mask_u64b =
                info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
            for (pcpu = 0; mask_u64b;
                    mask_u64b &= ~(1ULL << pcpu++)) {
                if ((mask_u64b >> pcpu) & 1)
                    pcpu_monitor(pol, ci, pcpu, count);

         * If the cores in the policy are physical, we just use
         * those core IDs directly.
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            pcpu = pol->pkt.vcpu_to_control[count];
            pcpu_monitor(pol, ci, pcpu, count);
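
/*
 * Resolve each MAC address the policy wants monitored to an i40e VF id by
 * querying every available ethdev port.
 */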
get_pfid(struct policy *pol)
    for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {

        RTE_ETH_FOREACH_DEV(x) {
            ret = rte_pmd_i40e_query_vfid_by_mac(x,
                (struct ether_addr *)&(pol->pkt.vfid[i]));
            if (ret != -EINVAL) {

        if (ret == -EINVAL || ret == -ENOTSUP || ret == -ENODEV) {
            RTE_LOG(INFO, CHANNEL_MONITOR,
                "Error with Policy. MAC not found on "
update_policy(struct channel_packet *pkt)
    unsigned int updated = 0;

    RTE_LOG(INFO, CHANNEL_MONITOR,
            "Applying policy for %s\n", pkt->vm_name);

    for (i = 0; i < MAX_CLIENTS; i++) {
        if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
            /* Copy the contents of *pkt into the policy.pkt */
            policies[i].pkt = *pkt;
            get_pcpu_to_control(&policies[i]);
            if (get_pfid(&policies[i]) == -1) {

            core_share_status(i);
            policies[i].enabled = 1;

    for (i = 0; i < MAX_CLIENTS; i++) {
        if (policies[i].enabled == 0) {
            policies[i].pkt = *pkt;
            get_pcpu_to_control(&policies[i]);
            if (get_pfid(&policies[i]) == -1)

            core_share_status(i);
            policies[i].enabled = 1;
remove_policy(struct channel_packet *pkt)

     * Disabling the policy is simply a case of setting
     * enabled to 0.
    for (i = 0; i < MAX_CLIENTS; i++) {
        if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
            policies[i].enabled = 0;
get_pkt_diff(struct policy *pol)
    uint64_t vsi_pkt_count,
        vsi_pkt_total = 0,
        vsi_pkt_count_prev_total = 0;
    double rdtsc_curr, rdtsc_diff, diff;

    struct rte_eth_stats vf_stats;

    for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {

        if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
            vsi_pkt_count = vf_stats.ipackets;

        vsi_pkt_total += vsi_pkt_count;

        vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
        vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
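
    /*
     * Convert the packet delta into a packets-per-second figure by scaling
     * it with the TSC frequency over the number of TSC cycles elapsed since
     * the previous poll.
     */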
    rdtsc_curr = rte_rdtsc_precise();
    rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
    rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;

    diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
            ((double)rte_get_tsc_hz() / rdtsc_diff);
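
/*
 * Scale a policy's cores based on the measured packet rate: maximum frequency
 * above the max threshold, medium above the average threshold, and minimum
 * otherwise. Cores marked as shared are left alone.
 */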
apply_traffic_profile(struct policy *pol)
    diff = get_pkt_diff(pol);

    RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");

    if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_max(
                        pol->core_share[count].pcpu);
    } else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_med(
                        pol->core_share[count].pcpu);
    } else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_min(
                        pol->core_share[count].pcpu);
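
/*
 * Scale a policy's cores based on the current hour: maximum frequency during
 * the configured busy hours, minimum during quiet hours, and the traffic
 * profile for the hours set to use it.
 */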
apply_time_profile(struct policy *pol)
    char time_string[40];

    /* Obtain the time of day, and convert it to a tm struct. */
    gettimeofday(&tv, NULL);
    ptm = localtime(&tv.tv_sec);
    /* Format the date and time, down to a single second. */
    strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);

    for (x = 0; x < HOURS; x++) {

        if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
            for (count = 0; count < pol->pkt.num_vcpu; count++) {
                if (pol->core_share[count].status != 1) {
                    power_manager_scale_core_max(
                        pol->core_share[count].pcpu);
                    RTE_LOG(INFO, CHANNEL_MONITOR,
                        "Scaling up core %d to max\n",
                        pol->core_share[count].pcpu);
        } else if (ptm->tm_hour ==
                pol->pkt.timer_policy.quiet_hours[x]) {
            for (count = 0; count < pol->pkt.num_vcpu; count++) {
                if (pol->core_share[count].status != 1) {
                    power_manager_scale_core_min(
                        pol->core_share[count].pcpu);
                    RTE_LOG(INFO, CHANNEL_MONITOR,
                        "Scaling down core %d to min\n",
                        pol->core_share[count].pcpu);
        } else if (ptm->tm_hour ==
                pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
            apply_traffic_profile(pol);
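
/*
 * Scale a policy's cores according to the workload level carried in the
 * policy packet: HIGH, MEDIUM or LOW.
 */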
apply_workload_profile(struct policy *pol)

    if (pol->pkt.workload == HIGH) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_max(
                        pol->core_share[count].pcpu);
    } else if (pol->pkt.workload == MEDIUM) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_med(
                        pol->core_share[count].pcpu);
    } else if (pol->pkt.workload == LOW) {
        for (count = 0; count < pol->pkt.num_vcpu; count++) {
            if (pol->core_share[count].status != 1)
                power_manager_scale_core_min(
                        pol->core_share[count].pcpu);
apply_policy(struct policy *pol)
    struct channel_packet *pkt = &pol->pkt;

    /* Check policy to use. */
    if (pkt->policy_to_use == TRAFFIC)
        apply_traffic_profile(pol);
    else if (pkt->policy_to_use == TIME)
        apply_time_profile(pol);
    else if (pkt->policy_to_use == WORKLOAD)
        apply_workload_profile(pol);
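
/*
 * Handle a packet received on a channel: CPU_POWER commands scale a single
 * core or a whole core mask, PKT_POLICY installs or updates a policy, and
 * PKT_POLICY_REMOVE disables one.
 */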
process_request(struct channel_packet *pkt, struct channel_info *chan_info)
    if (chan_info == NULL)
        return -1;

    RTE_LOG(INFO, CHANNEL_MONITOR, "Processing Request %s\n", pkt->vm_name);

    if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
            CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
        return -1;

    if (pkt->command == CPU_POWER) {
        core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
        if (core_mask == 0) {
             * Core mask will be 0 when the hypervisor is not available
             * and we are working directly on the host, so use the core
             * ID itself as the mask.
            core_mask = 1ULL << pkt->resource_id;
        if (__builtin_popcountll(core_mask) == 1) {

            unsigned core_num = __builtin_ffsll(core_mask) - 1;

            case(CPU_POWER_SCALE_MIN):
                power_manager_scale_core_min(core_num);
            case(CPU_POWER_SCALE_MAX):
                power_manager_scale_core_max(core_num);
            case(CPU_POWER_SCALE_DOWN):
                power_manager_scale_core_down(core_num);
            case(CPU_POWER_SCALE_UP):
                power_manager_scale_core_up(core_num);
            case(CPU_POWER_ENABLE_TURBO):
                power_manager_enable_turbo_core(core_num);
            case(CPU_POWER_DISABLE_TURBO):
                power_manager_disable_turbo_core(core_num);

            case(CPU_POWER_SCALE_MIN):
                power_manager_scale_mask_min(core_mask);
            case(CPU_POWER_SCALE_MAX):
                power_manager_scale_mask_max(core_mask);
            case(CPU_POWER_SCALE_DOWN):
                power_manager_scale_mask_down(core_mask);
            case(CPU_POWER_SCALE_UP):
                power_manager_scale_mask_up(core_mask);
            case(CPU_POWER_ENABLE_TURBO):
                power_manager_enable_turbo_mask(core_mask);
            case(CPU_POWER_DISABLE_TURBO):
                power_manager_disable_turbo_mask(core_mask);

    if (pkt->command == PKT_POLICY) {
        RTE_LOG(INFO, CHANNEL_MONITOR,
                "\nProcessing Policy request\n");

    if (pkt->command == PKT_POLICY_REMOVE) {
        RTE_LOG(INFO, CHANNEL_MONITOR,
                "Removing policy %s\n", pkt->vm_name);

     * Return is not checked as channel status may have been set to DISABLED
     * from management thread
    rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
            CHANNEL_MGR_CHANNEL_CONNECTED);
add_channel_to_monitor(struct channel_info **chan_info)
    struct channel_info *info = *chan_info;
    struct epoll_event event;

    event.events = EPOLLIN;
    event.data.ptr = info;
    if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
        RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
                "to epoll\n", info->channel_path);

    RTE_LOG(INFO, CHANNEL_MONITOR, "Added channel '%s' "
            "to monitor\n", info->channel_path);
remove_channel_from_monitor(struct channel_info *chan_info)
    if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL,
            chan_info->fd, NULL) < 0) {
        RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
                "from epoll\n", chan_info->channel_path);
channel_monitor_init(void)
    global_event_fd = epoll_create1(0);
    if (global_event_fd < 0) {
        RTE_LOG(ERR, CHANNEL_MONITOR,
                "Error creating epoll context with error %s\n",

    global_events_list = rte_malloc("epoll_events",
            sizeof(*global_events_list)
            * MAX_EVENTS, RTE_CACHE_LINE_SIZE);
    if (global_events_list == NULL) {
        RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
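
/*
 * Main monitoring loop: wait for activity on the registered channels, read a
 * full channel_packet from any readable channel and process the request, then
 * delay for time_period_ms and apply every enabled policy.
 */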
run_channel_monitor(void)
    n_events = epoll_wait(global_event_fd, global_events_list,

    for (i = 0; i < n_events; i++) {
        struct channel_info *chan_info = (struct channel_info *)
                global_events_list[i].data.ptr;
        if ((global_events_list[i].events & EPOLLERR) ||
                (global_events_list[i].events & EPOLLHUP)) {
            RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
                    chan_info->channel_path);
            remove_channel(&chan_info);

        if (global_events_list[i].events & EPOLLIN) {

            int n_bytes, err = 0;
            struct channel_packet pkt;
            void *buffer = &pkt;
            int buffer_len = sizeof(pkt);

            while (buffer_len > 0) {
                n_bytes = read(chan_info->fd,
                        buffer, buffer_len);
                if (n_bytes == buffer_len)

                    RTE_LOG(DEBUG, CHANNEL_MONITOR,
                            "channel '%s' read: %s\n",
                            chan_info->channel_path,
                    remove_channel(&chan_info);

                buffer = (char *)buffer + n_bytes;
                buffer_len -= n_bytes;

            process_request(&pkt, chan_info);

    rte_delay_us(time_period_ms * 1000);

    for (j = 0; j < MAX_CLIENTS; j++) {
        if (policies[j].enabled == 1)
            apply_policy(&policies[j]);