4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include <sys/types.h>
42 #include <sys/epoll.h>
43 #include <sys/queue.h>
47 #include <rte_memory.h>
48 #include <rte_malloc.h>
49 #include <rte_atomic.h>
50 #include <rte_cycles.h>
51 #include <rte_ethdev.h>
52 #include <rte_pmd_i40e.h>
54 #include <libvirt/libvirt.h>
55 #include "channel_monitor.h"
56 #include "channel_commands.h"
57 #include "channel_manager.h"
58 #include "power_manager.h"
/* Route this file's RTE_LOG() output to the USER1 log type. */
60 #define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1
/* Capacity of the epoll event array used by run_channel_monitor(). */
62 #define MAX_EVENTS 256
/* Per-VF packet counters and TSC samples from the previous sampling
 * interval, indexed by VF id (pfid).  384 is a hard-coded upper bound;
 * NOTE(review): no bounds check on pfid is visible in this file --
 * confirm pfid < 384 is guaranteed by the caller/driver.
 */
64 uint64_t vsi_pkt_count_prev[384];
65 uint64_t rdtsc_prev[384];
/* Delay, in seconds, between policy-evaluation passes of the monitor loop. */
67 double time_period_s = 1;
/* Main-loop keep-running flag; volatile so an asynchronous write (e.g.
 * from a signal handler) is observed.  NOTE(review): volatile is not a
 * thread-synchronization primitive -- if another thread clears this,
 * C11 atomics would be the correct tool.
 */
68 static volatile unsigned run_loop = 1;
/* epoll instance fd, created in channel_monitor_init(). */
69 static int global_event_fd;
70 static unsigned int policy_is_set;
/* Event buffer for epoll_wait(); allocated in channel_monitor_init()
 * and released in channel_monitor_exit().
 */
71 static struct epoll_event *global_events_list;
/* Policy table, one slot per VM; a slot is live when .enabled == 1. */
72 static struct policy policies[MAX_VMS];
/* Tear down the channel monitor: free the epoll event buffer allocated
 * by channel_monitor_init().  Safe to call even if init never ran,
 * since rte_free() of a NULL pointer is a no-op per the DPDK API.
 */
74 void channel_monitor_exit(void)
77 rte_free(global_events_list);
/* Check whether pcpu slot z of policy pNo collides with vcpu t of
 * VM x (lvm_info[x]).  If the two map to the same physical CPU and
 * VM x is a *different* VM than the one owning the policy, the core
 * is marked shared (status = 1) and pinned to max frequency so that
 * neither VM's policy can scale it down under the other.
 *
 * pNo: index into policies[]; z: vcpu slot within that policy;
 * x: index into lvm_info[]; t: vcpu index within lvm_info[x].
 */
81 core_share(int pNo, int z, int x, int t)
83 if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
/* Skip the policy's own VM -- only cross-VM sharing matters here. */
84 if (strcmp(policies[pNo].pkt.vm_name,
85 lvm_info[x].vm_name) != 0) {
86 policies[pNo].core_share[z].status = 1;
87 power_manager_scale_core_max(
88 policies[pNo].core_share[z].pcpu);
/* Recompute the shared-core status flags for policy pNo by checking
 * every vcpu the policy controls against every vcpu of every VM on
 * the platform (triple loop delegating to core_share()).
 */
94 core_share_status(int pNo)
97 int noVms, noVcpus, z, x, t;
99 get_all_vm(&noVms, &noVcpus);
101 /* Reset Core Share Status. */
/* NOTE(review): the reset loop bound is noVcpus (platform-wide vcpu
 * count from get_all_vm()) but core_share[] is a per-policy array --
 * confirm noVcpus can never exceed the core_share[] capacity.
 */
102 for (z = 0; z < noVcpus; z++)
103 policies[pNo].core_share[z].status = 0;
105 /* Foreach vcpu in a policy. */
106 for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
107 /* Foreach VM on the platform. */
108 for (x = 0; x < noVms; x++) {
109 /* Foreach vcpu of VMs on platform. */
110 for (t = 0; t < lvm_info[x].num_cpus; t++)
111 core_share(pNo, z, x, t);
/* Resolve each vcpu listed in the policy's vcpu_to_control[] to a
 * physical CPU number, using the VM's pcpu affinity masks obtained
 * via get_info_vm().
 */
117 get_pcpu_to_control(struct policy *pol)
120 /* Convert vcpu to pcpu. */
125 RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
127 get_info_vm(pol->pkt.vm_name, &info);
129 for (count = 0; count < pol->pkt.num_vcpu; count++) {
130 mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
/* Walk the affinity mask bit by bit, clearing each bit as we go.
 * core_share[count].pcpu is overwritten on every set bit, so the
 * highest set bit wins -- effectively assumes the vcpu is pinned to
 * a single pcpu (1:1 mapping); NOTE(review): confirm that assumption.
 */
131 for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
132 if ((mask_u64b >> pcpu) & 1)
133 pol->core_share[count].pcpu = pcpu;
/* For each MAC address the policy monitors, find the owning i40e VF id
 * (pfid) by querying every ethdev port in turn.  The query returns the
 * VF id on success or a negative errno on failure.
 */
139 get_pfid(struct policy *pol)
142 int i, x, ret = 0, nb_ports;
144 nb_ports = rte_eth_dev_count();
145 for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {
147 for (x = 0; x < nb_ports; x++) {
148 ret = rte_pmd_i40e_query_vfid_by_mac(x,
149 (struct ether_addr *)&(pol->pkt.vfid[i]));
/* A result other than -EINVAL means the port answered (found or
 * another definitive error) -- stop probing further ports.
 */
150 if (ret != -EINVAL) {
/* NOTE(review): "ret == ENODEV" compares against the *positive*
 * errno while the other two cases are negative -- almost certainly
 * should be -ENODEV; confirm against the driver's return values.
 */
155 if (ret == -EINVAL || ret == -ENOTSUP || ret == ENODEV) {
156 RTE_LOG(INFO, CHANNEL_MONITOR,
157 "Error with Policy. MAC not found on "
/* Install or refresh the policy carried in pkt.
 * Pass 1: if a policy with the same VM name already exists, overwrite
 * it in place.  Pass 2: otherwise claim the first disabled slot.
 * Both paths copy the packet, re-resolve pcpus and VF ids, refresh
 * core-share status, and mark the slot enabled.
 */
168 update_policy(struct channel_packet *pkt)
171 unsigned int updated = 0;
174 for (i = 0; i < MAX_VMS; i++) {
175 if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
176 policies[i].pkt = *pkt;
177 get_pcpu_to_control(&policies[i]);
/* VF lookup failure disables/aborts this slot (handling lines not
 * visible here).
 */
178 if (get_pfid(&policies[i]) == -1) {
182 core_share_status(i);
183 policies[i].enabled = 1;
/* No existing policy matched -- take the first free (disabled) slot. */
188 for (i = 0; i < MAX_VMS; i++) {
189 if (policies[i].enabled == 0) {
190 policies[i].pkt = *pkt;
191 get_pcpu_to_control(&policies[i]);
192 if (get_pfid(&policies[i]) == -1)
194 core_share_status(i);
195 policies[i].enabled = 1;
/* Estimate the aggregate receive packet rate (packets/second) across
 * all VFs monitored by the policy.  Sums the current ipackets counter
 * per VF, subtracts the totals saved on the previous call, and scales
 * the delta by elapsed TSC cycles versus the TSC frequency.
 */
204 get_pkt_diff(struct policy *pol)
207 uint64_t vsi_pkt_count,
209 vsi_pkt_count_prev_total = 0;
210 double rdtsc_curr, rdtsc_diff, diff;
212 struct rte_eth_stats vf_stats;
214 for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {
/* NOTE(review): if the stats query fails, vsi_pkt_count may be used
 * uninitialized below -- confirm an else-path exists in the full file.
 */
217 if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
218 vsi_pkt_count = vf_stats.ipackets;
222 vsi_pkt_total += vsi_pkt_count;
/* Roll the per-VF counter: accumulate last interval's value, then
 * store the fresh reading for the next call.
 */
224 vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
225 vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
/* NOTE(review): pfid[x-1] relies on x ending at nb_mac_to_monitor
 * after the loop -- undefined behavior if nb_mac_to_monitor is 0;
 * confirm callers guarantee at least one monitored MAC.
 */
228 rdtsc_curr = rte_rdtsc_precise();
229 rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
230 rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;
/* packets * (tsc_hz / cycles_elapsed) == packets per second. */
232 diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
233 ((double)rte_get_tsc_hz() / rdtsc_diff);
/* Scale the policy's cores according to the measured packet rate
 * versus its traffic thresholds:
 *   rate >= max_max threshold  -> max frequency
 *   rate >= avg_max threshold  -> medium frequency
 *   otherwise                  -> min frequency
 * Cores flagged as shared with another VM (status == 1) are never
 * touched, since they were pinned to max by core_share().
 */
239 apply_traffic_profile(struct policy *pol)
245 diff = get_pkt_diff(pol);
247 RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");
249 if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
250 for (count = 0; count < pol->pkt.num_vcpu; count++) {
251 if (pol->core_share[count].status != 1)
252 power_manager_scale_core_max(
253 pol->core_share[count].pcpu);
255 } else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
256 for (count = 0; count < pol->pkt.num_vcpu; count++) {
257 if (pol->core_share[count].status != 1)
258 power_manager_scale_core_med(
259 pol->core_share[count].pcpu);
261 } else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
262 for (count = 0; count < pol->pkt.num_vcpu; count++) {
263 if (pol->core_share[count].status != 1)
264 power_manager_scale_core_min(
265 pol->core_share[count].pcpu);
/* Scale the policy's cores according to the current wall-clock hour:
 * a busy hour pins cores to max, a quiet hour drops them to min, and
 * an hour listed for the traffic profile defers to
 * apply_traffic_profile().  Shared cores (status == 1) are skipped.
 */
271 apply_time_profile(struct policy *pol)
277 char time_string[40];
279 /* Obtain the time of day, and convert it to a tm struct. */
/* NOTE(review): localtime() returns a shared static buffer and is not
 * thread-safe -- localtime_r() would be safer if other threads also
 * use it.
 */
280 gettimeofday(&tv, NULL);
281 ptm = localtime(&tv.tv_sec);
282 /* Format the date and time, down to a single second. */
/* NOTE(review): time_string is formatted here but not consumed in the
 * visible lines -- presumably used for logging; confirm in full file.
 */
283 strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);
285 for (x = 0; x < HOURS; x++) {
287 if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
288 for (count = 0; count < pol->pkt.num_vcpu; count++) {
289 if (pol->core_share[count].status != 1) {
290 power_manager_scale_core_max(
291 pol->core_share[count].pcpu);
292 RTE_LOG(INFO, CHANNEL_MONITOR,
293 "Scaling up core %d to max\n",
294 pol->core_share[count].pcpu);
298 } else if (ptm->tm_hour ==
299 pol->pkt.timer_policy.quiet_hours[x]) {
300 for (count = 0; count < pol->pkt.num_vcpu; count++) {
301 if (pol->core_share[count].status != 1) {
302 power_manager_scale_core_min(
303 pol->core_share[count].pcpu);
304 RTE_LOG(INFO, CHANNEL_MONITOR,
305 "Scaling down core %d to min\n",
306 pol->core_share[count].pcpu);
310 } else if (ptm->tm_hour ==
311 pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
312 apply_traffic_profile(pol);
/* Scale the policy's cores according to its declared workload level:
 * HIGH -> max, MEDIUM -> medium, LOW -> min frequency.  Cores shared
 * with another VM (status == 1) are skipped.
 */
319 apply_workload_profile(struct policy *pol)
324 if (pol->pkt.workload == HIGH) {
325 for (count = 0; count < pol->pkt.num_vcpu; count++) {
326 if (pol->core_share[count].status != 1)
327 power_manager_scale_core_max(
328 pol->core_share[count].pcpu);
330 } else if (pol->pkt.workload == MEDIUM) {
331 for (count = 0; count < pol->pkt.num_vcpu; count++) {
332 if (pol->core_share[count].status != 1)
333 power_manager_scale_core_med(
334 pol->core_share[count].pcpu);
336 } else if (pol->pkt.workload == LOW) {
337 for (count = 0; count < pol->pkt.num_vcpu; count++) {
338 if (pol->core_share[count].status != 1)
339 power_manager_scale_core_min(
340 pol->core_share[count].pcpu);
/* Dispatch a policy to the handler matching its policy_to_use field
 * (TRAFFIC, TIME or WORKLOAD).  Any other value is silently ignored.
 */
346 apply_policy(struct policy *pol)
349 struct channel_packet *pkt = &pol->pkt;
351 /*Check policy to use*/
352 if (pkt->policy_to_use == TRAFFIC)
353 apply_traffic_profile(pol);
354 else if (pkt->policy_to_use == TIME)
355 apply_time_profile(pol);
356 else if (pkt->policy_to_use == WORKLOAD)
357 apply_workload_profile(pol);
/* Handle one packet received on a channel.
 *
 * The channel is atomically moved CONNECTED -> PROCESSING; if the CAS
 * fails (another thread owns it, or it was disabled) the request is
 * dropped.  For CPU_POWER commands the guest vcpu is mapped to a
 * physical CPU mask: a single-bit mask uses the per-core power API,
 * a multi-bit mask the mask-based API.  PKT_POLICY requests install a
 * policy (the update_policy() call is in lines not visible here --
 * presumed).  Finally the channel is moved back PROCESSING ->
 * CONNECTED; that CAS result is deliberately ignored because the
 * management thread may have set the channel to DISABLED meanwhile.
 */
362 process_request(struct channel_packet *pkt, struct channel_info *chan_info)
366 if (chan_info == NULL)
369 if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
370 CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
373 if (pkt->command == CPU_POWER) {
374 core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
375 if (core_mask == 0) {
376 RTE_LOG(ERR, CHANNEL_MONITOR, "Error get physical CPU mask for "
377 "channel '%s' using vCPU(%u)\n", chan_info->channel_path,
378 (unsigned)pkt->unit);
/* Exactly one bit set: operate on that single core. */
381 if (__builtin_popcountll(core_mask) == 1) {
/* ffsll() is 1-based, hence the -1 to get the core number. */
383 unsigned core_num = __builtin_ffsll(core_mask) - 1;
/* (break statements between the cases are in lines not shown here.) */
386 case(CPU_POWER_SCALE_MIN):
387 power_manager_scale_core_min(core_num);
389 case(CPU_POWER_SCALE_MAX):
390 power_manager_scale_core_max(core_num);
392 case(CPU_POWER_SCALE_DOWN):
393 power_manager_scale_core_down(core_num);
395 case(CPU_POWER_SCALE_UP):
396 power_manager_scale_core_up(core_num);
398 case(CPU_POWER_ENABLE_TURBO):
399 power_manager_enable_turbo_core(core_num);
401 case(CPU_POWER_DISABLE_TURBO):
402 power_manager_disable_turbo_core(core_num);
/* Multiple bits set: apply the same command to the whole mask. */
409 case(CPU_POWER_SCALE_MIN):
410 power_manager_scale_mask_min(core_mask);
412 case(CPU_POWER_SCALE_MAX):
413 power_manager_scale_mask_max(core_mask);
415 case(CPU_POWER_SCALE_DOWN):
416 power_manager_scale_mask_down(core_mask);
418 case(CPU_POWER_SCALE_UP):
419 power_manager_scale_mask_up(core_mask);
421 case(CPU_POWER_ENABLE_TURBO):
422 power_manager_enable_turbo_mask(core_mask);
424 case(CPU_POWER_DISABLE_TURBO):
425 power_manager_disable_turbo_mask(core_mask);
434 if (pkt->command == PKT_POLICY) {
435 RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n");
440 /* Return is not checked as channel status may have been set to DISABLED
441 * from management thread
443 rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
444 CHANNEL_MGR_CHANNEL_CONNECTED);
/* Register a channel's fd with the global epoll instance, watching for
 * input (EPOLLIN).  The channel_info pointer is stashed in the event's
 * user-data field so run_channel_monitor() can recover it directly
 * from the epoll_wait() result.
 */
450 add_channel_to_monitor(struct channel_info **chan_info)
452 struct channel_info *info = *chan_info;
453 struct epoll_event event;
455 event.events = EPOLLIN;
456 event.data.ptr = info;
457 if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
458 RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
459 "to epoll\n", info->channel_path);
/* Deregister a channel's fd from the global epoll instance.  The NULL
 * event argument is fine for EPOLL_CTL_DEL on modern kernels.
 */
466 remove_channel_from_monitor(struct channel_info *chan_info)
468 if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
469 RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
470 "from epoll\n", chan_info->channel_path);
/* Create the epoll instance and allocate the MAX_EVENTS-entry event
 * buffer used by run_channel_monitor().
 *
 * NOTE(review): epoll_create1() returns -1 on error, never 0, so the
 * "== 0" check below can never catch a failure (and fd 0 would be a
 * legal descriptor anyway) -- this should be "< 0".  Flagged rather
 * than fixed because the error-path lines are not visible here.
 */
477 channel_monitor_init(void)
479 global_event_fd = epoll_create1(0);
480 if (global_event_fd == 0) {
481 RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
482 "error %s\n", strerror(errno));
/* Cache-line aligned allocation from DPDK's heap; freed in
 * channel_monitor_exit().
 */
485 global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
486 * MAX_EVENTS, RTE_CACHE_LINE_SIZE);
487 if (global_events_list == NULL) {
488 RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
/* Monitor loop body (visible portion): wait for epoll events on the
 * registered channels, drop any channel that errored or hung up, read
 * a complete channel_packet from each readable channel (looping until
 * sizeof(pkt) bytes have arrived) and hand it to process_request().
 * After each pass, sleep for time_period_s and apply every enabled
 * policy.
 */
496 run_channel_monitor(void)
501 n_events = epoll_wait(global_event_fd, global_events_list,
505 for (i = 0; i < n_events; i++) {
/* Recover the channel from the user-data pointer stored by
 * add_channel_to_monitor().
 */
506 struct channel_info *chan_info = (struct channel_info *)
507 global_events_list[i].data.ptr;
508 if ((global_events_list[i].events & EPOLLERR) ||
509 (global_events_list[i].events & EPOLLHUP)) {
510 RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
512 chan_info->channel_path);
513 remove_channel(&chan_info);
516 if (global_events_list[i].events & EPOLLIN) {
518 int n_bytes, err = 0;
519 struct channel_packet pkt;
521 int buffer_len = sizeof(pkt);
/* Accumulate reads until the full packet has arrived.
 * NOTE(review): the n_bytes < 0 / EOF handling lines are not
 * visible here -- confirm short reads and read errors are handled
 * before the buffer-advance below in the full source.
 */
523 while (buffer_len > 0) {
524 n_bytes = read(chan_info->fd,
526 if (n_bytes == buffer_len)
530 RTE_LOG(DEBUG, CHANNEL_MONITOR,
532 "channel '%s' read: %s\n",
533 chan_info->channel_path,
535 remove_channel(&chan_info);
538 buffer = (char *)buffer + n_bytes;
539 buffer_len -= n_bytes;
542 process_request(&pkt, chan_info);
/* NOTE(review): rte_delay_us() busy-polls the TSC for the whole
 * period -- this burns a core for time_period_s every iteration;
 * confirm that is intended for this monitor thread.
 */
545 rte_delay_us(time_period_s*1000000);
/* Evaluate every active policy once per period. */
549 for (j = 0; j < MAX_VMS; j++) {
550 if (policies[j].enabled == 1)
551 apply_policy(&policies[j]);