4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include <sys/types.h>
42 #include <sys/epoll.h>
43 #include <sys/queue.h>
47 #include <rte_memory.h>
48 #include <rte_malloc.h>
49 #include <rte_atomic.h>
50 #include <rte_cycles.h>
51 #include <rte_ethdev.h>
52 #include <rte_pmd_i40e.h>
54 #include <libvirt/libvirt.h>
55 #include "channel_monitor.h"
56 #include "channel_commands.h"
57 #include "channel_manager.h"
58 #include "power_manager.h"
/* Log type named by the RTE_LOG calls in this file; aliased to USER1. */
60 #define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1
/* Number of epoll_event entries allocated in channel_monitor_init(). */
62 #define MAX_EVENTS 256
/*
 * Previous packet count and previous TSC sample. get_pkt_diff() indexes both
 * arrays with values taken from pol->pfid[].
 */
64 uint64_t vsi_pkt_count_prev[384];
65 uint64_t rdtsc_prev[384];
/*
 * Pause used in run_channel_monitor(); multiplied by 1000000 before being
 * handed to rte_delay_us(), so the unit here is seconds.
 */
67 double time_period_s = 1;
/*
 * NOTE(review): no visible line in this listing reads or writes run_loop or
 * policy_is_set; their uses are presumably on lines missing from the listing.
 */
68 static volatile unsigned run_loop = 1;
/* Descriptor returned by epoll_create1() in channel_monitor_init(). */
69 static int global_event_fd;
70 static unsigned int policy_is_set;
/*
 * Event buffer handed to epoll_wait(); allocated with rte_malloc() in
 * channel_monitor_init() and released in channel_monitor_exit().
 */
71 static struct epoll_event *global_events_list;
/*
 * Policy slots. update_policy() reuses a slot whose enabled field is 0 and
 * run_channel_monitor() applies every slot whose enabled field is 1.
 */
72 static struct policy policies[MAX_VMS];
/*
 * Shut the monitor down. The only statement visible here releases the epoll
 * event buffer with rte_free().
 * NOTE(review): embedded line numbers 75-76 and 78 are missing from this
 * listing, so any further shutdown step is not visible and is not described.
 */
74 void channel_monitor_exit(void)
77 rte_free(global_events_list);
/*
 * Flag one core of a policy as shared with a different VM.
 *
 * pNo: index into policies[]
 * z:   index into that policy core_share[] array
 * x:   index into lvm_info[]
 * t:   index into lvm_info[x].pcpus[]
 *
 * When the policy pcpu equals lvm_info[x].pcpus[t] and the two VM names
 * differ (strcmp is non-zero), the entry status is set to 1 and the pcpu is
 * passed to power_manager_scale_core_max().
 * NOTE(review): the return type line and the braces are missing from this
 * listing.
 */
81 core_share(int pNo, int z, int x, int t)
83 if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
/* Same VM name means the policy owner itself; only other VMs count. */
84 if (strcmp(policies[pNo].pkt.vm_name,
85 lvm_info[x].vm_name) != 0) {
86 policies[pNo].core_share[z].status = 1;
87 power_manager_scale_core_max(
88 policies[pNo].core_share[z].pcpu);
/*
 * Recompute the core_share[].status flags of policy pNo.
 *
 * get_all_vm() fills noVms and noVcpus (presumably the number of VMs and the
 * total number of vcpus on the platform; confirm against its definition).
 * The status of the first noVcpus entries is cleared, then core_share() is
 * called for every combination of policy vcpu, VM in lvm_info[] and cpu of
 * that VM.
 *
 * NOTE(review): the reset loop is bounded by noVcpus while the scan loop is
 * bounded by pkt.num_vcpu; confirm core_share[] holds at least noVcpus
 * entries.
 */
94 core_share_status(int pNo)
97 int noVms, noVcpus, z, x, t;
99 get_all_vm(&noVms, &noVcpus);
101 /* Reset Core Share Status. */
102 for (z = 0; z < noVcpus; z++)
103 policies[pNo].core_share[z].status = 0;
105 /* Foreach vcpu in a policy. */
106 for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
107 /* Foreach VM on the platform. */
108 for (x = 0; x < noVms; x++) {
109 /* Foreach vcpu of VMs on platform. */
110 for (t = 0; t < lvm_info[x].num_cpus; t++)
111 core_share(pNo, z, x, t);
/*
 * Fill pol->core_share[count].pcpu for each vcpu listed in the policy.
 *
 * get_info_vm() is called with the policy VM name and a local info object.
 * For each of the pkt.num_vcpu entries of pkt.vcpu_to_control[], the matching
 * element of info.pcpu_mask[] is read and scanned bit by bit.
 *
 * NOTE(review): the declarations of info, pcpu, count and mask_u64b, and the
 * last argument of the log call, are on lines missing from this listing.
 */
117 get_pcpu_to_control(struct policy *pol)
120 /* Convert vcpu to pcpu. */
125 RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
127 get_info_vm(pol->pkt.vm_name, &info);
129 for (count = 0; count < pol->pkt.num_vcpu; count++) {
130 mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
/*
 * Walk the mask upward from bit 0, clearing the current bit after each step
 * so the loop ends once no bits remain. Each set bit overwrites the stored
 * pcpu, so with the visible statements the highest set bit is the one kept.
 */
131 for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
132 if ((mask_u64b >> pcpu) & 1)
133 pol->core_share[count].pcpu = pcpu;
/*
 * Query the VF id for each MAC address carried by the policy.
 *
 * For each of the pkt.nb_mac_to_monitor entries, ports 0 to nb_ports-1
 * (nb_ports from rte_eth_dev_count()) are tried with
 * rte_pmd_i40e_query_vfid_by_mac(), passing pkt.vfid[i] cast to
 * struct ether_addr. A result other than -EINVAL enters the inner if branch,
 * whose body is missing from this listing.
 *
 * The caller update_policy() compares the return value against -1.
 *
 * NOTE(review): the return type, the statements inside the success branch,
 * the rest of the log message and all return statements are missing from
 * this listing.
 * NOTE(review): the error test compares ret with positive ENODEV while the
 * other two codes are negated; this looks like a missing minus sign.
 */
139 get_pfid(struct policy *pol)
142 int i, x, ret = 0, nb_ports;
144 nb_ports = rte_eth_dev_count();
145 for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {
147 for (x = 0; x < nb_ports; x++) {
148 ret = rte_pmd_i40e_query_vfid_by_mac(x,
149 (struct ether_addr *)&(pol->pkt.vfid[i]));
150 if (ret != -EINVAL) {
155 if (ret == -EINVAL || ret == -ENOTSUP || ret == ENODEV) {
156 RTE_LOG(INFO, CHANNEL_MONITOR,
157 "Error with Policy. MAC not found on "
/*
 * Store the policy carried by pkt into the policies[] table.
 *
 * First loop: a slot whose stored vm_name equals pkt->vm_name has its packet
 * overwritten, then get_pcpu_to_control(), get_pfid() and core_share_status()
 * run and the slot is marked enabled.
 * Second loop: the same steps are applied to a slot whose enabled field is 0.
 *
 * NOTE(review): the return type, the statements taken when get_pfid()
 * returns -1, every use of the updated flag, and whatever ends each loop or
 * skips the second loop are on lines missing from this listing.
 */
168 update_policy(struct channel_packet *pkt)
171 unsigned int updated = 0;
/* Pass 1: refresh an existing slot that already belongs to this VM name. */
173 for (int i = 0; i < MAX_VMS; i++) {
174 if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
175 policies[i].pkt = *pkt;
176 get_pcpu_to_control(&policies[i]);
177 if (get_pfid(&policies[i]) == -1) {
181 core_share_status(i);
182 policies[i].enabled = 1;
/* Pass 2: place the policy into a slot that is not enabled. */
187 for (int i = 0; i < MAX_VMS; i++) {
188 if (policies[i].enabled == 0) {
189 policies[i].pkt = *pkt;
190 get_pcpu_to_control(&policies[i]);
191 if (get_pfid(&policies[i]) == -1)
193 core_share_status(i);
194 policies[i].enabled = 1;
/*
 * Compute a packets-per-second style figure for the VFs of a policy.
 *
 * For each of the pkt.nb_mac_to_monitor entries the ipackets counter is read
 * through rte_pmd_i40e_get_vf_stats() and added to a running total; the
 * previously stored count for the same pfid is added to a second total and
 * then replaced by the new count. The difference of the two totals is scaled
 * by rte_get_tsc_hz() divided by the TSC ticks elapsed since the stored
 * sample.
 *
 * The caller apply_traffic_profile() assigns the return value to diff.
 *
 * NOTE(review): the return type, several declarations (x, vsi_pkt_total), the
 * branch taken when the stats call fails and the return statement are on
 * lines missing from this listing.
 * NOTE(review): after the loop the code indexes pol->pfid[x-1]; if
 * nb_mac_to_monitor is 0 then x is still 0 and the index is -1.
 * NOTE(review): the loop index x (a MAC slot) is passed as the first argument
 * of rte_pmd_i40e_get_vf_stats(), whereas get_pfid() passes a port number as
 * the first argument of the related query call; confirm this is intended.
 */
203 get_pkt_diff(struct policy *pol)
206 uint64_t vsi_pkt_count,
208 vsi_pkt_count_prev_total = 0;
209 double rdtsc_curr, rdtsc_diff, diff;
211 struct rte_eth_stats vf_stats;
213 for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {
216 if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
217 vsi_pkt_count = vf_stats.ipackets;
221 vsi_pkt_total += vsi_pkt_count;
223 vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
224 vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
/* Timestamp bookkeeping is keyed by the pfid of the last MAC processed. */
227 rdtsc_curr = rte_rdtsc_precise();
228 rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
229 rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;
/* Packet delta times (ticks per second / elapsed ticks). */
231 diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
232 ((double)rte_get_tsc_hz() / rdtsc_diff);
/*
 * Scale the cores of a policy according to the measured packet rate.
 *
 * diff comes from get_pkt_diff() and is compared with two thresholds from
 * pkt.traffic_policy:
 *   diff >= max_max_packet_thresh  -> power_manager_scale_core_max()
 *   diff >= avg_max_packet_thresh  -> power_manager_scale_core_med()
 *   diff <  avg_max_packet_thresh  -> power_manager_scale_core_min()
 * In every branch, cores whose core_share status is 1 are skipped.
 *
 * NOTE(review): the return type and the declarations of count and diff are on
 * lines missing from this listing.
 */
238 apply_traffic_profile(struct policy *pol)
244 diff = get_pkt_diff(pol);
246 RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");
248 if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
249 for (count = 0; count < pol->pkt.num_vcpu; count++) {
250 if (pol->core_share[count].status != 1)
251 power_manager_scale_core_max(
252 pol->core_share[count].pcpu);
254 } else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
255 for (count = 0; count < pol->pkt.num_vcpu; count++) {
256 if (pol->core_share[count].status != 1)
257 power_manager_scale_core_med(
258 pol->core_share[count].pcpu);
/* Complement of the previous test, so one of the three branches always runs. */
260 } else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
261 for (count = 0; count < pol->pkt.num_vcpu; count++) {
262 if (pol->core_share[count].status != 1)
263 power_manager_scale_core_min(
264 pol->core_share[count].pcpu);
/*
 * Scale the cores of a policy according to the current local hour.
 *
 * The current hour (ptm->tm_hour) is compared, for each x below HOURS, with
 * three tables in pkt.timer_policy:
 *   busy_hours[x]                    -> scale unshared cores to max
 *   quiet_hours[x]                   -> scale unshared cores to min
 *   hours_to_use_traffic_profile[x]  -> defer to apply_traffic_profile()
 * Cores whose core_share status is 1 are skipped in the first two cases.
 *
 * NOTE(review): the return type and the declarations of tv, ptm, x and count
 * are on lines missing from this listing.
 * NOTE(review): the result of localtime() is used without a NULL check, and
 * time_string is formatted but not read by any visible line.
 */
270 apply_time_profile(struct policy *pol)
276 char time_string[40];
278 /* Obtain the time of day, and convert it to a tm struct. */
279 gettimeofday(&tv, NULL);
280 ptm = localtime(&tv.tv_sec);
281 /* Format the date and time, down to a single second. */
282 strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);
284 for (x = 0; x < HOURS; x++) {
286 if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
287 for (count = 0; count < pol->pkt.num_vcpu; count++) {
288 if (pol->core_share[count].status != 1) {
289 power_manager_scale_core_max(
290 pol->core_share[count].pcpu);
291 RTE_LOG(INFO, CHANNEL_MONITOR,
292 "Scaling up core %d to max\n",
293 pol->core_share[count].pcpu);
297 } else if (ptm->tm_hour ==
298 pol->pkt.timer_policy.quiet_hours[x]) {
299 for (count = 0; count < pol->pkt.num_vcpu; count++) {
300 if (pol->core_share[count].status != 1) {
301 power_manager_scale_core_min(
302 pol->core_share[count].pcpu);
303 RTE_LOG(INFO, CHANNEL_MONITOR,
304 "Scaling down core %d to min\n",
305 pol->core_share[count].pcpu);
309 } else if (ptm->tm_hour ==
310 pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
311 apply_traffic_profile(pol);
/*
 * Scale the cores of a policy according to the workload level in the packet.
 *
 *   pkt.workload == HIGH    -> power_manager_scale_core_max()
 *   pkt.workload == MEDIUM  -> power_manager_scale_core_med()
 *   pkt.workload == LOW     -> power_manager_scale_core_min()
 * Any other value leaves the cores untouched. Cores whose core_share status
 * is 1 are skipped.
 *
 * NOTE(review): the return type and the declaration of count are on lines
 * missing from this listing.
 */
318 apply_workload_profile(struct policy *pol)
323 if (pol->pkt.workload == HIGH) {
324 for (count = 0; count < pol->pkt.num_vcpu; count++) {
325 if (pol->core_share[count].status != 1)
326 power_manager_scale_core_max(
327 pol->core_share[count].pcpu);
329 } else if (pol->pkt.workload == MEDIUM) {
330 for (count = 0; count < pol->pkt.num_vcpu; count++) {
331 if (pol->core_share[count].status != 1)
332 power_manager_scale_core_med(
333 pol->core_share[count].pcpu);
335 } else if (pol->pkt.workload == LOW) {
336 for (count = 0; count < pol->pkt.num_vcpu; count++) {
337 if (pol->core_share[count].status != 1)
338 power_manager_scale_core_min(
339 pol->core_share[count].pcpu);
/*
 * Dispatch a policy to the handler selected by pkt.policy_to_use:
 * TRAFFIC -> apply_traffic_profile(), TIME -> apply_time_profile(),
 * WORKLOAD -> apply_workload_profile(). No handler runs for other values.
 * NOTE(review): the return type line is missing from this listing.
 */
345 apply_policy(struct policy *pol)
348 struct channel_packet *pkt = &pol->pkt;
350 /*Check policy to use*/
351 if (pkt->policy_to_use == TRAFFIC)
352 apply_traffic_profile(pol);
353 else if (pkt->policy_to_use == TIME)
354 apply_time_profile(pol);
355 else if (pkt->policy_to_use == WORKLOAD)
356 apply_workload_profile(pol);
/*
 * Handle one packet received on a channel.
 *
 * The channel status is first moved from CONNECTED to PROCESSING with an
 * atomic compare-and-set; a result of 0 means the swap did not happen.
 * For a CPU_POWER command the physical core mask is obtained from
 * get_pcpus_mask(). A mask with exactly one bit set is handled with the
 * per-core power_manager calls; the second group of case labels uses the
 * per-mask calls. For a PKT_POLICY command a log line is emitted.
 * At the end the status is moved back from PROCESSING to CONNECTED.
 *
 * NOTE(review): the return type, the declaration of core_mask, the early
 * exits after the first two checks, both switch heads with their break
 * statements, and the statements following the policy log line are on lines
 * missing from this listing.
 * NOTE(review): the mask lookup uses pkt->resource_id while the error log
 * prints pkt->unit; confirm these name the same field.
 */
361 process_request(struct channel_packet *pkt, struct channel_info *chan_info)
365 if (chan_info == NULL)
368 if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
369 CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
372 if (pkt->command == CPU_POWER) {
373 core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
374 if (core_mask == 0) {
375 RTE_LOG(ERR, CHANNEL_MONITOR, "Error get physical CPU mask for "
376 "channel '%s' using vCPU(%u)\n", chan_info->channel_path,
377 (unsigned)pkt->unit);
/* Exactly one bit set: act on that single core. */
380 if (__builtin_popcountll(core_mask) == 1) {
/* ffsll is 1-based, so subtract 1 to get the core number. */
382 unsigned core_num = __builtin_ffsll(core_mask) - 1;
385 case(CPU_POWER_SCALE_MIN):
386 power_manager_scale_core_min(core_num);
388 case(CPU_POWER_SCALE_MAX):
389 power_manager_scale_core_max(core_num);
391 case(CPU_POWER_SCALE_DOWN):
392 power_manager_scale_core_down(core_num);
394 case(CPU_POWER_SCALE_UP):
395 power_manager_scale_core_up(core_num);
397 case(CPU_POWER_ENABLE_TURBO):
398 power_manager_enable_turbo_core(core_num);
400 case(CPU_POWER_DISABLE_TURBO):
401 power_manager_disable_turbo_core(core_num);
/* Same commands again, applied to the whole mask instead of one core. */
408 case(CPU_POWER_SCALE_MIN):
409 power_manager_scale_mask_min(core_mask);
411 case(CPU_POWER_SCALE_MAX):
412 power_manager_scale_mask_max(core_mask);
414 case(CPU_POWER_SCALE_DOWN):
415 power_manager_scale_mask_down(core_mask);
417 case(CPU_POWER_SCALE_UP):
418 power_manager_scale_mask_up(core_mask);
420 case(CPU_POWER_ENABLE_TURBO):
421 power_manager_enable_turbo_mask(core_mask);
423 case(CPU_POWER_DISABLE_TURBO):
424 power_manager_disable_turbo_mask(core_mask);
433 if (pkt->command == PKT_POLICY) {
434 RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n");
439 /* Return is not checked as channel status may have been set to DISABLED
440 * from management thread
442 rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
443 CHANNEL_MGR_CHANNEL_CONNECTED);
/*
 * Register a channel descriptor with the global epoll instance.
 *
 * chan_info: pointer to the channel pointer; it is dereferenced once and the
 *            resulting channel_info is stored in event.data.ptr, which is how
 *            run_channel_monitor() recovers the channel for each event.
 * The descriptor info->fd is added for EPOLLIN only. An error is logged when
 * epoll_ctl() returns a negative value.
 *
 * NOTE(review): the return type and the return statements are on lines
 * missing from this listing.
 */
449 add_channel_to_monitor(struct channel_info **chan_info)
451 struct channel_info *info = *chan_info;
452 struct epoll_event event;
454 event.events = EPOLLIN;
455 event.data.ptr = info;
456 if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
457 RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
458 "to epoll\n", info->channel_path);
/*
 * Remove a channel descriptor from the global epoll instance with
 * EPOLL_CTL_DEL, passing NULL as the event argument. An error is logged when
 * epoll_ctl() returns a negative value.
 * NOTE(review): the return type and the return statements are on lines
 * missing from this listing.
 */
465 remove_channel_from_monitor(struct channel_info *chan_info)
467 if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
468 RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
469 "from epoll\n", chan_info->channel_path);
/*
 * Create the epoll instance and allocate the event buffer used by the
 * monitor loop.
 *
 * The buffer holds MAX_EVENTS epoll_event entries and is allocated with
 * rte_malloc() using RTE_CACHE_LINE_SIZE alignment. Errors are logged for a
 * failed epoll creation check and for a NULL allocation.
 *
 * NOTE(review): epoll_create1() reports failure by returning -1, and 0 is a
 * valid descriptor value, so the comparison with 0 below looks wrong; a
 * negative-value test is the usual form.
 * NOTE(review): the return type, the return statements and the end of the
 * second log message are on lines missing from this listing.
 */
476 channel_monitor_init(void)
478 global_event_fd = epoll_create1(0);
479 if (global_event_fd == 0) {
480 RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
481 "error %s\n", strerror(errno));
484 global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
485 * MAX_EVENTS, RTE_CACHE_LINE_SIZE);
486 if (global_events_list == NULL) {
487 RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
/*
 * Monitor loop body: wait for channel events, service them, then apply the
 * enabled policies.
 *
 * For each event returned by epoll_wait():
 *   - EPOLLERR or EPOLLHUP: a debug message is logged and the channel is
 *     passed to remove_channel().
 *   - EPOLLIN: a struct channel_packet is filled by repeated read() calls
 *     until buffer_len reaches 0, then handed to process_request().
 * Afterwards the thread delays for time_period_s seconds via rte_delay_us()
 * and apply_policy() runs on every policies[] slot whose enabled field is 1.
 *
 * NOTE(review): the return type, the outer loop and its exit condition, the
 * remaining epoll_wait() and read() arguments, the declaration of buffer, the
 * read error test and the statements that leave the read loop are on lines
 * missing from this listing.
 */
495 run_channel_monitor(void)
500 n_events = epoll_wait(global_event_fd, global_events_list,
504 for (i = 0; i < n_events; i++) {
/* data.ptr was set to the channel_info in add_channel_to_monitor(). */
505 struct channel_info *chan_info = (struct channel_info *)
506 global_events_list[i].data.ptr;
507 if ((global_events_list[i].events & EPOLLERR) ||
508 (global_events_list[i].events & EPOLLHUP)) {
509 RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
511 chan_info->channel_path);
512 remove_channel(&chan_info);
515 if (global_events_list[i].events & EPOLLIN) {
517 int n_bytes, err = 0;
518 struct channel_packet pkt;
520 int buffer_len = sizeof(pkt);
/* Keep reading until the whole packet has arrived; reads may be partial. */
522 while (buffer_len > 0) {
523 n_bytes = read(chan_info->fd,
525 if (n_bytes == buffer_len)
529 RTE_LOG(DEBUG, CHANNEL_MONITOR,
531 "channel '%s' read: %s\n",
532 chan_info->channel_path,
534 remove_channel(&chan_info);
/* Advance past the bytes already received. */
537 buffer = (char *)buffer + n_bytes;
538 buffer_len -= n_bytes;
541 process_request(&pkt, chan_info);
/* time_period_s is in seconds; rte_delay_us() takes microseconds. */
544 rte_delay_us(time_period_s*1000000);
546 for (int j = 0; j < MAX_VMS; j++) {
547 if (policies[j].enabled == 1)
548 apply_policy(&policies[j]);