examples/vm_power: allow managing idle cores
[dpdk.git] / examples / vm_power_manager / oob_monitor_x86.c
index 62d503c..a5b1c16 100644 (file)
@@ -22,7 +22,6 @@ void branch_monitor_exit(void)
 /* Number of microseconds between each poll */
 #define INTERVAL 100
 #define PRINT_LOOP_COUNT (1000000/INTERVAL)
-#define RATIO_THRESHOLD 0.03
 #define IA32_PERFEVTSEL0 0x186
 #define IA32_PERFEVTSEL1 0x187
 #define IA32_PERFCTR0 0xc1
@@ -34,12 +33,13 @@ static float
 apply_policy(int core)
 {
        struct core_info *ci;
-       uint64_t counter;
+       uint64_t counter = 0;
        uint64_t branches, branch_misses;
-       uint32_t last_branches, last_branch_misses;
-       int hits_diff, miss_diff;
+       uint64_t last_branches, last_branch_misses;
+       int64_t hits_diff, miss_diff;
        float ratio;
        int ret;
+       int freq_window_idx, up_count = 0, i;
 
        g_active = 0;
        ci = get_core_info();
@@ -55,6 +55,7 @@ apply_policy(int core)
                                core);
        branches = counter;
 
+       counter = 0;
        ret = pread(ci->cd[core].msr_fd, &counter,
                        sizeof(counter), IA32_PERFCTR1);
        if (ret < 0)
@@ -67,13 +68,25 @@ apply_policy(int core)
        ci->cd[core].last_branches = branches;
        ci->cd[core].last_branch_misses = branch_misses;
 
-       hits_diff = (int)branches - (int)last_branches;
+       /*
+        * Intentional right shift to make MSB 0 to avoid
+        * possible signed overflow or truncation.
+        */
+       branches >>= 1;
+       last_branches >>= 1;
+       hits_diff = (int64_t)branches - (int64_t)last_branches;
        if (hits_diff <= 0) {
                /* Likely a counter overflow condition, skip this round */
                return -1.0;
        }
 
-       miss_diff = (int)branch_misses - (int)last_branch_misses;
+       /*
+        * Intentional right shift to make MSB 0 to avoid
+        * possible signed overflow or truncation.
+        */
+       branch_misses >>= 1;
+       last_branch_misses >>= 1;
+       miss_diff = (int64_t)branch_misses - (int64_t)last_branch_misses;
        if (miss_diff <= 0) {
                /* Likely a counter overflow condition, skip this round */
                return -1.0;
@@ -83,16 +96,43 @@ apply_policy(int core)
        g_branch_misses = miss_diff;
 
        if (hits_diff < (INTERVAL*100)) {
-               /* Likely no workload running on this core. Skip. */
-               return -1.0;
+               /* Likely no workload running on this core. */
+               ratio = 0.0;
+       } else {
+               ratio = (float)miss_diff * (float)100 / (float)hits_diff;
        }
 
-       ratio = (float)miss_diff * (float)100 / (float)hits_diff;
-
-       if (ratio < RATIO_THRESHOLD)
-               power_manager_scale_core_min(core);
+       /*
+        * Store the last few directions that the ratio indicates
+        * we should take. If there's on 'up', then we scale up
+        * quickly. If all indicate 'down', only then do we scale
+        * down. Each core_details struct has it's own array.
+        */
+       freq_window_idx = ci->cd[core].freq_window_idx;
+       if (ratio > ci->cd[core].branch_ratio_threshold)
+               ci->cd[core].freq_directions[freq_window_idx] = 1;
        else
-               power_manager_scale_core_max(core);
+               ci->cd[core].freq_directions[freq_window_idx] = 0;
+
+       freq_window_idx++;
+       freq_window_idx = freq_window_idx & (FREQ_WINDOW_SIZE-1);
+       ci->cd[core].freq_window_idx = freq_window_idx;
+
+       up_count = 0;
+       for (i = 0; i < FREQ_WINDOW_SIZE; i++)
+               up_count +=  ci->cd[core].freq_directions[i];
+
+       if (up_count == 0) {
+               if (ci->cd[core].freq_state != FREQ_MIN) {
+                       power_manager_scale_core_min(core);
+                       ci->cd[core].freq_state = FREQ_MIN;
+               }
+       } else {
+               if (ci->cd[core].freq_state != FREQ_MAX) {
+                       power_manager_scale_core_max(core);
+                       ci->cd[core].freq_state = FREQ_MAX;
+               }
+       }
 
        g_active = 1;
        return ratio;