/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */
#include <string.h>
#include <inttypes.h>

#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_malloc.h>

#include "rte_power.h"
#include "rte_power_empty_poll.h"
#define INTERVALS_PER_SECOND 100 /* (10ms) */
#define SECONDS_TO_TRAIN_FOR 2
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
#define DEFAULT_CYCLES_PER_PACKET 800
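
/*
 * The detection state machine is driven by a periodic callback
 * (rte_empty_poll_detection below) which is expected to fire once per
 * interval, i.e. INTERVALS_PER_SECOND times per second (every 10ms).
 * Training therefore accumulates up to
 * INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR = 200 samples per
 * frequency level before its thresholds are considered valid.
 */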
static struct ep_params *ep_params;
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;

/* Per-lcore frequency tables as reported by the power library */
static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];

static uint32_t total_avail_freqs[RTE_MAX_LCORE];

/* Frequency-table indexes used for the LOW/MED/HGH power levels */
static uint32_t freq_index[NUM_FREQ];
static uint32_t
get_freq_index(enum freq_val index)
{
	return freq_index[index];
}
static int
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
{
	int err = 0;
	uint32_t power_freq_index;

	/* A "specific" freq is already a raw index into the frequency
	 * table; otherwise map the LOW/MED/HGH level to its index.
	 */
	if (!specific_freq)
		power_freq_index = get_freq_index(freq);
	else
		power_freq_index = freq;

	err = rte_power_set_freq(lcore_id, power_freq_index);

	return err;
}
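
/*
 * Each worker runs a small per-lcore state machine: TRAINING
 * establishes the empty-poll baselines, then the worker settles into
 * MED_NORMAL and moves between MED_NORMAL and HGH_BUSY as traffic
 * rises and falls. LOW_PURGE exists in the policy but purge mode is
 * not currently supported. The helpers below do the entry/exit work
 * for each state.
 */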
static inline void __attribute__((always_inline))
exit_training_state(struct priority_worker *poll_stats)
{
	RTE_SET_USED(poll_stats);
}

static inline void __attribute__((always_inline))
enter_training_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->cur_freq = LOW;
	poll_stats->queue_state = TRAINING;
}
static inline void __attribute__((always_inline))
enter_normal_state(struct priority_worker *poll_stats)
{
	/* Clear the rolling-average arrays and their counters */
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;
	memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
	poll_stats->pc = 0;

	poll_stats->cur_freq = MED;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = MED_NORMAL;
	RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
	set_power_freq(poll_stats->lcore_id, MED, false);

	poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
	poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
}
static inline void __attribute__((always_inline))
enter_busy_state(struct priority_worker *poll_stats)
{
	memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
	poll_stats->ec = 0;
	memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
	poll_stats->pc = 0;

	poll_stats->cur_freq = HGH;
	poll_stats->iter_counter = 0;
	poll_stats->threshold_ctr = 0;
	poll_stats->queue_state = HGH_BUSY;
	set_power_freq(poll_stats->lcore_id, HGH, false);
}
static inline void __attribute__((always_inline))
enter_purge_state(struct priority_worker *poll_stats)
{
	poll_stats->iter_counter = 0;
	poll_stats->queue_state = LOW_PURGE;
}
static inline void __attribute__((always_inline))
set_state(struct priority_worker *poll_stats,
		enum queue_state new_state)
{
	enum queue_state old_state = poll_stats->queue_state;

	if (old_state != new_state) {

		/* Call any old state exit functions */
		if (old_state == TRAINING)
			exit_training_state(poll_stats);

		/* Call any new state entry functions */
		if (new_state == TRAINING)
			enter_training_state(poll_stats);
		if (new_state == MED_NORMAL)
			enter_normal_state(poll_stats);
		if (new_state == HGH_BUSY)
			enter_busy_state(poll_stats);
		if (new_state == LOW_PURGE)
			enter_purge_state(poll_stats);
	}
}
static inline void __attribute__((always_inline))
set_policy(struct priority_worker *poll_stats,
		struct ep_policy *policy)
{
	set_state(poll_stats, policy->state);

	if (policy->state == TRAINING)
		return;

	/* Note: queue_state values double as thresh[] indexes here,
	 * relying on MED_NORMAL == MED and HGH_BUSY == HGH.
	 */
	poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
	poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;

	poll_stats->thresh[MED_NORMAL].trained = true;
	poll_stats->thresh[HGH_BUSY].trained = true;
}
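
/*
 * Training: for each frequency level (LOW, MED, HGH) the first
 * iteration pins the lcore to that frequency and snapshots the
 * empty-poll counter; the following max_train_iter iterations
 * accumulate the empty dequeues seen per interval. The baseline
 * (base_edpi) is then the mean over those iterations plus a 0.05%
 * margin (base_edpi / 2000) to avoid false negatives when the
 * system is completely idle.
 */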
static void
update_training_stats(struct priority_worker *poll_stats,
		uint32_t freq,
		bool specific_freq,
		uint32_t max_train_iter)
{
	RTE_SET_USED(specific_freq);

	char pfi_str[32];
	uint64_t p0_empty_deq;

	sprintf(pfi_str, "%02d", freq);

	if (poll_stats->cur_freq == freq &&
			poll_stats->thresh[freq].trained == false) {
		if (poll_stats->thresh[freq].cur_train_iter == 0) {

			set_power_freq(poll_stats->lcore_id,
					freq, specific_freq);

			poll_stats->empty_dequeues_prev =
					poll_stats->empty_dequeues;

			poll_stats->thresh[freq].cur_train_iter++;

			return;
		} else if (poll_stats->thresh[freq].cur_train_iter
				<= max_train_iter) {

			p0_empty_deq = poll_stats->empty_dequeues -
					poll_stats->empty_dequeues_prev;

			poll_stats->empty_dequeues_prev =
					poll_stats->empty_dequeues;

			poll_stats->thresh[freq].base_edpi += p0_empty_deq;
			poll_stats->thresh[freq].cur_train_iter++;

		} else {
			if (poll_stats->thresh[freq].trained == false) {
				poll_stats->thresh[freq].base_edpi =
					poll_stats->thresh[freq].base_edpi /
					max_train_iter;

				/* Add a 0.05% margin; this should remove
				 * any false negatives when the system is
				 * 0% busy.
				 */
				poll_stats->thresh[freq].base_edpi +=
					poll_stats->thresh[freq].base_edpi /
					2000;

				poll_stats->thresh[freq].trained = true;
				poll_stats->cur_freq++;
			}
		}
	}
}
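
/*
 * Busyness estimate: the per-interval empty-poll delta (cur_edpi) and
 * packet delta (ppi) are folded into BINS_AV-slot rolling averages,
 * and busyness is how far the average empty-poll rate has fallen
 * below the trained baseline:
 *
 *	percent = 100 - 100 * avg_edpi / base_edpi
 *
 * 0 means the lcore is as idle as the trained baseline; 100 means it
 * never polled empty.
 */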
static inline uint32_t __attribute__((always_inline))
update_stats(struct priority_worker *poll_stats)
{
	uint64_t tot_edpi = 0, tot_ppi = 0;
	uint32_t j, percent;

	struct priority_worker *s = poll_stats;

	uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;

	s->empty_dequeues_prev = s->empty_dequeues;

	uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;

	s->num_dequeue_pkts_prev = s->num_dequeue_pkts;

	if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {

		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
				"cur edpi %"PRId64" "
				"base edpi %"PRId64"\n",
				cur_edpi,
				s->thresh[s->cur_freq].base_edpi);
		/* Return a value the percent check will reject */
		return 1000UL;
	}

	s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
	s->ppi_av[s->pc++ % BINS_AV] = ppi;

	for (j = 0; j < BINS_AV; j++) {
		tot_edpi += s->edpi_av[j];
		tot_ppi += s->ppi_av[j];
	}

	tot_edpi = tot_edpi / BINS_AV;

	percent = 100 - (uint32_t)(((float)tot_edpi /
			(float)s->thresh[s->cur_freq].base_edpi) * 100);

	return (uint32_t)percent;
}
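
/*
 * Normal-state update with hysteresis: a frequency switch only
 * happens after the busyness percent has stayed past the threshold
 * for INTERVALS_PER_SECOND consecutive intervals (roughly one
 * second); a single interval back on the other side resets the
 * counter.
 */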
static inline void __attribute__((always_inline))
update_stats_normal(struct priority_worker *poll_stats)
{
	uint32_t percent;

	if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {

		enum freq_val cur_freq = poll_stats->cur_freq;

		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cur freq is %d, edpi is %"PRIu64"\n",
				cur_freq,
				poll_stats->thresh[cur_freq].base_edpi);
		return;
	}

	percent = update_stats(poll_stats);

	if (percent > 100) {
		/* edpi means empty-poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
		return;
	}

	if (poll_stats->cur_freq == LOW)
		RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
	else if (poll_stats->cur_freq == MED) {

		if (percent >
				poll_stats->thresh[MED].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, HGH_BUSY);
				RTE_LOG(INFO, POWER, "MOVE to HGH\n");
			}

		} else {
			/* reset the counter on a quiet interval */
			poll_stats->threshold_ctr = 0;
		}

	} else if (poll_stats->cur_freq == HGH) {

		if (percent <
				poll_stats->thresh[HGH].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, MED_NORMAL);
				RTE_LOG(INFO, POWER, "MOVE to MED\n");
			}
		} else {
			/* reset the counter on a busy interval */
			poll_stats->threshold_ctr = 0;
		}
	}
}
static int
empty_poll_training(struct priority_worker *poll_stats,
		uint32_t max_train_iter)
{
	/* Skip the first second of intervals to let counters settle */
	if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
		poll_stats->iter_counter++;
		return 0;
	}

	update_training_stats(poll_stats,
			LOW,
			false,
			max_train_iter);

	update_training_stats(poll_stats,
			MED,
			false,
			max_train_iter);

	update_training_stats(poll_stats,
			HGH,
			false,
			max_train_iter);

	if (poll_stats->thresh[LOW].trained == true
			&& poll_stats->thresh[MED].trained == true
			&& poll_stats->thresh[HGH].trained == true) {

		set_state(poll_stats, MED_NORMAL);

		RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
				poll_stats->thresh[LOW].base_edpi);

		RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
				poll_stats->thresh[MED].base_edpi);

		RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
				poll_stats->thresh[HGH].base_edpi);

		RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
				poll_stats->lcore_id);
	}

	return 0;
}
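
/*
 * Timer callback driving the state machine of every worker lcore.
 * A minimal wiring sketch with the rte_timer library (illustrative
 * only; the timer variable and setup below are assumptions, not part
 * of this file):
 *
 *	static struct rte_timer ep_timer;
 *
 *	rte_timer_subsystem_init();
 *	rte_timer_init(&ep_timer);
 *	rte_timer_reset(&ep_timer, rte_get_timer_hz() / 100, PERIODICAL,
 *			rte_get_master_lcore(), rte_empty_poll_detection,
 *			NULL);
 *
 * The dispatching lcore must still call rte_timer_manage() in its
 * loop for the callback to actually fire every 10ms.
 */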
void __rte_experimental
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
{
	uint32_t i;

	struct priority_worker *poll_stats;

	RTE_SET_USED(tim);
	RTE_SET_USED(arg);

	for (i = 0; i < NUM_NODES; i++) {

		poll_stats = &(ep_params->wrk_data.wrk_stats[i]);

		if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
			continue;

		switch (poll_stats->queue_state) {
		case TRAINING:
			empty_poll_training(poll_stats,
					ep_params->max_train_iter);
			break;

		case HGH_BUSY:
		case MED_NORMAL:
			update_stats_normal(poll_stats);
			break;

		case LOW_PURGE:
			break;

		default:
			break;
		}
	}
}
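
/*
 * One-time setup. A minimal usage sketch, assuming the caller trains
 * from scratch (the variable names here are hypothetical):
 *
 *	struct ep_params *ep = NULL;
 *	struct ep_policy policy;
 *
 *	policy.state = TRAINING;
 *	if (rte_power_empty_poll_stat_init(&ep, NULL, &policy) < 0)
 *		rte_exit(EXIT_FAILURE, "empty poll init failed\n");
 *
 * Passing freq_tlb == NULL selects the built-in frequency indexes; a
 * pre-trained policy can instead carry med/hgh base_edpi values with
 * state set to MED_NORMAL.
 */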
int __rte_experimental
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
		struct ep_policy *policy)
{
	uint32_t i;

	/* Allocate the ep_params structure */
	ep_params = rte_zmalloc_socket(NULL,
			sizeof(struct ep_params),
			0,
			rte_socket_id());
	if (ep_params == NULL)
		return -1;

	if (freq_tlb == NULL) {
		/* Fall back to the built-in frequency-table indexes */
		freq_index[LOW] = 14;
		freq_index[MED] = 9;
		freq_index[HGH] = 1;
	} else {
		freq_index[LOW] = freq_tlb[LOW];
		freq_index[MED] = freq_tlb[MED];
		freq_index[HGH] = freq_tlb[HGH];
	}

	RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");

	/* Train for a pre-defined period */
	ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;

	struct stats_data *w = &ep_params->wrk_data;

	*eptr = ep_params;

	/* Initialize all wrk_stats state */
	for (i = 0; i < NUM_NODES; i++) {

		if (rte_lcore_is_enabled(i) == 0)
			continue;

		/* Init the freqs table */
		total_avail_freqs[i] = rte_power_freqs(i,
				avail_freqs[i],
				NUM_FREQS);

		RTE_LOG(INFO, POWER, "total avail freqs is %d, lcore id %d\n",
				total_avail_freqs[i],
				i);

		if (get_freq_index(LOW) > total_avail_freqs[i])
			return -1;

		if (rte_get_master_lcore() != i) {
			w->wrk_stats[i].lcore_id = i;
			set_policy(&w->wrk_stats[i], policy);
		}
	}

	return 0;
}
void __rte_experimental
rte_power_empty_poll_stat_free(void)
{
	RTE_LOG(INFO, POWER, "Close the Empty Poll\n");

	if (ep_params != NULL)
		rte_free(ep_params);
}
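
/*
 * Datapath hooks: the polling loop reports the outcome of every rx
 * burst so the detection timer can compute edpi/ppi deltas. A sketch
 * of the intended call pattern (illustrative only):
 *
 *	nb_rx = rte_eth_rx_burst(port, queue, pkts, BURST_SIZE);
 *	if (nb_rx == 0)
 *		rte_power_empty_poll_stat_update(lcore_id);
 *	else
 *		rte_power_poll_stat_update(lcore_id, nb_rx);
 */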
int __rte_experimental
rte_power_empty_poll_stat_update(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->empty_dequeues++;

	return 0;
}
int __rte_experimental
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	poll_stats->num_dequeue_pkts += nb_pkt;

	return 0;
}
uint64_t __rte_experimental
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->empty_dequeues;
}
uint64_t __rte_experimental
rte_power_poll_stat_fetch(unsigned int lcore_id)
{
	struct priority_worker *poll_stats;

	if (lcore_id >= NUM_NODES)
		return -1;

	poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

	if (poll_stats->lcore_id == 0)
		poll_stats->lcore_id = lcore_id;

	return poll_stats->num_dequeue_pkts;
}