/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */
#include <stdio.h>
#include <string.h>

#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_malloc.h>

#include "rte_power.h"
#include "rte_power_empty_poll.h"
15 #define INTERVALS_PER_SECOND 100 /* (10ms) */
16 #define SECONDS_TO_TRAIN_FOR 2
17 #define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
18 #define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
19 #define DEFAULT_CYCLES_PER_PACKET 800
21 static struct ep_params *ep_params;
22 static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
23 static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;
25 static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];
27 static uint32_t total_avail_freqs[RTE_MAX_LCORE];
29 static uint32_t freq_index[NUM_FREQ];
32 get_freq_index(enum freq_val index)
34 return freq_index[index];
39 set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
42 uint32_t power_freq_index;
44 power_freq_index = get_freq_index(freq);
46 power_freq_index = freq;
48 err = rte_power_set_freq(lcore_id, power_freq_index);
54 static inline void __attribute__((always_inline))
55 exit_training_state(struct priority_worker *poll_stats)
57 RTE_SET_USED(poll_stats);
60 static inline void __attribute__((always_inline))
61 enter_training_state(struct priority_worker *poll_stats)
63 poll_stats->iter_counter = 0;
64 poll_stats->cur_freq = LOW;
65 poll_stats->queue_state = TRAINING;
68 static inline void __attribute__((always_inline))
69 enter_normal_state(struct priority_worker *poll_stats)
71 /* Clear the averages arrays and strs */
72 memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
74 memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
77 poll_stats->cur_freq = MED;
78 poll_stats->iter_counter = 0;
79 poll_stats->threshold_ctr = 0;
80 poll_stats->queue_state = MED_NORMAL;
81 RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
82 set_power_freq(poll_stats->lcore_id, MED, false);
84 poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
85 poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
88 static inline void __attribute__((always_inline))
89 enter_busy_state(struct priority_worker *poll_stats)
91 memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
93 memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
96 poll_stats->cur_freq = HGH;
97 poll_stats->iter_counter = 0;
98 poll_stats->threshold_ctr = 0;
99 poll_stats->queue_state = HGH_BUSY;
100 set_power_freq(poll_stats->lcore_id, HGH, false);
103 static inline void __attribute__((always_inline))
104 enter_purge_state(struct priority_worker *poll_stats)
106 poll_stats->iter_counter = 0;
107 poll_stats->queue_state = LOW_PURGE;
110 static inline void __attribute__((always_inline))
111 set_state(struct priority_worker *poll_stats,
112 enum queue_state new_state)
114 enum queue_state old_state = poll_stats->queue_state;
115 if (old_state != new_state) {
117 /* Call any old state exit functions */
118 if (old_state == TRAINING)
119 exit_training_state(poll_stats);
121 /* Call any new state entry functions */
122 if (new_state == TRAINING)
123 enter_training_state(poll_stats);
124 if (new_state == MED_NORMAL)
125 enter_normal_state(poll_stats);
126 if (new_state == HGH_BUSY)
127 enter_busy_state(poll_stats);
128 if (new_state == LOW_PURGE)
129 enter_purge_state(poll_stats);
133 static inline void __attribute__((always_inline))
134 set_policy(struct priority_worker *poll_stats,
135 struct ep_policy *policy)
137 set_state(poll_stats, policy->state);
139 if (policy->state == TRAINING)
142 poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
143 poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;
145 poll_stats->thresh[MED_NORMAL].trained = true;
146 poll_stats->thresh[HGH_BUSY].trained = true;
151 update_training_stats(struct priority_worker *poll_stats,
154 uint32_t max_train_iter)
156 RTE_SET_USED(specific_freq);
159 uint64_t p0_empty_deq;
161 sprintf(pfi_str, "%02d", freq);
163 if (poll_stats->cur_freq == freq &&
164 poll_stats->thresh[freq].trained == false) {
165 if (poll_stats->thresh[freq].cur_train_iter == 0) {
167 set_power_freq(poll_stats->lcore_id,
168 freq, specific_freq);
170 poll_stats->empty_dequeues_prev =
171 poll_stats->empty_dequeues;
173 poll_stats->thresh[freq].cur_train_iter++;
176 } else if (poll_stats->thresh[freq].cur_train_iter
179 p0_empty_deq = poll_stats->empty_dequeues -
180 poll_stats->empty_dequeues_prev;
182 poll_stats->empty_dequeues_prev =
183 poll_stats->empty_dequeues;
185 poll_stats->thresh[freq].base_edpi += p0_empty_deq;
186 poll_stats->thresh[freq].cur_train_iter++;
189 if (poll_stats->thresh[freq].trained == false) {
190 poll_stats->thresh[freq].base_edpi =
191 poll_stats->thresh[freq].base_edpi /
194 /* Add on a factor of 0.05%
195 * this should remove any
196 * false negatives when the system is 0% busy
198 poll_stats->thresh[freq].base_edpi +=
199 poll_stats->thresh[freq].base_edpi / 2000;
201 poll_stats->thresh[freq].trained = true;
202 poll_stats->cur_freq++;
209 static inline uint32_t __attribute__((always_inline))
210 update_stats(struct priority_worker *poll_stats)
212 uint64_t tot_edpi = 0, tot_ppi = 0;
215 struct priority_worker *s = poll_stats;
217 uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;
219 s->empty_dequeues_prev = s->empty_dequeues;
221 uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;
223 s->num_dequeue_pkts_prev = s->num_dequeue_pkts;
225 if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {
227 /* edpi mean empty poll counter difference per interval */
228 RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
232 s->thresh[s->cur_freq].base_edpi);
233 /* Value to make us fail need debug log*/
237 s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
238 s->ppi_av[s->pc++ % BINS_AV] = ppi;
240 for (j = 0; j < BINS_AV; j++) {
241 tot_edpi += s->edpi_av[j];
242 tot_ppi += s->ppi_av[j];
245 tot_edpi = tot_edpi / BINS_AV;
247 percent = 100 - (uint32_t)(((float)tot_edpi /
248 (float)s->thresh[s->cur_freq].base_edpi) * 100);
250 return (uint32_t)percent;
254 static inline void __attribute__((always_inline))
255 update_stats_normal(struct priority_worker *poll_stats)
259 if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {
261 enum freq_val cur_freq = poll_stats->cur_freq;
263 /* edpi mean empty poll counter difference per interval */
264 RTE_LOG(DEBUG, POWER, "cure freq is %d, edpi is %lu\n",
266 poll_stats->thresh[cur_freq].base_edpi);
270 percent = update_stats(poll_stats);
273 /* edpi mean empty poll counter difference per interval */
274 RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
278 if (poll_stats->cur_freq == LOW)
279 RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
280 else if (poll_stats->cur_freq == MED) {
283 poll_stats->thresh[MED].threshold_percent) {
285 if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
286 poll_stats->threshold_ctr++;
288 set_state(poll_stats, HGH_BUSY);
289 RTE_LOG(INFO, POWER, "MOVE to HGH\n");
294 poll_stats->threshold_ctr = 0;
297 } else if (poll_stats->cur_freq == HGH) {
300 poll_stats->thresh[HGH].threshold_percent) {
302 if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
303 poll_stats->threshold_ctr++;
305 set_state(poll_stats, MED_NORMAL);
306 RTE_LOG(INFO, POWER, "MOVE to MED\n");
310 poll_stats->threshold_ctr = 0;
317 empty_poll_training(struct priority_worker *poll_stats,
318 uint32_t max_train_iter)
321 if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
322 poll_stats->iter_counter++;
327 update_training_stats(poll_stats,
332 update_training_stats(poll_stats,
337 update_training_stats(poll_stats,
343 if (poll_stats->thresh[LOW].trained == true
344 && poll_stats->thresh[MED].trained == true
345 && poll_stats->thresh[HGH].trained == true) {
347 set_state(poll_stats, MED_NORMAL);
349 RTE_LOG(INFO, POWER, "LOW threshold is %lu\n",
350 poll_stats->thresh[LOW].base_edpi);
352 RTE_LOG(INFO, POWER, "MED threshold is %lu\n",
353 poll_stats->thresh[MED].base_edpi);
356 RTE_LOG(INFO, POWER, "HIGH threshold is %lu\n",
357 poll_stats->thresh[HGH].base_edpi);
359 RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
360 poll_stats->lcore_id);
366 void __rte_experimental
367 rte_empty_poll_detection(struct rte_timer *tim, void *arg)
372 struct priority_worker *poll_stats;
378 for (i = 0; i < NUM_NODES; i++) {
380 poll_stats = &(ep_params->wrk_data.wrk_stats[i]);
382 if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
385 switch (poll_stats->queue_state) {
387 empty_poll_training(poll_stats,
388 ep_params->max_train_iter);
393 update_stats_normal(poll_stats);
407 int __rte_experimental
408 rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
409 struct ep_policy *policy)
412 /* Allocate the ep_params structure */
413 ep_params = rte_zmalloc_socket(NULL,
414 sizeof(struct ep_params),
421 if (freq_tlb == NULL) {
422 freq_index[LOW] = 14;
426 freq_index[LOW] = freq_tlb[LOW];
427 freq_index[MED] = freq_tlb[MED];
428 freq_index[HGH] = freq_tlb[HGH];
431 RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");
433 /* Train for pre-defined period */
434 ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;
436 struct stats_data *w = &ep_params->wrk_data;
440 /* initialize all wrk_stats state */
441 for (i = 0; i < NUM_NODES; i++) {
443 if (rte_lcore_is_enabled(i) == 0)
445 /*init the freqs table */
446 total_avail_freqs[i] = rte_power_freqs(i,
450 RTE_LOG(INFO, POWER, "total avail freq is %d , lcoreid %d\n",
451 total_avail_freqs[i],
454 if (get_freq_index(LOW) > total_avail_freqs[i])
457 if (rte_get_master_lcore() != i) {
458 w->wrk_stats[i].lcore_id = i;
459 set_policy(&w->wrk_stats[i], policy);
466 void __rte_experimental
467 rte_power_empty_poll_stat_free(void)
470 RTE_LOG(INFO, POWER, "Close the Empty Poll\n");
472 if (ep_params != NULL)
476 int __rte_experimental
477 rte_power_empty_poll_stat_update(unsigned int lcore_id)
479 struct priority_worker *poll_stats;
481 if (lcore_id >= NUM_NODES)
484 poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
486 if (poll_stats->lcore_id == 0)
487 poll_stats->lcore_id = lcore_id;
489 poll_stats->empty_dequeues++;
494 int __rte_experimental
495 rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
498 struct priority_worker *poll_stats;
500 if (lcore_id >= NUM_NODES)
503 poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
505 if (poll_stats->lcore_id == 0)
506 poll_stats->lcore_id = lcore_id;
508 poll_stats->num_dequeue_pkts += nb_pkt;
514 uint64_t __rte_experimental
515 rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
517 struct priority_worker *poll_stats;
519 if (lcore_id >= NUM_NODES)
522 poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
524 if (poll_stats->lcore_id == 0)
525 poll_stats->lcore_id = lcore_id;
527 return poll_stats->empty_dequeues;
530 uint64_t __rte_experimental
531 rte_power_poll_stat_fetch(unsigned int lcore_id)
533 struct priority_worker *poll_stats;
535 if (lcore_id >= NUM_NODES)
538 poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
540 if (poll_stats->lcore_id == 0)
541 poll_stats->lcore_id = lcore_id;
543 return poll_stats->num_dequeue_pkts;