power: add traffic pattern aware power control
lib/librte_power/rte_power_empty_poll.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

#include <string.h>
#include <inttypes.h>

#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_malloc.h>

#include "rte_power.h"
#include "rte_power_empty_poll.h"

#define INTERVALS_PER_SECOND 100     /* (10ms) */
#define SECONDS_TO_TRAIN_FOR 2
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
#define DEFAULT_CYCLES_PER_PACKET 800
static struct ep_params *ep_params;
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;

static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];

static uint32_t total_avail_freqs[RTE_MAX_LCORE];

static uint32_t freq_index[NUM_FREQ];

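/*
 * Map a coarse frequency level (LOW/MED/HGH) to the corresponding index in
 * the lcore frequency table used by rte_power_set_freq().
 */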
static uint32_t
get_freq_index(enum freq_val index)
{
        return freq_index[index];
}


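/*
 * Scale the lcore to the requested level: either look the level up in
 * freq_index[], or, when specific_freq is true, treat 'freq' as an exact
 * frequency index.
 */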
static int
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
{
        int err = 0;
        uint32_t power_freq_index;
        if (!specific_freq)
                power_freq_index = get_freq_index(freq);
        else
                power_freq_index = freq;

        err = rte_power_set_freq(lcore_id, power_freq_index);

        return err;
}


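/*
 * State entry/exit helpers for the per-worker state machine
 * (TRAINING, MED_NORMAL, HGH_BUSY, LOW_PURGE). Entering MED_NORMAL or
 * HGH_BUSY clears the averaging windows and applies the matching frequency.
 */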
static inline void __attribute__((always_inline))
exit_training_state(struct priority_worker *poll_stats)
{
        RTE_SET_USED(poll_stats);
}

static inline void __attribute__((always_inline))
enter_training_state(struct priority_worker *poll_stats)
{
        poll_stats->iter_counter = 0;
        poll_stats->cur_freq = LOW;
        poll_stats->queue_state = TRAINING;
}

static inline void __attribute__((always_inline))
enter_normal_state(struct priority_worker *poll_stats)
{
        /* Clear the average arrays and counters */
        memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
        poll_stats->ec = 0;
        memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
        poll_stats->pc = 0;

        poll_stats->cur_freq = MED;
        poll_stats->iter_counter = 0;
        poll_stats->threshold_ctr = 0;
        poll_stats->queue_state = MED_NORMAL;
        RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
        set_power_freq(poll_stats->lcore_id, MED, false);

        poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
        poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
}

static inline void __attribute__((always_inline))
enter_busy_state(struct priority_worker *poll_stats)
{
        memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
        poll_stats->ec = 0;
        memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
        poll_stats->pc = 0;

        poll_stats->cur_freq = HGH;
        poll_stats->iter_counter = 0;
        poll_stats->threshold_ctr = 0;
        poll_stats->queue_state = HGH_BUSY;
        set_power_freq(poll_stats->lcore_id, HGH, false);
}

static inline void __attribute__((always_inline))
enter_purge_state(struct priority_worker *poll_stats)
{
        poll_stats->iter_counter = 0;
        poll_stats->queue_state = LOW_PURGE;
}

static inline void __attribute__((always_inline))
set_state(struct priority_worker *poll_stats,
                enum queue_state new_state)
{
        enum queue_state old_state = poll_stats->queue_state;
        if (old_state != new_state) {

                /* Call any old state exit functions */
                if (old_state == TRAINING)
                        exit_training_state(poll_stats);

                /* Call any new state entry functions */
                if (new_state == TRAINING)
                        enter_training_state(poll_stats);
                if (new_state == MED_NORMAL)
                        enter_normal_state(poll_stats);
                if (new_state == HGH_BUSY)
                        enter_busy_state(poll_stats);
                if (new_state == LOW_PURGE)
                        enter_purge_state(poll_stats);
        }
}

static inline void __attribute__((always_inline))
set_policy(struct priority_worker *poll_stats,
                struct ep_policy *policy)
{
        set_state(poll_stats, policy->state);

        if (policy->state == TRAINING)
                return;

        poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
        poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;

        poll_stats->thresh[MED_NORMAL].trained = true;
        poll_stats->thresh[HGH_BUSY].trained = true;
}

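/*
 * Accumulate the training baseline for one frequency level. On the first
 * interval the level's frequency is applied and a snapshot of the empty-poll
 * counter is taken; for the next max_train_iter intervals the empty-poll
 * deltas are summed into base_edpi; finally the sum is averaged, a 0.05%
 * margin is added and the level is marked as trained.
 */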
static void
update_training_stats(struct priority_worker *poll_stats,
                uint32_t freq,
                bool specific_freq,
                uint32_t max_train_iter)
{
        RTE_SET_USED(specific_freq);

        char pfi_str[32];
        uint64_t p0_empty_deq;

        snprintf(pfi_str, sizeof(pfi_str), "%02u", freq);

        if (poll_stats->cur_freq == freq &&
                        poll_stats->thresh[freq].trained == false) {
                if (poll_stats->thresh[freq].cur_train_iter == 0) {

                        set_power_freq(poll_stats->lcore_id,
                                        freq, specific_freq);

                        poll_stats->empty_dequeues_prev =
                                poll_stats->empty_dequeues;

                        poll_stats->thresh[freq].cur_train_iter++;

                        return;
                } else if (poll_stats->thresh[freq].cur_train_iter
                                <= max_train_iter) {

                        p0_empty_deq = poll_stats->empty_dequeues -
                                poll_stats->empty_dequeues_prev;

                        poll_stats->empty_dequeues_prev =
                                poll_stats->empty_dequeues;

                        poll_stats->thresh[freq].base_edpi += p0_empty_deq;
                        poll_stats->thresh[freq].cur_train_iter++;

                } else {
                        if (poll_stats->thresh[freq].trained == false) {
                                poll_stats->thresh[freq].base_edpi =
                                        poll_stats->thresh[freq].base_edpi /
                                        max_train_iter;

                                /* Add on a factor of 0.05%; this should
                                 * remove any false negatives when the
                                 * system is 0% busy.
                                 */
                                poll_stats->thresh[freq].base_edpi +=
                                poll_stats->thresh[freq].base_edpi / 2000;

                                poll_stats->thresh[freq].trained = true;
                                poll_stats->cur_freq++;

                        }
                }
        }
}

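/*
 * Return an estimate of how busy the core was over the last few intervals:
 * 100 minus the ratio of the averaged empty-poll delta to the trained idle
 * baseline for the current frequency. A value above 100 is returned when the
 * current delta exceeds the baseline, which callers treat as an error.
 */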
static inline uint32_t __attribute__((always_inline))
update_stats(struct priority_worker *poll_stats)
{
        uint64_t tot_edpi = 0, tot_ppi = 0;
        uint32_t j, percent;

        struct priority_worker *s = poll_stats;

        uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;

        s->empty_dequeues_prev = s->empty_dequeues;

        uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;

        s->num_dequeue_pkts_prev = s->num_dequeue_pkts;

        if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {

                /* edpi means empty poll counter difference per interval */
                RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
                                "cur edpi %"PRIu64" "
                                "base edpi %"PRIu64"\n",
                                cur_edpi,
                                s->thresh[s->cur_freq].base_edpi);
                /* Return a value above 100 to signal the failure */
                return 1000UL;
        }

        s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
        s->ppi_av[s->pc++ % BINS_AV] = ppi;

        for (j = 0; j < BINS_AV; j++) {
                tot_edpi += s->edpi_av[j];
                tot_ppi += s->ppi_av[j];
        }

        tot_edpi = tot_edpi / BINS_AV;

        percent = 100 - (uint32_t)(((float)tot_edpi /
                        (float)s->thresh[s->cur_freq].base_edpi) * 100);

        return (uint32_t)percent;
}


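/*
 * Drive the MED <-> HGH transitions in the normal states: scale up when the
 * busyness percentage stays above the MED threshold for a full second of
 * intervals, and scale back down when it stays below the HGH threshold for
 * the same length of time.
 */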
static inline void __attribute__((always_inline))
update_stats_normal(struct priority_worker *poll_stats)
{
        uint32_t percent;

        if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {

                enum freq_val cur_freq = poll_stats->cur_freq;

                /* edpi means empty poll counter difference per interval */
                RTE_LOG(DEBUG, POWER, "cur freq is %d, edpi is %"PRIu64"\n",
                                cur_freq,
                                poll_stats->thresh[cur_freq].base_edpi);
                return;
        }

        percent = update_stats(poll_stats);

        if (percent > 100) {
                /* edpi means empty poll counter difference per interval */
                RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
                return;
        }

        if (poll_stats->cur_freq == LOW)
                RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
        else if (poll_stats->cur_freq == MED) {

                if (percent >
                        poll_stats->thresh[MED].threshold_percent) {

                        if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
                                poll_stats->threshold_ctr++;
                        else {
                                set_state(poll_stats, HGH_BUSY);
                                RTE_LOG(INFO, POWER, "MOVE to HGH\n");
                        }

                } else {
                        /* reset */
                        poll_stats->threshold_ctr = 0;
                }

        } else if (poll_stats->cur_freq == HGH) {

                if (percent <
                                poll_stats->thresh[HGH].threshold_percent) {

                        if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
                                poll_stats->threshold_ctr++;
                        else {
                                set_state(poll_stats, MED_NORMAL);
                                RTE_LOG(INFO, POWER, "MOVE to MED\n");
                        }
                } else {
                        /* reset */
                        poll_stats->threshold_ctr = 0;
                }

        }
}

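/*
 * One training step, run once per interval: the first second of intervals is
 * skipped to let things settle, after that the LOW, MED and HGH baselines are
 * trained in turn (only the level matching cur_freq makes progress). Once all
 * three levels are trained the worker is moved to MED_NORMAL and the
 * resulting thresholds are logged.
 */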
static int
empty_poll_training(struct priority_worker *poll_stats,
                uint32_t max_train_iter)
{

        if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
                poll_stats->iter_counter++;
                return 0;
        }


        update_training_stats(poll_stats,
                        LOW,
                        false,
                        max_train_iter);

        update_training_stats(poll_stats,
                        MED,
                        false,
                        max_train_iter);

        update_training_stats(poll_stats,
                        HGH,
                        false,
                        max_train_iter);


        if (poll_stats->thresh[LOW].trained == true
                        && poll_stats->thresh[MED].trained == true
                        && poll_stats->thresh[HGH].trained == true) {

                set_state(poll_stats, MED_NORMAL);

                RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
                                poll_stats->thresh[LOW].base_edpi);

                RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
                                poll_stats->thresh[MED].base_edpi);


                RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
                                poll_stats->thresh[HGH].base_edpi);

                RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
                                poll_stats->lcore_id);
        }

        return 0;
}

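/*
 * Periodic detection callback (the signature matches an rte_timer callback):
 * for every worker whose lcore is enabled, either advance the training phase
 * or run the normal-state busyness update.
 */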
void __rte_experimental
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
{

        uint32_t i;

        struct priority_worker *poll_stats;

        RTE_SET_USED(tim);

        RTE_SET_USED(arg);

        for (i = 0; i < NUM_NODES; i++) {

                poll_stats = &(ep_params->wrk_data.wrk_stats[i]);

                if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
                        continue;

                switch (poll_stats->queue_state) {
                case(TRAINING):
                        empty_poll_training(poll_stats,
                                        ep_params->max_train_iter);
                        break;

                case(HGH_BUSY):
                case(MED_NORMAL):
                        update_stats_normal(poll_stats);
                        break;

                case(LOW_PURGE):
                        break;
                default:
                        break;

                }

        }

}

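/*
 * Allocate and initialise the empty-poll bookkeeping: pick the default or
 * user-supplied frequency indexes for LOW/MED/HGH, set the training length,
 * and apply the supplied policy to every enabled worker lcore except the
 * master.
 */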
int __rte_experimental
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
                struct ep_policy *policy)
{
        uint32_t i;
        /* Allocate the ep_params structure */
        ep_params = rte_zmalloc_socket(NULL,
                        sizeof(struct ep_params),
                        0,
                        rte_socket_id());

        if (!ep_params)
                return -1;

        if (freq_tlb == NULL) {
                /* Default indexes into the lcore frequency table */
                freq_index[LOW] = 14;
                freq_index[MED] = 9;
                freq_index[HGH] = 1;
        } else {
                freq_index[LOW] = freq_tlb[LOW];
                freq_index[MED] = freq_tlb[MED];
                freq_index[HGH] = freq_tlb[HGH];
        }

        RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");

        /* Train for pre-defined period */
        ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;

        struct stats_data *w = &ep_params->wrk_data;

        *eptr = ep_params;

        /* initialize all wrk_stats state */
        for (i = 0; i < NUM_NODES; i++) {

                if (rte_lcore_is_enabled(i) == 0)
                        continue;
                /* init the freqs table */
                total_avail_freqs[i] = rte_power_freqs(i,
                                avail_freqs[i],
                                NUM_FREQS);

                RTE_LOG(INFO, POWER, "total avail freq is %u, lcore id %u\n",
                                total_avail_freqs[i],
                                i);

                if (get_freq_index(LOW) > total_avail_freqs[i])
                        return -1;

                if (rte_get_master_lcore() != i) {
                        w->wrk_stats[i].lcore_id = i;
                        set_policy(&w->wrk_stats[i], policy);
                }
        }

        return 0;
}

void __rte_experimental
rte_power_empty_poll_stat_free(void)
{

        RTE_LOG(INFO, POWER, "Close the Empty Poll\n");

        if (ep_params != NULL)
                rte_free(ep_params);
}

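/*
 * Datapath hooks: the application calls the update functions after each
 * dequeue to record empty polls and received packet counts for its lcore,
 * and the fetch variants to read the running totals back.
 */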
int __rte_experimental
rte_power_empty_poll_stat_update(unsigned int lcore_id)
{
        struct priority_worker *poll_stats;

        if (lcore_id >= NUM_NODES)
                return -1;

        poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

        if (poll_stats->lcore_id == 0)
                poll_stats->lcore_id = lcore_id;

        poll_stats->empty_dequeues++;

        return 0;
}

int __rte_experimental
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
{

        struct priority_worker *poll_stats;

        if (lcore_id >= NUM_NODES)
                return -1;

        poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

        if (poll_stats->lcore_id == 0)
                poll_stats->lcore_id = lcore_id;

        poll_stats->num_dequeue_pkts += nb_pkt;

        return 0;
}


uint64_t __rte_experimental
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
{
        struct priority_worker *poll_stats;

        if (lcore_id >= NUM_NODES)
                return -1;

        poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

        if (poll_stats->lcore_id == 0)
                poll_stats->lcore_id = lcore_id;

        return poll_stats->empty_dequeues;
}

uint64_t __rte_experimental
rte_power_poll_stat_fetch(unsigned int lcore_id)
{
        struct priority_worker *poll_stats;

        if (lcore_id >= NUM_NODES)
                return -1;

        poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

        if (poll_stats->lcore_id == 0)
                poll_stats->lcore_id = lcore_id;

        return poll_stats->num_dequeue_pkts;
}