8a2d60c5767085a4dd2b847cc6e954a504030eb9
[dpdk.git] / lib / power / rte_power_empty_poll.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2018 Intel Corporation
3  */
4
5 #include <string.h>
6
7 #include <rte_lcore.h>
8 #include <rte_cycles.h>
9 #include <rte_atomic.h>
10 #include <rte_malloc.h>
11 #include <inttypes.h>
12
13 #include "rte_power.h"
14 #include "rte_power_empty_poll.h"
15
16 #define INTERVALS_PER_SECOND 100     /* (10ms) */
17 #define SECONDS_TO_TRAIN_FOR 2
18 #define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
19 #define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
20 #define DEFAULT_CYCLES_PER_PACKET 800
21
/* Shared state allocated by rte_power_empty_poll_stat_init(). */
static struct ep_params *ep_params;
/* Busyness (%) above which a MED-frequency lcore is promoted to HGH. */
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
/* Busyness (%) below which a HGH-frequency lcore is demoted back to MED. */
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;

/* Per-lcore frequency table filled by rte_power_freqs() during init. */
static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];

/* Number of valid entries in avail_freqs[] for each lcore. */
static uint32_t total_avail_freqs[RTE_MAX_LCORE];

/* Maps the abstract LOW/MED/HGH levels to indexes in the freq table. */
static uint32_t freq_index[NUM_FREQ];
31
32 static uint32_t
33 get_freq_index(enum freq_val index)
34 {
35         return freq_index[index];
36 }
37
38
39 static int
40 set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
41 {
42         int err = 0;
43         uint32_t power_freq_index;
44         if (!specific_freq)
45                 power_freq_index = get_freq_index(freq);
46         else
47                 power_freq_index = freq;
48
49         err = rte_power_set_freq(lcore_id, power_freq_index);
50
51         return err;
52 }
53
54
/* State-exit hook for TRAINING. No teardown is required; the parameter is
 * referenced only to silence unused-argument warnings.
 */
static __rte_always_inline void
exit_training_state(struct priority_worker *poll_stats)
{
	RTE_SET_USED(poll_stats);
}
60
61 static __rte_always_inline void
62 enter_training_state(struct priority_worker *poll_stats)
63 {
64         poll_stats->iter_counter = 0;
65         poll_stats->cur_freq = LOW;
66         poll_stats->queue_state = TRAINING;
67 }
68
69 static __rte_always_inline void
70 enter_normal_state(struct priority_worker *poll_stats)
71 {
72         /* Clear the averages arrays and strs */
73         memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
74         poll_stats->ec = 0;
75         memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
76         poll_stats->pc = 0;
77
78         poll_stats->cur_freq = MED;
79         poll_stats->iter_counter = 0;
80         poll_stats->threshold_ctr = 0;
81         poll_stats->queue_state = MED_NORMAL;
82         RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
83         set_power_freq(poll_stats->lcore_id, MED, false);
84
85         poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
86         poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
87 }
88
89 static __rte_always_inline void
90 enter_busy_state(struct priority_worker *poll_stats)
91 {
92         memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
93         poll_stats->ec = 0;
94         memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
95         poll_stats->pc = 0;
96
97         poll_stats->cur_freq = HGH;
98         poll_stats->iter_counter = 0;
99         poll_stats->threshold_ctr = 0;
100         poll_stats->queue_state = HGH_BUSY;
101         set_power_freq(poll_stats->lcore_id, HGH, false);
102 }
103
104 static __rte_always_inline void
105 enter_purge_state(struct priority_worker *poll_stats)
106 {
107         poll_stats->iter_counter = 0;
108         poll_stats->queue_state = LOW_PURGE;
109 }
110
111 static __rte_always_inline void
112 set_state(struct priority_worker *poll_stats,
113                 enum queue_state new_state)
114 {
115         enum queue_state old_state = poll_stats->queue_state;
116         if (old_state != new_state) {
117
118                 /* Call any old state exit functions */
119                 if (old_state == TRAINING)
120                         exit_training_state(poll_stats);
121
122                 /* Call any new state entry functions */
123                 if (new_state == TRAINING)
124                         enter_training_state(poll_stats);
125                 if (new_state == MED_NORMAL)
126                         enter_normal_state(poll_stats);
127                 if (new_state == HGH_BUSY)
128                         enter_busy_state(poll_stats);
129                 if (new_state == LOW_PURGE)
130                         enter_purge_state(poll_stats);
131         }
132 }
133
134 static __rte_always_inline void
135 set_policy(struct priority_worker *poll_stats,
136                 struct ep_policy *policy)
137 {
138         set_state(poll_stats, policy->state);
139
140         if (policy->state == TRAINING)
141                 return;
142
143         poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
144         poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;
145
146         poll_stats->thresh[MED_NORMAL].trained = true;
147         poll_stats->thresh[HGH_BUSY].trained = true;
148
149 }
150
/* Accumulate one training sample for frequency level 'freq'.
 *
 * Only acts when the worker is currently training this level
 * (cur_freq == freq and the level is not yet trained). The first call
 * switches the lcore to the level and snapshots the empty-dequeue counter;
 * the next max_train_iter calls add the per-interval empty-dequeue deltas
 * into base_edpi; the call after that averages the sum, pads it by ~0.05%
 * to avoid false negatives at 0% load, marks the level trained and
 * advances cur_freq to the next level.
 */
static void
update_training_stats(struct priority_worker *poll_stats,
		uint32_t freq,
		bool specific_freq,
		uint32_t max_train_iter)
{
	RTE_SET_USED(specific_freq);

	uint64_t p0_empty_deq;

	if (poll_stats->cur_freq == freq &&
			poll_stats->thresh[freq].trained == false) {
		if (poll_stats->thresh[freq].cur_train_iter == 0) {

			/* First iteration: apply the frequency and baseline
			 * the empty-dequeue counter; no delta is recorded.
			 */
			set_power_freq(poll_stats->lcore_id,
					freq, specific_freq);

			poll_stats->empty_dequeues_prev =
				poll_stats->empty_dequeues;

			poll_stats->thresh[freq].cur_train_iter++;

			return;
		} else if (poll_stats->thresh[freq].cur_train_iter
				<= max_train_iter) {

			/* Empty dequeues seen during the last interval. */
			p0_empty_deq = poll_stats->empty_dequeues -
				poll_stats->empty_dequeues_prev;

			poll_stats->empty_dequeues_prev =
				poll_stats->empty_dequeues;

			/* base_edpi holds the running sum until averaged
			 * below.
			 */
			poll_stats->thresh[freq].base_edpi += p0_empty_deq;
			poll_stats->thresh[freq].cur_train_iter++;

		} else {
			if (poll_stats->thresh[freq].trained == false) {
				/* Convert the accumulated sum into a
				 * per-interval average.
				 */
				poll_stats->thresh[freq].base_edpi =
					poll_stats->thresh[freq].base_edpi /
					max_train_iter;

				/* Add on a factor of 0.05%
				 * this should remove any
				 * false negatives when the system is 0% busy
				 */
				poll_stats->thresh[freq].base_edpi +=
				poll_stats->thresh[freq].base_edpi / 2000;

				poll_stats->thresh[freq].trained = true;
				/* Move on to training the next level. */
				poll_stats->cur_freq++;

			}
		}
	}
}
206
207 static __rte_always_inline uint32_t
208 update_stats(struct priority_worker *poll_stats)
209 {
210         uint64_t tot_edpi = 0;
211         uint32_t j, percent;
212
213         struct priority_worker *s = poll_stats;
214
215         uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;
216
217         s->empty_dequeues_prev = s->empty_dequeues;
218
219         uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;
220
221         s->num_dequeue_pkts_prev = s->num_dequeue_pkts;
222
223         if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {
224
225                 /* edpi mean empty poll counter difference per interval */
226                 RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
227                                 "cur edpi %"PRId64" "
228                                 "base edpi %"PRId64"\n",
229                                 cur_edpi,
230                                 s->thresh[s->cur_freq].base_edpi);
231                 /* Value to make us fail need debug log*/
232                 return 1000UL;
233         }
234
235         s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
236         s->ppi_av[s->pc++ % BINS_AV] = ppi;
237
238         for (j = 0; j < BINS_AV; j++) {
239                 tot_edpi += s->edpi_av[j];
240         }
241
242         tot_edpi = tot_edpi / BINS_AV;
243
244         percent = 100 - (uint32_t)(((float)tot_edpi /
245                         (float)s->thresh[s->cur_freq].base_edpi) * 100);
246
247         return (uint32_t)percent;
248 }
249
250
/* Periodic state-machine step for a worker in MED_NORMAL or HGH_BUSY:
 * compute the interval busyness and, after INTERVALS_PER_SECOND
 * consecutive intervals beyond the configured threshold, move the worker
 * to the other state (MED -> HGH when busy, HGH -> MED when idle). The
 * consecutive-interval counter provides hysteresis against oscillation.
 */
static __rte_always_inline void
update_stats_normal(struct priority_worker *poll_stats)
{
	uint32_t percent;

	/* An untrained (zero) baseline would divide by zero below. */
	if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {

		enum freq_val cur_freq = poll_stats->cur_freq;

		/* edpi mean empty poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "cure freq is %d, edpi is %"PRIu64"\n",
				cur_freq,
				poll_stats->thresh[cur_freq].base_edpi);
		return;
	}

	percent = update_stats(poll_stats);

	/* update_stats() returns 1000 when the interval's empty-dequeue
	 * count exceeded the baseline; skip this interval.
	 */
	if (percent > 100) {
		/* edpi mean empty poll counter difference per interval */
		RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
		return;
	}

	if (poll_stats->cur_freq == LOW)
		RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
	else if (poll_stats->cur_freq == MED) {

		/* Busy enough for long enough: promote to HGH. */
		if (percent >
			poll_stats->thresh[MED].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, HGH_BUSY);
				RTE_LOG(INFO, POWER, "MOVE to HGH\n");
			}

		} else {
			/* reset */
			poll_stats->threshold_ctr = 0;
		}

	} else if (poll_stats->cur_freq == HGH) {

		/* Idle enough for long enough: demote to MED. */
		if (percent <
				poll_stats->thresh[HGH].threshold_percent) {

			if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
				poll_stats->threshold_ctr++;
			else {
				set_state(poll_stats, MED_NORMAL);
				RTE_LOG(INFO, POWER, "MOVE to MED\n");
			}
		} else {
			/* reset */
			poll_stats->threshold_ctr = 0;
		}

	}
}
312
313 static int
314 empty_poll_training(struct priority_worker *poll_stats,
315                 uint32_t max_train_iter)
316 {
317
318         if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
319                 poll_stats->iter_counter++;
320                 return 0;
321         }
322
323
324         update_training_stats(poll_stats,
325                         LOW,
326                         false,
327                         max_train_iter);
328
329         update_training_stats(poll_stats,
330                         MED,
331                         false,
332                         max_train_iter);
333
334         update_training_stats(poll_stats,
335                         HGH,
336                         false,
337                         max_train_iter);
338
339
340         if (poll_stats->thresh[LOW].trained == true
341                         && poll_stats->thresh[MED].trained == true
342                         && poll_stats->thresh[HGH].trained == true) {
343
344                 set_state(poll_stats, MED_NORMAL);
345
346                 RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
347                                 poll_stats->thresh[LOW].base_edpi);
348
349                 RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
350                                 poll_stats->thresh[MED].base_edpi);
351
352
353                 RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
354                                 poll_stats->thresh[HGH].base_edpi);
355
356                 RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
357                                 poll_stats->lcore_id);
358         }
359
360         return 0;
361 }
362
363 void
364 rte_empty_poll_detection(struct rte_timer *tim, void *arg)
365 {
366
367         uint32_t i;
368
369         struct priority_worker *poll_stats;
370
371         RTE_SET_USED(tim);
372
373         RTE_SET_USED(arg);
374
375         for (i = 0; i < NUM_NODES; i++) {
376
377                 poll_stats = &(ep_params->wrk_data.wrk_stats[i]);
378
379                 if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
380                         continue;
381
382                 switch (poll_stats->queue_state) {
383                 case(TRAINING):
384                         empty_poll_training(poll_stats,
385                                         ep_params->max_train_iter);
386                         break;
387
388                 case(HGH_BUSY):
389                 case(MED_NORMAL):
390                         update_stats_normal(poll_stats);
391                         break;
392
393                 case(LOW_PURGE):
394                         break;
395                 default:
396                         break;
397
398                 }
399
400         }
401
402 }
403
404 int
405 rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
406                 struct ep_policy *policy)
407 {
408         uint32_t i;
409         /* Allocate the ep_params structure */
410         ep_params = rte_zmalloc_socket(NULL,
411                         sizeof(struct ep_params),
412                         0,
413                         rte_socket_id());
414
415         if (!ep_params)
416                 return -1;
417
418         if (freq_tlb == NULL) {
419                 freq_index[LOW] = 14;
420                 freq_index[MED] = 9;
421                 freq_index[HGH] = 1;
422         } else {
423                 freq_index[LOW] = freq_tlb[LOW];
424                 freq_index[MED] = freq_tlb[MED];
425                 freq_index[HGH] = freq_tlb[HGH];
426         }
427
428         RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");
429
430         /* Train for pre-defined period */
431         ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;
432
433         struct stats_data *w = &ep_params->wrk_data;
434
435         *eptr = ep_params;
436
437         /* initialize all wrk_stats state */
438         for (i = 0; i < NUM_NODES; i++) {
439
440                 if (rte_lcore_is_enabled(i) == 0)
441                         continue;
442                 /*init the freqs table */
443                 total_avail_freqs[i] = rte_power_freqs(i,
444                                 avail_freqs[i],
445                                 NUM_FREQS);
446
447                 RTE_LOG(INFO, POWER, "total avail freq is %d , lcoreid %d\n",
448                                 total_avail_freqs[i],
449                                 i);
450
451                 if (get_freq_index(LOW) > total_avail_freqs[i])
452                         return -1;
453
454                 if (rte_get_main_lcore() != i) {
455                         w->wrk_stats[i].lcore_id = i;
456                         set_policy(&w->wrk_stats[i], policy);
457                 }
458         }
459
460         return 0;
461 }
462
463 void
464 rte_power_empty_poll_stat_free(void)
465 {
466
467         RTE_LOG(INFO, POWER, "Close the Empty Poll\n");
468
469         if (ep_params != NULL)
470                 rte_free(ep_params);
471 }
472
473 int
474 rte_power_empty_poll_stat_update(unsigned int lcore_id)
475 {
476         struct priority_worker *poll_stats;
477
478         if (lcore_id >= NUM_NODES)
479                 return -1;
480
481         poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
482
483         if (poll_stats->lcore_id == 0)
484                 poll_stats->lcore_id = lcore_id;
485
486         poll_stats->empty_dequeues++;
487
488         return 0;
489 }
490
491 int
492 rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
493 {
494
495         struct priority_worker *poll_stats;
496
497         if (lcore_id >= NUM_NODES)
498                 return -1;
499
500         poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
501
502         if (poll_stats->lcore_id == 0)
503                 poll_stats->lcore_id = lcore_id;
504
505         poll_stats->num_dequeue_pkts += nb_pkt;
506
507         return 0;
508 }
509
510
511 uint64_t
512 rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
513 {
514         struct priority_worker *poll_stats;
515
516         if (lcore_id >= NUM_NODES)
517                 return -1;
518
519         poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
520
521         if (poll_stats->lcore_id == 0)
522                 poll_stats->lcore_id = lcore_id;
523
524         return poll_stats->empty_dequeues;
525 }
526
527 uint64_t
528 rte_power_poll_stat_fetch(unsigned int lcore_id)
529 {
530         struct priority_worker *poll_stats;
531
532         if (lcore_id >= NUM_NODES)
533                 return -1;
534
535         poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
536
537         if (poll_stats->lcore_id == 0)
538                 poll_stats->lcore_id = lcore_id;
539
540         return poll_stats->num_dequeue_pkts;
541 }