1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
6 #include <rte_cycles.h>
7 #include <rte_cpuflags.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev.h>
10 #include <rte_power_intrinsics.h>
12 #include "rte_power_pmd_mgmt.h"
14 #define EMPTYPOLL_MAX 512
16 /* store some internal state */
/*
 * File-wide state shared by the power management callbacks: which CPU power
 * intrinsics are available, plus timing constants used by the pause-based
 * callback.
 *
 * NOTE(review): this listing carries embedded line numbers and skips some
 * lines. The member described as "pre-calculated tsc diff for 1us" and the
 * closing line of this definition are not visible here. Other code in this
 * file reads and writes global_data.tsc_per_us, so presumably both exist in
 * the full source - TODO confirm.
 */
17 static struct pmd_conf_data {
18 /** CPU power intrinsics reported by rte_cpu_get_intrinsics_support() */
19 struct rte_cpu_intrinsics intrinsics_support;
20 /** pre-calculated tsc diff for 1us */
22 /** estimated number of rte_pause() calls that fit in one microsecond */
23 uint64_t pause_per_us;
/*
 * Fragment of the power management state enumeration. The disabled state is
 * explicitly 0, so zero-initialised per-queue configuration starts out
 * disabled.
 *
 * NOTE(review): the enumeration header and the enumerator for the enabled
 * state are not visible in this listing. Other code in this file uses
 * enum pmd_mgmt_state and PMD_MGMT_ENABLED, so presumably they are declared
 * here - TODO confirm. The description below says "ethdev port", but the
 * state is stored per queue in struct pmd_queue_cfg.
 */
27 * Possible power management states of an ethdev port.
30 /** Device power management is disabled. */
31 PMD_MGMT_DISABLED = 0,
32 /** Device power management is enabled. */
/*
 * Power management bookkeeping for one RX queue. One instance exists for
 * every (port, queue) slot of the port_cfg table.
 *
 * pwr_mgmt_state and umwait_in_progress are volatile, and the code that
 * touches them pairs the accesses with explicit rte_atomic_thread_fence()
 * calls.
 */
36 struct pmd_queue_cfg {
37 volatile enum pmd_mgmt_state pwr_mgmt_state;
38 /**< State of power management for this queue */
39 enum rte_power_pmd_mgmt_type cb_mode;
40 /**< Callback mode chosen on enable; selects the teardown path on disable */
41 const struct rte_eth_rxtx_callback *cur_cb;
42 /**< Handle returned by rte_eth_add_rx_callback(); set to NULL on disable */
43 volatile bool umwait_in_progress;
44 /**< Set by clb_umwait around its monitor sleep; checked on disable before a wakeup is sent */
45 uint64_t empty_poll_stats;
46 /**< Running count of empty polls, compared against EMPTYPOLL_MAX by the callbacks */
47 } __rte_cache_aligned;
/*
 * Per-queue power management state, indexed as port_cfg[port_id][queue_id].
 * As an object with static storage it starts zeroed, so every queue begins
 * in PMD_MGMT_DISABLED (defined as 0).
 */
49 static struct pmd_queue_cfg port_cfg[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
/*
 * Timing calibration. Derives the number of TSC ticks per microsecond from
 * the timer frequency and stores it in global_data.tsc_per_us. When the
 * power_pause intrinsic is unavailable, it also times n_pauses loop
 * iterations with rte_rdtsc_precise() and stores the resulting
 * iterations-per-microsecond estimate in global_data.pause_per_us.
 *
 * NOTE(review): the enclosing function signature, the loop body and the
 * declarations of i and end are not visible in this listing. If tsc_per_us
 * or the measured us_per_pause were ever zero, the divisions below would
 * yield a non-finite double before the uint64_t cast - confirm this cannot
 * happen on supported platforms.
 */
54 const uint64_t hz = rte_get_timer_hz();
/* integer division: TSC ticks in one microsecond */
55 const uint64_t tsc_per_us = hz / US_PER_S; /* 1us */
57 global_data.tsc_per_us = tsc_per_us;
59 /* the loop estimate is only needed when the power_pause intrinsic is unavailable */
60 if (!global_data.intrinsics_support.power_pause) {
61 const uint64_t start = rte_rdtsc_precise();
62 const uint32_t n_pauses = 10000;
63 double us, us_per_pause;
67 /* estimate number of rte_pause() calls per us */
68 for (i = 0; i < n_pauses; i++)
71 end = rte_rdtsc_precise();
/* elapsed microseconds for the whole loop, then the cost of one iteration */
72 us = (end - start) / (double)tsc_per_us;
73 us_per_pause = us / n_pauses;
/* invert to get iterations per microsecond; the cast truncates */
75 global_data.pause_per_us = (uint64_t)(1.0 / us_per_pause);
/*
 * Empty-poll handler built around rte_power_monitor(). It takes the same
 * parameter list as clb_scale_freq, which this file registers through
 * rte_eth_add_rx_callback().
 *
 * port_id, qidx: select the per-queue state in port_cfg.
 * nb_rx:         packet count of the poll; zero marks an empty poll.
 * pkts, max_pkts, addr: unused.
 *
 * Once more than EMPTYPOLL_MAX empty polls have been counted, the function
 * flags umwait_in_progress, issues a full fence, and - only if the queue is
 * still PMD_MGMT_ENABLED - fetches the monitor condition of the queue and
 * calls rte_power_monitor() with a timeout argument of -1ULL. The flag is
 * cleared and a second fence issued afterwards.
 *
 * NOTE(review): this listing skips lines. The return type, the return
 * statement, the declaration and checking of ret, and the branch structure
 * around the final counter reset are not visible. The reset presumably sits
 * on the path taken when packets were received - TODO confirm.
 */
80 clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
81 uint16_t nb_rx, uint16_t max_pkts __rte_unused,
82 void *addr __rte_unused)
85 struct pmd_queue_cfg *q_conf;
/* no bounds checks here: the indices are used as given */
87 q_conf = &port_cfg[port_id][qidx];
89 if (unlikely(nb_rx == 0)) {
90 q_conf->empty_poll_stats++;
/* the threshold test is strict: EMPTYPOLL_MAX + 1 empty polls are needed */
91 if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) {
92 struct rte_power_monitor_cond pmc;
96 * we might get a cancellation request while being
97 * inside the callback, in which case the wakeup
98 * wouldn't work because it would've arrived too early.
100 * to get around this, we notify the other thread that
101 * we're sleeping, so that it can spin until we're done.
102 * unsolicited wakeups are perfectly safe.
/* announce the upcoming sleep before the state check below */
104 q_conf->umwait_in_progress = true;
106 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
108 /* check if we need to cancel sleep */
109 if (q_conf->pwr_mgmt_state == PMD_MGMT_ENABLED) {
110 /* use monitoring condition to sleep */
111 ret = rte_eth_get_monitor_addr(port_id, qidx,
114 rte_power_monitor(&pmc, -1ULL);
/* sleep attempt is over, whether or not it actually happened */
116 q_conf->umwait_in_progress = false;
118 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
121 q_conf->empty_poll_stats = 0;
/*
 * Empty-poll handler that briefly stalls the polling core. It takes the
 * same parameter list as clb_scale_freq, which this file registers through
 * rte_eth_add_rx_callback().
 *
 * port_id, qidx: select the per-queue state in port_cfg.
 * nb_rx:         packet count of the poll; zero marks an empty poll.
 * pkts, max_pkts, addr: unused.
 *
 * Once more than EMPTYPOLL_MAX empty polls have been counted, the function
 * waits for about one microsecond. With the power_pause intrinsic it calls
 * rte_power_pause() with a deadline of the current TSC plus
 * global_data.tsc_per_us; otherwise it runs a loop of
 * global_data.pause_per_us iterations.
 *
 * NOTE(review): this listing skips lines. The return type, the return
 * statement, the else branch with the declaration of i and the loop body,
 * and the branch structure around the final counter reset are not visible.
 * The loop body is presumably rte_pause() - TODO confirm.
 */
127 clb_pause(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
128 uint16_t nb_rx, uint16_t max_pkts __rte_unused,
129 void *addr __rte_unused)
131 struct pmd_queue_cfg *q_conf;
/* no bounds checks here: the indices are used as given */
133 q_conf = &port_cfg[port_id][qidx];
135 if (unlikely(nb_rx == 0)) {
136 q_conf->empty_poll_stats++;
137 /* sleep for 1 microsecond */
138 if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) {
139 /* use tpause if we have it */
140 if (global_data.intrinsics_support.power_pause) {
141 const uint64_t cur = rte_rdtsc();
/* absolute TSC deadline one microsecond from now */
142 const uint64_t wait_tsc =
143 cur + global_data.tsc_per_us;
144 rte_power_pause(wait_tsc);
/* fallback path: iteration count comes from the calibration estimate */
147 for (i = 0; i < global_data.pause_per_us; i++)
152 q_conf->empty_poll_stats = 0;
/*
 * Empty-poll handler that scales the core frequency. The enable path in
 * this file registers it through rte_eth_add_rx_callback() with a NULL user
 * parameter.
 *
 * port_id, qidx: select the per-queue state in port_cfg.
 * nb_rx:         packet count of the poll; zero marks an empty poll.
 * pkts, max_pkts and the last parameter are unused.
 *
 * Once more than EMPTYPOLL_MAX empty polls have been counted, it requests
 * the minimum frequency for the current lcore. The visible tail resets the
 * counter and requests the maximum frequency.
 *
 * NOTE(review): this listing skips lines. The return type, the return
 * statement and the else line are not visible, so the tail presumably runs
 * when packets were received - TODO confirm. The frequency calls target
 * rte_lcore_id(), the lcore executing this function, while the enable path
 * initialises the power library for its lcore_id argument. Presumably the
 * two are expected to match - verify against callers.
 */
158 clb_scale_freq(uint16_t port_id, uint16_t qidx,
159 struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
160 uint16_t max_pkts __rte_unused, void *_ __rte_unused)
162 struct pmd_queue_cfg *q_conf;
/* no bounds checks here: the indices are used as given */
164 q_conf = &port_cfg[port_id][qidx];
166 if (unlikely(nb_rx == 0)) {
167 q_conf->empty_poll_stats++;
168 if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX))
169 /* scale down freq */
170 rte_power_freq_min(rte_lcore_id());
172 q_conf->empty_poll_stats = 0;
/* back to full speed on the current lcore */
174 rte_power_freq_max(rte_lcore_id());
/*
 * Enable power management on one RX queue of an ethdev port.
 *
 * lcore_id: bounds-checked against RTE_MAX_LCORE; passed to rte_power_init()
 *           in SCALE mode.
 * port_id:  validated with RTE_ETH_VALID_PORTID_OR_ERR_RET, which is given
 *           -EINVAL as its error value.
 * queue_id: must be below RTE_MAX_QUEUES_PER_PORT and below the nb_rx_queues
 *           value obtained through rte_eth_dev_info_get().
 * mode:     one of the RTE_POWER_MGMT_TYPE_* values handled below.
 *
 * For each mode the per-queue state is initialised first, a full fence is
 * issued, and only then is an RX callback added, with the returned handle
 * stored in cur_cb.
 *
 * NOTE(review): this listing skips lines. The return type, the switch
 * header, the error returns and the callback passed in the MONITOR and
 * PAUSE branches are not visible. On the visible lines nothing checks
 * whether rte_eth_add_rx_callback() returned NULL, which would leave the
 * queue marked PMD_MGMT_ENABLED without a callback - confirm against the
 * full source. Likewise no rte_power_exit() is visible on the SCALE failure
 * path that follows a successful rte_power_init().
 */
181 rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
182 uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
184 struct pmd_queue_cfg *queue_cfg;
185 struct rte_eth_dev_info info;
188 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
/* queue_id indexes the static port_cfg table, so bound it by the table size first */
190 if (queue_id >= RTE_MAX_QUEUES_PER_PORT || lcore_id >= RTE_MAX_LCORE) {
195 if (rte_eth_dev_info_get(port_id, &info) < 0) {
200 /* check if queue id is valid */
201 if (queue_id >= info.nb_rx_queues) {
206 queue_cfg = &port_cfg[port_id][queue_id];
/* anything other than the disabled state takes this branch */
208 if (queue_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED) {
213 /* we need this in various places */
214 rte_cpu_get_intrinsics_support(&global_data.intrinsics_support);
217 case RTE_POWER_MGMT_TYPE_MONITOR:
/* used only to probe for PMD support; its contents are not read on the visible lines */
219 struct rte_power_monitor_cond dummy;
221 /* check if rte_power_monitor is supported */
222 if (!global_data.intrinsics_support.power_monitor) {
223 RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not supported\n");
228 /* check if the device supports the necessary PMD API */
/* only -ENOTSUP is compared here; other return values fall through */
229 if (rte_eth_get_monitor_addr(port_id, queue_id,
230 &dummy) == -ENOTSUP) {
231 RTE_LOG(DEBUG, POWER, "The device does not support rte_eth_get_monitor_addr\n");
235 /* initialize data before enabling the callback */
236 queue_cfg->empty_poll_stats = 0;
237 queue_cfg->cb_mode = mode;
238 queue_cfg->umwait_in_progress = false;
239 queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
241 /* ensure we update our state before callback starts */
242 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
244 queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id,
248 case RTE_POWER_MGMT_TYPE_SCALE:
250 enum power_management_env env;
251 /* only PSTATE and ACPI modes are supported */
252 if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) &&
253 !rte_power_check_env_supported(
254 PM_ENV_PSTATE_CPUFREQ)) {
255 RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes are supported\n");
259 /* ensure we could initialize the power library */
260 if (rte_power_init(lcore_id)) {
264 /* ensure we initialized the correct env */
265 env = rte_power_get_env();
266 if (env != PM_ENV_ACPI_CPUFREQ &&
267 env != PM_ENV_PSTATE_CPUFREQ) {
268 RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes were initialized\n");
272 /* initialize data before enabling the callback */
273 queue_cfg->empty_poll_stats = 0;
274 queue_cfg->cb_mode = mode;
275 queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
277 /* this is not necessary here, but do it anyway */
278 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
/* frequency scaling callback, registered with a NULL user parameter */
280 queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id,
281 queue_id, clb_scale_freq, NULL);
284 case RTE_POWER_MGMT_TYPE_PAUSE:
285 /* figure out various time-to-tsc conversions */
/* a zero tsc_per_us is treated as "not calibrated yet"; the action taken is on an elided line */
286 if (global_data.tsc_per_us == 0)
289 /* initialize data before enabling the callback */
290 queue_cfg->empty_poll_stats = 0;
291 queue_cfg->cb_mode = mode;
292 queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
294 /* this is not necessary here, but do it anyway */
295 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
297 queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id,
/*
 * Disable power management on one RX queue of an ethdev port.
 *
 * lcore_id: bounds-checked against RTE_MAX_LCORE; used for the monitor
 *           wakeup in MONITOR mode and for rte_power_freq_max() and
 *           rte_power_exit() in SCALE mode.
 * port_id:  validated with RTE_ETH_VALID_PORTID_OR_ERR_RET, which is given
 *           -EINVAL as its error value.
 * queue_id: bounds-checked against RTE_MAX_QUEUES_PER_PORT only.
 *
 * The queue state is switched to PMD_MGMT_DISABLED and a full fence is
 * issued before any teardown, then the mode recorded in cb_mode selects the
 * teardown path. MONITOR mode sends rte_power_monitor_wakeup() to lcore_id
 * while umwait_in_progress is set. PAUSE mode removes the RX callback.
 * SCALE mode requests maximum frequency, removes the RX callback and calls
 * rte_power_exit(). Finally cur_cb is set to NULL; per the comment at the
 * end of the function, the callback memory is deliberately not freed.
 *
 * NOTE(review): this listing skips lines. The return type, the return
 * statements and the rest of the MONITOR branch are not visible. In the
 * PAUSE and SCALE branches the call to rte_eth_remove_rx_callback() starts
 * its statement, so its return value is not used there.
 */
307 rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
308 uint16_t port_id, uint16_t queue_id)
310 struct pmd_queue_cfg *queue_cfg;
312 RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
/* queue_id indexes the static port_cfg table, so bound it by the table size */
314 if (lcore_id >= RTE_MAX_LCORE || queue_id >= RTE_MAX_QUEUES_PER_PORT)
317 /* no need to check queue id as wrong queue id would not be enabled */
318 queue_cfg = &port_cfg[port_id][queue_id];
/* only a queue currently marked enabled proceeds to teardown */
320 if (queue_cfg->pwr_mgmt_state != PMD_MGMT_ENABLED)
323 /* stop any callbacks from progressing */
324 queue_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED;
326 /* ensure we update our state before continuing */
327 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
/* teardown depends on the mode recorded when the queue was enabled */
329 switch (queue_cfg->cb_mode) {
330 case RTE_POWER_MGMT_TYPE_MONITOR:
335 * we may request cancellation while the other thread
336 * has just entered the callback but hasn't started
337 * sleeping yet, so keep waking it up until we know it's
340 if (queue_cfg->umwait_in_progress)
341 rte_power_monitor_wakeup(lcore_id);
347 case RTE_POWER_MGMT_TYPE_PAUSE:
348 rte_eth_remove_rx_callback(port_id, queue_id,
351 case RTE_POWER_MGMT_TYPE_SCALE:
352 rte_power_freq_max(lcore_id);
353 rte_eth_remove_rx_callback(port_id, queue_id,
355 rte_power_exit(lcore_id);
359 * we don't free the RX callback here because it is unsafe to do so
360 * unless we know for a fact that all data plane threads have stopped.
362 queue_cfg->cur_cb = NULL;