#include <errno.h>
#include <stdlib.h>
#include <string.h>
-#include <poll.h>
#include <netinet/in.h>
#include <net/if.h>
#include <sys/socket.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_spinlock.h>
+#include <rte_power_intrinsics.h>
#include "compat.h"
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
#ifndef SOL_XDP
#define SOL_XDP 283
#define PF_XDP AF_XDP
#endif
-RTE_LOG_REGISTER(af_xdp_logtype, pmd.net.af_xdp, NOTICE);
+RTE_LOG_REGISTER_DEFAULT(af_xdp_logtype, NOTICE);
#define AF_XDP_LOG(level, fmt, args...) \
rte_log(RTE_LOG_ ## level, af_xdp_logtype, \
#define ETH_AF_XDP_DFLT_NUM_DESCS XSK_RING_CONS__DEFAULT_NUM_DESCS
#define ETH_AF_XDP_DFLT_START_QUEUE_IDX 0
#define ETH_AF_XDP_DFLT_QUEUE_COUNT 1
+#define ETH_AF_XDP_DFLT_BUSY_BUDGET 64
+#define ETH_AF_XDP_DFLT_BUSY_TIMEOUT 20
#define ETH_AF_XDP_RX_BATCH_SIZE XSK_RING_CONS__DEFAULT_NUM_DESCS
#define ETH_AF_XDP_TX_BATCH_SIZE XSK_RING_CONS__DEFAULT_NUM_DESCS
struct pkt_tx_queue *pair;
struct pollfd fds[1];
int xsk_queue_idx;
+ int busy_budget;
};
struct tx_stats {
#define ETH_AF_XDP_QUEUE_COUNT_ARG "queue_count"
#define ETH_AF_XDP_SHARED_UMEM_ARG "shared_umem"
#define ETH_AF_XDP_PROG_ARG "xdp_prog"
+#define ETH_AF_XDP_BUDGET_ARG "busy_budget"
static const char * const valid_arguments[] = {
ETH_AF_XDP_IFACE_ARG,
ETH_AF_XDP_QUEUE_COUNT_ARG,
ETH_AF_XDP_SHARED_UMEM_ARG,
ETH_AF_XDP_PROG_ARG,
+ ETH_AF_XDP_BUDGET_ARG,
NULL
};
nb_pkts = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
if (nb_pkts == 0) {
-#if defined(XDP_USE_NEED_WAKEUP)
- if (xsk_ring_prod__needs_wakeup(fq))
- (void)poll(rxq->fds, 1, 1000);
-#endif
+ /* we can assume a kernel >= 5.11 is in use if busy polling is
+ * enabled and thus we can safely use the recvfrom() syscall
+ * which is only supported for AF_XDP sockets in kernels >=
+ * 5.11.
+ */
+ if (rxq->busy_budget) {
+ (void)recvfrom(xsk_socket__fd(rxq->xsk), NULL, 0,
+ MSG_DONTWAIT, NULL, NULL);
+ } else if (xsk_ring_prod__needs_wakeup(fq)) {
+ (void)poll(&rxq->fds[0], 1, 1000);
+ }
return 0;
}
pull_umem_cq(umem, XSK_RING_CONS__DEFAULT_NUM_DESCS, cq);
-#if defined(XDP_USE_NEED_WAKEUP)
- if (xsk_ring_prod__needs_wakeup(&txq->tx))
-#endif
+ if (tx_syscall_needed(&txq->tx))
while (send(xsk_socket__fd(txq->pair->xsk), NULL,
0, MSG_DONTWAIT) < 0) {
/* some thing unexpected */
if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
rte_pktmbuf_free(local_mbuf);
- kick_tx(txq, cq);
goto out;
}
tx_bytes += mbuf->pkt_len;
}
- kick_tx(txq, cq);
-
out:
xsk_ring_prod__submit(&txq->tx, count);
+ kick_tx(txq, cq);
txq->stats.tx_pkts += count;
txq->stats.tx_bytes += tx_bytes;
return 0;
}
+#define CLB_VAL_IDX 0
+/**
+ * Power-monitor callback for the AF_XDP Rx ring.
+ *
+ * Compares the low 32 bits of the monitored word against the value stored
+ * in opaque[CLB_VAL_IDX].
+ *
+ * @param value
+ *   Current content of the monitored address.
+ * @param opaque
+ *   Callback data; slot CLB_VAL_IDX holds the value to compare against.
+ * @return
+ *   0 if the masked value still equals the stored one, -1 if it differs.
+ */
+static int
+eth_monitor_callback(const uint64_t value,
+		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
+{
+	/* only the low 32 bits of the monitored word take part in the check */
+	const uint64_t low32_mask = (uint32_t)~0;
+	const uint64_t expected = opaque[CLB_VAL_IDX];
+
+	/* if the value has changed, abort entering power optimized state */
+	if ((value & low32_mask) != expected)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * Fill in the power-monitor condition for an Rx queue.
+ *
+ * @param rx_queue
+ *   Rx queue, used as a struct pkt_rx_queue.
+ * @param pmc
+ *   Condition to populate: monitored address and size, comparison value
+ *   and callback.
+ * @return
+ *   Always 0.
+ */
+static int
+eth_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
+{
+	struct pkt_rx_queue *rxq = rx_queue;
+
+	/* watch for changes in producer ring */
+	pmc->addr = (void *)rxq->rx.producer;
+
+	/* AF_XDP producer ring index is 32-bit */
+	pmc->size = sizeof(uint32_t);
+
+	/* the callback compares against the cached producer value */
+	pmc->opaque[CLB_VAL_IDX] = (uint32_t)rxq->rx.cached_prod;
+	pmc->fn = eth_monitor_callback;
+
+	return 0;
+}
+
static int
eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - XDP_PACKET_HEADROOM;
#endif
+ dev_info->default_rxportconf.burst_size = ETH_AF_XDP_DFLT_BUSY_BUDGET;
+ dev_info->default_txportconf.burst_size = ETH_AF_XDP_DFLT_BUSY_BUDGET;
dev_info->default_rxportconf.nb_queues = 1;
dev_info->default_txportconf.nb_queues = 1;
dev_info->default_rxportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
return 0;
}
-static void
-eth_queue_release(void *q __rte_unused)
-{
-}
-
static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
int wait_to_complete __rte_unused)
return 0;
}
+/* Detect support for busy polling through setsockopt().
+ *
+ * Tries to set SO_PREFER_BUSY_POLL, SO_BUSY_POLL (to
+ * ETH_AF_XDP_DFLT_BUSY_TIMEOUT) and SO_BUSY_POLL_BUDGET (to
+ * rxq->busy_budget) on the xsk socket, in that order. If one of them is
+ * rejected, the options set before it are reset to 0 and rxq->busy_budget
+ * is cleared, so the queue carries on without busy polling.
+ *
+ * Returns 0 when busy polling was either enabled or cleanly disabled, and
+ * -1 when an option that had already been set could not be reset.
+ */
+static int
+configure_preferred_busy_poll(struct pkt_rx_queue *rxq)
+{
+ int sock_opt = 1;
+ int fd = xsk_socket__fd(rxq->xsk);
+ int ret = 0;
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt));
+ if (ret < 0) {
+ AF_XDP_LOG(DEBUG, "Failed to set SO_PREFER_BUSY_POLL\n");
+ goto err_prefer; /* nothing has been set yet, nothing to undo */
+ }
+
+ sock_opt = ETH_AF_XDP_DFLT_BUSY_TIMEOUT;
+ ret = setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, (void *)&sock_opt,
+ sizeof(sock_opt));
+ if (ret < 0) {
+ AF_XDP_LOG(DEBUG, "Failed to set SO_BUSY_POLL\n");
+ goto err_timeout; /* only SO_PREFER_BUSY_POLL needs undoing */
+ }
+
+ sock_opt = rxq->busy_budget;
+ ret = setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL_BUDGET,
+ (void *)&sock_opt, sizeof(sock_opt));
+ if (ret < 0) {
+ AF_XDP_LOG(DEBUG, "Failed to set SO_BUSY_POLL_BUDGET\n");
+ } else {
+ AF_XDP_LOG(INFO, "Busy polling budget set to: %u\n",
+ rxq->busy_budget);
+ return 0; /* all three options accepted */
+ }
+
+ /* setsockopt failure - attempt to restore xsk to default state and
+ * proceed without busy polling support. Execution falls through the
+ * labels below so that each option set earlier is reset in turn.
+ */
+ sock_opt = 0;
+ ret = setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, (void *)&sock_opt,
+ sizeof(sock_opt));
+ if (ret < 0) {
+ AF_XDP_LOG(ERR, "Failed to unset SO_BUSY_POLL\n");
+ return -1;
+ }
+
+err_timeout:
+ sock_opt = 0;
+ ret = setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt));
+ if (ret < 0) {
+ AF_XDP_LOG(ERR, "Failed to unset SO_PREFER_BUSY_POLL\n");
+ return -1;
+ }
+
+err_prefer:
+ rxq->busy_budget = 0; /* a zero budget marks busy polling as disabled */
+ return 0;
+}
+
static int
xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
int ring_size)
goto err;
}
#endif
+
+ if (rxq->busy_budget) {
+ ret = configure_preferred_busy_poll(rxq);
+ if (ret) {
+ AF_XDP_LOG(ERR, "Failed configure busy polling.\n");
+ goto err;
+ }
+ }
+
ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq);
if (ret) {
xsk_socket__delete(rxq->xsk);
goto err;
}
+ if (!rxq->busy_budget)
+ AF_XDP_LOG(DEBUG, "Preferred busy polling not enabled\n");
+
rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
rxq->fds[0].events = POLLIN;
.promiscuous_disable = eth_dev_promiscuous_disable,
.rx_queue_setup = eth_rx_queue_setup,
.tx_queue_setup = eth_tx_queue_setup,
- .rx_queue_release = eth_queue_release,
- .tx_queue_release = eth_queue_release,
.link_update = eth_link_update,
.stats_get = eth_stats_get,
.stats_reset = eth_stats_reset,
+ .get_monitor_addr = eth_get_monitor_addr,
};
+/**
+ * Parse the busy_budget argument.
+ *
+ * The value must be a complete base-10 integer in the range
+ * [0, UINT16_MAX]. Empty strings, trailing garbage and out-of-range numbers
+ * are rejected rather than being read as 0 or truncated.
+ *
+ * @param key
+ *   Unused.
+ * @param value
+ *   String to parse.
+ * @param extra_args
+ *   Pointer to the int that receives the parsed budget; written only on
+ *   success.
+ * @return
+ *   0 on success, -EINVAL if the value is missing or invalid.
+ */
+static int
+parse_budget_arg(const char *key __rte_unused,
+		  const char *value, void *extra_args)
+{
+	int *i = (int *)extra_args;
+	char *end;
+	long budget;
+
+	/* guard against the key being given without a value */
+	if (value == NULL) {
+		AF_XDP_LOG(ERR, "Missing value for busy_budget\n");
+		return -EINVAL;
+	}
+
+	/* range-check the long result before narrowing it to int, so that
+	 * large inputs cannot wrap around into the valid range
+	 */
+	errno = 0;
+	budget = strtol(value, &end, 10);
+	if (end == value || *end != '\0' || errno == ERANGE ||
+	    budget < 0 || budget > UINT16_MAX) {
+		AF_XDP_LOG(ERR, "Invalid busy_budget %s, must be >= 0 and <= %u\n",
+				value, UINT16_MAX);
+		return -EINVAL;
+	}
+
+	*i = (int)budget;
+
+	return 0;
+}
+
/** parse integer from integer argument */
static int
parse_integer_arg(const char *key __rte_unused,
static int
parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
- int *queue_cnt, int *shared_umem, char *prog_path)
+ int *queue_cnt, int *shared_umem, char *prog_path,
+ int *busy_budget)
{
int ret;
if (ret < 0)
goto free_kvlist;
+ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_BUDGET_ARG,
+ &parse_budget_arg, busy_budget);
+ if (ret < 0)
+ goto free_kvlist;
+
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
static struct rte_eth_dev *
init_internals(struct rte_vdev_device *dev, const char *if_name,
int start_queue_idx, int queue_cnt, int shared_umem,
- const char *prog_path)
+ const char *prog_path, int busy_budget)
{
const char *name = rte_vdev_device_name(dev);
const unsigned int numa_node = dev->device.numa_node;
internals->rx_queues[i].pair = &internals->tx_queues[i];
internals->rx_queues[i].xsk_queue_idx = start_queue_idx + i;
internals->tx_queues[i].xsk_queue_idx = start_queue_idx + i;
+ internals->rx_queues[i].busy_budget = busy_budget;
}
ret = get_iface_info(if_name, &internals->eth_addr,
int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
int shared_umem = 0;
char prog_path[PATH_MAX] = {'\0'};
+ int busy_budget = -1;
struct rte_eth_dev *eth_dev = NULL;
const char *name;
rte_vdev_device_name(dev));
name = rte_vdev_device_name(dev);
- if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
- strlen(rte_vdev_device_args(dev)) == 0) {
- eth_dev = rte_eth_dev_attach_secondary(name);
- if (eth_dev == NULL) {
- AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
- return -EINVAL;
- }
- eth_dev->dev_ops = &ops;
- rte_eth_dev_probing_finish(eth_dev);
- return 0;
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ AF_XDP_LOG(ERR, "Failed to probe %s. "
+ "AF_XDP PMD does not support secondary processes.\n",
+ name);
+ return -ENOTSUP;
}
kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
dev->device.numa_node = rte_socket_id();
if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
- &xsk_queue_cnt, &shared_umem, prog_path) < 0) {
+ &xsk_queue_cnt, &shared_umem, prog_path,
+ &busy_budget) < 0) {
AF_XDP_LOG(ERR, "Invalid kvargs value\n");
return -EINVAL;
}
return -EINVAL;
}
+ busy_budget = busy_budget == -1 ? ETH_AF_XDP_DFLT_BUSY_BUDGET :
+ busy_budget;
+
eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
- xsk_queue_cnt, shared_umem, prog_path);
+ xsk_queue_cnt, shared_umem, prog_path,
+ busy_budget);
if (eth_dev == NULL) {
AF_XDP_LOG(ERR, "Failed to init internals\n");
return -1;
"start_queue=<int> "
"queue_count=<int> "
"shared_umem=<int> "
- "xdp_prog=<string> ");
+ "xdp_prog=<string> "
+ "busy_budget=<int>");