From: Bruce Richardson Date: Tue, 11 Feb 2014 13:47:39 +0000 (+0000) Subject: examples/netmap_compat: import netmap compatibility example X-Git-Tag: spdx-start~10996 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=06371afe394d465e588392c045dd2be74b7cc2ac;p=dpdk.git examples/netmap_compat: import netmap compatibility example This provides a sample application and library showing how to use the Intel(R) DPDK with basic netmap applications. The Netmap compatibility library provides a minimal set of APIs that allows programs written against the Netmap APIs to run with minimal changes to their source code, using the Intel(R) DPDK to perform the actual packet I/O. Since Netmap applications use regular system calls, like open(), ioctl() and mmap() to communicate with the Netmap kernel module performing the packet I/O, the compat_netmap library provides a set of similar APIs to use in place of those system calls, effectively turning a Netmap application into an Intel(R) DPDK one. The provided library is currently minimal and doesn’t support all the features that Netmap supports, but is enough to run simple applications, such as the bridge example included. The application requires a single command line option: -i INTERFACE, where INTERFACE is the number of a valid Intel(R) DPDK port to use. If a single -i parameter is given, the interface will send back all the traffic it receives. If two -i parameters are given, the two interfaces form a bridge, where traffic received on one interface is replicated and sent by the other interface. Signed-off-by: Bruce Richardson --- diff --git a/examples/netmap_compat/Makefile b/examples/netmap_compat/Makefile new file mode 100644 index 0000000000..8c9b67aba3 --- /dev/null +++ b/examples/netmap_compat/Makefile @@ -0,0 +1,47 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +include $(RTE_SDK)/mk/rte.vars.mk +unexport RTE_SRCDIR RTE_OUTPUT RTE_EXTMK + +DIRS-y += bridge + +.PHONY: all clean $(DIRS-y) + +all: $(DIRS-y) +clean: $(DIRS-y) + +$(DIRS-y): + $(MAKE) -C $@ $(MAKECMDGOALS) O=$(RTE_OUTPUT) diff --git a/examples/netmap_compat/bridge/Makefile b/examples/netmap_compat/bridge/Makefile new file mode 100644 index 0000000000..74feb1e350 --- /dev/null +++ b/examples/netmap_compat/bridge/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +ifeq ($(RTE_SDK),) +$(error "Please define the RTE_SDK environment variable") +endif + +# Default target, can be overridden by command line or environment +RTE_TARGET ?= x86_64-default-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = bridge + +# all sources are stored in SRCS-y +SRCS-y := bridge.c +SRCS-y += $(SRCDIR)/../lib/compat_netmap.c + +CFLAGS += -O3 -I$(SRCDIR)/../lib -I$(SRCDIR)/../netmap +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/netmap_compat/bridge/bridge.c b/examples/netmap_compat/bridge/bridge.c new file mode 100644 index 0000000000..ecf5757651 --- /dev/null +++ b/examples/netmap_compat/bridge/bridge.c @@ -0,0 +1,414 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "compat_netmap.h" + + +#define BUF_SIZE 2048 +#define MBUF_SIZE (BUF_SIZE + sizeof(struct rte_mbuf) + \ + RTE_PKTMBUF_HEADROOM) + +#define MBUF_PER_POOL 8192 + +struct rte_eth_conf eth_conf = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, + .hw_ip_checksum = 0, + .hw_vlan_filter = 0, + .jumbo_frame = 0, + .hw_strip_crc = 0, + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +struct rte_eth_txconf tx_conf = { + .tx_thresh = { + .pthresh = 36, + .hthresh = 0, + .wthresh = 0, + }, + .tx_free_thresh = 0, + .tx_rs_thresh = 0, + .txq_flags = (ETH_TXQ_FLAGS_NOMULTSEGS | + ETH_TXQ_FLAGS_NOVLANOFFL | + ETH_TXQ_FLAGS_NOXSUMSCTP | + ETH_TXQ_FLAGS_NOXSUMUDP | + ETH_TXQ_FLAGS_NOXSUMTCP) +}; + +struct rte_eth_rxconf rx_conf = { + .rx_thresh = { + .pthresh = 8, + .hthresh = 8, + .wthresh = 4, + }, +}; + +#define MAX_QUEUE_NUM 1 +#define RX_QUEUE_NUM 1 +#define TX_QUEUE_NUM 1 + +#define MAX_DESC_NUM 0x400 +#define RX_DESC_NUM 0x100 +#define TX_DESC_NUM 0x200 + +#define RX_SYNC_NUM 0x20 +#define TX_SYNC_NUM 0x20 + +struct rte_netmap_port_conf port_conf = { + .eth_conf = &eth_conf, + .tx_conf = &tx_conf, + .rx_conf = &rx_conf, + .socket_id = SOCKET_ID_ANY, + .nr_tx_rings = TX_QUEUE_NUM, + .nr_rx_rings = RX_QUEUE_NUM, + .nr_tx_slots = TX_DESC_NUM, + .nr_rx_slots = RX_DESC_NUM, + .tx_burst = TX_SYNC_NUM, + .rx_burst = RX_SYNC_NUM, +}; + +struct rte_netmap_conf netmap_conf = { + .socket_id = SOCKET_ID_ANY, + .max_bufsz = BUF_SIZE, + .max_rings = MAX_QUEUE_NUM, + .max_slots = MAX_DESC_NUM, +}; + +static int stop = 0; + +#define MAX_PORT_NUM 2 + +struct netmap_port { + int fd; + struct netmap_if *nmif; + struct netmap_ring *rx_ring; + struct netmap_ring *tx_ring; + const char *str; + uint8_t id; +}; + +static struct { + uint32_t num; + struct netmap_port p[MAX_PORT_NUM]; + void *mem; +} ports; + +static void +usage(const char 
*prgname) +{ + fprintf(stderr, "Usage: %s [EAL args] -- [OPTION]...\n" + "-h, --help \t Show this help message and exit\n" + "-i INTERFACE_A \t Interface (DPDK port number) to use\n" + "[ -i INTERFACE_B \t Interface (DPDK port number) to use ]\n", + prgname); +} + +static uint8_t +parse_portid(const char *portid_str) +{ + char *end; + unsigned id; + + id = strtoul(portid_str, &end, 10); + + if (end == portid_str || *end != '\0' || id > RTE_MAX_ETHPORTS) + rte_exit(EXIT_FAILURE, "Invalid port number\n"); + + return (uint8_t) id; +} + +static int +parse_args(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "hi:")) != -1) { + switch (opt) { + case 'h': + usage(argv[0]); + rte_exit(EXIT_SUCCESS, "exiting..."); + break; + case 'i': + if (ports.num >= RTE_DIM(ports.p)) { + usage(argv[0]); + rte_exit(EXIT_FAILURE, "configs with %u " + "ports are not supported\n", + ports.num + 1); + + } + + ports.p[ports.num].str = optarg; + ports.p[ports.num].id = parse_portid(optarg); + ports.num++; + break; + default: + usage(argv[0]); + rte_exit(EXIT_FAILURE, "invalid option: %c\n", opt); + } + } + + return 0; +} + +static void sigint_handler(__rte_unused int sig) +{ + stop = 1; + signal(SIGINT, SIG_DFL); +} + +static void move(int n, struct netmap_ring *rx, struct netmap_ring *tx) +{ + uint32_t tmp; + + while (n-- > 0) { + tmp = tx->slot[tx->cur].buf_idx; + + tx->slot[tx->cur].buf_idx = rx->slot[rx->cur].buf_idx; + tx->slot[tx->cur].len = rx->slot[rx->cur].len; + tx->slot[tx->cur].flags |= NS_BUF_CHANGED; + tx->cur = NETMAP_RING_NEXT(tx, tx->cur); + tx->avail--; + + rx->slot[rx->cur].buf_idx = tmp; + rx->slot[rx->cur].flags |= NS_BUF_CHANGED; + rx->cur = NETMAP_RING_NEXT(rx, rx->cur); + rx->avail--; + } +} + +static int +netmap_port_open(uint32_t idx) +{ + int err; + struct netmap_port *port; + struct nmreq req; + + port = ports.p + idx; + + port->fd = rte_netmap_open("/dev/netmap", O_RDWR); + + rte_snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str); 
+ req.nr_version = NETMAP_API; + req.nr_ringid = 0; + + err = rte_netmap_ioctl(port->fd, NIOCGINFO, &req); + if (err) { + printf("[E] NIOCGINFO ioctl failed (error %d)\n", err); + return (err); + } + + rte_snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str); + req.nr_version = NETMAP_API; + req.nr_ringid = 0; + + err = rte_netmap_ioctl(port->fd, NIOCREGIF, &req); + if (err) { + printf("[E] NIOCREGIF ioctl failed (error %d)\n", err); + return (err); + } + + /* mmap only once. */ + if (ports.mem == NULL) + ports.mem = rte_netmap_mmap(NULL, req.nr_memsize, + PROT_WRITE | PROT_READ, MAP_PRIVATE, port->fd, 0); + + if (ports.mem == MAP_FAILED) { + printf("[E] NETMAP mmap failed for fd: %d)\n", port->fd); + return (-ENOMEM); + } + + port->nmif = NETMAP_IF(ports.mem, req.nr_offset); + + port->tx_ring = NETMAP_TXRING(port->nmif, 0); + port->rx_ring = NETMAP_RXRING(port->nmif, 0); + + return (0); +} + + +int main(int argc, char *argv[]) +{ + int err, ret; + uint32_t i, pmsk; + struct nmreq req; + struct pollfd pollfd[MAX_PORT_NUM]; + struct rte_mempool *pool; + struct netmap_ring *rx_ring, *tx_ring; + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n"); + + argc -= ret; + argv += ret; + + parse_args(argc, argv); + + if (ports.num == 0) + rte_exit(EXIT_FAILURE, "no ports specified\n"); + + err = rte_pmd_init_all(); + if (err < 0) + rte_exit(EXIT_FAILURE, "rte_pmd_init_all(): error %d\n", err); + + err = rte_eal_pci_probe(); + if (err < 0) + rte_exit(EXIT_FAILURE, "rte_eal_pci_probe(): error %d\n", err); + + if (rte_eth_dev_count() < 1) + rte_exit(EXIT_FAILURE, "Not enough ethernet ports available\n"); + + pool = rte_mempool_create("mbuf_pool", MBUF_PER_POOL, MBUF_SIZE, 32, + sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, NULL, + rte_pktmbuf_init, NULL, + rte_socket_id(), 0); + if (pool == NULL) + rte_exit(EXIT_FAILURE, "Couldn't create mempool\n"); + + netmap_conf.socket_id = rte_socket_id(); + err = 
rte_netmap_init(&netmap_conf); + + if (err < 0) + rte_exit(EXIT_FAILURE, + "Couldn't initialize librte_compat_netmap\n"); + else + printf("librte_compat_netmap initialized\n"); + + port_conf.pool = pool; + port_conf.socket_id = rte_socket_id(); + + for (i = 0; i != ports.num; i++) { + + err = rte_netmap_init_port(ports.p[i].id, &port_conf); + if (err < 0) + rte_exit(EXIT_FAILURE, "Couldn't setup port %hhu\n", + ports.p[i].id); + + rte_eth_promiscuous_enable(ports.p[i].id); + } + + for (i = 0; i != ports.num; i++) { + + err = netmap_port_open(i); + if (err) { + rte_exit(EXIT_FAILURE, "Couldn't set port %hhu " + "under NETMAP control\n", + ports.p[i].id); + } + else + printf("Port %hhu now in Netmap mode\n", ports.p[i].id); + } + + memset(pollfd, 0, sizeof(pollfd)); + + for (i = 0; i != ports.num; i++) { + pollfd[i].fd = ports.p[i].fd; + pollfd[i].events = POLLIN | POLLOUT; + } + + signal(SIGINT, sigint_handler); + + pmsk = ports.num - 1; + + printf("Bridge up and running!\n"); + + while (!stop) { + uint32_t n_pkts; + + pollfd[0].revents = 0; + pollfd[1].revents = 0; + + ret = rte_netmap_poll(pollfd, ports.num, 0); + if (ret < 0) { + stop = 1; + printf("[E] poll returned with error %d\n", ret); + } + + if (((pollfd[0].revents | pollfd[1].revents) & POLLERR) != 0) { + printf("POLLERR!\n"); + } + + if ((pollfd[0].revents & POLLIN) != 0 && + (pollfd[pmsk].revents & POLLOUT) != 0) { + + rx_ring = ports.p[0].rx_ring; + tx_ring = ports.p[pmsk].tx_ring; + + n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail); + move(n_pkts, rx_ring, tx_ring); + } + + if (pmsk != 0 && (pollfd[pmsk].revents & POLLIN) != 0 && + (pollfd[0].revents & POLLOUT) != 0) { + + rx_ring = ports.p[pmsk].rx_ring; + tx_ring = ports.p[0].tx_ring; + + n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail); + move(n_pkts, rx_ring, tx_ring); + } + } + + printf("Bridge stopped!\n"); + + for (i = 0; i != ports.num; i++) { + err = rte_netmap_ioctl(ports.p[i].fd, NIOCUNREGIF, &req); + if (err) { + printf("[E] NIOCUNREGIF 
ioctl failed (error %d)\n", + err); + } + else + printf("Port %hhu unregistered from Netmap mode\n", ports.p[i].id); + + rte_netmap_close(ports.p[i].fd); + } + return 0; +} diff --git a/examples/netmap_compat/lib/compat_netmap.c b/examples/netmap_compat/lib/compat_netmap.c new file mode 100644 index 0000000000..ebb98ffb12 --- /dev/null +++ b/examples/netmap_compat/lib/compat_netmap.c @@ -0,0 +1,902 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compat_netmap.h" + +struct netmap_port { + struct rte_mempool *pool; + struct netmap_if *nmif; + struct rte_eth_conf eth_conf; + struct rte_eth_txconf tx_conf; + struct rte_eth_rxconf rx_conf; + int32_t socket_id; + uint16_t nr_tx_rings; + uint16_t nr_rx_rings; + uint32_t nr_tx_slots; + uint32_t nr_rx_slots; + uint16_t tx_burst; + uint16_t rx_burst; + uint32_t fd; +}; + +struct fd_port { + uint32_t port; +}; + +#define FD_PORT_FREE UINT32_MAX +#define FD_PORT_RSRV (FD_PORT_FREE - 1) + +struct netmap_state { + struct rte_netmap_conf conf; + uintptr_t buf_start; + void *mem; + uint32_t mem_sz; + uint32_t netif_memsz; +}; + + +#define COMPAT_NETMAP_MAX_NOFILE (2 * RTE_MAX_ETHPORTS) +#define COMPAT_NETMAP_MAX_BURST 64 +#define COMPAT_NETMAP_MAX_PKT_PER_SYNC (2 * COMPAT_NETMAP_MAX_BURST) + +static struct netmap_port ports[RTE_MAX_ETHPORTS]; +static struct netmap_state netmap; + +static struct fd_port fd_port[COMPAT_NETMAP_MAX_NOFILE]; +static const int next_fd_start = RLIMIT_NOFILE + 1; +static rte_spinlock_t netmap_lock; + +#define IDX_TO_FD(x) ((x) + next_fd_start) +#define FD_TO_IDX(x) ((x) - next_fd_start) +#define FD_VALID(x) ((x) >= next_fd_start && \ + (x) < (typeof (x))(RTE_DIM(fd_port) + next_fd_start)) + +#define PORT_NUM_RINGS (2 
* netmap.conf.max_rings) +#define PORT_NUM_SLOTS (PORT_NUM_RINGS * netmap.conf.max_slots) + +#define BUF_IDX(port, ring, slot) \ + (((port) * PORT_NUM_RINGS + (ring)) * netmap.conf.max_slots + \ + (slot)) + +#define NETMAP_IF_RING_OFS(rid, rings, slots) ({\ + struct netmap_if *_if; \ + struct netmap_ring *_rg; \ + sizeof(*_if) + \ + (rings) * sizeof(_if->ring_ofs[0]) + \ + (rid) * sizeof(*_rg) + \ + (slots) * sizeof(_rg->slot[0]); \ + }) + +static void netmap_unregif(uint32_t idx, uint32_t port); + + +static int32_t +ifname_to_portid(const char *ifname, uint8_t *port) +{ + char *endptr; + uint64_t portid; + + errno = 0; + portid = strtoul(ifname, &endptr, 10); + if (endptr == ifname || *endptr != '\0' || + portid >= RTE_DIM(ports) || errno != 0) + return (-EINVAL); + + *port = (uint8_t)portid; + return (0); +} + +/** + * Given a dpdk mbuf, fill in the Netmap slot in ring r and its associated + * buffer with the data held by the mbuf. + * Note that mbuf chains are not supported. + */ +static void +mbuf_to_slot(struct rte_mbuf *mbuf, struct netmap_ring *r, uint32_t index) +{ + char *data; + uint16_t length; + + data = rte_pktmbuf_mtod(mbuf, char *); + length = rte_pktmbuf_data_len(mbuf); + + if (length > r->nr_buf_size) + length = 0; + + r->slot[index].len = length; + rte_memcpy(NETMAP_BUF(r, r->slot[index].buf_idx), data, length); +} + +/** + * Given a Netmap ring and a slot index for that ring, construct a dpdk mbuf + * from the data held in the buffer associated with the slot. + * Allocation/deallocation of the dpdk mbuf are the responsibility of the + * caller. + * Note that mbuf chains are not supported. 
+ */ +static void +slot_to_mbuf(struct netmap_ring *r, uint32_t index, struct rte_mbuf *mbuf) +{ + char *data; + uint16_t length; + + rte_pktmbuf_reset(mbuf); + length = r->slot[index].len; + data = rte_pktmbuf_append(mbuf, length); + + if (data != NULL) + rte_memcpy(data, NETMAP_BUF(r, r->slot[index].buf_idx), length); +} + +static int32_t +fd_reserve(void) +{ + uint32_t i; + + for (i = 0; i != RTE_DIM(fd_port) && fd_port[i].port != FD_PORT_FREE; + i++) + ; + + if (i == RTE_DIM(fd_port)) + return (-ENOMEM); + + fd_port[i].port = FD_PORT_RSRV; + return (IDX_TO_FD(i)); +} + +static int32_t +fd_release(int32_t fd) +{ + uint32_t idx, port; + + idx = FD_TO_IDX(fd); + + if (!FD_VALID(fd) || (port = fd_port[idx].port) == FD_PORT_FREE) + return (-EINVAL); + + /* if we still have a valid port attached, release the port */ + if (port < RTE_DIM(ports) && ports[port].fd == idx) { + netmap_unregif(idx, port); + } + + fd_port[idx].port = FD_PORT_FREE; + return (0); +} + +static int +check_nmreq(struct nmreq *req, uint8_t *port) +{ + int32_t rc; + uint8_t portid; + + if (req == NULL) + return (-EINVAL); + + if (req->nr_version != NETMAP_API) { + req->nr_version = NETMAP_API; + return (-EINVAL); + } + + if ((rc = ifname_to_portid(req->nr_name, &portid)) != 0) { + RTE_LOG(ERR, USER1, "Invalid interface name:\"%s\" " + "in NIOCGINFO call\n", req->nr_name); + return (rc); + } + + if (ports[portid].pool == NULL) { + RTE_LOG(ERR, USER1, "Misconfigured portid %hhu\n", portid); + return (-EINVAL); + } + + *port = portid; + return (0); +} + +/** + * Simulate a Netmap NIOCGINFO ioctl: given a struct nmreq holding an interface + * name (a port number in our case), fill the struct nmreq in with advisory + * information about the interface: number of rings and their size, total memory + * required in the map, ... + * Those are preconfigured using rte_eth_{,tx,rx}conf and + * rte_netmap_port_conf structures + * and calls to rte_netmap_init_port() in the Netmap application. 
+ */ +static int +ioctl_niocginfo(__rte_unused int fd, void * param) +{ + uint8_t portid; + struct nmreq *req; + int32_t rc; + + req = (struct nmreq *)param; + if ((rc = check_nmreq(req, &portid)) != 0) + return (rc); + + req->nr_tx_rings = (uint16_t)(ports[portid].nr_tx_rings - 1); + req->nr_rx_rings = (uint16_t)(ports[portid].nr_rx_rings - 1); + req->nr_tx_slots = ports[portid].nr_tx_slots; + req->nr_rx_slots = ports[portid].nr_rx_slots; + + /* in the current implementation all NETIFs share one region. */ + req->nr_memsize = netmap.mem_sz; + req->nr_offset = 0; + + return (0); +} + +static void +netmap_ring_setup(struct netmap_ring *ring, uint8_t port, uint32_t ringid, + uint32_t num_slots) +{ + uint32_t j; + + ring->buf_ofs = netmap.buf_start - (uintptr_t)ring; + ring->num_slots = num_slots; + ring->cur = 0; + ring->reserved = 0; + ring->nr_buf_size = netmap.conf.max_bufsz; + ring->flags = 0; + ring->ts.tv_sec = 0; + ring->ts.tv_usec = 0; + + for (j = 0; j < ring->num_slots; j++) { + ring->slot[j].buf_idx = BUF_IDX(port, ringid, j); + ring->slot[j].len = 0; + ring->flags = 0; + } +} + +static int +netmap_regif(struct nmreq *req, uint32_t idx, uint8_t port) +{ + struct netmap_if *nmif; + struct netmap_ring *ring; + uint32_t i, slots, start_ring; + int32_t rc; + + if (ports[port].fd < RTE_DIM(fd_port)) { + RTE_LOG(ERR, USER1, "port %hhu already in use by fd: %u\n", + port, IDX_TO_FD(ports[port].fd)); + return (-EBUSY); + } + if (fd_port[idx].port != FD_PORT_RSRV) { + RTE_LOG(ERR, USER1, "fd: %u is misconfigured\n", + IDX_TO_FD(idx)); + return (-EBUSY); + } + + nmif = ports[port].nmif; + + /* setup netmap_if fields. */ + memset(nmif, 0, netmap.netif_memsz); + + /* only ALL rings supported right now. 
*/ + if (req->nr_ringid != 0) + return (-EINVAL); + + rte_snprintf(nmif->ni_name, sizeof(nmif->ni_name), "%s", req->nr_name); + nmif->ni_version = req->nr_version; + + /* Netmap uses ni_(r|t)x_rings + 1 */ + nmif->ni_rx_rings = ports[port].nr_rx_rings - 1; + nmif->ni_tx_rings = ports[port].nr_tx_rings - 1; + + /* + * Setup TX rings and slots. + * Refer to the comments in netmap.h for details + */ + + slots = 0; + for (i = 0; i < nmif->ni_tx_rings + 1; i++) { + + nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i, + PORT_NUM_RINGS, slots); + + ring = NETMAP_TXRING(nmif, i); + netmap_ring_setup(ring, port, i, ports[port].nr_tx_slots); + ring->avail = ring->num_slots; + + slots += ports[port].nr_tx_slots; + } + + /* + * Setup RX rings and slots. + * Refer to the comments in netmap.h for details + */ + + start_ring = i; + + for (; i < nmif->ni_rx_rings + 1 + start_ring; i++) { + + nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i, + PORT_NUM_RINGS, slots); + + ring = NETMAP_RXRING(nmif, (i - start_ring)); + netmap_ring_setup(ring, port, i, ports[port].nr_rx_slots); + ring->avail = 0; + + slots += ports[port].nr_rx_slots; + } + + if ((rc = rte_eth_dev_start(port)) < 0) { + RTE_LOG(ERR, USER1, + "Couldn't start ethernet device %s (error %d)\n", + req->nr_name, rc); + return (rc); + } + + /* setup fdi <--> port relationtip. 
*/ + ports[port].fd = idx; + fd_port[idx].port = port; + + req->nr_memsize = netmap.mem_sz; + req->nr_offset = (uintptr_t)nmif - (uintptr_t)netmap.mem; + + return (0); +} + +/** + * Simulate a Netmap NIOCREGIF ioctl: + */ +static int +ioctl_niocregif(int32_t fd, void * param) +{ + uint8_t portid; + int32_t rc; + uint32_t idx; + struct nmreq *req; + + req = (struct nmreq *)param; + if ((rc = check_nmreq(req, &portid)) != 0) + return (rc); + + idx = FD_TO_IDX(fd); + + rte_spinlock_lock(&netmap_lock); + rc = netmap_regif(req, idx, portid); + rte_spinlock_unlock(&netmap_lock); + + return (rc); +} + +static void +netmap_unregif(uint32_t idx, uint32_t port) +{ + fd_port[idx].port = FD_PORT_RSRV; + ports[port].fd = UINT32_MAX; + rte_eth_dev_stop((uint8_t)port); +} + +/** + * Simulate a Netmap NIOCUNREGIF ioctl: put an interface running in Netmap + * mode back in "normal" mode. In our case, we just stop the port associated + * with this file descriptor. + */ +static int +ioctl_niocunregif(int fd) +{ + uint32_t idx, port; + int32_t rc; + + idx = FD_TO_IDX(fd); + + rte_spinlock_lock(&netmap_lock); + + port = fd_port[idx].port; + if (port < RTE_DIM(ports) && ports[port].fd == idx) { + netmap_unregif(idx, port); + rc = 0; + } else { + RTE_LOG(ERR, USER1, + "%s: %d is not associated with valid port\n", + __func__, fd); + rc = -EINVAL; + } + + rte_spinlock_unlock(&netmap_lock); + return (rc); +} + +/** + * A call to rx_sync_ring will try to fill a Netmap RX ring with as many + * packets as it can hold coming from its dpdk port. 
+ */ +static inline int +rx_sync_ring(struct netmap_ring *ring, uint8_t port, uint16_t ring_number, + uint16_t max_burst) +{ + int32_t i, n_rx; + uint16_t burst_size; + uint32_t cur_slot, n_free_slots; + struct rte_mbuf *rx_mbufs[COMPAT_NETMAP_MAX_BURST]; + + n_free_slots = ring->num_slots - (ring->avail + ring->reserved); + n_free_slots = RTE_MIN(n_free_slots, max_burst); + cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1); + + while (n_free_slots) { + burst_size = (uint16_t)RTE_MIN(n_free_slots, RTE_DIM(rx_mbufs)); + + /* receive up to burst_size packets from the NIC's queue */ + n_rx = rte_eth_rx_burst(port, ring_number, rx_mbufs, + burst_size); + + if (n_rx == 0) + return 0; + if (unlikely(n_rx < 0)) + return -1; + + /* Put those n_rx packets in the Netmap structures */ + for (i = 0; i < n_rx ; i++) { + mbuf_to_slot(rx_mbufs[i], ring, cur_slot); + rte_pktmbuf_free(rx_mbufs[i]); + cur_slot = NETMAP_RING_NEXT(ring, cur_slot); + } + + /* Update the Netmap ring structure to reflect the change */ + ring->avail += n_rx; + n_free_slots -= n_rx; + } + + return 0; +} + +static inline int +rx_sync_if(uint32_t port) +{ + uint16_t burst; + uint32_t i, rc; + struct netmap_if *nifp; + struct netmap_ring *r; + + nifp = ports[port].nmif; + burst = ports[port].rx_burst; + rc = 0; + + for (i = 0; i < nifp->ni_rx_rings + 1; i++) { + r = NETMAP_RXRING(nifp, i); + rx_sync_ring(r, (uint8_t)port, (uint16_t)i, burst); + rc += r->avail; + } + + return (rc); +} + +/** + * Simulate a Netmap NIOCRXSYNC ioctl: + */ +static int +ioctl_niocrxsync(int fd) +{ + uint32_t idx, port; + + idx = FD_TO_IDX(fd); + if ((port = fd_port[idx].port) < RTE_DIM(ports) && + ports[port].fd == idx) { + return (rx_sync_if(fd_port[idx].port)); + } else { + return (-EINVAL); + } +} + +/** + * A call to tx_sync_ring will try to empty a Netmap TX ring by converting its + * buffers into rte_mbufs and sending them out on the rings's dpdk port. 
+ */
+static int
+tx_sync_ring(struct netmap_ring *ring, uint8_t port, uint16_t ring_number,
+	struct rte_mempool *pool, uint16_t max_burst)
+{
+	uint32_t i, n_tx;
+	uint16_t burst_size;
+	uint32_t cur_slot, n_used_slots;
+	struct rte_mbuf *tx_mbufs[COMPAT_NETMAP_MAX_BURST];
+
+	/* Slots not available to the application, capped at max_burst. */
+	n_used_slots = ring->num_slots - ring->avail;
+	n_used_slots = RTE_MIN(n_used_slots, max_burst);
+	/* The mask only wraps correctly if num_slots is a power of two. */
+	cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1);
+
+	while (n_used_slots) {
+		burst_size = (uint16_t)RTE_MIN(n_used_slots, RTE_DIM(tx_mbufs));
+
+		for (i = 0; i < burst_size; i++) {
+			tx_mbufs[i] = rte_pktmbuf_alloc(pool);
+			if (tx_mbufs[i] == NULL)
+				goto err;
+
+			slot_to_mbuf(ring, cur_slot, tx_mbufs[i]);
+			cur_slot = NETMAP_RING_NEXT(ring, cur_slot);
+		}
+
+		n_tx = rte_eth_tx_burst(port, ring_number, tx_mbufs,
+			burst_size);
+
+		/* Update the Netmap ring structure to reflect the change */
+		ring->avail += n_tx;
+		n_used_slots -= n_tx;
+
+		/* Return the mbufs that failed to transmit to their pool */
+		if (unlikely(n_tx != burst_size)) {
+			for (i = n_tx; i < burst_size; i++)
+				rte_pktmbuf_free(tx_mbufs[i]);
+			break;
+		}
+	}
+
+	return 0;
+
+err:
+	/*
+	 * Allocation of tx_mbufs[i] failed: entries 0 .. i-1 of the current
+	 * burst were allocated but never handed to the NIC, so free exactly
+	 * those (and never the NULL entry at index i).
+	 */
+	while (i-- > 0)
+		rte_pktmbuf_free(tx_mbufs[i]);
+
+	RTE_LOG(ERR, USER1,
+		"Couldn't get mbuf from mempool is the mempool too small?\n");
+	return -1;
+}
+
+/**
+ * Flush the TX rings of a port (ring indices 0 .. ni_tx_rings inclusive).
+ *
+ * @return
+ *   Sum of the 'avail' counters of those rings after the flush.
+ */
+static int
+tx_sync_if(uint32_t port)
+{
+	uint16_t burst;
+	uint32_t i, rc;
+	struct netmap_if *nifp;
+	struct netmap_ring *r;
+	struct rte_mempool *mp;
+
+	nifp = ports[port].nmif;
+	mp = ports[port].pool;
+	burst = ports[port].tx_burst;
+	rc = 0;
+
+	for (i = 0; i < nifp->ni_tx_rings + 1; i++) {
+		r = NETMAP_TXRING(nifp, i);
+		/* Best effort: a ring that fails still contributes its avail. */
+		tx_sync_ring(r, (uint8_t)port, (uint16_t)i, mp, burst);
+		rc += r->avail;
+	}
+
+	return (rc);
+}
+
+/**
+ * Simulate a Netmap NIOCTXSYNC ioctl:
+ * flush the TX rings of the port bound to fd, or return -EINVAL when fd is
+ * not bound to a port.
+ */
+static inline int
+ioctl_nioctxsync(int fd)
+{
+	uint32_t idx, port;
+
+	idx = FD_TO_IDX(fd);
+	if ((port = fd_port[idx].port) < RTE_DIM(ports) &&
+			ports[port].fd == idx) {
+		return (tx_sync_if(fd_port[idx].port));
+	} else {
+		return (-EINVAL);
+	}
+}
+
+/**
+ * Allocate the shared netmap memory region (one netmap_if area per port,
+ * followed by the packet buffers) and mark every port and fd slot unused.
+ *
+ * @return
+ *   0 on success, -EINVAL if conf is NULL, -ENOMEM if the region is larger
+ *   than UINT32_MAX bytes or cannot be allocated.
+ */
+int
+rte_netmap_init(const struct rte_netmap_conf *conf)
+{
+	size_t buf_ofs, nmif_sz, sz;
+	size_t port_rings, port_slots, port_bufs;
+	uint32_t i, port_num;
+
+	if (conf == NULL)
+		return (-EINVAL);
+
+	port_num = RTE_MAX_ETHPORTS;
+	port_rings = 2 * conf->max_rings;
+	port_slots = port_rings * conf->max_slots;
+	port_bufs = port_slots;
+
+	nmif_sz = NETMAP_IF_RING_OFS(port_rings, port_rings, port_slots);
+	sz = nmif_sz * port_num;
+
+	buf_ofs = RTE_ALIGN_CEIL(sz, CACHE_LINE_SIZE);
+	sz = buf_ofs + port_bufs * conf->max_bufsz * port_num;
+
+	if (sz > UINT32_MAX ||
+			(netmap.mem = rte_zmalloc_socket(__func__, sz,
+			CACHE_LINE_SIZE, conf->socket_id)) == NULL) {
+		RTE_LOG(ERR, USER1, "%s: failed to allocate %zu bytes\n",
+			__func__, sz);
+		return (-ENOMEM);
+	}
+
+	netmap.mem_sz = sz;
+	netmap.netif_memsz = nmif_sz;
+	netmap.buf_start = (uintptr_t)netmap.mem + buf_ofs;
+	netmap.conf = *conf;
+
+	rte_spinlock_init(&netmap_lock);
+
+	/* Mark all ports as unused and set NETIF pointer. */
+	for (i = 0; i != RTE_DIM(ports); i++) {
+		ports[i].fd = UINT32_MAX;
+		ports[i].nmif = (struct netmap_if *)
+			((uintptr_t)netmap.mem + nmif_sz * i);
+	}
+
+	/* Mark all fd_ports as unused. */
+	for (i = 0; i != RTE_DIM(fd_port); i++) {
+		fd_port[i].port = FD_PORT_FREE;
+	}
+
+	return (0);
+}
+
+
+/**
+ * Configure an ethernet port and its RX/TX queues for use through the
+ * netmap compatibility layer, then remember the configuration.
+ *
+ * Slot counts are rounded up to the next power of two before use.
+ *
+ * @return
+ *   0 on success, -EINVAL on invalid parameters, otherwise the negative
+ *   value returned by the failing rte_eth_* call.
+ */
+int
+rte_netmap_init_port(uint8_t portid, const struct rte_netmap_port_conf *conf)
+{
+	int32_t ret;
+	uint16_t i, nr_rings;
+	uint32_t rx_slots, tx_slots;
+
+	/* The three conf pointers are dereferenced below, so reject NULL. */
+	if (conf == NULL ||
+			conf->eth_conf == NULL ||
+			conf->tx_conf == NULL ||
+			conf->rx_conf == NULL ||
+			portid >= RTE_DIM(ports) ||
+			conf->nr_tx_rings > netmap.conf.max_rings ||
+			conf->nr_rx_rings > netmap.conf.max_rings) {
+		RTE_LOG(ERR, USER1, "%s(%hhu): invalid parameters\n",
+			__func__, portid);
+		return (-EINVAL);
+	}
+
+	/*
+	 * Keep the rounded values in 32 bits until they are range-checked:
+	 * narrowing first would turn e.g. 65536 into 0 and pass the check.
+	 */
+	rx_slots = rte_align32pow2(conf->nr_rx_slots);
+	tx_slots = rte_align32pow2(conf->nr_tx_slots);
+
+	if (tx_slots > netmap.conf.max_slots ||
+			rx_slots > netmap.conf.max_slots ||
+			tx_slots > UINT16_MAX ||
+			rx_slots > UINT16_MAX) {
+		RTE_LOG(ERR, USER1, "%s(%hhu): invalid parameters\n",
+			__func__, portid);
+		return (-EINVAL);
+	}
+
+	ret = rte_eth_dev_configure(portid, conf->nr_rx_rings,
+		conf->nr_tx_rings, conf->eth_conf);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, USER1, "Couldn't configure port %hhu\n", portid);
+		return (ret);
+	}
+
+	/*
+	 * Walk up to the larger ring count and guard each direction with its
+	 * own bound, so every configured RX and TX queue is set up and no
+	 * queue index beyond the configured count is touched.
+	 */
+	nr_rings = RTE_MAX(conf->nr_tx_rings, conf->nr_rx_rings);
+
+	for (i = 0; i < nr_rings; i++) {
+		if (i < conf->nr_tx_rings) {
+			ret = rte_eth_tx_queue_setup(portid, i,
+				(uint16_t)tx_slots, conf->socket_id,
+				conf->tx_conf);
+
+			if (ret < 0) {
+				RTE_LOG(ERR, USER1,
+					"Couldn't configure TX queue %hu of "
+					"port %hhu\n",
+					i, portid);
+				return (ret);
+			}
+		}
+
+		if (i < conf->nr_rx_rings) {
+			ret = rte_eth_rx_queue_setup(portid, i,
+				(uint16_t)rx_slots, conf->socket_id,
+				conf->rx_conf, conf->pool);
+
+			if (ret < 0) {
+				RTE_LOG(ERR, USER1,
+					"Couldn't configure RX queue %hu of "
+					"port %hhu\n",
+					i, portid);
+				return (ret);
+			}
+		}
+	}
+
+	/* copy config to the private storage. */
+	ports[portid].eth_conf = conf->eth_conf[0];
+	ports[portid].rx_conf = conf->rx_conf[0];
+	ports[portid].tx_conf = conf->tx_conf[0];
+	ports[portid].pool = conf->pool;
+	ports[portid].socket_id = conf->socket_id;
+	ports[portid].nr_tx_rings = conf->nr_tx_rings;
+	ports[portid].nr_rx_rings = conf->nr_rx_rings;
+	ports[portid].nr_tx_slots = (uint16_t)tx_slots;
+	ports[portid].nr_rx_slots = (uint16_t)rx_slots;
+	ports[portid].tx_burst = conf->tx_burst;
+	ports[portid].rx_burst = conf->rx_burst;
+
+	return (0);
+}
+
+/**
+ * Release a descriptor obtained from rte_netmap_open().
+ *
+ * @return
+ *   The non-negative result of fd_release() on success; -1 with errno set
+ *   when fd_release() reports an error.
+ */
+int
+rte_netmap_close(int fd)
+{
+	int32_t rc;
+
+	rte_spinlock_lock(&netmap_lock);
+	rc = fd_release(fd);
+	rte_spinlock_unlock(&netmap_lock);
+
+	if (rc < 0) {
+		errno = -rc;
+		rc = -1;
+	}
+	return (rc);
+}
+
+/**
+ * Dispatch a netmap ioctl request to its emulation.
+ *
+ * @return
+ *   0 on success (any positive handler result is folded to 0); -1 with
+ *   errno set on failure (EBADF for an invalid fd, ENOTTY for an unknown
+ *   op, otherwise the handler's error).
+ */
+int rte_netmap_ioctl(int fd, int op, void *param)
+{
+	int ret;
+
+	if (!FD_VALID(fd)) {
+		errno = EBADF;
+		return (-1);
+	}
+
+	switch (op) {
+
+	case NIOCGINFO:
+		ret = ioctl_niocginfo(fd, param);
+		break;
+
+	case NIOCREGIF:
+		ret = ioctl_niocregif(fd, param);
+		break;
+
+	case NIOCUNREGIF:
+		ret = ioctl_niocunregif(fd);
+		break;
+
+	case NIOCRXSYNC:
+		ret = ioctl_niocrxsync(fd);
+		break;
+
+	case NIOCTXSYNC:
+		ret = ioctl_nioctxsync(fd);
+		break;
+
+	default:
+		ret = -ENOTTY;
+	}
+
+	if (ret < 0) {
+		errno = -ret;
+		ret = -1;
+	} else {
+		ret = 0;
+	}
+
+	return (ret);
+}
+
+/**
+ * Emulate mmap() on a netmap descriptor: no mapping is created, the call
+ * returns a pointer inside the region allocated by rte_netmap_init().
+ *
+ * @return
+ *   netmap.mem + offset, or MAP_FAILED with errno = EINVAL when the fd is
+ *   invalid, the [offset, offset + length) window does not fit in the
+ *   region, prot lacks PROT_READ|PROT_WRITE, or MAP_FIXED is requested
+ *   with a non-NULL addr.
+ */
+void *
+rte_netmap_mmap(void *addr, size_t length,
+	int prot, int flags, int fd, off_t offset)
+{
+	static const int cprot = PROT_WRITE | PROT_READ;
+
+	/*
+	 * Check offset and length separately: "length + offset" can wrap (or
+	 * be lowered by a negative offset) and slip past a single comparison.
+	 */
+	if (!FD_VALID(fd) || offset < 0 ||
+			(uint64_t)offset > netmap.mem_sz ||
+			length > netmap.mem_sz - (uint64_t)offset ||
+			(prot & cprot) != cprot ||
+			((flags & MAP_FIXED) != 0 && addr != NULL)) {
+
+		errno = EINVAL;
+		return (MAP_FAILED);
+	}
+
+	return ((void *)((uintptr_t)netmap.mem + offset));
+}
+
+/**
+ * Return a "fake" file descriptor with a value above RLIMIT_NOFILE so that
+ * any attempt to use that file descriptor with the usual API will fail.
+ */
+int
+rte_netmap_open(__rte_unused const char *pathname, __rte_unused int flags)
+{
+	int fd;
+
+	rte_spinlock_lock(&netmap_lock);
+	fd = fd_reserve();
+	rte_spinlock_unlock(&netmap_lock);
+
+	if (fd < 0) {
+		errno = -fd;
+		fd = -1;
+	}
+	return (fd);
+}
+
+/**
+ * Doesn't support timeout other than 0 or infinite (negative) timeout
+ *
+ * A timeout of 0 performs a single pass over fds; a negative timeout
+ * repeats the pass until at least one descriptor reports an event.
+ *
+ * @return
+ *   Number of descriptors with a non-zero revents, or -1 with
+ *   errno = EINVAL for a positive (unsupported) timeout.
+ */
+int
+rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout)
+{
+	int32_t count_it, ret;
+	uint32_t i, idx, port;
+	uint32_t want_rx, want_tx;
+
+	/* Timed waits are not implemented; reject them instead of spinning. */
+	if (timeout > 0) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	ret = 0;
+	do {
+		for (i = 0; i < nfds; i++) {
+
+			count_it = 0;
+
+			/* revents is output only: drop what the caller left. */
+			fds[i].revents = 0;
+
+			if (!FD_VALID(fds[i].fd) || fds[i].events == 0)
+				continue;
+
+			idx = FD_TO_IDX(fds[i].fd);
+			if ((port = fd_port[idx].port) >= RTE_DIM(ports) ||
+					ports[port].fd != idx) {
+
+				fds[i].revents |= POLLERR;
+				ret++;
+				continue;
+			}
+
+			want_rx = fds[i].events & (POLLIN | POLLRDNORM);
+			want_tx = fds[i].events & (POLLOUT | POLLWRNORM);
+
+			if (want_rx && rx_sync_if(port) > 0) {
+				fds[i].revents = (uint16_t)
+					(fds[i].revents | want_rx);
+				count_it = 1;
+			}
+			if (want_tx && tx_sync_if(port) > 0) {
+				fds[i].revents = (uint16_t)
+					(fds[i].revents | want_tx);
+				count_it = 1;
+			}
+
+			ret += count_it;
+		}
+	}
+	/* Only an infinite (negative) timeout keeps polling, until an event. */
+	while (ret == 0 && timeout < 0);
+
+	return ret;
+}
diff --git a/examples/netmap_compat/lib/compat_netmap.h b/examples/netmap_compat/lib/compat_netmap.h
new file mode 100644
index 0000000000..0971443402
--- /dev/null
+++ b/examples/netmap_compat/lib/compat_netmap.h
@@ -0,0 +1,82 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_COMPAT_NETMAP_H_
+#define _RTE_COMPAT_NETMAP_H_
+#include <poll.h>		/* struct pollfd, nfds_t */
+#include <linux/ioctl.h>	/* _IO, _IOWR used by netmap.h */
+#include <net/if.h>		/* IFNAMSIZ used by netmap.h */
+
+#include <rte_ethdev.h>		/* struct rte_eth_conf, rte_eth_{rx,tx}conf */
+#include <rte_mempool.h>	/* struct rte_mempool */
+
+#include "netmap.h"
+#include "netmap_user.h"
+
+/**
+ * One can overwrite Netmap macros here as needed
+ */
+
+struct rte_netmap_conf {
+	int32_t  socket_id; /* socket the shared netmap region is allocated on */
+	uint32_t max_rings; /* number of rings(queues) per netmap_if(port) */
+	uint32_t max_slots; /* number of slots(descriptors) per netmap ring. */
+	uint16_t max_bufsz; /* size of each netmap buffer. */
+};
+
+struct rte_netmap_port_conf {
+	struct rte_eth_conf   *eth_conf; /* passed to rte_eth_dev_configure() */
+	struct rte_eth_txconf *tx_conf;  /* passed to rte_eth_tx_queue_setup() */
+	struct rte_eth_rxconf *rx_conf;  /* passed to rte_eth_rx_queue_setup() */
+	struct rte_mempool    *pool;     /* mbuf pool for RX queues and TX copies */
+	int32_t socket_id;               /* socket used for the queue setup calls */
+	uint16_t nr_tx_rings;            /* must not exceed max_rings */
+	uint16_t nr_rx_rings;            /* must not exceed max_rings */
+	uint32_t nr_tx_slots;            /* rounded up to a power of two */
+	uint32_t nr_rx_slots;            /* rounded up to a power of two */
+	uint16_t tx_burst;               /* max slots flushed per TX ring sync */
+	uint16_t rx_burst;
+};
+
+int rte_netmap_init(const struct rte_netmap_conf *conf);
+int rte_netmap_init_port(uint8_t portid,
+	const struct rte_netmap_port_conf *conf);
+
+int rte_netmap_close(int fd);
+int rte_netmap_ioctl(int fd, int op, void *param);
+int rte_netmap_open(const char *pathname, int flags);
+int rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout);
+void *rte_netmap_mmap(void *addr, size_t length, int prot, int flags, int fd,
+	off_t offset);
+
+#endif /* _RTE_COMPAT_NETMAP_H_ */
diff --git a/examples/netmap_compat/netmap/netmap.h b/examples/netmap_compat/netmap/netmap.h
new file mode 100644
index 0000000000..8dba31c6da
--- /dev/null
+++ b/examples/netmap_compat/netmap/netmap.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the
+ *      distribution.
+ *
+ *   3. Neither the name of the authors nor the names of their contributors
+ *      may be used to endorse or promote products derived from this
+ *      software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * $FreeBSD: head/sys/net/netmap.h 231198 2012-02-08 11:43:29Z luigi $ + * $Id: netmap.h 10879 2012-04-12 22:48:59Z luigi $ + * + * Definitions of constants and the structures used by the netmap + * framework, for the part visible to both kernel and userspace. + * Detailed info on netmap is available with "man netmap" or at + * + * http://info.iet.unipi.it/~luigi/netmap/ + */ + +#ifndef _NET_NETMAP_H_ +#define _NET_NETMAP_H_ + +/* + * --- Netmap data structures --- + * + * The data structures used by netmap are shown below. Those in + * capital letters are in an mmapp()ed area shared with userspace, + * while others are private to the kernel. + * Shared structures do not contain pointers but only memory + * offsets, so that addressing is portable between kernel and userspace. + + + softc ++----------------+ +| standard fields| +| if_pspare[0] ----------+ ++----------------+ | + | ++----------------+<------+ +|(netmap_adapter)| +| | netmap_kring +| tx_rings *--------------------------------->+---------------+ +| | netmap_kring | ring *---------. +| rx_rings *--------->+---------------+ | nr_hwcur | | ++----------------+ | ring *--------. 
| nr_hwavail | V + | nr_hwcur | | | selinfo | | + | nr_hwavail | | +---------------+ . + | selinfo | | | ... | . + +---------------+ | |(ntx+1 entries)| + | .... | | | | + |(nrx+1 entries)| | +---------------+ + | | | + KERNEL +---------------+ | + | + ==================================================================== + | + USERSPACE | NETMAP_RING + +---->+-------------+ + / | cur | + NETMAP_IF (nifp, one per file desc.) / | avail | + +---------------+ / | buf_ofs | + | ni_tx_rings | / +=============+ + | ni_rx_rings | / | buf_idx | slot[0] + | | / | len, flags | + | | / +-------------+ + +===============+ / | buf_idx | slot[1] + | txring_ofs[0] | (rel.to nifp)--' | len, flags | + | txring_ofs[1] | +-------------+ + (num_rings+1 entries) (nr_num_slots entries) + | txring_ofs[n] | | buf_idx | slot[n-1] + +---------------+ | len, flags | + | rxring_ofs[0] | +-------------+ + | rxring_ofs[1] | + (num_rings+1 entries) + | rxring_ofs[n] | + +---------------+ + + * The private descriptor ('softc' or 'adapter') of each interface + * is extended with a "struct netmap_adapter" containing netmap-related + * info (see description in dev/netmap/netmap_kernel.h). + * Among other things, tx_rings and rx_rings point to the arrays of + * "struct netmap_kring" which in turn reach the various + * "struct netmap_ring", shared with userspace. + + * The NETMAP_RING is the userspace-visible replica of the NIC ring. + * Each slot has the index of a buffer, its length and some flags. + * In user space, the buffer address is computed as + * (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE + * In the kernel, buffers do not necessarily need to be contiguous, + * and the virtual and physical addresses are derived through + * a lookup table. + * To associate a different buffer to a slot, applications must + * write the new index in buf_idx, and set NS_BUF_CHANGED flag to + * make sure that the kernel updates the hardware ring as needed.
+ *
+ * Normally the driver is not requested to report the result of
+ * transmissions (this can dramatically speed up operation).
+ * However the user may request to report completion by setting
+ * NS_REPORT.
+ */
+struct netmap_slot {
+	uint32_t buf_idx; /* buffer index */
+	uint16_t len;	/* packet length, to be copied to/from the hw ring */
+	uint16_t flags;	/* buf changed, etc. */
+#define	NS_BUF_CHANGED	0x0001	/* must resync the map, buffer changed */
+#define	NS_REPORT	0x0002	/* ask the hardware to report results
+				 * e.g. by generating an interrupt
+				 */
+};
+
+/*
+ * Netmap representation of a TX or RX ring (also known as "queue").
+ * This is a queue implemented as a fixed-size circular array.
+ * At the software level, two fields are important: avail and cur.
+ *
+ * In TX rings:
+ *	avail	indicates the number of slots available for transmission.
+ *		It is updated by the kernel after every netmap system call.
+ *		It MUST BE decremented by the application when it appends a
+ *		packet.
+ *	cur	indicates the slot to use for the next packet
+ *		to send (i.e. the "tail" of the queue).
+ *		It MUST BE incremented by the application before
+ *		netmap system calls to reflect the number of newly
+ *		sent packets.
+ *		It is checked by the kernel on netmap system calls
+ *		(normally unmodified by the kernel unless invalid).
+ *
+ *   The kernel side of netmap uses two additional fields in its own
+ *   private ring structure, netmap_kring:
+ *	nr_hwcur is a copy of nr_cur on an NIOCTXSYNC.
+ *	nr_hwavail is the number of slots known as available by the
+ *		hardware. It is updated on an INTR (inc by the
+ *		number of packets sent) and on a NIOCTXSYNC
+ *		(decrease by nr_cur - nr_hwcur)
+ *		A special case, nr_hwavail is -1 if the transmit
+ *		side is idle (no pending transmits).
+ *
+ * In RX rings:
+ *	avail	is the number of packets available (possibly 0).
+ *		It MUST BE decremented by the application when it consumes
+ *		a packet, and it is updated to nr_hwavail on a NIOCRXSYNC
+ *	cur	indicates the first slot that contains a packet not
+ *		processed yet (the "head" of the queue).
+ *		It MUST BE incremented by the software when it consumes
+ *		a packet.
+ *	reserved	indicates the number of buffers before 'cur'
+ *		that the application has still in use. Normally 0,
+ *		it MUST BE incremented by the application when it
+ *		does not return the buffer immediately, and decremented
+ *		when the buffer is finally freed.
+ *
+ *   The kernel side of netmap uses two additional fields in the kring:
+ *	nr_hwcur is a copy of nr_cur on an NIOCRXSYNC
+ *	nr_hwavail is the number of packets available. It is updated
+ *		on INTR (inc by the number of new packets arrived)
+ *		and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur).
+ *
+ * DATA OWNERSHIP/LOCKING:
+ *	The netmap_ring is owned by the user program and it is only
+ *	accessed or modified in the upper half of the kernel during
+ *	a system call.
+ *
+ *	The netmap_kring is only modified by the upper half of the kernel.
+ */
+struct netmap_ring {
+	/*
+	 * buf_ofs is meant to be used through macros (see NETMAP_BUF).
+	 * It contains the offset of the buffer region from this
+	 * descriptor.
+	 */
+	ssize_t	buf_ofs;
+	uint32_t	num_slots;	/* number of slots in the ring. */
+	uint32_t	avail;		/* number of usable slots */
+	uint32_t	cur;		/* 'current' r/w position */
+	uint32_t	reserved;	/* not refilled before current */
+
+	uint16_t	nr_buf_size;
+	uint16_t	flags;
+#define	NR_TIMESTAMP	0x0002		/* set timestamp on *sync() */
+
+	struct timeval	ts;		/* time of last *sync() */
+
+	/* the slots follow. This struct has variable size */
+	struct netmap_slot slot[0];	/* array of slots. */
+};
+
+
+/*
+ * Netmap representation of an interface and its queue(s).
+ * There is one netmap_if for each file descriptor on which we want
+ * to select/poll.  We assume that each interface has the same number
+ * of receive and transmit queues.
+ * select/poll operates on one or all pairs depending on the value of
+ * nmr_queueid passed on the ioctl.
+ */
+struct netmap_if {
+	char		ni_name[IFNAMSIZ]; /* name of the interface. */
+	u_int	ni_version;	/* API version, currently unused */
+	u_int	ni_rx_rings;	/* number of rx rings */
+	u_int	ni_tx_rings;	/* if zero, same as ni_rx_rings */
+	/*
+	 * The following array contains the offset of each netmap ring
+	 * from this structure. The first ni_tx_rings+1 entries refer
+	 * to the tx rings, the next ni_rx_rings+1 refer to the rx rings
+	 * (the last entry in each block refers to the host stack rings).
+	 * The area is filled up by the kernel on NIOCREGIF,
+	 * and then only read by userspace code.
+	 */
+	ssize_t	ring_ofs[0];
+};
+
+#ifndef NIOCREGIF
+/*
+ * ioctl names and related fields
+ *
+ * NIOCGINFO takes a struct ifreq, the interface name is the input,
+ *	the outputs are number of queues and number of descriptor
+ *	for each queue (useful to set number of threads etc.).
+ *
+ * NIOCREGIF takes an interface name within a struct ifreq,
+ *	and activates netmap mode on the interface (if possible).
+ *
+ * NIOCUNREGIF unregisters the interface associated to the fd.
+ *
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ *	whose identity is set in NIOCREGIF through nr_ringid
+ */
+
+/*
+ * struct nmreq overlays a struct ifreq
+ */
+struct nmreq {
+	char		nr_name[IFNAMSIZ];
+	uint32_t	nr_version;	/* API version */
+#define	NETMAP_API	3		/* current version */
+	uint32_t	nr_offset;	/* nifp offset in the shared region */
+	uint32_t	nr_memsize;	/* size of the shared region */
+	uint32_t	nr_tx_slots;	/* slots in tx rings */
+	uint32_t	nr_rx_slots;	/* slots in rx rings */
+	uint16_t	nr_tx_rings;	/* number of tx rings */
+	uint16_t	nr_rx_rings;	/* number of rx rings */
+	uint16_t	nr_ringid;	/* ring(s) we care about */
+#define NETMAP_HW_RING	0x4000		/* low bits indicate one hw ring */
+#define NETMAP_SW_RING	0x2000		/* process the sw ring */
+#define NETMAP_NO_TX_POLL	0x1000	/* no automatic txsync on poll */
+#define NETMAP_RING_MASK 0xfff		/* the ring number */
+	uint16_t	spare1;
+	uint32_t	spare2[4];
+};
+
+/*
+ * FreeBSD uses the size value embedded in the _IOWR to determine
+ * how much to copy in/out. So we need it to match the actual
+ * data structure we pass. We put some spares in the structure
+ * to ease compatibility with other versions
+ */
+#define NIOCGINFO	_IOWR('i', 145, struct nmreq) /* return IF info */
+#define NIOCREGIF	_IOWR('i', 146, struct nmreq) /* interface register */
+#define NIOCUNREGIF	_IO('i', 147) /* interface unregister */
+#define NIOCTXSYNC	_IO('i', 148) /* sync tx queues */
+#define NIOCRXSYNC	_IO('i', 149) /* sync rx queues */
+#endif /* !NIOCREGIF */
+
+#endif /* _NET_NETMAP_H_ */
diff --git a/examples/netmap_compat/netmap/netmap_user.h b/examples/netmap_compat/netmap/netmap_user.h
new file mode 100644
index 0000000000..86b1b7195b
--- /dev/null
+++ b/examples/netmap_compat/netmap/netmap_user.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the + * distribution. + * + * 3. Neither the name of the authors nor the names of their contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * $FreeBSD: head/sys/net/netmap_user.h 231198 2012-02-08 11:43:29Z luigi $ + * $Id: netmap_user.h 10879 2012-04-12 22:48:59Z luigi $ + * + * This header contains the macros used to manipulate netmap structures + * and packets in userspace. See netmap(4) for more information. + * + * The address of the struct netmap_if, say nifp, is computed from the + * value returned from ioctl(.., NIOCREG, ...) and the mmap region: + * ioctl(fd, NIOCREG, &req); + * mem = mmap(0, ... 
);
+ *	nifp = NETMAP_IF(mem, req.nr_offset);
+ * (so simple, we could just do it manually)
+ *
+ * From there:
+ *	struct netmap_ring *NETMAP_TXRING(nifp, index)
+ *	struct netmap_ring *NETMAP_RXRING(nifp, index)
+ *		we can access ring->cur, ring->avail, ring->flags
+ *
+ *	ring->slot[i] gives us the i-th slot (we can access
+ *		directly len, flags, buf_idx)
+ *
+ *	char *buf = NETMAP_BUF(ring, index) returns a pointer to
+ *		the i-th buffer
+ *
+ * Since rings are circular, we have macros to compute the next index
+ *	i = NETMAP_RING_NEXT(ring, i);
+ */
+
+#ifndef _NET_NETMAP_USER_H_
+#define _NET_NETMAP_USER_H_
+
+#define NETMAP_IF(b, o)	((struct netmap_if *)((char *)(b) + (o)))
+
+#define NETMAP_TXRING(nifp, index)			\
+	((struct netmap_ring *)((char *)(nifp) +	\
+		(nifp)->ring_ofs[(index)] ) )
+
+#define NETMAP_RXRING(nifp, index)			\
+	((struct netmap_ring *)((char *)(nifp) +	\
+	    (nifp)->ring_ofs[(index) + (nifp)->ni_tx_rings + 1] ) )
+
+#define NETMAP_BUF(ring, index)				\
+	((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))
+
+#define NETMAP_BUF_IDX(ring, buf)			\
+	( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
+		(ring)->nr_buf_size )
+
+#define	NETMAP_RING_NEXT(r, i)				\
+	((i)+1 == (r)->num_slots ? 0 : (i) + 1 )
+
+#define	NETMAP_RING_FIRST_RESERVED(r)			\
+	( (r)->cur < (r)->reserved ?			\
+	  (r)->cur + (r)->num_slots - (r)->reserved :	\
+	  (r)->cur - (r)->reserved )
+
+/*
+ * Return 1 if the given tx ring is empty.
+ */
+#define NETMAP_TX_RING_EMPTY(r)	((r)->avail >= (r)->num_slots - 1)
+
+#endif /* _NET_NETMAP_USER_H_ */