From: Jakub Grajciar Date: Thu, 6 Jun 2019 11:38:50 +0000 (+0200) Subject: net/memif: introduce memory interface PMD X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=09c7e63a71f9f2a31efd7e94458019ea4dc427b6;p=dpdk.git net/memif: introduce memory interface PMD Shared memory packet interface (memif) PMD allows for DPDK and any other client using memif (DPDK, VPP, libmemif) to communicate using shared memory. The created device transmits packets in a raw format. It can be used with Ethernet mode, IP mode, or Punt/Inject. At this moment, only Ethernet mode is supported in DPDK memif implementation. Memif is Linux only. Signed-off-by: Jakub Grajciar Reviewed-by: Ferruh Yigit --- diff --git a/MAINTAINERS b/MAINTAINERS index d0bf259b8a..0212fe6d02 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -839,6 +839,12 @@ F: drivers/net/softnic/ F: doc/guides/nics/features/softnic.ini F: doc/guides/nics/softnic.rst +Memif PMD +M: Jakub Grajciar +F: drivers/net/memif/ +F: doc/guides/nics/memif.rst +F: doc/guides/nics/features/memif.ini + Crypto Drivers -------------- diff --git a/config/common_base b/config/common_base index 6f19ad5d21..e406e7836a 100644 --- a/config/common_base +++ b/config/common_base @@ -444,6 +444,11 @@ CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n # CONFIG_RTE_LIBRTE_PMD_AF_XDP=n +# +# Compile Memory Interface PMD driver (Linux only) +# +CONFIG_RTE_LIBRTE_PMD_MEMIF=n + # # Compile link bonding PMD library # diff --git a/config/common_linux b/config/common_linux index 75334273d4..87514fe4f4 100644 --- a/config/common_linux +++ b/config/common_linux @@ -19,6 +19,7 @@ CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n CONFIG_RTE_LIBRTE_PMD_VHOST=y CONFIG_RTE_LIBRTE_IFC_PMD=y CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y +CONFIG_RTE_LIBRTE_PMD_MEMIF=y CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y CONFIG_RTE_LIBRTE_PMD_TAP=y CONFIG_RTE_LIBRTE_AVP_PMD=y diff --git a/doc/guides/nics/features/memif.ini b/doc/guides/nics/features/memif.ini new file mode 100644 index 0000000000..807d9ecdce --- /dev/null +++ b/doc/guides/nics/features/memif.ini @@ -0,0 +1,14 @@ +; +; Supported features of the 'memif' network poll mode driver. +; +; Refer to default.ini for the full list of available PMD features. +; +[Features] +Link status = Y +Basic stats = Y +Jumbo frame = Y +ARMv8 = Y +Power8 = Y +x86-32 = Y +x86-64 = Y +Usage doc = Y diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst index 2221c35f21..691e7209ff 100644 --- a/doc/guides/nics/index.rst +++ b/doc/guides/nics/index.rst @@ -36,6 +36,7 @@ Network Interface Controller Drivers intel_vf kni liquidio + memif mlx4 mlx5 mvneta diff --git a/doc/guides/nics/memif.rst b/doc/guides/nics/memif.rst new file mode 100644 index 0000000000..de2d481eb1 --- /dev/null +++ b/doc/guides/nics/memif.rst @@ -0,0 +1,234 @@ +.. SPDX-License-Identifier: BSD-3-Clause + Copyright(c) 2018-2019 Cisco Systems, Inc. + +====================== +Memif Poll Mode Driver +====================== + +Shared memory packet interface (memif) PMD allows for DPDK and any other client +using memif (DPDK, VPP, libmemif) to communicate using shared memory. Memif is +Linux only. + +The created device transmits packets in a raw format. It can be used with +Ethernet mode, IP mode, or Punt/Inject. At this moment, only Ethernet mode is +supported in DPDK memif implementation. + +Memif works in two roles: master and slave. Slave connects to master over an +existing socket. It is also a producer of shared memory file and initializes +the shared memory. Each interface can be connected to one peer interface +at same time. The peer interface is identified by id parameter. Master +creates the socket and listens for any slave connection requests. The socket +may already exist on the system. Be sure to remove any such sockets, if you +are creating a master interface, or you will see an "Address already in use" +error. Function ``rte_pmd_memif_remove()``, which removes memif interface, +will also remove a listener socket, if it is not being used by any other +interface. + +The method to enable one or more interfaces is to use the +``--vdev=net_memif0`` option on the DPDK application command line. Each +``--vdev=net_memif1`` option given will create an interface named net_memif0, +net_memif1, and so on. Memif uses unix domain socket to transmit control +messages. Each memif has a unique id per socket. This id is used to identify +peer interface. If you are connecting multiple +interfaces using same socket, be sure to specify unique ids ``id=0``, ``id=1``, +etc. Note that if you assign a socket to a master interface it becomes a +listener socket. Listener socket can not be used by a slave interface on same +client. + +.. csv-table:: **Memif configuration options** + :header: "Option", "Description", "Default", "Valid value" + + "id=0", "Used to identify peer interface", "0", "uint32_t" + "role=master", "Set memif role", "slave", "master|slave" + "bsize=1024", "Size of single packet buffer", "2048", "uint16_t" + "rsize=11", "Log2 of ring size. If rsize is 10, actual ring size is 1024", "10", "1-14" + "socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 256" + "mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", "" + "secret=abc123", "Secret is an optional security option, which if specified, must be matched by peer", "", "string len 24" + "zero-copy=yes", "Enable/disable zero-copy slave mode", "no", "yes|no" + +**Connection establishment** + +In order to create memif connection, two memif interfaces, each in separate +process, are needed. One interface in ``master`` role and other in +``slave`` role. It is not possible to connect two interfaces in a single +process. Each interface can be connected to one interface at same time, +identified by matching id parameter. + +Memif driver uses unix domain socket to exchange required information between +memif interfaces. Socket file path is specified at interface creation see +*Memif configuration options* table above. If socket is used by ``master`` +interface, it's marked as listener socket (in scope of current process) and +listens to connection requests from other processes. One socket can be used by +multiple interfaces. One process can have ``slave`` and ``master`` interfaces +at the same time, provided each role is assigned unique socket. + +For detailed information on memif control messages, see: net/memif/memif.h. + +Slave interface attempts to make a connection on assigned socket. Process +listening on this socket will extract the connection request and create a new +connected socket (control channel). Then it sends the 'hello' message +(``MEMIF_MSG_TYPE_HELLO``), containing configuration boundaries. Slave interface +adjusts its configuration accordingly, and sends 'init' message +(``MEMIF_MSG_TYPE_INIT``). This message among others contains interface id. Driver +uses this id to find master interface, and assigns the control channel to this +interface. If such interface is found, 'ack' message (``MEMIF_MSG_TYPE_ACK``) is +sent. Slave interface sends 'add region' message (``MEMIF_MSG_TYPE_ADD_REGION``) for +every region allocated. Master responds to each of these messages with 'ack' +message. Same behavior applies to rings. Slave sends 'add ring' message +(``MEMIF_MSG_TYPE_ADD_RING``) for every initialized ring. Master again responds to +each message with 'ack' message. To finalize the connection, slave interface +sends 'connect' message (``MEMIF_MSG_TYPE_CONNECT``). Upon receiving this message +master maps regions to its address space, initializes rings and responds with +'connected' message (``MEMIF_MSG_TYPE_CONNECTED``). Disconnect +(``MEMIF_MSG_TYPE_DISCONNECT``) can be sent by both master and slave interfaces at +any time, due to driver error or if the interface is being deleted. + +Files + +- net/memif/memif.h *- control messages definitions* +- net/memif/memif_socket.h +- net/memif/memif_socket.c + +Shared memory +~~~~~~~~~~~~~ + +**Shared memory format** + +Slave is producer and master is consumer. Memory regions, are mapped shared memory files, +created by memif slave and provided to master at connection establishment. +Regions contain rings and buffers. Rings and buffers can also be separated into multiple +regions. For no-zero-copy, rings and buffers are stored inside single memory +region to reduce the number of opened files. + +region n (no-zero-copy): + ++-----------------------+-------------------------------------------------------------------------+ +| Rings | Buffers | ++-----------+-----------+-----------------+---+---------------------------------------------------+ +| S2M rings | M2S rings | packet buffer 0 | . | pb ((1 << pmd->run.log2_ring_size)*(s2m + m2s))-1 | ++-----------+-----------+-----------------+---+---------------------------------------------------+ + +S2M OR M2S Rings: + ++--------+--------+-----------------------+ +| ring 0 | ring 1 | ring num_s2m_rings - 1| ++--------+--------+-----------------------+ + +ring 0: + ++-------------+---------------------------------------+ +| ring header | (1 << pmd->run.log2_ring_size) * desc | ++-------------+---------------------------------------+ + +Descriptors are assigned packet buffers in order of rings creation. If we have one ring +in each direction and ring size is 1024, then first 1024 buffers will belong to S2M ring and +last 1024 will belong to M2S ring. In case of zero-copy, buffers are dequeued and +enqueued as needed. + +**Descriptor format** + ++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +|Quad|6| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |3|3| | | | | | | | | | | | | | |1|1| | | | | | | | | | | | | | | | +| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +|Word|3| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |2|1| | | | | | | | | | | | | | |6|5| | | | | | | | | | | | | | |0| ++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +|0 |length |region |flags | ++----+---------------------------------------------------------------+-------------------------------+-------------------------------+ +|1 |metadata |offset | ++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| |6| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |3|3| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | +| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| |3| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |2|1| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |0| ++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +**Flags field - flags (Quad Word 0, bits 0:15)** + ++-----+--------------------+------------------------------------------------------------------------------------------------+ +|Bits |Name |Functionality | ++=====+====================+================================================================================================+ +|0 |MEMIF_DESC_FLAG_NEXT|Is chained buffer. When set, the packet is divided into multiple buffers. May not be contiguous.| ++-----+--------------------+------------------------------------------------------------------------------------------------+ + +**Region index - region (Quad Word 0, 16:31)** + +Index of memory region, the buffer is located in. + +**Data length - length (Quad Word 0, 32:63)** + +Length of transmitted/received data. + +**Data Offset - offset (Quad Word 1, 0:31)** + +Data start offset from memory region address. *.regions[desc->region].addr + desc->offset* + +**Metadata - metadata (Quad Word 1, 32:63)** + +Buffer metadata. + +Files + +- net/memif/memif.h *- descriptor and ring definitions* +- net/memif/rte_eth_memif.c *- eth_memif_rx() eth_memif_tx()* + +Example: testpmd +---------------------------- +In this example we run two instances of testpmd application and transmit packets over memif. + +First create ``master`` interface:: + + #./build/app/testpmd -l 0-1 --proc-type=primary --file-prefix=pmd1 --vdev=net_memif,role=master -- -i + +Now create ``slave`` interface (master must be already running so the slave will connect):: + + #./build/app/testpmd -l 2-3 --proc-type=primary --file-prefix=pmd2 --vdev=net_memif -- -i + +Start forwarding packets:: + + Slave: + testpmd> start + + Master: + testpmd> start tx_first + +Show status:: + + testpmd> show port stats 0 + +For more details on testpmd please refer to :doc:`../testpmd_app_ug/index`. + +Example: testpmd and VPP +------------------------ +For information on how to get and run VPP please see ``_. + +Start VPP in interactive mode (should be by default). Create memif master interface in VPP:: + + vpp# create interface memif id 0 master no-zero-copy + vpp# set interface state memif0/0 up + vpp# set interface ip address memif0/0 192.168.1.1/24 + +To see socket filename use show memif command:: + + vpp# show memif + sockets + id listener filename + 0 yes (1) /run/vpp/memif.sock + ... + +Now create memif interface by running testpmd with these command line options:: + + #./testpmd --vdev=net_memif,socket=/run/vpp/memif.sock -- -i + +Testpmd should now create memif slave interface and try to connect to master. +In testpmd set forward option to icmpecho and start forwarding:: + + testpmd> set fwd icmpecho + testpmd> start + +Send ping from VPP:: + + vpp# ping 192.168.1.2 + 64 bytes from 192.168.1.2: icmp_seq=2 ttl=254 time=36.2918 ms + 64 bytes from 192.168.1.2: icmp_seq=3 ttl=254 time=23.3927 ms + 64 bytes from 192.168.1.2: icmp_seq=4 ttl=254 time=24.2975 ms + 64 bytes from 192.168.1.2: icmp_seq=5 ttl=254 time=17.7049 ms diff --git a/doc/guides/rel_notes/release_19_08.rst b/doc/guides/rel_notes/release_19_08.rst index 5139be62b8..7b36d64e26 100644 --- a/doc/guides/rel_notes/release_19_08.rst +++ b/doc/guides/rel_notes/release_19_08.rst @@ -62,6 +62,11 @@ New Features * Added support for SSE vector mode * Updated HWRM API to version 1.10.0.74 +* **Added memif PMD.** + + Added the new Shared Memory Packet Interface (``memif``) PMD. + See the :doc:`../nics/memif` guide for more details on this new driver. + Removed Items ------------- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 3a72cf38c6..78cb10fc65 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -35,6 +35,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice DIRS-$(CONFIG_RTE_LIBRTE_IPN3KE_PMD) += ipn3ke DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio +DIRS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4 DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5 DIRS-$(CONFIG_RTE_LIBRTE_MVNETA_PMD) += mvneta diff --git a/drivers/net/memif/Makefile b/drivers/net/memif/Makefile new file mode 100644 index 0000000000..c3119625cd --- /dev/null +++ b/drivers/net/memif/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_memif.a + +EXPORT_MAP := rte_pmd_memif_version.map + +LIBABIVER := 1 + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API +# Experimantal APIs: +# - rte_intr_callback_unregister_pending +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool +LDLIBS += -lrte_ethdev -lrte_kvargs +LDLIBS += -lrte_hash +LDLIBS += -lrte_bus_vdev + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += rte_eth_memif.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif_socket.c + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/memif/memif.h b/drivers/net/memif/memif.h new file mode 100644 index 0000000000..3948b1f502 --- /dev/null +++ b/drivers/net/memif/memif.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. + */ + +#ifndef _MEMIF_H_ +#define _MEMIF_H_ + +#define MEMIF_COOKIE 0x3E31F20 +#define MEMIF_VERSION_MAJOR 2 +#define MEMIF_VERSION_MINOR 0 +#define MEMIF_VERSION ((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR) +#define MEMIF_NAME_SZ 32 + +/* + * S2M: direction slave -> master + * M2S: direction master -> slave + */ + +/* + * Type definitions + */ + +typedef enum memif_msg_type { + MEMIF_MSG_TYPE_NONE, + MEMIF_MSG_TYPE_ACK, + MEMIF_MSG_TYPE_HELLO, + MEMIF_MSG_TYPE_INIT, + MEMIF_MSG_TYPE_ADD_REGION, + MEMIF_MSG_TYPE_ADD_RING, + MEMIF_MSG_TYPE_CONNECT, + MEMIF_MSG_TYPE_CONNECTED, + MEMIF_MSG_TYPE_DISCONNECT, +} memif_msg_type_t; + +typedef enum { + MEMIF_RING_S2M, /**< buffer ring in direction slave -> master */ + MEMIF_RING_M2S, /**< buffer ring in direction master -> slave */ +} memif_ring_type_t; + +typedef enum { + MEMIF_INTERFACE_MODE_ETHERNET, + MEMIF_INTERFACE_MODE_IP, + MEMIF_INTERFACE_MODE_PUNT_INJECT, +} memif_interface_mode_t; + +typedef uint16_t memif_region_index_t; +typedef uint32_t memif_region_offset_t; +typedef uint64_t memif_region_size_t; +typedef uint16_t memif_ring_index_t; +typedef uint32_t memif_interface_id_t; +typedef uint16_t memif_version_t; +typedef uint8_t memif_log2_ring_size_t; + +/* + * Socket messages + */ + + /** + * M2S + * Contains master interfaces configuration. + */ +typedef struct __rte_packed { + uint8_t name[MEMIF_NAME_SZ]; /**< Client app name. In this case DPDK version */ + memif_version_t min_version; /**< lowest supported memif version */ + memif_version_t max_version; /**< highest supported memif version */ + memif_region_index_t max_region; /**< maximum num of regions */ + memif_ring_index_t max_m2s_ring; /**< maximum num of M2S ring */ + memif_ring_index_t max_s2m_ring; /**< maximum num of S2M rings */ + memif_log2_ring_size_t max_log2_ring_size; /**< maximum ring size (as log2) */ +} memif_msg_hello_t; + +/** + * S2M + * Contains information required to identify interface + * to which the slave wants to connect. + */ +typedef struct __rte_packed { + memif_version_t version; /**< memif version */ + memif_interface_id_t id; /**< interface id */ + memif_interface_mode_t mode:8; /**< interface mode */ + uint8_t secret[24]; /**< optional security parameter */ + uint8_t name[MEMIF_NAME_SZ]; /**< Client app name. In this case DPDK version */ +} memif_msg_init_t; + +/** + * S2M + * Request master to add new shared memory region to master interface. + * Shared files file descriptor is passed in cmsghdr. + */ +typedef struct __rte_packed { + memif_region_index_t index; /**< shm regions index */ + memif_region_size_t size; /**< shm region size */ +} memif_msg_add_region_t; + +/** + * S2M + * Request master to add new ring to master interface. + */ +typedef struct __rte_packed { + uint16_t flags; /**< flags */ +#define MEMIF_MSG_ADD_RING_FLAG_S2M 1 /**< ring is in S2M direction */ + memif_ring_index_t index; /**< ring index */ + memif_region_index_t region; /**< region index on which this ring is located */ + memif_region_offset_t offset; /**< buffer start offset */ + memif_log2_ring_size_t log2_ring_size; /**< ring size (log2) */ + uint16_t private_hdr_size; /**< used for private metadata */ +} memif_msg_add_ring_t; + +/** + * S2M + * Finalize connection establishment. + */ +typedef struct __rte_packed { + uint8_t if_name[MEMIF_NAME_SZ]; /**< slave interface name */ +} memif_msg_connect_t; + +/** + * M2S + * Finalize connection establishment. + */ +typedef struct __rte_packed { + uint8_t if_name[MEMIF_NAME_SZ]; /**< master interface name */ +} memif_msg_connected_t; + +/** + * S2M & M2S + * Disconnect interfaces. + */ +typedef struct __rte_packed { + uint32_t code; /**< error code */ + uint8_t string[96]; /**< disconnect reason */ +} memif_msg_disconnect_t; + +typedef struct __rte_packed __rte_aligned(128) +{ + memif_msg_type_t type:16; + union { + memif_msg_hello_t hello; + memif_msg_init_t init; + memif_msg_add_region_t add_region; + memif_msg_add_ring_t add_ring; + memif_msg_connect_t connect; + memif_msg_connected_t connected; + memif_msg_disconnect_t disconnect; + }; +} memif_msg_t; + +/* + * Ring and Descriptor Layout + */ + +/** + * Buffer descriptor. + */ +typedef struct __rte_packed { + uint16_t flags; /**< flags */ +#define MEMIF_DESC_FLAG_NEXT 1 /**< is chained buffer */ + memif_region_index_t region; /**< region index on which the buffer is located */ + uint32_t length; /**< buffer length */ + memif_region_offset_t offset; /**< buffer offset */ + uint32_t metadata; +} memif_desc_t; + +#define MEMIF_CACHELINE_ALIGN_MARK(mark) \ + uint8_t mark[0] __rte_aligned(RTE_CACHE_LINE_SIZE) + +typedef struct { + MEMIF_CACHELINE_ALIGN_MARK(cacheline0); + uint32_t cookie; /**< MEMIF_COOKIE */ + uint16_t flags; /**< flags */ +#define MEMIF_RING_FLAG_MASK_INT 1 /**< disable interrupt mode */ + volatile uint16_t head; /**< pointer to ring buffer head */ + MEMIF_CACHELINE_ALIGN_MARK(cacheline1); + volatile uint16_t tail; /**< pointer to ring buffer tail */ + MEMIF_CACHELINE_ALIGN_MARK(cacheline2); + memif_desc_t desc[0]; /**< buffer descriptors */ +} memif_ring_t; + +#endif /* _MEMIF_H_ */ diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c new file mode 100644 index 0000000000..1e046b6b1a --- /dev/null +++ b/drivers/net/memif/memif_socket.c @@ -0,0 +1,1124 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_eth_memif.h" +#include "memif_socket.h" + +static void memif_intr_handler(void *arg); + +static ssize_t +memif_msg_send(int fd, memif_msg_t *msg, int afd) +{ + struct msghdr mh = { 0 }; + struct iovec iov[1]; + struct cmsghdr *cmsg; + char ctl[CMSG_SPACE(sizeof(int))]; + + iov[0].iov_base = msg; + iov[0].iov_len = sizeof(memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + if (afd > 0) { + memset(&ctl, 0, sizeof(ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof(ctl); + cmsg = CMSG_FIRSTHDR(&mh); + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + rte_memcpy(CMSG_DATA(cmsg), &afd, sizeof(int)); + } + + return sendmsg(fd, &mh, 0); +} + +static int +memif_msg_send_from_queue(struct memif_control_channel *cc) +{ + ssize_t size; + int ret = 0; + struct memif_msg_queue_elt *e; + + e = TAILQ_FIRST(&cc->msg_queue); + if (e == NULL) + return 0; + + size = memif_msg_send(cc->intr_handle.fd, &e->msg, e->fd); + if (size != sizeof(memif_msg_t)) { + MIF_LOG(ERR, "sendmsg fail: %s.", strerror(errno)); + ret = -1; + } else { + MIF_LOG(DEBUG, "Sent msg type %u.", e->msg.type); + } + TAILQ_REMOVE(&cc->msg_queue, e, next); + rte_free(e); + + return ret; +} + +static struct memif_msg_queue_elt * +memif_msg_enq(struct memif_control_channel *cc) +{ + struct memif_msg_queue_elt *e; + + e = rte_zmalloc("memif_msg", sizeof(struct memif_msg_queue_elt), 0); + if (e == NULL) { + MIF_LOG(ERR, "Failed to allocate control message."); + return NULL; + } + + e->fd = -1; + TAILQ_INSERT_TAIL(&cc->msg_queue, e, next); + + return e; +} + +void +memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason, + int err_code) +{ + struct memif_msg_queue_elt *e; + struct pmd_internals *pmd; + memif_msg_disconnect_t *d; + + if (cc == NULL) { + MIF_LOG(DEBUG, "Missing control channel."); + return; + } + + e = memif_msg_enq(cc); + if (e == NULL) { + MIF_LOG(WARNING, "Failed to enqueue disconnect message."); + return; + } + + d = &e->msg.disconnect; + + e->msg.type = MEMIF_MSG_TYPE_DISCONNECT; + d->code = err_code; + + if (reason != NULL) { + strlcpy((char *)d->string, reason, sizeof(d->string)); + if (cc->dev != NULL) { + pmd = cc->dev->data->dev_private; + strlcpy(pmd->local_disc_string, reason, + sizeof(pmd->local_disc_string)); + } + } +} + +static int +memif_msg_enq_hello(struct memif_control_channel *cc) +{ + struct memif_msg_queue_elt *e = memif_msg_enq(cc); + memif_msg_hello_t *h; + + if (e == NULL) + return -1; + + h = &e->msg.hello; + + e->msg.type = MEMIF_MSG_TYPE_HELLO; + h->min_version = MEMIF_VERSION; + h->max_version = MEMIF_VERSION; + h->max_s2m_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS; + h->max_m2s_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS; + h->max_region = ETH_MEMIF_MAX_REGION_NUM - 1; + h->max_log2_ring_size = ETH_MEMIF_MAX_LOG2_RING_SIZE; + + strlcpy((char *)h->name, rte_version(), sizeof(h->name)); + + return 0; +} + +static int +memif_msg_receive_hello(struct rte_eth_dev *dev, memif_msg_t *msg) +{ + struct pmd_internals *pmd = dev->data->dev_private; + memif_msg_hello_t *h = &msg->hello; + + if (h->min_version > MEMIF_VERSION || h->max_version < MEMIF_VERSION) { + memif_msg_enq_disconnect(pmd->cc, "Incompatible memif version", 0); + return -1; + } + + /* Set parameters for active connection */ + pmd->run.num_s2m_rings = RTE_MIN(h->max_s2m_ring + 1, + pmd->cfg.num_s2m_rings); + pmd->run.num_m2s_rings = RTE_MIN(h->max_m2s_ring + 1, + pmd->cfg.num_m2s_rings); + pmd->run.log2_ring_size = RTE_MIN(h->max_log2_ring_size, + pmd->cfg.log2_ring_size); + pmd->run.pkt_buffer_size = pmd->cfg.pkt_buffer_size; + + strlcpy(pmd->remote_name, (char *)h->name, sizeof(pmd->remote_name)); + + MIF_LOG(DEBUG, "%s: Connecting to %s.", + rte_vdev_device_name(pmd->vdev), pmd->remote_name); + + return 0; +} + +static int +memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t *msg) +{ + memif_msg_init_t *i = &msg->init; + struct memif_socket_dev_list_elt *elt; + struct pmd_internals *pmd; + struct rte_eth_dev *dev; + + if (i->version != MEMIF_VERSION) { + memif_msg_enq_disconnect(cc, "Incompatible memif version", 0); + return -1; + } + + if (cc->socket == NULL) { + memif_msg_enq_disconnect(cc, "Device error", 0); + return -1; + } + + /* Find device with requested ID */ + TAILQ_FOREACH(elt, &cc->socket->dev_queue, next) { + dev = elt->dev; + pmd = dev->data->dev_private; + if (((pmd->flags & ETH_MEMIF_FLAG_DISABLED) == 0) && + pmd->id == i->id) { + /* assign control channel to device */ + cc->dev = dev; + pmd->cc = cc; + + if (i->mode != MEMIF_INTERFACE_MODE_ETHERNET) { + memif_msg_enq_disconnect(pmd->cc, + "Only ethernet mode supported", + 0); + return -1; + } + + if (pmd->flags & (ETH_MEMIF_FLAG_CONNECTING | + ETH_MEMIF_FLAG_CONNECTED)) { + memif_msg_enq_disconnect(pmd->cc, + "Already connected", 0); + return -1; + } + strlcpy(pmd->remote_name, (char *)i->name, + sizeof(pmd->remote_name)); + + if (*pmd->secret != '\0') { + if (*i->secret == '\0') { + memif_msg_enq_disconnect(pmd->cc, + "Secret required", 0); + return -1; + } + if (strncmp(pmd->secret, (char *)i->secret, + ETH_MEMIF_SECRET_SIZE) != 0) { + memif_msg_enq_disconnect(pmd->cc, + "Incorrect secret", 0); + return -1; + } + } + + pmd->flags |= ETH_MEMIF_FLAG_CONNECTING; + return 0; + } + } + + /* ID not found on this socket */ + MIF_LOG(DEBUG, "ID %u not found.", i->id); + memif_msg_enq_disconnect(cc, "ID not found", 0); + return -1; +} + +static int +memif_msg_receive_add_region(struct rte_eth_dev *dev, memif_msg_t *msg, + int fd) +{ + struct pmd_internals *pmd = dev->data->dev_private; + memif_msg_add_region_t *ar = &msg->add_region; + struct memif_region *r; + + if (fd < 0) { + memif_msg_enq_disconnect(pmd->cc, "Missing region fd", 0); + return -1; + } + + if (ar->index >= ETH_MEMIF_MAX_REGION_NUM || ar->index != pmd->regions_num || + pmd->regions[ar->index] != NULL) { + memif_msg_enq_disconnect(pmd->cc, "Invalid region index", 0); + return -1; + } + + r = rte_zmalloc("region", sizeof(struct memif_region), 0); + if (r == NULL) { + MIF_LOG(ERR, "%s: Failed to alloc memif region.", + rte_vdev_device_name(pmd->vdev)); + return -ENOMEM; + } + + r->fd = fd; + r->region_size = ar->size; + r->addr = NULL; + + pmd->regions[ar->index] = r; + pmd->regions_num++; + + return 0; +} + +static int +memif_msg_receive_add_ring(struct rte_eth_dev *dev, memif_msg_t *msg, int fd) +{ + struct pmd_internals *pmd = dev->data->dev_private; + memif_msg_add_ring_t *ar = &msg->add_ring; + struct memif_queue *mq; + + if (fd < 0) { + memif_msg_enq_disconnect(pmd->cc, "Missing interrupt fd", 0); + return -1; + } + + /* check if we have enough queues */ + if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) { + if (ar->index >= pmd->cfg.num_s2m_rings) { + memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0); + return -1; + } + pmd->run.num_s2m_rings++; + } else { + if (ar->index >= pmd->cfg.num_m2s_rings) { + memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0); + return -1; + } + pmd->run.num_m2s_rings++; + } + + mq = (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) ? + dev->data->rx_queues[ar->index] : dev->data->tx_queues[ar->index]; + + mq->intr_handle.fd = fd; + mq->log2_ring_size = ar->log2_ring_size; + mq->region = ar->region; + mq->ring_offset = ar->offset; + + return 0; +} + +static int +memif_msg_receive_connect(struct rte_eth_dev *dev, memif_msg_t *msg) +{ + struct pmd_internals *pmd = dev->data->dev_private; + memif_msg_connect_t *c = &msg->connect; + int ret; + + ret = memif_connect(dev); + if (ret < 0) + return ret; + + strlcpy(pmd->remote_if_name, (char *)c->if_name, + sizeof(pmd->remote_if_name)); + MIF_LOG(INFO, "%s: Remote interface %s connected.", + rte_vdev_device_name(pmd->vdev), pmd->remote_if_name); + + return 0; +} + +static int +memif_msg_receive_connected(struct rte_eth_dev *dev, memif_msg_t *msg) +{ + struct pmd_internals *pmd = dev->data->dev_private; + memif_msg_connected_t *c = &msg->connected; + int ret; + + ret = memif_connect(dev); + if (ret < 0) + return ret; + + strlcpy(pmd->remote_if_name, (char *)c->if_name, + sizeof(pmd->remote_if_name)); + MIF_LOG(INFO, "%s: Remote interface %s connected.", + rte_vdev_device_name(pmd->vdev), pmd->remote_if_name); + + return 0; +} + +static int +memif_msg_receive_disconnect(struct rte_eth_dev *dev, memif_msg_t *msg) +{ + struct pmd_internals *pmd = dev->data->dev_private; + memif_msg_disconnect_t *d = &msg->disconnect; + + memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE); + strlcpy(pmd->remote_disc_string, (char *)d->string, + sizeof(pmd->remote_disc_string)); + + MIF_LOG(INFO, "%s: Disconnect received: %s", + rte_vdev_device_name(pmd->vdev), pmd->remote_disc_string); + + memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE); + memif_disconnect(rte_eth_dev_allocated + (rte_vdev_device_name(pmd->vdev))); + return 0; +} + +static int +memif_msg_enq_ack(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc); + if (e == NULL) + return -1; + + e->msg.type = MEMIF_MSG_TYPE_ACK; + + return 0; +} + +static int +memif_msg_enq_init(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc); + memif_msg_init_t *i = &e->msg.init; + + if (e == NULL) + return -1; + + i = &e->msg.init; + e->msg.type = MEMIF_MSG_TYPE_INIT; + i->version = MEMIF_VERSION; + i->id = pmd->id; + i->mode = MEMIF_INTERFACE_MODE_ETHERNET; + + strlcpy((char *)i->name, rte_version(), sizeof(i->name)); + + if (*pmd->secret != '\0') + strlcpy((char *)i->secret, pmd->secret, sizeof(i->secret)); + + return 0; +} + +static int +memif_msg_enq_add_region(struct rte_eth_dev *dev, uint8_t idx) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc); + memif_msg_add_region_t *ar; + struct memif_region *mr = pmd->regions[idx]; + + if (e == NULL) + return -1; + + ar = &e->msg.add_region; + e->msg.type = MEMIF_MSG_TYPE_ADD_REGION; + e->fd = mr->fd; + ar->index = idx; + ar->size = mr->region_size; + + return 0; +} + +static int +memif_msg_enq_add_ring(struct rte_eth_dev *dev, uint8_t idx, + memif_ring_type_t type) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc); + struct memif_queue *mq; + memif_msg_add_ring_t *ar; + + if (e == NULL) + return -1; + + ar = &e->msg.add_ring; + mq = (type == MEMIF_RING_S2M) ? dev->data->tx_queues[idx] : + dev->data->rx_queues[idx]; + + e->msg.type = MEMIF_MSG_TYPE_ADD_RING; + e->fd = mq->intr_handle.fd; + ar->index = idx; + ar->offset = mq->ring_offset; + ar->region = mq->region; + ar->log2_ring_size = mq->log2_ring_size; + ar->flags = (type == MEMIF_RING_S2M) ? MEMIF_MSG_ADD_RING_FLAG_S2M : 0; + ar->private_hdr_size = 0; + + return 0; +} + +static int +memif_msg_enq_connect(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc); + const char *name = rte_vdev_device_name(pmd->vdev); + memif_msg_connect_t *c; + + if (e == NULL) + return -1; + + c = &e->msg.connect; + e->msg.type = MEMIF_MSG_TYPE_CONNECT; + strlcpy((char *)c->if_name, name, sizeof(c->if_name)); + + return 0; +} + +static int +memif_msg_enq_connected(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc); + const char *name = rte_vdev_device_name(pmd->vdev); + memif_msg_connected_t *c; + + if (e == NULL) + return -1; + + c = &e->msg.connected; + e->msg.type = MEMIF_MSG_TYPE_CONNECTED; + strlcpy((char *)c->if_name, name, sizeof(c->if_name)); + + return 0; +} + +static void +memif_intr_unregister_handler(struct rte_intr_handle *intr_handle, void *arg) +{ + struct memif_msg_queue_elt *elt; + struct memif_control_channel *cc = arg; + + /* close control channel fd */ + close(intr_handle->fd); + /* clear message queue */ + while ((elt = TAILQ_FIRST(&cc->msg_queue)) != NULL) { + TAILQ_REMOVE(&cc->msg_queue, elt, next); + rte_free(elt); + } + /* free control channel */ + rte_free(cc); +} + +void +memif_disconnect(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_msg_queue_elt *elt, *next; + struct memif_queue *mq; + struct rte_intr_handle *ih; + int i; + int ret; + + if (pmd->cc != NULL) { + /* Clear control message queue (except disconnect message if any). */ + for (elt = TAILQ_FIRST(&pmd->cc->msg_queue); elt != NULL; elt = next) { + next = TAILQ_NEXT(elt, next); + if (elt->msg.type != MEMIF_MSG_TYPE_DISCONNECT) { + TAILQ_REMOVE(&pmd->cc->msg_queue, elt, next); + rte_free(elt); + } + } + /* send disconnect message (if there is any in queue) */ + memif_msg_send_from_queue(pmd->cc); + + /* at this point, there should be no more messages in queue */ + if (TAILQ_FIRST(&pmd->cc->msg_queue) != NULL) { + MIF_LOG(WARNING, + "%s: Unexpected message(s) in message queue.", + rte_vdev_device_name(pmd->vdev)); + } + + ih = &pmd->cc->intr_handle; + if (ih->fd > 0) { + ret = rte_intr_callback_unregister(ih, + memif_intr_handler, + pmd->cc); + /* + * If callback is active (disconnecting based on + * received control message). + */ + if (ret == -EAGAIN) { + ret = rte_intr_callback_unregister_pending(ih, + memif_intr_handler, + pmd->cc, + memif_intr_unregister_handler); + } else if (ret > 0) { + close(ih->fd); + rte_free(pmd->cc); + } + pmd->cc = NULL; + if (ret <= 0) + MIF_LOG(WARNING, "%s: Failed to unregister " + "control channel callback.", + rte_vdev_device_name(pmd->vdev)); + } + } + + /* unconfig interrupts */ + for (i = 0; i < pmd->cfg.num_s2m_rings; i++) { + if (pmd->role == MEMIF_ROLE_SLAVE) { + if (dev->data->tx_queues != NULL) + mq = dev->data->tx_queues[i]; + else + continue; + } else { + if (dev->data->rx_queues != NULL) + mq = dev->data->rx_queues[i]; + else + continue; + } + if (mq->intr_handle.fd > 0) { + close(mq->intr_handle.fd); + mq->intr_handle.fd = -1; + } + mq->ring = NULL; + } + for (i = 0; i < pmd->cfg.num_m2s_rings; i++) { + if (pmd->role == MEMIF_ROLE_MASTER) { + if (dev->data->tx_queues != NULL) + mq = dev->data->tx_queues[i]; + else + continue; + } else { + if (dev->data->rx_queues != NULL) + mq = dev->data->rx_queues[i]; + else + continue; + } + if (mq->intr_handle.fd > 0) { + close(mq->intr_handle.fd); + mq->intr_handle.fd = -1; + } + mq->ring = NULL; + } + + memif_free_regions(pmd); + + /* reset connection configuration */ + memset(&pmd->run, 0, sizeof(pmd->run)); + + dev->data->dev_link.link_status = ETH_LINK_DOWN; + pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING; + pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTED; + MIF_LOG(DEBUG, "%s: Disconnected.", rte_vdev_device_name(pmd->vdev)); +} + +static int +memif_msg_receive(struct memif_control_channel *cc) +{ + char ctl[CMSG_SPACE(sizeof(int)) + + CMSG_SPACE(sizeof(struct ucred))] = { 0 }; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + memif_msg_t msg = { 0 }; + ssize_t size; + int ret = 0; + struct ucred *cr __rte_unused; + cr = 0; + struct cmsghdr *cmsg; + int afd = -1; + int i; + struct pmd_internals *pmd; + + iov[0].iov_base = (void *)&msg; + iov[0].iov_len = sizeof(memif_msg_t); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = ctl; + mh.msg_controllen = sizeof(ctl); + + size = recvmsg(cc->intr_handle.fd, &mh, 0); + if (size != sizeof(memif_msg_t)) { + MIF_LOG(DEBUG, "Invalid message size."); + memif_msg_enq_disconnect(cc, "Invalid message size", 0); + return -1; + } + MIF_LOG(DEBUG, "Received msg type: %u.", msg.type); + + cmsg = CMSG_FIRSTHDR(&mh); + while (cmsg) { + if (cmsg->cmsg_level == SOL_SOCKET) { + if (cmsg->cmsg_type == SCM_CREDENTIALS) + cr = (struct ucred *)CMSG_DATA(cmsg); + else if (cmsg->cmsg_type == SCM_RIGHTS) + memcpy(&afd, CMSG_DATA(cmsg), sizeof(int)); + } + cmsg = CMSG_NXTHDR(&mh, cmsg); + } + + if (cc->dev == NULL && msg.type != MEMIF_MSG_TYPE_INIT) { + MIF_LOG(DEBUG, "Unexpected message."); + memif_msg_enq_disconnect(cc, "Unexpected message", 0); + return -1; + } + + /* get device from hash data */ + switch (msg.type) { + case MEMIF_MSG_TYPE_ACK: + break; + case MEMIF_MSG_TYPE_HELLO: + ret = memif_msg_receive_hello(cc->dev, &msg); + if (ret < 0) + goto exit; + ret = memif_init_regions_and_queues(cc->dev); + if (ret < 0) + goto exit; + ret = memif_msg_enq_init(cc->dev); + if (ret < 0) + goto exit; + pmd = cc->dev->data->dev_private; + for (i = 0; i < pmd->regions_num; i++) { + ret = memif_msg_enq_add_region(cc->dev, i); + if (ret < 0) + goto exit; + } + for (i = 0; i < pmd->run.num_s2m_rings; i++) { + ret = memif_msg_enq_add_ring(cc->dev, i, + MEMIF_RING_S2M); + if (ret < 0) + goto exit; + } + for (i = 0; i < pmd->run.num_m2s_rings; i++) { + ret = memif_msg_enq_add_ring(cc->dev, i, + MEMIF_RING_M2S); + if (ret < 0) + goto exit; + } + ret = memif_msg_enq_connect(cc->dev); + if (ret < 0) + goto exit; + break; + case MEMIF_MSG_TYPE_INIT: + /* + * This cc does not have an interface asociated with it. + * If suitable interface is found it will be assigned here. + */ + ret = memif_msg_receive_init(cc, &msg); + if (ret < 0) + goto exit; + ret = memif_msg_enq_ack(cc->dev); + if (ret < 0) + goto exit; + break; + case MEMIF_MSG_TYPE_ADD_REGION: + ret = memif_msg_receive_add_region(cc->dev, &msg, afd); + if (ret < 0) + goto exit; + ret = memif_msg_enq_ack(cc->dev); + if (ret < 0) + goto exit; + break; + case MEMIF_MSG_TYPE_ADD_RING: + ret = memif_msg_receive_add_ring(cc->dev, &msg, afd); + if (ret < 0) + goto exit; + ret = memif_msg_enq_ack(cc->dev); + if (ret < 0) + goto exit; + break; + case MEMIF_MSG_TYPE_CONNECT: + ret = memif_msg_receive_connect(cc->dev, &msg); + if (ret < 0) + goto exit; + ret = memif_msg_enq_connected(cc->dev); + if (ret < 0) + goto exit; + break; + case MEMIF_MSG_TYPE_CONNECTED: + ret = memif_msg_receive_connected(cc->dev, &msg); + break; + case MEMIF_MSG_TYPE_DISCONNECT: + ret = memif_msg_receive_disconnect(cc->dev, &msg); + if (ret < 0) + goto exit; + break; + default: + memif_msg_enq_disconnect(cc, "Unknown message type", 0); + ret = -1; + goto exit; + } + + exit: + return ret; +} + +static void +memif_intr_handler(void *arg) +{ + struct memif_control_channel *cc = arg; + int ret; + + ret = memif_msg_receive(cc); + /* if driver failed to assign device */ + if (cc->dev == NULL) { + ret = rte_intr_callback_unregister_pending(&cc->intr_handle, + memif_intr_handler, + cc, + memif_intr_unregister_handler); + if (ret < 0) + MIF_LOG(WARNING, + "Failed to unregister control channel callback."); + return; + } + /* if memif_msg_receive failed */ + if (ret < 0) + goto disconnect; + + ret = memif_msg_send_from_queue(cc); + if (ret < 0) + goto disconnect; + + return; + + disconnect: + if (cc->dev == NULL) { + MIF_LOG(WARNING, "eth dev not allocated"); + return; + } + memif_disconnect(cc->dev); +} + +static void +memif_listener_handler(void *arg) +{ + struct memif_socket *socket = arg; + int sockfd; + int addr_len; + struct sockaddr_un client; + struct memif_control_channel *cc; + int ret; + + addr_len = sizeof(client); + sockfd = accept(socket->intr_handle.fd, (struct sockaddr *)&client, + (socklen_t *)&addr_len); + if (sockfd < 0) { + MIF_LOG(ERR, + "Failed to accept connection request on socket fd %d", + socket->intr_handle.fd); + return; + } + + MIF_LOG(DEBUG, "%s: Connection request accepted.", socket->filename); + + cc = rte_zmalloc("memif-cc", sizeof(struct memif_control_channel), 0); + if (cc == NULL) { + MIF_LOG(ERR, "Failed to allocate control channel."); + goto error; + } + + cc->intr_handle.fd = sockfd; + cc->intr_handle.type = RTE_INTR_HANDLE_EXT; + cc->socket = socket; + cc->dev = NULL; + TAILQ_INIT(&cc->msg_queue); + + ret = rte_intr_callback_register(&cc->intr_handle, memif_intr_handler, cc); + if (ret < 0) { + MIF_LOG(ERR, "Failed to register control channel callback."); + goto error; + } + + ret = memif_msg_enq_hello(cc); + if (ret < 0) { + MIF_LOG(ERR, "Failed to enqueue hello message."); + goto error; + } + ret = memif_msg_send_from_queue(cc); + if (ret < 0) + goto error; + + return; + + error: + if (sockfd > 0) { + close(sockfd); + sockfd = -1; + } + if (cc != NULL) + rte_free(cc); +} + +static struct memif_socket * +memif_socket_create(struct pmd_internals *pmd, char *key, uint8_t listener) +{ + struct memif_socket *sock; + struct sockaddr_un un; + int sockfd; + int ret; + int on = 1; + + sock = rte_zmalloc("memif-socket", sizeof(struct memif_socket), 0); + if (sock == NULL) { + MIF_LOG(ERR, "Failed to allocate memory for memif socket"); + return NULL; + } + + sock->listener = listener; + rte_memcpy(sock->filename, key, 256); + TAILQ_INIT(&sock->dev_queue); + + if (listener != 0) { + sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (sockfd < 0) + goto error; + + un.sun_family = AF_UNIX; + memcpy(un.sun_path, sock->filename, + sizeof(un.sun_path) - 1); + + ret = setsockopt(sockfd, SOL_SOCKET, SO_PASSCRED, &on, + sizeof(on)); + if (ret < 0) + goto error; + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un)); + if (ret < 0) + goto error; + ret = listen(sockfd, 1); + if (ret < 0) + goto error; + + MIF_LOG(DEBUG, "%s: Memif listener socket %s created.", + rte_vdev_device_name(pmd->vdev), sock->filename); + + sock->intr_handle.fd = sockfd; + sock->intr_handle.type = RTE_INTR_HANDLE_EXT; + ret = rte_intr_callback_register(&sock->intr_handle, + memif_listener_handler, sock); + if (ret < 0) { + MIF_LOG(ERR, "%s: Failed to register interrupt " + "callback for listener socket", + rte_vdev_device_name(pmd->vdev)); + return NULL; + } + } + + return sock; + + error: + MIF_LOG(ERR, "%s: Failed to setup socket %s: %s", + rte_vdev_device_name(pmd->vdev), key, strerror(errno)); + if (sock != NULL) + rte_free(sock); + return NULL; +} + +static struct rte_hash * +memif_create_socket_hash(void) +{ + struct rte_hash_parameters params = { 0 }; + params.name = MEMIF_SOCKET_HASH_NAME; + params.entries = 256; + params.key_len = 256; + params.hash_func = rte_jhash; + params.hash_func_init_val = 0; + return rte_hash_create(¶ms); +} + +int +memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_socket *socket = NULL; + struct memif_socket_dev_list_elt *elt; + struct pmd_internals *tmp_pmd; + struct rte_hash *hash; + int ret; + char key[256]; + + hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME); + if (hash == NULL) { + hash = memif_create_socket_hash(); + if (hash == NULL) { + MIF_LOG(ERR, "Failed to create memif socket hash."); + return -1; + } + } + + memset(key, 0, 256); + rte_memcpy(key, socket_filename, strlen(socket_filename)); + ret = rte_hash_lookup_data(hash, key, (void **)&socket); + if (ret < 0) { + socket = memif_socket_create(pmd, key, + (pmd->role == + MEMIF_ROLE_SLAVE) ? 0 : 1); + if (socket == NULL) + return -1; + ret = rte_hash_add_key_data(hash, key, socket); + if (ret < 0) { + MIF_LOG(ERR, "Failed to add socket to socket hash."); + return ret; + } + } + pmd->socket_filename = socket->filename; + + if (socket->listener != 0 && pmd->role == MEMIF_ROLE_SLAVE) { + MIF_LOG(ERR, "Socket is a listener."); + return -1; + } else if ((socket->listener == 0) && (pmd->role == MEMIF_ROLE_MASTER)) { + MIF_LOG(ERR, "Socket is not a listener."); + return -1; + } + + TAILQ_FOREACH(elt, &socket->dev_queue, next) { + tmp_pmd = elt->dev->data->dev_private; + if (tmp_pmd->id == pmd->id) { + MIF_LOG(ERR, "Memif device with id %d already " + "exists on socket %s", + pmd->id, socket->filename); + return -1; + } + } + + elt = rte_malloc("pmd-queue", sizeof(struct memif_socket_dev_list_elt), 0); + if (elt == NULL) { + MIF_LOG(ERR, "%s: Failed to add device to socket device list.", + rte_vdev_device_name(pmd->vdev)); + return -1; + } + elt->dev = dev; + TAILQ_INSERT_TAIL(&socket->dev_queue, elt, next); + + return 0; +} + +void +memif_socket_remove_device(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_socket *socket = NULL; + struct memif_socket_dev_list_elt *elt, *next; + struct rte_hash *hash; + + hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME); + if (hash == NULL) + return; + + if (pmd->socket_filename == NULL) + return; + + if (rte_hash_lookup_data(hash, pmd->socket_filename, (void **)&socket) < 0) + return; + + for (elt = TAILQ_FIRST(&socket->dev_queue); elt != NULL; elt = next) { + next = TAILQ_NEXT(elt, next); + if (elt->dev == dev) { + TAILQ_REMOVE(&socket->dev_queue, elt, next); + rte_free(elt); + pmd->socket_filename = NULL; + } + } + + /* remove socket, if this was the last device using it */ + if (TAILQ_EMPTY(&socket->dev_queue)) { + rte_hash_del_key(hash, socket->filename); + if (socket->listener) { + /* remove listener socket file, + * so we can create new one later. + */ + remove(socket->filename); + } + rte_free(socket); + } +} + +int +memif_connect_master(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + + memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE); + memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE); + pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED; + return 0; +} + +int +memif_connect_slave(struct rte_eth_dev *dev) +{ + int sockfd; + int ret; + struct sockaddr_un sun; + struct pmd_internals *pmd = dev->data->dev_private; + + memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE); + memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE); + pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED; + + sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (sockfd < 0) { + MIF_LOG(ERR, "%s: Failed to open socket.", + rte_vdev_device_name(pmd->vdev)); + return -1; + } + + sun.sun_family = AF_UNIX; + + memcpy(sun.sun_path, pmd->socket_filename, sizeof(sun.sun_path) - 1); + + ret = connect(sockfd, (struct sockaddr *)&sun, + sizeof(struct sockaddr_un)); + if (ret < 0) { + MIF_LOG(ERR, "%s: Failed to connect socket: %s.", + rte_vdev_device_name(pmd->vdev), pmd->socket_filename); + goto error; + } + + MIF_LOG(DEBUG, "%s: Memif socket: %s connected.", + rte_vdev_device_name(pmd->vdev), pmd->socket_filename); + + pmd->cc = rte_zmalloc("memif-cc", + sizeof(struct memif_control_channel), 0); + if (pmd->cc == NULL) { + MIF_LOG(ERR, "%s: Failed to allocate control channel.", + rte_vdev_device_name(pmd->vdev)); + goto error; + } + + pmd->cc->intr_handle.fd = sockfd; + pmd->cc->intr_handle.type = RTE_INTR_HANDLE_EXT; + pmd->cc->socket = NULL; + pmd->cc->dev = dev; + TAILQ_INIT(&pmd->cc->msg_queue); + + ret = rte_intr_callback_register(&pmd->cc->intr_handle, + memif_intr_handler, pmd->cc); + if (ret < 0) { + MIF_LOG(ERR, "%s: Failed to register interrupt callback " + "for control fd", rte_vdev_device_name(pmd->vdev)); + goto error; + } + + return 0; + + error: + if (sockfd > 0) { + close(sockfd); + sockfd = -1; + } + if (pmd->cc != NULL) { + rte_free(pmd->cc); + pmd->cc = NULL; + } + return -1; +} diff --git a/drivers/net/memif/memif_socket.h b/drivers/net/memif/memif_socket.h new file mode 100644 index 0000000000..db293e2009 --- /dev/null +++ b/drivers/net/memif/memif_socket.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. + */ + +#ifndef _MEMIF_SOCKET_H_ +#define _MEMIF_SOCKET_H_ + +#include + +/** + * Remove device from socket device list. If no device is left on the socket, + * remove the socket as well. + * + * @param dev + * memif device + */ +void memif_socket_remove_device(struct rte_eth_dev *dev); + +/** + * Enqueue disconnect message to control channel message queue. + * + * @param cc + * control channel + * @param reason + * const string stating disconnect reason (96 characters) + * @param err_code + * error code + */ +void memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason, + int err_code); + +/** + * Initialize memif socket for specified device. If socket doesn't exist, create socket. + * + * @param dev + * memif device + * @param socket_filename + * socket filename + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename); + +/** + * Disconnect memif device. Close control channel and shared memory. + * + * @param dev + * memif device + */ +void memif_disconnect(struct rte_eth_dev *dev); + +/** + * If device is properly configured, enable connection establishment. + * + * @param dev + * memif device + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int memif_connect_master(struct rte_eth_dev *dev); + +/** + * If device is properly configured, send connection request. + * + * @param dev + * memif device + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int memif_connect_slave(struct rte_eth_dev *dev); + +struct memif_socket_dev_list_elt { + TAILQ_ENTRY(memif_socket_dev_list_elt) next; + struct rte_eth_dev *dev; /**< pointer to device internals */ + char dev_name[RTE_ETH_NAME_MAX_LEN]; +}; + +#define MEMIF_SOCKET_HASH_NAME "memif-sh" +struct memif_socket { + struct rte_intr_handle intr_handle; /**< interrupt handle */ + char filename[256]; /**< socket filename */ + + TAILQ_HEAD(, memif_socket_dev_list_elt) dev_queue; + /**< Queue of devices using this socket */ + uint8_t listener; /**< if not zero socket is listener */ +}; + +/* Control message queue. */ +struct memif_msg_queue_elt { + memif_msg_t msg; /**< control message */ + TAILQ_ENTRY(memif_msg_queue_elt) next; + int fd; /**< fd to be sent to peer */ +}; + +struct memif_control_channel { + struct rte_intr_handle intr_handle; /**< interrupt handle */ + TAILQ_HEAD(, memif_msg_queue_elt) msg_queue; /**< control message queue */ + struct memif_socket *socket; /**< pointer to socket */ + struct rte_eth_dev *dev; /**< pointer to device */ +}; + +#endif /* MEMIF_SOCKET_H */ diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build new file mode 100644 index 0000000000..287a30ef29 --- /dev/null +++ b/drivers/net/memif/meson.build @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. + +if host_machine.system() != 'linux' + build = false +endif + +sources = files('rte_eth_memif.c', + 'memif_socket.c') + +allow_experimental_apis = true +# Experimantal APIs: +# - rte_intr_callback_unregister_pending + +deps += ['hash'] diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c new file mode 100644 index 0000000000..b9f05a624f --- /dev/null +++ b/drivers/net/memif/rte_eth_memif.c @@ -0,0 +1,1204 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_eth_memif.h" +#include "memif_socket.h" + +#define ETH_MEMIF_ID_ARG "id" +#define ETH_MEMIF_ROLE_ARG "role" +#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG "bsize" +#define ETH_MEMIF_RING_SIZE_ARG "rsize" +#define ETH_MEMIF_SOCKET_ARG "socket" +#define ETH_MEMIF_MAC_ARG "mac" +#define ETH_MEMIF_ZC_ARG "zero-copy" +#define ETH_MEMIF_SECRET_ARG "secret" + +static const char * const valid_arguments[] = { + ETH_MEMIF_ID_ARG, + ETH_MEMIF_ROLE_ARG, + ETH_MEMIF_PKT_BUFFER_SIZE_ARG, + ETH_MEMIF_RING_SIZE_ARG, + ETH_MEMIF_SOCKET_ARG, + ETH_MEMIF_MAC_ARG, + ETH_MEMIF_ZC_ARG, + ETH_MEMIF_SECRET_ARG, + NULL +}; + +const char * +memif_version(void) +{ + return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR)); +} + +static void +memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info) +{ + dev_info->max_mac_addrs = 1; + dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN; + dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS; + dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS; + dev_info->min_rx_bufsize = 0; +} + +static memif_ring_t * +memif_get_ring(struct pmd_internals *pmd, memif_ring_type_t type, uint16_t ring_num) +{ + /* rings only in region 0 */ + void *p = pmd->regions[0]->addr; + int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) * + (1 << pmd->run.log2_ring_size); + + p = (uint8_t *)p + (ring_num + type * pmd->run.num_s2m_rings) * ring_size; + + return (memif_ring_t *)p; +} + +static void * +memif_get_buffer(struct pmd_internals *pmd, memif_desc_t *d) +{ + return ((uint8_t *)pmd->regions[d->region]->addr + d->offset); +} + +static int +memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail, + struct rte_mbuf *tail) +{ + /* Check for number-of-segments-overflow */ + if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS)) + return -EOVERFLOW; + + /* Chain 'tail' onto the old tail */ + cur_tail->next = tail; + + /* accumulate number of segments and total length. */ + head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs); + + tail->pkt_len = tail->data_len; + head->pkt_len += tail->pkt_len; + + return 0; +} + +static uint16_t +eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct memif_queue *mq = queue; + struct pmd_internals *pmd = mq->pmd; + memif_ring_t *ring = mq->ring; + uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0; + uint16_t n_rx_pkts = 0; + uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) - + RTE_PKTMBUF_HEADROOM; + uint16_t src_len, src_off, dst_len, dst_off, cp_len; + memif_ring_type_t type = mq->type; + memif_desc_t *d0; + struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail; + uint64_t b; + ssize_t size __rte_unused; + uint16_t head; + int ret; + + if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) + return 0; + if (unlikely(ring == NULL)) + return 0; + + /* consume interrupt */ + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) + size = read(mq->intr_handle.fd, &b, sizeof(b)); + + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail; + last_slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail; + if (cur_slot == last_slot) + goto refill; + n_slots = last_slot - cur_slot; + + while (n_slots && n_rx_pkts < nb_pkts) { + mbuf_head = rte_pktmbuf_alloc(mq->mempool); + if (unlikely(mbuf_head == NULL)) + goto no_free_bufs; + mbuf = mbuf_head; + mbuf->port = mq->in_port; + +next_slot: + s0 = cur_slot & mask; + d0 = &ring->desc[s0]; + + src_len = d0->length; + dst_off = 0; + src_off = 0; + + do { + dst_len = mbuf_size - dst_off; + if (dst_len == 0) { + dst_off = 0; + dst_len = mbuf_size; + + /* store pointer to tail */ + mbuf_tail = mbuf; + mbuf = rte_pktmbuf_alloc(mq->mempool); + if (unlikely(mbuf == NULL)) + goto no_free_bufs; + mbuf->port = mq->in_port; + ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf); + if (unlikely(ret < 0)) { + MIF_LOG(ERR, "%s: number-of-segments-overflow", + rte_vdev_device_name(pmd->vdev)); + rte_pktmbuf_free(mbuf); + goto no_free_bufs; + } + } + cp_len = RTE_MIN(dst_len, src_len); + + rte_pktmbuf_data_len(mbuf) += cp_len; + rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf); + if (mbuf != mbuf_head) + rte_pktmbuf_pkt_len(mbuf_head) += cp_len; + + memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off), + (uint8_t *)memif_get_buffer(pmd, d0) + src_off, cp_len); + + src_off += cp_len; + dst_off += cp_len; + src_len -= cp_len; + } while (src_len); + + cur_slot++; + n_slots--; + + if (d0->flags & MEMIF_DESC_FLAG_NEXT) + goto next_slot; + + mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head); + *bufs++ = mbuf_head; + n_rx_pkts++; + } + +no_free_bufs: + if (type == MEMIF_RING_S2M) { + rte_mb(); + ring->tail = cur_slot; + mq->last_head = cur_slot; + } else { + mq->last_tail = cur_slot; + } + +refill: + if (type == MEMIF_RING_M2S) { + head = ring->head; + n_slots = ring_size - head + mq->last_tail; + + while (n_slots--) { + s0 = head++ & mask; + d0 = &ring->desc[s0]; + d0->length = pmd->run.pkt_buffer_size; + } + rte_mb(); + ring->head = head; + } + + mq->n_pkts += n_rx_pkts; + return n_rx_pkts; +} + +static uint16_t +eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct memif_queue *mq = queue; + struct pmd_internals *pmd = mq->pmd; + memif_ring_t *ring = mq->ring; + uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0; + uint16_t src_len, src_off, dst_len, dst_off, cp_len; + memif_ring_type_t type = mq->type; + memif_desc_t *d0; + struct rte_mbuf *mbuf; + struct rte_mbuf *mbuf_head; + uint64_t a; + ssize_t size; + + if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0)) + return 0; + if (unlikely(ring == NULL)) + return 0; + + ring_size = 1 << mq->log2_ring_size; + mask = ring_size - 1; + + n_free = ring->tail - mq->last_tail; + mq->last_tail += n_free; + slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail; + + if (type == MEMIF_RING_S2M) + n_free = ring_size - ring->head + mq->last_tail; + else + n_free = ring->head - ring->tail; + + while (n_tx_pkts < nb_pkts && n_free) { + mbuf_head = *bufs++; + mbuf = mbuf_head; + + saved_slot = slot; + d0 = &ring->desc[slot & mask]; + dst_off = 0; + dst_len = (type == MEMIF_RING_S2M) ? + pmd->run.pkt_buffer_size : d0->length; + +next_in_chain: + src_off = 0; + src_len = rte_pktmbuf_data_len(mbuf); + + while (src_len) { + if (dst_len == 0) { + if (n_free) { + slot++; + n_free--; + d0->flags |= MEMIF_DESC_FLAG_NEXT; + d0 = &ring->desc[slot & mask]; + dst_off = 0; + dst_len = (type == MEMIF_RING_S2M) ? + pmd->run.pkt_buffer_size : d0->length; + d0->flags = 0; + } else { + slot = saved_slot; + goto no_free_slots; + } + } + cp_len = RTE_MIN(dst_len, src_len); + + memcpy((uint8_t *)memif_get_buffer(pmd, d0) + dst_off, + rte_pktmbuf_mtod_offset(mbuf, void *, src_off), + cp_len); + + mq->n_bytes += cp_len; + src_off += cp_len; + dst_off += cp_len; + src_len -= cp_len; + dst_len -= cp_len; + + d0->length = dst_off; + } + + if (rte_pktmbuf_is_contiguous(mbuf) == 0) { + mbuf = mbuf->next; + goto next_in_chain; + } + + n_tx_pkts++; + slot++; + n_free--; + rte_pktmbuf_free(mbuf_head); + } + +no_free_slots: + rte_mb(); + if (type == MEMIF_RING_S2M) + ring->head = slot; + else + ring->tail = slot; + + if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) { + a = 1; + size = write(mq->intr_handle.fd, &a, sizeof(a)); + if (unlikely(size < 0)) { + MIF_LOG(WARNING, + "%s: Failed to send interrupt. %s", + rte_vdev_device_name(pmd->vdev), strerror(errno)); + } + } + + mq->n_err += nb_pkts - n_tx_pkts; + mq->n_pkts += n_tx_pkts; + return n_tx_pkts; +} + +void +memif_free_regions(struct pmd_internals *pmd) +{ + int i; + struct memif_region *r; + + /* regions are allocated contiguously, so it's + * enough to loop until 'pmd->regions_num' + */ + for (i = 0; i < pmd->regions_num; i++) { + r = pmd->regions[i]; + if (r != NULL) { + if (r->addr != NULL) { + munmap(r->addr, r->region_size); + if (r->fd > 0) { + close(r->fd); + r->fd = -1; + } + } + rte_free(r); + pmd->regions[i] = NULL; + } + } + pmd->regions_num = 0; +} + +static int +memif_region_init_shm(struct pmd_internals *pmd, uint8_t has_buffers) +{ + char shm_name[ETH_MEMIF_SHM_NAME_SIZE]; + int ret = 0; + struct memif_region *r; + + if (pmd->regions_num >= ETH_MEMIF_MAX_REGION_NUM) { + MIF_LOG(ERR, "%s: Too many regions.", rte_vdev_device_name(pmd->vdev)); + return -1; + } + + r = rte_zmalloc("region", sizeof(struct memif_region), 0); + if (r == NULL) { + MIF_LOG(ERR, "%s: Failed to alloc memif region.", + rte_vdev_device_name(pmd->vdev)); + return -ENOMEM; + } + + /* calculate buffer offset */ + r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) * + (sizeof(memif_ring_t) + sizeof(memif_desc_t) * + (1 << pmd->run.log2_ring_size)); + + r->region_size = r->pkt_buffer_offset; + /* if region has buffers, add buffers size to region_size */ + if (has_buffers == 1) + r->region_size += (uint32_t)(pmd->run.pkt_buffer_size * + (1 << pmd->run.log2_ring_size) * + (pmd->run.num_s2m_rings + + pmd->run.num_m2s_rings)); + + memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE); + snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d", + pmd->regions_num); + + r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING); + if (r->fd < 0) { + MIF_LOG(ERR, "%s: Failed to create shm file: %s.", + rte_vdev_device_name(pmd->vdev), + strerror(errno)); + ret = -1; + goto error; + } + + ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + MIF_LOG(ERR, "%s: Failed to add seals to shm file: %s.", + rte_vdev_device_name(pmd->vdev), + strerror(errno)); + goto error; + } + + ret = ftruncate(r->fd, r->region_size); + if (ret < 0) { + MIF_LOG(ERR, "%s: Failed to truncate shm file: %s.", + rte_vdev_device_name(pmd->vdev), + strerror(errno)); + goto error; + } + + r->addr = mmap(NULL, r->region_size, PROT_READ | + PROT_WRITE, MAP_SHARED, r->fd, 0); + if (r->addr == MAP_FAILED) { + MIF_LOG(ERR, "%s: Failed to mmap shm region: %s.", + rte_vdev_device_name(pmd->vdev), + strerror(ret)); + ret = -1; + goto error; + } + + pmd->regions[pmd->regions_num] = r; + pmd->regions_num++; + + return ret; + +error: + if (r->fd > 0) + close(r->fd); + r->fd = -1; + + return ret; +} + +static int +memif_regions_init(struct pmd_internals *pmd) +{ + int ret; + + /* create one buffer region */ + ret = memif_region_init_shm(pmd, /* has buffer */ 1); + if (ret < 0) + return ret; + + return 0; +} + +static void +memif_init_rings(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + memif_ring_t *ring; + int i, j; + uint16_t slot; + + for (i = 0; i < pmd->run.num_s2m_rings; i++) { + ring = memif_get_ring(pmd, MEMIF_RING_S2M, i); + ring->head = 0; + ring->tail = 0; + ring->cookie = MEMIF_COOKIE; + ring->flags = 0; + for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) { + slot = i * (1 << pmd->run.log2_ring_size) + j; + ring->desc[j].region = 0; + ring->desc[j].offset = pmd->regions[0]->pkt_buffer_offset + + (uint32_t)(slot * pmd->run.pkt_buffer_size); + ring->desc[j].length = pmd->run.pkt_buffer_size; + } + } + + for (i = 0; i < pmd->run.num_m2s_rings; i++) { + ring = memif_get_ring(pmd, MEMIF_RING_M2S, i); + ring->head = 0; + ring->tail = 0; + ring->cookie = MEMIF_COOKIE; + ring->flags = 0; + for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) { + slot = (i + pmd->run.num_s2m_rings) * + (1 << pmd->run.log2_ring_size) + j; + ring->desc[j].region = 0; + ring->desc[j].offset = pmd->regions[0]->pkt_buffer_offset + + (uint32_t)(slot * pmd->run.pkt_buffer_size); + ring->desc[j].length = pmd->run.pkt_buffer_size; + } + } +} + +/* called only by slave */ +static void +memif_init_queues(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_queue *mq; + int i; + + for (i = 0; i < pmd->run.num_s2m_rings; i++) { + mq = dev->data->tx_queues[i]; + mq->ring = memif_get_ring(pmd, MEMIF_RING_S2M, i); + mq->log2_ring_size = pmd->run.log2_ring_size; + /* queues located only in region 0 */ + mq->region = 0; + mq->ring_offset = (uint8_t *)mq->ring - (uint8_t *)pmd->regions[0]->addr; + mq->last_head = 0; + mq->last_tail = 0; + mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK); + if (mq->intr_handle.fd < 0) { + MIF_LOG(WARNING, + "%s: Failed to create eventfd for tx queue %d: %s.", + rte_vdev_device_name(pmd->vdev), i, + strerror(errno)); + } + } + + for (i = 0; i < pmd->run.num_m2s_rings; i++) { + mq = dev->data->rx_queues[i]; + mq->ring = memif_get_ring(pmd, MEMIF_RING_M2S, i); + mq->log2_ring_size = pmd->run.log2_ring_size; + /* queues located only in region 0 */ + mq->region = 0; + mq->ring_offset = (uint8_t *)mq->ring - (uint8_t *)pmd->regions[0]->addr; + mq->last_head = 0; + mq->last_tail = 0; + mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK); + if (mq->intr_handle.fd < 0) { + MIF_LOG(WARNING, + "%s: Failed to create eventfd for rx queue %d: %s.", + rte_vdev_device_name(pmd->vdev), i, + strerror(errno)); + } + } +} + +int +memif_init_regions_and_queues(struct rte_eth_dev *dev) +{ + int ret; + + ret = memif_regions_init(dev->data->dev_private); + if (ret < 0) + return ret; + + memif_init_rings(dev); + + memif_init_queues(dev); + + return 0; +} + +int +memif_connect(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_region *mr; + struct memif_queue *mq; + int i; + + for (i = 0; i < pmd->regions_num; i++) { + mr = pmd->regions[i]; + if (mr != NULL) { + if (mr->addr == NULL) { + if (mr->fd < 0) + return -1; + mr->addr = mmap(NULL, mr->region_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, mr->fd, 0); + if (mr->addr == NULL) + return -1; + } + } + } + + for (i = 0; i < pmd->run.num_s2m_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_SLAVE) ? + dev->data->tx_queues[i] : dev->data->rx_queues[i]; + mq->ring = (memif_ring_t *)((uint8_t *)pmd->regions[mq->region]->addr + + mq->ring_offset); + if (mq->ring->cookie != MEMIF_COOKIE) { + MIF_LOG(ERR, "%s: Wrong cookie", + rte_vdev_device_name(pmd->vdev)); + return -1; + } + mq->ring->head = 0; + mq->ring->tail = 0; + mq->last_head = 0; + mq->last_tail = 0; + /* enable polling mode */ + if (pmd->role == MEMIF_ROLE_MASTER) + mq->ring->flags = MEMIF_RING_FLAG_MASK_INT; + } + for (i = 0; i < pmd->run.num_m2s_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_SLAVE) ? + dev->data->rx_queues[i] : dev->data->tx_queues[i]; + mq->ring = (memif_ring_t *)((uint8_t *)pmd->regions[mq->region]->addr + + mq->ring_offset); + if (mq->ring->cookie != MEMIF_COOKIE) { + MIF_LOG(ERR, "%s: Wrong cookie", + rte_vdev_device_name(pmd->vdev)); + return -1; + } + mq->ring->head = 0; + mq->ring->tail = 0; + mq->last_head = 0; + mq->last_tail = 0; + /* enable polling mode */ + if (pmd->role == MEMIF_ROLE_SLAVE) + mq->ring->flags = MEMIF_RING_FLAG_MASK_INT; + } + + pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING; + pmd->flags |= ETH_MEMIF_FLAG_CONNECTED; + dev->data->dev_link.link_status = ETH_LINK_UP; + MIF_LOG(INFO, "%s: Connected.", rte_vdev_device_name(pmd->vdev)); + return 0; +} + +static int +memif_dev_start(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + int ret = 0; + + switch (pmd->role) { + case MEMIF_ROLE_SLAVE: + ret = memif_connect_slave(dev); + break; + case MEMIF_ROLE_MASTER: + ret = memif_connect_master(dev); + break; + default: + MIF_LOG(ERR, "%s: Unknown role: %d.", + rte_vdev_device_name(pmd->vdev), pmd->role); + ret = -1; + break; + } + + return ret; +} + +static void +memif_dev_close(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + int i; + + memif_msg_enq_disconnect(pmd->cc, "Device closed", 0); + memif_disconnect(dev); + + for (i = 0; i < dev->data->nb_rx_queues; i++) + (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]); + for (i = 0; i < dev->data->nb_tx_queues; i++) + (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]); + + memif_socket_remove_device(dev); +} + +static int +memif_dev_configure(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + + /* + * SLAVE - TXQ + * MASTER - RXQ + */ + pmd->cfg.num_s2m_rings = (pmd->role == MEMIF_ROLE_SLAVE) ? + dev->data->nb_tx_queues : dev->data->nb_rx_queues; + + /* + * SLAVE - RXQ + * MASTER - TXQ + */ + pmd->cfg.num_m2s_rings = (pmd->role == MEMIF_ROLE_SLAVE) ? + dev->data->nb_rx_queues : dev->data->nb_tx_queues; + + return 0; +} + +static int +memif_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t qid, + uint16_t nb_tx_desc __rte_unused, + unsigned int socket_id __rte_unused, + const struct rte_eth_txconf *tx_conf __rte_unused) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_queue *mq; + + mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0); + if (mq == NULL) { + MIF_LOG(ERR, "%s: Failed to allocate tx queue id: %u", + rte_vdev_device_name(pmd->vdev), qid); + return -ENOMEM; + } + + mq->type = + (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_S2M : MEMIF_RING_M2S; + mq->n_pkts = 0; + mq->n_bytes = 0; + mq->n_err = 0; + mq->intr_handle.fd = -1; + mq->intr_handle.type = RTE_INTR_HANDLE_EXT; + mq->pmd = pmd; + dev->data->tx_queues[qid] = mq; + + return 0; +} + +static int +memif_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t qid, + uint16_t nb_rx_desc __rte_unused, + unsigned int socket_id __rte_unused, + const struct rte_eth_rxconf *rx_conf __rte_unused, + struct rte_mempool *mb_pool) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_queue *mq; + + mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0); + if (mq == NULL) { + MIF_LOG(ERR, "%s: Failed to allocate rx queue id: %u", + rte_vdev_device_name(pmd->vdev), qid); + return -ENOMEM; + } + + mq->type = (pmd->role == MEMIF_ROLE_SLAVE) ? MEMIF_RING_M2S : MEMIF_RING_S2M; + mq->n_pkts = 0; + mq->n_bytes = 0; + mq->n_err = 0; + mq->intr_handle.fd = -1; + mq->intr_handle.type = RTE_INTR_HANDLE_EXT; + mq->mempool = mb_pool; + mq->in_port = dev->data->port_id; + mq->pmd = pmd; + dev->data->rx_queues[qid] = mq; + + return 0; +} + +static void +memif_queue_release(void *queue) +{ + struct memif_queue *mq = (struct memif_queue *)queue; + + if (!mq) + return; + + rte_free(mq); +} + +static int +memif_link_update(struct rte_eth_dev *dev __rte_unused, + int wait_to_complete __rte_unused) +{ + return 0; +} + +static int +memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + struct pmd_internals *pmd = dev->data->dev_private; + struct memif_queue *mq; + int i; + uint8_t tmp, nq; + + stats->ipackets = 0; + stats->ibytes = 0; + stats->opackets = 0; + stats->obytes = 0; + stats->oerrors = 0; + + tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings : + pmd->run.num_m2s_rings; + nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp : + RTE_ETHDEV_QUEUE_STAT_CNTRS; + + /* RX stats */ + for (i = 0; i < nq; i++) { + mq = dev->data->rx_queues[i]; + stats->q_ipackets[i] = mq->n_pkts; + stats->q_ibytes[i] = mq->n_bytes; + stats->ipackets += mq->n_pkts; + stats->ibytes += mq->n_bytes; + } + + tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings : + pmd->run.num_s2m_rings; + nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp : + RTE_ETHDEV_QUEUE_STAT_CNTRS; + + /* TX stats */ + for (i = 0; i < nq; i++) { + mq = dev->data->tx_queues[i]; + stats->q_opackets[i] = mq->n_pkts; + stats->q_obytes[i] = mq->n_bytes; + stats->opackets += mq->n_pkts; + stats->obytes += mq->n_bytes; + stats->oerrors += mq->n_err; + } + return 0; +} + +static void +memif_stats_reset(struct rte_eth_dev *dev) +{ + struct pmd_internals *pmd = dev->data->dev_private; + int i; + struct memif_queue *mq; + + for (i = 0; i < pmd->run.num_s2m_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->tx_queues[i] : + dev->data->rx_queues[i]; + mq->n_pkts = 0; + mq->n_bytes = 0; + mq->n_err = 0; + } + for (i = 0; i < pmd->run.num_m2s_rings; i++) { + mq = (pmd->role == MEMIF_ROLE_SLAVE) ? dev->data->rx_queues[i] : + dev->data->tx_queues[i]; + mq->n_pkts = 0; + mq->n_bytes = 0; + mq->n_err = 0; + } +} + +static int +memif_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t qid __rte_unused) +{ + struct pmd_internals *pmd = dev->data->dev_private; + + MIF_LOG(WARNING, "%s: Interrupt mode not supported.", + rte_vdev_device_name(pmd->vdev)); + + return -1; +} + +static int +memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused) +{ + struct pmd_internals *pmd __rte_unused = dev->data->dev_private; + + return 0; +} + +static const struct eth_dev_ops ops = { + .dev_start = memif_dev_start, + .dev_close = memif_dev_close, + .dev_infos_get = memif_dev_info, + .dev_configure = memif_dev_configure, + .tx_queue_setup = memif_tx_queue_setup, + .rx_queue_setup = memif_rx_queue_setup, + .rx_queue_release = memif_queue_release, + .tx_queue_release = memif_queue_release, + .rx_queue_intr_enable = memif_rx_queue_intr_enable, + .rx_queue_intr_disable = memif_rx_queue_intr_disable, + .link_update = memif_link_update, + .stats_get = memif_stats_get, + .stats_reset = memif_stats_reset, +}; + +static int +memif_create(struct rte_vdev_device *vdev, enum memif_role_t role, + memif_interface_id_t id, uint32_t flags, + const char *socket_filename, + memif_log2_ring_size_t log2_ring_size, + uint16_t pkt_buffer_size, const char *secret, + struct rte_ether_addr *ether_addr) +{ + int ret = 0; + struct rte_eth_dev *eth_dev; + struct rte_eth_dev_data *data; + struct pmd_internals *pmd; + const unsigned int numa_node = vdev->device.numa_node; + const char *name = rte_vdev_device_name(vdev); + + if (flags & ETH_MEMIF_FLAG_ZERO_COPY) { + MIF_LOG(ERR, "Zero-copy slave not supported."); + return -1; + } + + eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd)); + if (eth_dev == NULL) { + MIF_LOG(ERR, "%s: Unable to allocate device struct.", name); + return -1; + } + + pmd = eth_dev->data->dev_private; + memset(pmd, 0, sizeof(*pmd)); + + pmd->vdev = vdev; + pmd->id = id; + pmd->flags = flags; + pmd->flags |= ETH_MEMIF_FLAG_DISABLED; + pmd->role = role; + + ret = memif_socket_init(eth_dev, socket_filename); + if (ret < 0) + return ret; + + memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE); + if (secret != NULL) + strlcpy(pmd->secret, secret, sizeof(pmd->secret)); + + pmd->cfg.log2_ring_size = log2_ring_size; + /* set in .dev_configure() */ + pmd->cfg.num_s2m_rings = 0; + pmd->cfg.num_m2s_rings = 0; + + pmd->cfg.pkt_buffer_size = pkt_buffer_size; + + data = eth_dev->data; + data->dev_private = pmd; + data->numa_node = numa_node; + data->mac_addrs = ether_addr; + + eth_dev->dev_ops = &ops; + eth_dev->device = &vdev->device; + eth_dev->rx_pkt_burst = eth_memif_rx; + eth_dev->tx_pkt_burst = eth_memif_tx; + + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + + rte_eth_dev_probing_finish(eth_dev); + + return 0; +} + +static int +memif_set_role(const char *key __rte_unused, const char *value, + void *extra_args) +{ + enum memif_role_t *role = (enum memif_role_t *)extra_args; + + if (strstr(value, "master") != NULL) { + *role = MEMIF_ROLE_MASTER; + } else if (strstr(value, "slave") != NULL) { + *role = MEMIF_ROLE_SLAVE; + } else { + MIF_LOG(ERR, "Unknown role: %s.", value); + return -EINVAL; + } + return 0; +} + +static int +memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args) +{ + uint32_t *flags = (uint32_t *)extra_args; + + if (strstr(value, "yes") != NULL) { + *flags |= ETH_MEMIF_FLAG_ZERO_COPY; + } else if (strstr(value, "no") != NULL) { + *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY; + } else { + MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value); + return -EINVAL; + } + return 0; +} + +static int +memif_set_id(const char *key __rte_unused, const char *value, void *extra_args) +{ + memif_interface_id_t *id = (memif_interface_id_t *)extra_args; + + /* even if parsing fails, 0 is a valid id */ + *id = strtoul(value, NULL, 10); + return 0; +} + +static int +memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args) +{ + unsigned long tmp; + uint16_t *pkt_buffer_size = (uint16_t *)extra_args; + + tmp = strtoul(value, NULL, 10); + if (tmp == 0 || tmp > 0xFFFF) { + MIF_LOG(ERR, "Invalid buffer size: %s.", value); + return -EINVAL; + } + *pkt_buffer_size = tmp; + return 0; +} + +static int +memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args) +{ + unsigned long tmp; + memif_log2_ring_size_t *log2_ring_size = + (memif_log2_ring_size_t *)extra_args; + + tmp = strtoul(value, NULL, 10); + if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) { + MIF_LOG(ERR, "Invalid ring size: %s (max %u).", + value, ETH_MEMIF_MAX_LOG2_RING_SIZE); + return -EINVAL; + } + *log2_ring_size = tmp; + return 0; +} + +/* check if directory exists and if we have permission to read/write */ +static int +memif_check_socket_filename(const char *filename) +{ + char *dir = NULL, *tmp; + uint32_t idx; + int ret = 0; + + tmp = strrchr(filename, '/'); + if (tmp != NULL) { + idx = tmp - filename; + dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0); + if (dir == NULL) { + MIF_LOG(ERR, "Failed to allocate memory."); + return -1; + } + strlcpy(dir, filename, sizeof(char) * (idx + 1)); + } + + if (dir == NULL || (faccessat(-1, dir, F_OK | R_OK | + W_OK, AT_EACCESS) < 0)) { + MIF_LOG(ERR, "Invalid socket directory."); + ret = -EINVAL; + } + + if (dir != NULL) + rte_free(dir); + + return ret; +} + +static int +memif_set_socket_filename(const char *key __rte_unused, const char *value, + void *extra_args) +{ + const char **socket_filename = (const char **)extra_args; + + *socket_filename = value; + return memif_check_socket_filename(*socket_filename); +} + +static int +memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args) +{ + struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args; + int ret = 0; + + ret = sscanf(value, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + ðer_addr->addr_bytes[0], ðer_addr->addr_bytes[1], + ðer_addr->addr_bytes[2], ðer_addr->addr_bytes[3], + ðer_addr->addr_bytes[4], ðer_addr->addr_bytes[5]); + if (ret != 6) + MIF_LOG(WARNING, "Failed to parse mac '%s'.", value); + return 0; +} + +static int +memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args) +{ + const char **secret = (const char **)extra_args; + + *secret = value; + return 0; +} + +static int +rte_pmd_memif_probe(struct rte_vdev_device *vdev) +{ + RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128); + RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16); + int ret = 0; + struct rte_kvargs *kvlist; + const char *name = rte_vdev_device_name(vdev); + enum memif_role_t role = MEMIF_ROLE_SLAVE; + memif_interface_id_t id = 0; + uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE; + memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE; + const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME; + uint32_t flags = 0; + const char *secret = NULL; + struct rte_ether_addr *ether_addr = rte_zmalloc("", + sizeof(struct rte_ether_addr), 0); + + rte_eth_random_addr(ether_addr->addr_bytes); + + MIF_LOG(INFO, "Initialize MEMIF: %s.", name); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + MIF_LOG(ERR, "Multi-processing not supported for memif."); + /* TODO: + * Request connection information. + * + * Once memif in the primary process is connected, + * broadcast connection information. + */ + return -1; + } + + kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments); + + /* parse parameters */ + if (kvlist != NULL) { + ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG, + &memif_set_role, &role); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG, + &memif_set_id, &id); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG, + &memif_set_bs, &pkt_buffer_size); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG, + &memif_set_rs, &log2_ring_size); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG, + &memif_set_socket_filename, + (void *)(&socket_filename)); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG, + &memif_set_mac, ether_addr); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG, + &memif_set_zc, &flags); + if (ret < 0) + goto exit; + ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG, + &memif_set_secret, (void *)(&secret)); + if (ret < 0) + goto exit; + } + + /* create interface */ + ret = memif_create(vdev, role, id, flags, socket_filename, + log2_ring_size, pkt_buffer_size, secret, ether_addr); + +exit: + if (kvlist != NULL) + rte_kvargs_free(kvlist); + return ret; +} + +static int +rte_pmd_memif_remove(struct rte_vdev_device *vdev) +{ + struct rte_eth_dev *eth_dev; + + eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev)); + if (eth_dev == NULL) + return 0; + + rte_eth_dev_close(eth_dev->data->port_id); + + return 0; +} + +static struct rte_vdev_driver pmd_memif_drv = { + .probe = rte_pmd_memif_probe, + .remove = rte_pmd_memif_remove, +}; + +RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv); + +RTE_PMD_REGISTER_PARAM_STRING(net_memif, + ETH_MEMIF_ID_ARG "=" + ETH_MEMIF_ROLE_ARG "=master|slave" + ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=" + ETH_MEMIF_RING_SIZE_ARG "=" + ETH_MEMIF_SOCKET_ARG "=" + ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx" + ETH_MEMIF_ZC_ARG "=yes|no" + ETH_MEMIF_SECRET_ARG "="); + +int memif_logtype; + +RTE_INIT(memif_init_log) +{ + memif_logtype = rte_log_register("pmd.net.memif"); + if (memif_logtype >= 0) + rte_log_set_level(memif_logtype, RTE_LOG_NOTICE); +} diff --git a/drivers/net/memif/rte_eth_memif.h b/drivers/net/memif/rte_eth_memif.h new file mode 100644 index 0000000000..5f631e925c --- /dev/null +++ b/drivers/net/memif/rte_eth_memif.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018-2019 Cisco Systems, Inc. All rights reserved. + */ + +#ifndef _RTE_ETH_MEMIF_H_ +#define _RTE_ETH_MEMIF_H_ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif /* GNU_SOURCE */ + +#include + +#include +#include +#include + +#include "memif.h" + +#define ETH_MEMIF_DEFAULT_SOCKET_FILENAME "/run/memif.sock" +#define ETH_MEMIF_DEFAULT_RING_SIZE 10 +#define ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE 2048 + +#define ETH_MEMIF_MAX_NUM_Q_PAIRS 255 +#define ETH_MEMIF_MAX_LOG2_RING_SIZE 14 +#define ETH_MEMIF_MAX_REGION_NUM 256 + +#define ETH_MEMIF_SHM_NAME_SIZE 32 +#define ETH_MEMIF_DISC_STRING_SIZE 96 +#define ETH_MEMIF_SECRET_SIZE 24 + +extern int memif_logtype; + +#define MIF_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, memif_logtype, \ + "%s(): " fmt "\n", __func__, ##args) + +enum memif_role_t { + MEMIF_ROLE_MASTER, + MEMIF_ROLE_SLAVE, +}; + +struct memif_region { + void *addr; /**< shared memory address */ + memif_region_size_t region_size; /**< shared memory size */ + int fd; /**< shared memory file descriptor */ + uint32_t pkt_buffer_offset; + /**< offset from 'addr' to first packet buffer */ +}; + +struct memif_queue { + struct rte_mempool *mempool; /**< mempool for RX packets */ + struct pmd_internals *pmd; /**< device internals */ + + memif_ring_type_t type; /**< ring type */ + memif_region_index_t region; /**< shared memory region index */ + + uint16_t in_port; /**< port id */ + + memif_region_offset_t ring_offset; + /**< ring offset from start of shm region (ring - memif_region.addr) */ + + uint16_t last_head; /**< last ring head */ + uint16_t last_tail; /**< last ring tail */ + + /* rx/tx info */ + uint64_t n_pkts; /**< number of rx/tx packets */ + uint64_t n_bytes; /**< number of rx/tx bytes */ + uint64_t n_err; /**< number of tx errors */ + + memif_ring_t *ring; /**< pointer to ring */ + + struct rte_intr_handle intr_handle; /**< interrupt handle */ + + memif_log2_ring_size_t log2_ring_size; /**< log2 of ring size */ +}; + +struct pmd_internals { + memif_interface_id_t id; /**< unique id */ + enum memif_role_t role; /**< device role */ + uint32_t flags; /**< device status flags */ +#define ETH_MEMIF_FLAG_CONNECTING (1 << 0) +/**< device is connecting */ +#define ETH_MEMIF_FLAG_CONNECTED (1 << 1) +/**< device is connected */ +#define ETH_MEMIF_FLAG_ZERO_COPY (1 << 2) +/**< device is zero-copy enabled */ +#define ETH_MEMIF_FLAG_DISABLED (1 << 3) +/**< device has not been configured and can not accept connection requests */ + + char *socket_filename; /**< pointer to socket filename */ + char secret[ETH_MEMIF_SECRET_SIZE]; /**< secret (optional security parameter) */ + + struct memif_control_channel *cc; /**< control channel */ + + struct memif_region *regions[ETH_MEMIF_MAX_REGION_NUM]; + /**< shared memory regions */ + memif_region_index_t regions_num; /**< number of regions */ + + /* remote info */ + char remote_name[RTE_DEV_NAME_MAX_LEN]; /**< remote app name */ + char remote_if_name[RTE_DEV_NAME_MAX_LEN]; /**< remote peer name */ + + struct { + memif_log2_ring_size_t log2_ring_size; /**< log2 of ring size */ + uint8_t num_s2m_rings; /**< number of slave to master rings */ + uint8_t num_m2s_rings; /**< number of master to slave rings */ + uint16_t pkt_buffer_size; /**< buffer size */ + } cfg; /**< Configured parameters (max values) */ + + struct { + memif_log2_ring_size_t log2_ring_size; /**< log2 of ring size */ + uint8_t num_s2m_rings; /**< number of slave to master rings */ + uint8_t num_m2s_rings; /**< number of master to slave rings */ + uint16_t pkt_buffer_size; /**< buffer size */ + } run; + /**< Parameters used in active connection */ + + char local_disc_string[ETH_MEMIF_DISC_STRING_SIZE]; + /**< local disconnect reason */ + char remote_disc_string[ETH_MEMIF_DISC_STRING_SIZE]; + /**< remote disconnect reason */ + + struct rte_vdev_device *vdev; /**< vdev handle */ +}; + +/** + * Unmap shared memory and free regions from memory. + * + * @param pmd + * device internals + */ +void memif_free_regions(struct pmd_internals *pmd); + +/** + * Finalize connection establishment process. Map shared memory file + * (master role), initialize ring queue, set link status up. + * + * @param dev + * memif device + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int memif_connect(struct rte_eth_dev *dev); + +/** + * Create shared memory file and initialize ring queue. + * Only called by slave when establishing connection + * + * @param dev + * memif device + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int memif_init_regions_and_queues(struct rte_eth_dev *dev); + +/** + * Get memif version string. + * + * @return + * - memif version string + */ +const char *memif_version(void); + +#ifndef MFD_HUGETLB +#ifndef __NR_memfd_create + +#if defined __x86_64__ +#define __NR_memfd_create 319 +#elif defined __x86_32__ +#define __NR_memfd_create 1073742143 +#elif defined __arm__ +#define __NR_memfd_create 385 +#elif defined __aarch64__ +#define __NR_memfd_create 279 +#elif defined __powerpc__ +#define __NR_memfd_create 360 +#elif defined __i386__ +#define __NR_memfd_create 356 +#else +#error "__NR_memfd_create unknown for this architecture" +#endif + +#endif /* __NR_memfd_create */ + +static inline int memfd_create(const char *name, unsigned int flags) +{ + return syscall(__NR_memfd_create, name, flags); +} +#endif /* MFD_HUGETLB */ + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef MFD_ALLOW_SEALING +#define MFD_ALLOW_SEALING 0x0002U +#endif + +#ifndef F_ADD_SEALS +#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) + +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +#endif /* RTE_ETH_MEMIF_H */ diff --git a/drivers/net/memif/rte_pmd_memif_version.map b/drivers/net/memif/rte_pmd_memif_version.map new file mode 100644 index 0000000000..8861484fb3 --- /dev/null +++ b/drivers/net/memif/rte_pmd_memif_version.map @@ -0,0 +1,4 @@ +DPDK_19.08 { + + local: *; +}; diff --git a/drivers/net/meson.build b/drivers/net/meson.build index ed99896c3d..b570734836 100644 --- a/drivers/net/meson.build +++ b/drivers/net/meson.build @@ -24,6 +24,7 @@ drivers = ['af_packet', 'ixgbe', 'kni', 'liquidio', + 'memif', 'mlx4', 'mlx5', 'mvneta', diff --git a/mk/rte.app.mk b/mk/rte.app.mk index b6106e19f3..d0df0b023a 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -171,6 +171,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += -lrte_pmd_kni endif _LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += -lrte_pmd_lio +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += -lrte_pmd_memif _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -lrte_pmd_mlx5 -lmnl ifeq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)