net/memif: introduce memory interface PMD
authorJakub Grajciar <jgrajcia@cisco.com>
Thu, 6 Jun 2019 11:38:50 +0000 (13:38 +0200)
committerFerruh Yigit <ferruh.yigit@intel.com>
Thu, 13 Jun 2019 14:54:29 +0000 (23:54 +0900)
Shared memory packet interface (memif) PMD allows for DPDK and any other
client using memif (DPDK, VPP, libmemif) to communicate using shared
memory. The created device transmits packets in a raw format. It can be
used with Ethernet mode, IP mode, or Punt/Inject. At this moment, only
Ethernet mode is supported in DPDK memif implementation. Memif is Linux
only.

Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
18 files changed:
MAINTAINERS
config/common_base
config/common_linux
doc/guides/nics/features/memif.ini [new file with mode: 0644]
doc/guides/nics/index.rst
doc/guides/nics/memif.rst [new file with mode: 0644]
doc/guides/rel_notes/release_19_08.rst
drivers/net/Makefile
drivers/net/memif/Makefile [new file with mode: 0644]
drivers/net/memif/memif.h [new file with mode: 0644]
drivers/net/memif/memif_socket.c [new file with mode: 0644]
drivers/net/memif/memif_socket.h [new file with mode: 0644]
drivers/net/memif/meson.build [new file with mode: 0644]
drivers/net/memif/rte_eth_memif.c [new file with mode: 0644]
drivers/net/memif/rte_eth_memif.h [new file with mode: 0644]
drivers/net/memif/rte_pmd_memif_version.map [new file with mode: 0644]
drivers/net/meson.build
mk/rte.app.mk

index d0bf259..0212fe6 100644 (file)
@@ -839,6 +839,12 @@ F: drivers/net/softnic/
 F: doc/guides/nics/features/softnic.ini
 F: doc/guides/nics/softnic.rst
 
+Memif PMD
+M: Jakub Grajciar <jgrajcia@cisco.com>
+F: drivers/net/memif/
+F: doc/guides/nics/memif.rst
+F: doc/guides/nics/features/memif.ini
+
 
 Crypto Drivers
 --------------
index 6f19ad5..e406e78 100644 (file)
@@ -444,6 +444,11 @@ CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n
 #
 CONFIG_RTE_LIBRTE_PMD_AF_XDP=n
 
+#
+# Compile Memory Interface PMD driver (Linux only)
+#
+CONFIG_RTE_LIBRTE_PMD_MEMIF=n
+
 #
 # Compile link bonding PMD library
 #
index 7533427..87514fe 100644 (file)
@@ -19,6 +19,7 @@ CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n
 CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_IFC_PMD=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
+CONFIG_RTE_LIBRTE_PMD_MEMIF=y
 CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y
 CONFIG_RTE_LIBRTE_PMD_TAP=y
 CONFIG_RTE_LIBRTE_AVP_PMD=y
diff --git a/doc/guides/nics/features/memif.ini b/doc/guides/nics/features/memif.ini
new file mode 100644 (file)
index 0000000..807d9ec
--- /dev/null
@@ -0,0 +1,14 @@
+;
+; Supported features of the 'memif' network poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Link status          = Y
+Basic stats          = Y
+Jumbo frame          = Y
+ARMv8                = Y
+Power8               = Y
+x86-32               = Y
+x86-64               = Y
+Usage doc            = Y
index 2221c35..691e720 100644 (file)
@@ -36,6 +36,7 @@ Network Interface Controller Drivers
     intel_vf
     kni
     liquidio
+    memif
     mlx4
     mlx5
     mvneta
diff --git a/doc/guides/nics/memif.rst b/doc/guides/nics/memif.rst
new file mode 100644 (file)
index 0000000..de2d481
--- /dev/null
@@ -0,0 +1,234 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018-2019 Cisco Systems, Inc.
+
+======================
+Memif Poll Mode Driver
+======================
+
+Shared memory packet interface (memif) PMD allows for DPDK and any other client
+using memif (DPDK, VPP, libmemif) to communicate using shared memory. Memif is
+Linux only.
+
+The created device transmits packets in a raw format. It can be used with
+Ethernet mode, IP mode, or Punt/Inject. At this moment, only Ethernet mode is
+supported in DPDK memif implementation.
+
+Memif works in two roles: master and slave. Slave connects to master over an
+existing socket. It is also the producer of the shared memory file and
+initializes the shared memory. Each interface can be connected to only one peer
+interface at a time. The peer interface is identified by the id parameter. Master
+creates the socket and listens for any slave connection requests. The socket
+may already exist on the system. Be sure to remove any such sockets, if you
+are creating a master interface, or you will see an "Address already in use"
+error. Function ``rte_pmd_memif_remove()``, which removes memif interface,
+will also remove a listener socket, if it is not being used by any other
+interface.
+
+The method to enable one or more interfaces is to use the
+``--vdev=net_memif0`` option on the DPDK application command line. Each
+``--vdev=net_memif<N>`` option given will create an interface named net_memif0,
+net_memif1, and so on. Memif uses a unix domain socket to transmit control
+messages. Each memif has a unique id per socket. This id is used to identify
+the peer interface. If you are connecting multiple
+interfaces using the same socket, be sure to specify unique ids ``id=0``, ``id=1``,
+etc. Note that if you assign a socket to a master interface it becomes a
+listener socket. A listener socket cannot be used by a slave interface on the same
+client.
+
+.. csv-table:: **Memif configuration options**
+   :header: "Option", "Description", "Default", "Valid value"
+
+   "id=0", "Used to identify peer interface", "0", "uint32_t"
+   "role=master", "Set memif role", "slave", "master|slave"
+   "bsize=1024", "Size of single packet buffer", "2048", "uint16_t"
+   "rsize=11", "Log2 of ring size. If rsize is 10, actual ring size is 1024", "10", "1-14"
+   "socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 256"
+   "mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", ""
+   "secret=abc123", "Secret is an optional security option, which if specified, must be matched by peer", "", "string len 24"
+   "zero-copy=yes", "Enable/disable zero-copy slave mode", "no", "yes|no"
+
+**Connection establishment**
+
+In order to create a memif connection, two memif interfaces, each in a separate
+process, are needed: one interface in the ``master`` role and the other in the
+``slave`` role. It is not possible to connect two interfaces in a single
+process. Each interface can be connected to only one interface at a time,
+identified by a matching id parameter.
+
+Memif driver uses a unix domain socket to exchange required information between
+memif interfaces. The socket file path is specified at interface creation (see the
+*Memif configuration options* table above). If a socket is used by a ``master``
+interface, it's marked as listener socket (in scope of current process) and
+listens to connection requests from other processes. One socket can be used by
+multiple interfaces. One process can have ``slave`` and ``master`` interfaces
+at the same time, provided each role is assigned unique socket.
+
+For detailed information on memif control messages, see: net/memif/memif.h.
+
+Slave interface attempts to make a connection on assigned socket. Process
+listening on this socket will extract the connection request and create a new
+connected socket (control channel). Then it sends the 'hello' message
+(``MEMIF_MSG_TYPE_HELLO``), containing configuration boundaries. Slave interface
+adjusts its configuration accordingly, and sends 'init' message
+(``MEMIF_MSG_TYPE_INIT``). This message among others contains interface id. Driver
+uses this id to find master interface, and assigns the control channel to this
+interface. If such interface is found, 'ack' message (``MEMIF_MSG_TYPE_ACK``) is
+sent. Slave interface sends 'add region' message (``MEMIF_MSG_TYPE_ADD_REGION``) for
+every region allocated. Master responds to each of these messages with 'ack'
+message. Same behavior applies to rings. Slave sends 'add ring' message
+(``MEMIF_MSG_TYPE_ADD_RING``) for every initialized ring. Master again responds to
+each message with 'ack' message. To finalize the connection, slave interface
+sends 'connect' message (``MEMIF_MSG_TYPE_CONNECT``). Upon receiving this message
+master maps regions to its address space, initializes rings and responds with
+'connected' message (``MEMIF_MSG_TYPE_CONNECTED``). Disconnect
+(``MEMIF_MSG_TYPE_DISCONNECT``) can be sent by both master and slave interfaces at
+any time, due to driver error or if the interface is being deleted.
+
+Files
+
+- net/memif/memif.h *- control messages definitions*
+- net/memif/memif_socket.h
+- net/memif/memif_socket.c
+
+Shared memory
+~~~~~~~~~~~~~
+
+**Shared memory format**
+
+Slave is producer and master is consumer. Memory regions are mapped shared memory files,
+created by the memif slave and provided to the master at connection establishment.
+Regions contain rings and buffers. Rings and buffers can also be separated into multiple
+regions. For no-zero-copy, rings and buffers are stored inside single memory
+region to reduce the number of opened files.
+
+region n (no-zero-copy):
+
++-----------------------+-------------------------------------------------------------------------+
+| Rings                 | Buffers                                                                 |
++-----------+-----------+-----------------+---+---------------------------------------------------+
+| S2M rings | M2S rings | packet buffer 0 | . | pb ((1 << pmd->run.log2_ring_size)*(s2m + m2s))-1 |
++-----------+-----------+-----------------+---+---------------------------------------------------+
+
+S2M OR M2S Rings:
+
++--------+--------+-----------------------+
+| ring 0 | ring 1 | ring num_s2m_rings - 1|
++--------+--------+-----------------------+
+
+ring 0:
+
++-------------+---------------------------------------+
+| ring header | (1 << pmd->run.log2_ring_size) * desc |
++-------------+---------------------------------------+
+
+Descriptors are assigned packet buffers in order of rings creation. If we have one ring
+in each direction and ring size is 1024, then first 1024 buffers will belong to S2M ring and
+last 1024 will belong to M2S ring. In case of zero-copy, buffers are dequeued and
+enqueued as needed.
+
+**Descriptor format**
+
++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|Quad|6| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |3|3| | | | | | | | | | | | | | |1|1| | | | | | | | | | | | | | | |
+|    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|Word|3| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |2|1| | | | | | | | | | | | | | |6|5| | | | | | | | | | | | | | |0|
++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|0   |length                                                         |region                         |flags                          |
++----+---------------------------------------------------------------+-------------------------------+-------------------------------+
+|1   |metadata                                                       |offset                                                         |
++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|    |6| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |3|3| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+|    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+|    |3| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |2|1| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |0|
++----+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+**Flags field - flags (Quad Word 0, bits 0:15)**
+
++-----+--------------------+------------------------------------------------------------------------------------------------+
+|Bits |Name                |Functionality                                                                                   |
++=====+====================+================================================================================================+
+|0    |MEMIF_DESC_FLAG_NEXT|Is chained buffer. When set, the packet is divided into multiple buffers. May not be contiguous.|
++-----+--------------------+------------------------------------------------------------------------------------------------+
+
+**Region index - region (Quad Word 0, 16:31)**
+
+Index of the memory region in which the buffer is located.
+
+**Data length - length (Quad Word 0, 32:63)**
+
+Length of transmitted/received data.
+
+**Data Offset - offset (Quad Word 1, 0:31)**
+
+Data start offset from memory region address. *.regions[desc->region].addr + desc->offset*
+
+**Metadata - metadata (Quad Word 1, 32:63)**
+
+Buffer metadata.
+
+Files
+
+- net/memif/memif.h *- descriptor and ring definitions*
+- net/memif/rte_eth_memif.c *- eth_memif_rx() eth_memif_tx()*
+
+Example: testpmd
+----------------------------
+In this example we run two instances of testpmd application and transmit packets over memif.
+
+First create ``master`` interface::
+
+    #./build/app/testpmd -l 0-1 --proc-type=primary --file-prefix=pmd1 --vdev=net_memif,role=master -- -i
+
+Now create ``slave`` interface (master must be already running so the slave will connect)::
+
+    #./build/app/testpmd -l 2-3 --proc-type=primary --file-prefix=pmd2 --vdev=net_memif -- -i
+
+Start forwarding packets::
+
+    Slave:
+        testpmd> start
+
+    Master:
+        testpmd> start tx_first
+
+Show status::
+
+    testpmd> show port stats 0
+
+For more details on testpmd please refer to :doc:`../testpmd_app_ug/index`.
+
+Example: testpmd and VPP
+------------------------
+For information on how to get and run VPP please see `<https://wiki.fd.io/view/VPP>`_.
+
+Start VPP in interactive mode (should be by default). Create memif master interface in VPP::
+
+    vpp# create interface memif id 0 master no-zero-copy
+    vpp# set interface state memif0/0 up
+    vpp# set interface ip address memif0/0 192.168.1.1/24
+
+To see socket filename use show memif command::
+
+    vpp# show memif
+    sockets
+     id  listener    filename
+      0   yes (1)     /run/vpp/memif.sock
+    ...
+
+Now create memif interface by running testpmd with these command line options::
+
+    #./testpmd --vdev=net_memif,socket=/run/vpp/memif.sock -- -i
+
+Testpmd should now create memif slave interface and try to connect to master.
+In testpmd set forward option to icmpecho and start forwarding::
+
+    testpmd> set fwd icmpecho
+    testpmd> start
+
+Send ping from VPP::
+
+    vpp# ping 192.168.1.2
+    64 bytes from 192.168.1.2: icmp_seq=2 ttl=254 time=36.2918 ms
+    64 bytes from 192.168.1.2: icmp_seq=3 ttl=254 time=23.3927 ms
+    64 bytes from 192.168.1.2: icmp_seq=4 ttl=254 time=24.2975 ms
+    64 bytes from 192.168.1.2: icmp_seq=5 ttl=254 time=17.7049 ms
index 5139be6..7b36d64 100644 (file)
@@ -62,6 +62,11 @@ New Features
   * Added support for SSE vector mode
   * Updated HWRM API to version 1.10.0.74
 
+* **Added memif PMD.**
+
+  Added the new Shared Memory Packet Interface (``memif``) PMD.
+  See the :doc:`../nics/memif` guide for more details on this new driver.
+
 
 Removed Items
 -------------
index 3a72cf3..78cb10f 100644 (file)
@@ -35,6 +35,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice
 DIRS-$(CONFIG_RTE_LIBRTE_IPN3KE_PMD) += ipn3ke
 DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
 DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_MVNETA_PMD) += mvneta
diff --git a/drivers/net/memif/Makefile b/drivers/net/memif/Makefile
new file mode 100644 (file)
index 0000000..c311962
--- /dev/null
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_memif.a
+
+EXPORT_MAP := rte_pmd_memif_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+# Experimental APIs:
+# - rte_intr_callback_unregister_pending
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool
+LDLIBS += -lrte_ethdev -lrte_kvargs
+LDLIBS += -lrte_hash
+LDLIBS += -lrte_bus_vdev
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += rte_eth_memif.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF) += memif_socket.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/memif/memif.h b/drivers/net/memif/memif.h
new file mode 100644 (file)
index 0000000..3948b1f
--- /dev/null
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _MEMIF_H_
+#define _MEMIF_H_
+
+/* Value documented for the cookie field of memif_ring_t. */
+#define MEMIF_COOKIE           0x3E31F20
+/* Protocol version: major number in bits 15:8, minor number in bits 7:0. */
+#define MEMIF_VERSION_MAJOR    2
+#define MEMIF_VERSION_MINOR    0
+#define MEMIF_VERSION          ((MEMIF_VERSION_MAJOR << 8) | MEMIF_VERSION_MINOR)
+/* Size in bytes of the name fields carried in control messages. */
+#define MEMIF_NAME_SZ          32
+
+/*
+ * S2M: direction slave -> master
+ * M2S: direction master -> slave
+ */
+
+/*
+ *  Type definitions
+ */
+
+/**
+ * Control message types, stored in the type field of memif_msg_t.
+ * NONE and ACK have no corresponding payload member in the message union.
+ */
+typedef enum memif_msg_type {
+       MEMIF_MSG_TYPE_NONE,
+       MEMIF_MSG_TYPE_ACK,             /**< acknowledgment */
+       MEMIF_MSG_TYPE_HELLO,           /**< M2S, payload memif_msg_hello_t */
+       MEMIF_MSG_TYPE_INIT,            /**< S2M, payload memif_msg_init_t */
+       MEMIF_MSG_TYPE_ADD_REGION,      /**< S2M, payload memif_msg_add_region_t */
+       MEMIF_MSG_TYPE_ADD_RING,        /**< S2M, payload memif_msg_add_ring_t */
+       MEMIF_MSG_TYPE_CONNECT,         /**< S2M, payload memif_msg_connect_t */
+       MEMIF_MSG_TYPE_CONNECTED,       /**< M2S, payload memif_msg_connected_t */
+       MEMIF_MSG_TYPE_DISCONNECT,      /**< S2M & M2S, payload memif_msg_disconnect_t */
+} memif_msg_type_t;
+
+/** Direction of a buffer ring. */
+typedef enum {
+       MEMIF_RING_S2M, /**< buffer ring in direction slave -> master */
+       MEMIF_RING_M2S, /**< buffer ring in direction master -> slave */
+} memif_ring_type_t;
+
+/**
+ * Interface mode, carried in the mode field of memif_msg_init_t.
+ * Only Ethernet mode is supported by the DPDK memif implementation.
+ */
+typedef enum {
+       MEMIF_INTERFACE_MODE_ETHERNET,
+       MEMIF_INTERFACE_MODE_IP,
+       MEMIF_INTERFACE_MODE_PUNT_INJECT,
+} memif_interface_mode_t;
+
+/* Fixed-width scalar types used by the message and descriptor layouts below. */
+typedef uint16_t memif_region_index_t;         /**< shared memory region index */
+typedef uint32_t memif_region_offset_t;        /**< offset inside a region */
+typedef uint64_t memif_region_size_t;          /**< region size */
+typedef uint16_t memif_ring_index_t;           /**< ring index */
+typedef uint32_t memif_interface_id_t;         /**< interface id */
+typedef uint16_t memif_version_t;              /**< see MEMIF_VERSION */
+typedef uint8_t memif_log2_ring_size_t;        /**< ring size expressed as log2 */
+
+/*
+ *  Socket messages
+ */
+
+/**
+ * M2S
+ * Contains the master interface's configuration limits.
+ *
+ * NOTE(review): memif_msg_receive_hello() adds 1 to max_s2m_ring and
+ * max_m2s_ring before comparing them with the configured ring counts, which
+ * suggests they carry the highest usable ring index rather than a count --
+ * confirm against the protocol definition.
+ */
+typedef struct __rte_packed {
+       uint8_t name[MEMIF_NAME_SZ]; /**< Client app name. In this case DPDK version */
+       memif_version_t min_version; /**< lowest supported memif version */
+       memif_version_t max_version; /**< highest supported memif version */
+       memif_region_index_t max_region; /**< maximum num of regions */
+       memif_ring_index_t max_m2s_ring; /**< maximum num of M2S ring */
+       memif_ring_index_t max_s2m_ring; /**< maximum num of S2M rings */
+       memif_log2_ring_size_t max_log2_ring_size; /**< maximum ring size (as log2) */
+} memif_msg_hello_t;
+
+/**
+ * S2M
+ * Contains the information required to identify the interface
+ * to which the slave wants to connect.
+ *
+ * The mode field is stored in 8 bits; see memif_interface_mode_t for values.
+ */
+typedef struct __rte_packed {
+       memif_version_t version;                /**< memif version */
+       memif_interface_id_t id;                /**< interface id */
+       memif_interface_mode_t mode:8;          /**< interface mode */
+       uint8_t secret[24];                     /**< optional security parameter */
+       uint8_t name[MEMIF_NAME_SZ]; /**< Client app name. In this case DPDK version */
+} memif_msg_init_t;
+
+/**
+ * S2M
+ * Request the master to add a new shared memory region to the master
+ * interface. The shared file's descriptor is passed in a cmsghdr
+ * alongside the message.
+ */
+typedef struct __rte_packed {
+       memif_region_index_t index;             /**< shm regions index */
+       memif_region_size_t size;               /**< shm region size */
+} memif_msg_add_region_t;
+
+/**
+ * S2M
+ * Request the master to add a new ring to the master interface.
+ *
+ * When MEMIF_MSG_ADD_RING_FLAG_S2M is set in flags the ring is in the
+ * slave -> master direction; otherwise it is handled as master -> slave.
+ */
+typedef struct __rte_packed {
+       uint16_t flags;                         /**< flags */
+#define MEMIF_MSG_ADD_RING_FLAG_S2M 1          /**< ring is in S2M direction */
+       memif_ring_index_t index;               /**< ring index */
+       memif_region_index_t region; /**< region index on which this ring is located */
+       memif_region_offset_t offset;           /**< buffer start offset */
+       memif_log2_ring_size_t log2_ring_size;  /**< ring size (log2) */
+       uint16_t private_hdr_size;              /**< used for private metadata */
+} memif_msg_add_ring_t;
+
+/**
+ * S2M
+ * Finalize connection establishment; sent by the slave after all regions
+ * and rings have been announced.
+ */
+typedef struct __rte_packed {
+       uint8_t if_name[MEMIF_NAME_SZ];         /**< slave interface name */
+} memif_msg_connect_t;
+
+/**
+ * M2S
+ * Finalize connection establishment; the master's reply to the
+ * 'connect' message.
+ */
+typedef struct __rte_packed {
+       uint8_t if_name[MEMIF_NAME_SZ];         /**< master interface name */
+} memif_msg_connected_t;
+
+/**
+ * S2M & M2S
+ * Disconnect interfaces. May be sent by either side.
+ */
+typedef struct __rte_packed {
+       uint32_t code;                          /**< error code */
+       uint8_t string[96];                     /**< disconnect reason */
+} memif_msg_disconnect_t;
+
+/**
+ * Control message: a 16-bit type (memif_msg_type_t) followed by the
+ * payload for that type. memif_msg_send() always transmits
+ * sizeof(memif_msg_t) bytes regardless of the payload in use.
+ */
+typedef struct __rte_packed __rte_aligned(128)
+{
+       memif_msg_type_t type:16;
+       union {
+               memif_msg_hello_t hello;
+               memif_msg_init_t init;
+               memif_msg_add_region_t add_region;
+               memif_msg_add_ring_t add_ring;
+               memif_msg_connect_t connect;
+               memif_msg_connected_t connected;
+               memif_msg_disconnect_t disconnect;
+       };
+} memif_msg_t;
+
+/*
+ *  Ring and Descriptor Layout
+ */
+
+/**
+ * Buffer descriptor (two packed quad words).
+ *
+ * Quad word 0: flags in bits 0:15, region in bits 16:31, length in bits 32:63.
+ * Quad word 1: offset in bits 0:31, metadata in bits 32:63.
+ */
+typedef struct __rte_packed {
+       uint16_t flags;                         /**< flags */
+#define MEMIF_DESC_FLAG_NEXT 1                 /**< is chained buffer */
+       memif_region_index_t region; /**< region index on which the buffer is located */
+       uint32_t length;                        /**< buffer length */
+       memif_region_offset_t offset;           /**< buffer offset */
+       uint32_t metadata;                      /**< buffer metadata */
+} memif_desc_t;
+
+/*
+ * Declares a zero-length member named 'mark' carrying
+ * __rte_aligned(RTE_CACHE_LINE_SIZE). Presumably used so that the member
+ * declared right after it starts on a cache-line boundary.
+ */
+#define MEMIF_CACHELINE_ALIGN_MARK(mark) \
+       uint8_t mark[0] __rte_aligned(RTE_CACHE_LINE_SIZE)
+
+/**
+ * Ring header followed by the descriptor array.
+ *
+ * Alignment marks precede the cookie/flags/head group, the tail field and
+ * the descriptor array. The number of descriptors is not stored here; per
+ * the driver documentation a ring holds (1 << log2_ring_size) descriptors.
+ */
+typedef struct {
+       MEMIF_CACHELINE_ALIGN_MARK(cacheline0);
+       uint32_t cookie;                        /**< MEMIF_COOKIE */
+       uint16_t flags;                         /**< flags */
+#define MEMIF_RING_FLAG_MASK_INT 1             /**< disable interrupt mode */
+       volatile uint16_t head;                 /**< pointer to ring buffer head */
+       MEMIF_CACHELINE_ALIGN_MARK(cacheline1);
+       volatile uint16_t tail;                 /**< pointer to ring buffer tail */
+       MEMIF_CACHELINE_ALIGN_MARK(cacheline2);
+       memif_desc_t desc[0];                   /**< buffer descriptors */
+} memif_ring_t;
+
+#endif                         /* _MEMIF_H_ */
diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
new file mode 100644 (file)
index 0000000..1e046b6
--- /dev/null
@@ -0,0 +1,1124 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include <rte_version.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+#include <rte_hash.h>
+#include <rte_jhash.h>
+#include <rte_string_fns.h>
+
+#include "rte_eth_memif.h"
+#include "memif_socket.h"
+
+static void memif_intr_handler(void *arg);
+
+/**
+ * Send one control message over a control channel socket.
+ *
+ * @param fd
+ *   Socket to send on.
+ * @param msg
+ *   Message to send; always transmitted as sizeof(memif_msg_t) bytes.
+ * @param afd
+ *   File descriptor to pass to the peer as SCM_RIGHTS ancillary data,
+ *   or a negative value when no descriptor accompanies the message.
+ * @return
+ *   Return value of sendmsg().
+ */
+static ssize_t
+memif_msg_send(int fd, memif_msg_t *msg, int afd)
+{
+       struct msghdr mh = { 0 };
+       struct iovec iov[1];
+       struct cmsghdr *cmsg;
+       char ctl[CMSG_SPACE(sizeof(int))];
+
+       iov[0].iov_base = msg;
+       iov[0].iov_len = sizeof(memif_msg_t);
+       mh.msg_iov = iov;
+       mh.msg_iovlen = 1;
+
+       /*
+        * 0 is a valid file descriptor. Only negative values mean "nothing
+        * to pass" (memif_msg_enq() initializes the queued fd to -1).
+        */
+       if (afd >= 0) {
+               memset(&ctl, 0, sizeof(ctl));
+               mh.msg_control = ctl;
+               mh.msg_controllen = sizeof(ctl);
+               cmsg = CMSG_FIRSTHDR(&mh);
+               cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+               cmsg->cmsg_level = SOL_SOCKET;
+               cmsg->cmsg_type = SCM_RIGHTS;
+               rte_memcpy(CMSG_DATA(cmsg), &afd, sizeof(int));
+       }
+
+       return sendmsg(fd, &mh, 0);
+}
+
+/**
+ * Transmit the oldest queued control message of a channel, if there is one.
+ *
+ * The queue element is unlinked and freed whether or not the send succeeded.
+ *
+ * @param cc
+ *   Control channel whose message queue is serviced.
+ * @return
+ *   0 if the queue was empty or the whole message was sent, -1 otherwise.
+ */
+static int
+memif_msg_send_from_queue(struct memif_control_channel *cc)
+{
+       struct memif_msg_queue_elt *head = TAILQ_FIRST(&cc->msg_queue);
+       ssize_t sent;
+       int status;
+
+       if (head == NULL)
+               return 0;
+
+       sent = memif_msg_send(cc->intr_handle.fd, &head->msg, head->fd);
+       if (sent == sizeof(memif_msg_t)) {
+               MIF_LOG(DEBUG, "Sent msg type %u.", head->msg.type);
+               status = 0;
+       } else {
+               MIF_LOG(ERR, "sendmsg fail: %s.", strerror(errno));
+               status = -1;
+       }
+
+       /* The message is dropped from the queue even when sending failed. */
+       TAILQ_REMOVE(&cc->msg_queue, head, next);
+       rte_free(head);
+
+       return status;
+}
+
+/**
+ * Allocate a zeroed control message element and append it to the
+ * channel's message queue. The element's fd is preset to -1.
+ *
+ * @param cc
+ *   Control channel to queue the message on.
+ * @return
+ *   The new queue element for the caller to fill in, or NULL if the
+ *   allocation failed.
+ */
+static struct memif_msg_queue_elt *
+memif_msg_enq(struct memif_control_channel *cc)
+{
+       struct memif_msg_queue_elt *elt;
+
+       elt = rte_zmalloc("memif_msg", sizeof(*elt), 0);
+       if (elt != NULL) {
+               elt->fd = -1;
+               TAILQ_INSERT_TAIL(&cc->msg_queue, elt, next);
+               return elt;
+       }
+
+       MIF_LOG(ERR, "Failed to allocate control message.");
+       return NULL;
+}
+
+/**
+ * Queue a 'disconnect' message on a control channel.
+ *
+ * @param cc
+ *   Control channel to queue on. If NULL, nothing is queued.
+ * @param reason
+ *   Optional reason string. It is copied into the message and, when the
+ *   channel is bound to a device, into that device's local_disc_string.
+ * @param err_code
+ *   Error code stored in the message.
+ */
+void
+memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason,
+                        int err_code)
+{
+       struct memif_msg_queue_elt *elt;
+       struct pmd_internals *priv;
+       memif_msg_disconnect_t *disc;
+
+       if (cc == NULL) {
+               MIF_LOG(DEBUG, "Missing control channel.");
+               return;
+       }
+
+       elt = memif_msg_enq(cc);
+       if (elt == NULL) {
+               MIF_LOG(WARNING, "Failed to enqueue disconnect message.");
+               return;
+       }
+
+       elt->msg.type = MEMIF_MSG_TYPE_DISCONNECT;
+       disc = &elt->msg.disconnect;
+       disc->code = err_code;
+
+       /* Without a reason there is nothing more to record. */
+       if (reason == NULL)
+               return;
+
+       strlcpy((char *)disc->string, reason, sizeof(disc->string));
+
+       /* The channel may not be assigned to a device yet. */
+       if (cc->dev == NULL)
+               return;
+
+       priv = cc->dev->data->dev_private;
+       strlcpy(priv->local_disc_string, reason,
+               sizeof(priv->local_disc_string));
+}
+
+/**
+ * Queue a 'hello' message advertising this side's limits.
+ *
+ * NOTE(review): max_region is advertised as ETH_MEMIF_MAX_REGION_NUM - 1
+ * while the ring limits are advertised without the "- 1", and
+ * memif_msg_receive_hello() adds 1 to the received ring limits -- confirm
+ * whether the ring limits are meant to be counts or highest indices.
+ *
+ * @param cc
+ *   Control channel to queue the message on.
+ * @return
+ *   0 on success, -1 if the message could not be allocated.
+ */
+static int
+memif_msg_enq_hello(struct memif_control_channel *cc)
+{
+       struct memif_msg_queue_elt *elt;
+       memif_msg_hello_t *hello;
+
+       elt = memif_msg_enq(cc);
+       if (elt == NULL)
+               return -1;
+
+       elt->msg.type = MEMIF_MSG_TYPE_HELLO;
+
+       hello = &elt->msg.hello;
+       strlcpy((char *)hello->name, rte_version(), sizeof(hello->name));
+
+       /* Only a single protocol version is supported. */
+       hello->min_version = MEMIF_VERSION;
+       hello->max_version = MEMIF_VERSION;
+
+       hello->max_region = ETH_MEMIF_MAX_REGION_NUM - 1;
+       hello->max_m2s_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+       hello->max_s2m_ring = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+       hello->max_log2_ring_size = ETH_MEMIF_MAX_LOG2_RING_SIZE;
+
+       return 0;
+}
+
+/**
+ * Handle a received 'hello' message.
+ *
+ * Checks that MEMIF_VERSION lies within the peer's advertised version range,
+ * then derives the run-time parameters of the connection from the peer's
+ * limits and the local configuration, and records the peer's name.
+ *
+ * @param dev
+ *   Device the message was received for.
+ * @param msg
+ *   Received message; msg->hello is interpreted.
+ * @return
+ *   0 on success, -1 on version mismatch (a disconnect message is queued).
+ */
+static int
+memif_msg_receive_hello(struct rte_eth_dev *dev, memif_msg_t *msg)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       memif_msg_hello_t *hello = &msg->hello;
+       int version_ok;
+
+       version_ok = hello->min_version <= MEMIF_VERSION &&
+                    hello->max_version >= MEMIF_VERSION;
+       if (!version_ok) {
+               memif_msg_enq_disconnect(pmd->cc, "Incompatible memif version", 0);
+               return -1;
+       }
+
+       strlcpy(pmd->remote_name, (char *)hello->name, sizeof(pmd->remote_name));
+
+       /* Parameters of the active connection: never exceed either side. */
+       pmd->run.pkt_buffer_size = pmd->cfg.pkt_buffer_size;
+       pmd->run.log2_ring_size = RTE_MIN(hello->max_log2_ring_size,
+                                         pmd->cfg.log2_ring_size);
+       pmd->run.num_m2s_rings = RTE_MIN(hello->max_m2s_ring + 1,
+                                        pmd->cfg.num_m2s_rings);
+       pmd->run.num_s2m_rings = RTE_MIN(hello->max_s2m_ring + 1,
+                                        pmd->cfg.num_s2m_rings);
+
+       MIF_LOG(DEBUG, "%s: Connecting to %s.",
+               rte_vdev_device_name(pmd->vdev), pmd->remote_name);
+
+       return 0;
+}
+
+/**
+ * Handle an 'init' message received on a control channel.
+ *
+ * Searches the devices registered on the channel's socket for an enabled
+ * device whose id matches the requested one, binds the control channel to
+ * it and marks the device as connecting. On every failure path a disconnect
+ * message is queued on the channel.
+ *
+ * @param cc
+ *   Control channel the message arrived on.
+ * @param msg
+ *   Received message; msg->init is interpreted.
+ * @return
+ *   0 on success, -1 if the request was rejected.
+ */
+static int
+memif_msg_receive_init(struct memif_control_channel *cc, memif_msg_t *msg)
+{
+       memif_msg_init_t *i = &msg->init;
+       struct memif_socket_dev_list_elt *elt;
+       struct pmd_internals *pmd;
+       struct rte_eth_dev *dev;
+
+       if (i->version != MEMIF_VERSION) {
+               memif_msg_enq_disconnect(cc, "Incompatible memif version", 0);
+               return -1;
+       }
+
+       if (cc->socket == NULL) {
+               memif_msg_enq_disconnect(cc, "Device error", 0);
+               return -1;
+       }
+
+       /* Find device with requested ID */
+       TAILQ_FOREACH(elt, &cc->socket->dev_queue, next) {
+               dev = elt->dev;
+               pmd = dev->data->dev_private;
+               if (((pmd->flags & ETH_MEMIF_FLAG_DISABLED) == 0) &&
+                   pmd->id == i->id) {
+                       /*
+                        * Refuse before touching pmd->cc or cc->dev: a device
+                        * that is connecting or connected already owns a
+                        * control channel, which must not be replaced by the
+                        * channel that is being rejected.
+                        */
+                       if (pmd->flags & (ETH_MEMIF_FLAG_CONNECTING |
+                                          ETH_MEMIF_FLAG_CONNECTED)) {
+                               memif_msg_enq_disconnect(cc,
+                                                        "Already connected", 0);
+                               return -1;
+                       }
+
+                       /* assign control channel to device */
+                       cc->dev = dev;
+                       pmd->cc = cc;
+
+                       if (i->mode != MEMIF_INTERFACE_MODE_ETHERNET) {
+                               memif_msg_enq_disconnect(pmd->cc,
+                                                        "Only ethernet mode supported",
+                                                        0);
+                               return -1;
+                       }
+
+                       strlcpy(pmd->remote_name, (char *)i->name,
+                               sizeof(pmd->remote_name));
+
+                       /* The secret is only enforced when one is set locally */
+                       if (*pmd->secret != '\0') {
+                               if (*i->secret == '\0') {
+                                       memif_msg_enq_disconnect(pmd->cc,
+                                                                "Secret required", 0);
+                                       return -1;
+                               }
+                               if (strncmp(pmd->secret, (char *)i->secret,
+                                               ETH_MEMIF_SECRET_SIZE) != 0) {
+                                       memif_msg_enq_disconnect(pmd->cc,
+                                                                "Incorrect secret", 0);
+                                       return -1;
+                               }
+                       }
+
+                       pmd->flags |= ETH_MEMIF_FLAG_CONNECTING;
+                       return 0;
+               }
+       }
+
+       /* ID not found on this socket */
+       MIF_LOG(DEBUG, "ID %u not found.", i->id);
+       memif_msg_enq_disconnect(cc, "ID not found", 0);
+       return -1;
+}
+
+/**
+ * Handle a received 'add region' message.
+ *
+ * Records the region's file descriptor and size in a newly allocated
+ * memif_region stored at pmd->regions[index]. The region is not mapped
+ * here (addr is left NULL).
+ *
+ * @param dev
+ *   Device the message was received for.
+ * @param msg
+ *   Received message; msg->add_region is interpreted.
+ * @param fd
+ *   File descriptor that accompanied the message.
+ * @return
+ *   0 on success, -1 if the fd is missing or the index is not acceptable
+ *   (a disconnect message is queued), -ENOMEM if allocation failed.
+ */
+static int
+memif_msg_receive_add_region(struct rte_eth_dev *dev, memif_msg_t *msg,
+                            int fd)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       memif_msg_add_region_t *req = &msg->add_region;
+       struct memif_region *region;
+
+       if (fd < 0) {
+               memif_msg_enq_disconnect(pmd->cc, "Missing region fd", 0);
+               return -1;
+       }
+
+       /*
+        * The index must be in range, equal to the number of regions
+        * already registered, and refer to an empty slot. The range test
+        * comes first so the slot is never read out of bounds.
+        */
+       if (!(req->index < ETH_MEMIF_MAX_REGION_NUM &&
+             req->index == pmd->regions_num &&
+             pmd->regions[req->index] == NULL)) {
+               memif_msg_enq_disconnect(pmd->cc, "Invalid region index", 0);
+               return -1;
+       }
+
+       region = rte_zmalloc("region", sizeof(*region), 0);
+       if (region == NULL) {
+               MIF_LOG(ERR, "%s: Failed to alloc memif region.",
+                       rte_vdev_device_name(pmd->vdev));
+               return -ENOMEM;
+       }
+
+       region->addr = NULL;
+       region->region_size = req->size;
+       region->fd = fd;
+
+       pmd->regions[req->index] = region;
+       pmd->regions_num++;
+
+       return 0;
+}
+
+/*
+ * Handle an ADD_RING control message.
+ *
+ * Stores the ring parameters announced by the peer (interrupt fd, ring
+ * size, region and offset) in the matching queue: an S2M ring maps to
+ * rx_queues[index], any other ring to tx_queues[index]. The counter of
+ * running rings for that direction is incremented.
+ *
+ * dev: device the control channel belongs to
+ * msg: received control message (add_ring member is read)
+ * fd:  interrupt fd received together with the message, -1 if none
+ *
+ * Returns 0 on success, -1 if the fd is missing or the ring index is not
+ * below the configured number of rings (a disconnect message is enqueued).
+ * On an invalid index the fd is closed here because nothing else keeps a
+ * reference to it.
+ */
+static int
+memif_msg_receive_add_ring(struct rte_eth_dev *dev, memif_msg_t *msg, int fd)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       memif_msg_add_ring_t *ar = &msg->add_ring;
+       struct memif_queue *mq;
+
+       if (fd < 0) {
+               memif_msg_enq_disconnect(pmd->cc, "Missing interrupt fd", 0);
+               return -1;
+       }
+
+       /* check if we have enough queues */
+       if (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) {
+               if (ar->index >= pmd->cfg.num_s2m_rings) {
+                       memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0);
+                       /* fd was never stored, so it would leak otherwise */
+                       close(fd);
+                       return -1;
+               }
+               pmd->run.num_s2m_rings++;
+       } else {
+               if (ar->index >= pmd->cfg.num_m2s_rings) {
+                       memif_msg_enq_disconnect(pmd->cc, "Invalid ring index", 0);
+                       /* fd was never stored, so it would leak otherwise */
+                       close(fd);
+                       return -1;
+               }
+               pmd->run.num_m2s_rings++;
+       }
+
+       mq = (ar->flags & MEMIF_MSG_ADD_RING_FLAG_S2M) ?
+           dev->data->rx_queues[ar->index] : dev->data->tx_queues[ar->index];
+
+       mq->intr_handle.fd = fd;
+       mq->log2_ring_size = ar->log2_ring_size;
+       mq->region = ar->region;
+       mq->ring_offset = ar->offset;
+
+       return 0;
+}
+
+/*
+ * Handle a CONNECT control message: run memif_connect() for the device and,
+ * if that succeeds, remember and log the peer's interface name.
+ *
+ * Returns 0 on success, otherwise the negative value from memif_connect().
+ */
+static int
+memif_msg_receive_connect(struct rte_eth_dev *dev, memif_msg_t *msg)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       int rc;
+
+       rc = memif_connect(dev);
+       if (rc >= 0) {
+               /* keep a local copy of the remote interface name */
+               strlcpy(pmd->remote_if_name, (char *)msg->connect.if_name,
+                       sizeof(pmd->remote_if_name));
+               MIF_LOG(INFO, "%s: Remote interface %s connected.",
+                       rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
+               rc = 0;
+       }
+
+       return rc;
+}
+
+/*
+ * Handle a CONNECTED control message: run memif_connect() for the device
+ * and, if that succeeds, remember and log the peer's interface name.
+ *
+ * Returns 0 on success, otherwise the negative value from memif_connect().
+ */
+static int
+memif_msg_receive_connected(struct rte_eth_dev *dev, memif_msg_t *msg)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       memif_msg_connected_t *connected = &msg->connected;
+       int status = memif_connect(dev);
+
+       if (status < 0)
+               return status;
+
+       /* keep a local copy of the remote interface name */
+       strlcpy(pmd->remote_if_name, (char *)connected->if_name,
+               sizeof(pmd->remote_if_name));
+
+       MIF_LOG(INFO, "%s: Remote interface %s connected.",
+               rte_vdev_device_name(pmd->vdev), pmd->remote_if_name);
+
+       return 0;
+}
+
+/*
+ * Handle a DISCONNECT control message.
+ *
+ * Stores the reason string sent by the peer in pmd->remote_disc_string,
+ * logs it, clears the local disconnect string and disconnects the device.
+ *
+ * Always returns 0.
+ */
+static int
+memif_msg_receive_disconnect(struct rte_eth_dev *dev, memif_msg_t *msg)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       memif_msg_disconnect_t *d = &msg->disconnect;
+
+       memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+       strlcpy(pmd->remote_disc_string, (char *)d->string,
+               sizeof(pmd->remote_disc_string));
+
+       MIF_LOG(INFO, "%s: Disconnect received: %s",
+               rte_vdev_device_name(pmd->vdev), pmd->remote_disc_string);
+
+       memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+       /*
+        * Disconnect the device we were called for. Looking it up again by
+        * name is redundant and could hand a NULL pointer to
+        * memif_disconnect(), which dereferences its argument.
+        */
+       memif_disconnect(dev);
+       return 0;
+}
+
+/*
+ * Enqueue an ACK message on the device's control channel.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int
+memif_msg_enq_ack(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_msg_queue_elt *elt;
+
+       elt = memif_msg_enq(pmd->cc);
+       if (elt != NULL) {
+               elt->msg.type = MEMIF_MSG_TYPE_ACK;
+               return 0;
+       }
+
+       return -1;
+}
+
+/*
+ * Enqueue an INIT message on the device's control channel.
+ *
+ * The message carries the memif protocol version, the device id, Ethernet
+ * interface mode, the DPDK version string as the name and, if one is
+ * configured, the secret.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int
+memif_msg_enq_init(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_msg_queue_elt *e = memif_msg_enq(pmd->cc);
+       memif_msg_init_t *i;
+
+       if (e == NULL)
+               return -1;
+
+       /* only derive the payload pointer once e is known to be valid */
+       i = &e->msg.init;
+       e->msg.type = MEMIF_MSG_TYPE_INIT;
+       i->version = MEMIF_VERSION;
+       i->id = pmd->id;
+       i->mode = MEMIF_INTERFACE_MODE_ETHERNET;
+
+       strlcpy((char *)i->name, rte_version(), sizeof(i->name));
+
+       /* secret is optional; leave the field untouched when not set */
+       if (*pmd->secret != '\0')
+               strlcpy((char *)i->secret, pmd->secret, sizeof(i->secret));
+
+       return 0;
+}
+
+/*
+ * Enqueue an ADD_REGION message describing region idx (its fd and size)
+ * on the device's control channel.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int
+memif_msg_enq_add_region(struct rte_eth_dev *dev, uint8_t idx)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_msg_queue_elt *elt = memif_msg_enq(pmd->cc);
+       struct memif_region *region;
+
+       if (elt == NULL)
+               return -1;
+
+       region = pmd->regions[idx];
+
+       /* the region fd travels with the message */
+       elt->fd = region->fd;
+       elt->msg.type = MEMIF_MSG_TYPE_ADD_REGION;
+       elt->msg.add_region.index = idx;
+       elt->msg.add_region.size = region->region_size;
+
+       return 0;
+}
+
+/*
+ * Enqueue an ADD_RING message describing ring idx of the given type on the
+ * device's control channel. An S2M ring is taken from tx_queues[idx], any
+ * other type from rx_queues[idx]; the queue's interrupt fd travels with
+ * the message.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int
+memif_msg_enq_add_ring(struct rte_eth_dev *dev, uint8_t idx,
+                      memif_ring_type_t type)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_msg_queue_elt *elt = memif_msg_enq(pmd->cc);
+       struct memif_queue *queue;
+       memif_msg_add_ring_t *ring_msg;
+
+       if (elt == NULL)
+               return -1;
+
+       if (type == MEMIF_RING_S2M)
+               queue = dev->data->tx_queues[idx];
+       else
+               queue = dev->data->rx_queues[idx];
+
+       elt->msg.type = MEMIF_MSG_TYPE_ADD_RING;
+       elt->fd = queue->intr_handle.fd;
+
+       ring_msg = &elt->msg.add_ring;
+       ring_msg->index = idx;
+       ring_msg->offset = queue->ring_offset;
+       ring_msg->region = queue->region;
+       ring_msg->log2_ring_size = queue->log2_ring_size;
+       if (type == MEMIF_RING_S2M)
+               ring_msg->flags = MEMIF_MSG_ADD_RING_FLAG_S2M;
+       else
+               ring_msg->flags = 0;
+       ring_msg->private_hdr_size = 0;
+
+       return 0;
+}
+
+/*
+ * Enqueue a CONNECT message carrying the vdev device name as interface
+ * name on the device's control channel.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int
+memif_msg_enq_connect(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_msg_queue_elt *elt = memif_msg_enq(pmd->cc);
+       const char *if_name = rte_vdev_device_name(pmd->vdev);
+
+       if (elt != NULL) {
+               elt->msg.type = MEMIF_MSG_TYPE_CONNECT;
+               strlcpy((char *)elt->msg.connect.if_name, if_name,
+                       sizeof(elt->msg.connect.if_name));
+               return 0;
+       }
+
+       return -1;
+}
+
+/*
+ * Enqueue a CONNECTED message carrying the vdev device name as interface
+ * name on the device's control channel.
+ *
+ * Returns 0 on success, -1 if no queue element could be obtained.
+ */
+static int
+memif_msg_enq_connected(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_msg_queue_elt *elt = memif_msg_enq(pmd->cc);
+       const char *if_name = rte_vdev_device_name(pmd->vdev);
+       memif_msg_connected_t *payload;
+
+       if (elt == NULL)
+               return -1;
+
+       elt->msg.type = MEMIF_MSG_TYPE_CONNECTED;
+       payload = &elt->msg.connected;
+       strlcpy((char *)payload->if_name, if_name, sizeof(payload->if_name));
+
+       return 0;
+}
+
+/*
+ * Callback passed to rte_intr_callback_unregister_pending(): closes the
+ * control channel fd, drops every message still queued on the control
+ * channel given in arg, and frees the control channel itself.
+ */
+static void
+memif_intr_unregister_handler(struct rte_intr_handle *intr_handle, void *arg)
+{
+       struct memif_control_channel *cc = arg;
+
+       /* close control channel fd */
+       close(intr_handle->fd);
+
+       /* clear message queue */
+       while (!TAILQ_EMPTY(&cc->msg_queue)) {
+               struct memif_msg_queue_elt *head = TAILQ_FIRST(&cc->msg_queue);
+
+               TAILQ_REMOVE(&cc->msg_queue, head, next);
+               rte_free(head);
+       }
+
+       /* free control channel */
+       rte_free(cc);
+}
+
+/*
+ * Disconnect a memif device.
+ *
+ * Steps, in order:
+ *  1. If a control channel exists: drop every queued control message
+ *     except DISCONNECT, try to send what is left, then unregister the
+ *     control channel interrupt callback and release the channel.
+ *  2. Close the interrupt fd of every configured S2M and M2S queue and
+ *     clear the queue's ring pointer.
+ *  3. Free the regions (memif_free_regions), zero the run-time
+ *     configuration, set the link down and clear the CONNECTING and
+ *     CONNECTED flags.
+ */
+void
+memif_disconnect(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_msg_queue_elt *elt, *next;
+       struct memif_queue *mq;
+       struct rte_intr_handle *ih;
+       int i;
+       int ret;
+
+       if (pmd->cc != NULL) {
+               /* Clear control message queue (except disconnect message if any). */
+               for (elt = TAILQ_FIRST(&pmd->cc->msg_queue); elt != NULL; elt = next) {
+                       /* fetch the successor first, elt may be freed below */
+                       next = TAILQ_NEXT(elt, next);
+                       if (elt->msg.type != MEMIF_MSG_TYPE_DISCONNECT) {
+                               TAILQ_REMOVE(&pmd->cc->msg_queue, elt, next);
+                               rte_free(elt);
+                       }
+               }
+               /* send disconnect message (if there is any in queue) */
+               memif_msg_send_from_queue(pmd->cc);
+
+               /* at this point, there should be no more messages in queue */
+               if (TAILQ_FIRST(&pmd->cc->msg_queue) != NULL) {
+                       MIF_LOG(WARNING,
+                               "%s: Unexpected message(s) in message queue.",
+                               rte_vdev_device_name(pmd->vdev));
+               }
+
+               ih = &pmd->cc->intr_handle;
+               /*
+                * NOTE(review): when fd <= 0 the control channel is neither
+                * freed nor is pmd->cc cleared - confirm this is intended.
+                */
+               if (ih->fd > 0) {
+                       ret = rte_intr_callback_unregister(ih,
+                                                       memif_intr_handler,
+                                                       pmd->cc);
+                       /*
+                        * If callback is active (disconnecting based on
+                        * received control message).
+                        */
+                       if (ret == -EAGAIN) {
+                               /*
+                                * Defer the cleanup: memif_intr_unregister_handler
+                                * closes the fd and frees the control channel.
+                                */
+                               ret = rte_intr_callback_unregister_pending(ih,
+                                                       memif_intr_handler,
+                                                       pmd->cc,
+                                                       memif_intr_unregister_handler);
+                       } else if (ret > 0) {
+                               /* callback removed, release the channel right away */
+                               close(ih->fd);
+                               rte_free(pmd->cc);
+                       }
+                       /*
+                        * NOTE(review): for any other return value the control
+                        * channel is not freed here, yet the pointer is dropped.
+                        */
+                       pmd->cc = NULL;
+                       if (ret <= 0)
+                               MIF_LOG(WARNING, "%s: Failed to unregister "
+                                       "control channel callback.",
+                                       rte_vdev_device_name(pmd->vdev));
+               }
+       }
+
+       /* unconfig interrupts */
+       /* S2M rings: tx queues for a slave, rx queues for a master */
+       for (i = 0; i < pmd->cfg.num_s2m_rings; i++) {
+               if (pmd->role == MEMIF_ROLE_SLAVE) {
+                       if (dev->data->tx_queues != NULL)
+                               mq = dev->data->tx_queues[i];
+                       else
+                               continue;
+               } else {
+                       if (dev->data->rx_queues != NULL)
+                               mq = dev->data->rx_queues[i];
+                       else
+                               continue;
+               }
+               /* NOTE(review): mq itself is not checked for NULL - confirm */
+               if (mq->intr_handle.fd > 0) {
+                       close(mq->intr_handle.fd);
+                       mq->intr_handle.fd = -1;
+               }
+               mq->ring = NULL;
+       }
+       /* M2S rings: tx queues for a master, rx queues for a slave */
+       for (i = 0; i < pmd->cfg.num_m2s_rings; i++) {
+               if (pmd->role == MEMIF_ROLE_MASTER) {
+                       if (dev->data->tx_queues != NULL)
+                               mq = dev->data->tx_queues[i];
+                       else
+                               continue;
+               } else {
+                       if (dev->data->rx_queues != NULL)
+                               mq = dev->data->rx_queues[i];
+                       else
+                               continue;
+               }
+               if (mq->intr_handle.fd > 0) {
+                       close(mq->intr_handle.fd);
+                       mq->intr_handle.fd = -1;
+               }
+               mq->ring = NULL;
+       }
+
+       memif_free_regions(pmd);
+
+       /* reset connection configuration */
+       memset(&pmd->run, 0, sizeof(pmd->run));
+
+       dev->data->dev_link.link_status = ETH_LINK_DOWN;
+       pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
+       pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTED;
+       MIF_LOG(DEBUG, "%s: Disconnected.", rte_vdev_device_name(pmd->vdev));
+}
+
+/*
+ * Receive one control message from the control channel socket and
+ * process it.
+ *
+ * The message is read with recvmsg() together with its ancillary data;
+ * a file descriptor passed via SCM_RIGHTS is handed to the ADD_REGION and
+ * ADD_RING handlers. Depending on the message type the matching
+ * memif_msg_receive_*() handler is called and the reply message(s) are
+ * enqueued; nothing is sent from here.
+ *
+ * Returns 0 on success and a negative value on failure. For a wrong
+ * message size, an unexpected message on a channel without a device, or
+ * an unknown message type, a disconnect message is enqueued first.
+ */
+static int
+memif_msg_receive(struct memif_control_channel *cc)
+{
+       /* room for one passed fd plus the sender credentials */
+       char ctl[CMSG_SPACE(sizeof(int)) +
+                CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+       struct msghdr mh = { 0 };
+       struct iovec iov[1];
+       memif_msg_t msg = { 0 };
+       ssize_t size;
+       int ret = 0;
+       /* credentials are extracted below but not used in this function */
+       struct ucred *cr __rte_unused;
+       cr = 0;
+       struct cmsghdr *cmsg;
+       /* fd received with the message, -1 when none was passed */
+       int afd = -1;
+       int i;
+       struct pmd_internals *pmd;
+
+       iov[0].iov_base = (void *)&msg;
+       iov[0].iov_len = sizeof(memif_msg_t);
+       mh.msg_iov = iov;
+       mh.msg_iovlen = 1;
+       mh.msg_control = ctl;
+       mh.msg_controllen = sizeof(ctl);
+
+       /* a failed recvmsg() (negative size) is rejected by this check too */
+       size = recvmsg(cc->intr_handle.fd, &mh, 0);
+       if (size != sizeof(memif_msg_t)) {
+               MIF_LOG(DEBUG, "Invalid message size.");
+               memif_msg_enq_disconnect(cc, "Invalid message size", 0);
+               return -1;
+       }
+       MIF_LOG(DEBUG, "Received msg type: %u.", msg.type);
+
+       /* walk the ancillary data: sender credentials and/or a passed fd */
+       cmsg = CMSG_FIRSTHDR(&mh);
+       while (cmsg) {
+               if (cmsg->cmsg_level == SOL_SOCKET) {
+                       if (cmsg->cmsg_type == SCM_CREDENTIALS)
+                               cr = (struct ucred *)CMSG_DATA(cmsg);
+                       else if (cmsg->cmsg_type == SCM_RIGHTS)
+                               memcpy(&afd, CMSG_DATA(cmsg), sizeof(int));
+               }
+               cmsg = CMSG_NXTHDR(&mh, cmsg);
+       }
+
+       /* a channel without a device may only receive INIT */
+       if (cc->dev == NULL && msg.type != MEMIF_MSG_TYPE_INIT) {
+               MIF_LOG(DEBUG, "Unexpected message.");
+               memif_msg_enq_disconnect(cc, "Unexpected message", 0);
+               return -1;
+       }
+
+       /* dispatch on message type */
+       switch (msg.type) {
+       case MEMIF_MSG_TYPE_ACK:
+               break;
+       case MEMIF_MSG_TYPE_HELLO:
+               /*
+                * Reply to HELLO with INIT, one ADD_REGION per region, one
+                * ADD_RING per S2M and M2S ring, and finally CONNECT.
+                */
+               ret = memif_msg_receive_hello(cc->dev, &msg);
+               if (ret < 0)
+                       goto exit;
+               ret = memif_init_regions_and_queues(cc->dev);
+               if (ret < 0)
+                       goto exit;
+               ret = memif_msg_enq_init(cc->dev);
+               if (ret < 0)
+                       goto exit;
+               pmd = cc->dev->data->dev_private;
+               for (i = 0; i < pmd->regions_num; i++) {
+                       ret = memif_msg_enq_add_region(cc->dev, i);
+                       if (ret < 0)
+                               goto exit;
+               }
+               for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+                       ret = memif_msg_enq_add_ring(cc->dev, i,
+                                                    MEMIF_RING_S2M);
+                       if (ret < 0)
+                               goto exit;
+               }
+               for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+                       ret = memif_msg_enq_add_ring(cc->dev, i,
+                                                    MEMIF_RING_M2S);
+                       if (ret < 0)
+                               goto exit;
+               }
+               ret = memif_msg_enq_connect(cc->dev);
+               if (ret < 0)
+                       goto exit;
+               break;
+       case MEMIF_MSG_TYPE_INIT:
+               /*
+                * This cc does not have an interface associated with it.
+                * If suitable interface is found it will be assigned here.
+                */
+               ret = memif_msg_receive_init(cc, &msg);
+               if (ret < 0)
+                       goto exit;
+               ret = memif_msg_enq_ack(cc->dev);
+               if (ret < 0)
+                       goto exit;
+               break;
+       case MEMIF_MSG_TYPE_ADD_REGION:
+               ret = memif_msg_receive_add_region(cc->dev, &msg, afd);
+               if (ret < 0)
+                       goto exit;
+               ret = memif_msg_enq_ack(cc->dev);
+               if (ret < 0)
+                       goto exit;
+               break;
+       case MEMIF_MSG_TYPE_ADD_RING:
+               ret = memif_msg_receive_add_ring(cc->dev, &msg, afd);
+               if (ret < 0)
+                       goto exit;
+               ret = memif_msg_enq_ack(cc->dev);
+               if (ret < 0)
+                       goto exit;
+               break;
+       case MEMIF_MSG_TYPE_CONNECT:
+               ret = memif_msg_receive_connect(cc->dev, &msg);
+               if (ret < 0)
+                       goto exit;
+               ret = memif_msg_enq_connected(cc->dev);
+               if (ret < 0)
+                       goto exit;
+               break;
+       case MEMIF_MSG_TYPE_CONNECTED:
+               ret = memif_msg_receive_connected(cc->dev, &msg);
+               break;
+       case MEMIF_MSG_TYPE_DISCONNECT:
+               ret = memif_msg_receive_disconnect(cc->dev, &msg);
+               if (ret < 0)
+                       goto exit;
+               break;
+       default:
+               memif_msg_enq_disconnect(cc, "Unknown message type", 0);
+               ret = -1;
+               goto exit;
+       }
+
+ exit:
+       return ret;
+}
+
+/*
+ * Interrupt callback of a control channel (arg is the control channel).
+ *
+ * Receives and processes one control message. If the channel still has no
+ * device afterwards, the callback is scheduled for removal with
+ * memif_intr_unregister_handler as cleanup. Otherwise the queued replies
+ * are sent; if receiving or sending failed the device is disconnected.
+ */
+static void
+memif_intr_handler(void *arg)
+{
+       struct memif_control_channel *cc = arg;
+       int rc = memif_msg_receive(cc);
+
+       /* if driver failed to assign device */
+       if (cc->dev == NULL) {
+               rc = rte_intr_callback_unregister_pending(&cc->intr_handle,
+                                                         memif_intr_handler,
+                                                         cc,
+                                                         memif_intr_unregister_handler);
+               if (rc < 0) {
+                       MIF_LOG(WARNING,
+                               "Failed to unregister control channel callback.");
+               }
+               return;
+       }
+
+       /* done when both the receive and the send step succeeded */
+       if (rc >= 0 && memif_msg_send_from_queue(cc) >= 0)
+               return;
+
+       /* receive or send failed: tear the connection down */
+       if (cc->dev != NULL) {
+               memif_disconnect(cc->dev);
+       } else {
+               MIF_LOG(WARNING, "eth dev not allocated");
+       }
+}
+
+/*
+ * Interrupt callback of a listener socket (arg is the memif_socket).
+ *
+ * Accepts one pending connection, allocates a control channel for it with
+ * no device assigned yet, registers memif_intr_handler for the new fd and
+ * sends a HELLO message to the peer.
+ *
+ * On any failure after accept() the connection is closed and the control
+ * channel is released. If the interrupt callback was already registered it
+ * is unregistered first, so the interrupt thread never sees a freed
+ * control channel or a closed fd.
+ */
+static void
+memif_listener_handler(void *arg)
+{
+       struct memif_socket *socket = arg;
+       int sockfd;
+       socklen_t addr_len;
+       struct sockaddr_un client;
+       struct memif_control_channel *cc;
+       struct memif_msg_queue_elt *elt;
+       int registered = 0;
+       int ret;
+
+       addr_len = sizeof(client);
+       sockfd = accept(socket->intr_handle.fd, (struct sockaddr *)&client,
+                       &addr_len);
+       if (sockfd < 0) {
+               MIF_LOG(ERR,
+                       "Failed to accept connection request on socket fd %d",
+                       socket->intr_handle.fd);
+               return;
+       }
+
+       MIF_LOG(DEBUG, "%s: Connection request accepted.", socket->filename);
+
+       cc = rte_zmalloc("memif-cc", sizeof(struct memif_control_channel), 0);
+       if (cc == NULL) {
+               MIF_LOG(ERR, "Failed to allocate control channel.");
+               goto error;
+       }
+
+       cc->intr_handle.fd = sockfd;
+       cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
+       cc->socket = socket;
+       /* the device is assigned later, when the INIT message is handled */
+       cc->dev = NULL;
+       TAILQ_INIT(&cc->msg_queue);
+
+       ret = rte_intr_callback_register(&cc->intr_handle, memif_intr_handler, cc);
+       if (ret < 0) {
+               MIF_LOG(ERR, "Failed to register control channel callback.");
+               goto error;
+       }
+       registered = 1;
+
+       ret = memif_msg_enq_hello(cc);
+       if (ret < 0) {
+               MIF_LOG(ERR, "Failed to enqueue hello message.");
+               goto error;
+       }
+       ret = memif_msg_send_from_queue(cc);
+       if (ret < 0)
+               goto error;
+
+       return;
+
+ error:
+       /* drop the callback before the fd and cc it refers to go away */
+       if (registered) {
+               ret = rte_intr_callback_unregister(&cc->intr_handle,
+                                                  memif_intr_handler, cc);
+               if (ret < 0)
+                       MIF_LOG(WARNING,
+                               "Failed to unregister control channel callback.");
+       }
+       /* sockfd is always a valid descriptor here, accept() succeeded */
+       close(sockfd);
+       if (cc != NULL) {
+               /* release messages that were queued but never sent */
+               while ((elt = TAILQ_FIRST(&cc->msg_queue)) != NULL) {
+                       TAILQ_REMOVE(&cc->msg_queue, elt, next);
+                       rte_free(elt);
+               }
+               rte_free(cc);
+       }
+}
+
+/*
+ * Allocate a memif socket descriptor for the given key (socket filename).
+ *
+ * pmd:      device the socket is created for (used for log messages)
+ * key:      256-byte, zero-padded buffer holding the socket filename
+ * listener: non-zero to create, bind and listen on a UNIX SOCK_SEQPACKET
+ *           socket and register memif_listener_handler for it; zero to
+ *           only allocate the descriptor
+ *
+ * Returns the new socket, or NULL on failure. On failure the socket fd
+ * (if one was opened) is closed and the descriptor is freed.
+ */
+static struct memif_socket *
+memif_socket_create(struct pmd_internals *pmd, char *key, uint8_t listener)
+{
+       struct memif_socket *sock;
+       /* zeroed so that sun_path is always NUL terminated */
+       struct sockaddr_un un = { 0 };
+       int sockfd = -1;
+       int ret;
+       int on = 1;
+
+       sock = rte_zmalloc("memif-socket", sizeof(struct memif_socket), 0);
+       if (sock == NULL) {
+               MIF_LOG(ERR, "Failed to allocate memory for memif socket");
+               return NULL;
+       }
+
+       sock->listener = listener;
+       rte_memcpy(sock->filename, key, 256);
+       TAILQ_INIT(&sock->dev_queue);
+
+       if (listener != 0) {
+               sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+               if (sockfd < 0)
+                       goto error;
+
+               un.sun_family = AF_UNIX;
+               /* copy at most sizeof - 1 bytes, the last byte stays zero */
+               memcpy(un.sun_path, sock->filename,
+                       sizeof(un.sun_path) - 1);
+
+               ret = setsockopt(sockfd, SOL_SOCKET, SO_PASSCRED, &on,
+                                sizeof(on));
+               if (ret < 0)
+                       goto error;
+               ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
+               if (ret < 0)
+                       goto error;
+               ret = listen(sockfd, 1);
+               if (ret < 0)
+                       goto error;
+
+               MIF_LOG(DEBUG, "%s: Memif listener socket %s created.",
+                       rte_vdev_device_name(pmd->vdev), sock->filename);
+
+               sock->intr_handle.fd = sockfd;
+               sock->intr_handle.type = RTE_INTR_HANDLE_EXT;
+               ret = rte_intr_callback_register(&sock->intr_handle,
+                                                memif_listener_handler, sock);
+               if (ret < 0) {
+                       MIF_LOG(ERR, "%s: Failed to register interrupt "
+                               "callback for listener socket",
+                               rte_vdev_device_name(pmd->vdev));
+                       /* errno is not meaningful here, skip the errno log */
+                       goto cleanup;
+               }
+       }
+
+       return sock;
+
+ error:
+       /* log first: close() below may overwrite errno */
+       MIF_LOG(ERR, "%s: Failed to setup socket %s: %s",
+               rte_vdev_device_name(pmd->vdev), key, strerror(errno));
+ cleanup:
+       if (sockfd >= 0)
+               close(sockfd);
+       rte_free(sock);
+       return NULL;
+}
+
+/*
+ * Create the hash table that maps a socket filename (256-byte key) to its
+ * memif socket. The table is named MEMIF_SOCKET_HASH_NAME, holds up to 256
+ * entries and hashes with rte_jhash.
+ *
+ * Returns whatever rte_hash_create() returns.
+ */
+static struct rte_hash *
+memif_create_socket_hash(void)
+{
+       struct rte_hash_parameters hash_params = {
+               .name = MEMIF_SOCKET_HASH_NAME,
+               .entries = 256,
+               .key_len = 256,
+               .hash_func = rte_jhash,
+               .hash_func_init_val = 0,
+       };
+
+       return rte_hash_create(&hash_params);
+}
+
+/*
+ * Attach a device to the memif socket identified by socket_filename.
+ *
+ * The socket is looked up in the socket hash (created on first use) and
+ * created if it does not exist yet: as a listener for a master device, as
+ * a plain descriptor for a slave. The device is then appended to the
+ * socket's device list.
+ *
+ * Returns 0 on success and a negative value on failure: filename too long
+ * for the 256-byte hash key, hash or socket creation failure, listener
+ * mode of an existing socket not matching the device role, a device with
+ * the same id already using the socket, or out of memory.
+ */
+int
+memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_socket *socket = NULL;
+       struct memif_socket_dev_list_elt *elt;
+       struct pmd_internals *tmp_pmd;
+       struct rte_hash *hash;
+       int ret;
+       char key[256];
+       size_t name_len;
+
+       /*
+        * The key must hold the filename plus a terminating NUL; an
+        * unchecked copy would overflow the key buffer.
+        */
+       name_len = strlen(socket_filename);
+       if (name_len >= sizeof(key)) {
+               MIF_LOG(ERR, "Socket filename too long.");
+               return -1;
+       }
+
+       hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
+       if (hash == NULL) {
+               hash = memif_create_socket_hash();
+               if (hash == NULL) {
+                       MIF_LOG(ERR, "Failed to create memif socket hash.");
+                       return -1;
+               }
+       }
+
+       /* the hash compares the whole key, so it has to be zero padded */
+       memset(key, 0, sizeof(key));
+       rte_memcpy(key, socket_filename, name_len);
+       ret = rte_hash_lookup_data(hash, key, (void **)&socket);
+       if (ret < 0) {
+               socket = memif_socket_create(pmd, key,
+                                            (pmd->role ==
+                                             MEMIF_ROLE_SLAVE) ? 0 : 1);
+               if (socket == NULL)
+                       return -1;
+               ret = rte_hash_add_key_data(hash, key, socket);
+               if (ret < 0) {
+                       /* NOTE(review): the new socket is not released here */
+                       MIF_LOG(ERR, "Failed to add socket to socket hash.");
+                       return ret;
+               }
+       }
+       pmd->socket_filename = socket->filename;
+
+       /* a master needs a listener socket, a slave must not use one */
+       if (socket->listener != 0 && pmd->role == MEMIF_ROLE_SLAVE) {
+               MIF_LOG(ERR, "Socket is a listener.");
+               return -1;
+       } else if ((socket->listener == 0) && (pmd->role == MEMIF_ROLE_MASTER)) {
+               MIF_LOG(ERR, "Socket is not a listener.");
+               return -1;
+       }
+
+       /* device ids must be unique per socket */
+       TAILQ_FOREACH(elt, &socket->dev_queue, next) {
+               tmp_pmd = elt->dev->data->dev_private;
+               if (tmp_pmd->id == pmd->id) {
+                       MIF_LOG(ERR, "Memif device with id %d already "
+                               "exists on socket %s",
+                               pmd->id, socket->filename);
+                       return -1;
+               }
+       }
+
+       elt = rte_malloc("pmd-queue", sizeof(struct memif_socket_dev_list_elt), 0);
+       if (elt == NULL) {
+               MIF_LOG(ERR, "%s: Failed to add device to socket device list.",
+                       rte_vdev_device_name(pmd->vdev));
+               return -1;
+       }
+       elt->dev = dev;
+       TAILQ_INSERT_TAIL(&socket->dev_queue, elt, next);
+
+       return 0;
+}
+
+/*
+ * Detach a device from its memif socket.
+ *
+ * Removes the device from the socket's device list and clears
+ * pmd->socket_filename. If no device is left on the socket, the socket is
+ * removed from the socket hash and freed; for a listener socket the
+ * interrupt callback is unregistered, the listening fd is closed and the
+ * socket file is removed first.
+ *
+ * Does nothing if the socket hash does not exist, the device has no
+ * socket filename, or the socket is not found in the hash.
+ */
+void
+memif_socket_remove_device(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_socket *socket = NULL;
+       struct memif_socket_dev_list_elt *elt, *next;
+       struct rte_hash *hash;
+
+       hash = rte_hash_find_existing(MEMIF_SOCKET_HASH_NAME);
+       if (hash == NULL)
+               return;
+
+       if (pmd->socket_filename == NULL)
+               return;
+
+       if (rte_hash_lookup_data(hash, pmd->socket_filename, (void **)&socket) < 0)
+               return;
+
+       for (elt = TAILQ_FIRST(&socket->dev_queue); elt != NULL; elt = next) {
+               /* fetch the successor first, elt may be freed below */
+               next = TAILQ_NEXT(elt, next);
+               if (elt->dev == dev) {
+                       TAILQ_REMOVE(&socket->dev_queue, elt, next);
+                       rte_free(elt);
+                       pmd->socket_filename = NULL;
+               }
+       }
+
+       /* remove socket, if this was the last device using it */
+       if (TAILQ_EMPTY(&socket->dev_queue)) {
+               rte_hash_del_key(hash, socket->filename);
+               if (socket->listener) {
+                       /*
+                        * The listener callback holds a pointer to this
+                        * socket; drop it before the socket is freed.
+                        */
+                       if (rte_intr_callback_unregister(&socket->intr_handle,
+                                       memif_listener_handler, socket) < 0)
+                               MIF_LOG(WARNING,
+                                       "Failed to unregister listener callback.");
+                       close(socket->intr_handle.fd);
+                       /* remove listener socket file,
+                        * so we can create new one later.
+                        */
+                       remove(socket->filename);
+               }
+               rte_free(socket);
+       }
+}
+
+/*
+ * Enable connection establishment on a master device: clear the DISABLED
+ * flag and reset both disconnect strings.
+ *
+ * Always returns 0.
+ */
+int
+memif_connect_master(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *internals = dev->data->dev_private;
+
+       internals->flags &= ~ETH_MEMIF_FLAG_DISABLED;
+
+       /* forget the reasons of any previous disconnect */
+       memset(internals->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+       memset(internals->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+
+       return 0;
+}
+
+/*
+ * Start connection establishment on a slave device.
+ *
+ * Resets both disconnect strings, clears the DISABLED flag, opens a UNIX
+ * SOCK_SEQPACKET socket and connects it to pmd->socket_filename. On
+ * success a control channel is allocated for the device and
+ * memif_intr_handler is registered for the socket fd.
+ *
+ * Returns 0 on success, -1 on failure. On failure the socket is closed
+ * and the control channel, if allocated, is freed.
+ */
+int
+memif_connect_slave(struct rte_eth_dev *dev)
+{
+       int sockfd;
+       int ret;
+       /* zeroed so that sun_path is always NUL terminated */
+       struct sockaddr_un sun = { 0 };
+       struct pmd_internals *pmd = dev->data->dev_private;
+
+       memset(pmd->local_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+       memset(pmd->remote_disc_string, 0, ETH_MEMIF_DISC_STRING_SIZE);
+       pmd->flags &= ~ETH_MEMIF_FLAG_DISABLED;
+
+       sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+       if (sockfd < 0) {
+               MIF_LOG(ERR, "%s: Failed to open socket.",
+                       rte_vdev_device_name(pmd->vdev));
+               return -1;
+       }
+
+       sun.sun_family = AF_UNIX;
+
+       /* copy at most sizeof - 1 bytes, the last byte stays zero */
+       memcpy(sun.sun_path, pmd->socket_filename, sizeof(sun.sun_path) - 1);
+
+       ret = connect(sockfd, (struct sockaddr *)&sun,
+                     sizeof(struct sockaddr_un));
+       if (ret < 0) {
+               MIF_LOG(ERR, "%s: Failed to connect socket: %s.",
+                       rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
+               goto error;
+       }
+
+       MIF_LOG(DEBUG, "%s: Memif socket: %s connected.",
+               rte_vdev_device_name(pmd->vdev), pmd->socket_filename);
+
+       pmd->cc = rte_zmalloc("memif-cc",
+                             sizeof(struct memif_control_channel), 0);
+       if (pmd->cc == NULL) {
+               MIF_LOG(ERR, "%s: Failed to allocate control channel.",
+                       rte_vdev_device_name(pmd->vdev));
+               goto error;
+       }
+
+       pmd->cc->intr_handle.fd = sockfd;
+       pmd->cc->intr_handle.type = RTE_INTR_HANDLE_EXT;
+       pmd->cc->socket = NULL;
+       pmd->cc->dev = dev;
+       TAILQ_INIT(&pmd->cc->msg_queue);
+
+       ret = rte_intr_callback_register(&pmd->cc->intr_handle,
+                                        memif_intr_handler, pmd->cc);
+       if (ret < 0) {
+               MIF_LOG(ERR, "%s: Failed to register interrupt callback "
+                       "for control fd", rte_vdev_device_name(pmd->vdev));
+               goto error;
+       }
+
+       return 0;
+
+ error:
+       /* 0 is a valid descriptor too */
+       if (sockfd >= 0) {
+               close(sockfd);
+               sockfd = -1;
+       }
+       if (pmd->cc != NULL) {
+               rte_free(pmd->cc);
+               pmd->cc = NULL;
+       }
+       return -1;
+}
diff --git a/drivers/net/memif/memif_socket.h b/drivers/net/memif/memif_socket.h
new file mode 100644 (file)
index 0000000..db293e2
--- /dev/null
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _MEMIF_SOCKET_H_
+#define _MEMIF_SOCKET_H_
+
+#include <sys/queue.h>
+
+/**
+ * Remove device from socket device list. If no device is left on the socket,
+ * remove the socket as well (for a listener socket the socket file is
+ * removed too). Does nothing if the device has no socket assigned.
+ *
+ * @param dev
+ *   memif device
+ */
+void memif_socket_remove_device(struct rte_eth_dev *dev);
+
+/**
+ * Enqueue disconnect message to control channel message queue.
+ *
+ * @param cc
+ *   control channel
+ * @param reason
+ *   const string stating disconnect reason (96 characters)
+ * @param err_code
+ *   error code
+ */
+void memif_msg_enq_disconnect(struct memif_control_channel *cc, const char *reason,
+                             int err_code);
+
+/**
+ * Initialize memif socket for specified device. If socket doesn't exist, create socket.
+ * A master device uses a listener socket, a slave device a non-listener
+ * socket; the device is appended to the socket's device list.
+ *
+ * @param dev
+ *   memif device
+ * @param socket_filename
+ *   socket filename
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value (e.g. an existing socket's listener
+ *     mode does not match the device role, or a device with the same id
+ *     already uses the socket).
+ */
+int memif_socket_init(struct rte_eth_dev *dev, const char *socket_filename);
+
+/**
+ * Disconnect memif device. Close control channel and shared memory.
+ * Also closes the queue interrupt file descriptors, resets the run-time
+ * connection configuration and sets the link status to down.
+ *
+ * @param dev
+ *   memif device
+ */
+void memif_disconnect(struct rte_eth_dev *dev);
+
+/**
+ * If device is properly configured, enable connection establishment.
+ * Clears the stored disconnect strings and the disabled flag.
+ *
+ * @param dev
+ *   memif device
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int memif_connect_master(struct rte_eth_dev *dev);
+
+/**
+ * If device is properly configured, send connection request.
+ * Connects a UNIX socket to the device's socket filename and registers
+ * the control channel interrupt handler for it.
+ *
+ * @param dev
+ *   memif device
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int memif_connect_slave(struct rte_eth_dev *dev);
+
+/* Element of a socket's device list (struct memif_socket, dev_queue). */
+struct memif_socket_dev_list_elt {
+       TAILQ_ENTRY(memif_socket_dev_list_elt) next;
+       struct rte_eth_dev *dev;                /**< pointer to the ethdev using the socket */
+       /* NOTE(review): dev_name is not set in memif_socket_init - confirm use */
+       char dev_name[RTE_ETH_NAME_MAX_LEN];
+};
+
+/* Name of the hash table mapping socket filename to struct memif_socket. */
+#define MEMIF_SOCKET_HASH_NAME                 "memif-sh"
+/* A memif socket, shared by all devices configured with the same filename. */
+struct memif_socket {
+       struct rte_intr_handle intr_handle;     /**< interrupt handle */
+       char filename[256];                     /**< socket filename */
+
+       TAILQ_HEAD(, memif_socket_dev_list_elt) dev_queue;
+       /**< Queue of devices using this socket */
+       uint8_t listener;                       /**< if not zero socket is listener */
+};
+
+/* Control message queue element: one message waiting to be sent. */
+struct memif_msg_queue_elt {
+       memif_msg_t msg;                        /**< control message */
+       TAILQ_ENTRY(memif_msg_queue_elt) next;
+       int fd;                                 /**< fd to be sent to peer */
+};
+
+/* Control channel: one connection between a memif device and its peer. */
+struct memif_control_channel {
+       struct rte_intr_handle intr_handle;     /**< interrupt handle */
+       TAILQ_HEAD(, memif_msg_queue_elt) msg_queue; /**< control message queue */
+       struct memif_socket *socket;            /**< pointer to socket */
+       /* NULL on an accepted connection until a device is assigned */
+       struct rte_eth_dev *dev;                /**< pointer to device */
+};
+
+#endif                         /* _MEMIF_SOCKET_H_ */
diff --git a/drivers/net/memif/meson.build b/drivers/net/memif/meson.build
new file mode 100644 (file)
index 0000000..287a30e
--- /dev/null
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
+
+if host_machine.system() != 'linux'
+        build = false
+endif
+
+sources = files('rte_eth_memif.c',
+               'memif_socket.c')
+
+allow_experimental_apis = true
+# Experimental APIs:
+# - rte_intr_callback_unregister_pending
+
+deps += ['hash']
diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
new file mode 100644 (file)
index 0000000..b9f05a6
--- /dev/null
@@ -0,0 +1,1204 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/if_ether.h>
+#include <errno.h>
+#include <sys/eventfd.h>
+
+#include <rte_version.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+#include <rte_string_fns.h>
+
+#include "rte_eth_memif.h"
+#include "memif_socket.h"
+
+/* Names of the device arguments (kvargs keys) understood by this PMD. */
+#define ETH_MEMIF_ID_ARG               "id"
+#define ETH_MEMIF_ROLE_ARG             "role"
+#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG  "bsize"
+#define ETH_MEMIF_RING_SIZE_ARG                "rsize"
+#define ETH_MEMIF_SOCKET_ARG           "socket"
+#define ETH_MEMIF_MAC_ARG              "mac"
+#define ETH_MEMIF_ZC_ARG               "zero-copy"
+#define ETH_MEMIF_SECRET_ARG           "secret"
+
+/* NULL-terminated list of every accepted argument name. */
+static const char * const valid_arguments[] = {
+       ETH_MEMIF_ID_ARG,
+       ETH_MEMIF_ROLE_ARG,
+       ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
+       ETH_MEMIF_RING_SIZE_ARG,
+       ETH_MEMIF_SOCKET_ARG,
+       ETH_MEMIF_MAC_ARG,
+       ETH_MEMIF_ZC_ARG,
+       ETH_MEMIF_SECRET_ARG,
+       NULL
+};
+
+/*
+ * Return a constant version string assembled at compile time from
+ * MEMIF_VERSION_MAJOR and MEMIF_VERSION_MINOR, prefixed with "memif-".
+ */
+const char *
+memif_version(void)
+{
+       const char *version = "memif-" RTE_STR(MEMIF_VERSION_MAJOR) "."
+               RTE_STR(MEMIF_VERSION_MINOR);
+
+       return version;
+}
+
+/*
+ * dev_infos_get callback: report the static limits of a memif device
+ * (queue counts, MAC address count and RX packet/buffer sizes).
+ */
+static void
+memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
+{
+       /* queue limits */
+       dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+       dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
+
+       /* packet limits */
+       dev_info->min_rx_bufsize = 0;
+       dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
+
+       dev_info->max_mac_addrs = 1;
+}
+
+/*
+ * Locate a ring inside region 0 (all rings live there).
+ *
+ * Rings are laid out back to back, each one being a ring header followed
+ * by 2^log2_ring_size descriptors. The ring index is offset by
+ * 'type * num_s2m_rings'.
+ */
+static memif_ring_t *
+memif_get_ring(struct pmd_internals *pmd, memif_ring_type_t type, uint16_t ring_num)
+{
+       /* bytes occupied by one ring: header + descriptor array */
+       int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
+           (1 << pmd->run.log2_ring_size);
+       uint8_t *region_base = (uint8_t *)pmd->regions[0]->addr;
+
+       return (memif_ring_t *)(region_base +
+               (ring_num + type * pmd->run.num_s2m_rings) * ring_size);
+}
+
+/* Translate a descriptor's (region, offset) pair into a mapped address. */
+static void *
+memif_get_buffer(struct pmd_internals *pmd, memif_desc_t *d)
+{
+       uint8_t *region_base = (uint8_t *)pmd->regions[d->region]->addr;
+
+       return region_base + d->offset;
+}
+
+/*
+ * Append 'tail' after 'cur_tail' in the chain that starts at 'head'.
+ *
+ * Updates head's segment count and packet length; tail's pkt_len is set to
+ * its own data_len. Nothing is modified when the combined segment count
+ * would exceed RTE_MBUF_MAX_NB_SEGS.
+ *
+ * @return
+ *   0 on success, -EOVERFLOW on segment count overflow.
+ */
+static int
+memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
+                   struct rte_mbuf *tail)
+{
+       int total_segs = head->nb_segs + tail->nb_segs;
+
+       /* refuse to build a chain with too many segments */
+       if (unlikely(total_segs > RTE_MBUF_MAX_NB_SEGS))
+               return -EOVERFLOW;
+
+       /* link the new segment behind the current last one */
+       cur_tail->next = tail;
+       head->nb_segs = (uint16_t)total_segs;
+
+       /* account the new segment's bytes in the head */
+       tail->pkt_len = tail->data_len;
+       head->pkt_len += tail->pkt_len;
+
+       return 0;
+}
+
+/*
+ * RX burst callback: copy packets from the shared-memory ring into mbufs.
+ *
+ * Reads up to 'nb_pkts' packets from the ring of 'queue', copying every
+ * descriptor's buffer into mbufs allocated from the queue's mempool and
+ * chaining additional mbufs when one is full. Descriptors flagged with
+ * MEMIF_DESC_FLAG_NEXT are merged into the same packet. On an M2S ring the
+ * consumed slots are handed back to the peer by advancing ring->head.
+ *
+ * @return
+ *   Number of packets stored in 'bufs'; 0 when the device is not connected
+ *   or the queue has no ring.
+ */
+static uint16_t
+eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+       struct memif_queue *mq = queue;
+       struct pmd_internals *pmd = mq->pmd;
+       memif_ring_t *ring = mq->ring;
+       uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
+       uint16_t n_rx_pkts = 0;
+       /* usable payload bytes per mbuf */
+       uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
+               RTE_PKTMBUF_HEADROOM;
+       uint16_t src_len, src_off, dst_len, dst_off, cp_len;
+       memif_ring_type_t type = mq->type;
+       memif_desc_t *d0;
+       struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
+       uint64_t b;
+       ssize_t size __rte_unused;
+       uint16_t head;
+       int ret;
+
+       if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
+               return 0;
+       if (unlikely(ring == NULL))
+               return 0;
+
+       /* consume interrupt */
+       /* the result of read() is deliberately ignored */
+       if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
+               size = read(mq->intr_handle.fd, &b, sizeof(b));
+
+       ring_size = 1 << mq->log2_ring_size;
+       mask = ring_size - 1;
+
+       /* S2M rings are consumed from head, M2S rings from tail */
+       cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail;
+       last_slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
+       if (cur_slot == last_slot)
+               goto refill;
+       /* 16-bit free-running counters: the difference is wrap-safe */
+       n_slots = last_slot - cur_slot;
+
+       while (n_slots && n_rx_pkts < nb_pkts) {
+               mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+               if (unlikely(mbuf_head == NULL))
+                       goto no_free_bufs;
+               mbuf = mbuf_head;
+               mbuf->port = mq->in_port;
+
+next_slot:
+               s0 = cur_slot & mask;
+               d0 = &ring->desc[s0];
+
+               src_len = d0->length;
+               /*
+                * NOTE(review): dst_off is reset for every descriptor, also when
+                * arriving here via MEMIF_DESC_FLAG_NEXT with 'mbuf' already
+                * holding data; this looks like it overwrites that data - confirm.
+                */
+               dst_off = 0;
+               src_off = 0;
+
+               do {
+                       dst_len = mbuf_size - dst_off;
+                       if (dst_len == 0) {
+                               /* current mbuf is full: chain a fresh one */
+                               dst_off = 0;
+                               dst_len = mbuf_size;
+
+                               /* store pointer to tail */
+                               mbuf_tail = mbuf;
+                               mbuf = rte_pktmbuf_alloc(mq->mempool);
+                               /*
+                                * NOTE(review): on the two failure exits below
+                                * mbuf_head is neither freed nor returned in
+                                * 'bufs' - looks like a leak, confirm.
+                                */
+                               if (unlikely(mbuf == NULL))
+                                       goto no_free_bufs;
+                               mbuf->port = mq->in_port;
+                               ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
+                               if (unlikely(ret < 0)) {
+                                       MIF_LOG(ERR, "%s: number-of-segments-overflow",
+                                               rte_vdev_device_name(pmd->vdev));
+                                       rte_pktmbuf_free(mbuf);
+                                       goto no_free_bufs;
+                               }
+                       }
+                       cp_len = RTE_MIN(dst_len, src_len);
+
+                       /* grow the segment, and the head's total if chained */
+                       rte_pktmbuf_data_len(mbuf) += cp_len;
+                       rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
+                       if (mbuf != mbuf_head)
+                               rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
+
+                       memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, dst_off),
+                              (uint8_t *)memif_get_buffer(pmd, d0) + src_off, cp_len);
+
+                       src_off += cp_len;
+                       dst_off += cp_len;
+                       src_len -= cp_len;
+               } while (src_len);
+
+               cur_slot++;
+               n_slots--;
+
+               /* packet continues in the next descriptor */
+               if (d0->flags & MEMIF_DESC_FLAG_NEXT)
+                       goto next_slot;
+
+               mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+               *bufs++ = mbuf_head;
+               n_rx_pkts++;
+       }
+
+no_free_bufs:
+       /* record how far the ring has been consumed */
+       if (type == MEMIF_RING_S2M) {
+               /* barrier before the shared tail index is updated */
+               rte_mb();
+               ring->tail = cur_slot;
+               mq->last_head = cur_slot;
+       } else {
+               mq->last_tail = cur_slot;
+       }
+
+refill:
+       if (type == MEMIF_RING_M2S) {
+               /* restore full buffer length on consumed slots and advance head */
+               head = ring->head;
+               n_slots = ring_size - head + mq->last_tail;
+
+               while (n_slots--) {
+                       s0 = head++ & mask;
+                       d0 = &ring->desc[s0];
+                       d0->length = pmd->run.pkt_buffer_size;
+               }
+               rte_mb();
+               ring->head = head;
+       }
+
+       mq->n_pkts += n_rx_pkts;
+       return n_rx_pkts;
+}
+
+/*
+ * TX burst callback: copy mbufs into the shared-memory ring.
+ *
+ * Copies up to 'nb_pkts' packets (all segments of each mbuf chain) into
+ * ring buffers, spilling into following descriptors flagged with
+ * MEMIF_DESC_FLAG_NEXT when a buffer is full. Successfully copied mbufs
+ * are freed. The ring index is then updated and, unless the ring has
+ * MEMIF_RING_FLAG_MASK_INT set, the queue's eventfd is written to.
+ *
+ * @return
+ *   Number of packets transmitted; 0 when the device is not connected or
+ *   the queue has no ring. Packets not transmitted are counted in n_err.
+ */
+static uint16_t
+eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+       struct memif_queue *mq = queue;
+       struct pmd_internals *pmd = mq->pmd;
+       memif_ring_t *ring = mq->ring;
+       uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
+       uint16_t src_len, src_off, dst_len, dst_off, cp_len;
+       memif_ring_type_t type = mq->type;
+       memif_desc_t *d0;
+       struct rte_mbuf *mbuf;
+       struct rte_mbuf *mbuf_head;
+       uint64_t a;
+       ssize_t size;
+
+       if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
+               return 0;
+       if (unlikely(ring == NULL))
+               return 0;
+
+       ring_size = 1 << mq->log2_ring_size;
+       mask = ring_size - 1;
+
+       /* bring the cached tail up to date with the shared one */
+       n_free = ring->tail - mq->last_tail;
+       mq->last_tail += n_free;
+       /* S2M rings are produced at head, M2S rings at tail */
+       slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
+
+       if (type == MEMIF_RING_S2M)
+               n_free = ring_size - ring->head + mq->last_tail;
+       else
+               n_free = ring->head - ring->tail;
+
+       while (n_tx_pkts < nb_pkts && n_free) {
+               mbuf_head = *bufs++;
+               mbuf = mbuf_head;
+
+               /* remembered so a partially written packet can be rolled back */
+               saved_slot = slot;
+               d0 = &ring->desc[slot & mask];
+               dst_off = 0;
+               dst_len = (type == MEMIF_RING_S2M) ?
+                       pmd->run.pkt_buffer_size : d0->length;
+
+next_in_chain:
+               src_off = 0;
+               src_len = rte_pktmbuf_data_len(mbuf);
+
+               while (src_len) {
+                       if (dst_len == 0) {
+                               /*
+                                * NOTE(review): n_free still counts the slot
+                                * currently being filled, so this test passes
+                                * when that is the last free slot - confirm
+                                * the accounting.
+                                */
+                               if (n_free) {
+                                       slot++;
+                                       n_free--;
+                                       d0->flags |= MEMIF_DESC_FLAG_NEXT;
+                                       d0 = &ring->desc[slot & mask];
+                                       dst_off = 0;
+                                       dst_len = (type == MEMIF_RING_S2M) ?
+                                           pmd->run.pkt_buffer_size : d0->length;
+                                       d0->flags = 0;
+                               } else {
+                                       /*
+                                        * out of slots: drop the partial packet.
+                                        * NOTE(review): bytes already added to
+                                        * n_bytes are not subtracted - confirm.
+                                        */
+                                       slot = saved_slot;
+                                       goto no_free_slots;
+                               }
+                       }
+                       cp_len = RTE_MIN(dst_len, src_len);
+
+                       memcpy((uint8_t *)memif_get_buffer(pmd, d0) + dst_off,
+                              rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
+                              cp_len);
+
+                       mq->n_bytes += cp_len;
+                       src_off += cp_len;
+                       dst_off += cp_len;
+                       src_len -= cp_len;
+                       dst_len -= cp_len;
+
+                       d0->length = dst_off;
+               }
+
+               /* more segments in this mbuf chain */
+               if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
+                       mbuf = mbuf->next;
+                       goto next_in_chain;
+               }
+
+               n_tx_pkts++;
+               slot++;
+               n_free--;
+               rte_pktmbuf_free(mbuf_head);
+       }
+
+no_free_slots:
+       /* barrier before the shared ring index is updated */
+       rte_mb();
+       if (type == MEMIF_RING_S2M)
+               ring->head = slot;
+       else
+               ring->tail = slot;
+
+       /* write to the eventfd unless the ring has interrupts masked */
+       if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
+               a = 1;
+               size = write(mq->intr_handle.fd, &a, sizeof(a));
+               if (unlikely(size < 0)) {
+                       MIF_LOG(WARNING,
+                               "%s: Failed to send interrupt. %s",
+                               rte_vdev_device_name(pmd->vdev), strerror(errno));
+               }
+       }
+
+       mq->n_err += nb_pkts - n_tx_pkts;
+       mq->n_pkts += n_tx_pkts;
+       return n_tx_pkts;
+}
+
+/*
+ * Release every shared memory region of the device: unmap it, close its
+ * file descriptor, free the descriptor structure and reset the region count.
+ */
+void
+memif_free_regions(struct pmd_internals *pmd)
+{
+       struct memif_region *region;
+       int idx;
+
+       /* regions are stored contiguously in [0, regions_num) */
+       for (idx = 0; idx < pmd->regions_num; idx++) {
+               region = pmd->regions[idx];
+               if (region == NULL)
+                       continue;
+
+               /* the fd is only closed for regions that were mapped */
+               if (region->addr != NULL) {
+                       munmap(region->addr, region->region_size);
+                       if (region->fd > 0) {
+                               close(region->fd);
+                               region->fd = -1;
+                       }
+               }
+
+               rte_free(region);
+               pmd->regions[idx] = NULL;
+       }
+
+       pmd->regions_num = 0;
+}
+
+/*
+ * Create and map a new shared memory region and append it to pmd->regions.
+ *
+ * The region always holds the rings (all S2M and M2S rings with their
+ * descriptors); when 'has_buffers' is 1 it additionally holds one packet
+ * buffer per descriptor. The backing file is created with memfd_create(),
+ * sealed against shrinking, sized and mapped read/write.
+ *
+ * @param pmd
+ *   device private data
+ * @param has_buffers
+ *   1 to reserve packet buffer space after the rings
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value; nothing is left allocated or open.
+ */
+static int
+memif_region_init_shm(struct pmd_internals *pmd, uint8_t has_buffers)
+{
+       char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
+       int ret = 0;
+       struct memif_region *r;
+
+       if (pmd->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
+               MIF_LOG(ERR, "%s: Too many regions.", rte_vdev_device_name(pmd->vdev));
+               return -1;
+       }
+
+       r = rte_zmalloc("region", sizeof(struct memif_region), 0);
+       if (r == NULL) {
+               MIF_LOG(ERR, "%s: Failed to alloc memif region.",
+                       rte_vdev_device_name(pmd->vdev));
+               return -ENOMEM;
+       }
+
+       /* calculate buffer offset */
+       r->pkt_buffer_offset = (pmd->run.num_s2m_rings + pmd->run.num_m2s_rings) *
+           (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
+           (1 << pmd->run.log2_ring_size));
+
+       r->region_size = r->pkt_buffer_offset;
+       /* if region has buffers, add buffers size to region_size */
+       if (has_buffers == 1)
+               r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
+                       (1 << pmd->run.log2_ring_size) *
+                       (pmd->run.num_s2m_rings +
+                        pmd->run.num_m2s_rings));
+
+       memset(shm_name, 0, sizeof(shm_name));
+       snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
+                pmd->regions_num);
+
+       r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
+       if (r->fd < 0) {
+               MIF_LOG(ERR, "%s: Failed to create shm file: %s.",
+                       rte_vdev_device_name(pmd->vdev),
+                       strerror(errno));
+               ret = -1;
+               goto error;
+       }
+
+       /* the peer maps this file; make sure it can never shrink under it */
+       ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
+       if (ret < 0) {
+               MIF_LOG(ERR, "%s: Failed to add seals to shm file: %s.",
+                       rte_vdev_device_name(pmd->vdev),
+                       strerror(errno));
+               goto error;
+       }
+
+       ret = ftruncate(r->fd, r->region_size);
+       if (ret < 0) {
+               MIF_LOG(ERR, "%s: Failed to truncate shm file: %s.",
+                       rte_vdev_device_name(pmd->vdev),
+                       strerror(errno));
+               goto error;
+       }
+
+       r->addr = mmap(NULL, r->region_size, PROT_READ |
+                      PROT_WRITE, MAP_SHARED, r->fd, 0);
+       if (r->addr == MAP_FAILED) {
+               /* mmap reports the reason in errno, 'ret' is still 0 here */
+               MIF_LOG(ERR, "%s: Failed to mmap shm region: %s.",
+                       rte_vdev_device_name(pmd->vdev),
+                       strerror(errno));
+               ret = -1;
+               goto error;
+       }
+
+       pmd->regions[pmd->regions_num] = r;
+       pmd->regions_num++;
+
+       return 0;
+
+error:
+       /* r->fd is -1 or a valid descriptor (0 included) at this point */
+       if (r->fd >= 0)
+               close(r->fd);
+       /* the region was never published in pmd->regions, release it here */
+       rte_free(r);
+
+       return ret;
+}
+
+/*
+ * Allocate the device's shared memory: a single region that carries
+ * both the rings and the packet buffers.
+ *
+ * @return
+ *   0 on success, the negative value from memif_region_init_shm() otherwise.
+ */
+static int
+memif_regions_init(struct pmd_internals *pmd)
+{
+       /* create one buffer region */
+       int rc = memif_region_init_shm(pmd, /* has buffer */ 1);
+
+       return (rc < 0) ? rc : 0;
+}
+
+/*
+ * Initialize every ring in region 0: reset head/tail/flags, set the cookie
+ * and give each descriptor its own packet buffer in region 0.
+ *
+ * Buffers are numbered consecutively over all rings (S2M rings first, then
+ * M2S rings) and placed after the rings at pkt_buffer_offset. The buffer
+ * index is kept in 32 bits: rings * ring size can exceed 65535, and a
+ * 16-bit index would wrap and make descriptors share buffers.
+ */
+static void
+memif_init_rings(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       const uint32_t ring_size = (uint32_t)1 << pmd->run.log2_ring_size;
+       memif_ring_t *ring;
+       int i;
+       uint32_t j;
+       uint32_t slot;
+
+       for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+               ring = memif_get_ring(pmd, MEMIF_RING_S2M, i);
+               ring->head = 0;
+               ring->tail = 0;
+               ring->cookie = MEMIF_COOKIE;
+               ring->flags = 0;
+               for (j = 0; j < ring_size; j++) {
+                       slot = (uint32_t)i * ring_size + j;
+                       ring->desc[j].region = 0;
+                       ring->desc[j].offset = pmd->regions[0]->pkt_buffer_offset +
+                               slot * (uint32_t)pmd->run.pkt_buffer_size;
+                       ring->desc[j].length = pmd->run.pkt_buffer_size;
+               }
+       }
+
+       for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+               ring = memif_get_ring(pmd, MEMIF_RING_M2S, i);
+               ring->head = 0;
+               ring->tail = 0;
+               ring->cookie = MEMIF_COOKIE;
+               ring->flags = 0;
+               for (j = 0; j < ring_size; j++) {
+                       /* M2S buffers follow the buffers of all S2M rings */
+                       slot = ((uint32_t)i + pmd->run.num_s2m_rings) *
+                           ring_size + j;
+                       ring->desc[j].region = 0;
+                       ring->desc[j].offset = pmd->regions[0]->pkt_buffer_offset +
+                               slot * (uint32_t)pmd->run.pkt_buffer_size;
+                       ring->desc[j].length = pmd->run.pkt_buffer_size;
+               }
+       }
+}
+
+/*
+ * Bind one queue to its ring in region 0, reset its slot counters and
+ * create its eventfd. Returns the new fd (negative on failure).
+ */
+static int
+memif_queue_bind_ring(struct pmd_internals *pmd, struct memif_queue *mq,
+                     memif_ring_t *ring)
+{
+       mq->ring = ring;
+       mq->log2_ring_size = pmd->run.log2_ring_size;
+       /* queues located only in region 0 */
+       mq->region = 0;
+       mq->ring_offset = (uint8_t *)mq->ring - (uint8_t *)pmd->regions[0]->addr;
+       mq->last_head = 0;
+       mq->last_tail = 0;
+       mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
+
+       return mq->intr_handle.fd;
+}
+
+/*
+ * called only by slave
+ *
+ * Attach every TX queue to an S2M ring and every RX queue to an M2S ring.
+ * A failure to create a queue's eventfd is only logged.
+ */
+static void
+memif_init_queues(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       int qid;
+
+       /* slave transmits on S2M rings */
+       for (qid = 0; qid < pmd->run.num_s2m_rings; qid++) {
+               if (memif_queue_bind_ring(pmd, dev->data->tx_queues[qid],
+                               memif_get_ring(pmd, MEMIF_RING_S2M, qid)) < 0) {
+                       MIF_LOG(WARNING,
+                               "%s: Failed to create eventfd for tx queue %d: %s.",
+                               rte_vdev_device_name(pmd->vdev), qid,
+                               strerror(errno));
+               }
+       }
+
+       /* slave receives on M2S rings */
+       for (qid = 0; qid < pmd->run.num_m2s_rings; qid++) {
+               if (memif_queue_bind_ring(pmd, dev->data->rx_queues[qid],
+                               memif_get_ring(pmd, MEMIF_RING_M2S, qid)) < 0) {
+                       MIF_LOG(WARNING,
+                               "%s: Failed to create eventfd for rx queue %d: %s.",
+                               rte_vdev_device_name(pmd->vdev), qid,
+                               strerror(errno));
+               }
+       }
+}
+
+/*
+ * Allocate the shared memory regions, then initialize the rings inside
+ * them and attach the device queues to those rings.
+ *
+ * @return
+ *   0 on success, a negative value when the regions cannot be created
+ *   (rings and queues are left untouched in that case).
+ */
+int
+memif_init_regions_and_queues(struct rte_eth_dev *dev)
+{
+       int rc = memif_regions_init(dev->data->dev_private);
+
+       if (rc >= 0) {
+               memif_init_rings(dev);
+               memif_init_queues(dev);
+               rc = 0;
+       }
+
+       return rc;
+}
+
+/*
+ * Finish connection establishment.
+ *
+ * Maps every region that is not mapped yet, resolves each queue's ring
+ * from its (region, offset) pair, verifies the ring cookie, resets the
+ * ring and queue indexes and marks the device connected with link up.
+ * The receiving side of each ring sets MEMIF_RING_FLAG_MASK_INT
+ * (polling mode).
+ *
+ * @param dev
+ *   memif device
+ * @return
+ *   - On success, zero.
+ *   - On failure (region without fd, mmap failure, wrong cookie), -1.
+ */
+int
+memif_connect(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_region *mr;
+       struct memif_queue *mq;
+       int i;
+
+       for (i = 0; i < pmd->regions_num; i++) {
+               mr = pmd->regions[i];
+               /* skip empty entries and regions that are already mapped */
+               if (mr == NULL || mr->addr != NULL)
+                       continue;
+               if (mr->fd < 0)
+                       return -1;
+               mr->addr = mmap(NULL, mr->region_size,
+                               PROT_READ | PROT_WRITE,
+                               MAP_SHARED, mr->fd, 0);
+               /* mmap signals failure with MAP_FAILED, never with NULL */
+               if (mr->addr == MAP_FAILED) {
+                       MIF_LOG(ERR, "%s: Failed to mmap region %d: %s.",
+                               rte_vdev_device_name(pmd->vdev), i,
+                               strerror(errno));
+                       /* keep 'not mapped' state so nothing unmaps MAP_FAILED */
+                       mr->addr = NULL;
+                       return -1;
+               }
+       }
+
+       for (i = 0; i < pmd->run.num_s2m_rings; i++) {
+               mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+                   dev->data->tx_queues[i] : dev->data->rx_queues[i];
+               mq->ring = (memif_ring_t *)((uint8_t *)pmd->regions[mq->region]->addr +
+                           mq->ring_offset);
+               if (mq->ring->cookie != MEMIF_COOKIE) {
+                       MIF_LOG(ERR, "%s: Wrong cookie",
+                               rte_vdev_device_name(pmd->vdev));
+                       return -1;
+               }
+               mq->ring->head = 0;
+               mq->ring->tail = 0;
+               mq->last_head = 0;
+               mq->last_tail = 0;
+               /* enable polling mode */
+               if (pmd->role == MEMIF_ROLE_MASTER)
+                       mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
+       }
+       for (i = 0; i < pmd->run.num_m2s_rings; i++) {
+               mq = (pmd->role == MEMIF_ROLE_SLAVE) ?
+                   dev->data->rx_queues[i] : dev->data->tx_queues[i];
+               mq->ring = (memif_ring_t *)((uint8_t *)pmd->regions[mq->region]->addr +
+                           mq->ring_offset);
+               if (mq->ring->cookie != MEMIF_COOKIE) {
+                       MIF_LOG(ERR, "%s: Wrong cookie",
+                               rte_vdev_device_name(pmd->vdev));
+                       return -1;
+               }
+               mq->ring->head = 0;
+               mq->ring->tail = 0;
+               mq->last_head = 0;
+               mq->last_tail = 0;
+               /* enable polling mode */
+               if (pmd->role == MEMIF_ROLE_SLAVE)
+                       mq->ring->flags = MEMIF_RING_FLAG_MASK_INT;
+       }
+
+       pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
+       pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
+       dev->data->dev_link.link_status = ETH_LINK_UP;
+       MIF_LOG(INFO, "%s: Connected.", rte_vdev_device_name(pmd->vdev));
+       return 0;
+}
+
+/*
+ * dev_start callback: begin connection establishment according to the
+ * device role (slave sends a request, master enables listening).
+ *
+ * @return
+ *   The result of the role-specific connect call, or -1 for an unknown role.
+ */
+static int
+memif_dev_start(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+
+       switch (pmd->role) {
+       case MEMIF_ROLE_SLAVE:
+               return memif_connect_slave(dev);
+       case MEMIF_ROLE_MASTER:
+               return memif_connect_master(dev);
+       default:
+               break;
+       }
+
+       MIF_LOG(ERR, "%s: Unknown role: %d.",
+               rte_vdev_device_name(pmd->vdev), pmd->role);
+       return -1;
+}
+
+/*
+ * dev_close callback: queue a disconnect message for the peer, disconnect,
+ * release all RX and TX queues and detach the device from its socket.
+ */
+static void
+memif_dev_close(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct rte_eth_dev_data *data = dev->data;
+       int qid;
+
+       memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
+       memif_disconnect(dev);
+
+       /* queues are released through the registered dev_ops callbacks */
+       for (qid = 0; qid < data->nb_rx_queues; qid++)
+               dev->dev_ops->rx_queue_release(data->rx_queues[qid]);
+       for (qid = 0; qid < data->nb_tx_queues; qid++)
+               dev->dev_ops->tx_queue_release(data->tx_queues[qid]);
+
+       memif_socket_remove_device(dev);
+}
+
+/*
+ * dev_configure callback: derive the configured ring counts from the
+ * number of ethdev queues.
+ *
+ * S2M rings carry slave TX / master RX, M2S rings carry slave RX /
+ * master TX. Always returns 0.
+ */
+static int
+memif_dev_configure(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+
+       if (pmd->role == MEMIF_ROLE_SLAVE) {
+               /* slave: TXQ -> S2M, RXQ -> M2S */
+               pmd->cfg.num_s2m_rings = dev->data->nb_tx_queues;
+               pmd->cfg.num_m2s_rings = dev->data->nb_rx_queues;
+       } else {
+               /* master: RXQ -> S2M, TXQ -> M2S */
+               pmd->cfg.num_s2m_rings = dev->data->nb_rx_queues;
+               pmd->cfg.num_m2s_rings = dev->data->nb_tx_queues;
+       }
+
+       return 0;
+}
+
+/*
+ * tx_queue_setup callback: allocate the queue structure for TX queue 'qid'
+ * and store it in the device. The ring itself is attached later; the
+ * eventfd starts out invalid (-1).
+ *
+ * @return
+ *   0 on success, -ENOMEM when the queue cannot be allocated.
+ */
+static int
+memif_tx_queue_setup(struct rte_eth_dev *dev,
+                    uint16_t qid,
+                    uint16_t nb_tx_desc __rte_unused,
+                    unsigned int socket_id __rte_unused,
+                    const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_queue *txq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
+
+       if (txq == NULL) {
+               MIF_LOG(ERR, "%s: Failed to allocate tx queue id: %u",
+                       rte_vdev_device_name(pmd->vdev), qid);
+               return -ENOMEM;
+       }
+
+       /* TX direction is S2M for a slave and M2S for a master */
+       if (pmd->role == MEMIF_ROLE_SLAVE)
+               txq->type = MEMIF_RING_S2M;
+       else
+               txq->type = MEMIF_RING_M2S;
+
+       txq->pmd = pmd;
+       txq->intr_handle.type = RTE_INTR_HANDLE_EXT;
+       txq->intr_handle.fd = -1;
+
+       /* statistics */
+       txq->n_pkts = 0;
+       txq->n_bytes = 0;
+       txq->n_err = 0;
+
+       dev->data->tx_queues[qid] = txq;
+
+       return 0;
+}
+
+/*
+ * rx_queue_setup callback: allocate the queue structure for RX queue 'qid',
+ * remember the mempool used for received packets and store the queue in
+ * the device. The ring itself is attached later; the eventfd starts out
+ * invalid (-1).
+ *
+ * @return
+ *   0 on success, -ENOMEM when the queue cannot be allocated.
+ */
+static int
+memif_rx_queue_setup(struct rte_eth_dev *dev,
+                    uint16_t qid,
+                    uint16_t nb_rx_desc __rte_unused,
+                    unsigned int socket_id __rte_unused,
+                    const struct rte_eth_rxconf *rx_conf __rte_unused,
+                    struct rte_mempool *mb_pool)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_queue *rxq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
+
+       if (rxq == NULL) {
+               MIF_LOG(ERR, "%s: Failed to allocate rx queue id: %u",
+                       rte_vdev_device_name(pmd->vdev), qid);
+               return -ENOMEM;
+       }
+
+       /* RX direction is M2S for a slave and S2M for a master */
+       if (pmd->role == MEMIF_ROLE_SLAVE)
+               rxq->type = MEMIF_RING_M2S;
+       else
+               rxq->type = MEMIF_RING_S2M;
+
+       rxq->pmd = pmd;
+       rxq->mempool = mb_pool;
+       rxq->in_port = dev->data->port_id;
+       rxq->intr_handle.type = RTE_INTR_HANDLE_EXT;
+       rxq->intr_handle.fd = -1;
+
+       /* statistics */
+       rxq->n_pkts = 0;
+       rxq->n_bytes = 0;
+       rxq->n_err = 0;
+
+       dev->data->rx_queues[qid] = rxq;
+
+       return 0;
+}
+
+/*
+ * rx/tx_queue_release callback: free a queue structure.
+ * A NULL queue is accepted and ignored (rte_free() is a no-op on NULL).
+ */
+static void
+memif_queue_release(void *queue)
+{
+       rte_free(queue);
+}
+
+/*
+ * link_update callback: nothing to do here, always returns 0.
+ * (The link status itself is set to up in memif_connect().)
+ */
+static int
+memif_link_update(struct rte_eth_dev *dev __rte_unused,
+                 int wait_to_complete __rte_unused)
+{
+       return 0;
+}
+
+/*
+ * stats_get callback: sum the per-queue packet/byte/error counters.
+ *
+ * Per-queue values are reported for at most RTE_ETHDEV_QUEUE_STAT_CNTRS
+ * queues in each direction. The number of active queues is taken from the
+ * ring counts, using the same mapping as the rest of the driver:
+ * RX queues are M2S rings on a slave and S2M rings on a master, TX queues
+ * are the opposite.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       struct memif_queue *mq;
+       int i;
+       uint8_t tmp, nq;
+
+       stats->ipackets = 0;
+       stats->ibytes = 0;
+       stats->opackets = 0;
+       stats->obytes = 0;
+       stats->oerrors = 0;
+
+       /* number of RX queues in use: slave receives on M2S rings */
+       tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_m2s_rings :
+           pmd->run.num_s2m_rings;
+       nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
+           RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+       /* RX stats */
+       for (i = 0; i < nq; i++) {
+               mq = dev->data->rx_queues[i];
+               stats->q_ipackets[i] = mq->n_pkts;
+               stats->q_ibytes[i] = mq->n_bytes;
+               stats->ipackets += mq->n_pkts;
+               stats->ibytes += mq->n_bytes;
+       }
+
+       /* number of TX queues in use: slave transmits on S2M rings */
+       tmp = (pmd->role == MEMIF_ROLE_SLAVE) ? pmd->run.num_s2m_rings :
+           pmd->run.num_m2s_rings;
+       nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
+           RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+       /* TX stats */
+       for (i = 0; i < nq; i++) {
+               mq = dev->data->tx_queues[i];
+               stats->q_opackets[i] = mq->n_pkts;
+               stats->q_obytes[i] = mq->n_bytes;
+               stats->opackets += mq->n_pkts;
+               stats->obytes += mq->n_bytes;
+               stats->oerrors += mq->n_err;
+       }
+       return 0;
+}
+
+/*
+ * stats_reset callback: zero the packet, byte and error counters of every
+ * queue that is backed by an S2M or M2S ring.
+ */
+static void
+memif_stats_reset(struct rte_eth_dev *dev)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       const int is_slave = (pmd->role == MEMIF_ROLE_SLAVE);
+       struct memif_queue *mq;
+       int idx;
+
+       /* S2M rings: TX queues on a slave, RX queues on a master */
+       for (idx = 0; idx < pmd->run.num_s2m_rings; idx++) {
+               if (is_slave)
+                       mq = dev->data->tx_queues[idx];
+               else
+                       mq = dev->data->rx_queues[idx];
+               mq->n_pkts = 0;
+               mq->n_bytes = 0;
+               mq->n_err = 0;
+       }
+
+       /* M2S rings: RX queues on a slave, TX queues on a master */
+       for (idx = 0; idx < pmd->run.num_m2s_rings; idx++) {
+               if (is_slave)
+                       mq = dev->data->rx_queues[idx];
+               else
+                       mq = dev->data->tx_queues[idx];
+               mq->n_pkts = 0;
+               mq->n_bytes = 0;
+               mq->n_err = 0;
+       }
+}
+
+/*
+ * rx_queue_intr_enable callback: interrupt mode is not implemented, so the
+ * request is logged and rejected.
+ *
+ * @return
+ *   Always -1.
+ */
+static int
+memif_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
+{
+       struct pmd_internals *pmd = dev->data->dev_private;
+       const char *dev_name = rte_vdev_device_name(pmd->vdev);
+
+       MIF_LOG(WARNING, "%s: Interrupt mode not supported.", dev_name);
+
+       return -1;
+}
+
+/*
+ * rx_queue_intr_disable callback: nothing to disable, always returns 0.
+ */
+static int
+memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
+{
+       /* fetched but not used */
+       struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
+
+       return 0;
+}
+
+/*
+ * ethdev callbacks implemented by the memif PMD. RX and TX queues share the
+ * same release function. Installed on the device in memif_create().
+ */
+static const struct eth_dev_ops ops = {
+       .dev_start = memif_dev_start,
+       .dev_close = memif_dev_close,
+       .dev_infos_get = memif_dev_info,
+       .dev_configure = memif_dev_configure,
+       .tx_queue_setup = memif_tx_queue_setup,
+       .rx_queue_setup = memif_rx_queue_setup,
+       .rx_queue_release = memif_queue_release,
+       .tx_queue_release = memif_queue_release,
+       .rx_queue_intr_enable = memif_rx_queue_intr_enable,
+       .rx_queue_intr_disable = memif_rx_queue_intr_disable,
+       .link_update = memif_link_update,
+       .stats_get = memif_stats_get,
+       .stats_reset = memif_stats_reset,
+};
+
+/*
+ * Allocate and set up the ethdev for one memif virtual device.
+ *
+ * Stores the parsed device arguments in the private data, registers the
+ * device with its control socket and installs the driver callbacks. The
+ * device starts out with ETH_MEMIF_FLAG_DISABLED set; ring counts are
+ * filled in later by the dev_configure callback.
+ *
+ * @return
+ *   - On success, zero.
+ *   - -1 when zero-copy is requested (not supported) or the ethdev cannot
+ *     be allocated; the negative result of memif_socket_init() when the
+ *     socket setup fails.
+ */
+static int
+memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
+            memif_interface_id_t id, uint32_t flags,
+            const char *socket_filename,
+            memif_log2_ring_size_t log2_ring_size,
+            uint16_t pkt_buffer_size, const char *secret,
+            struct rte_ether_addr *ether_addr)
+{
+       const char *name = rte_vdev_device_name(vdev);
+       const unsigned int numa_node = vdev->device.numa_node;
+       struct rte_eth_dev *eth_dev;
+       struct pmd_internals *pmd;
+       int rc;
+
+       if (flags & ETH_MEMIF_FLAG_ZERO_COPY) {
+               MIF_LOG(ERR, "Zero-copy slave not supported.");
+               return -1;
+       }
+
+       eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
+       if (eth_dev == NULL) {
+               MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
+               return -1;
+       }
+
+       /* start from a clean private area, device disabled until started */
+       pmd = eth_dev->data->dev_private;
+       memset(pmd, 0, sizeof(*pmd));
+       pmd->role = role;
+       pmd->id = id;
+       pmd->vdev = vdev;
+       pmd->flags = flags | ETH_MEMIF_FLAG_DISABLED;
+
+       /*
+        * NOTE(review): eth_dev is not released when the socket setup
+        * fails - confirm whether the caller cleans it up.
+        */
+       rc = memif_socket_init(eth_dev, socket_filename);
+       if (rc < 0)
+               return rc;
+
+       memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
+       if (secret != NULL)
+               strlcpy(pmd->secret, secret, sizeof(pmd->secret));
+
+       pmd->cfg.pkt_buffer_size = pkt_buffer_size;
+       pmd->cfg.log2_ring_size = log2_ring_size;
+       /* set in .dev_configure() */
+       pmd->cfg.num_s2m_rings = 0;
+       pmd->cfg.num_m2s_rings = 0;
+
+       eth_dev->data->dev_private = pmd;
+       eth_dev->data->numa_node = numa_node;
+       eth_dev->data->mac_addrs = ether_addr;
+
+       eth_dev->device = &vdev->device;
+       eth_dev->dev_ops = &ops;
+       eth_dev->rx_pkt_burst = eth_memif_rx;
+       eth_dev->tx_pkt_burst = eth_memif_tx;
+
+       eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
+
+       rte_eth_dev_probing_finish(eth_dev);
+
+       return 0;
+}
+
+/*
+ * kvargs handler for the role argument: stores MEMIF_ROLE_MASTER or
+ * MEMIF_ROLE_SLAVE in *extra_args depending on which keyword the value
+ * contains ("master" is checked first). Returns -EINVAL otherwise.
+ */
+static int
+memif_set_role(const char *key __rte_unused, const char *value,
+              void *extra_args)
+{
+       enum memif_role_t *role_out = extra_args;
+
+       if (strstr(value, "master") != NULL) {
+               *role_out = MEMIF_ROLE_MASTER;
+               return 0;
+       }
+
+       if (strstr(value, "slave") != NULL) {
+               *role_out = MEMIF_ROLE_SLAVE;
+               return 0;
+       }
+
+       MIF_LOG(ERR, "Unknown role: %s.", value);
+       return -EINVAL;
+}
+
+/*
+ * kvargs handler for the zero-copy argument: a value containing "yes" sets
+ * ETH_MEMIF_FLAG_ZERO_COPY in the flags word behind extra_args, a value
+ * containing "no" clears it ("yes" is checked first). Returns -EINVAL for
+ * anything else.
+ */
+static int
+memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
+{
+       uint32_t *flags_out = extra_args;
+
+       if (strstr(value, "yes") != NULL) {
+               *flags_out |= ETH_MEMIF_FLAG_ZERO_COPY;
+               return 0;
+       }
+
+       if (strstr(value, "no") != NULL) {
+               *flags_out &= ~ETH_MEMIF_FLAG_ZERO_COPY;
+               return 0;
+       }
+
+       MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
+       return -EINVAL;
+}
+
+/*
+ * kvargs handler for the interface id: stores the base-10 value parsed from
+ * the argument in *extra_args. Never fails.
+ */
+static int
+memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
+{
+       /* strtoul() yields 0 when nothing can be parsed; 0 is a valid id */
+       unsigned long parsed = strtoul(value, NULL, 10);
+
+       *(memif_interface_id_t *)extra_args = parsed;
+       return 0;
+}
+
+/*
+ * kvargs handler for the packet buffer size: parses a base-10 value in the
+ * range 1..UINT16_MAX and stores it in the uint16_t behind extra_args.
+ * Returns -EINVAL (leaving the destination untouched) when the value is not
+ * a plain number, is zero, or does not fit in 16 bits.
+ */
+static int
+memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
+{
+       unsigned long tmp;
+       char *end = NULL;
+       uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
+
+       tmp = strtoul(value, &end, 10);
+       /* reject empty input and trailing garbage such as "2048xyz" */
+       if (end == value || *end != '\0' || tmp == 0 || tmp > UINT16_MAX) {
+               MIF_LOG(ERR, "Invalid buffer size: %s.", value);
+               return -EINVAL;
+       }
+       *pkt_buffer_size = tmp;
+       return 0;
+}
+
+/*
+ * kvargs handler for the ring size: parses a base-10 log2 ring size in the
+ * range 1..ETH_MEMIF_MAX_LOG2_RING_SIZE and stores it behind extra_args.
+ * Returns -EINVAL (leaving the destination untouched) when the value is not
+ * a plain number or is out of range.
+ */
+static int
+memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
+{
+       unsigned long tmp;
+       char *end = NULL;
+       memif_log2_ring_size_t *log2_ring_size =
+           (memif_log2_ring_size_t *)extra_args;
+
+       tmp = strtoul(value, &end, 10);
+       /* reject empty input and trailing garbage such as "10k" */
+       if (end == value || *end != '\0' || tmp == 0 ||
+           tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
+               MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
+                       value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
+               return -EINVAL;
+       }
+       *log2_ring_size = tmp;
+       return 0;
+}
+
+/*
+ * Check that the directory part of 'filename' exists and that we have
+ * permission to read/write it.
+ *
+ * The directory is everything up to the last '/'; a filename without any
+ * '/' is rejected. Relative directories are resolved against the current
+ * working directory.
+ *
+ * Returns 0 on success, -EINVAL when the directory is missing or not
+ * accessible, -1 when the temporary buffer cannot be allocated.
+ */
+static int
+memif_check_socket_filename(const char *filename)
+{
+       const char *slash;
+       char *dir;
+       size_t dir_len;
+       int ret = 0;
+
+       slash = strrchr(filename, '/');
+       if (slash == NULL) {
+               MIF_LOG(ERR, "Invalid socket directory.");
+               return -EINVAL;
+       }
+
+       /* keep the leading '/' when the socket sits directly in the root */
+       dir_len = (slash == filename) ? 1 : (size_t)(slash - filename);
+
+       dir = rte_zmalloc("memif_tmp", dir_len + 1, 0);
+       if (dir == NULL) {
+               MIF_LOG(ERR, "Failed to allocate memory.");
+               return -1;
+       }
+       strlcpy(dir, filename, dir_len + 1);
+
+       /* AT_FDCWD: an invalid dirfd makes relative paths fail with EBADF */
+       if (faccessat(AT_FDCWD, dir, F_OK | R_OK | W_OK, AT_EACCESS) < 0) {
+               MIF_LOG(ERR, "Invalid socket directory.");
+               ret = -EINVAL;
+       }
+
+       rte_free(dir);
+
+       return ret;
+}
+
+/*
+ * kvargs handler for the socket argument: stores the value pointer itself
+ * (no copy is made here) in *extra_args and returns the result of
+ * memif_check_socket_filename() for it.
+ */
+static int
+memif_set_socket_filename(const char *key __rte_unused, const char *value,
+                         void *extra_args)
+{
+       const char **filename_out = extra_args;
+
+       *filename_out = value;
+       return memif_check_socket_filename(value);
+}
+
+/*
+ * kvargs handler for the mac argument: parses "xx:xx:xx:xx:xx:xx" into the
+ * rte_ether_addr behind extra_args.
+ *
+ * Parsing is best effort: on failure a warning is logged, the destination
+ * address is left exactly as the caller provided it, and 0 is still
+ * returned so that probing continues.
+ */
+static int
+memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
+{
+       struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
+       struct rte_ether_addr parsed;
+       int ret = 0;
+
+       /* parse into a scratch copy so a partial match cannot clobber bytes */
+       ret = sscanf(value, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+              &parsed.addr_bytes[0], &parsed.addr_bytes[1],
+              &parsed.addr_bytes[2], &parsed.addr_bytes[3],
+              &parsed.addr_bytes[4], &parsed.addr_bytes[5]);
+       if (ret != 6) {
+               MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
+               return 0;
+       }
+
+       *ether_addr = parsed;
+       return 0;
+}
+
+/*
+ * kvargs handler for the secret argument: stores the value pointer itself
+ * (no copy is made here) in *extra_args. Never fails.
+ */
+static int
+memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
+{
+       *(const char **)extra_args = value;
+       return 0;
+}
+
+/**
+ * vdev probe callback: parse the device arguments and create the memif port.
+ *
+ * Arguments that are not given keep their defaults (slave role, id 0,
+ * default buffer/ring size and socket path, random MAC address, no secret).
+ *
+ * @param vdev
+ *   virtual device being probed
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value. Probing from a secondary process is
+ *     not supported and always fails.
+ */
+static int
+rte_pmd_memif_probe(struct rte_vdev_device *vdev)
+{
+       RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
+       RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
+       int ret = 0;
+       struct rte_kvargs *kvlist;
+       const char *name = rte_vdev_device_name(vdev);
+       enum memif_role_t role = MEMIF_ROLE_SLAVE;
+       memif_interface_id_t id = 0;
+       uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
+       memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
+       const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
+       uint32_t flags = 0;
+       const char *secret = NULL;
+       struct rte_ether_addr *ether_addr;
+
+       MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
+
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+               MIF_LOG(ERR, "Multi-processing not supported for memif.");
+               /* TODO:
+                * Request connection information.
+                *
+                * Once memif in the primary process is connected,
+                * broadcast connection information.
+                */
+               return -1;
+       }
+
+       /* allocated only once we know the probe can proceed */
+       ether_addr = rte_zmalloc("", sizeof(*ether_addr), 0);
+       if (ether_addr == NULL) {
+               MIF_LOG(ERR, "%s: Failed to allocate MAC address.", name);
+               return -ENOMEM;
+       }
+
+       /* default address, replaced if a mac argument parses successfully */
+       rte_eth_random_addr(ether_addr->addr_bytes);
+
+       kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
+
+       /* parse parameters */
+       if (kvlist != NULL) {
+               ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
+                                        &memif_set_role, &role);
+               if (ret < 0)
+                       goto exit;
+               ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
+                                        &memif_set_id, &id);
+               if (ret < 0)
+                       goto exit;
+               ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
+                                        &memif_set_bs, &pkt_buffer_size);
+               if (ret < 0)
+                       goto exit;
+               ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
+                                        &memif_set_rs, &log2_ring_size);
+               if (ret < 0)
+                       goto exit;
+               ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
+                                        &memif_set_socket_filename,
+                                        (void *)(&socket_filename));
+               if (ret < 0)
+                       goto exit;
+               ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
+                                        &memif_set_mac, ether_addr);
+               if (ret < 0)
+                       goto exit;
+               ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
+                                        &memif_set_zc, &flags);
+               if (ret < 0)
+                       goto exit;
+               ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
+                                        &memif_set_secret, (void *)(&secret));
+               if (ret < 0)
+                       goto exit;
+       }
+
+       /*
+        * create interface; socket_filename and secret may still point into
+        * kvlist, so it must stay alive until memif_create() returns
+        */
+       ret = memif_create(vdev, role, id, flags, socket_filename,
+                          log2_ring_size, pkt_buffer_size, secret, ether_addr);
+
+exit:
+       /*
+        * memif_create() stores ether_addr in the ethdev only on success;
+        * on every failure path it is still ours to free.
+        */
+       if (ret != 0)
+               rte_free(ether_addr);
+       if (kvlist != NULL)
+               rte_kvargs_free(kvlist);
+       return ret;
+}
+
+/*
+ * vdev remove callback: close the ethdev port registered under the vdev's
+ * name, if there is one. Always returns 0.
+ */
+static int
+rte_pmd_memif_remove(struct rte_vdev_device *vdev)
+{
+       struct rte_eth_dev *port;
+
+       port = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
+       if (port != NULL)
+               rte_eth_dev_close(port->data->port_id);
+
+       return 0;
+}
+
+/* vdev driver hooks: probe creates the memif port, remove closes it */
+static struct rte_vdev_driver pmd_memif_drv = {
+       .probe = rte_pmd_memif_probe,
+       .remove = rte_pmd_memif_remove,
+};
+
+/* register the driver on the vdev bus under the name "net_memif" */
+RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
+
+/*
+ * Human-readable list of accepted device arguments. Adjacent literals are
+ * concatenated by the compiler, so each entry but the last carries its own
+ * trailing space as separator.
+ */
+RTE_PMD_REGISTER_PARAM_STRING(net_memif,
+                             ETH_MEMIF_ID_ARG "=<int> "
+                             ETH_MEMIF_ROLE_ARG "=master|slave "
+                             ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int> "
+                             ETH_MEMIF_RING_SIZE_ARG "=<int> "
+                             ETH_MEMIF_SOCKET_ARG "=<string> "
+                             ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx "
+                             ETH_MEMIF_ZC_ARG "=yes|no "
+                             ETH_MEMIF_SECRET_ARG "=<string>");
+
+/* log type id used by MIF_LOG(); assigned by memif_init_log() */
+int memif_logtype;
+
+/* Register the "pmd.net.memif" log type and set its level to NOTICE. */
+RTE_INIT(memif_init_log)
+{
+       memif_logtype = rte_log_register("pmd.net.memif");
+       /* the level is only set when a non-negative id was returned */
+       if (memif_logtype >= 0)
+               rte_log_set_level(memif_logtype, RTE_LOG_NOTICE);
+}
diff --git a/drivers/net/memif/rte_eth_memif.h b/drivers/net/memif/rte_eth_memif.h
new file mode 100644 (file)
index 0000000..5f631e9
--- /dev/null
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _RTE_ETH_MEMIF_H_
+#define _RTE_ETH_MEMIF_H_
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif                         /* _GNU_SOURCE */
+
+#include <sys/queue.h>
+
+#include <rte_ethdev_driver.h>
+#include <rte_ether.h>
+#include <rte_interrupts.h>
+
+#include "memif.h"
+
+/* defaults applied at probe time when the matching argument is not given */
+#define ETH_MEMIF_DEFAULT_SOCKET_FILENAME      "/run/memif.sock"
+/* used as the default log2 ring size (not a number of ring entries) */
+#define ETH_MEMIF_DEFAULT_RING_SIZE            10
+#define ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE      2048
+
+/* limits */
+#define ETH_MEMIF_MAX_NUM_Q_PAIRS              255
+/* largest log2 ring size accepted by the ring size argument */
+#define ETH_MEMIF_MAX_LOG2_RING_SIZE           14
+/* number of slots in pmd_internals.regions[] */
+#define ETH_MEMIF_MAX_REGION_NUM               256
+
+/* fixed buffer sizes, in bytes */
+#define ETH_MEMIF_SHM_NAME_SIZE                        32
+/* size of the local/remote disconnect reason strings */
+#define ETH_MEMIF_DISC_STRING_SIZE             96
+/* size of pmd_internals.secret; longer secrets are truncated on copy */
+#define ETH_MEMIF_SECRET_SIZE                  24
+
+/* log type id of the driver, defined in rte_eth_memif.c */
+extern int memif_logtype;
+
+/*
+ * Driver log helper: logs at RTE_LOG_<level> with the memif log type,
+ * prefixing the message with the calling function's name and appending a
+ * newline, so format strings passed in must not end with '\n'.
+ */
+#define MIF_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, memif_logtype, \
+               "%s(): " fmt "\n", __func__, ##args)
+
+/** Role of a memif device; the probe code defaults to MEMIF_ROLE_SLAVE. */
+enum memif_role_t {
+       MEMIF_ROLE_MASTER,
+       MEMIF_ROLE_SLAVE,
+};
+
+/** Descriptor of one shared memory region. */
+struct memif_region {
+       void *addr;                             /**< shared memory address */
+       memif_region_size_t region_size;        /**< shared memory size */
+       int fd;                                 /**< shared memory file descriptor */
+       uint32_t pkt_buffer_offset;
+       /**< offset from 'addr' to first packet buffer */
+};
+
+/**
+ * Per-queue state: the ring this queue works on, its location inside the
+ * shared memory region, and the queue's packet/byte/error counters.
+ */
+struct memif_queue {
+       struct rte_mempool *mempool;            /**< mempool for RX packets */
+       struct pmd_internals *pmd;              /**< device internals */
+
+       memif_ring_type_t type;                 /**< ring type */
+       memif_region_index_t region;            /**< shared memory region index */
+
+       uint16_t in_port;                       /**< port id */
+
+       memif_region_offset_t ring_offset;
+       /**< ring offset from start of shm region (ring - memif_region.addr) */
+
+       uint16_t last_head;                     /**< last ring head */
+       uint16_t last_tail;                     /**< last ring tail */
+
+       /* rx/tx info */
+       uint64_t n_pkts;                        /**< number of rx/tx packets */
+       uint64_t n_bytes;                       /**< number of rx/tx bytes */
+       uint64_t n_err;                         /**< number of tx errors */
+
+       memif_ring_t *ring;                     /**< pointer to ring */
+
+       struct rte_intr_handle intr_handle;     /**< interrupt handle */
+
+       memif_log2_ring_size_t log2_ring_size;  /**< log2 of ring size */
+};
+
+/**
+ * Private data of a memif ethdev (eth_dev->data->dev_private).
+ *
+ * 'cfg' holds the values requested through device arguments and device
+ * configuration; 'run' holds the values used by the active connection.
+ * The ETH_MEMIF_FLAG_* bits defined inline below are the values stored in
+ * 'flags'.
+ */
+struct pmd_internals {
+       memif_interface_id_t id;                /**< unique id */
+       enum memif_role_t role;                 /**< device role */
+       uint32_t flags;                         /**< device status flags */
+#define ETH_MEMIF_FLAG_CONNECTING      (1 << 0)
+/**< device is connecting */
+#define ETH_MEMIF_FLAG_CONNECTED       (1 << 1)
+/**< device is connected */
+#define ETH_MEMIF_FLAG_ZERO_COPY       (1 << 2)
+/**< device is zero-copy enabled */
+#define ETH_MEMIF_FLAG_DISABLED                (1 << 3)
+/**< device has not been configured and can not accept connection requests */
+
+       char *socket_filename;                  /**< pointer to socket filename */
+       char secret[ETH_MEMIF_SECRET_SIZE]; /**< secret (optional security parameter) */
+
+       struct memif_control_channel *cc;       /**< control channel */
+
+       struct memif_region *regions[ETH_MEMIF_MAX_REGION_NUM];
+       /**< shared memory regions */
+       memif_region_index_t regions_num;       /**< number of regions */
+
+       /* remote info */
+       char remote_name[RTE_DEV_NAME_MAX_LEN];         /**< remote app name */
+       char remote_if_name[RTE_DEV_NAME_MAX_LEN];      /**< remote peer name */
+
+       struct {
+               memif_log2_ring_size_t log2_ring_size; /**< log2 of ring size */
+               uint8_t num_s2m_rings;          /**< number of slave to master rings */
+               uint8_t num_m2s_rings;          /**< number of master to slave rings */
+               uint16_t pkt_buffer_size;       /**< buffer size */
+       } cfg;                                  /**< Configured parameters (max values) */
+
+       struct {
+               memif_log2_ring_size_t log2_ring_size; /**< log2 of ring size */
+               uint8_t num_s2m_rings;          /**< number of slave to master rings */
+               uint8_t num_m2s_rings;          /**< number of master to slave rings */
+               uint16_t pkt_buffer_size;       /**< buffer size */
+       } run;
+       /**< Parameters used in active connection */
+
+       char local_disc_string[ETH_MEMIF_DISC_STRING_SIZE];
+       /**< local disconnect reason */
+       char remote_disc_string[ETH_MEMIF_DISC_STRING_SIZE];
+       /**< remote disconnect reason */
+
+       struct rte_vdev_device *vdev;           /**< vdev handle */
+};
+
+/**
+ * Unmap shared memory and free regions from memory.
+ *
+ * @param pmd
+ *   device internals
+ */
+void memif_free_regions(struct pmd_internals *pmd);
+
+/**
+ * Finalize connection establishment process. Map shared memory file
+ * (master role), initialize ring queue, set link status up.
+ *
+ * @param dev
+ *   memif device
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int memif_connect(struct rte_eth_dev *dev);
+
+/**
+ * Create shared memory file and initialize ring queue.
+ * Only called by slave when establishing connection.
+ *
+ * @param dev
+ *   memif device
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int memif_init_regions_and_queues(struct rte_eth_dev *dev);
+
+/**
+ * Get memif version string.
+ *
+ * @return
+ *   - memif version string
+ */
+const char *memif_version(void);
+
+/*
+ * Fallback memfd_create() wrapper, compiled only when MFD_HUGETLB is not
+ * defined. If __NR_memfd_create is missing as well, a per-architecture
+ * syscall number is supplied here; unknown architectures fail the build.
+ *
+ * NOTE(review): MFD_HUGETLB is presumably used as a proxy for "the libc
+ * headers already declare memfd_create()" - confirm.
+ * NOTE(review): __x86_32__ looks intended for the x32 ABI; confirm that the
+ * supported compilers actually define it.
+ * NOTE(review): syscall() is called below but this header does not visibly
+ * include <unistd.h>; presumably the including file provides it - confirm.
+ */
+#ifndef MFD_HUGETLB
+#ifndef __NR_memfd_create
+
+#if defined __x86_64__
+#define __NR_memfd_create 319
+#elif defined __x86_32__
+#define __NR_memfd_create 1073742143
+#elif defined __arm__
+#define __NR_memfd_create 385
+#elif defined __aarch64__
+#define __NR_memfd_create 279
+#elif defined __powerpc__
+#define __NR_memfd_create 360
+#elif defined __i386__
+#define __NR_memfd_create 356
+#else
+#error "__NR_memfd_create unknown for this architecture"
+#endif
+
+#endif                         /* __NR_memfd_create */
+
+/* invoke the memfd_create system call directly with the given arguments */
+static inline int memfd_create(const char *name, unsigned int flags)
+{
+       return syscall(__NR_memfd_create, name, flags);
+}
+#endif                         /* MFD_HUGETLB */
+
+/*
+ * Fallback definitions of the file sealing constants, each group supplied
+ * only when the included system headers do not already define it.
+ */
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING       0x0002U
+#endif
+
+/* F_GET_SEALS and the F_SEAL_* bits are only supplied with F_ADD_SEALS */
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL     0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK   0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW     0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE    0x0008 /* prevent writes */
+#endif
+
+#endif                         /* _RTE_ETH_MEMIF_H_ */
diff --git a/drivers/net/memif/rte_pmd_memif_version.map b/drivers/net/memif/rte_pmd_memif_version.map
new file mode 100644 (file)
index 0000000..8861484
--- /dev/null
@@ -0,0 +1,4 @@
+DPDK_19.08 {
+
+        local: *;
+};
index ed99896..b570734 100644 (file)
@@ -24,6 +24,7 @@ drivers = ['af_packet',
        'ixgbe',
        'kni',
        'liquidio',
+       'memif',
        'mlx4',
        'mlx5',
        'mvneta',
index b6106e1..d0df0b0 100644 (file)
@@ -171,6 +171,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
 endif
 _LDLIBS-$(CONFIG_RTE_LIBRTE_LIO_PMD)        += -lrte_pmd_lio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_MEMIF)      += -lrte_pmd_memif
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -lmnl
 ifeq ($(CONFIG_RTE_IBVERBS_LINK_DLOPEN),y)