mlx5: introduce new driver for Mellanox ConnectX-4 adapters
author Adrien Mazarguil <adrien.mazarguil@6wind.com>
Fri, 30 Oct 2015 18:52:30 +0000 (19:52 +0100)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Fri, 30 Oct 2015 21:03:42 +0000 (22:03 +0100)
In its current state, this driver implements the bare minimum to initialize
itself and Mellanox ConnectX-4 adapters without doing anything else
(no RX/TX for instance). It is disabled by default since it is based on the
mlx4 driver and also depends on libibverbs.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Or Ami <ora@mellanox.com>
13 files changed:
MAINTAINERS
config/common_bsdapp
config/common_linuxapp
drivers/net/Makefile
drivers/net/mlx5/Makefile [new file with mode: 0644]
drivers/net/mlx5/mlx5.c [new file with mode: 0644]
drivers/net/mlx5/mlx5.h [new file with mode: 0644]
drivers/net/mlx5/mlx5_defs.h [new file with mode: 0644]
drivers/net/mlx5/mlx5_ethdev.c [new file with mode: 0644]
drivers/net/mlx5/mlx5_mac.c [new file with mode: 0644]
drivers/net/mlx5/mlx5_utils.h [new file with mode: 0644]
drivers/net/mlx5/rte_pmd_mlx5_version.map [new file with mode: 0644]
mk/rte.app.mk

index 080a8e8..9d11055 100644 (file)
@@ -255,6 +255,10 @@ M: Adrien Mazarguil <adrien.mazarguil@6wind.com>
 F: drivers/net/mlx4/
 F: doc/guides/nics/mlx4.rst
 
+Mellanox mlx5
+M: Adrien Mazarguil <adrien.mazarguil@6wind.com>
+F: drivers/net/mlx5/
+
 RedHat virtio
 M: Huawei Xie <huawei.xie@intel.com>
 M: Changchun Ouyang <changchun.ouyang@intel.com>
index 3003da5..8d3ed01 100644 (file)
@@ -215,6 +215,12 @@ CONFIG_RTE_LIBRTE_MLX4_MAX_INLINE=0
 CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8
 CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS=1
 
+#
+# Compile burst-oriented Mellanox ConnectX-4 (MLX5) PMD
+#
+CONFIG_RTE_LIBRTE_MLX5_PMD=n
+CONFIG_RTE_LIBRTE_MLX5_DEBUG=n
+
 #
 # Compile burst-oriented Broadcom PMD driver
 #
index dadba4d..94d5ae1 100644 (file)
@@ -213,6 +213,12 @@ CONFIG_RTE_LIBRTE_MLX4_MAX_INLINE=0
 CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8
 CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS=1
 
+#
+# Compile burst-oriented Mellanox ConnectX-4 (MLX5) PMD
+#
+CONFIG_RTE_LIBRTE_MLX5_PMD=n
+CONFIG_RTE_LIBRTE_MLX5_DEBUG=n
+
 #
 # Compile burst-oriented Broadcom PMD driver
 #
index 5ebf963..6da1ce2 100644 (file)
@@ -41,6 +41,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
 DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
 DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
+DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_NULL) += null
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
new file mode 100644 (file)
index 0000000..6e63073
--- /dev/null
@@ -0,0 +1,109 @@
+#   BSD LICENSE
+#
+#   Copyright 2015 6WIND S.A.
+#   Copyright 2015 Mellanox.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of 6WIND S.A. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# Refuse to build when both the combined-library and shared-library options
+# are enabled: print a message and fail the "all" target.
+ifeq ($(CONFIG_RTE_BUILD_COMBINE_LIBS)$(CONFIG_RTE_BUILD_SHARED_LIB),yy)
+all:
+       @echo 'MLX5: Not supported in a combined shared library'
+       @false
+endif
+
+# Library name.
+LIB = librte_pmd_mlx5.a
+
+# Sources.
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c mlx5_ethdev.c mlx5_mac.c
+
+# Dependencies.
+DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += lib/librte_ether lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += lib/librte_eal lib/librte_mempool
+
+# Basic CFLAGS, followed by the Verbs library needed at link time.
+CFLAGS += -O3 -std=gnu99 -Wall -Wextra -g -I. -D_XOPEN_SOURCE=600
+CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -libverbs
+
+# A few warnings cannot be avoided in external headers.
+CFLAGS += -Wno-error=cast-qual
+
+EXPORT_MAP := rte_pmd_mlx5_version.map
+LIBABIVER := 1
+
+# DEBUG=1, usually given on the command line, forces the debug option on.
+ifeq ($(DEBUG),1)
+CONFIG_RTE_LIBRTE_MLX5_DEBUG := y
+endif
+
+# Non-debug builds define NDEBUG; debug builds turn on -pedantic and define
+# PEDANTIC instead.
+ifneq ($(CONFIG_RTE_LIBRTE_MLX5_DEBUG),y)
+CFLAGS += -DNDEBUG -UPEDANTIC
+else
+CFLAGS += -pedantic -UNDEBUG -DPEDANTIC
+endif
+
+include $(RTE_SDK)/mk/rte.lib.mk
+
+# Generate and clean-up mlx5_autoconf.h.
+
+export CC CFLAGS CPPFLAGS EXTRA_CFLAGS EXTRA_CPPFLAGS
+export AUTO_CONFIG_CFLAGS = -Wno-error
+
+# Output of the probing commands below is discarded unless V is defined.
+ifndef V
+AUTOCONF_OUTPUT := >/dev/null
+endif
+
+# The header is removed first, then auto-config-h.sh is run once per probed
+# feature with: output file, macro name, header to inspect, symbol to look for.
+# NOTE(review): the script presumably appends to the output file, since it is
+# invoked twice on the same target -- confirm against auto-config-h.sh.
+mlx5_autoconf.h: $(RTE_SDK)/scripts/auto-config-h.sh
+       $Q $(RM) -f -- '$@'
+       $Q sh -- '$<' '$@' \
+               RSS_SUPPORT \
+               infiniband/verbs.h \
+               enum IBV_EXP_DEVICE_UD_RSS $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_EXP_QUERY_DEVICE \
+               infiniband/verbs.h \
+               type 'struct ibv_exp_device_attr' $(AUTOCONF_OUTPUT)
+
+# mlx5.h includes mlx5_autoconf.h, so every object of this PMD (not only
+# mlx5.o) must wait for the header to be generated; otherwise a parallel
+# build may compile a source file before the header exists.
+$(SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD):.c=.o): mlx5_autoconf.h
+
+clean_mlx5: FORCE
+       $Q rm -f -- mlx5_autoconf.h
+
+clean: clean_mlx5
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
new file mode 100644 (file)
index 0000000..6df486b
--- /dev/null
@@ -0,0 +1,496 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 6WIND S.A.
+ *   Copyright 2015 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <net/if.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+/* DPDK headers don't like -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include <rte_malloc.h>
+#include <rte_ethdev.h>
+#include <rte_pci.h>
+#include <rte_common.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+#include "mlx5_autoconf.h"
+
+/**
+ * DPDK callback to close the device.
+ *
+ * Releases the protection domain and the Verbs context held by the private
+ * structure (when present), then wipes the private structure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx5_dev_close(struct rte_eth_dev *dev)
+{
+       struct priv *priv = dev->data->dev_private;
+
+       priv_lock(priv);
+       DEBUG("%p: closing device \"%s\"",
+             (void *)dev,
+             ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
+       if (priv->pd == NULL) {
+               /* Without a PD, no Verbs context is expected either. */
+               assert(priv->ctx == NULL);
+       } else {
+               assert(priv->ctx != NULL);
+               claim_zero(ibv_dealloc_pd(priv->pd));
+               claim_zero(ibv_close_device(priv->ctx));
+       }
+       priv_unlock(priv);
+       /* Clear every field, including the stale pd/ctx pointers. */
+       memset(priv, 0, sizeof(*priv));
+}
+
+/* Device operations table; only the close callback is set here. */
+static const struct eth_dev_ops mlx5_dev_ops = {
+       .dev_close = mlx5_dev_close,
+};
+
+/*
+ * Table of adapters known to this driver, looked up by PCI address through
+ * mlx5_dev_idx(). An entry whose ports bit-field is zero is considered free.
+ */
+static struct {
+       struct rte_pci_addr pci_addr; /* associated PCI address */
+       uint32_t ports; /* physical ports bitfield. */
+} mlx5_dev[32];
+
+/**
+ * Look up the mlx5_dev[] slot associated with a PCI bus address.
+ *
+ * An entry whose PCI address matches is returned immediately. Otherwise
+ * the first entry without any registered port is returned so that the
+ * caller can use it for a new adapter.
+ *
+ * @param[in] pci_addr
+ *   PCI bus address to look for.
+ *
+ * @return
+ *   mlx5_dev[] index on success, -1 on failure.
+ */
+static int
+mlx5_dev_idx(struct rte_pci_addr *pci_addr)
+{
+       unsigned int slot;
+       int first_free = -1;
+
+       assert(pci_addr != NULL);
+       for (slot = 0; slot < RTE_DIM(mlx5_dev); slot++) {
+               const struct rte_pci_addr *known = &mlx5_dev[slot].pci_addr;
+
+               if ((known->domain == pci_addr->domain) &&
+                   (known->bus == pci_addr->bus) &&
+                   (known->devid == pci_addr->devid) &&
+                   (known->function == pci_addr->function))
+                       return slot;
+               /* Remember only the first unused slot. */
+               if ((first_free == -1) && (mlx5_dev[slot].ports == 0))
+                       first_free = slot;
+       }
+       return first_free;
+}
+
+/* Forward declaration: mlx5_pci_devinit() refers to it before its definition. */
+static struct eth_driver mlx5_driver;
+
+/**
+ * DPDK callback to register a PCI device.
+ *
+ * This function creates an Ethernet device for each port of a given
+ * PCI device.
+ *
+ * The Verbs device list is searched for the device whose PCI address
+ * matches pci_dev. When the list cannot be obtained because of ENOSYS, or
+ * when no matching device can be opened (err is 0 or EINVAL), a warning is
+ * logged and 0 is returned.
+ *
+ * @param[in] pci_drv
+ *   PCI driver structure (mlx5_driver).
+ * @param[in] pci_dev
+ *   PCI device information.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
+{
+       struct ibv_device **list;
+       struct ibv_device *ibv_dev;
+       int err = 0;
+       struct ibv_context *attr_ctx = NULL;
+       struct ibv_device_attr device_attr;
+       unsigned int vf;
+       int idx;
+       int i;
+
+       (void)pci_drv;
+       assert(pci_drv == &mlx5_driver.pci_drv);
+       /* Get mlx5_dev[] index. */
+       idx = mlx5_dev_idx(&pci_dev->addr);
+       if (idx == -1) {
+               ERROR("this driver cannot support any more adapters");
+               return -ENOMEM;
+       }
+       DEBUG("using driver device index %d", idx);
+
+       /* Save PCI address. */
+       mlx5_dev[idx].pci_addr = pci_dev->addr;
+       list = ibv_get_device_list(&i);
+       if (list == NULL) {
+               assert(errno);
+               if (errno == ENOSYS) {
+                       WARN("cannot list devices, is ib_uverbs loaded?");
+                       return 0;
+               }
+               return -errno;
+       }
+       assert(i >= 0);
+       /*
+        * For each listed device, check related sysfs entry against
+        * the provided PCI ID.
+        */
+       while (i != 0) {
+               struct rte_pci_addr pci_addr;
+
+               --i;
+               DEBUG("checking device \"%s\"", list[i]->name);
+               if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr))
+                       continue;
+               if ((pci_dev->addr.domain != pci_addr.domain) ||
+                   (pci_dev->addr.bus != pci_addr.bus) ||
+                   (pci_dev->addr.devid != pci_addr.devid) ||
+                   (pci_dev->addr.function != pci_addr.function))
+                       continue;
+               vf = ((pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
+                     (pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF));
+               INFO("PCI information matches, using device \"%s\" (VF: %s)",
+                    list[i]->name, (vf ? "true" : "false"));
+               attr_ctx = ibv_open_device(list[i]);
+               err = errno;
+               break;
+       }
+       if (attr_ctx == NULL) {
+               ibv_free_device_list(list);
+               switch (err) {
+               case 0:
+                       WARN("cannot access device, is mlx5_ib loaded?");
+                       return 0;
+               case EINVAL:
+                       WARN("cannot use device, are drivers up to date?");
+                       return 0;
+               }
+               assert(err > 0);
+               return -err;
+       }
+       /* The loop above stopped on the matching entry, i still indexes it. */
+       ibv_dev = list[i];
+
+       DEBUG("device opened");
+       if (ibv_query_device(attr_ctx, &device_attr)) {
+               /*
+                * err still holds the errno sampled after the successful
+                * ibv_open_device() call, set it explicitly so that this
+                * failure is reported to the caller.
+                */
+               err = ENODEV;
+               goto error;
+       }
+       INFO("%u port(s) detected", device_attr.phys_port_cnt);
+
+       for (i = 0; i < device_attr.phys_port_cnt; i++) {
+               uint32_t port = i + 1; /* ports are indexed from one */
+               uint32_t test = (1 << i);
+               struct ibv_context *ctx = NULL;
+               struct ibv_port_attr port_attr;
+               struct ibv_pd *pd = NULL;
+               struct priv *priv = NULL;
+               struct rte_eth_dev *eth_dev;
+#ifdef HAVE_EXP_QUERY_DEVICE
+               struct ibv_exp_device_attr exp_device_attr;
+#endif /* HAVE_EXP_QUERY_DEVICE */
+               struct ether_addr mac;
+
+#ifdef HAVE_EXP_QUERY_DEVICE
+               exp_device_attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS;
+#ifdef RSS_SUPPORT
+               exp_device_attr.comp_mask |= IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ;
+#endif /* RSS_SUPPORT */
+#endif /* HAVE_EXP_QUERY_DEVICE */
+
+               DEBUG("using port %u (%08" PRIx32 ")", port, test);
+
+               /* Each port gets its own Verbs context. */
+               ctx = ibv_open_device(ibv_dev);
+               if (ctx == NULL) {
+                       err = ENODEV;
+                       goto port_error;
+               }
+
+               /* Check port status. */
+               err = ibv_query_port(ctx, port, &port_attr);
+               if (err) {
+                       ERROR("port query failed: %s", strerror(err));
+                       goto port_error;
+               }
+               if (port_attr.state != IBV_PORT_ACTIVE)
+                       DEBUG("port %d is not active: \"%s\" (%d)",
+                             port, ibv_port_state_str(port_attr.state),
+                             port_attr.state);
+
+               /* Allocate protection domain. */
+               pd = ibv_alloc_pd(ctx);
+               if (pd == NULL) {
+                       ERROR("PD allocation failure");
+                       err = ENOMEM;
+                       goto port_error;
+               }
+
+               mlx5_dev[idx].ports |= test;
+
+               /* from rte_ethdev.c */
+               priv = rte_zmalloc("ethdev private structure",
+                                  sizeof(*priv),
+                                  RTE_CACHE_LINE_SIZE);
+               if (priv == NULL) {
+                       ERROR("priv allocation failure");
+                       err = ENOMEM;
+                       goto port_error;
+               }
+
+               priv->ctx = ctx;
+               priv->device_attr = device_attr;
+               priv->port = port;
+               priv->pd = pd;
+               priv->mtu = ETHER_MTU;
+#ifdef HAVE_EXP_QUERY_DEVICE
+               if (ibv_exp_query_device(ctx, &exp_device_attr)) {
+                       ERROR("ibv_exp_query_device() failed");
+                       err = ENODEV;
+                       goto port_error;
+               }
+#ifdef RSS_SUPPORT
+               if ((exp_device_attr.exp_device_cap_flags &
+                    IBV_EXP_DEVICE_QPG) &&
+                   (exp_device_attr.exp_device_cap_flags &
+                    IBV_EXP_DEVICE_UD_RSS) &&
+                   (exp_device_attr.comp_mask &
+                    IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ) &&
+                   (exp_device_attr.max_rss_tbl_sz > 0)) {
+                       priv->hw_qpg = 1;
+                       priv->hw_rss = 1;
+                       priv->max_rss_tbl_sz = exp_device_attr.max_rss_tbl_sz;
+               } else {
+                       priv->hw_qpg = 0;
+                       priv->hw_rss = 0;
+                       priv->max_rss_tbl_sz = 0;
+               }
+               priv->hw_tss = !!(exp_device_attr.exp_device_cap_flags &
+                                 IBV_EXP_DEVICE_UD_TSS);
+               DEBUG("device flags: %s%s%s",
+                     (priv->hw_qpg ? "IBV_DEVICE_QPG " : ""),
+                     (priv->hw_tss ? "IBV_DEVICE_TSS " : ""),
+                     (priv->hw_rss ? "IBV_DEVICE_RSS " : ""));
+               if (priv->hw_rss)
+                       DEBUG("maximum RSS indirection table size: %u",
+                             exp_device_attr.max_rss_tbl_sz);
+#endif /* RSS_SUPPORT */
+
+               priv->hw_csum =
+                       ((exp_device_attr.exp_device_cap_flags &
+                         IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
+                        (exp_device_attr.exp_device_cap_flags &
+                         IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
+               DEBUG("checksum offloading is %ssupported",
+                     (priv->hw_csum ? "" : "not "));
+
+               priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
+                                        IBV_EXP_DEVICE_VXLAN_SUPPORT);
+               DEBUG("L2 tunnel checksum offloads are %ssupported",
+                     (priv->hw_csum_l2tun ? "" : "not "));
+
+#endif /* HAVE_EXP_QUERY_DEVICE */
+
+               priv->vf = vf;
+               /* Configure the first MAC address by default. */
+               if (priv_get_mac(priv, &mac.addr_bytes)) {
+                       ERROR("cannot get MAC address, is mlx5_en loaded?"
+                             " (errno: %s)", strerror(errno));
+                       err = ENODEV;
+                       goto port_error;
+               }
+               INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
+                    priv->port,
+                    mac.addr_bytes[0], mac.addr_bytes[1],
+                    mac.addr_bytes[2], mac.addr_bytes[3],
+                    mac.addr_bytes[4], mac.addr_bytes[5]);
+               /* Register MAC and broadcast addresses. */
+               claim_zero(priv_mac_addr_add(priv, 0,
+                                            (const uint8_t (*)[ETHER_ADDR_LEN])
+                                            mac.addr_bytes));
+               claim_zero(priv_mac_addr_add(priv, 1,
+                                            &(const uint8_t [ETHER_ADDR_LEN])
+                                            { "\xff\xff\xff\xff\xff\xff" }));
+#ifndef NDEBUG
+               {
+                       char ifname[IF_NAMESIZE];
+
+                       if (priv_get_ifname(priv, &ifname) == 0)
+                               DEBUG("port %u ifname is \"%s\"",
+                                     priv->port, ifname);
+                       else
+                               DEBUG("port %u ifname is unknown", priv->port);
+               }
+#endif
+               /* Get actual MTU if possible. */
+               priv_get_mtu(priv, &priv->mtu);
+               DEBUG("port %u MTU is %u", priv->port, priv->mtu);
+
+               /* from rte_ethdev.c */
+               {
+                       char name[RTE_ETH_NAME_MAX_LEN];
+
+                       snprintf(name, sizeof(name), "%s port %u",
+                                ibv_get_device_name(ibv_dev), port);
+                       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI);
+               }
+               if (eth_dev == NULL) {
+                       ERROR("can not allocate rte ethdev");
+                       err = ENOMEM;
+                       goto port_error;
+               }
+
+               eth_dev->data->dev_private = priv;
+               eth_dev->pci_dev = pci_dev;
+               eth_dev->driver = &mlx5_driver;
+               eth_dev->data->rx_mbuf_alloc_failed = 0;
+               eth_dev->data->mtu = ETHER_MTU;
+
+               priv->dev = eth_dev;
+               eth_dev->dev_ops = &mlx5_dev_ops;
+               eth_dev->data->mac_addrs = priv->mac;
+
+               /* Bring Ethernet device up. */
+               DEBUG("forcing Ethernet interface up");
+               priv_set_flags(priv, ~IFF_UP, IFF_UP);
+               continue;
+
+port_error:
+               /* Release what was acquired for this port, then give up. */
+               rte_free(priv);
+               if (pd)
+                       claim_zero(ibv_dealloc_pd(pd));
+               if (ctx)
+                       claim_zero(ibv_close_device(ctx));
+               break;
+       }
+
+       /*
+        * XXX if something went wrong in the loop above, there is a resource
+        * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as
+        * long as the dpdk does not provide a way to deallocate a ethdev and a
+        * way to enumerate the registered ethdevs to free the previous ones.
+        */
+
+       /* no port found, complain */
+       if (!mlx5_dev[idx].ports) {
+               err = ENODEV;
+               goto error;
+       }
+
+error:
+       /* Common exit path, also taken on success (err is 0 in that case). */
+       if (attr_ctx)
+               claim_zero(ibv_close_device(attr_ctx));
+       if (list)
+               ibv_free_device_list(list);
+       assert(err >= 0);
+       return -err;
+}
+
+/*
+ * PCI IDs handled by this driver: ConnectX-4 and ConnectX-4 Lx adapters and
+ * their VF variants, for any subsystem IDs. The final entry only sets
+ * vendor_id to 0 (presumably the table terminator -- confirm against the
+ * PCI probing code).
+ */
+static const struct rte_pci_id mlx5_pci_id_map[] = {
+       {
+               .vendor_id = PCI_VENDOR_ID_MELLANOX,
+               .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX4,
+               .subsystem_vendor_id = PCI_ANY_ID,
+               .subsystem_device_id = PCI_ANY_ID
+       },
+       {
+               .vendor_id = PCI_VENDOR_ID_MELLANOX,
+               .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX4VF,
+               .subsystem_vendor_id = PCI_ANY_ID,
+               .subsystem_device_id = PCI_ANY_ID
+       },
+       {
+               .vendor_id = PCI_VENDOR_ID_MELLANOX,
+               .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX4LX,
+               .subsystem_vendor_id = PCI_ANY_ID,
+               .subsystem_device_id = PCI_ANY_ID
+       },
+       {
+               .vendor_id = PCI_VENDOR_ID_MELLANOX,
+               .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF,
+               .subsystem_vendor_id = PCI_ANY_ID,
+               .subsystem_device_id = PCI_ANY_ID
+       },
+       {
+               .vendor_id = 0
+       }
+};
+
+/*
+ * Ethernet driver description: driver name, supported PCI IDs,
+ * mlx5_pci_devinit() as the PCI device init callback and the size of the
+ * per-port private structure.
+ */
+static struct eth_driver mlx5_driver = {
+       .pci_drv = {
+               .name = MLX5_DRIVER_NAME,
+               .id_table = mlx5_pci_id_map,
+               .devinit = mlx5_pci_devinit,
+       },
+       .dev_private_size = sizeof(struct priv)
+};
+
+/**
+ * Driver initialization routine.
+ *
+ * Sets RDMAV_HUGEPAGES_SAFE in the environment, calls ibv_fork_init() and
+ * registers the PCI driver. The return values of setenv() and
+ * ibv_fork_init() are not checked.
+ *
+ * @param name
+ *   Unused.
+ * @param args
+ *   Unused.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+rte_mlx5_pmd_init(const char *name, const char *args)
+{
+       (void)name;
+       (void)args;
+       /*
+        * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
+        * huge pages. Calling ibv_fork_init() during init allows
+        * applications to use fork() safely for purposes other than
+        * using this PMD, which is not supported in forked processes.
+        */
+       setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
+       ibv_fork_init();
+       rte_eal_pci_register(&mlx5_driver.pci_drv);
+       return 0;
+}
+
+/* Driver entry registered below, with rte_mlx5_pmd_init() as init hook. */
+static struct rte_driver rte_mlx5_driver = {
+       .type = PMD_PDEV,
+       .name = MLX5_DRIVER_NAME,
+       .init = rte_mlx5_pmd_init,
+};
+
+PMD_REGISTER_DRIVER(rte_mlx5_driver)
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
new file mode 100644 (file)
index 0000000..21db3cd
--- /dev/null
@@ -0,0 +1,147 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 6WIND S.A.
+ *   Copyright 2015 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX5_H_
+#define RTE_PMD_MLX5_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <linux/if.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+/* DPDK headers don't like -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_spinlock.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+#include "mlx5_utils.h"
+#include "mlx5_autoconf.h"
+#include "mlx5_defs.h"
+
+/* PCI vendor ID used in the table of supported devices. */
+enum {
+       PCI_VENDOR_ID_MELLANOX = 0x15b3,
+};
+
+/*
+ * PCI device IDs of supported adapters. The two *VF values are the ones the
+ * probing code compares against to flag a device as a VF.
+ */
+enum {
+       PCI_DEVICE_ID_MELLANOX_CONNECTX4 = 0x1013,
+       PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014,
+       PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015,
+       PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016,
+};
+
+/*
+ * Per-port private data. One instance is allocated for each physical port
+ * and attached to the corresponding Ethernet device as its dev_private.
+ */
+struct priv {
+       struct rte_eth_dev *dev; /* Ethernet device. */
+       struct ibv_context *ctx; /* Verbs context. */
+       struct ibv_device_attr device_attr; /* Device properties. */
+       struct ibv_pd *pd; /* Protection Domain. */
+       /*
+        * MAC addresses array and configuration bit-field.
+        * An extra entry that cannot be modified by the DPDK is reserved
+        * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
+        */
+       struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES];
+       BITFIELD_DECLARE(mac_configured, uint32_t, MLX5_MAX_MAC_ADDRESSES);
+       /* Device properties. */
+       uint16_t mtu; /* Configured MTU. */
+       uint8_t port; /* Physical port number. */
+       unsigned int started:1; /* Device started, flows enabled. */
+       unsigned int hw_qpg:1; /* QP groups are supported. */
+       unsigned int hw_tss:1; /* TSS is supported. */
+       unsigned int hw_rss:1; /* RSS is supported. */
+       unsigned int hw_csum:1; /* Checksum offload is supported. */
+       unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
+       unsigned int rss:1; /* RSS is enabled. */
+       unsigned int vf:1; /* This is a VF device. */
+       unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */
+       rte_spinlock_t lock; /* Lock for control functions. */
+};
+
+/**
+ * Lock private structure to protect it from concurrent access in the
+ * control path.
+ *
+ * Thin wrapper around rte_spinlock_lock() applied to priv->lock; must be
+ * paired with priv_unlock().
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static inline void
+priv_lock(struct priv *priv)
+{
+       rte_spinlock_lock(&priv->lock);
+}
+
+/**
+ * Unlock private structure.
+ *
+ * Thin wrapper around rte_spinlock_unlock() applied to priv->lock.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static inline void
+priv_unlock(struct priv *priv)
+{
+       rte_spinlock_unlock(&priv->lock);
+}
+
+/*
+ * Functions implemented in other files of this driver.
+ *
+ * NOTE(review): call sites in mlx5.c treat a return value of 0 as success
+ * for priv_get_ifname(), mlx5_ibv_device_to_pci_addr(), priv_get_mac() and
+ * priv_mac_addr_add() -- confirm against the definitions.
+ */
+
+/* mlx5_ethdev.c */
+
+int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]);
+int priv_ifreq(const struct priv *, int req, struct ifreq *);
+int priv_get_mtu(struct priv *, uint16_t *);
+int priv_set_flags(struct priv *, unsigned int, unsigned int);
+int mlx5_ibv_device_to_pci_addr(const struct ibv_device *,
+                               struct rte_pci_addr *);
+
+/* mlx5_mac.c */
+
+int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]);
+int priv_mac_addr_add(struct priv *, unsigned int,
+                     const uint8_t (*)[ETHER_ADDR_LEN]);
+
+#endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
new file mode 100644 (file)
index 0000000..c66a74f
--- /dev/null
@@ -0,0 +1,43 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 6WIND S.A.
+ *   Copyright 2015 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Compile-time constants shared by the mlx5 driver sources. */
+#ifndef RTE_PMD_MLX5_DEFS_H_
+#define RTE_PMD_MLX5_DEFS_H_
+
+/* Reported driver name. */
+#define MLX5_DRIVER_NAME "librte_pmd_mlx5"
+
+/*
+ * Maximum number of simultaneous MAC addresses. Sizes both the MAC address
+ * array and the mac_configured bit-field of struct priv.
+ */
+#define MLX5_MAX_MAC_ADDRESSES 128
+
+#endif /* RTE_PMD_MLX5_DEFS_H_ */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
new file mode 100644 (file)
index 0000000..b6c7d7a
--- /dev/null
@@ -0,0 +1,420 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 6WIND S.A.
+ *   Copyright 2015 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <dirent.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <linux/if.h>
+
+/* DPDK headers don't like -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include <rte_atomic.h>
+#include <rte_ethdev.h>
+#include <rte_mbuf.h>
+#include <rte_common.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+
+/**
+ * Get interface name from private structure.
+ *
+ * Scans the "device/net" directory below the Verbs device sysfs path and
+ * keeps the entry whose "dev_port" value (or "dev_id" as a fallback) equals
+ * the zero-based port number (priv->port - 1).
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ * @param[out] ifname
+ *   Interface name output buffer.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set (ENOENT when no interface
+ *   matches the port).
+ */
+int
+priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
+{
+       DIR *dir;
+       struct dirent *dent;
+       /* 0: compare against "dev_port", 1: compare against "dev_id". */
+       unsigned int dev_type = 0;
+       unsigned int dev_port_prev = ~0u;
+       char match[IF_NAMESIZE] = "";
+
+       {
+               MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
+
+               dir = opendir(path);
+               if (dir == NULL)
+                       return -1;
+       }
+       while ((dent = readdir(dir)) != NULL) {
+               char *name = dent->d_name;
+               FILE *file;
+               unsigned int dev_port;
+               int r;
+
+               /* Skip the "." and ".." entries. */
+               if ((name[0] == '.') &&
+                   ((name[1] == '\0') ||
+                    ((name[1] == '.') && (name[2] == '\0'))))
+                       continue;
+
+               MKSTR(path, "%s/device/net/%s/%s",
+                     priv->ctx->device->ibdev_path, name,
+                     (dev_type ? "dev_id" : "dev_port"));
+
+               file = fopen(path, "rb");
+               if (file == NULL) {
+                       if (errno != ENOENT)
+                               continue;
+                       /*
+                        * Switch to dev_id when dev_port does not exist as
+                        * is the case with Linux kernel versions < 3.15.
+                        */
+try_dev_id:
+                       /* Discard any partial result and restart the scan. */
+                       match[0] = '\0';
+                       if (dev_type)
+                               break;
+                       dev_type = 1;
+                       dev_port_prev = ~0u;
+                       rewinddir(dir);
+                       continue;
+               }
+               /* dev_id is parsed as hexadecimal, dev_port as decimal. */
+               r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
+               fclose(file);
+               if (r != 1)
+                       continue;
+               /*
+                * Switch to dev_id when dev_port returns the same value for
+                * all ports. May happen when using a MOFED release older than
+                * 3.0 with a Linux kernel >= 3.15.
+                */
+               if (dev_port == dev_port_prev)
+                       goto try_dev_id;
+               dev_port_prev = dev_port;
+               if (dev_port == (priv->port - 1u))
+                       snprintf(match, sizeof(match), "%s", name);
+       }
+       closedir(dir);
+       if (match[0] == '\0') {
+               /*
+                * Nothing above reliably leaves a meaningful errno in this
+                * case, set one explicitly to honor the documented contract.
+                */
+               errno = ENOENT;
+               return -1;
+       }
+       strncpy(*ifname, match, sizeof(*ifname));
+       return 0;
+}
+
+/**
+ * Read from sysfs entry.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ * @param[in] entry
+ *   Entry name relative to sysfs path.
+ * @param[out] buf
+ *   Data output buffer.
+ * @param size
+ *   Buffer size.
+ *
+ * @return
+ *   Number of bytes actually stored in buf on success (may be less than
+ *   size when the entry is shorter), -1 on failure and errno is set.
+ */
+static int
+priv_sysfs_read(const struct priv *priv, const char *entry,
+               char *buf, size_t size)
+{
+       char ifname[IF_NAMESIZE];
+       FILE *file;
+       size_t len;
+       int ret;
+       int err;
+
+       if (priv_get_ifname(priv, &ifname))
+               return -1;
+
+       MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
+             ifname, entry);
+
+       file = fopen(path, "rb");
+       if (file == NULL)
+               return -1;
+       len = fread(buf, 1, size, file);
+       /* Save errno before fclose() gets a chance to overwrite it. */
+       err = errno;
+       if ((len < size) && (ferror(file)))
+               ret = -1;
+       else
+               /*
+                * A short read without error simply means end of file.
+                * Report the real length so that callers do not treat the
+                * untouched remainder of buf as valid data.
+                */
+               ret = len;
+       fclose(file);
+       errno = err;
+       return ret;
+}
+
+/**
+ * Write to sysfs entry.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ * @param[in] entry
+ *   Entry name relative to sysfs path.
+ * @param[in] buf
+ *   Data buffer.
+ * @param size
+ *   Buffer size.
+ *
+ * @return
+ *   size on success, -1 on failure and errno is set.
+ */
+static int
+priv_sysfs_write(const struct priv *priv, const char *entry,
+                char *buf, size_t size)
+{
+       char ifname[IF_NAMESIZE];
+       FILE *file;
+       int ret;
+       int err;
+
+       if (priv_get_ifname(priv, &ifname))
+               return -1;
+
+       MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
+             ifname, entry);
+
+       file = fopen(path, "wb");
+       if (file == NULL)
+               return -1;
+       ret = fwrite(buf, 1, size, file);
+       err = errno;
+       if (((size_t)ret < size) || (ferror(file)))
+               ret = -1;
+       else
+               ret = size;
+       /*
+        * The stream is buffered: data may only be handed to the kernel
+        * when the stream is flushed by fclose(), which is therefore where
+        * a rejected sysfs write is reported.
+        */
+       if ((fclose(file) != 0) && (ret != -1)) {
+               err = errno;
+               ret = -1;
+       }
+       errno = err;
+       return ret;
+}
+
+/**
+ * Get unsigned long sysfs property.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] name
+ *   Entry name relative to sysfs path.
+ * @param[out] value
+ *   Value output buffer, only written on success.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set.
+ */
+static int
+priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
+{
+       int ret;
+       unsigned long value_ret;
+       /*
+        * Zero-filled so that the string is properly terminated right after
+        * the bytes actually read, whatever length priv_sysfs_read() reports.
+        */
+       char value_str[32] = "";
+
+       /* Keep the last byte for the terminating null character. */
+       ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
+       if (ret == -1) {
+               DEBUG("cannot read %s value from sysfs: %s",
+                     name, strerror(errno));
+               return -1;
+       }
+       value_str[ret] = '\0';
+       /* strtoul() reports errors through errno only, reset it first. */
+       errno = 0;
+       value_ret = strtoul(value_str, NULL, 0);
+       if (errno) {
+               DEBUG("invalid %s value `%s': %s", name, value_str,
+                     strerror(errno));
+               return -1;
+       }
+       *value = value_ret;
+       return 0;
+}
+
+/**
+ * Set unsigned long sysfs property.
+ *
+ * The value is written in decimal, without a terminating null character.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] name
+ *   Entry name relative to sysfs path.
+ * @param value
+ *   Value to set.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set.
+ */
+static int
+priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
+{
+       MKSTR(value_str, "%lu", value);
+
+       if (priv_sysfs_write(priv, name, value_str,
+                            (sizeof(value_str) - 1)) != -1)
+               return 0;
+       DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
+             name, value_str, value, strerror(errno));
+       return -1;
+}
+
+/**
+ * Perform ifreq ioctl() on associated Ethernet device.
+ *
+ * A temporary datagram socket is opened for the request and closed before
+ * returning; the interface name field of ifr is filled in by this function.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ * @param req
+ *   Request number to pass to ioctl().
+ * @param[out] ifr
+ *   Interface request structure output buffer.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set.
+ */
+int
+priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr)
+{
+       int fd;
+       int rc;
+
+       fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
+       if (fd == -1)
+               return -1;
+       if (priv_get_ifname(priv, &ifr->ifr_name))
+               rc = -1;
+       else
+               rc = ioctl(fd, req, ifr);
+       close(fd);
+       return rc;
+}
+
+/**
+ * Get device MTU.
+ *
+ * The value is read from the "mtu" sysfs entry and converted to uint16_t.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[out] mtu
+ *   MTU value output buffer, only written on success.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set.
+ */
+int
+priv_get_mtu(struct priv *priv, uint16_t *mtu)
+{
+       unsigned long sysfs_mtu;
+
+       if (priv_get_sysfs_ulong(priv, "mtu", &sysfs_mtu) != -1) {
+               *mtu = sysfs_mtu;
+               return 0;
+       }
+       return -1;
+}
+
+/**
+ * Set device flags.
+ *
+ * Reads the current "flags" sysfs entry, masks it with keep, ORs flags in
+ * and writes the result back.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param keep
+ *   Bitmask for flags that must remain untouched.
+ * @param flags
+ *   Bitmask for flags to modify.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set.
+ */
+int
+priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
+{
+       unsigned long current;
+
+       if (priv_get_sysfs_ulong(priv, "flags", &current) == -1)
+               return -1;
+       return priv_set_sysfs_ulong(priv, "flags", ((current & keep) | flags));
+}
+
+/**
+ * Get PCI information from struct ibv_device.
+ *
+ * Looks for the PCI_SLOT_NAME line of the "device/uevent" sysfs file that
+ * belongs to the Verbs device and parses it as domain:bus:devid.function
+ * (all hexadecimal).
+ *
+ * @param device
+ *   Pointer to Verbs device structure.
+ * @param[out] pci_addr
+ *   PCI bus address output buffer.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set (ENOENT when the file
+ *   holds no usable PCI_SLOT_NAME line).
+ */
+int
+mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
+                           struct rte_pci_addr *pci_addr)
+{
+       FILE *file;
+       char line[32];
+       int found = 0;
+       MKSTR(path, "%s/device/uevent", device->ibdev_path);
+
+       file = fopen(path, "rb");
+       if (file == NULL)
+               return -1;
+       while (fgets(line, sizeof(line), file) == line) {
+               size_t len = strlen(line);
+               int c;
+
+               /*
+                * Truncate long lines: consume the remainder of the line,
+                * storing each character in the last slot until a line feed
+                * (or end of file) is reached.
+                */
+               if (len == (sizeof(line) - 1))
+                       while (line[(len - 1)] != '\n') {
+                               c = fgetc(file);
+                               if (c == EOF)
+                                       break;
+                               line[(len - 1)] = c;
+                       }
+               /* Extract information. */
+               if (sscanf(line,
+                          "PCI_SLOT_NAME="
+                          "%" SCNx16 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
+                          &pci_addr->domain,
+                          &pci_addr->bus,
+                          &pci_addr->devid,
+                          &pci_addr->function) == 4) {
+                       found = 1;
+                       break;
+               }
+       }
+       fclose(file);
+       if (!found) {
+               /* pci_addr was not filled in, do not report success. */
+               errno = ENOENT;
+               return -1;
+       }
+       return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
new file mode 100644 (file)
index 0000000..f7e1cf6
--- /dev/null
@@ -0,0 +1,150 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 6WIND S.A.
+ *   Copyright 2015 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <linux/if.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+/* DPDK headers don't like -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-pedantic"
+#endif
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_common.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-pedantic"
+#endif
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+
+/**
+ * Get MAC address by querying netdevice.
+ *
+ * Issues a SIOCGIFHWADDR request and copies the first ETHER_ADDR_LEN bytes
+ * of the returned hardware address.
+ *
+ * @param[in] priv
+ *   struct priv for the requested device.
+ * @param[out] mac
+ *   MAC address output buffer, only written on success.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set.
+ */
+int
+priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
+{
+       struct ifreq ifr;
+
+       if (priv_ifreq(priv, SIOCGIFHWADDR, &ifr) != 0)
+               return -1;
+       memcpy(*mac, ifr.ifr_hwaddr.sa_data, sizeof(*mac));
+       return 0;
+}
+
+/**
+ * Unregister a MAC address.
+ *
+ * Clears the "configured" bit of the given slot; does nothing when the slot
+ * is not configured.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param mac_index
+ *   MAC address index.
+ */
+static void
+priv_mac_addr_del(struct priv *priv, unsigned int mac_index)
+{
+       assert(mac_index < RTE_DIM(priv->mac));
+       if (BITFIELD_ISSET(priv->mac_configured, mac_index))
+               BITFIELD_RESET(priv->mac_configured, mac_index);
+}
+
+/**
+ * Register a MAC address.
+ *
+ * The address is rejected when another configured slot already holds it;
+ * otherwise it is stored in the requested slot, replacing any previous
+ * content, and the slot is marked as configured.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param mac_index
+ *   MAC address index to use.
+ * @param mac
+ *   MAC address to register.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+priv_mac_addr_add(struct priv *priv, unsigned int mac_index,
+                 const uint8_t (*mac)[ETHER_ADDR_LEN])
+{
+       unsigned int idx;
+
+       assert(mac_index < RTE_DIM(priv->mac));
+       /*
+        * Look for the same address in any other configured slot; the target
+        * slot itself is ignored since it is about to be overwritten.
+        */
+       for (idx = 0; (idx < RTE_DIM(priv->mac)); idx++) {
+               if ((idx != mac_index) &&
+                   BITFIELD_ISSET(priv->mac_configured, idx) &&
+                   !memcmp(priv->mac[idx].addr_bytes, *mac, sizeof(*mac)))
+                       return EADDRINUSE;
+       }
+       /* Release the slot first when it is currently in use. */
+       if (BITFIELD_ISSET(priv->mac_configured, mac_index))
+               priv_mac_addr_del(priv, mac_index);
+       priv->mac[mac_index] = (struct ether_addr){
+               {
+                       (*mac)[0], (*mac)[1], (*mac)[2],
+                       (*mac)[3], (*mac)[4], (*mac)[5]
+               }
+       };
+       BITFIELD_SET(priv->mac_configured, mac_index);
+       return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
new file mode 100644 (file)
index 0000000..cc6aab6
--- /dev/null
@@ -0,0 +1,149 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2015 6WIND S.A.
+ *   Copyright 2015 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX5_UTILS_H_
+#define RTE_PMD_MLX5_UTILS_H_
+
+#include <stddef.h>
+#include <stdio.h>
+#include <limits.h>
+#include <assert.h>
+#include <errno.h>
+
+#include "mlx5_defs.h"
+
+/*
+ * Bit-field manipulation.
+ *
+ * A bit-field is a plain array of "type" elements; bit number b lives in
+ * element (b / bits-per-element) at position (b % bits-per-element).
+ */
+/* Declare array bf with enough "type" elements to hold "size" bits. */
+#define BITFIELD_DECLARE(bf, type, size) \
+       type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \
+                !!((size_t)(size) % (sizeof(type) * CHAR_BIT)))]
+/* Same as BITFIELD_DECLARE() with a { 0 } initializer. */
+#define BITFIELD_DEFINE(bf, type, size) \
+       BITFIELD_DECLARE((bf), type, (size)) = { 0 }
+/*
+ * Set bit b. b is asserted to be within the array; bf must be the array
+ * itself since sizeof(bf) is used for that check. The mask is computed as
+ * a size_t.
+ */
+#define BITFIELD_SET(bf, b) \
+       (assert((size_t)(b) < (sizeof(bf) * CHAR_BIT)), \
+        (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] |= \
+               ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))))
+/* Clear bit b, with the same range assertion as BITFIELD_SET(). */
+#define BITFIELD_RESET(bf, b) \
+       (assert((size_t)(b) < (sizeof(bf) * CHAR_BIT)), \
+        (void)((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] &= \
+               ~((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT)))))
+/* Evaluate to 1 when bit b is set, 0 otherwise (same range assertion). */
+#define BITFIELD_ISSET(bf, b) \
+       (assert((size_t)(b) < (sizeof(bf) * CHAR_BIT)), \
+        !!(((bf)[((b) / (sizeof((bf)[0]) * CHAR_BIT))] & \
+            ((size_t)1 << ((b) % (sizeof((bf)[0]) * CHAR_BIT))))))
+
+/*
+ * Save and restore errno around argument evaluation.
+ *
+ * Builds the compound literal { errno, ((x), 0) } and assigns its first
+ * element back to errno.
+ * NOTE(review): this relies on the first initializer being evaluated before
+ * x -- confirm against the C standard rules on initializer evaluation order.
+ */
+#define ERRNO_SAFE(x) ((errno = (int []){ errno, ((x), 0) }[0]))
+
+/*
+ * Helper macros to work around __VA_ARGS__ limitations in a C99 compliant
+ * manner.
+ *
+ * PMD_DRV_LOG_STRIP() keeps its first argument and drops the second one;
+ * the other macros expand to a parenthesis or a comma only when rescanned,
+ * which lets them travel inside a single macro argument.
+ */
+#define PMD_DRV_LOG_STRIP(a, b) a
+#define PMD_DRV_LOG_OPAREN (
+#define PMD_DRV_LOG_CPAREN )
+#define PMD_DRV_LOG_COMMA ,
+
+/*
+ * Return the file name part of a path, i.e. a pointer into s located right
+ * after its last '/' (s itself when it contains none).
+ */
+static inline const char *
+pmd_drv_log_basename(const char *s)
+{
+       const char *base = s;
+       const char *cur;
+
+       for (cur = s; (*cur != '\0'); ++cur)
+               if (*cur == '/')
+                       base = (cur + 1);
+       return base;
+}
+
+/*
+ * When debugging is enabled (NDEBUG not defined), file, line and function
+ * information replace the driver name (MLX5_DRIVER_NAME) in log messages.
+ *
+ * In both variants the chain is PMD_DRV_LOG_() -> PMD_DRV_LOG__() ->
+ * PMD_DRV_LOG___(), the latter calling RTE_LOG() under ERRNO_SAFE() so that
+ * logging does not modify errno.
+ */
+#ifndef NDEBUG
+
+/* Final step: emit the message through RTE_LOG(). */
+#define PMD_DRV_LOG___(level, ...) \
+       ERRNO_SAFE(RTE_LOG(level, PMD, __VA_ARGS__))
+/* Prepend the "file:line: function(): " prefix to the format string. */
+#define PMD_DRV_LOG__(level, ...) \
+       PMD_DRV_LOG___(level, "%s:%u: %s(): " __VA_ARGS__)
+/*
+ * Append the line feed to format string s and insert the prefix arguments
+ * ahead of the caller's ones. PMD_DRV_LOG_COMMA is used instead of plain
+ * commas so that this part still counts as one macro argument at this
+ * stage.
+ */
+#define PMD_DRV_LOG_(level, s, ...) \
+       PMD_DRV_LOG__(level, \
+               s "\n" PMD_DRV_LOG_COMMA \
+               pmd_drv_log_basename(__FILE__) PMD_DRV_LOG_COMMA \
+               __LINE__ PMD_DRV_LOG_COMMA \
+               __func__, \
+               __VA_ARGS__)
+
+#else /* NDEBUG */
+
+/* Final step: prefix the format string with the driver name. */
+#define PMD_DRV_LOG___(level, ...) \
+       ERRNO_SAFE(RTE_LOG(level, PMD, MLX5_DRIVER_NAME ": " __VA_ARGS__))
+#define PMD_DRV_LOG__(level, ...) \
+       PMD_DRV_LOG___(level, __VA_ARGS__)
+/* Append the line feed to format string s. */
+#define PMD_DRV_LOG_(level, s, ...) \
+       PMD_DRV_LOG__(level, s "\n", __VA_ARGS__)
+
+#endif /* NDEBUG */
+
+/*
+ * Generic printf()-like logging macro with automatic line feed.
+ *
+ * The PMD_DRV_LOG_STRIP/OPAREN/CPAREN tokens appended to the arguments make
+ * sure PMD_DRV_LOG_() always receives a variable argument, including when
+ * the caller provides a format string only; they expand to nothing in the
+ * end.
+ */
+#define PMD_DRV_LOG(level, ...) \
+       PMD_DRV_LOG_(level, \
+               __VA_ARGS__ PMD_DRV_LOG_STRIP PMD_DRV_LOG_OPAREN, \
+               PMD_DRV_LOG_CPAREN)
+
+/*
+ * Like assert(), DEBUG() becomes a no-op and claim_zero() does not perform
+ * any check when debugging is disabled.
+ *
+ * Note that claim_zero() still evaluates its argument when NDEBUG is
+ * defined, only the comparison with zero is dropped, and that the
+ * arguments of DEBUG() are not evaluated at all in that case.
+ */
+#ifndef NDEBUG
+
+#define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__)
+#define claim_zero(...) assert((__VA_ARGS__) == 0)
+
+#else /* NDEBUG */
+
+#define DEBUG(...) (void)0
+#define claim_zero(...) (__VA_ARGS__)
+
+#endif /* NDEBUG */
+
+/* Logging shortcuts for the INFO, WARNING and ERR levels. */
+#define INFO(...) PMD_DRV_LOG(INFO, __VA_ARGS__)
+#define WARN(...) PMD_DRV_LOG(WARNING, __VA_ARGS__)
+#define ERROR(...) PMD_DRV_LOG(ERR, __VA_ARGS__)
+
+/*
+ * Allocate a buffer on the stack and fill it with a printf format string.
+ *
+ * A first snprintf() call with a NULL buffer computes the required length,
+ * which sizes the variable-length array "name"; a second call fills it.
+ * The arguments are thus evaluated twice. Since the macro expands to a
+ * declaration followed by a statement, it cannot be used as the sole body
+ * of an unbraced if/else/loop.
+ */
+#define MKSTR(name, ...) \
+       char name[snprintf(NULL, 0, __VA_ARGS__) + 1]; \
+       \
+       snprintf(name, sizeof(name), __VA_ARGS__)
+
+#endif /* RTE_PMD_MLX5_UTILS_H_ */
diff --git a/drivers/net/mlx5/rte_pmd_mlx5_version.map b/drivers/net/mlx5/rte_pmd_mlx5_version.map
new file mode 100644 (file)
index 0000000..ad607bb
--- /dev/null
@@ -0,0 +1,3 @@
+DPDK_2.2 {
+       local: *;
+};
index 9e1909e..724efa7 100644 (file)
@@ -104,6 +104,10 @@ ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -libverbs
 endif # ! CONFIG_RTE_BUILD_SHARED_LIBS
 
+ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -libverbs
+endif # ! CONFIG_RTE_BUILD_SHARED_LIB
+
 _LDLIBS-$(CONFIG_RTE_LIBRTE_BNX2X_PMD)      += -lz
 
 _LDLIBS-y += --start-group
@@ -137,6 +141,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
 _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
 _LDLIBS-$(CONFIG_RTE_LIBRTE_E1000_PMD)      += -lrte_pmd_e1000
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4
+_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD)      += -lrte_pmd_mpipe -lgxio
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap