* mlx4 driver initialization.
*/
-#include <assert.h>
-#include <dlfcn.h>
#include <errno.h>
#include <inttypes.h>
#include <stddef.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
+#ifdef RTE_IBVERBS_LINK_DLOPEN
+#include <dlfcn.h>
+#endif
/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#endif
#include <rte_common.h>
-#include <rte_config.h>
#include <rte_dev.h>
#include <rte_errno.h>
-#include <rte_ethdev_driver.h>
-#include <rte_ethdev_pci.h>
+#include <ethdev_driver.h>
+#include <ethdev_pci.h>
#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_interrupts.h>
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
+#ifdef MLX4_GLUE
+const struct mlx4_glue *mlx4_glue;
+#endif
+
static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data";
/* Shared memory between primary and secondary processes. */
/* Process local data for secondary processes. */
static struct mlx4_local_data mlx4_local_data;
-/** Driver-specific log messages type. */
-int mlx4_logtype;
-
/** Configuration structure for device arguments. */
struct mlx4_conf {
struct {
NULL,
};
-static void mlx4_dev_stop(struct rte_eth_dev *dev);
+static int mlx4_dev_stop(struct rte_eth_dev *dev);
/**
* Initialize shared data between primary and secondary process.
socket = rxq->socket;
}
- assert(data != NULL);
+ MLX4_ASSERT(data != NULL);
ret = rte_malloc_socket(__func__, size, alignment, socket);
if (!ret && size)
rte_errno = ENOMEM;
static void
mlx4_free_verbs_buf(void *ptr, void *data __rte_unused)
{
- assert(data != NULL);
+ MLX4_ASSERT(data != NULL);
rte_free(ptr);
}
#endif
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
+int
mlx4_proc_priv_init(struct rte_eth_dev *dev)
{
struct mlx4_proc_priv *ppriv;
size_t ppriv_size;
+ mlx4_proc_priv_uninit(dev);
/*
* UAR register table follows the process private structure. BlueFlame
* registers for Tx queues are stored in the table.
*/
ppriv_size = sizeof(struct mlx4_proc_priv) +
dev->data->nb_tx_queues * sizeof(void *);
- ppriv = rte_malloc_socket("mlx4_proc_priv", ppriv_size,
- RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+ ppriv = rte_zmalloc_socket("mlx4_proc_priv", ppriv_size,
+ RTE_CACHE_LINE_SIZE, dev->device->numa_node);
if (!ppriv) {
rte_errno = ENOMEM;
return -rte_errno;
}
- ppriv->uar_table_sz = ppriv_size;
+ ppriv->uar_table_sz = dev->data->nb_tx_queues;
dev->process_private = ppriv;
return 0;
}
* @param dev
* Pointer to Ethernet device structure.
*/
-static void
+void
mlx4_proc_priv_uninit(struct rte_eth_dev *dev)
{
if (!dev->process_private)
(void *)dev, strerror(-ret));
goto err;
}
-#ifndef NDEBUG
+#ifdef RTE_LIBRTE_MLX4_DEBUG
mlx4_mr_dump_dev(dev);
#endif
ret = mlx4_rxq_intr_enable(priv);
* @param dev
* Pointer to Ethernet device structure.
*/
-static void
+static int
mlx4_dev_stop(struct rte_eth_dev *dev)
{
struct mlx4_priv *priv = dev->data->dev_private;
if (!priv->started)
- return;
+ return 0;
DEBUG("%p: detaching flows from all RX queues", (void *)dev);
priv->started = 0;
dev->tx_pkt_burst = mlx4_tx_burst_removed;
mlx4_flow_sync(priv, NULL);
mlx4_rxq_intr_disable(priv);
mlx4_rss_deinit(priv);
+
+ return 0;
}
/**
* @param dev
* Pointer to Ethernet device structure.
*/
-static void
+static int
mlx4_dev_close(struct rte_eth_dev *dev)
{
struct mlx4_priv *priv = dev->data->dev_private;
unsigned int i;
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ rte_eth_dev_release_port(dev);
+ return 0;
+ }
DEBUG("%p: closing device \"%s\"",
(void *)dev,
((priv->ctx != NULL) ? priv->ctx->device->name : ""));
mlx4_proc_priv_uninit(dev);
mlx4_mr_release(dev);
if (priv->pd != NULL) {
- assert(priv->ctx != NULL);
+ MLX4_ASSERT(priv->ctx != NULL);
claim_zero(mlx4_glue->dealloc_pd(priv->pd));
claim_zero(mlx4_glue->close_device(priv->ctx));
} else
- assert(priv->ctx == NULL);
+ MLX4_ASSERT(priv->ctx == NULL);
mlx4_intr_uninstall(priv);
memset(priv, 0, sizeof(*priv));
+ /* mac_addrs must not be freed because part of dev_private */
+ dev->data->mac_addrs = NULL;
+ return 0;
}
static const struct eth_dev_ops mlx4_dev_ops = {
.flow_ctrl_get = mlx4_flow_ctrl_get,
.flow_ctrl_set = mlx4_flow_ctrl_set,
.mtu_set = mlx4_mtu_set,
- .filter_ctrl = mlx4_filter_ctrl,
+ .flow_ops_get = mlx4_flow_ops_get,
.rx_queue_intr_enable = mlx4_rx_intr_enable,
.rx_queue_intr_disable = mlx4_rx_intr_disable,
.is_removed = mlx4_is_removed,
&pci_addr->bus,
&pci_addr->devid,
&pci_addr->function) == 4) {
- ret = 0;
break;
}
}
if (mlx4_init_shared_data())
return -rte_errno;
sd = mlx4_shared_data;
- assert(sd);
+ MLX4_ASSERT(sd);
rte_spinlock_lock(&sd->lock);
switch (rte_eal_process_type()) {
case RTE_PROC_PRIMARY:
struct ibv_context *attr_ctx = NULL;
struct ibv_device_attr device_attr;
struct ibv_device_attr_ex device_attr_ex;
+ struct rte_eth_dev *prev_dev = NULL;
struct mlx4_conf conf = {
.ports.present = 0,
.mr_ext_memseg_en = 1,
};
unsigned int vf;
int i;
+ char ifname[IF_NAMESIZE];
(void)pci_drv;
err = mlx4_init_once();
strerror(rte_errno));
return -rte_errno;
}
- assert(pci_drv == &mlx4_driver);
+ MLX4_ASSERT(pci_drv == &mlx4_driver);
list = mlx4_glue->get_device_list(&i);
if (list == NULL) {
rte_errno = errno;
- assert(rte_errno);
+ MLX4_ASSERT(rte_errno);
if (rte_errno == ENOSYS)
ERROR("cannot list devices, is ib_uverbs loaded?");
return -rte_errno;
}
- assert(i >= 0);
+ MLX4_ASSERT(i >= 0);
/*
* For each listed device, check related sysfs entry against
* the provided PCI ID.
ERROR("cannot use device, are drivers up to date?");
return -rte_errno;
}
- assert(err > 0);
+ MLX4_ASSERT(err > 0);
rte_errno = err;
return -rte_errno;
}
err = ENODEV;
goto error;
}
- assert(device_attr.max_sge >= MLX4_MAX_SGE);
+ MLX4_ASSERT(device_attr.max_sge >= MLX4_MAX_SGE);
for (i = 0; i < device_attr.phys_port_cnt; i++) {
uint32_t port = i + 1; /* ports are indexed from one */
struct ibv_context *ctx = NULL;
ERROR("can not attach rte ethdev");
rte_errno = ENOMEM;
err = rte_errno;
- goto error;
+ goto err_secondary;
}
priv = eth_dev->data->dev_private;
if (!priv->verbs_alloc_ctx.enabled) {
" from Verbs");
rte_errno = ENOTSUP;
err = rte_errno;
- goto error;
+ goto err_secondary;
}
eth_dev->device = &pci_dev->device;
eth_dev->dev_ops = &mlx4_dev_sec_ops;
err = mlx4_proc_priv_init(eth_dev);
if (err)
- goto error;
+ goto err_secondary;
/* Receive command fd from primary process. */
err = mlx4_mp_req_verbs_cmd_fd(eth_dev);
if (err < 0) {
err = rte_errno;
- goto error;
+ goto err_secondary;
}
/* Remap UAR for Tx queues. */
err = mlx4_tx_uar_init_secondary(eth_dev, err);
if (err) {
err = rte_errno;
- goto error;
+ goto err_secondary;
}
/*
* Ethdev pointer is still required as input since
claim_zero(mlx4_glue->close_device(ctx));
rte_eth_copy_pci_info(eth_dev, pci_dev);
rte_eth_dev_probing_finish(eth_dev);
+ prev_dev = eth_dev;
continue;
+err_secondary:
+ claim_zero(mlx4_glue->close_device(ctx));
+ rte_eth_dev_release_port(eth_dev);
+ if (prev_dev)
+ rte_eth_dev_release_port(prev_dev);
+ break;
}
/* Check port status. */
err = mlx4_glue->query_port(ctx, port, &port_attr);
mac.addr_bytes[4], mac.addr_bytes[5]);
/* Register MAC address. */
priv->mac[0] = mac;
-#ifndef NDEBUG
- {
- char ifname[IF_NAMESIZE];
-
- if (mlx4_get_ifname(priv, &ifname) == 0)
- DEBUG("port %u ifname is \"%s\"",
- priv->port, ifname);
- else
- DEBUG("port %u ifname is unknown", priv->port);
+
+ if (mlx4_get_ifname(priv, &ifname) == 0) {
+ DEBUG("port %u ifname is \"%s\"",
+ priv->port, ifname);
+ priv->if_index = if_nametoindex(ifname);
+ } else {
+ DEBUG("port %u ifname is unknown", priv->port);
}
-#endif
+
/* Get actual MTU if possible. */
mlx4_mtu_get(priv, &priv->mtu);
DEBUG("port %u MTU is %u", priv->port, priv->mtu);
eth_dev->data->mac_addrs = priv->mac;
eth_dev->device = &pci_dev->device;
rte_eth_copy_pci_info(eth_dev, pci_dev);
+ eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
/* Initialize local interrupt handle for current port. */
- priv->intr_handle = (struct rte_intr_handle){
- .fd = -1,
- .type = RTE_INTR_HANDLE_EXT,
- };
+ memset(&priv->intr_handle, 0, sizeof(struct rte_intr_handle));
+ priv->intr_handle.fd = -1;
+ priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
/*
* Override ethdev interrupt handle pointer with private
* handle instead of that of the parent PCI device used by
eth_dev->dev_ops = &mlx4_dev_ops;
#ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS
/* Hint libmlx4 to use PMD allocator for data plane resources */
- struct mlx4dv_ctx_allocators alctr = {
- .alloc = &mlx4_alloc_verbs_buf,
- .free = &mlx4_free_verbs_buf,
- .data = priv,
- };
err = mlx4_glue->dv_set_context_attr
(ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS,
- (void *)((uintptr_t)&alctr));
+ (void *)((uintptr_t)&(struct mlx4dv_ctx_allocators){
+ .alloc = &mlx4_alloc_verbs_buf,
+ .free = &mlx4_free_verbs_buf,
+ .data = priv,
+ }));
if (err)
WARN("Verbs external allocator is not supported");
else
priv, mem_event_cb);
rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock);
rte_eth_dev_probing_finish(eth_dev);
+ prev_dev = eth_dev;
continue;
port_error:
rte_free(priv);
eth_dev->data->mac_addrs = NULL;
rte_eth_dev_release_port(eth_dev);
}
+ if (prev_dev)
+ mlx4_dev_close(prev_dev);
break;
}
- /*
- * XXX if something went wrong in the loop above, there is a resource
- * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as
- * long as the dpdk does not provide a way to deallocate a ethdev and a
- * way to enumerate the registered ethdevs to free the previous ones.
- */
error:
if (attr_ctx)
claim_zero(mlx4_glue->close_device(attr_ctx));
return -err;
}
+/**
+ * DPDK callback to remove a PCI device.
+ *
+ * This function removes all Ethernet devices belong to a given PCI device.
+ *
+ * @param[in] pci_dev
+ * Pointer to the PCI device.
+ *
+ * @return
+ * 0 on success, the function cannot fail.
+ */
+static int
+mlx4_pci_remove(struct rte_pci_device *pci_dev)
+{
+ uint16_t port_id;
+ int ret = 0;
+
+ RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) {
+ /*
+ * mlx4_dev_close() is not registered to secondary process,
+ * call the close function explicitly for secondary process.
+ */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ ret |= mlx4_dev_close(&rte_eth_devices[port_id]);
+ else
+ ret |= rte_eth_dev_close(port_id);
+ }
+ return ret == 0 ? 0 : -EIO;
+}
+
static const struct rte_pci_id mlx4_pci_id_map[] = {
{
RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
},
.id_table = mlx4_pci_id_map,
.probe = mlx4_pci_probe,
- .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
- RTE_PCI_DRV_IOVA_AS_VA,
+ .remove = mlx4_pci_remove,
+ .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
};
#ifdef RTE_IBVERBS_LINK_DLOPEN
#endif
+/* Initialize driver log type. */
+RTE_LOG_REGISTER_DEFAULT(mlx4_logtype, NOTICE)
+
/**
* Driver initialization routine.
*/
RTE_INIT(rte_mlx4_pmd_init)
{
- /* Initialize driver log type. */
- mlx4_logtype = rte_log_register("pmd.net.mlx4");
- if (mlx4_logtype >= 0)
- rte_log_set_level(mlx4_logtype, RTE_LOG_NOTICE);
-
/*
* MLX4_DEVICE_FATAL_CLEANUP tells ibv_destroy functions we
* want to get success errno value in case of calling them
#ifdef RTE_IBVERBS_LINK_DLOPEN
if (mlx4_glue_init())
return;
- assert(mlx4_glue);
+ MLX4_ASSERT(mlx4_glue);
#endif
-#ifndef NDEBUG
+#ifdef RTE_LIBRTE_MLX4_DEBUG
/* Glue structure must not contain any NULL pointers. */
{
unsigned int i;
for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i)
- assert(((const void *const *)mlx4_glue)[i]);
+ MLX4_ASSERT(((const void *const *)mlx4_glue)[i]);
}
#endif
if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) {