* there is no interrupt subhandler installed for
* the given port index i.
*/
- for (i = 0; i < sh->max_port; i++)
+ for (i = 0; i < sh->max_port; i++) {
sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
+ sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
+ }
sh->pd = mlx5_glue->alloc_pd(sh->ctx);
if (sh->pd == NULL) {
DRV_LOG(ERR, "PD allocation failure");
if (sh->intr_cnt)
mlx5_intr_callback_unregister
(&sh->intr_handle, mlx5_dev_interrupt_handler, sh);
+#ifdef HAVE_MLX5_DEVX_ASYNC_SUPPORT
+ if (sh->devx_intr_cnt) {
+ if (sh->intr_handle_devx.fd)
+ rte_intr_callback_unregister(&sh->intr_handle_devx,
+ mlx5_dev_interrupt_handler_devx, sh);
+ if (sh->devx_comp)
+ mlx5dv_devx_destroy_cmd_comp(sh->devx_comp);
+ }
+#endif
pthread_mutex_destroy(&sh->intr_mutex);
if (sh->pd)
claim_zero(mlx5_glue->dealloc_pd(sh->pd));
((priv->sh->ctx != NULL) ? priv->sh->ctx->device->name : ""));
/* In case mlx5_dev_stop() has not been called. */
mlx5_dev_interrupt_handler_uninstall(dev);
+ mlx5_dev_interrupt_handler_devx_uninstall(dev);
mlx5_traffic_disable(dev);
mlx5_flow_flush(dev, NULL);
/* Prevent crashes when queues are still in use. */
unsigned int c = 0;
uint16_t port_id;
- RTE_ETH_FOREACH_DEV_OF(port_id, dev->device) {
+ MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
struct mlx5_priv *opriv =
rte_eth_devices[port_id].data->dev_private;
&rte_eth_devices[port_id] == dev)
continue;
++c;
+ break;
}
if (!c)
claim_zero(rte_eth_switch_domain_free(priv->domain_id));
return ret;
}
+/**
+ * Check sibling device configurations.
+ *
+ * Sibling devices sharing the Infiniband device context
+ * should have compatible configurations. This regards
+ * representors and bonding slaves.
+ *
+ * The routine looks for the first other device whose private data
+ * refers to the same shared context as @p priv and compares its
+ * stored configuration against @p config. Currently only the
+ * "dv_flow_en" setting is compared.
+ *
+ * @param priv
+ * Private device descriptor. Its shared context pointer (priv->sh)
+ * must be set, this is checked with assert().
+ * @param config
+ * Configuration of the device that is going to be created.
+ *
+ * @return
+ * 0 if there is no sibling to compare with or the configurations
+ * are compatible. On mismatch rte_errno is set to EINVAL and this
+ * (positive) value is returned.
+ */
+static int
+mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
+ struct mlx5_dev_config *config)
+{
+ struct mlx5_ibv_shared *sh = priv->sh;
+ struct mlx5_dev_config *sh_conf = NULL;
+ uint16_t port_id;
+
+ assert(sh);
+ /*
+ * Nothing to compare for the single/first device: a reference
+ * counter of 1 means the shared context has no other user.
+ */
+ if (sh->refcnt == 1)
+ return 0;
+ /*
+ * Find the device with shared context. The first port other than
+ * the one being checked that uses the same context supplies the
+ * reference configuration; if none is found the check passes.
+ */
+ MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
+ struct mlx5_priv *opriv =
+ rte_eth_devices[port_id].data->dev_private;
+
+ if (opriv && opriv != priv && opriv->sh == sh) {
+ sh_conf = &opriv->config;
+ break;
+ }
+ }
+ if (!sh_conf)
+ return 0;
+ if (sh_conf->dv_flow_en ^ config->dv_flow_en) {
+ DRV_LOG(ERR, "\"dv_flow_en\" configuration mismatch"
+ " for shared %s context", sh->ibdev_name);
+ rte_errno = EINVAL;
+ return rte_errno;
+ }
+ return 0;
+}
/**
* Spawn an Ethernet device from Verbs information.
*
int own_domain_id = 0;
uint16_t port_id;
unsigned int i;
+#ifdef HAVE_MLX5DV_DR_DEVX_PORT
+ struct mlx5dv_devx_port devx_port;
+#endif
/* Determine if this port representor is supposed to be spawned. */
if (switch_info->representor && dpdk_dev->devargs) {
priv->representor = !!switch_info->representor;
priv->master = !!switch_info->master;
priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
+ priv->vport_meta_tag = 0;
+ priv->vport_meta_mask = 0;
+ priv->pf_bond = spawn->pf_bond;
+#ifdef HAVE_MLX5DV_DR_DEVX_PORT
+ /*
+ * The DevX port query API is implemented. E-Switch may use
+ * either vport or reg_c[0] metadata register to match on
+ * vport index. The engaged part of metadata register is
+ * defined by mask.
+ */
+ devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT |
+ MLX5DV_DEVX_PORT_MATCH_REG_C_0;
+ err = mlx5_glue->devx_port_query(sh->ctx, spawn->ibv_port, &devx_port);
+ if (err) {
+ DRV_LOG(WARNING, "can't query devx port %d on device %s\n",
+ spawn->ibv_port, spawn->ibv_dev->name);
+ devx_port.comp_mask = 0;
+ }
+ if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) {
+ priv->vport_meta_tag = devx_port.reg_c_0.value;
+ priv->vport_meta_mask = devx_port.reg_c_0.mask;
+ if (!priv->vport_meta_mask) {
+ DRV_LOG(ERR, "vport zero mask for port %d"
+ " on bonding device %s\n",
+ spawn->ibv_port, spawn->ibv_dev->name);
+ err = ENOTSUP;
+ goto error;
+ }
+ if (priv->vport_meta_tag & ~priv->vport_meta_mask) {
+ DRV_LOG(ERR, "invalid vport tag for port %d"
+ " on bonding device %s\n",
+ spawn->ibv_port, spawn->ibv_dev->name);
+ err = ENOTSUP;
+ goto error;
+ }
+ } else if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) {
+ priv->vport_id = devx_port.vport_num;
+ } else if (spawn->pf_bond >= 0) {
+ DRV_LOG(ERR, "can't deduce vport index for port %d"
+ " on bonding device %s\n",
+ spawn->ibv_port, spawn->ibv_dev->name);
+ err = ENOTSUP;
+ goto error;
+ } else {
+ /* Assume the vport index in the backward-compatible way. */
+ priv->vport_id = switch_info->representor ?
+ switch_info->port_name + 1 : -1;
+ }
+#else
/*
- * Currently we support single E-Switch per PF configurations
+ * Kernel/rdma_core supports single E-Switch per PF configurations
* only and vport_id field contains the vport index for
* associated VF, which is deduced from representor port name.
* For example, let's have the IB device port 10, it has
*/
priv->vport_id = switch_info->representor ?
switch_info->port_name + 1 : -1;
- /* representor_id field keeps the unmodified port/VF index. */
+#endif
+ /* representor_id field keeps the unmodified VF index. */
priv->representor_id = switch_info->representor ?
switch_info->port_name : -1;
/*
* Look for sibling devices in order to reuse their switch domain
* if any, otherwise allocate one.
*/
- RTE_ETH_FOREACH_DEV_OF(port_id, dpdk_dev) {
+ MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
const struct mlx5_priv *opriv =
rte_eth_devices[port_id].data->dev_private;
if (!opriv ||
+ opriv->sh != priv->sh ||
opriv->domain_id ==
RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
continue;
strerror(rte_errno));
goto error;
}
+ err = mlx5_dev_check_sibling_config(priv, &config);
+ if (err)
+ goto error;
config.hw_csum = !!(sh->device_attr.device_cap_flags_ex &
IBV_DEVICE_RAW_IP_CSUM);
DRV_LOG(DEBUG, "checksum offloading is %ssupported",
if (priv->counter_fallback)
DRV_LOG(INFO, "Use fall-back DV counter management\n");
/* Check for LRO support. */
- if (config.dest_tir && config.hca_attr.lro_cap) {
+ if (config.dest_tir && config.hca_attr.lro_cap &&
+ config.dv_flow_en) {
/* TBD check tunnel lro caps. */
config.lro.supported = config.hca_attr.lro_cap;
DRV_LOG(DEBUG, "Device supports LRO");
goto exit;
}
}
+#ifndef HAVE_MLX5DV_DR_DEVX_PORT
+ if (bd >= 0) {
+ /*
+ * This may happen if there is VF LAG kernel support and
+ * application is compiled with older rdma_core library.
+ */
+ DRV_LOG(ERR,
+ "No kernel/verbs support for VF LAG bonding found.");
+ rte_errno = ENOTSUP;
+ ret = -rte_errno;
+ goto exit;
+ }
+#endif
/*
* Now we can determine the maximal
* amount of devices to be spawned.
rte_eth_copy_pci_info(list[i].eth_dev, pci_dev);
/* Restore non-PCI flags cleared by the above call. */
list[i].eth_dev->data->dev_flags |= restore;
+ mlx5_dev_interrupt_handler_devx_install(list[i].eth_dev);
rte_eth_dev_probing_finish(list[i].eth_dev);
}
if (i != ns) {
return ret;
}
+/**
+ * Look for the next ethernet device belonging to the mlx5 driver.
+ *
+ * The search starts at @p port_id (inclusive) and walks the
+ * rte_eth_devices[] array upwards. A port matches if it is not in
+ * the RTE_ETH_DEV_UNUSED state, has a device attached, and either
+ * that device is the hint PCI device or its driver name is equal
+ * to MLX5_DRIVER_NAME.
+ *
+ * @param[in] port_id
+ * Port index to start looking for the device from.
+ * @param[in] pci_dev
+ * Pointer to the hint PCI device. While a device is being probed,
+ * its siblings (the master and the preceding representors) might
+ * not have a driver assigned yet, because mlx5_pci_probe() has
+ * not completed. In this case the match on the hint PCI device
+ * is used to detect the sibling devices.
+ * NOTE(review): the pointer is not checked against NULL here,
+ * callers are presumably expected to pass a valid device - confirm.
+ *
+ * @return
+ * port_id of the found device, RTE_MAX_ETHPORTS if not found.
+ */
+uint16_t
+mlx5_eth_find_next(uint16_t port_id, struct rte_pci_device *pci_dev)
+{
+ while (port_id < RTE_MAX_ETHPORTS) {
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+ if (dev->state != RTE_ETH_DEV_UNUSED &&
+ dev->device &&
+ (dev->device == &pci_dev->device ||
+ (dev->device->driver &&
+ dev->device->driver->name &&
+ !strcmp(dev->device->driver->name, MLX5_DRIVER_NAME))))
+ break;
+ port_id++;
+ }
+ if (port_id >= RTE_MAX_ETHPORTS)
+ return RTE_MAX_ETHPORTS;
+ return port_id;
+}
+
/**
* DPDK callback to remove a PCI device.
*