net/mlx5: fix LRO dependency to include DV flow
[dpdk.git] / drivers / net / mlx5 / mlx5.c
index 1fe7150..6dd3def 100644 (file)
@@ -379,8 +379,10 @@ mlx5_alloc_shared_ibctx(const struct mlx5_dev_spawn_data *spawn)
         * there is no interrupt subhandler installed for
         * the given port index i.
         */
-       for (i = 0; i < sh->max_port; i++)
+       for (i = 0; i < sh->max_port; i++) {
                sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
+               sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
+       }
        sh->pd = mlx5_glue->alloc_pd(sh->ctx);
        if (sh->pd == NULL) {
                DRV_LOG(ERR, "PD allocation failure");
@@ -481,6 +483,15 @@ mlx5_free_shared_ibctx(struct mlx5_ibv_shared *sh)
        if (sh->intr_cnt)
                mlx5_intr_callback_unregister
                        (&sh->intr_handle, mlx5_dev_interrupt_handler, sh);
+#ifdef HAVE_MLX5_DEVX_ASYNC_SUPPORT
+       if (sh->devx_intr_cnt) {
+               if (sh->intr_handle_devx.fd)
+                       rte_intr_callback_unregister(&sh->intr_handle_devx,
+                                         mlx5_dev_interrupt_handler_devx, sh);
+               if (sh->devx_comp)
+                       mlx5dv_devx_destroy_cmd_comp(sh->devx_comp);
+       }
+#endif
        pthread_mutex_destroy(&sh->intr_mutex);
        if (sh->pd)
                claim_zero(mlx5_glue->dealloc_pd(sh->pd));
@@ -845,6 +856,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                ((priv->sh->ctx != NULL) ? priv->sh->ctx->device->name : ""));
        /* In case mlx5_dev_stop() has not been called. */
        mlx5_dev_interrupt_handler_uninstall(dev);
+       mlx5_dev_interrupt_handler_devx_uninstall(dev);
        mlx5_traffic_disable(dev);
        mlx5_flow_flush(dev, NULL);
        /* Prevent crashes when queues are still in use. */
@@ -926,7 +938,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                unsigned int c = 0;
                uint16_t port_id;
 
-               RTE_ETH_FOREACH_DEV_OF(port_id, dev->device) {
+               MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
                        struct mlx5_priv *opriv =
                                rte_eth_devices[port_id].data->dev_private;
 
@@ -935,6 +947,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                            &rte_eth_devices[port_id] == dev)
                                continue;
                        ++c;
+                       break;
                }
                if (!c)
                        claim_zero(rte_eth_switch_domain_free(priv->domain_id));
@@ -1516,6 +1529,53 @@ mlx5_release_dbr(struct rte_eth_dev *dev, uint32_t umem_id, uint64_t offset)
        return ret;
 }
 
+/**
+ * Check sibling device configurations.
+ *
+ * Sibling devices (representors, bonding slaves) sharing the Infiniband
+ * device context must agree on "dv_flow_en". The check passes when the
+ * context refcnt is 1 or no other port using the context is found.
+ *
+ * @param priv
+ *   Private device descriptor; priv->sh must be set (asserted).
+ * @param config
+ *   Configuration of the device that is going to be created.
+ *
+ * @return
+ *   0 on success, EINVAL (also stored in rte_errno) on mismatch.
+ */
+static int
+mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
+                             struct mlx5_dev_config *config)
+{
+       struct mlx5_ibv_shared *sh = priv->sh;
+       struct mlx5_dev_config *sh_conf = NULL;
+       uint16_t port_id;
+
+       assert(sh);
+       /* Nothing to compare for the single/first device. */
+       if (sh->refcnt == 1)
+               return 0;
+       /* Find another port attached to the same shared context. */
+       MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
+               struct mlx5_priv *opriv =
+                       rte_eth_devices[port_id].data->dev_private;
+
+               if (opriv && opriv != priv && opriv->sh == sh) {
+                       sh_conf = &opriv->config;
+                       break;
+               }
+       }
+       if (!sh_conf)
+               return 0;
+       if (sh_conf->dv_flow_en ^ config->dv_flow_en) {
+               DRV_LOG(ERR, "\"dv_flow_en\" configuration mismatch"
+                            " for shared %s context", sh->ibdev_name);
+               rte_errno = EINVAL;
+               return rte_errno;
+       }
+       return 0;
+}
 /**
  * Spawn an Ethernet device from Verbs information.
  *
@@ -1562,6 +1622,9 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        int own_domain_id = 0;
        uint16_t port_id;
        unsigned int i;
+#ifdef HAVE_MLX5DV_DR_DEVX_PORT
+       struct mlx5dv_devx_port devx_port;
+#endif
 
        /* Determine if this port representor is supposed to be spawned. */
        if (switch_info->representor && dpdk_dev->devargs) {
@@ -1782,8 +1845,57 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        priv->representor = !!switch_info->representor;
        priv->master = !!switch_info->master;
        priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
+       priv->vport_meta_tag = 0;
+       priv->vport_meta_mask = 0;
+       priv->pf_bond = spawn->pf_bond;
+#ifdef HAVE_MLX5DV_DR_DEVX_PORT
+       /*
+        * The DevX port query API is implemented. E-Switch may use
+        * either vport or reg_c[0] metadata register to match on
+        * vport index. The engaged part of metadata register is
+        * defined by mask.
+        */
+       devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT |
+                             MLX5DV_DEVX_PORT_MATCH_REG_C_0;
+       err = mlx5_glue->devx_port_query(sh->ctx, spawn->ibv_port, &devx_port);
+       if (err) {
+               DRV_LOG(WARNING, "can't query devx port %d on device %s\n",
+                       spawn->ibv_port, spawn->ibv_dev->name);
+               devx_port.comp_mask = 0;
+       }
+       if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) {
+               priv->vport_meta_tag = devx_port.reg_c_0.value;
+               priv->vport_meta_mask = devx_port.reg_c_0.mask;
+               if (!priv->vport_meta_mask) {
+                       DRV_LOG(ERR, "vport zero mask for port %d"
+                                    " on bonding device %s\n",
+                                    spawn->ibv_port, spawn->ibv_dev->name);
+                       err = ENOTSUP;
+                       goto error;
+               }
+               if (priv->vport_meta_tag & ~priv->vport_meta_mask) {
+                       DRV_LOG(ERR, "invalid vport tag for port %d"
+                                    " on bonding device %s\n",
+                                    spawn->ibv_port, spawn->ibv_dev->name);
+                       err = ENOTSUP;
+                       goto error;
+               }
+       } else if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) {
+               priv->vport_id = devx_port.vport_num;
+       } else if (spawn->pf_bond >= 0) {
+               DRV_LOG(ERR, "can't deduce vport index for port %d"
+                            " on bonding device %s\n",
+                            spawn->ibv_port, spawn->ibv_dev->name);
+               err = ENOTSUP;
+               goto error;
+       } else {
+               /* Assume the vport index in the legacy-compatible way. */
+               priv->vport_id = switch_info->representor ?
+                                switch_info->port_name + 1 : -1;
+       }
+#else
        /*
-        * Currently we support single E-Switch per PF configurations
+        * Kernel/rdma_core support single E-Switch per PF configurations
         * only and vport_id field contains the vport index for
         * associated VF, which is deduced from representor port name.
         * For example, let's have the IB device port 10, it has
@@ -1795,18 +1907,20 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
         */
        priv->vport_id = switch_info->representor ?
                         switch_info->port_name + 1 : -1;
-       /* representor_id field keeps the unmodified port/VF index. */
+#endif
+       /* representor_id field keeps the unmodified VF index. */
        priv->representor_id = switch_info->representor ?
                               switch_info->port_name : -1;
        /*
         * Look for sibling devices in order to reuse their switch domain
         * if any, otherwise allocate one.
         */
-       RTE_ETH_FOREACH_DEV_OF(port_id, dpdk_dev) {
+       MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
                const struct mlx5_priv *opriv =
                        rte_eth_devices[port_id].data->dev_private;
 
                if (!opriv ||
+                   opriv->sh != priv->sh ||
                        opriv->domain_id ==
                        RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
                        continue;
@@ -1830,6 +1944,9 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                        strerror(rte_errno));
                goto error;
        }
+       err = mlx5_dev_check_sibling_config(priv, &config);
+       if (err)
+               goto error;
        config.hw_csum = !!(sh->device_attr.device_cap_flags_ex &
                            IBV_DEVICE_RAW_IP_CSUM);
        DRV_LOG(DEBUG, "checksum offloading is %ssupported",
@@ -1916,7 +2033,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                if (priv->counter_fallback)
                        DRV_LOG(INFO, "Use fall-back DV counter management\n");
                /* Check for LRO support. */
-               if (config.dest_tir && config.hca_attr.lro_cap) {
+               if (config.dest_tir && config.hca_attr.lro_cap &&
+                   config.dv_flow_en) {
                        /* TBD check tunnel lro caps. */
                        config.lro.supported = config.hca_attr.lro_cap;
                        DRV_LOG(DEBUG, "Device supports LRO");
@@ -2369,6 +2487,19 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                        goto exit;
                }
        }
+#ifndef HAVE_MLX5DV_DR_DEVX_PORT
+       if (bd >= 0) {
+               /*
+                * This may happen if there is VF LAG kernel support and
+                * application is compiled with older rdma_core library.
+                */
+               DRV_LOG(ERR,
+                       "No kernel/verbs support for VF LAG bonding found.");
+               rte_errno = ENOTSUP;
+               ret = -rte_errno;
+               goto exit;
+       }
+#endif
        /*
         * Now we can determine the maximal
         * amount of devices to be spawned.
@@ -2621,6 +2752,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                rte_eth_copy_pci_info(list[i].eth_dev, pci_dev);
                /* Restore non-PCI flags cleared by the above call. */
                list[i].eth_dev->data->dev_flags |= restore;
+               mlx5_dev_interrupt_handler_devx_install(list[i].eth_dev);
                rte_eth_dev_probing_finish(list[i].eth_dev);
        }
        if (i != ns) {
@@ -2663,6 +2795,41 @@ exit:
        return ret;
 }
 
+/**
+ * Look for the next ethernet device belonging to the mlx5 driver.
+ *
+ * @param[in] port_id
+ *   Port index to start the search from (inclusive).
+ * @param[in] pci_dev
+ *   Pointer to the hint PCI device. While a device is being probed,
+ *   its siblings (master and preceding representors) might not
+ *   have a driver assigned yet because mlx5_pci_probe() has not
+ *   completed; in that case a match on the hint PCI device is
+ *   used to detect the sibling device.
+ *
+ * @return
+ *   port_id of the found device, RTE_MAX_ETHPORTS if not found.
+ */
+uint16_t
+mlx5_eth_find_next(uint16_t port_id, struct rte_pci_device *pci_dev)
+{
+       while (port_id < RTE_MAX_ETHPORTS) {
+               struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+               if (dev->state != RTE_ETH_DEV_UNUSED &&
+                   dev->device &&
+                   (dev->device == &pci_dev->device ||
+                    (dev->device->driver &&
+                    dev->device->driver->name &&
+                    !strcmp(dev->device->driver->name, MLX5_DRIVER_NAME))))
+                       break;
+               port_id++;
+       }
+       if (port_id >= RTE_MAX_ETHPORTS)
+               return RTE_MAX_ETHPORTS;
+       return port_id;
+}
+
 /**
  * DPDK callback to remove a PCI device.
  *