net/mlx5: support sub-function representor
authorXueming Li <xuemingl@nvidia.com>
Sun, 28 Mar 2021 13:48:08 +0000 (13:48 +0000)
committerRaslan Darawsheh <rasland@nvidia.com>
Wed, 31 Mar 2021 07:16:25 +0000 (09:16 +0200)
This patch adds support for SF representor. Similar to VF representor,
switch port name of SF representor in phys_port_name sysfs key is
"pf<x>sf<y>".

The device representor argument is "representor=sf[list]"; list members
can be a mix of single instances and ranges. Example:
  representor=sf[0,2,4,8-12,-1]

To probe both VF representors and SF representors, they need to be
specified as 2 separate devices:
  -a <BDF>,representor=vf[list] -a <BDF>,representor=sf[list]

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
doc/guides/nics/mlx5.rst
drivers/net/mlx5/linux/mlx5_ethdev_os.c
drivers/net/mlx5/linux/mlx5_os.c
drivers/net/mlx5/mlx5.c
drivers/net/mlx5/mlx5.h
drivers/net/mlx5/mlx5_ethdev.c

index a2cfc51..2e2909d 100644 (file)
@@ -931,14 +931,18 @@ Driver options
 - ``representor`` parameter [list]
 
   This parameter can be used to instantiate DPDK Ethernet devices from
 - ``representor`` parameter [list]
 
   This parameter can be used to instantiate DPDK Ethernet devices from
-  existing port (or VF) representors configured on the device.
+  existing port (PF, VF or SF) representors configured on the device.
 
   It is a standard parameter whose format is described in
   :ref:`ethernet_device_standard_device_arguments`.
 
 
   It is a standard parameter whose format is described in
   :ref:`ethernet_device_standard_device_arguments`.
 
-  For instance, to probe port representors 0 through 2::
+  For instance, to probe VF port representors 0 through 2::
 
 
-    representor=[0-2]
+    representor=vf[0-2]
+
+  To probe SF port representors 0 through 2::
+
+    representor=sf[0-2]
 
 - ``max_dump_files_num`` parameter [int]
 
 
 - ``max_dump_files_num`` parameter [int]
 
@@ -1351,15 +1355,15 @@ Quick Start Guide on OFED/EN
 Enable switchdev mode
 ---------------------
 
 Enable switchdev mode
 ---------------------
 
-Switchdev mode is a mode in E-Switch, that binds between representor and VF.
-Representor is a port in DPDK that is connected to a VF in such a way
-that assuming there are no offload flows, each packet that is sent from the VF
-will be received by the corresponding representor. While each packet that is
-sent to a representor will be received by the VF.
+Switchdev mode is a mode in E-Switch, that binds between representor and VF or SF.
+Representor is a port in DPDK that is connected to a VF or SF in such a way
+that assuming there are no offload flows, each packet that is sent from the VF or SF
+will be received by the corresponding representor. While each packet that is
+sent to a representor will be received by the VF or SF.
 This is very useful in case of SRIOV mode, where the first packet that is sent
 This is very useful in case of SRIOV mode, where the first packet that is sent
-by the VF will be received by the DPDK application which will decide if this
+by the VF or SF will be received by the DPDK application which will decide if this
 flow should be offloaded to the E-Switch. After offloading the flow packet
 flow should be offloaded to the E-Switch. After offloading the flow packet
-that the VF that are matching the flow will not be received any more by
+that the VF or SF that are matching the flow will not be received any more by
 the DPDK application.
 
 1. Enable SRIOV mode::
 the DPDK application.
 
 1. Enable SRIOV mode::
@@ -1386,6 +1390,40 @@ the DPDK application.
 
         echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
 
 
         echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
 
+SubFunction representor support
+-------------------------------
+A SubFunction (SF) is a portion of the PCI device. An SF netdev has its own
+dedicated queues (txq, rxq) and supports E-Switch representation
+offload similar to existing PF and VF representors. An SF shares PCI
+level resources with other SFs and/or with its parent PCI function.
+
+1. Configure SF feature::
+
+        mlxconfig -d <mst device> set PF_BAR2_SIZE=<0/1/2/3> PF_BAR2_ENABLE=1
+
+        Value of PF_BAR2_SIZE:
+
+            0: 8 SFs
+            1: 16 SFs
+            2: 32 SFs
+            3: 64 SFs
+
+2. Reset the FW::
+
+        mlxfwreset -d <mst device> reset
+
+3. Enable switchdev mode::
+
+        echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
+
+4. Create SF::
+
+        mlnx-sf -d <PCI_BDF> -a create
+
+5. Probe SF representor::
+
+        testpmd> port attach <PCI_BDF>,representor=sf0,dv_flow_en=1
+
 Performance tuning
 ------------------
 
 Performance tuning
 ------------------
 
index cb692b2..2127fcf 100644 (file)
@@ -1010,6 +1010,8 @@ mlx5_sysfs_check_switch_info(bool device_dir,
        case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
                /* Fallthrough */
        case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
        case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
                /* Fallthrough */
        case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+               /* Fallthrough */
+       case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
                /* New representors naming schema. */
                switch_info->representor = 1;
                break;
                /* New representors naming schema. */
                switch_info->representor = 1;
                break;
index 5e3ae9f..2241d6a 100644 (file)
@@ -701,6 +701,8 @@ mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)
  *   Verbs device parameters (name, port, switch_info) to spawn.
  * @param config
  *   Device configuration parameters.
  *   Verbs device parameters (name, port, switch_info) to spawn.
  * @param config
  *   Device configuration parameters.
+ * @param eth_da
+ *   Device arguments.
  *
  * @return
  *   A valid Ethernet device object on success, NULL otherwise and rte_errno
  *
  * @return
  *   A valid Ethernet device object on success, NULL otherwise and rte_errno
@@ -712,7 +714,8 @@ mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)
 static struct rte_eth_dev *
 mlx5_dev_spawn(struct rte_device *dpdk_dev,
               struct mlx5_dev_spawn_data *spawn,
 static struct rte_eth_dev *
 mlx5_dev_spawn(struct rte_device *dpdk_dev,
               struct mlx5_dev_spawn_data *spawn,
-              struct mlx5_dev_config *config)
+              struct mlx5_dev_config *config,
+              struct rte_eth_devargs *eth_da)
 {
        const struct mlx5_switch_info *switch_info = &spawn->info;
        struct mlx5_dev_ctx_shared *sh = NULL;
 {
        const struct mlx5_switch_info *switch_info = &spawn->info;
        struct mlx5_dev_ctx_shared *sh = NULL;
@@ -742,34 +745,82 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 
        /* Determine if this port representor is supposed to be spawned. */
        if (switch_info->representor && dpdk_dev->devargs) {
 
        /* Determine if this port representor is supposed to be spawned. */
        if (switch_info->representor && dpdk_dev->devargs) {
-               struct rte_eth_devargs eth_da;
-
-               err = rte_eth_devargs_parse(dpdk_dev->devargs->args, &eth_da);
-               if (err) {
-                       rte_errno = -err;
-                       DRV_LOG(ERR, "failed to process device arguments: %s",
-                               strerror(rte_errno));
-                       return NULL;
-               }
-               if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) {
-                       /* Representor not specified. */
+               switch (eth_da->type) {
+               case RTE_ETH_REPRESENTOR_SF:
+                       if (switch_info->name_type !=
+                                       MLX5_PHYS_PORT_NAME_TYPE_PFSF) {
+                               rte_errno = EBUSY;
+                               return NULL;
+                       }
+                       break;
+               case RTE_ETH_REPRESENTOR_VF:
+                       /* Allows HPF representor index -1 as exception. */
+                       if (!(spawn->info.port_name == -1 &&
+                             switch_info->name_type ==
+                                       MLX5_PHYS_PORT_NAME_TYPE_PFHPF) &&
+                           switch_info->name_type !=
+                                       MLX5_PHYS_PORT_NAME_TYPE_PFVF) {
+                               rte_errno = EBUSY;
+                               return NULL;
+                       }
+                       break;
+               case RTE_ETH_REPRESENTOR_NONE:
                        rte_errno = EBUSY;
                        return NULL;
                        rte_errno = EBUSY;
                        return NULL;
-               }
-               if (eth_da.type != RTE_ETH_REPRESENTOR_VF) {
+                       break;
+               default:
                        rte_errno = ENOTSUP;
                        DRV_LOG(ERR, "unsupported representor type: %s",
                                dpdk_dev->devargs->args);
                        return NULL;
                }
                        rte_errno = ENOTSUP;
                        DRV_LOG(ERR, "unsupported representor type: %s",
                                dpdk_dev->devargs->args);
                        return NULL;
                }
-               for (i = 0; i < eth_da.nb_representor_ports; ++i)
-                       if (eth_da.representor_ports[i] ==
+               /* Check controller ID: */
+               for (i = 0; i < eth_da->nb_mh_controllers; ++i)
+                       if (eth_da->mh_controllers[i] ==
+                           (uint16_t)switch_info->ctrl_num)
+                               break;
+               if (eth_da->nb_mh_controllers &&
+                   i == eth_da->nb_mh_controllers) {
+                       rte_errno = EBUSY;
+                       return NULL;
+               }
+               /* Check SF/VF ID: */
+               for (i = 0; i < eth_da->nb_representor_ports; ++i)
+                       if (eth_da->representor_ports[i] ==
                            (uint16_t)switch_info->port_name)
                                break;
                            (uint16_t)switch_info->port_name)
                                break;
-               if (i == eth_da.nb_representor_ports) {
+               if (eth_da->type != RTE_ETH_REPRESENTOR_PF &&
+                   i == eth_da->nb_representor_ports) {
                        rte_errno = EBUSY;
                        return NULL;
                }
                        rte_errno = EBUSY;
                        return NULL;
                }
+               /* Check PF ID. Check after repr port to avoid warning flood. */
+               if (spawn->pf_bond >= 0) {
+                       for (i = 0; i < eth_da->nb_ports; ++i)
+                               if (eth_da->ports[i] ==
+                                   (uint16_t)switch_info->pf_num)
+                                       break;
+                       if (eth_da->nb_ports && i == eth_da->nb_ports) {
+                               /* For backward compatibility, bonding
+                                * representor syntax supported with limitation,
+                                * device iterator won't find it:
+                                *    <PF1_BDF>,representor=#
+                                */
+                               if (switch_info->pf_num > 0 &&
+                                   eth_da->ports[0] == 0) {
+                                       DRV_LOG(WARNING, "Representor on Bonding PF should use pf#vf# format: %s",
+                                               dpdk_dev->devargs->args);
+                               } else {
+                                       rte_errno = EBUSY;
+                                       return NULL;
+                               }
+                       }
+               } else if (eth_da->nb_ports > 1 || eth_da->ports[0]) {
+                       rte_errno = EINVAL;
+                       DRV_LOG(ERR, "PF id not supported by non-bond device: %s",
+                               dpdk_dev->devargs->args);
+                       return NULL;
+               }
        }
        /* Build device name. */
        if (spawn->pf_bond <  0) {
        }
        /* Build device name. */
        if (spawn->pf_bond <  0) {
@@ -777,8 +828,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                if (!switch_info->representor)
                        strlcpy(name, dpdk_dev->name, sizeof(name));
                else
                if (!switch_info->representor)
                        strlcpy(name, dpdk_dev->name, sizeof(name));
                else
-                       snprintf(name, sizeof(name), "%s_representor_%u",
-                                dpdk_dev->name, switch_info->port_name);
+                       snprintf(name, sizeof(name), "%s_representor_%s%u",
+                                dpdk_dev->name,
+                                switch_info->name_type ==
+                                MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
+                                switch_info->port_name);
        } else {
                /* Bonding device. */
                if (!switch_info->representor)
        } else {
                /* Bonding device. */
                if (!switch_info->representor)
@@ -786,9 +840,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                                 dpdk_dev->name,
                                 mlx5_os_get_dev_device_name(spawn->phys_dev));
                else
                                 dpdk_dev->name,
                                 mlx5_os_get_dev_device_name(spawn->phys_dev));
                else
-                       snprintf(name, sizeof(name), "%s_%s_representor_%u",
+                       snprintf(name, sizeof(name), "%s_%s_representor_%s%u",
                                 dpdk_dev->name,
                                 mlx5_os_get_dev_device_name(spawn->phys_dev),
                                 dpdk_dev->name,
                                 mlx5_os_get_dev_device_name(spawn->phys_dev),
+                                switch_info->name_type ==
+                                MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
                                 switch_info->port_name);
        }
        /* check if the device is already spawned */
                                 switch_info->port_name);
        }
        /* check if the device is already spawned */
@@ -1063,9 +1119,7 @@ err_secondary:
        priv->vport_id = switch_info->representor ?
                         switch_info->port_name + 1 : -1;
 #endif
        priv->vport_id = switch_info->representor ?
                         switch_info->port_name + 1 : -1;
 #endif
-       /* representor_id field keeps the unmodified VF index. */
-       priv->representor_id = switch_info->representor ?
-                              switch_info->port_name : -1;
+       priv->representor_id = mlx5_representor_id_encode(switch_info);
        /*
         * Look for sibling devices in order to reuse their switch domain
         * if any, otherwise allocate one.
        /*
         * Look for sibling devices in order to reuse their switch domain
         * if any, otherwise allocate one.
@@ -1849,6 +1903,7 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        struct mlx5_dev_spawn_data *list = NULL;
        struct mlx5_dev_config dev_config;
        unsigned int dev_config_vf;
        struct mlx5_dev_spawn_data *list = NULL;
        struct mlx5_dev_config dev_config;
        unsigned int dev_config_vf;
+       struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE };
        int ret;
 
        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
        int ret;
 
        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
@@ -1859,6 +1914,27 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                        strerror(rte_errno));
                return -rte_errno;
        }
                        strerror(rte_errno));
                return -rte_errno;
        }
+       if (pci_dev->device.devargs) {
+               /* Parse representor information from device argument. */
+               if (pci_dev->device.devargs->cls_str)
+                       ret = rte_eth_devargs_parse
+                               (pci_dev->device.devargs->cls_str, &eth_da);
+               if (ret) {
+                       DRV_LOG(ERR, "failed to parse device arguments: %s",
+                               pci_dev->device.devargs->cls_str);
+                       return -rte_errno;
+               }
+               if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) {
+                       /* Support legacy device argument */
+                       ret = rte_eth_devargs_parse
+                               (pci_dev->device.devargs->args, &eth_da);
+                       if (ret) {
+                               DRV_LOG(ERR, "failed to parse device arguments: %s",
+                                       pci_dev->device.devargs->args);
+                               return -rte_errno;
+                       }
+               }
+       }
        errno = 0;
        ibv_list = mlx5_glue->get_device_list(&ret);
        if (!ibv_list) {
        errno = 0;
        ibv_list = mlx5_glue->get_device_list(&ret);
        if (!ibv_list) {
@@ -2031,6 +2107,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                                case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
                                        /* Fallthrough */
                                case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
                                case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
                                        /* Fallthrough */
                                case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+                                       /* Fallthrough */
+                               case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
                                        if (list[ns].info.pf_num == bd)
                                                ns++;
                                        break;
                                        if (list[ns].info.pf_num == bd)
                                                ns++;
                                        break;
@@ -2208,7 +2286,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                dev_config.log_hp_size = MLX5_ARG_UNSET;
                list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
                                                 &list[i],
                dev_config.log_hp_size = MLX5_ARG_UNSET;
                list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
                                                 &list[i],
-                                                &dev_config);
+                                                &dev_config,
+                                                &eth_da);
                if (!list[i].eth_dev) {
                        if (rte_errno != EBUSY && rte_errno != EEXIST)
                                break;
                if (!list[i].eth_dev) {
                        if (rte_errno != EBUSY && rte_errno != EEXIST)
                                break;
index d9372f0..8ce3d7d 100644 (file)
@@ -1451,6 +1451,7 @@ const struct eth_dev_ops mlx5_dev_ops = {
        .xstats_get_names = mlx5_xstats_get_names,
        .fw_version_get = mlx5_fw_version_get,
        .dev_infos_get = mlx5_dev_infos_get,
        .xstats_get_names = mlx5_xstats_get_names,
        .fw_version_get = mlx5_fw_version_get,
        .dev_infos_get = mlx5_dev_infos_get,
+       .representor_info_get = mlx5_representor_info_get,
        .read_clock = mlx5_txpp_read_clock,
        .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
        .vlan_filter_set = mlx5_vlan_filter_set,
        .read_clock = mlx5_txpp_read_clock,
        .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
        .vlan_filter_set = mlx5_vlan_filter_set,
index e4963bd..a585c96 100644 (file)
@@ -1043,6 +1043,15 @@ int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 /* mlx5_ethdev.c */
 
 int mlx5_dev_configure(struct rte_eth_dev *dev);
 /* mlx5_ethdev.c */
 
 int mlx5_dev_configure(struct rte_eth_dev *dev);
+int mlx5_representor_info_get(struct rte_eth_dev *dev,
+                             struct rte_eth_representor_info *info);
+#define MLX5_REPRESENTOR_ID(pf, type, repr) \
+               (((pf) << 14) + ((type) << 12) + ((repr) & 0xfff))
+#define MLX5_REPRESENTOR_REPR(repr_id) \
+               ((repr_id) & 0xfff)
+#define MLX5_REPRESENTOR_TYPE(repr_id) \
+               (((repr_id) >> 12) & 3)
+uint16_t mlx5_representor_id_encode(const struct mlx5_switch_info *info);
 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver,
                        size_t fw_size);
 int mlx5_dev_infos_get(struct rte_eth_dev *dev,
 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver,
                        size_t fw_size);
 int mlx5_dev_infos_get(struct rte_eth_dev *dev,
index 51b39dd..1ffb13c 100644 (file)
@@ -377,6 +377,106 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
        return 0;
 }
 
        return 0;
 }
 
+/**
+ * Calculate representor ID from port switch info.
+ *
+ * Uint16 representor ID bits definition, listed from the most
+ * significant field to the least significant one (this matches the
+ * shifts used by MLX5_REPRESENTOR_ID()):
+ *   pf: 2
+ *   type: 2
+ *   vf/sf: 12
+ *
+ * The type field is RTE_ETH_REPRESENTOR_SF when the port name type is
+ * MLX5_PHYS_PORT_NAME_TYPE_PFSF and RTE_ETH_REPRESENTOR_VF otherwise.
+ * For MLX5_PHYS_PORT_NAME_TYPE_PFHPF the vf/sf field is encoded from
+ * UINT16_MAX, i.e. all ones once masked by MLX5_REPRESENTOR_ID().
+ *
+ * @param info
+ *   Port switch info. Only the representor, name_type, port_name and
+ *   pf_num fields are read.
+ *
+ * @return
+ *   Encoded representor ID, or UINT16_MAX when the port is not a
+ *   representor.
+ */
+uint16_t
+mlx5_representor_id_encode(const struct mlx5_switch_info *info)
+{
+       enum rte_eth_representor_type type = RTE_ETH_REPRESENTOR_VF;
+       uint16_t repr = info->port_name;
+
+       if (info->representor == 0)
+               return UINT16_MAX;
+       if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF)
+               type = RTE_ETH_REPRESENTOR_SF;
+       if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFHPF)
+               repr = UINT16_MAX;
+       return MLX5_REPRESENTOR_ID(info->pf_num, type, repr);
+}
+
+/**
+ * DPDK callback to get information about representor.
+ *
+ * Reports three ID ranges (VF, HPF and SF, in that order) for each of
+ * two PFs, i.e. six entries written to info->ranges[]. The HPF entry is
+ * reported with type RTE_ETH_REPRESENTOR_VF, vf set to UINT16_MAX and
+ * id_base equal to id_end.
+ *
+ * Representor ID bits definition, listed from the least significant
+ * field to the most significant one:
+ *   vf/sf: 12
+ *   type: 2
+ *   pf: 2
+ *
+ * NOTE(review): the capacity of info->ranges[] is not checked here;
+ * presumably the caller sizes it from a previous call made with
+ * info == NULL - confirm against the ethdev layer.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] info
+ *   Nullable info structure output buffer. When NULL, nothing is
+ *   written and only the number of ranges is returned.
+ *
+ * @return
+ *   negative on error, or the number of representor ranges. This
+ *   implementation has no error path and always returns n_type * n_pf.
+ */
+int
+mlx5_representor_info_get(struct rte_eth_dev *dev,
+                         struct rte_eth_representor_info *info)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       int n_type = 3; /* Number of representor types, VF, HPF and SF. */
+       int n_pf = 2; /* Number of PFs. */
+       int i = 0, pf;
+
+       if (info == NULL)
+               goto out;
+       info->controller = 0;
+       info->pf = priv->pf_bond >= 0 ? priv->pf_bond : 0;
+       for (pf = 0; pf < n_pf; ++pf) {
+               /* VF range. */
+               info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
+               info->ranges[i].controller = 0;
+               info->ranges[i].pf = pf;
+               info->ranges[i].vf = 0;
+               info->ranges[i].id_base =
+                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
+               info->ranges[i].id_end =
+                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+               snprintf(info->ranges[i].name,
+                        sizeof(info->ranges[i].name), "pf%dvf", pf);
+               i++;
+               /* HPF range. */
+               info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
+               info->ranges[i].controller = 0;
+               info->ranges[i].pf = pf;
+               info->ranges[i].vf = UINT16_MAX;
+               info->ranges[i].id_base =
+                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+               info->ranges[i].id_end =
+                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+               snprintf(info->ranges[i].name,
+                        sizeof(info->ranges[i].name), "pf%dvf", pf);
+               i++;
+               /* SF range. */
+               info->ranges[i].type = RTE_ETH_REPRESENTOR_SF;
+               info->ranges[i].controller = 0;
+               info->ranges[i].pf = pf;
+               info->ranges[i].vf = 0;
+               info->ranges[i].id_base =
+                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
+               info->ranges[i].id_end =
+                       MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+               snprintf(info->ranges[i].name,
+                        sizeof(info->ranges[i].name), "pf%dsf", pf);
+               i++;
+       }
+out:
+       return n_type * n_pf;
+}
+
 /**
  * Get firmware version of a device.
  *
 /**
  * Get firmware version of a device.
  *