From 919488fbfa7121887410f3863f674d99dabc7d64 Mon Sep 17 00:00:00 2001 From: Xueming Li Date: Wed, 21 Jul 2021 22:37:35 +0800 Subject: [PATCH] net/mlx5: support Sub-Function Introduce SF support. Similar to VF, SF on auxiliary bus is a portion of hardware PF, no representor or bonding parameters for SF. Devargs to support SF: -a auxiliary:mlx5_core.sf.8,dv_flow_en=1 New global syntax to support SF: -a bus=auxiliary,name=mlx5_core.sf.8/class=eth/driver=mlx5,dv_flow_en=1 Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- doc/guides/nics/mlx5.rst | 52 +++++---- doc/guides/rel_notes/release_21_08.rst | 1 + drivers/net/mlx5/linux/mlx5_ethdev_os.c | 12 +- drivers/net/mlx5/linux/mlx5_os.c | 145 +++++++++++++++++------- drivers/net/mlx5/linux/mlx5_os.h | 2 + drivers/net/mlx5/mlx5.c | 23 +++- drivers/net/mlx5/mlx5.h | 2 + drivers/net/mlx5/mlx5_mac.c | 2 +- drivers/net/mlx5/mlx5_rxmode.c | 8 +- drivers/net/mlx5/mlx5_trigger.c | 2 +- drivers/net/mlx5/windows/mlx5_os.c | 12 +- 11 files changed, 187 insertions(+), 74 deletions(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index f5b727c1ee..c3883c09ef 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -112,6 +112,8 @@ Features - Flow integrity offload API. - Connection tracking. - Sub-Function representors. +- Sub-Function. + Limitations ----------- @@ -1478,40 +1480,52 @@ the DPDK application. echo switchdev > /sys/class/net//compat/devlink/mode -Sub-Function representor ------------------------- +Sub-Function support +-------------------- Sub-Function is a portion of the PCI device, a SF netdev has its own -dedicated queues(txq, rxq). A SF netdev supports E-Switch representation -offload similar to existing PF and VF representors. A SF shares PCI -level resources with other SFs and/or with its parent PCI function. +dedicated queues (txq, rxq). +A SF shares PCI level resources with other SFs and/or with its parent PCI function. + +0. 
Requirement:: + + OFED version >= 5.4-0.3.3.0 1. Configure SF feature:: - mlxconfig -d set PF_BAR2_SIZE=<0/1/2/3> PF_BAR2_ENABLE=1 + # Run mlxconfig on both PFs on host and ECPFs on BlueField. + mlxconfig -d set PER_PF_NUM_SF=1 PF_TOTAL_SF=252 PF_SF_BAR_SIZE=12 - Value of PF_BAR2_SIZE: +2. Enable switchdev mode:: - 0: 8 SFs - 1: 16 SFs - 2: 32 SFs - 3: 64 SFs + mlxdevm dev eswitch set pci/ mode switchdev -2. Reset the FW:: +3. Add SF port:: - mlxfwreset -d reset + mlxdevm port add pci/ flavour pcisf pfnum 0 sfnum -3. Enable switchdev mode:: + Get SFID from output: pci// - echo switchdev > /sys/class/net//compat/devlink/mode +4. Modify MAC address:: + + mlxdevm port function set pci// hw_addr + +5. Activate SF port:: + + mlxdevm port function set pci// state active + +6. Devargs to probe SF device:: -4. Create SF:: + auxiliary:mlx5_core.sf.,dv_flow_en=1 - mlnx-sf -d -a create +Sub-Function representor support +-------------------------------- -5. Probe SF representor:: +A SF netdev supports E-Switch representation offload +similar to PF and VF representors. +Use to probe SF representor:: - testpmd> port attach ,representor=sf0,dv_flow_en=1 + testpmd> port attach ,representor=sf,dv_flow_en=1 Performance tuning ------------------ diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst index f77d38d607..543e93ff1d 100644 --- a/doc/guides/rel_notes/release_21_08.rst +++ b/doc/guides/rel_notes/release_21_08.rst @@ -88,6 +88,7 @@ New Features * **Updated Mellanox mlx5 driver.** + * Added Sub-Function support based on auxiliary bus. * Added support for meter hierarchy. * Added devargs options ``allow_duplicate_pattern``. * Added matching on IPv4 Internet Header Length (IHL). 
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c index b05b9fc950..f34133e2c6 100644 --- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c +++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c @@ -128,6 +128,17 @@ struct ethtool_link_settings { #define ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT 2 /* 66 - 64 */ #endif +/* Get interface index from SubFunction device name. */ +int +mlx5_auxiliary_get_ifindex(const char *sf_name) +{ + char if_name[IF_NAMESIZE] = { 0 }; + + if (mlx5_auxiliary_get_child_name(sf_name, "/net", + if_name, sizeof(if_name)) != 0) + return -rte_errno; + return if_nametoindex(if_name); +} /** * Get interface name from private structure. @@ -1619,4 +1630,3 @@ mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[RTE_ETHER_ADDR_LEN]) memcpy(mac, request.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN); return 0; } - diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 1b7ee419d1..f041f9054f 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -2061,6 +2062,27 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, return pf; } +static void +mlx5_os_config_default(struct mlx5_dev_config *config) +{ + memset(config, 0, sizeof(*config)); + config->mps = MLX5_ARG_UNSET; + config->dbnc = MLX5_ARG_UNSET; + config->rx_vec_en = 1; + config->txq_inline_max = MLX5_ARG_UNSET; + config->txq_inline_min = MLX5_ARG_UNSET; + config->txq_inline_mpw = MLX5_ARG_UNSET; + config->txqs_inline = MLX5_ARG_UNSET; + config->vf_nl_en = 1; + config->mr_ext_memseg_en = 1; + config->mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN; + config->mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS; + config->dv_esw_en = 1; + config->dv_flow_en = 1; + config->decap_en = 1; + config->log_hp_size = MLX5_ARG_UNSET; +} + /** * Register a PCI device within bonding. 
* @@ -2485,23 +2507,8 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, uint32_t restore; /* Default configuration. */ - memset(&dev_config, 0, sizeof(struct mlx5_dev_config)); + mlx5_os_config_default(&dev_config); dev_config.vf = dev_config_vf; - dev_config.mps = MLX5_ARG_UNSET; - dev_config.dbnc = MLX5_ARG_UNSET; - dev_config.rx_vec_en = 1; - dev_config.txq_inline_max = MLX5_ARG_UNSET; - dev_config.txq_inline_min = MLX5_ARG_UNSET; - dev_config.txq_inline_mpw = MLX5_ARG_UNSET; - dev_config.txqs_inline = MLX5_ARG_UNSET; - dev_config.vf_nl_en = 1; - dev_config.mr_ext_memseg_en = 1; - dev_config.mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN; - dev_config.mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS; - dev_config.dv_esw_en = 1; - dev_config.dv_flow_en = 1; - dev_config.decap_en = 1; - dev_config.log_hp_size = MLX5_ARG_UNSET; dev_config.allow_duplicate_pattern = 1; list[i].numa_node = pci_dev->device.numa_node; list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device, @@ -2560,6 +2567,35 @@ exit: return ret; } +static int +mlx5_os_parse_eth_devargs(struct rte_device *dev, + struct rte_eth_devargs *eth_da) +{ + int ret = 0; + + if (dev->devargs == NULL) + return 0; + memset(eth_da, 0, sizeof(*eth_da)); + /* Parse representor information first from class argument. */ + if (dev->devargs->cls_str) + ret = rte_eth_devargs_parse(dev->devargs->cls_str, eth_da); + if (ret != 0) { + DRV_LOG(ERR, "failed to parse device arguments: %s", + dev->devargs->cls_str); + return -rte_errno; + } + if (eth_da->type == RTE_ETH_REPRESENTOR_NONE) { + /* Parse legacy device argument */ + ret = rte_eth_devargs_parse(dev->devargs->args, eth_da); + if (ret) { + DRV_LOG(ERR, "failed to parse device arguments: %s", + dev->devargs->args); + return -rte_errno; + } + } + return 0; +} + /** * Callback to register a PCI device. 
* @@ -2574,31 +2610,13 @@ exit: static int mlx5_os_pci_probe(struct rte_pci_device *pci_dev) { - struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE }; + struct rte_eth_devargs eth_da = { .nb_ports = 0 }; int ret = 0; uint16_t p; - if (pci_dev->device.devargs) { - /* Parse representor information from device argument. */ - if (pci_dev->device.devargs->cls_str) - ret = rte_eth_devargs_parse - (pci_dev->device.devargs->cls_str, &eth_da); - if (ret) { - DRV_LOG(ERR, "failed to parse device arguments: %s", - pci_dev->device.devargs->cls_str); - return -rte_errno; - } - if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) { - /* Support legacy device argument */ - ret = rte_eth_devargs_parse - (pci_dev->device.devargs->args, &eth_da); - if (ret) { - DRV_LOG(ERR, "failed to parse device arguments: %s", - pci_dev->device.devargs->args); - return -rte_errno; - } - } - } + ret = mlx5_os_parse_eth_devargs(&pci_dev->device, &eth_da); + if (ret != 0) + return ret; if (eth_da.nb_ports > 0) { /* Iterate all port if devargs pf is range: "pf[0-1]vf[...]". */ @@ -2611,10 +2629,56 @@ mlx5_os_pci_probe(struct rte_pci_device *pci_dev) return ret; } +/* Probe a single SF device on auxiliary bus, no representor support. */ +static int +mlx5_os_auxiliary_probe(struct rte_device *dev) +{ + struct rte_eth_devargs eth_da = { .nb_ports = 0 }; + struct mlx5_dev_config config; + struct mlx5_dev_spawn_data spawn = { .pf_bond = -1 }; + struct rte_auxiliary_device *adev = RTE_DEV_TO_AUXILIARY(dev); + struct rte_eth_dev *eth_dev; + int ret = 0; + + /* Parse ethdev devargs. */ + ret = mlx5_os_parse_eth_devargs(dev, &eth_da); + if (ret != 0) + return ret; + /* Set default config data. */ + mlx5_os_config_default(&config); + config.sf = 1; + /* Init spawn data. 
*/ + spawn.max_port = 1; + spawn.phys_port = 1; + spawn.phys_dev = mlx5_os_get_ibv_dev(dev); + if (spawn.phys_dev == NULL) + return -rte_errno; + ret = mlx5_auxiliary_get_ifindex(dev->name); + if (ret < 0) { + DRV_LOG(ERR, "failed to get ethdev ifindex: %s", dev->name); + return ret; + } + spawn.ifindex = ret; + spawn.numa_node = dev->numa_node; + /* Spawn device. */ + eth_dev = mlx5_dev_spawn(dev, &spawn, &config, &eth_da); + if (eth_dev == NULL) + return -rte_errno; + /* Post create. */ + eth_dev->intr_handle = &adev->intr_handle; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV; + eth_dev->data->numa_node = dev->numa_node; + } + rte_eth_dev_probing_finish(eth_dev); + return 0; +} + /** * Net class driver callback to probe a device. * - * This function probe PCI bus device(s). + * This function probes PCI bus device(s) or a single SF on auxiliary bus. * * @param[in] dev * Pointer to the generic device. @@ -2637,7 +2701,8 @@ mlx5_os_net_probe(struct rte_device *dev) } if (mlx5_dev_is_pci(dev)) return mlx5_os_pci_probe(RTE_DEV_TO_PCI(dev)); - return 0; + else + return mlx5_os_auxiliary_probe(dev); } static int diff --git a/drivers/net/mlx5/linux/mlx5_os.h b/drivers/net/mlx5/linux/mlx5_os.h index af7cbeb418..2991d37df2 100644 --- a/drivers/net/mlx5/linux/mlx5_os.h +++ b/drivers/net/mlx5/linux/mlx5_os.h @@ -19,4 +19,6 @@ enum { #define MLX5_NAMESIZE IF_NAMESIZE +int mlx5_auxiliary_get_ifindex(const char *sf_name); + #endif /* RTE_PMD_MLX5_OS_H_ */ diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 96e8d189ba..818e37fd48 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -399,6 +399,24 @@ mlx5_is_hpf(struct rte_eth_dev *dev) MLX5_REPRESENTOR_REPR(-1) == repr; } +/** + * Decide whether representor ID is a SF port representor. + * + * @param dev + * Pointer to Ethernet device structure. 
+ * + * @return + * Non-zero if SF port representor, otherwise 0. + */ +bool +mlx5_is_sf_repr(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + int type = MLX5_REPRESENTOR_TYPE(priv->representor_id); + + return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF; +} + /** * Initialize the ASO aging management structure. * @@ -2335,7 +2353,10 @@ mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev) (dev->device == odev || (dev->device->driver && dev->device->driver->name && - !strcmp(dev->device->driver->name, MLX5_PCI_DRIVER_NAME)))) + ((strcmp(dev->device->driver->name, + MLX5_PCI_DRIVER_NAME) == 0) || + (strcmp(dev->device->driver->name, + MLX5_AUXILIARY_DRIVER_NAME) == 0))))) break; port_id++; } diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index d88b1433fb..c3736e3d56 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -243,6 +243,7 @@ struct mlx5_dev_config { unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */ unsigned int hw_padding:1; /* End alignment padding is supported. */ unsigned int vf:1; /* This is a VF. */ + unsigned int sf:1; /* This is a SF. */ unsigned int tunnel_en:1; /* Whether tunnel stateless offloads are supported. */ unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */ @@ -1466,6 +1467,7 @@ int mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev, uint16_t mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev); int mlx5_dev_close(struct rte_eth_dev *dev); bool mlx5_is_hpf(struct rte_eth_dev *dev); +bool mlx5_is_sf_repr(struct rte_eth_dev *dev); void mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh); /* Macro to iterate over all valid ports for mlx5 driver. 
*/ diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c index 19981d26d8..a791fedc91 100644 --- a/drivers/net/mlx5/mlx5_mac.c +++ b/drivers/net/mlx5/mlx5_mac.c @@ -159,7 +159,7 @@ mlx5_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) * Configuring the VF instead of its representor, * need to skip the special case of HPF on Bluefield. */ - if (priv->representor && !mlx5_is_hpf(dev)) { + if (priv->representor && !mlx5_is_hpf(dev) && !mlx5_is_sf_repr(dev)) { DRV_LOG(DEBUG, "VF represented by port %u setting primary MAC address", dev->data->port_id); if (priv->pf_bond >= 0) { diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c index 25fb47c9ed..7f19b235c2 100644 --- a/drivers/net/mlx5/mlx5_rxmode.c +++ b/drivers/net/mlx5/mlx5_rxmode.c @@ -36,7 +36,7 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev) dev->data->port_id); return 0; } - if (priv->config.vf) { + if (priv->config.vf || priv->config.sf) { ret = mlx5_os_set_promisc(dev, 1); if (ret) return ret; @@ -69,7 +69,7 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev) int ret; dev->data->promiscuous = 0; - if (priv->config.vf) { + if (priv->config.vf || priv->config.sf) { ret = mlx5_os_set_promisc(dev, 0); if (ret) return ret; @@ -109,7 +109,7 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev) dev->data->port_id); return 0; } - if (priv->config.vf) { + if (priv->config.vf || priv->config.sf) { ret = mlx5_os_set_allmulti(dev, 1); if (ret) goto error; @@ -142,7 +142,7 @@ mlx5_allmulticast_disable(struct rte_eth_dev *dev) int ret; dev->data->all_multicast = 0; - if (priv->config.vf) { + if (priv->config.vf || priv->config.sf) { ret = mlx5_os_set_allmulti(dev, 0); if (ret) goto error; diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index 6d2351f5a8..a9d5d58fd9 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -1259,7 +1259,7 @@ mlx5_traffic_enable(struct rte_eth_dev *dev) } 
mlx5_txq_release(dev, i); } - if (priv->config.dv_esw_en && !priv->config.vf) { + if (priv->config.dv_esw_en && !priv->config.vf && !priv->config.sf) { if (mlx5_flow_create_esw_table_zero_flow(dev)) priv->fdb_def_rule = 1; else diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c index bf20adaa30..ee09acc96b 100644 --- a/drivers/net/mlx5/windows/mlx5_os.c +++ b/drivers/net/mlx5/windows/mlx5_os.c @@ -922,20 +922,18 @@ mlx5_match_devx_devices_to_addr(struct devx_device_bdf *devx_bdf, /** * DPDK callback to register a PCI device. * - * This function spawns Ethernet devices out of a given PCI device. + * This function spawns Ethernet devices out of a given device. * - * @param[in] pci_drv - * PCI driver structure (mlx5_driver). - * @param[in] pci_dev - * PCI device information. + * @param[in] dev + * Pointer to the generic device. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, - struct rte_pci_device *pci_dev) +mlx5_os_net_probe(struct rte_device *dev) { + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev); struct devx_device_bdf *devx_bdf_devs, *orig_devx_bdf_devs; /* * Number of found IB Devices matching with requested PCI BDF. -- 2.20.1