From f5f4c4823740d6f5c0b5c1a2353b2a0857f162e0 Mon Sep 17 00:00:00 2001 From: Xueming Li Date: Sun, 28 Mar 2021 13:48:12 +0000 Subject: [PATCH] net/mlx5: save bonding member ports information Since the kernel bonding netdev doesn't provide a statistics counter that reflects all member ports, the PMD has to manually sum the counters from each member port. As a preparation, this patch collects bonding member port information and saves it to the shared context data. Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/linux/mlx5_ethdev_os.c | 4 +- drivers/net/mlx5/linux/mlx5_os.c | 91 ++++++++++++++++--------- drivers/net/mlx5/mlx5.c | 2 + drivers/net/mlx5/mlx5.h | 21 +++++- drivers/net/mlx5/mlx5_ethdev.c | 5 +- 5 files changed, 86 insertions(+), 37 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c index 2127fcfbfa..e7ec07e364 100644 --- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c +++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c @@ -150,8 +150,8 @@ mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) MLX5_ASSERT(priv); MLX5_ASSERT(priv->sh); - if (priv->bond_ifindex > 0) { - memcpy(ifname, priv->bond_name, MLX5_NAMESIZE); + if (priv->master && priv->sh->bond.ifindex > 0) { + memcpy(ifname, priv->sh->bond.ifname, MLX5_NAMESIZE); return 0; } ifindex = mlx5_ifindex(dev); diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 46e04f68c9..a2cfa50a4a 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1467,19 +1467,6 @@ err_secondary: */ MLX5_ASSERT(spawn->ifindex); priv->if_index = spawn->ifindex; - if (priv->pf_bond >= 0 && priv->master) { - /* Get bond interface info */ - err = mlx5_sysfs_bond_info(priv->if_index, - &priv->bond_ifindex, - priv->bond_name); - if (err) - DRV_LOG(ERR, "unable to get bond info: %s", - strerror(rte_errno)); - else - DRV_LOG(INFO, "PF device %u, bond device %u(%s)", - priv->if_index,
priv->bond_ifindex, - priv->bond_name); - } eth_dev->data->dev_private = priv; priv->dev_data = eth_dev->data; eth_dev->data->mac_addrs = priv->mac; @@ -1748,6 +1735,8 @@ mlx5_dev_spawn_data_cmp(const void *a, const void *b) * Netlink RDMA group socket handle. * @param[in] owner * Rerepsentor owner PF index. + * @param[out] bond_info + * Pointer to bonding information. * * @return * negative value if no bonding device found, otherwise @@ -1756,19 +1745,22 @@ mlx5_dev_spawn_data_cmp(const void *a, const void *b) static int mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, const struct rte_pci_addr *pci_dev, - int nl_rdma, uint16_t owner) + int nl_rdma, uint16_t owner, + struct mlx5_bond_info *bond_info) { char ifname[IF_NAMESIZE + 1]; unsigned int ifindex; unsigned int np, i; - FILE *file = NULL; + FILE *bond_file = NULL, *file; int pf = -1; + int ret; /* * Try to get master device name. If something goes * wrong suppose the lack of kernel support and no * bonding devices. */ + memset(bond_info, 0, sizeof(*bond_info)); if (nl_rdma < 0) return -1; if (!strstr(ibv_dev->name, "bond")) @@ -1792,15 +1784,15 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, /* Try to read bonding slave names from sysfs. */ MKSTR(slaves, "/sys/class/net/%s/master/bonding/slaves", ifname); - file = fopen(slaves, "r"); - if (file) + bond_file = fopen(slaves, "r"); + if (bond_file) break; } - if (!file) + if (!bond_file) return -1; /* Use safe format to check maximal buffer length. 
*/ MLX5_ASSERT(atol(RTE_STR(IF_NAMESIZE)) == IF_NAMESIZE); - while (fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) { + while (fscanf(bond_file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) { char tmp_str[IF_NAMESIZE + 32]; struct rte_pci_addr pci_addr; struct mlx5_switch_info info; @@ -1813,13 +1805,7 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, " for netdev \"%s\"", ifname); continue; } - if (pci_dev->domain != pci_addr.domain || - pci_dev->bus != pci_addr.bus || - pci_dev->devid != pci_addr.devid || - pci_dev->function + owner != pci_addr.function) - continue; /* Slave interface PCI address match found. */ - fclose(file); snprintf(tmp_str, sizeof(tmp_str), "/sys/class/net/%s/phys_port_name", ifname); file = fopen(tmp_str, "rb"); @@ -1828,13 +1814,52 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, info.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET; if (fscanf(file, "%32s", tmp_str) == 1) mlx5_translate_port_name(tmp_str, &info); - if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_LEGACY || - info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) + fclose(file); + /* Only process PF ports. */ + if (info.name_type != MLX5_PHYS_PORT_NAME_TYPE_LEGACY && + info.name_type != MLX5_PHYS_PORT_NAME_TYPE_UPLINK) + continue; + /* Check max bonding member. */ + if (info.port_name >= MLX5_BOND_MAX_PORTS) { + DRV_LOG(WARNING, "bonding index out of range, " + "please increase MLX5_BOND_MAX_PORTS: %s", + tmp_str); + break; + } + /* Match PCI address. */ + if (pci_dev->domain == pci_addr.domain && + pci_dev->bus == pci_addr.bus && + pci_dev->devid == pci_addr.devid && + pci_dev->function + owner == pci_addr.function) pf = info.port_name; - break; - } - if (file) + /* Get ifindex. */ + snprintf(tmp_str, sizeof(tmp_str), + "/sys/class/net/%s/ifindex", ifname); + file = fopen(tmp_str, "rb"); + if (!file) + break; + ret = fscanf(file, "%u", &ifindex); fclose(file); + if (ret != 1) + break; + /* Save bonding info. 
*/ + strncpy(bond_info->ports[info.port_name].ifname, ifname, + sizeof(bond_info->ports[0].ifname)); + bond_info->ports[info.port_name].pci_addr = pci_addr; + bond_info->ports[info.port_name].ifindex = ifindex; + bond_info->n_port++; + } + if (pf >= 0) { + /* Get bond interface info */ + ret = mlx5_sysfs_bond_info(ifindex, &bond_info->ifindex, + bond_info->ifname); + if (ret) + DRV_LOG(ERR, "unable to get bond info: %s", + strerror(rte_errno)); + else + DRV_LOG(INFO, "PF device %u, bond device %u(%s)", + ifindex, bond_info->ifindex, bond_info->ifname); + } return pf; } @@ -1889,6 +1914,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, unsigned int dev_config_vf; struct rte_eth_devargs eth_da = *req_eth_da; struct rte_pci_addr owner_pci = pci_dev->addr; /* Owner PF. */ + struct mlx5_bond_info bond_info; int ret = -1; if (rte_eal_process_type() == RTE_PROC_PRIMARY) @@ -1920,7 +1946,8 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name); bd = mlx5_device_bond_pci_match - (ibv_list[ret], &owner_pci, nl_rdma, owner_id); + (ibv_list[ret], &owner_pci, nl_rdma, owner_id, + &bond_info); if (bd >= 0) { /* * Bonding device detected. 
Only one match is allowed, @@ -2029,6 +2056,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, MLX5_ASSERT(nd == 1); MLX5_ASSERT(np); for (i = 1; i <= np; ++i) { + list[ns].bond_info = &bond_info; list[ns].max_port = np; list[ns].phys_port = i; list[ns].phys_dev = ibv_match[0]; @@ -2119,6 +2147,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, */ for (i = 0; i != nd; ++i) { memset(&list[ns].info, 0, sizeof(list[ns].info)); + list[ns].bond_info = NULL; list[ns].max_port = 1; list[ns].phys_port = 1; list[ns].phys_dev = ibv_match[i]; diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 99a15afe5b..3538cc8c20 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -925,6 +925,8 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, rte_errno = ENOMEM; goto exit; } + if (spawn->bond_info) + sh->bond = *spawn->bond_info; err = mlx5_os_open_device(spawn, config, sh); if (!sh->ctx) goto error; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 68b599a126..412b6cbb6c 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -115,6 +115,7 @@ struct mlx5_dev_spawn_data { void *phys_dev; /**< Associated physical device. */ struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ struct rte_pci_device *pci_dev; /**< Backend PCI device. */ + struct mlx5_bond_info *bond_info; }; /** Key string for IPC. */ @@ -671,6 +672,21 @@ struct mlx5_flex_parser_profiles { void *obj; /* Flex parser node object. */ }; +/* Max member ports per bonding device. */ +#define MLX5_BOND_MAX_PORTS 2 + +/* Bonding device information. */ +struct mlx5_bond_info { + int n_port; /* Number of bond member ports. 
*/ + uint32_t ifindex; + char ifname[MLX5_NAMESIZE + 1]; + struct { + char ifname[MLX5_NAMESIZE + 1]; + uint32_t ifindex; + struct rte_pci_addr pci_addr; + } ports[MLX5_BOND_MAX_PORTS]; +}; + /* * Shared Infiniband device context for Master/Representors * which belong to same IB device with multiple IB ports. @@ -684,6 +700,7 @@ struct mlx5_dev_ctx_shared { uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */ uint32_t qp_ts_format:2; /* QP timestamp formats supported. */ uint32_t max_port; /* Maximal IB device port index. */ + struct mlx5_bond_info bond; /* Bonding information. */ void *ctx; /* Verbs/DV/DevX context. */ void *pd; /* Protection Domain. */ uint32_t pdn; /* Protection Domain number. */ @@ -935,10 +952,8 @@ struct mlx5_priv { uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */ uint32_t vport_meta_mask; /* Used for vport index field match mask. */ int32_t representor_id; /* -1 if not a representor. */ - int32_t pf_bond; /* >=0 means PF index in bonding configuration. */ + int32_t pf_bond; /* >=0, representor owner PF index in bonding. */ unsigned int if_index; /* Associated kernel network device index. */ - uint32_t bond_ifindex; /**< Bond interface index. */ - char bond_name[MLX5_NAMESIZE]; /**< Bond interface name. */ /* RX/TX queues. */ unsigned int rxqs_n; /* RX queues array size. */ unsigned int txqs_n; /* TX queues array size. */ diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 130980d4d6..4f97a69a20 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -42,7 +42,10 @@ mlx5_ifindex(const struct rte_eth_dev *dev) MLX5_ASSERT(priv); MLX5_ASSERT(priv->if_index); - ifindex = priv->bond_ifindex > 0 ? priv->bond_ifindex : priv->if_index; + if (priv->master && priv->sh->bond.ifindex > 0) + ifindex = priv->sh->bond.ifindex; + else + ifindex = priv->if_index; if (!ifindex) rte_errno = ENXIO; return ifindex; -- 2.20.1