]> git.droids-corp.org - dpdk.git/commitdiff
net/mlx5: support socket direct mode bonding
author: Rongwei Liu <rongweil@nvidia.com>
Tue, 26 Oct 2021 08:48:30 +0000 (11:48 +0300)
committer: Raslan Darawsheh <rasland@nvidia.com>
Tue, 26 Oct 2021 11:24:20 +0000 (13:24 +0200)
In socket direct mode, it's possible to bind any two (maybe four
in the future) PCIe devices with IDs like xxxx:xx:xx.x and
yyyy:yy:yy.y. Bonding member interfaces no longer need to share
the same PCIe domain/bus/device ID.

The kernel driver uses "system_image_guid" to determine whether
devices can be bound together. The sysfs entry "phys_switch_id" is
used to get the "system_image_guid" of each network interface.

OFED 5.4+ is required to support "phys_switch_id".

Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
doc/guides/nics/mlx5.rst
doc/guides/rel_notes/release_21_11.rst
drivers/net/mlx5/linux/mlx5_os.c

index 47709d93b3384c5bf12d36de65fca8352dd40151..45f44c97d7236926e431b06a28428a2382731ff7 100644 (file)
@@ -468,6 +468,10 @@ Limitations
 
   - TXQ affinity subjects to HW hash once enabled.
 
+- Bonding under socket direct mode
+
+  - Needs OFED 5.4+.
+
 Statistics
 ----------
 
index d95bab5582305cb5b7ab7831ae00e0082dea3a30..eefb11334c68b4f4282484ecfd8682f54b9afe5b 100644 (file)
@@ -189,6 +189,7 @@ New Features
 
   * Added implicit mempool registration to avoid data path hiccups (opt-out).
   * Added NIC offloads for the PMD on Windows (TSO, VLAN strip, CRC keep).
+  * Added socket direct mode bonding support.
 
 * **Updated Solarflare network PMD.**
 
index 72bbb665cfdc20ea4545879272a4e857e81b4a4f..3deae861d5354eb2eaebded3d5c1f4d89d1b81a3 100644 (file)
@@ -1898,6 +1898,8 @@ mlx5_device_bond_pci_match(const char *ibdev_name,
        FILE *bond_file = NULL, *file;
        int pf = -1;
        int ret;
+       uint8_t cur_guid[32] = {0};
+       uint8_t guid[32] = {0};
 
        /*
         * Try to get master device name. If something goes wrong suppose
@@ -1911,6 +1913,8 @@ mlx5_device_bond_pci_match(const char *ibdev_name,
        np = mlx5_nl_portnum(nl_rdma, ibdev_name);
        if (!np)
                return -1;
+       if (mlx5_get_device_guid(pci_dev, cur_guid, sizeof(cur_guid)) < 0)
+               return -1;
        /*
         * The master device might not be on the predefined port(not on port
         * index 1, it is not guaranteed), we have to scan all Infiniband
@@ -1938,6 +1942,7 @@ mlx5_device_bond_pci_match(const char *ibdev_name,
                char tmp_str[IF_NAMESIZE + 32];
                struct rte_pci_addr pci_addr;
                struct mlx5_switch_info info;
+               int ret;
 
                /* Process slave interface names in the loop. */
                snprintf(tmp_str, sizeof(tmp_str),
@@ -1969,15 +1974,6 @@ mlx5_device_bond_pci_match(const char *ibdev_name,
                                tmp_str);
                        break;
                }
-               /* Match PCI address, allows BDF0+pfx or BDFx+pfx. */
-               if (pci_dev->domain == pci_addr.domain &&
-                   pci_dev->bus == pci_addr.bus &&
-                   pci_dev->devid == pci_addr.devid &&
-                   ((pci_dev->function == 0 &&
-                     pci_dev->function + owner == pci_addr.function) ||
-                    (pci_dev->function == owner &&
-                     pci_addr.function == owner)))
-                       pf = info.port_name;
                /* Get ifindex. */
                snprintf(tmp_str, sizeof(tmp_str),
                         "/sys/class/net/%s/ifindex", ifname);
@@ -1994,6 +1990,30 @@ mlx5_device_bond_pci_match(const char *ibdev_name,
                bond_info->ports[info.port_name].pci_addr = pci_addr;
                bond_info->ports[info.port_name].ifindex = ifindex;
                bond_info->n_port++;
+               /*
+                * Under socket direct mode, bonding will use
+                * system_image_guid as identification.
+                * After OFED 5.4, guid is readable (ret >= 0) under sysfs.
+                * All bonding members should have the same guid even if driver
+                * is using PCIe BDF.
+                */
+               ret = mlx5_get_device_guid(&pci_addr, guid, sizeof(guid));
+               if (ret < 0)
+                       break;
+               else if (ret > 0) {
+                       if (!memcmp(guid, cur_guid, sizeof(guid)) &&
+                           owner == info.port_name &&
+                           (owner != 0 || (owner == 0 &&
+                           !rte_pci_addr_cmp(pci_dev, &pci_addr))))
+                               pf = info.port_name;
+               } else if (pci_dev->domain == pci_addr.domain &&
+                   pci_dev->bus == pci_addr.bus &&
+                   pci_dev->devid == pci_addr.devid &&
+                   ((pci_dev->function == 0 &&
+                     pci_dev->function + owner == pci_addr.function) ||
+                    (pci_dev->function == owner &&
+                     pci_addr.function == owner)))
+                       pf = info.port_name;
        }
        if (pf >= 0) {
                /* Get bond interface info */
@@ -2006,6 +2026,11 @@ mlx5_device_bond_pci_match(const char *ibdev_name,
                        DRV_LOG(INFO, "PF device %u, bond device %u(%s)",
                                ifindex, bond_info->ifindex, bond_info->ifname);
        }
+       if (owner == 0 && pf != 0) {
+               DRV_LOG(INFO, "PCIe instance %04x:%02x:%02x.%x isn't bonding owner",
+                               pci_dev->domain, pci_dev->bus, pci_dev->devid,
+                               pci_dev->function);
+       }
        return pf;
 }