]> git.droids-corp.org - dpdk.git/commitdiff
net/mlx5: add representor recognition on Linux 5.x
author Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Wed, 27 Mar 2019 13:15:35 +0000 (13:15 +0000)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Fri, 29 Mar 2019 16:25:32 +0000 (17:25 +0100)
The master device and VF representors were distinguished by the
presence of a port name: the master device did not have one. New Linux
kernels, starting from 5.0, provide the port name for the master device
as well, so the implemented representor recognition method does not work.
The new recognition method is based on querying the number of VFs
that have been created on the device.

The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
attribute is specified in the Netlink request message.

Also, a check for the presence of the device symlink in the device sysfs
folder is added to distinguish representors with the sysfs-based method.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Shahaf Shuler <shahafs@mellanox.com>
drivers/net/mlx5/Makefile
drivers/net/mlx5/meson.build
drivers/net/mlx5/mlx5.c
drivers/net/mlx5/mlx5.h
drivers/net/mlx5/mlx5_ethdev.c
drivers/net/mlx5/mlx5_nl.c

index 9a7da181962e21f76fc97516f62d350d56fa0247..c3264949a71802a5823be8b9f0bee27180a60c38 100644 (file)
@@ -225,6 +225,16 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
                rdma/rdma_netlink.h \
                enum RDMA_NLDEV_ATTR_NDEV_INDEX \
                $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_IFLA_NUM_VF \
+               linux/if_link.h \
+               enum IFLA_NUM_VF \
+               $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_IFLA_EXT_MASK \
+               linux/if_link.h \
+               enum IFLA_EXT_MASK \
+               $(AUTOCONF_OUTPUT)
        $Q sh -- '$<' '$@' \
                HAVE_IFLA_PHYS_SWITCH_ID \
                linux/if_link.h \
index 0cf2f0873eb59d565a81a2c2061e2736dee7265d..e3cb9bc201a2754e6d033b1f74f0d2e2636dd956 100644 (file)
@@ -133,6 +133,10 @@ if build
                'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
                [ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
                'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
+               [ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
+               'IFLA_NUM_VF' ],
+               [ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
+               'IFLA_EXT_MASK' ],
                [ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
                'IFLA_PHYS_SWITCH_ID' ],
                [ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h',
index 8141bda3fb4b59e00dd6fe611eb7398206c4d620..66411335ca1b95ff89802dbfe314b4880cd71e73 100644 (file)
@@ -13,7 +13,6 @@
 #include <errno.h>
 #include <net/if.h>
 #include <sys/mman.h>
-#include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 
 /* Verbs header. */
@@ -998,9 +997,24 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE);
        priv->nl_sn = 0;
        priv->representor = !!switch_info->representor;
+       priv->master = !!switch_info->master;
        priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
-       priv->representor_id =
-               switch_info->representor ? switch_info->port_name : -1;
+       /*
+        * Currently we support single E-Switch per PF configurations
+        * only and vport_id field contains the vport index for
+        * associated VF, which is deduced from representor port name.
+        * For example, let's have the IB device port 10, it has
+        * attached network device eth0, which has port name attribute
+        * pf0vf2, we can deduce the VF number as 2, and set vport index
+        * as 3 (2+1). This assigning schema should be changed if the
+        * multiple E-Switch instances per PF configurations or/and PCI
+        * subfunctions are added.
+        */
+       priv->vport_id = switch_info->representor ?
+                        switch_info->port_name + 1 : -1;
+       /* representor_id field keeps the unmodified port/VF index. */
+       priv->representor_id = switch_info->representor ?
+                              switch_info->port_name : -1;
        /*
         * Look for sibling devices in order to reuse their switch domain
         * if any, otherwise allocate one.
index 88ffb19247da17aa9c5333db7370067f3c6125ce..70a611f15a3fcf00d21b8211b54deaebe98fa047 100644 (file)
@@ -205,7 +205,9 @@ struct mlx5_priv {
        uint16_t mtu; /* Configured MTU. */
        unsigned int isolated:1; /* Whether isolated mode is enabled. */
        unsigned int representor:1; /* Device is a port representor. */
+       unsigned int master:1; /* Device is a E-Switch master. */
        uint16_t domain_id; /* Switch domain identifier. */
+       uint16_t vport_id; /* Associated VF vport index (if any). */
        int32_t representor_id; /* Port representor identifier. */
        /* RX/TX queues. */
        unsigned int rxqs_n; /* RX queues array size. */
index 2e8a2969fe7328b03b7f5ece85618497ed780021..933839c67c559d597a7c84d78fe6ff428e3ff406 100644 (file)
@@ -1362,8 +1362,10 @@ mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
                .port_name = 0,
                .switch_id = 0,
        };
+       DIR *dir;
        bool port_name_set = false;
        bool port_switch_id_set = false;
+       bool device_dir = false;
        char c;
        int ret;
 
@@ -1376,6 +1378,8 @@ mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
              ifname);
        MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
              ifname);
+       MKSTR(pci_device, "/sys/class/net/%s/device",
+             ifname);
 
        file = fopen(phys_port_name, "rb");
        if (file != NULL) {
@@ -1394,9 +1398,21 @@ mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
                fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
                c == '\n';
        fclose(file);
-       data.master = port_switch_id_set && !port_name_set;
-       data.representor = port_switch_id_set && port_name_set;
+       dir = opendir(pci_device);
+       if (dir != NULL) {
+               closedir(dir);
+               device_dir = true;
+       }
+       data.master = port_switch_id_set && (!port_name_set || device_dir);
+       data.representor = port_switch_id_set && port_name_set && !device_dir;
        *info = data;
+       assert(!(data.master && data.representor));
+       if (data.master && data.representor) {
+               DRV_LOG(ERR, "ifindex %u device is recognized as master"
+                            " and as representor", ifindex);
+               rte_errno = ENODEV;
+               return -rte_errno;
+       }
        return 0;
 }
 
index 8a10109f9b5a8f8687c0dc1a3ae60efb0360b7a2..58bbfed1d9e9665cce50dc35ad03339fdae1af85 100644 (file)
 #endif
 
 /* These are normally found in linux/if_link.h. */
+#ifndef HAVE_IFLA_NUM_VF
+#define IFLA_NUM_VF 21
+#endif
+#ifndef HAVE_IFLA_EXT_MASK
+#define IFLA_EXT_MASK 29
+#endif
 #ifndef HAVE_IFLA_PHYS_SWITCH_ID
 #define IFLA_PHYS_SWITCH_ID 36
 #endif
@@ -837,6 +843,7 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
        size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
        bool port_name_set = false;
        bool switch_id_set = false;
+       bool num_vf_set = false;
 
        if (nh->nlmsg_type != RTM_NEWLINK)
                goto error;
@@ -848,6 +855,9 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
                if (ra->rta_len > nh->nlmsg_len - off)
                        goto error;
                switch (ra->rta_type) {
+               case IFLA_NUM_VF:
+                       num_vf_set = true;
+                       break;
                case IFLA_PHYS_PORT_NAME:
                        port_name_set =
                                mlx5_translate_port_name((char *)payload,
@@ -864,8 +874,20 @@ mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
                }
                off += RTA_ALIGN(ra->rta_len);
        }
-       info.master = switch_id_set && !port_name_set;
-       info.representor = switch_id_set && port_name_set;
+       if (switch_id_set) {
+               if (info.port_name_new) {
+                       /* New representors naming schema. */
+                       if (port_name_set) {
+                               info.master = (info.port_name == -1);
+                               info.representor = (info.port_name != -1);
+                       }
+               } else {
+                       /* Legacy representors naming schema. */
+                       info.master = (!port_name_set || num_vf_set);
+                       info.representor = port_name_set && !num_vf_set;
+               }
+       }
+       assert(!(info.master && info.representor));
        memcpy(arg, &info, sizeof(info));
        return 0;
 error:
@@ -893,9 +915,13 @@ mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info)
        struct {
                struct nlmsghdr nh;
                struct ifinfomsg info;
+               struct rtattr rta;
+               uint32_t extmask;
        } req = {
                .nh = {
-                       .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
+                       .nlmsg_len = NLMSG_LENGTH
+                                       (sizeof(req.info) +
+                                        RTA_LENGTH(sizeof(uint32_t))),
                        .nlmsg_type = RTM_GETLINK,
                        .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
                },
@@ -903,11 +929,22 @@ mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info)
                        .ifi_family = AF_UNSPEC,
                        .ifi_index = ifindex,
                },
+               .rta = {
+                       .rta_type = IFLA_EXT_MASK,
+                       .rta_len = RTA_LENGTH(sizeof(int32_t)),
+               },
+               .extmask = RTE_LE32(1),
        };
        int ret;
 
        ret = mlx5_nl_send(nl, &req.nh, seq);
        if (ret >= 0)
                ret = mlx5_nl_recv(nl, seq, mlx5_nl_switch_info_cb, info);
+       if (info->master && info->representor) {
+               DRV_LOG(ERR, "ifindex %u device is recognized as master"
+                            " and as representor", ifindex);
+               rte_errno = ENODEV;
+               ret = -rte_errno;
+       }
        return ret;
 }