net/mlx5: retry on link update failure
[dpdk.git] / drivers / net / mlx5 / mlx5_ethdev.c
index dfd9e97..5f05b2b 100644 (file)
@@ -239,51 +239,6 @@ mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
        return -rte_errno;
 }
 
-/**
- * Get interface name for the specified device, uses the extra base
- * device resources to perform Netlink requests.
- *
- * This is a port representor-aware version of mlx5_get_master_ifname().
- *
- * @param[in] base
- *   Pointer to Ethernet device to use Netlink socket from
- *   to perfrom requests.
- * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[out] ifname
- *   Interface name output buffer.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_get_ifname_base(const struct rte_eth_dev *base,
-                    const struct rte_eth_dev *dev,
-                    char (*ifname)[IF_NAMESIZE])
-{
-       struct mlx5_priv *priv = dev->data->dev_private;
-       struct mlx5_priv *priv_base = base->data->dev_private;
-       unsigned int ifindex;
-
-       assert(priv);
-       assert(priv->sh);
-       assert(priv_base);
-       ifindex = priv_base->nl_socket_rdma >= 0 ?
-                 mlx5_nl_ifindex(priv_base->nl_socket_rdma,
-                                 priv->sh->ibdev_name,
-                                 priv->ibv_port) : 0;
-       if (!ifindex) {
-               if (!priv->representor)
-                       return mlx5_get_master_ifname(priv->sh->ibdev_path,
-                                                     ifname);
-               rte_errno = ENXIO;
-               return -rte_errno;
-       }
-       if (if_indextoname(ifindex, &(*ifname)[0]))
-               return 0;
-       rte_errno = errno;
-       return -rte_errno;
-}
 /**
  * Get the interface index from device name.
  *
@@ -345,51 +300,6 @@ error:
        return -rte_errno;
 }
 
-/**
- * Perform ifreq ioctl() on specified Ethernet device,
- * ifindex, name and other attributes are requested
- * on the base device to avoid specified device Netlink
- * socket sharing (this is not thread-safe).
- *
- * @param[in] base
- *   Pointer to Ethernet device to get dev attributes.
- * @param[in] dev
- *   Pointer to Ethernet device to perform ioctl.
- * @param req
- *   Request number to pass to ioctl().
- * @param[out] ifr
- *   Interface request structure output buffer.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_ifreq_base(const struct rte_eth_dev *base,
-               const struct rte_eth_dev *dev,
-               int req, struct ifreq *ifr)
-{
-       int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
-       int ret = 0;
-
-       if (sock == -1) {
-               rte_errno = errno;
-               return -rte_errno;
-       }
-       ret = mlx5_get_ifname_base(base, dev, &ifr->ifr_name);
-       if (ret)
-               goto error;
-       ret = ioctl(sock, req, ifr);
-       if (ret == -1) {
-               rte_errno = errno;
-               goto error;
-       }
-       close(sock);
-       return 0;
-error:
-       close(sock);
-       return -rte_errno;
-}
-
 /**
  * Get device MTU.
  *
@@ -632,7 +542,7 @@ mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
  * @param[out] info
  *   Info structure output buffer.
  */
-void
+int
 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
@@ -670,16 +580,34 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
        info->switch_info.domain_id = priv->domain_id;
        info->switch_info.port_id = priv->representor_id;
        if (priv->representor) {
-               unsigned int i = mlx5_dev_to_port_id(dev->device, NULL, 0);
-               uint16_t port_id[i];
+               uint16_t port_id;
 
-               i = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, i), i);
-               while (i--) {
+               if (priv->pf_bond >= 0) {
+                       /*
+                        * Switch port ID is opaque value with driver defined
+                        * format. Push the PF index in bonding configurations
+                        * in upper four bits of port ID. If we get too many
+                        * representors (more than 4K) or PFs (more than 15)
+                        * this approach must be reconsidered.
+                        */
+                       if ((info->switch_info.port_id >>
+                               MLX5_PORT_ID_BONDING_PF_SHIFT) ||
+                           priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) {
+                               DRV_LOG(ERR, "can't update switch port ID"
+                                            " for bonding device");
+                               assert(false);
+                               return -ENODEV;
+                       }
+                       info->switch_info.port_id |=
+                               priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT;
+               }
+               MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
                        struct mlx5_priv *opriv =
-                               rte_eth_devices[port_id[i]].data->dev_private;
+                               rte_eth_devices[port_id].data->dev_private;
 
                        if (!opriv ||
                            opriv->representor ||
+                           opriv->sh != priv->sh ||
                            opriv->domain_id != priv->domain_id)
                                continue;
                        /*
@@ -690,6 +618,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
                        break;
                }
        }
+       return 0;
 }
 
 /**
@@ -805,11 +734,13 @@ mlx5_find_master_dev(struct rte_eth_dev *dev)
        priv = dev->data->dev_private;
        domain_id = priv->domain_id;
        assert(priv->representor);
-       RTE_ETH_FOREACH_DEV_OF(port_id, dev->device) {
-               priv = rte_eth_devices[port_id].data->dev_private;
-               if (priv &&
-                   priv->master &&
-                   priv->domain_id == domain_id)
+       MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
+               struct mlx5_priv *opriv =
+                       rte_eth_devices[port_id].data->dev_private;
+               if (opriv &&
+                   opriv->master &&
+                   opriv->domain_id == domain_id &&
+                   opriv->sh == priv->sh)
                        return &rte_eth_devices[port_id];
        }
        return NULL;
@@ -872,15 +803,7 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
                                ifr = (struct ifreq) {
                                        .ifr_data = (void *)&edata,
                                };
-                               /*
-                                * Use special version of mlx5_ifreq()
-                                * to get master device name with local
-                                * device Netlink socket. Using master
-                                * device Netlink socket is not thread
-                                * safe.
-                                */
-                               ret = mlx5_ifreq_base(dev, master,
-                                                     SIOCETHTOOL, &ifr);
+                               ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
                        }
                }
                if (ret) {
@@ -977,12 +900,7 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
                                ifr = (struct ifreq) {
                                        .ifr_data = (void *)&gcmd,
                                };
-                               /*
-                                * Avoid using master Netlink socket.
-                                * This is not thread-safe.
-                                */
-                               ret = mlx5_ifreq_base(dev, master,
-                                                     SIOCETHTOOL, &ifr);
+                               ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
                        }
                }
                if (ret) {
@@ -1003,7 +921,7 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
 
        *ecmd = gcmd;
        ifr.ifr_data = (void *)ecmd;
-       ret = mlx5_ifreq_base(dev, master ? master : dev, SIOCETHTOOL, &ifr);
+       ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr);
        if (ret) {
                DRV_LOG(DEBUG,
                        "port %u ioctl(SIOCETHTOOL,"
@@ -1011,7 +929,8 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
                        dev->data->port_id, strerror(rte_errno));
                return ret;
        }
-       dev_link.link_speed = ecmd->speed;
+       dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE :
+                                                           ecmd->speed;
        sc = ecmd->link_mode_masks[0] |
                ((uint64_t)ecmd->link_mode_masks[1] << 32);
        priv->link_speed_capa = 0;
@@ -1080,6 +999,7 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
        int ret;
        struct rte_eth_link dev_link;
        time_t start_time = time(NULL);
+       int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT;
 
        do {
                ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
@@ -1088,7 +1008,7 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
                if (ret == 0)
                        break;
                /* Handle wait to complete situation. */
-               if (wait_to_complete && ret == -EAGAIN) {
+               if ((wait_to_complete || retry) && ret == -EAGAIN) {
                        if (abs((int)difftime(time(NULL), start_time)) <
                            MLX5_LINK_STATUS_TIMEOUT) {
                                usleep(0);
@@ -1100,7 +1020,7 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
                } else if (ret < 0) {
                        return ret;
                }
-       } while (wait_to_complete);
+       } while (wait_to_complete || retry-- > 0);
        ret = !!memcmp(&dev->data->dev_link, &dev_link,
                       sizeof(struct rte_eth_link));
        dev->data->dev_link = dev_link;
@@ -1231,10 +1151,10 @@ mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
 }
 
 /**
- * Get PCI information from struct ibv_device.
+ * Get PCI information by sysfs device path.
  *
- * @param device
- *   Pointer to Ethernet device structure.
+ * @param dev_path
+ *   Pointer to device sysfs folder name.
  * @param[out] pci_addr
  *   PCI bus address output buffer.
  *
@@ -1242,12 +1162,12 @@ mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
-                           struct rte_pci_addr *pci_addr)
+mlx5_dev_to_pci_addr(const char *dev_path,
+                    struct rte_pci_addr *pci_addr)
 {
        FILE *file;
        char line[32];
-       MKSTR(path, "%s/device/uevent", device->ibdev_path);
+       MKSTR(path, "%s/device/uevent", dev_path);
 
        file = fopen(path, "rb");
        if (file == NULL) {
@@ -1730,37 +1650,44 @@ mlx5_is_removed(struct rte_eth_dev *dev)
 }
 
 /**
- * Get port ID list of mlx5 instances sharing a common device.
+ * Get the E-Switch parameters by port id.
  *
- * @param[in] dev
- *   Device to look for.
- * @param[out] port_list
- *   Result buffer for collected port IDs.
- * @param port_list_n
- *   Maximum number of entries in result buffer. If 0, @p port_list can be
- *   NULL.
+ * @param[in] port
+ *   Device port id.
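+ *
+ * The E-Switch domain id and E-Switch port id are not separate output
+ * parameters: only the private data pointer described below is returned,
+ * and NULL is returned for ports that are neither representor nor master.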
  *
  * @return
- *   Number of matching instances regardless of the @p port_list_n
- *   parameter, 0 if none were found.
+ *   pointer to device private data structure containing data needed
+ *   on success, NULL otherwise and rte_errno is set.
  */
-unsigned int
-mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list,
-                   unsigned int port_list_n)
+struct mlx5_priv *
+mlx5_port_to_eswitch_info(uint16_t port)
 {
-       uint16_t id;
-       unsigned int n = 0;
+       struct rte_eth_dev *dev;
+       struct mlx5_priv *priv;
 
-       RTE_ETH_FOREACH_DEV_OF(id, dev) {
-               if (n < port_list_n)
-                       port_list[n] = id;
-               n++;
+       if (port >= RTE_MAX_ETHPORTS) {
+               rte_errno = EINVAL;
+               return NULL;
+       }
+       if (!rte_eth_dev_is_valid_port(port)) {
+               rte_errno = ENODEV;
+               return NULL;
+       }
+       dev = &rte_eth_devices[port];
+       priv = dev->data->dev_private;
+       if (!(priv->representor || priv->master)) {
+               rte_errno = EINVAL;
+               return NULL;
        }
-       return n;
+       return priv;
 }
 
 /**
- * Get the E-Switch domain id this port belongs to.
+ * Get the E-Switch parameters by device instance.
  *
  * @param[in] port
  *   Device port id.
@@ -1770,34 +1697,20 @@ mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list,
  *   The port id of the port in the E-Switch.
  *
  * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   pointer to device private data structure containing data needed
+ *   on success, NULL otherwise and rte_errno is set.
  */
-int
-mlx5_port_to_eswitch_info(uint16_t port,
-                         uint16_t *es_domain_id, uint16_t *es_port_id)
+struct mlx5_priv *
+mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev)
 {
-       struct rte_eth_dev *dev;
        struct mlx5_priv *priv;
 
-       if (port >= RTE_MAX_ETHPORTS) {
-               rte_errno = EINVAL;
-               return -rte_errno;
-       }
-       if (!rte_eth_dev_is_valid_port(port)) {
-               rte_errno = ENODEV;
-               return -rte_errno;
-       }
-       dev = &rte_eth_devices[port];
        priv = dev->data->dev_private;
        if (!(priv->representor || priv->master)) {
                rte_errno = EINVAL;
-               return -rte_errno;
+               return NULL;
        }
-       if (es_domain_id)
-               *es_domain_id = priv->domain_id;
-       if (es_port_id)
-               *es_port_id = priv->vport_id;
-       return 0;
+       return priv;
 }
 
 /**
@@ -2030,3 +1943,89 @@ mlx5_translate_port_name(const char *port_name_in,
        port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
        return;
 }
+
+/**
+ * DPDK callback to retrieve plug-in module EEPROM information (type and size).
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] modinfo
+ *   Module information; type and eeprom_len are filled in on success.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_get_module_info(struct rte_eth_dev *dev,
+                    struct rte_eth_dev_module_info *modinfo)
+{
+       struct ethtool_modinfo info = {
+               .cmd = ETHTOOL_GMODULEINFO,
+       };
+       struct ifreq ifr = (struct ifreq) {
+               .ifr_data = (void *)&info,
+       };
+       int ret = 0;
+
+       if (!dev || !modinfo) {
+               DRV_LOG(WARNING, "missing argument, cannot get module info");
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret) {
+               DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+               return ret;
+       }
+       modinfo->type = info.type;
+       modinfo->eeprom_len = info.eeprom_len;
+       return ret;
+}
+
+/**
+ * DPDK callback to retrieve plug-in module EEPROM data.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[in,out] info
+ *   EEPROM request: offset and length are read, data receives the bytes.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int mlx5_get_module_eeprom(struct rte_eth_dev *dev,
+                          struct rte_dev_eeprom_info *info)
+{
+       struct ethtool_eeprom *eeprom;
+       struct ifreq ifr;
+       int ret = 0;
+
+       if (!dev || !info) {
+               DRV_LOG(WARNING, "missing argument, cannot get module eeprom");
+               rte_errno = EINVAL;
+               return -rte_errno;
+       }
+       eeprom = rte_calloc(__func__, 1,
+                           (sizeof(struct ethtool_eeprom) + info->length), 0);
+       if (!eeprom) {
+               DRV_LOG(WARNING, "port %u cannot allocate memory for "
+                       "eeprom data", dev->data->port_id);
+               rte_errno = ENOMEM;
+               return -rte_errno;
+       }
+       eeprom->cmd = ETHTOOL_GMODULEEEPROM;
+       eeprom->offset = info->offset;
+       eeprom->len = info->length;
+       ifr = (struct ifreq) {
+               .ifr_data = (void *)eeprom,
+       };
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+       if (ret)
+               DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
+                       dev->data->port_id, strerror(rte_errno));
+       else
+               rte_memcpy(info->data, eeprom->data, info->length);
+       rte_free(eeprom);
+       return ret;
+}