mlx5: support link status update
[dpdk.git] / drivers / net / mlx5 / mlx5_ethdev.c
index b6c7d7a..d01dee5 100644 (file)
@@ -32,6 +32,7 @@
  */
 
 #include <stddef.h>
+#include <assert.h>
 #include <unistd.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -44,6 +45,8 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <linux/if.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
@@ -58,6 +61,7 @@
 #endif
 
 #include "mlx5.h"
+#include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 
 /**
@@ -344,6 +348,23 @@ priv_get_mtu(struct priv *priv, uint16_t *mtu)
        return 0;
 }
 
+/**
+ * Set device MTU.
+ *
+ * Thin wrapper that writes the value to the netdevice "mtu" sysfs
+ * attribute through priv_set_sysfs_ulong().
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param mtu
+ *   MTU value to set.
+ *
+ * @return
+ *   0 on success, -1 on failure and errno is set.
+ */
+static int
+priv_set_mtu(struct priv *priv, uint16_t mtu)
+{
+       return priv_set_sysfs_ulong(priv, "mtu", mtu);
+}
+
 /**
  * Set device flags.
  *
@@ -369,6 +390,310 @@ priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)
        return priv_set_sysfs_ulong(priv, "flags", tmp);
 }
 
+/**
+ * Ethernet device configuration.
+ *
+ * Prepare the driver for a given number of TX and RX queues.
+ * Allocate parent RSS queue when several RX queues are requested.
+ *
+ * Caller must hold the private structure lock (see mlx5_dev_configure()).
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, errno value on failure (positive, not negated).
+ */
+static int
+dev_configure(struct rte_eth_dev *dev)
+{
+       struct priv *priv = dev->data->dev_private;
+       unsigned int rxqs_n = dev->data->nb_rx_queues;
+       unsigned int txqs_n = dev->data->nb_tx_queues;
+       unsigned int tmp;
+       int ret;
+
+       /* Keep private queue array pointers in sync with rte_eth_dev data. */
+       priv->rxqs = (void *)dev->data->rx_queues;
+       priv->txqs = (void *)dev->data->tx_queues;
+       if (txqs_n != priv->txqs_n) {
+               INFO("%p: TX queues number update: %u -> %u",
+                    (void *)dev, priv->txqs_n, txqs_n);
+               priv->txqs_n = txqs_n;
+       }
+       /* Nothing to do when the RX queue count is unchanged. */
+       if (rxqs_n == priv->rxqs_n)
+               return 0;
+       INFO("%p: RX queues number update: %u -> %u",
+            (void *)dev, priv->rxqs_n, rxqs_n);
+       /* If RSS is enabled, disable it first. */
+       if (priv->rss) {
+               unsigned int i;
+
+               /* Only if there are no remaining child RX queues.
+                * Tearing down the parent while children still exist would
+                * leave them dangling, hence the EINVAL. */
+               for (i = 0; (i != priv->rxqs_n); ++i)
+                       if ((*priv->rxqs)[i] != NULL)
+                               return EINVAL;
+               rxq_cleanup(&priv->rxq_parent);
+               priv->rss = 0;
+               priv->rxqs_n = 0;
+       }
+       if (rxqs_n <= 1) {
+               /* Nothing else to do. */
+               priv->rxqs_n = rxqs_n;
+               return 0;
+       }
+       /* Allocate a new RSS parent queue if supported by hardware. */
+       if (!priv->hw_rss) {
+               ERROR("%p: only a single RX queue can be configured when"
+                     " hardware doesn't support RSS",
+                     (void *)dev);
+               return EINVAL;
+       }
+       /* Fail if hardware doesn't support that many RSS queues. */
+       if (rxqs_n >= priv->max_rss_tbl_sz) {
+               ERROR("%p: only %u RX queues can be configured for RSS",
+                     (void *)dev, priv->max_rss_tbl_sz);
+               return EINVAL;
+       }
+       /* Update queue count before creating the parent queue — presumably
+        * read by rxq_setup(); TODO confirm. Keep the old value for
+        * rollback. */
+       priv->rss = 1;
+       tmp = priv->rxqs_n;
+       priv->rxqs_n = rxqs_n;
+       ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, NULL, NULL);
+       if (!ret)
+               return 0;
+       /* Failure, rollback. */
+       priv->rss = 0;
+       priv->rxqs_n = tmp;
+       /* rxq_setup() is expected to report errors as positive errno. */
+       assert(ret > 0);
+       return ret;
+}
+
+/**
+ * DPDK callback for Ethernet device configuration.
+ *
+ * Serializes access with the private structure lock and converts the
+ * positive errno value returned by dev_configure() into the negative
+ * errno convention expected by DPDK.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_configure(struct rte_eth_dev *dev)
+{
+       struct priv *priv = dev->data->dev_private;
+       int ret;
+
+       priv_lock(priv);
+       ret = dev_configure(dev);
+       /* dev_configure() never returns a negative value. */
+       assert(ret >= 0);
+       priv_unlock(priv);
+       return -ret;
+}
+
+/**
+ * DPDK callback to get information about the device.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] info
+ *   Info structure output buffer.
+ */
+void
+mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
+{
+       struct priv *priv = dev->data->dev_private;
+       unsigned int max;
+       char ifname[IF_NAMESIZE];
+
+       priv_lock(priv);
+       /* FIXME: we should ask the device for these values. */
+       info->min_rx_bufsize = 32;
+       info->max_rx_pktlen = 65536;
+       /*
+        * Since we need one CQ per QP, the limit is the minimum number
+        * between the two values.
+        */
+       max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ?
+              priv->device_attr.max_qp : priv->device_attr.max_cq);
+       /* Clamp to 65535: max_rx_queues/max_tx_queues are uint16_t. */
+       if (max >= 65535)
+               max = 65535;
+       info->max_rx_queues = max;
+       info->max_tx_queues = max;
+       /* Last array entry is reserved for broadcast. */
+       info->max_mac_addrs = (RTE_DIM(priv->mac) - 1);
+       /* Checksum offloads are advertised only when the HW supports them. */
+       info->rx_offload_capa =
+               (priv->hw_csum ?
+                (DEV_RX_OFFLOAD_IPV4_CKSUM |
+                 DEV_RX_OFFLOAD_UDP_CKSUM |
+                 DEV_RX_OFFLOAD_TCP_CKSUM) :
+                0);
+       info->tx_offload_capa =
+               (priv->hw_csum ?
+                (DEV_TX_OFFLOAD_IPV4_CKSUM |
+                 DEV_TX_OFFLOAD_UDP_CKSUM |
+                 DEV_TX_OFFLOAD_TCP_CKSUM) :
+                0);
+       /* Best effort: if_index stays 0 when the ifname can't be resolved. */
+       if (priv_get_ifname(priv, &ifname) == 0)
+               info->if_index = if_nametoindex(ifname);
+       priv_unlock(priv);
+}
+
+/**
+ * DPDK callback to retrieve physical link information (unlocked version).
+ *
+ * Reads the kernel interface flags (IFF_UP/IFF_RUNNING) for link status
+ * and queries link speed/duplex through the ETHTOOL_GSET ioctl.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param wait_to_complete
+ *   Wait for request completion (ignored).
+ *
+ * @return
+ *   0 if the link status changed since the last call, -1 otherwise
+ *   (including on ioctl failure, in which case errno is set).
+ */
+static int
+mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
+{
+       struct priv *priv = dev->data->dev_private;
+       struct ethtool_cmd edata = {
+               .cmd = ETHTOOL_GSET
+       };
+       struct ifreq ifr;
+       struct rte_eth_link dev_link;
+       int link_speed = 0;
+
+       (void)wait_to_complete;
+       if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
+               WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
+               return -1;
+       }
+       memset(&dev_link, 0, sizeof(dev_link));
+       /* Link is up only when administratively up AND carrier present. */
+       dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
+                               (ifr.ifr_flags & IFF_RUNNING));
+       ifr.ifr_data = &edata;
+       if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
+               WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
+                    strerror(errno));
+               return -1;
+       }
+       /* ethtool reports -1 (SPEED_UNKNOWN) when the speed is unknown. */
+       link_speed = ethtool_cmd_speed(&edata);
+       if (link_speed == -1)
+               dev_link.link_speed = 0;
+       else
+               dev_link.link_speed = link_speed;
+       dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
+                               ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
+       if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
+               /* Link status changed. */
+               dev->data->dev_link = dev_link;
+               return 0;
+       }
+       /* Link status is still the same. */
+       return -1;
+}
+
+/**
+ * DPDK callback to retrieve physical link information.
+ *
+ * Locked wrapper around mlx5_link_update_unlocked().
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param wait_to_complete
+ *   Wait for request completion (ignored).
+ *
+ * @return
+ *   0 if the link status changed since the last call, -1 otherwise.
+ */
+int
+mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
+{
+       struct priv *priv = dev->data->dev_private;
+       int ret;
+
+       priv_lock(priv);
+       ret = mlx5_link_update_unlocked(dev, wait_to_complete);
+       priv_unlock(priv);
+       return ret;
+}
+
+/**
+ * DPDK callback to change the MTU.
+ *
+ * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be
+ * received). Use this as a hint to enable/disable scattered packets support
+ * and improve performance when not needed.
+ * Since failure is not an option, reconfiguring queues on the fly is not
+ * recommended.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param mtu
+ *   New MTU.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
+{
+       struct priv *priv = dev->data->dev_private;
+       int ret = 0;
+       unsigned int i;
+       /* RX handler to restore on exit; upgraded to the scattered variant
+        * below whenever at least one queue requires it. */
+       uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) =
+               mlx5_rx_burst;
+
+       priv_lock(priv);
+       /* Set kernel interface MTU first. */
+       if (priv_set_mtu(priv, mtu)) {
+               ret = errno;
+               WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
+                    strerror(ret));
+               goto out;
+       } else
+               DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
+       priv->mtu = mtu;
+       /* Temporarily replace RX handler with a fake one, assuming it has not
+        * been copied elsewhere. */
+       dev->rx_pkt_burst = removed_rx_burst;
+       /* Make sure everyone has left mlx5_rx_burst() and uses
+        * removed_rx_burst() instead.
+        * NOTE(review): the 1 ms sleep is assumed long enough for in-flight
+        * bursts to drain; there is no hard synchronization here — confirm. */
+       rte_wmb();
+       usleep(1000);
+       /* Reconfigure each RX queue. */
+       for (i = 0; (i != priv->rxqs_n); ++i) {
+               struct rxq *rxq = (*priv->rxqs)[i];
+               unsigned int max_frame_len;
+               int sp;
+
+               if (rxq == NULL)
+                       continue;
+               /* Calculate new maximum frame length according to MTU and
+                * toggle scattered support (sp) if necessary. */
+               max_frame_len = (priv->mtu + ETHER_HDR_LEN +
+                                (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
+               sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM));
+               /* Provide new values to rxq_setup(). */
+               dev->data->dev_conf.rxmode.jumbo_frame = sp;
+               dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
+               ret = rxq_rehash(dev, rxq);
+               if (ret) {
+                       /* Force SP RX if that queue requires it and abort. */
+                       if (rxq->sp)
+                               rx_func = mlx5_rx_burst_sp;
+                       break;
+               }
+               /* Reenable non-RSS queue attributes. No need to check
+                * for errors at this stage. */
+               if (!priv->rss) {
+                       if (priv->started)
+                               rxq_mac_addrs_add(rxq);
+                       if (priv->started && priv->promisc_req)
+                               rxq_promiscuous_enable(rxq);
+                       if (priv->started && priv->allmulti_req)
+                               rxq_allmulticast_enable(rxq);
+               }
+               /* Scattered burst function takes priority. */
+               if (rxq->sp)
+                       rx_func = mlx5_rx_burst_sp;
+       }
+       /* Burst functions can now be called again. */
+       rte_wmb();
+       dev->rx_pkt_burst = rx_func;
+out:
+       priv_unlock(priv);
+       /* ret holds a positive errno (or 0); negate per DPDK convention. */
+       assert(ret >= 0);
+       return -ret;
+}
+
 /**
  * Get PCI information from struct ibv_device.
  *