net/mlx5: fix double free on error handling
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 99b6223..05d4f02 100644
@@ -13,6 +13,7 @@
 #include <errno.h>
 #include <net/if.h>
 #include <sys/mman.h>
+#include <linux/rtnetlink.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 /* Device parameter to enable hardware Rx vector. */
 #define MLX5_RX_VEC_EN "rx_vec_en"
 
+/* Allow L3 VXLAN flow creation. */
+#define MLX5_L3_VXLAN_EN "l3_vxlan_en"
+
+/* Activate Netlink support in VF mode. */
+#define MLX5_VF_NL_EN "vf_nl_en"
+
 #ifndef HAVE_IBV_MLX5_MOD_MPW
 #define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
 #define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
@@ -193,6 +200,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                priv->txqs_n = 0;
                priv->txqs = NULL;
        }
+       mlx5_flow_delete_drop_queue(dev);
        if (priv->pd != NULL) {
                assert(priv->ctx != NULL);
                claim_zero(mlx5_glue->dealloc_pd(priv->pd));
@@ -205,6 +213,10 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                rte_free(priv->reta_idx);
        if (priv->primary_socket)
                mlx5_socket_uninit(dev);
+       if (priv->config.vf)
+               mlx5_nl_mac_addr_flush(dev);
+       if (priv->nl_socket >= 0)
+               close(priv->nl_socket);
        ret = mlx5_hrxq_ibv_verify(dev);
        if (ret)
                DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -269,6 +281,7 @@ const struct eth_dev_ops mlx5_dev_ops = {
        .mac_addr_remove = mlx5_mac_addr_remove,
        .mac_addr_add = mlx5_mac_addr_add,
        .mac_addr_set = mlx5_mac_addr_set,
+       .set_mc_addr_list = mlx5_set_mc_addr_list,
        .mtu_set = mlx5_dev_set_mtu,
        .vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
        .vlan_offload_set = mlx5_vlan_offload_set,
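
The new .set_mc_addr_list hook is what routes the generic ethdev multicast API into this PMD. As an application-side illustration only (a minimal sketch against the 18.05-era ethdev API; the function name and addresses below are hypothetical and not part of the patch):

    #include <rte_ethdev.h>
    #include <rte_ether.h>

    /* Program two multicast MAC filters on a port; the call lands in the
     * driver's .set_mc_addr_list callback, i.e. mlx5_set_mc_addr_list(). */
    static int
    set_two_mc_addrs(uint16_t port_id)
    {
            struct ether_addr mc[2] = {
                    { .addr_bytes = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 } },
                    { .addr_bytes = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0xfb } },
            };

            return rte_eth_dev_set_mc_addr_list(port_id, mc, 2);
    }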
@@ -321,6 +334,7 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = {
        .mac_addr_remove = mlx5_mac_addr_remove,
        .mac_addr_add = mlx5_mac_addr_add,
        .mac_addr_set = mlx5_mac_addr_set,
+       .set_mc_addr_list = mlx5_set_mc_addr_list,
        .mtu_set = mlx5_dev_set_mtu,
        .vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
        .vlan_offload_set = mlx5_vlan_offload_set,
@@ -407,6 +421,10 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
                config->tx_vec_en = !!tmp;
        } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) {
                config->rx_vec_en = !!tmp;
+       } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {
+               config->l3_vxlan_en = !!tmp;
+       } else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
+               config->vf_nl_en = !!tmp;
        } else {
                DRV_LOG(WARNING, "%s: unknown parameter", key);
                rte_errno = EINVAL;
@@ -438,6 +456,8 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
                MLX5_TXQ_MAX_INLINE_LEN,
                MLX5_TX_VEC_EN,
                MLX5_RX_VEC_EN,
+               MLX5_L3_VXLAN_EN,
+               MLX5_VF_NL_EN,
                NULL,
        };
        struct rte_kvargs *kvlist;
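
Usage note: the two new keys are passed like any other mlx5 devarg, e.g. -w 0000:05:00.0,l3_vxlan_en=1,vf_nl_en=0 (the PCI address is only an example), and any non-zero value enables the feature. A minimal, self-contained sketch of that key/value handling with librte_kvargs, using hypothetical names rather than the PMD's own helpers:

    #include <stdlib.h>
    #include <rte_kvargs.h>

    /* Mirrors mlx5_args_check(): the value string becomes a boolean via
     * strtoul() and double negation. */
    static int
    bool_handler(const char *key, const char *val, void *opaque)
    {
            (void)key;
            *(int *)opaque = !!strtoul(val, NULL, 0);
            return 0;
    }

    static int
    parse_l3_vxlan_en(const char *devargs, int *l3_vxlan_en)
    {
            const char *keys[] = { "l3_vxlan_en", "vf_nl_en", NULL };
            struct rte_kvargs *kvlist = rte_kvargs_parse(devargs, keys);

            if (kvlist == NULL)
                    return -1;
            rte_kvargs_process(kvlist, "l3_vxlan_en", bool_handler, l3_vxlan_en);
            rte_kvargs_free(kvlist);
            return 0;
    }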
@@ -477,6 +497,20 @@ static struct rte_pci_driver mlx5_driver;
  */
 static void *uar_base;
 
+static int
+find_lower_va_bound(const struct rte_memseg_list *msl __rte_unused,
+               const struct rte_memseg *ms, void *arg)
+{
+       void **addr = arg;
+
+       if (*addr == NULL)
+               *addr = ms->addr;
+       else
+               *addr = RTE_MIN(*addr, ms->addr);
+
+       return 0;
+}
+
 /**
  * Reserve UAR address space for primary process.
  *
@@ -491,21 +525,14 @@ mlx5_uar_init_primary(struct rte_eth_dev *dev)
 {
        struct priv *priv = dev->data->dev_private;
        void *addr = (void *)0;
-       int i;
-       const struct rte_mem_config *mcfg;
 
        if (uar_base) { /* UAR address space mapped. */
                priv->uar_base = uar_base;
                return 0;
        }
        /* find out lower bound of hugepage segments */
-       mcfg = rte_eal_get_configuration()->mem_config;
-       for (i = 0; i < RTE_MAX_MEMSEG && mcfg->memseg[i].addr; i++) {
-               if (addr)
-                       addr = RTE_MIN(addr, mcfg->memseg[i].addr);
-               else
-                       addr = mcfg->memseg[i].addr;
-       }
+       rte_memseg_walk(find_lower_va_bound, &addr);
+
        /* keep distance to hugepages to minimize potential conflicts. */
        addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE);
        /* anonymous mmap, no real memory consumption. */
@@ -597,9 +624,12 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        int err = 0;
        struct ibv_context *attr_ctx = NULL;
        struct ibv_device_attr_ex device_attr;
+       unsigned int vf;
        unsigned int mps;
        unsigned int cqe_comp;
        unsigned int tunnel_en = 0;
+       unsigned int swp = 0;
+       unsigned int verb_priorities = 0;
        int idx;
        int i;
        struct mlx5dv_context attrs_out = {0};
@@ -646,27 +676,38 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                        continue;
                DRV_LOG(INFO, "PCI information matches, using device \"%s\"",
                        list[i]->name);
+               vf = ((pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
+                     (pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) ||
+                     (pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) ||
+                     (pci_dev->id.device_id ==
+                      PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF));
                attr_ctx = mlx5_glue->open_device(list[i]);
                rte_errno = errno;
                err = rte_errno;
                break;
        }
        if (attr_ctx == NULL) {
-               mlx5_glue->free_device_list(list);
                switch (err) {
                case 0:
                        DRV_LOG(ERR,
                                "cannot access device, is mlx5_ib loaded?");
                        err = ENODEV;
-                       goto error;
+                       break;
                case EINVAL:
                        DRV_LOG(ERR,
                                "cannot use device, are drivers up to date?");
-                       goto error;
+                       break;
                }
+               goto error;
        }
        ibv_dev = list[i];
        DRV_LOG(DEBUG, "device opened");
+#ifdef HAVE_IBV_MLX5_MOD_SWP
+       attrs_out.comp_mask |= MLX5DV_CONTEXT_MASK_SWP;
+#endif
        /*
         * Multi-packet send is supported by ConnectX-4 Lx PF as well
         * as all ConnectX-5 devices.
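
The dropped free_device_list() call in the hunk above is the double free named in the commit title: the shared error: label (outside this hunk) already releases the device list, so freeing it inline and then jumping to that label released it twice whenever open_device() failed. A standalone sketch of the corrected pattern, using plain Verbs calls rather than the PMD's glue layer:

    #include <errno.h>
    #include <stddef.h>
    #include <infiniband/verbs.h>

    /* Open the first available Verbs device. Every exit path goes through
     * the single cleanup label, so the device list is freed exactly once. */
    static int
    open_first_device(struct ibv_context **out)
    {
            struct ibv_device **list;
            int err = 0;

            *out = NULL;
            list = ibv_get_device_list(NULL);
            if (list == NULL)
                    return errno;
            if (list[0] == NULL) {
                    err = ENODEV;
                    goto error;             /* no inline free before the jump */
            }
            *out = ibv_open_device(list[0]);
            if (*out == NULL)
                    err = errno ? errno : ENODEV;
    error:
            ibv_free_device_list(list);     /* single point of release */
            return err;
    }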
@@ -687,6 +728,11 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                DRV_LOG(DEBUG, "MPW isn't supported");
                mps = MLX5_MPW_DISABLED;
        }
+#ifdef HAVE_IBV_MLX5_MOD_SWP
+       if (attrs_out.comp_mask & MLX5DV_CONTEXT_MASK_SWP)
+               swp = attrs_out.sw_parsing_caps.sw_parsing_offloads;
+       DRV_LOG(DEBUG, "SWP support: %u", swp);
+#endif
        if (RTE_CACHE_LINE_SIZE == 128 &&
            !(attrs_out.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP))
                cqe_comp = 0;
@@ -705,8 +751,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        DRV_LOG(WARNING,
                "tunnel offloading disabled due to old OFED/rdma-core version");
 #endif
-       if (mlx5_glue->query_device_ex(attr_ctx, NULL, &device_attr)) {
-               err = errno;
+       err = mlx5_glue->query_device_ex(attr_ctx, NULL, &device_attr);
+       if (err) {
+               DEBUG("ibv_query_device_ex() failed");
                goto error;
        }
        DRV_LOG(INFO, "%u port(s) detected",
@@ -733,6 +780,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                        .txq_inline = MLX5_ARG_UNSET,
                        .txqs_inline = MLX5_ARG_UNSET,
                        .inline_max_packet_sz = MLX5_ARG_UNSET,
+                       .vf_nl_en = 1,
+                       .swp = !!swp,
                };
 
                len = snprintf(name, sizeof(name), PCI_PRI_FMT,
@@ -752,16 +801,22 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                        eth_dev->device = &pci_dev->device;
                        eth_dev->dev_ops = &mlx5_dev_sec_ops;
                        err = mlx5_uar_init_secondary(eth_dev);
-                       if (err)
+                       if (err) {
+                               err = rte_errno;
                                goto error;
+                       }
                        /* Receive command fd from primary process */
                        err = mlx5_socket_connect(eth_dev);
-                       if (err)
+                       if (err < 0) {
+                               err = rte_errno;
                                goto error;
+                       }
                        /* Remap UAR for Tx queues. */
                        err = mlx5_tx_uar_remap(eth_dev, err);
-                       if (err)
+                       if (err) {
+                               err = rte_errno;
                                goto error;
+                       }
                        /*
                         * Ethdev pointer is still required as input since
                         * the primary device is not accessible from the
@@ -825,11 +880,12 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                if (err) {
                        DRV_LOG(ERR, "failed to process device arguments: %s",
                                strerror(err));
+                       err = rte_errno;
                        goto port_error;
                }
-               if (mlx5_glue->query_device_ex(ctx, NULL, &device_attr_ex)) {
+               err = mlx5_glue->query_device_ex(ctx, NULL, &device_attr_ex);
+               if (err) {
                        DRV_LOG(ERR, "ibv_query_device_ex() failed");
-                       err = errno;
                        goto port_error;
                }
                config.hw_csum = !!(device_attr_ex.device_cap_flags_ex &
@@ -869,6 +925,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                DRV_LOG(DEBUG,
                        "hardware Rx end alignment padding is %ssupported",
                        (config.hw_padding ? "" : "not "));
+               config.vf = vf;
                config.tso = ((device_attr_ex.tso_caps.max_tso > 0) &&
                              (device_attr_ex.tso_caps.supported_qpts &
                              (1 << IBV_QPT_RAW_PACKET)));
@@ -903,8 +960,10 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                rte_eth_copy_pci_info(eth_dev, pci_dev);
                eth_dev->device->driver = &mlx5_driver.driver;
                err = mlx5_uar_init_primary(eth_dev);
-               if (err)
+               if (err) {
+                       err = rte_errno;
                        goto port_error;
+               }
                /* Configure the first MAC address by default. */
                if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) {
                        DRV_LOG(ERR,
@@ -934,8 +993,10 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 #endif
                /* Get actual MTU if possible. */
                err = mlx5_get_mtu(eth_dev, &priv->mtu);
-               if (err)
+               if (err) {
+                       err = rte_errno;
                        goto port_error;
+               }
                DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id,
                        priv->mtu);
                /*
@@ -946,6 +1007,14 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                eth_dev->dev_ops = &mlx5_dev_ops;
                /* Register MAC address. */
                claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
+               priv->nl_socket = -1;
+               priv->nl_sn = 0;
+               if (vf && config.vf_nl_en) {
+                       priv->nl_socket = mlx5_nl_init(RTMGRP_LINK);
+                       if (priv->nl_socket < 0)
+                               priv->nl_socket = -1;
+                       mlx5_nl_mac_addr_sync(eth_dev);
+               }
                TAILQ_INIT(&priv->flows);
                TAILQ_INIT(&priv->ctrl_flows);
                /* Hint libmlx5 to use PMD allocator for data plane resources */
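
For context, mlx5_nl_init(RTMGRP_LINK) above opens a Netlink socket subscribed to link-layer events, which the VF MAC address helpers (mlx5_nl_mac_addr_sync() here, mlx5_nl_mac_addr_flush() in mlx5_dev_close()) then use. A rough, generic illustration of such a socket setup, not the PMD's actual implementation:

    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <linux/rtnetlink.h>

    /* Open an rtnetlink socket bound to the RTMGRP_LINK multicast group,
     * i.e. the group passed to mlx5_nl_init() above. */
    static int
    open_rtnl_link_socket(void)
    {
            struct sockaddr_nl sa;
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

            if (fd < 0)
                    return -1;
            memset(&sa, 0, sizeof(sa));
            sa.nl_family = AF_NETLINK;
            sa.nl_groups = RTMGRP_LINK;
            if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
                    close(fd);
                    return -1;
            }
            return fd;
    }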
@@ -961,8 +1030,31 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                DRV_LOG(DEBUG, "port %u forcing Ethernet interface up",
                        eth_dev->data->port_id);
                mlx5_set_link_up(eth_dev);
+               /*
+                * Even though the interrupt handler is not installed yet,
+                * interrupts will still trigger on the async_fd from the
+                * Verbs context returned by ibv_open_device().
+                */
+               mlx5_link_update(eth_dev, 0);
                /* Store device configuration on private structure. */
                priv->config = config;
+               /* Create drop queue. */
+               err = mlx5_flow_create_drop_queue(eth_dev);
+               if (err) {
+                       DRV_LOG(ERR, "port %u drop queue allocation failed: %s",
+                               eth_dev->data->port_id, strerror(rte_errno));
+                       err = rte_errno;
+                       goto port_error;
+               }
+               /* Supported Verbs flow priority number detection. */
+               if (verb_priorities == 0)
+                       verb_priorities = mlx5_get_max_verbs_prio(eth_dev);
+               if (verb_priorities < MLX5_VERBS_FLOW_PRIO_8) {
+                       DRV_LOG(ERR, "port %u wrong Verbs flow priorities: %u",
+                               eth_dev->data->port_id, verb_priorities);
+                       goto port_error;
+               }
+               priv->config.max_verbs_prio = verb_priorities;
                continue;
 port_error:
                if (priv)
@@ -1182,8 +1274,10 @@ RTE_INIT(rte_mlx5_pmd_init);
 static void
 rte_mlx5_pmd_init(void)
 {
-       /* Build the static table for ptype conversion. */
+       /* Build the static tables for Verbs conversion. */
        mlx5_set_ptype_table();
+       mlx5_set_cksum_table();
+       mlx5_set_swp_types_table();
        /*
         * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
         * huge pages. Calling ibv_fork_init() during init allows