net/mlx4: fix crash when configure is not called
author Yongseok Koh <yskoh@mellanox.com>
Sun, 27 May 2018 07:04:55 +0000 (00:04 -0700)
committer Shahaf Shuler <shahafs@mellanox.com>
Mon, 28 May 2018 14:35:05 +0000 (16:35 +0200)
Although uncommon, applications may destroy a device immediately after
probing it without going through dev_configure() first.

This patch addresses a crash which occurs when mlx4_dev_close() calls
mlx4_mr_release() due to an uninitialized entry in the private structure.

In addition, MR cache initialization takes place during device
configuration. When the device is reconfigured multiple times, for example
when changing the number of queues on the fly, a deadlock can happen.

This patch moves MR cache initialization from the device configuration
function to the probe function to make sure it is performed only once.

Fixes: 9797bfcce1c9 ("net/mlx4: add new memory region support")

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Xueming Li <xuemingl@mellanox.com>
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
drivers/net/mlx4/mlx4.c

index 9f8ecd0..a29814b 100644 (file)
@@ -98,20 +98,6 @@ mlx4_dev_configure(struct rte_eth_dev *dev)
        if (ret)
                ERROR("%p: interrupt handler installation failed",
                      (void *)dev);
-       /*
-        * Once the device is added to the list of memory event callback, its
-        * global MR cache table cannot be expanded on the fly because of
-        * deadlock. If it overflows, lookup should be done by searching MR list
-        * linearly, which is slow.
-        */
-       if (mlx4_mr_btree_init(&priv->mr.cache, MLX4_MR_BTREE_CACHE_N * 2,
-                              dev->device->numa_node)) {
-               /* rte_errno is already set. */
-               return -rte_errno;
-       }
-       rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
-       LIST_INSERT_HEAD(&mlx4_mem_event_cb_list, priv, mem_event_cb);
-       rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
 exit:
        return ret;
 }
@@ -761,6 +747,23 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                /* Update link status once if waiting for LSC. */
                if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                        mlx4_link_update(eth_dev, 0);
+               /*
+                * Once the device is added to the list of memory event
+                * callback, its global MR cache table cannot be expanded
+                * on the fly because of deadlock. If it overflows, lookup
+                * should be done by searching MR list linearly, which is slow.
+                */
+               err = mlx4_mr_btree_init(&priv->mr.cache,
+                                        MLX4_MR_BTREE_CACHE_N * 2,
+                                        eth_dev->device->numa_node);
+               if (err) {
+                       /* rte_errno is already set. */
+                       goto port_error;
+               }
+               /* Add device to memory callback list. */
+               rte_rwlock_write_lock(&mlx4_mem_event_rwlock);
+               LIST_INSERT_HEAD(&mlx4_mem_event_cb_list, priv, mem_event_cb);
+               rte_rwlock_write_unlock(&mlx4_mem_event_rwlock);
                rte_eth_dev_probing_finish(eth_dev);
                continue;
 port_error: