vdpa/mlx5: retry VAR allocation during vDPA restart
author: Xueming Li <xuemingl@nvidia.com>
Fri, 15 Oct 2021 15:05:45 +0000 (23:05 +0800)
committer: Maxime Coquelin <maxime.coquelin@redhat.com>
Thu, 21 Oct 2021 12:24:21 +0000 (14:24 +0200)
VAR is the device memory space for the virtio queue doorbells;
QEMU can mmap it directly to speed up doorbell pushes.

On a busy system, QEMU takes time to release VAR resources during driver
shutdown. If vDPA is restarted quickly, the VAR allocation fails with
error 28 (ENOSPC) since the VAR is a singleton resource per device.

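This patch adds a retry mechanism for VAR allocation: up to 7 attempts,
with the delay between attempts starting at 0.1 second and doubling
after each failure.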

Fixes: 4cae722c1b06 ("vdpa/mlx5: move virtual doorbell alloc to probe")
Cc: stable@dpdk.org
Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Reviewed-by: Matan Azrad <matan@nvidia.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
drivers/vdpa/mlx5/mlx5_vdpa.c

index 3971f2e..9a6f647 100644 (file)
@@ -506,6 +506,7 @@ mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev)
 {
        struct mlx5_vdpa_priv *priv = NULL;
        struct mlx5_hca_attr *attr = &cdev->config.hca_attr;
+       int retry;
 
        if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
                DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
@@ -530,7 +531,14 @@ mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev)
        if (attr->num_lag_ports == 0)
                priv->num_lag_ports = 1;
        priv->cdev = cdev;
-       priv->var = mlx5_glue->dv_alloc_var(priv->cdev->ctx, 0);
+       for (retry = 0; retry < 7; retry++) {
+               priv->var = mlx5_glue->dv_alloc_var(priv->cdev->ctx, 0);
+               if (priv->var != NULL)
+                       break;
+               DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.\n", retry);
+               /* Wait Qemu release VAR during vdpa restart, 0.1 sec based. */
+               usleep(100000U << retry);
+       }
        if (!priv->var) {
                DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
                goto error;