vdpa/mlx5: pre-create virtq at probing time
[dpdk.git] / drivers / vdpa / mlx5 / mlx5_vdpa.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 #include <unistd.h>
5 #include <net/if.h>
6 #include <sys/socket.h>
7 #include <sys/ioctl.h>
8 #include <sys/mman.h>
9 #include <fcntl.h>
10 #include <netinet/in.h>
11
12 #include <rte_malloc.h>
13 #include <rte_log.h>
14 #include <rte_errno.h>
15 #include <rte_string_fns.h>
16 #include <rte_bus_pci.h>
17 #include <rte_eal_paging.h>
18
19 #include <mlx5_glue.h>
20 #include <mlx5_common.h>
21 #include <mlx5_common_defs.h>
22 #include <mlx5_devx_cmds.h>
23 #include <mlx5_prm.h>
24 #include <mlx5_nl.h>
25
26 #include "mlx5_vdpa_utils.h"
27 #include "mlx5_vdpa.h"
28
#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5

/* Virtio/vhost feature bits unconditionally advertised by this driver. */
#define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
			    (1ULL << VIRTIO_F_ANY_LAYOUT) | \
			    (1ULL << VIRTIO_NET_F_MQ) | \
			    (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
			    (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
			    (1ULL << VHOST_F_LOG_ALL) | \
			    (1ULL << VIRTIO_NET_F_MTU))

/* vhost-user protocol features supported by this driver. */
#define MLX5_VDPA_PROTOCOL_FEATURES \
			    ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_STATUS))

/* Default value for the "no_traffic_time" devarg (see mlx5_vdpa_config_get). */
#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU

/* List of all probed mlx5 vDPA devices, protected by priv_list_lock. */
TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
					      TAILQ_HEAD_INITIALIZER(priv_list);
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;

static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv);
55
56 static struct mlx5_vdpa_priv *
57 mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
58 {
59         struct mlx5_vdpa_priv *priv;
60         int found = 0;
61
62         pthread_mutex_lock(&priv_list_lock);
63         TAILQ_FOREACH(priv, &priv_list, next) {
64                 if (vdev == priv->vdev) {
65                         found = 1;
66                         break;
67                 }
68         }
69         pthread_mutex_unlock(&priv_list_lock);
70         if (!found) {
71                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
72                 rte_errno = EINVAL;
73                 return NULL;
74         }
75         return priv;
76 }
77
78 static int
79 mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
80 {
81         struct mlx5_vdpa_priv *priv =
82                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
83
84         if (priv == NULL) {
85                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
86                 return -1;
87         }
88         *queue_num = priv->caps.max_num_virtio_queues / 2;
89         return 0;
90 }
91
92 static int
93 mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
94 {
95         struct mlx5_vdpa_priv *priv =
96                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
97
98         if (priv == NULL) {
99                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
100                 return -1;
101         }
102         *features = MLX5_VDPA_DEFAULT_FEATURES;
103         if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
104                 *features |= (1ULL << VIRTIO_F_RING_PACKED);
105         if (priv->caps.tso_ipv4)
106                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
107         if (priv->caps.tso_ipv6)
108                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
109         if (priv->caps.tx_csum)
110                 *features |= (1ULL << VIRTIO_NET_F_CSUM);
111         if (priv->caps.rx_csum)
112                 *features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
113         if (priv->caps.virtio_version_1_0)
114                 *features |= (1ULL << VIRTIO_F_VERSION_1);
115         return 0;
116 }
117
118 static int
119 mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
120                 uint64_t *features)
121 {
122         struct mlx5_vdpa_priv *priv =
123                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
124
125         if (priv == NULL) {
126                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
127                 return -1;
128         }
129         *features = MLX5_VDPA_PROTOCOL_FEATURES;
130         return 0;
131 }
132
133 static int
134 mlx5_vdpa_set_vring_state(int vid, int vring, int state)
135 {
136         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
137         struct mlx5_vdpa_priv *priv =
138                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
139         int ret;
140
141         if (priv == NULL) {
142                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
143                 return -EINVAL;
144         }
145         if (vring >= (int)priv->caps.max_num_virtio_queues) {
146                 DRV_LOG(ERR, "Too big vring id: %d.", vring);
147                 return -E2BIG;
148         }
149         pthread_mutex_lock(&priv->vq_config_lock);
150         ret = mlx5_vdpa_virtq_enable(priv, vring, state);
151         pthread_mutex_unlock(&priv->vq_config_lock);
152         return ret;
153 }
154
155 static int
156 mlx5_vdpa_features_set(int vid)
157 {
158         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
159         struct mlx5_vdpa_priv *priv =
160                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
161         uint64_t log_base, log_size;
162         uint64_t features;
163         int ret;
164
165         if (priv == NULL) {
166                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
167                 return -EINVAL;
168         }
169         ret = rte_vhost_get_negotiated_features(vid, &features);
170         if (ret) {
171                 DRV_LOG(ERR, "Failed to get negotiated features.");
172                 return ret;
173         }
174         if (RTE_VHOST_NEED_LOG(features)) {
175                 ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
176                 if (ret) {
177                         DRV_LOG(ERR, "Failed to get log base.");
178                         return ret;
179                 }
180                 ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
181                 if (ret) {
182                         DRV_LOG(ERR, "Failed to set dirty bitmap.");
183                         return ret;
184                 }
185                 DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
186                 ret = mlx5_vdpa_logging_enable(priv, 1);
187                 if (ret) {
188                         DRV_LOG(ERR, "Failed t enable dirty logging.");
189                         return ret;
190                 }
191         }
192         return 0;
193 }
194
/*
 * Propagate the MTU negotiated by vhost to the kernel netdev backing this
 * device. Retries a few times because the kernel may apply the new MTU
 * asynchronously.
 *
 * @return 0 when the kernel MTU matches the vhost MTU, negative otherwise.
 */
static int
mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
{
	struct ifreq request;
	uint16_t vhost_mtu = 0;
	uint16_t kern_mtu = 0;
	int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
	int sock;
	int retries = MLX5_VDPA_MAX_RETRIES;

	if (ret) {
		DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
		return ret;
	}
	/* MTU 0 means vhost did not negotiate one - nothing to do. */
	if (!vhost_mtu) {
		DRV_LOG(DEBUG, "Vhost MTU is 0.");
		return ret;
	}
	/* Resolve the kernel interface name for the ioctl request. */
	ret = mlx5_get_ifname_sysfs
				(mlx5_os_get_ctx_device_name(priv->cdev->ctx),
				 request.ifr_name);
	if (ret) {
		DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
		return ret;
	}
	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	if (sock == -1) {
		DRV_LOG(DEBUG, "Cannot open IF socket.");
		return sock;
	}
	while (retries--) {
		/* Read current kernel MTU, then set it if it differs. */
		ret = ioctl(sock, SIOCGIFMTU, &request);
		if (ret == -1)
			break;
		kern_mtu = request.ifr_mtu;
		DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
			(int)vhost_mtu);
		if (kern_mtu == vhost_mtu)
			break;
		request.ifr_mtu = vhost_mtu;
		ret = ioctl(sock, SIOCSIFMTU, &request);
		if (ret == -1)
			break;
		/* Re-read on the next iteration to confirm the MTU took. */
		request.ifr_mtu = 0;
		usleep(MLX5_VDPA_USEC);
	}
	close(sock);
	return kern_mtu == vhost_mtu ? 0 : -1;
}
244
/*
 * Release cached (lazily kept) device resources.
 * Pre-created virtqs (priv->queues != 0) survive vhost reconnections and
 * are cleaned elsewhere at device removal; memory registration is always
 * dropped here.
 */
static void
mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv)
{
	/* Clean pre-created resource in dev removal only. */
	if (!priv->queues)
		mlx5_vdpa_virtqs_cleanup(priv);
	mlx5_vdpa_mem_dereg(priv);
}
253
/*
 * vDPA op: close a vhost device.
 * Stops event processing, flushes the last dirty-page log round when the
 * device was fully configured, releases steering/virtq/CQ resources and
 * returns the device to the PROBED state. The teardown order here is
 * significant - do not reorder the calls.
 *
 * @return 0 on success, non-zero if the final dirty-log flush failed.
 */
static int
mlx5_vdpa_dev_close(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
	int ret = 0;

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -1;
	}
	mlx5_vdpa_cqe_event_unset(priv);
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
		/* Record the final dirty pages before stopping the virtqs. */
		ret |= mlx5_vdpa_lm_log(priv);
		priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
	}
	mlx5_vdpa_steer_unset(priv);
	mlx5_vdpa_virtqs_release(priv);
	mlx5_vdpa_drain_cq(priv);
	if (priv->lm_mr.addr)
		mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
	priv->state = MLX5_VDPA_STATE_PROBED;
	/* Keep cached resources while the vhost socket stays connected. */
	if (!priv->connected)
		mlx5_vdpa_dev_cache_clean(priv);
	priv->vid = 0;
	/* The mutex may stay locked after event thread cancel - initiate it. */
	pthread_mutex_init(&priv->vq_config_lock, NULL);
	DRV_LOG(INFO, "vDPA device %d was closed.", vid);
	return ret;
}
285
/*
 * vDPA op: configure a vhost device for traffic.
 * Re-closes a previously configured device first (reconfiguration), then
 * registers guest memory, prepares the virtqs, sets up steering and the
 * CQE event thread. On any setup failure the device is closed again.
 *
 * @return 0 on success, negative value on failure.
 */
static int
mlx5_vdpa_dev_config(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -EINVAL;
	}
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED &&
	    mlx5_vdpa_dev_close(vid)) {
		DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
		return -1;
	}
	priv->vid = vid;
	priv->connected = true;
	/* MTU mismatch is not fatal - only warn. */
	if (mlx5_vdpa_mtu_set(priv))
		DRV_LOG(WARNING, "MTU cannot be set on device %s.",
				vdev->device->name);
	if (mlx5_vdpa_mem_register(priv) ||
	    mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
	    mlx5_vdpa_cqe_event_setup(priv)) {
		mlx5_vdpa_dev_close(vid);
		return -1;
	}
	priv->state = MLX5_VDPA_STATE_CONFIGURED;
	DRV_LOG(INFO, "vDPA device %d was configured.", vid);
	return 0;
}
317
318 static int
319 mlx5_vdpa_get_device_fd(int vid)
320 {
321         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
322         struct mlx5_vdpa_priv *priv =
323                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
324
325         if (priv == NULL) {
326                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
327                 return -EINVAL;
328         }
329         return ((struct ibv_context *)priv->cdev->ctx)->cmd_fd;
330 }
331
332 static int
333 mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
334 {
335         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
336         struct mlx5_vdpa_priv *priv =
337                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
338
339         RTE_SET_USED(qid);
340         if (priv == NULL) {
341                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
342                 return -EINVAL;
343         }
344         if (!priv->var) {
345                 DRV_LOG(ERR, "VAR was not created for device %s, is the device"
346                         " configured?.", vdev->device->name);
347                 return -EINVAL;
348         }
349         *offset = priv->var->mmap_off;
350         *size = priv->var->length;
351         return 0;
352 }
353
354 static int
355 mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
356                 struct rte_vdpa_stat_name *stats_names,
357                 unsigned int size)
358 {
359         static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
360                 "received_descriptors",
361                 "completed_descriptors",
362                 "bad descriptor errors",
363                 "exceed max chain",
364                 "invalid buffer",
365                 "completion errors",
366         };
367         struct mlx5_vdpa_priv *priv =
368                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
369         unsigned int i;
370
371         if (priv == NULL) {
372                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
373                 return -ENODEV;
374         }
375         if (!stats_names)
376                 return MLX5_VDPA_STATS_MAX;
377         size = RTE_MIN(size, (unsigned int)MLX5_VDPA_STATS_MAX);
378         for (i = 0; i < size; ++i)
379                 strlcpy(stats_names[i].name, mlx5_vdpa_stats_names[i],
380                         RTE_VDPA_STATS_NAME_SIZE);
381         return size;
382 }
383
384 static int
385 mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
386                 struct rte_vdpa_stat *stats, unsigned int n)
387 {
388         struct mlx5_vdpa_priv *priv =
389                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
390
391         if (priv == NULL) {
392                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
393                 return -ENODEV;
394         }
395         if (qid >= (int)priv->caps.max_num_virtio_queues) {
396                 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
397                                 vdev->device->name);
398                 return -E2BIG;
399         }
400         if (!priv->caps.queue_counters_valid) {
401                 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
402                         vdev->device->name);
403                 return -ENOTSUP;
404         }
405         return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
406 }
407
408 static int
409 mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
410 {
411         struct mlx5_vdpa_priv *priv =
412                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
413
414         if (priv == NULL) {
415                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
416                 return -ENODEV;
417         }
418         if (qid >= (int)priv->caps.max_num_virtio_queues) {
419                 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
420                                 vdev->device->name);
421                 return -E2BIG;
422         }
423         if (!priv->caps.queue_counters_valid) {
424                 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
425                         vdev->device->name);
426                 return -ENOTSUP;
427         }
428         return mlx5_vdpa_virtq_stats_reset(priv, qid);
429 }
430
431 static int
432 mlx5_vdpa_dev_cleanup(int vid)
433 {
434         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
435         struct mlx5_vdpa_priv *priv;
436
437         if (vdev == NULL)
438                 return -1;
439         priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
440         if (priv == NULL) {
441                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
442                 return -1;
443         }
444         if (priv->state == MLX5_VDPA_STATE_PROBED)
445                 mlx5_vdpa_dev_cache_clean(priv);
446         priv->connected = false;
447         return 0;
448 }
449
/* vDPA operations registered with the vhost library for this driver. */
static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
	.get_queue_num = mlx5_vdpa_get_queue_num,
	.get_features = mlx5_vdpa_get_vdpa_features,
	.get_protocol_features = mlx5_vdpa_get_protocol_features,
	.dev_conf = mlx5_vdpa_dev_config,
	.dev_close = mlx5_vdpa_dev_close,
	.dev_cleanup = mlx5_vdpa_dev_cleanup,
	.set_vring_state = mlx5_vdpa_set_vring_state,
	.set_features = mlx5_vdpa_features_set,
	.migration_done = NULL,
	.get_vfio_group_fd = NULL,
	.get_vfio_device_fd = mlx5_vdpa_get_device_fd,
	.get_notify_area = mlx5_vdpa_get_notify_area,
	.get_stats_names = mlx5_vdpa_get_stats_names,
	.get_stats = mlx5_vdpa_get_stats,
	.reset_stats = mlx5_vdpa_reset_stats,
};
467
468 static int
469 mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
470 {
471         struct mlx5_vdpa_priv *priv = opaque;
472         unsigned long tmp;
473         int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
474
475         errno = 0;
476         tmp = strtoul(val, NULL, 0);
477         if (errno) {
478                 DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
479                 return -errno;
480         }
481         if (strcmp(key, "event_mode") == 0) {
482                 if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
483                         priv->event_mode = (int)tmp;
484                 else
485                         DRV_LOG(WARNING, "Invalid event_mode %s.", val);
486         } else if (strcmp(key, "event_us") == 0) {
487                 priv->event_us = (uint32_t)tmp;
488         } else if (strcmp(key, "no_traffic_time") == 0) {
489                 priv->no_traffic_max = (uint32_t)tmp;
490         } else if (strcmp(key, "event_core") == 0) {
491                 if (tmp >= (unsigned long)n_cores)
492                         DRV_LOG(WARNING, "Invalid event_core %s.", val);
493                 else
494                         priv->event_core = tmp;
495         } else if (strcmp(key, "hw_latency_mode") == 0) {
496                 priv->hw_latency_mode = (uint32_t)tmp;
497         } else if (strcmp(key, "hw_max_latency_us") == 0) {
498                 priv->hw_max_latency_us = (uint32_t)tmp;
499         } else if (strcmp(key, "hw_max_pending_comp") == 0) {
500                 priv->hw_max_pending_comp = (uint32_t)tmp;
501         } else if (strcmp(key, "queue_size") == 0) {
502                 priv->queue_size = (uint16_t)tmp;
503         } else if (strcmp(key, "queues") == 0) {
504                 priv->queues = (uint16_t)tmp;
505         } else {
506                 DRV_LOG(WARNING, "Invalid key %s.", key);
507         }
508         return 0;
509 }
510
511 static void
512 mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
513                      struct mlx5_vdpa_priv *priv)
514 {
515         const char **params = (const char *[]){
516                 "event_core",
517                 "event_mode",
518                 "event_us",
519                 "hw_latency_mode",
520                 "hw_max_latency_us",
521                 "hw_max_pending_comp",
522                 "no_traffic_time",
523                 NULL,
524         };
525
526         priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
527         priv->event_us = 0;
528         priv->event_core = -1;
529         priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
530         if (mkvlist == NULL)
531                 return;
532         mlx5_kvargs_process(mkvlist, params, mlx5_vdpa_args_check_handler,
533                             priv);
534         if (!priv->event_us &&
535             priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
536                 priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
537         if ((priv->queue_size && !priv->queues) ||
538                 (!priv->queue_size && priv->queues)) {
539                 priv->queue_size = 0;
540                 priv->queues = 0;
541                 DRV_LOG(WARNING, "Please provide both queue_size and queues.");
542         }
543         DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
544         DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
545         DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
546         DRV_LOG(DEBUG, "queues is %u, queue_size is %u.", priv->queues,
547                 priv->queue_size);
548 }
549
550 static int
551 mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv)
552 {
553         uint32_t index;
554         uint32_t i;
555
556         if (!priv->queues)
557                 return 0;
558         for (index = 0; index < (priv->queues * 2); ++index) {
559                 struct mlx5_vdpa_virtq *virtq = &priv->virtqs[index];
560                 int ret = mlx5_vdpa_event_qp_prepare(priv, priv->queue_size,
561                                         -1, &virtq->eqp);
562
563                 if (ret) {
564                         DRV_LOG(ERR, "Failed to create event QPs for virtq %d.",
565                                 index);
566                         return -1;
567                 }
568                 if (priv->caps.queue_counters_valid) {
569                         if (!virtq->counters)
570                                 virtq->counters =
571                                         mlx5_devx_cmd_create_virtio_q_counters
572                                                 (priv->cdev->ctx);
573                         if (!virtq->counters) {
574                                 DRV_LOG(ERR, "Failed to create virtq couners for virtq"
575                                         " %d.", index);
576                                 return -1;
577                         }
578                 }
579                 for (i = 0; i < RTE_DIM(virtq->umems); ++i) {
580                         uint32_t size;
581                         void *buf;
582                         struct mlx5dv_devx_umem *obj;
583
584                         size = priv->caps.umems[i].a * priv->queue_size +
585                                         priv->caps.umems[i].b;
586                         buf = rte_zmalloc(__func__, size, 4096);
587                         if (buf == NULL) {
588                                 DRV_LOG(ERR, "Cannot allocate umem %d memory for virtq"
589                                                 " %u.", i, index);
590                                 return -1;
591                         }
592                         obj = mlx5_glue->devx_umem_reg(priv->cdev->ctx, buf,
593                                         size, IBV_ACCESS_LOCAL_WRITE);
594                         if (obj == NULL) {
595                                 rte_free(buf);
596                                 DRV_LOG(ERR, "Failed to register umem %d for virtq %u.",
597                                                 i, index);
598                                 return -1;
599                         }
600                         virtq->umems[i].size = size;
601                         virtq->umems[i].buf = buf;
602                         virtq->umems[i].obj = obj;
603                 }
604         }
605         return 0;
606 }
607
/*
 * Create all per-device HW resources at probe time: VAR (doorbell area),
 * doorbell mmap, transport domain, TIS per LAG port, null MR, steering
 * domain/table, error/event infrastructure and (optionally) pre-created
 * virtq resources.
 *
 * On any failure the function returns -rte_errno immediately; the caller
 * (mlx5_vdpa_dev_probe) releases whatever was already created via
 * mlx5_vdpa_dev_release().
 */
static int
mlx5_vdpa_create_dev_resources(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_devx_tis_attr tis_attr = {0};
	struct ibv_context *ctx = priv->cdev->ctx;
	uint32_t i;
	int retry;

	for (retry = 0; retry < 7; retry++) {
		priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
		if (priv->var != NULL)
			break;
		DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.", retry);
		/* Wait Qemu release VAR during vdpa restart, 0.1 sec based. */
		usleep(100000U << retry);
	}
	if (!priv->var) {
		DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	/* Always map the entire page. */
	priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
				   PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
				   priv->var->mmap_off);
	if (priv->virtq_db_addr == MAP_FAILED) {
		DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
		priv->virtq_db_addr = NULL;
		rte_errno = errno;
		return -rte_errno;
	}
	/* Add within page offset for 64K page system. */
	priv->virtq_db_addr = (char *)priv->virtq_db_addr +
		((rte_mem_page_size() - 1) & priv->caps.doorbell_bar_offset);
	DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
		priv->virtq_db_addr);
	priv->td = mlx5_devx_cmd_create_td(ctx);
	if (!priv->td) {
		DRV_LOG(ERR, "Failed to create transport domain.");
		rte_errno = errno;
		return -rte_errno;
	}
	tis_attr.transport_domain = priv->td->id;
	for (i = 0; i < priv->num_lag_ports; i++) {
		/* 0 is auto affinity, non-zero value to propose port. */
		tis_attr.lag_tx_port_affinity = i + 1;
		priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
		if (!priv->tiss[i]) {
			DRV_LOG(ERR, "Failed to create TIS %u.", i);
			return -rte_errno;
		}
	}
	priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
	if (!priv->null_mr) {
		DRV_LOG(ERR, "Failed to allocate null MR.");
		rte_errno = errno;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
#ifdef HAVE_MLX5DV_DR
	priv->steer.domain = mlx5_glue->dr_create_domain(ctx,
					MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
	if (!priv->steer.domain) {
		DRV_LOG(ERR, "Failed to create Rx domain.");
		rte_errno = errno;
		return -rte_errno;
	}
#endif
	priv->steer.tbl = mlx5_glue->dr_create_flow_tbl(priv->steer.domain, 0);
	if (!priv->steer.tbl) {
		DRV_LOG(ERR, "Failed to create table 0 with Rx domain.");
		rte_errno = errno;
		return -rte_errno;
	}
	if (mlx5_vdpa_err_event_setup(priv) != 0)
		return -rte_errno;
	if (mlx5_vdpa_event_qp_global_prepare(priv))
		return -rte_errno;
	if (mlx5_vdpa_virtq_resource_prepare(priv))
		return -rte_errno;
	return 0;
}
690
/*
 * Driver probe callback: verify HW vDPA capabilities, allocate the private
 * structure (with trailing per-virtq array), parse devargs, create device
 * resources and register the device with the vhost library.
 *
 * @return 0 on success, -rte_errno on failure.
 */
static int
mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
		    struct mlx5_kvargs_ctrl *mkvlist)
{
	struct mlx5_vdpa_priv *priv = NULL;
	struct mlx5_hca_attr *attr = &cdev->config.hca_attr;

	if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
		DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
			"old FW/OFED version?");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (!attr->vdpa.queue_counters_valid)
		DRV_LOG(DEBUG, "No capability to support virtq statistics.");
	/* The virtq array is allocated inline after the private struct. */
	priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
			   sizeof(struct mlx5_vdpa_virtq) *
			   attr->vdpa.max_num_virtio_queues,
			   RTE_CACHE_LINE_SIZE);
	if (!priv) {
		DRV_LOG(ERR, "Failed to allocate private memory.");
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	priv->caps = attr->vdpa;
	priv->log_max_rqt_size = attr->log_max_rqt_size;
	priv->num_lag_ports = attr->num_lag_ports;
	/* HW reporting zero LAG ports still means one usable port. */
	if (attr->num_lag_ports == 0)
		priv->num_lag_ports = 1;
	pthread_mutex_init(&priv->vq_config_lock, NULL);
	priv->cdev = cdev;
	mlx5_vdpa_config_get(mkvlist, priv);
	if (mlx5_vdpa_create_dev_resources(priv))
		goto error;
	priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
	if (priv->vdev == NULL) {
		DRV_LOG(ERR, "Failed to register vDPA device.");
		rte_errno = rte_errno ? rte_errno : EINVAL;
		goto error;
	}
	SLIST_INIT(&priv->mr_list);
	pthread_mutex_lock(&priv_list_lock);
	TAILQ_INSERT_TAIL(&priv_list, priv, next);
	pthread_mutex_unlock(&priv_list_lock);
	return 0;
error:
	if (priv)
		mlx5_vdpa_dev_release(priv);
	return -rte_errno;
}
741
742 static int
743 mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev)
744 {
745         struct mlx5_vdpa_priv *priv = NULL;
746         int found = 0;
747
748         pthread_mutex_lock(&priv_list_lock);
749         TAILQ_FOREACH(priv, &priv_list, next) {
750                 if (priv->vdev->device == cdev->dev) {
751                         found = 1;
752                         break;
753                 }
754         }
755         if (found)
756                 TAILQ_REMOVE(&priv_list, priv, next);
757         pthread_mutex_unlock(&priv_list_lock);
758         if (found)
759                 mlx5_vdpa_dev_release(priv);
760         return 0;
761 }
762
/*
 * Release every resource created by mlx5_vdpa_create_dev_resources(), in
 * reverse order of creation. Safe to call on a partially created device -
 * each release is guarded by a NULL/zero check.
 */
static void
mlx5_vdpa_release_dev_resources(struct mlx5_vdpa_priv *priv)
{
	uint32_t i;

	/* Pre-created virtqs are only torn down here, at device removal. */
	if (priv->queues)
		mlx5_vdpa_virtqs_cleanup(priv);
	mlx5_vdpa_dev_cache_clean(priv);
	for (i = 0; i < priv->caps.max_num_virtio_queues; i++) {
		if (!priv->virtqs[i].counters)
			continue;
		claim_zero(mlx5_devx_cmd_destroy(priv->virtqs[i].counters));
	}
	mlx5_vdpa_event_qp_global_release(priv);
	mlx5_vdpa_err_event_unset(priv);
	if (priv->steer.tbl)
		claim_zero(mlx5_glue->dr_destroy_flow_tbl(priv->steer.tbl));
	if (priv->steer.domain)
		claim_zero(mlx5_glue->dr_destroy_domain(priv->steer.domain));
	if (priv->null_mr)
		claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
	for (i = 0; i < priv->num_lag_ports; i++) {
		if (priv->tiss[i])
			claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
	}
	if (priv->td)
		claim_zero(mlx5_devx_cmd_destroy(priv->td));
	if (priv->virtq_db_addr)
		/* Mask out the within page offset for munmap. */
		claim_zero(munmap((void *)((uintptr_t)priv->virtq_db_addr &
			~(rte_mem_page_size() - 1)), priv->var->length));
	if (priv->var)
		mlx5_glue->dv_free_var(priv->var);
}
797
/*
 * Final device release: close the vhost device if still configured,
 * release HW resources, unregister from vhost and free the private
 * structure. The priv pointer is invalid after this returns.
 */
static void
mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
{
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
		mlx5_vdpa_dev_close(priv->vid);
	mlx5_vdpa_release_dev_resources(priv);
	if (priv->vdev)
		rte_vdpa_unregister_device(priv->vdev);
	pthread_mutex_destroy(&priv->vq_config_lock);
	rte_free(priv);
}
809
/* PCI IDs of NVIDIA/Mellanox devices supported by this driver. */
static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX7)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
	},
	{
		/* Sentinel - terminates the ID table. */
		.vendor_id = 0
	}
};
843
/* mlx5 class driver descriptor registered with the common mlx5 layer. */
static struct mlx5_class_driver mlx5_vdpa_driver = {
	.drv_class = MLX5_CLASS_VDPA,
	.name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
	.id_table = mlx5_vdpa_pci_id_map,
	.probe = mlx5_vdpa_dev_probe,
	.remove = mlx5_vdpa_dev_remove,
};
851
RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)

/**
 * Driver initialization routine.
 */
RTE_INIT(rte_mlx5_vdpa_init)
{
	mlx5_common_init();
	/* Register only if the rdma-core glue library loaded successfully. */
	if (mlx5_glue)
		mlx5_class_driver_register(&mlx5_vdpa_driver);
}

RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");