mempool/cnxk: avoid batch op free for empty pools
[dpdk.git] / drivers / vdpa / mlx5 / mlx5_vdpa.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 #include <unistd.h>
5 #include <net/if.h>
6 #include <sys/socket.h>
7 #include <sys/ioctl.h>
8 #include <sys/mman.h>
9 #include <fcntl.h>
10 #include <netinet/in.h>
11
12 #include <rte_malloc.h>
13 #include <rte_log.h>
14 #include <rte_errno.h>
15 #include <rte_string_fns.h>
16 #include <rte_bus_pci.h>
17
18 #include <mlx5_glue.h>
19 #include <mlx5_common.h>
20 #include <mlx5_common_defs.h>
21 #include <mlx5_devx_cmds.h>
22 #include <mlx5_prm.h>
23 #include <mlx5_nl.h>
24
25 #include "mlx5_vdpa_utils.h"
26 #include "mlx5_vdpa.h"
27
/* Driver name token; stringified for the class driver and PMD export macros. */
#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5

/*
 * Virtio feature bits always reported by mlx5_vdpa_get_vdpa_features().
 * Capability-dependent bits are OR-ed on top of this mask there.
 */
#define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
                            (1ULL << VIRTIO_F_ANY_LAYOUT) | \
                            (1ULL << VIRTIO_NET_F_MQ) | \
                            (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
                            (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
                            (1ULL << VHOST_F_LOG_ALL) | \
                            (1ULL << VIRTIO_NET_F_MTU))

/*
 * Vhost-user protocol feature bits reported by
 * mlx5_vdpa_get_protocol_features().
 */
#define MLX5_VDPA_PROTOCOL_FEATURES \
                            ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_STATUS))

/* Default stored in priv->no_traffic_max by mlx5_vdpa_config_get(). */
#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU
48
/* List of the private structures of all successfully probed devices. */
TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
                                              TAILQ_HEAD_INITIALIZER(priv_list);
/* Taken around every traversal and modification of priv_list in this file. */
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Forward declaration: used by the probe/remove paths before its definition. */
static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv);
54
55 static struct mlx5_vdpa_priv *
56 mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
57 {
58         struct mlx5_vdpa_priv *priv;
59         int found = 0;
60
61         pthread_mutex_lock(&priv_list_lock);
62         TAILQ_FOREACH(priv, &priv_list, next) {
63                 if (vdev == priv->vdev) {
64                         found = 1;
65                         break;
66                 }
67         }
68         pthread_mutex_unlock(&priv_list_lock);
69         if (!found) {
70                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
71                 rte_errno = EINVAL;
72                 return NULL;
73         }
74         return priv;
75 }
76
77 static int
78 mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
79 {
80         struct mlx5_vdpa_priv *priv =
81                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
82
83         if (priv == NULL) {
84                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
85                 return -1;
86         }
87         *queue_num = priv->caps.max_num_virtio_queues;
88         return 0;
89 }
90
91 static int
92 mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
93 {
94         struct mlx5_vdpa_priv *priv =
95                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
96
97         if (priv == NULL) {
98                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
99                 return -1;
100         }
101         *features = MLX5_VDPA_DEFAULT_FEATURES;
102         if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
103                 *features |= (1ULL << VIRTIO_F_RING_PACKED);
104         if (priv->caps.tso_ipv4)
105                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
106         if (priv->caps.tso_ipv6)
107                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
108         if (priv->caps.tx_csum)
109                 *features |= (1ULL << VIRTIO_NET_F_CSUM);
110         if (priv->caps.rx_csum)
111                 *features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
112         if (priv->caps.virtio_version_1_0)
113                 *features |= (1ULL << VIRTIO_F_VERSION_1);
114         return 0;
115 }
116
117 static int
118 mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
119                 uint64_t *features)
120 {
121         struct mlx5_vdpa_priv *priv =
122                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
123
124         if (priv == NULL) {
125                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
126                 return -1;
127         }
128         *features = MLX5_VDPA_PROTOCOL_FEATURES;
129         return 0;
130 }
131
132 static int
133 mlx5_vdpa_set_vring_state(int vid, int vring, int state)
134 {
135         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
136         struct mlx5_vdpa_priv *priv =
137                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
138         int ret;
139
140         if (priv == NULL) {
141                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
142                 return -EINVAL;
143         }
144         if (vring >= (int)priv->caps.max_num_virtio_queues * 2) {
145                 DRV_LOG(ERR, "Too big vring id: %d.", vring);
146                 return -E2BIG;
147         }
148         pthread_mutex_lock(&priv->vq_config_lock);
149         ret = mlx5_vdpa_virtq_enable(priv, vring, state);
150         pthread_mutex_unlock(&priv->vq_config_lock);
151         return ret;
152 }
153
154 static int
155 mlx5_vdpa_features_set(int vid)
156 {
157         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
158         struct mlx5_vdpa_priv *priv =
159                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
160         uint64_t log_base, log_size;
161         uint64_t features;
162         int ret;
163
164         if (priv == NULL) {
165                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
166                 return -EINVAL;
167         }
168         ret = rte_vhost_get_negotiated_features(vid, &features);
169         if (ret) {
170                 DRV_LOG(ERR, "Failed to get negotiated features.");
171                 return ret;
172         }
173         if (RTE_VHOST_NEED_LOG(features)) {
174                 ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
175                 if (ret) {
176                         DRV_LOG(ERR, "Failed to get log base.");
177                         return ret;
178                 }
179                 ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
180                 if (ret) {
181                         DRV_LOG(ERR, "Failed to set dirty bitmap.");
182                         return ret;
183                 }
184                 DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
185                 ret = mlx5_vdpa_logging_enable(priv, 1);
186                 if (ret) {
187                         DRV_LOG(ERR, "Failed t enable dirty logging.");
188                         return ret;
189                 }
190         }
191         return 0;
192 }
193
194 static int
195 mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
196 {
197         struct ifreq request;
198         uint16_t vhost_mtu = 0;
199         uint16_t kern_mtu = 0;
200         int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
201         int sock;
202         int retries = MLX5_VDPA_MAX_RETRIES;
203
204         if (ret) {
205                 DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
206                 return ret;
207         }
208         if (!vhost_mtu) {
209                 DRV_LOG(DEBUG, "Vhost MTU is 0.");
210                 return ret;
211         }
212         ret = mlx5_get_ifname_sysfs
213                                 (mlx5_os_get_ctx_device_name(priv->cdev->ctx),
214                                  request.ifr_name);
215         if (ret) {
216                 DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
217                 return ret;
218         }
219         sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
220         if (sock == -1) {
221                 DRV_LOG(DEBUG, "Cannot open IF socket.");
222                 return sock;
223         }
224         while (retries--) {
225                 ret = ioctl(sock, SIOCGIFMTU, &request);
226                 if (ret == -1)
227                         break;
228                 kern_mtu = request.ifr_mtu;
229                 DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
230                         (int)vhost_mtu);
231                 if (kern_mtu == vhost_mtu)
232                         break;
233                 request.ifr_mtu = vhost_mtu;
234                 ret = ioctl(sock, SIOCSIFMTU, &request);
235                 if (ret == -1)
236                         break;
237                 request.ifr_mtu = 0;
238                 usleep(MLX5_VDPA_USEC);
239         }
240         close(sock);
241         return kern_mtu == vhost_mtu ? 0 : -1;
242 }
243
/**
 * Drop the resources kept across device close: the virtqs are cleaned up
 * first, then the memory registration is removed.
 *
 * @param priv
 *   Device private structure.
 */
static void
mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv)
{
        /* Virtq cleanup runs before memory deregistration. */
        mlx5_vdpa_virtqs_cleanup(priv);
        mlx5_vdpa_mem_dereg(priv);
}
250
251 static int
252 mlx5_vdpa_dev_close(int vid)
253 {
254         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
255         struct mlx5_vdpa_priv *priv =
256                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
257         int ret = 0;
258
259         if (priv == NULL) {
260                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
261                 return -1;
262         }
263         mlx5_vdpa_cqe_event_unset(priv);
264         if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
265                 ret |= mlx5_vdpa_lm_log(priv);
266                 priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
267         }
268         mlx5_vdpa_steer_unset(priv);
269         mlx5_vdpa_virtqs_release(priv);
270         if (priv->lm_mr.addr)
271                 mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
272         priv->state = MLX5_VDPA_STATE_PROBED;
273         if (!priv->connected)
274                 mlx5_vdpa_dev_cache_clean(priv);
275         priv->vid = 0;
276         /* The mutex may stay locked after event thread cancel - initiate it. */
277         pthread_mutex_init(&priv->vq_config_lock, NULL);
278         DRV_LOG(INFO, "vDPA device %d was closed.", vid);
279         return ret;
280 }
281
282 static int
283 mlx5_vdpa_dev_config(int vid)
284 {
285         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
286         struct mlx5_vdpa_priv *priv =
287                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
288
289         if (priv == NULL) {
290                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
291                 return -EINVAL;
292         }
293         if (priv->state == MLX5_VDPA_STATE_CONFIGURED &&
294             mlx5_vdpa_dev_close(vid)) {
295                 DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
296                 return -1;
297         }
298         priv->vid = vid;
299         priv->connected = true;
300         if (mlx5_vdpa_mtu_set(priv))
301                 DRV_LOG(WARNING, "MTU cannot be set on device %s.",
302                                 vdev->device->name);
303         if (mlx5_vdpa_mem_register(priv) ||
304             mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
305             mlx5_vdpa_cqe_event_setup(priv)) {
306                 mlx5_vdpa_dev_close(vid);
307                 return -1;
308         }
309         priv->state = MLX5_VDPA_STATE_CONFIGURED;
310         DRV_LOG(INFO, "vDPA device %d was configured.", vid);
311         return 0;
312 }
313
314 static int
315 mlx5_vdpa_get_device_fd(int vid)
316 {
317         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
318         struct mlx5_vdpa_priv *priv =
319                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
320
321         if (priv == NULL) {
322                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
323                 return -EINVAL;
324         }
325         return ((struct ibv_context *)priv->cdev->ctx)->cmd_fd;
326 }
327
328 static int
329 mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
330 {
331         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
332         struct mlx5_vdpa_priv *priv =
333                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
334
335         RTE_SET_USED(qid);
336         if (priv == NULL) {
337                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
338                 return -EINVAL;
339         }
340         if (!priv->var) {
341                 DRV_LOG(ERR, "VAR was not created for device %s, is the device"
342                         " configured?.", vdev->device->name);
343                 return -EINVAL;
344         }
345         *offset = priv->var->mmap_off;
346         *size = priv->var->length;
347         return 0;
348 }
349
350 static int
351 mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
352                 struct rte_vdpa_stat_name *stats_names,
353                 unsigned int size)
354 {
355         static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
356                 "received_descriptors",
357                 "completed_descriptors",
358                 "bad descriptor errors",
359                 "exceed max chain",
360                 "invalid buffer",
361                 "completion errors",
362         };
363         struct mlx5_vdpa_priv *priv =
364                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
365         unsigned int i;
366
367         if (priv == NULL) {
368                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
369                 return -ENODEV;
370         }
371         if (!stats_names)
372                 return MLX5_VDPA_STATS_MAX;
373         size = RTE_MIN(size, (unsigned int)MLX5_VDPA_STATS_MAX);
374         for (i = 0; i < size; ++i)
375                 strlcpy(stats_names[i].name, mlx5_vdpa_stats_names[i],
376                         RTE_VDPA_STATS_NAME_SIZE);
377         return size;
378 }
379
380 static int
381 mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
382                 struct rte_vdpa_stat *stats, unsigned int n)
383 {
384         struct mlx5_vdpa_priv *priv =
385                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
386
387         if (priv == NULL) {
388                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
389                 return -ENODEV;
390         }
391         if (qid >= (int)priv->caps.max_num_virtio_queues * 2) {
392                 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
393                                 vdev->device->name);
394                 return -E2BIG;
395         }
396         if (!priv->caps.queue_counters_valid) {
397                 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
398                         vdev->device->name);
399                 return -ENOTSUP;
400         }
401         return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
402 }
403
404 static int
405 mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
406 {
407         struct mlx5_vdpa_priv *priv =
408                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
409
410         if (priv == NULL) {
411                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
412                 return -ENODEV;
413         }
414         if (qid >= (int)priv->caps.max_num_virtio_queues * 2) {
415                 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
416                                 vdev->device->name);
417                 return -E2BIG;
418         }
419         if (!priv->caps.queue_counters_valid) {
420                 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
421                         vdev->device->name);
422                 return -ENOTSUP;
423         }
424         return mlx5_vdpa_virtq_stats_reset(priv, qid);
425 }
426
427 static int
428 mlx5_vdpa_dev_cleanup(int vid)
429 {
430         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
431         struct mlx5_vdpa_priv *priv;
432
433         if (vdev == NULL)
434                 return -1;
435         priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
436         if (priv == NULL) {
437                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
438                 return -1;
439         }
440         if (priv->state == MLX5_VDPA_STATE_PROBED)
441                 mlx5_vdpa_dev_cache_clean(priv);
442         priv->connected = false;
443         return 0;
444 }
445
446 static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
447         .get_queue_num = mlx5_vdpa_get_queue_num,
448         .get_features = mlx5_vdpa_get_vdpa_features,
449         .get_protocol_features = mlx5_vdpa_get_protocol_features,
450         .dev_conf = mlx5_vdpa_dev_config,
451         .dev_close = mlx5_vdpa_dev_close,
452         .dev_cleanup = mlx5_vdpa_dev_cleanup,
453         .set_vring_state = mlx5_vdpa_set_vring_state,
454         .set_features = mlx5_vdpa_features_set,
455         .migration_done = NULL,
456         .get_vfio_group_fd = NULL,
457         .get_vfio_device_fd = mlx5_vdpa_get_device_fd,
458         .get_notify_area = mlx5_vdpa_get_notify_area,
459         .get_stats_names = mlx5_vdpa_get_stats_names,
460         .get_stats = mlx5_vdpa_get_stats,
461         .reset_stats = mlx5_vdpa_reset_stats,
462 };
463
464 static int
465 mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
466 {
467         struct mlx5_vdpa_priv *priv = opaque;
468         unsigned long tmp;
469         int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
470
471         errno = 0;
472         tmp = strtoul(val, NULL, 0);
473         if (errno) {
474                 DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
475                 return -errno;
476         }
477         if (strcmp(key, "event_mode") == 0) {
478                 if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
479                         priv->event_mode = (int)tmp;
480                 else
481                         DRV_LOG(WARNING, "Invalid event_mode %s.", val);
482         } else if (strcmp(key, "event_us") == 0) {
483                 priv->event_us = (uint32_t)tmp;
484         } else if (strcmp(key, "no_traffic_time") == 0) {
485                 priv->no_traffic_max = (uint32_t)tmp;
486         } else if (strcmp(key, "event_core") == 0) {
487                 if (tmp >= (unsigned long)n_cores)
488                         DRV_LOG(WARNING, "Invalid event_core %s.", val);
489                 else
490                         priv->event_core = tmp;
491         } else if (strcmp(key, "hw_latency_mode") == 0) {
492                 priv->hw_latency_mode = (uint32_t)tmp;
493         } else if (strcmp(key, "hw_max_latency_us") == 0) {
494                 priv->hw_max_latency_us = (uint32_t)tmp;
495         } else if (strcmp(key, "hw_max_pending_comp") == 0) {
496                 priv->hw_max_pending_comp = (uint32_t)tmp;
497         }
498         return 0;
499 }
500
501 static void
502 mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
503                      struct mlx5_vdpa_priv *priv)
504 {
505         const char **params = (const char *[]){
506                 "event_core",
507                 "event_mode",
508                 "event_us",
509                 "hw_latency_mode",
510                 "hw_max_latency_us",
511                 "hw_max_pending_comp",
512                 "no_traffic_time",
513                 NULL,
514         };
515
516         priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
517         priv->event_us = 0;
518         priv->event_core = -1;
519         priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
520         if (mkvlist == NULL)
521                 return;
522         mlx5_kvargs_process(mkvlist, params, mlx5_vdpa_args_check_handler,
523                             priv);
524         if (!priv->event_us &&
525             priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
526                 priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
527         DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
528         DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
529         DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
530 }
531
532 static int
533 mlx5_vdpa_create_dev_resources(struct mlx5_vdpa_priv *priv)
534 {
535         struct mlx5_devx_tis_attr tis_attr = {0};
536         struct ibv_context *ctx = priv->cdev->ctx;
537         uint32_t i;
538         int retry;
539
540         for (retry = 0; retry < 7; retry++) {
541                 priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
542                 if (priv->var != NULL)
543                         break;
544                 DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.", retry);
545                 /* Wait Qemu release VAR during vdpa restart, 0.1 sec based. */
546                 usleep(100000U << retry);
547         }
548         if (!priv->var) {
549                 DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
550                 rte_errno = ENOMEM;
551                 return -rte_errno;
552         }
553         /* Always map the entire page. */
554         priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
555                                    PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
556                                    priv->var->mmap_off);
557         if (priv->virtq_db_addr == MAP_FAILED) {
558                 DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
559                 priv->virtq_db_addr = NULL;
560                 rte_errno = errno;
561                 return -rte_errno;
562         }
563         DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
564                 priv->virtq_db_addr);
565         priv->td = mlx5_devx_cmd_create_td(ctx);
566         if (!priv->td) {
567                 DRV_LOG(ERR, "Failed to create transport domain.");
568                 rte_errno = errno;
569                 return -rte_errno;
570         }
571         tis_attr.transport_domain = priv->td->id;
572         for (i = 0; i < priv->num_lag_ports; i++) {
573                 /* 0 is auto affinity, non-zero value to propose port. */
574                 tis_attr.lag_tx_port_affinity = i + 1;
575                 priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
576                 if (!priv->tiss[i]) {
577                         DRV_LOG(ERR, "Failed to create TIS %u.", i);
578                         return -rte_errno;
579                 }
580         }
581         priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
582         if (!priv->null_mr) {
583                 DRV_LOG(ERR, "Failed to allocate null MR.");
584                 rte_errno = errno;
585                 return -rte_errno;
586         }
587         DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
588 #ifdef HAVE_MLX5DV_DR
589         priv->steer.domain = mlx5_glue->dr_create_domain(ctx,
590                                         MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
591         if (!priv->steer.domain) {
592                 DRV_LOG(ERR, "Failed to create Rx domain.");
593                 rte_errno = errno;
594                 return -rte_errno;
595         }
596 #endif
597         priv->steer.tbl = mlx5_glue->dr_create_flow_tbl(priv->steer.domain, 0);
598         if (!priv->steer.tbl) {
599                 DRV_LOG(ERR, "Failed to create table 0 with Rx domain.");
600                 rte_errno = errno;
601                 return -rte_errno;
602         }
603         if (mlx5_vdpa_err_event_setup(priv) != 0)
604                 return -rte_errno;
605         if (mlx5_vdpa_event_qp_global_prepare(priv))
606                 return -rte_errno;
607         return 0;
608 }
609
610 static int
611 mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
612                     struct mlx5_kvargs_ctrl *mkvlist)
613 {
614         struct mlx5_vdpa_priv *priv = NULL;
615         struct mlx5_hca_attr *attr = &cdev->config.hca_attr;
616
617         if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
618                 DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
619                         "old FW/OFED version?");
620                 rte_errno = ENOTSUP;
621                 return -rte_errno;
622         }
623         if (!attr->vdpa.queue_counters_valid)
624                 DRV_LOG(DEBUG, "No capability to support virtq statistics.");
625         priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
626                            sizeof(struct mlx5_vdpa_virtq) *
627                            attr->vdpa.max_num_virtio_queues * 2,
628                            RTE_CACHE_LINE_SIZE);
629         if (!priv) {
630                 DRV_LOG(ERR, "Failed to allocate private memory.");
631                 rte_errno = ENOMEM;
632                 return -rte_errno;
633         }
634         priv->caps = attr->vdpa;
635         priv->log_max_rqt_size = attr->log_max_rqt_size;
636         priv->num_lag_ports = attr->num_lag_ports;
637         if (attr->num_lag_ports == 0)
638                 priv->num_lag_ports = 1;
639         pthread_mutex_init(&priv->vq_config_lock, NULL);
640         priv->cdev = cdev;
641         if (mlx5_vdpa_create_dev_resources(priv))
642                 goto error;
643         priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
644         if (priv->vdev == NULL) {
645                 DRV_LOG(ERR, "Failed to register vDPA device.");
646                 rte_errno = rte_errno ? rte_errno : EINVAL;
647                 goto error;
648         }
649         mlx5_vdpa_config_get(mkvlist, priv);
650         SLIST_INIT(&priv->mr_list);
651         pthread_mutex_lock(&priv_list_lock);
652         TAILQ_INSERT_TAIL(&priv_list, priv, next);
653         pthread_mutex_unlock(&priv_list_lock);
654         return 0;
655 error:
656         if (priv)
657                 mlx5_vdpa_dev_release(priv);
658         return -rte_errno;
659 }
660
661 static int
662 mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev)
663 {
664         struct mlx5_vdpa_priv *priv = NULL;
665         int found = 0;
666
667         pthread_mutex_lock(&priv_list_lock);
668         TAILQ_FOREACH(priv, &priv_list, next) {
669                 if (priv->vdev->device == cdev->dev) {
670                         found = 1;
671                         break;
672                 }
673         }
674         if (found)
675                 TAILQ_REMOVE(&priv_list, priv, next);
676         pthread_mutex_unlock(&priv_list_lock);
677         if (found)
678                 mlx5_vdpa_dev_release(priv);
679         return 0;
680 }
681
682 static void
683 mlx5_vdpa_release_dev_resources(struct mlx5_vdpa_priv *priv)
684 {
685         uint32_t i;
686
687         mlx5_vdpa_dev_cache_clean(priv);
688         for (i = 0; i < priv->caps.max_num_virtio_queues * 2; i++) {
689                 if (!priv->virtqs[i].counters)
690                         continue;
691                 claim_zero(mlx5_devx_cmd_destroy(priv->virtqs[i].counters));
692         }
693         mlx5_vdpa_event_qp_global_release(priv);
694         mlx5_vdpa_err_event_unset(priv);
695         if (priv->steer.tbl)
696                 claim_zero(mlx5_glue->dr_destroy_flow_tbl(priv->steer.tbl));
697         if (priv->steer.domain)
698                 claim_zero(mlx5_glue->dr_destroy_domain(priv->steer.domain));
699         if (priv->null_mr)
700                 claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
701         for (i = 0; i < priv->num_lag_ports; i++) {
702                 if (priv->tiss[i])
703                         claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
704         }
705         if (priv->td)
706                 claim_zero(mlx5_devx_cmd_destroy(priv->td));
707         if (priv->virtq_db_addr)
708                 claim_zero(munmap(priv->virtq_db_addr, priv->var->length));
709         if (priv->var)
710                 mlx5_glue->dv_free_var(priv->var);
711 }
712
713 static void
714 mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
715 {
716         if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
717                 mlx5_vdpa_dev_close(priv->vid);
718         mlx5_vdpa_release_dev_resources(priv);
719         if (priv->vdev)
720                 rte_vdpa_unregister_device(priv->vdev);
721         pthread_mutex_destroy(&priv->vq_config_lock);
722         rte_free(priv);
723 }
724
/*
 * PCI IDs handled by this driver. The table is referenced by the class
 * driver descriptor and by RTE_PMD_REGISTER_PCI_TABLE(), and ends with an
 * entry whose vendor_id is 0.
 */
static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX7)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
        },
        {
                /* Terminating entry. */
                .vendor_id = 0
        }
};
758
/*
 * vDPA class driver descriptor handed to mlx5_class_driver_register() by
 * the initialization routine below.
 */
static struct mlx5_class_driver mlx5_vdpa_driver = {
        .drv_class = MLX5_CLASS_VDPA,
        /* Stringified MLX5_VDPA_DRIVER_NAME. */
        .name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
        .id_table = mlx5_vdpa_pci_id_map,
        .probe = mlx5_vdpa_dev_probe,
        .remove = mlx5_vdpa_dev_remove,
};
766
/* Register the driver log type mlx5_vdpa_logtype with NOTICE as its level. */
RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)
768
769 /**
770  * Driver initialization routine.
771  */
772 RTE_INIT(rte_mlx5_vdpa_init)
773 {
774         mlx5_common_init();
775         if (mlx5_glue)
776                 mlx5_class_driver_register(&mlx5_vdpa_driver);
777 }
778
/*
 * PMD metadata for MLX5_VDPA_DRIVER_NAME: exported name, supported PCI ID
 * table and the kernel modules the driver depends on.
 */
RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");