vdpa/mlx5: fix maximum number of virtqs
[dpdk.git] / drivers / vdpa / mlx5 / mlx5_vdpa.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 #include <unistd.h>
5 #include <net/if.h>
6 #include <sys/socket.h>
7 #include <sys/ioctl.h>
8 #include <sys/mman.h>
9 #include <fcntl.h>
10 #include <netinet/in.h>
11
12 #include <rte_malloc.h>
13 #include <rte_log.h>
14 #include <rte_errno.h>
15 #include <rte_string_fns.h>
16 #include <rte_bus_pci.h>
17 #include <rte_eal_paging.h>
18
19 #include <mlx5_glue.h>
20 #include <mlx5_common.h>
21 #include <mlx5_common_defs.h>
22 #include <mlx5_devx_cmds.h>
23 #include <mlx5_prm.h>
24 #include <mlx5_nl.h>
25
26 #include "mlx5_vdpa_utils.h"
27 #include "mlx5_vdpa.h"
28
#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5

/* Virtio/vhost feature bits the driver always offers. Per-device
 * capability bits (packed ring, TSO, checksum, version 1.0) are added on
 * top in mlx5_vdpa_get_vdpa_features(). */
#define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
			    (1ULL << VIRTIO_F_ANY_LAYOUT) | \
			    (1ULL << VIRTIO_NET_F_MQ) | \
			    (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
			    (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
			    (1ULL << VHOST_F_LOG_ALL) | \
			    (1ULL << VIRTIO_NET_F_MTU))

/* vhost-user protocol features reported by
 * mlx5_vdpa_get_protocol_features(). */
#define MLX5_VDPA_PROTOCOL_FEATURES \
			    ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_STATUS))

/* Default for the "no_traffic_time" devarg (units defined by the event
 * handling code in mlx5_vdpa_event.c - not visible here). */
#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU

/* Global list of all probed vDPA devices, protected by priv_list_lock. */
TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
					      TAILQ_HEAD_INITIALIZER(priv_list);
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Forward declaration: release is needed by both probe error path and
 * remove. */
static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv);
55
56 static struct mlx5_vdpa_priv *
57 mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
58 {
59         struct mlx5_vdpa_priv *priv;
60         int found = 0;
61
62         pthread_mutex_lock(&priv_list_lock);
63         TAILQ_FOREACH(priv, &priv_list, next) {
64                 if (vdev == priv->vdev) {
65                         found = 1;
66                         break;
67                 }
68         }
69         pthread_mutex_unlock(&priv_list_lock);
70         if (!found) {
71                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
72                 rte_errno = EINVAL;
73                 return NULL;
74         }
75         return priv;
76 }
77
78 static int
79 mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
80 {
81         struct mlx5_vdpa_priv *priv =
82                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
83
84         if (priv == NULL) {
85                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
86                 return -1;
87         }
88         *queue_num = priv->caps.max_num_virtio_queues / 2;
89         return 0;
90 }
91
92 static int
93 mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
94 {
95         struct mlx5_vdpa_priv *priv =
96                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
97
98         if (priv == NULL) {
99                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
100                 return -1;
101         }
102         *features = MLX5_VDPA_DEFAULT_FEATURES;
103         if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
104                 *features |= (1ULL << VIRTIO_F_RING_PACKED);
105         if (priv->caps.tso_ipv4)
106                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
107         if (priv->caps.tso_ipv6)
108                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
109         if (priv->caps.tx_csum)
110                 *features |= (1ULL << VIRTIO_NET_F_CSUM);
111         if (priv->caps.rx_csum)
112                 *features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
113         if (priv->caps.virtio_version_1_0)
114                 *features |= (1ULL << VIRTIO_F_VERSION_1);
115         return 0;
116 }
117
118 static int
119 mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
120                 uint64_t *features)
121 {
122         struct mlx5_vdpa_priv *priv =
123                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
124
125         if (priv == NULL) {
126                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
127                 return -1;
128         }
129         *features = MLX5_VDPA_PROTOCOL_FEATURES;
130         return 0;
131 }
132
133 static int
134 mlx5_vdpa_set_vring_state(int vid, int vring, int state)
135 {
136         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
137         struct mlx5_vdpa_priv *priv =
138                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
139         int ret;
140
141         if (priv == NULL) {
142                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
143                 return -EINVAL;
144         }
145         if (vring >= (int)priv->caps.max_num_virtio_queues) {
146                 DRV_LOG(ERR, "Too big vring id: %d.", vring);
147                 return -E2BIG;
148         }
149         pthread_mutex_lock(&priv->vq_config_lock);
150         ret = mlx5_vdpa_virtq_enable(priv, vring, state);
151         pthread_mutex_unlock(&priv->vq_config_lock);
152         return ret;
153 }
154
155 static int
156 mlx5_vdpa_features_set(int vid)
157 {
158         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
159         struct mlx5_vdpa_priv *priv =
160                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
161         uint64_t log_base, log_size;
162         uint64_t features;
163         int ret;
164
165         if (priv == NULL) {
166                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
167                 return -EINVAL;
168         }
169         ret = rte_vhost_get_negotiated_features(vid, &features);
170         if (ret) {
171                 DRV_LOG(ERR, "Failed to get negotiated features.");
172                 return ret;
173         }
174         if (RTE_VHOST_NEED_LOG(features)) {
175                 ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
176                 if (ret) {
177                         DRV_LOG(ERR, "Failed to get log base.");
178                         return ret;
179                 }
180                 ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
181                 if (ret) {
182                         DRV_LOG(ERR, "Failed to set dirty bitmap.");
183                         return ret;
184                 }
185                 DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
186                 ret = mlx5_vdpa_logging_enable(priv, 1);
187                 if (ret) {
188                         DRV_LOG(ERR, "Failed t enable dirty logging.");
189                         return ret;
190                 }
191         }
192         return 0;
193 }
194
/*
 * Align the kernel interface MTU with the MTU negotiated by vhost.
 *
 * Reads the vhost MTU for priv->vid, resolves the kernel interface name
 * from the device context via sysfs, then uses SIOCGIFMTU/SIOCSIFMTU
 * ioctls, retrying up to MLX5_VDPA_MAX_RETRIES times, until the kernel
 * MTU matches the vhost one.
 *
 * Returns 0 when the MTUs match, a negative/非-zero error code when the
 * vhost MTU cannot be read or the interface cannot be resolved, and -1
 * when the retry loop ends without convergence.
 */
static int
mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
{
	struct ifreq request;
	uint16_t vhost_mtu = 0;
	uint16_t kern_mtu = 0;
	int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
	int sock;
	int retries = MLX5_VDPA_MAX_RETRIES;

	if (ret) {
		DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
		return ret;
	}
	if (!vhost_mtu) {
		/* MTU 0 means vhost did not negotiate one - nothing to do. */
		DRV_LOG(DEBUG, "Vhost MTU is 0.");
		return ret;
	}
	ret = mlx5_get_ifname_sysfs
				(mlx5_os_get_ctx_device_name(priv->cdev->ctx),
				 request.ifr_name);
	if (ret) {
		DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
		return ret;
	}
	/* A dummy datagram socket is enough to carry the MTU ioctls. */
	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	if (sock == -1) {
		DRV_LOG(DEBUG, "Cannot open IF socket.");
		return sock;
	}
	while (retries--) {
		ret = ioctl(sock, SIOCGIFMTU, &request);
		if (ret == -1)
			break;
		kern_mtu = request.ifr_mtu;
		DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
			(int)vhost_mtu);
		if (kern_mtu == vhost_mtu)
			break;
		request.ifr_mtu = vhost_mtu;
		ret = ioctl(sock, SIOCSIFMTU, &request);
		if (ret == -1)
			break;
		request.ifr_mtu = 0;
		/* Give the kernel time to apply the change before re-reading. */
		usleep(MLX5_VDPA_USEC);
	}
	close(sock);
	/* Success is defined by convergence, not by the last ioctl result. */
	return kern_mtu == vhost_mtu ? 0 : -1;
}
244
/* Drop cached per-connection resources: destroy the virtqs first, then
 * deregister the guest memory they referenced. */
static void
mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv)
{
	mlx5_vdpa_virtqs_cleanup(priv);
	mlx5_vdpa_mem_dereg(priv);
}
251
/*
 * vDPA op: close the device for @vid.
 *
 * Tears down in the reverse order of mlx5_vdpa_dev_config(): stop CQE
 * events, log the final state if the device was configured (live
 * migration support), unset steering, release virtqs and the LM mkey.
 * Cached resources are only dropped when vhost has already disconnected
 * (priv->connected == false); otherwise they are kept for a fast
 * reconfigure. Returns 0 on success, non-zero if LM logging failed,
 * -1 for an unknown device.
 */
static int
mlx5_vdpa_dev_close(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
	int ret = 0;

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -1;
	}
	mlx5_vdpa_cqe_event_unset(priv);
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
		ret |= mlx5_vdpa_lm_log(priv);
		priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
	}
	mlx5_vdpa_steer_unset(priv);
	mlx5_vdpa_virtqs_release(priv);
	if (priv->lm_mr.addr)
		mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
	priv->state = MLX5_VDPA_STATE_PROBED;
	if (!priv->connected)
		mlx5_vdpa_dev_cache_clean(priv);
	priv->vid = 0;
	/* The mutex may stay locked after event thread cancel - initiate it. */
	pthread_mutex_init(&priv->vq_config_lock, NULL);
	DRV_LOG(INFO, "vDPA device %d was closed.", vid);
	return ret;
}
282
/*
 * vDPA op: configure the device for @vid.
 *
 * If the device is already configured it is closed first (reconfigure
 * path). MTU alignment failure is only a warning; failure of memory
 * registration, virtq preparation, steering or CQE event setup rolls
 * everything back via mlx5_vdpa_dev_close(). Returns 0 on success,
 * negative on error.
 */
static int
mlx5_vdpa_dev_config(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -EINVAL;
	}
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED &&
	    mlx5_vdpa_dev_close(vid)) {
		DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
		return -1;
	}
	priv->vid = vid;
	priv->connected = true;
	if (mlx5_vdpa_mtu_set(priv))
		DRV_LOG(WARNING, "MTU cannot be set on device %s.",
				vdev->device->name);
	if (mlx5_vdpa_mem_register(priv) ||
	    mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
	    mlx5_vdpa_cqe_event_setup(priv)) {
		mlx5_vdpa_dev_close(vid);
		return -1;
	}
	priv->state = MLX5_VDPA_STATE_CONFIGURED;
	DRV_LOG(INFO, "vDPA device %d was configured.", vid);
	return 0;
}
314
315 static int
316 mlx5_vdpa_get_device_fd(int vid)
317 {
318         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
319         struct mlx5_vdpa_priv *priv =
320                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
321
322         if (priv == NULL) {
323                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
324                 return -EINVAL;
325         }
326         return ((struct ibv_context *)priv->cdev->ctx)->cmd_fd;
327 }
328
329 static int
330 mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
331 {
332         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
333         struct mlx5_vdpa_priv *priv =
334                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
335
336         RTE_SET_USED(qid);
337         if (priv == NULL) {
338                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
339                 return -EINVAL;
340         }
341         if (!priv->var) {
342                 DRV_LOG(ERR, "VAR was not created for device %s, is the device"
343                         " configured?.", vdev->device->name);
344                 return -EINVAL;
345         }
346         *offset = priv->var->mmap_off;
347         *size = priv->var->length;
348         return 0;
349 }
350
351 static int
352 mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
353                 struct rte_vdpa_stat_name *stats_names,
354                 unsigned int size)
355 {
356         static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
357                 "received_descriptors",
358                 "completed_descriptors",
359                 "bad descriptor errors",
360                 "exceed max chain",
361                 "invalid buffer",
362                 "completion errors",
363         };
364         struct mlx5_vdpa_priv *priv =
365                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
366         unsigned int i;
367
368         if (priv == NULL) {
369                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
370                 return -ENODEV;
371         }
372         if (!stats_names)
373                 return MLX5_VDPA_STATS_MAX;
374         size = RTE_MIN(size, (unsigned int)MLX5_VDPA_STATS_MAX);
375         for (i = 0; i < size; ++i)
376                 strlcpy(stats_names[i].name, mlx5_vdpa_stats_names[i],
377                         RTE_VDPA_STATS_NAME_SIZE);
378         return size;
379 }
380
381 static int
382 mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
383                 struct rte_vdpa_stat *stats, unsigned int n)
384 {
385         struct mlx5_vdpa_priv *priv =
386                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
387
388         if (priv == NULL) {
389                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
390                 return -ENODEV;
391         }
392         if (qid >= (int)priv->caps.max_num_virtio_queues) {
393                 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
394                                 vdev->device->name);
395                 return -E2BIG;
396         }
397         if (!priv->caps.queue_counters_valid) {
398                 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
399                         vdev->device->name);
400                 return -ENOTSUP;
401         }
402         return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
403 }
404
405 static int
406 mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
407 {
408         struct mlx5_vdpa_priv *priv =
409                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
410
411         if (priv == NULL) {
412                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
413                 return -ENODEV;
414         }
415         if (qid >= (int)priv->caps.max_num_virtio_queues) {
416                 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
417                                 vdev->device->name);
418                 return -E2BIG;
419         }
420         if (!priv->caps.queue_counters_valid) {
421                 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
422                         vdev->device->name);
423                 return -ENOTSUP;
424         }
425         return mlx5_vdpa_virtq_stats_reset(priv, qid);
426 }
427
428 static int
429 mlx5_vdpa_dev_cleanup(int vid)
430 {
431         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
432         struct mlx5_vdpa_priv *priv;
433
434         if (vdev == NULL)
435                 return -1;
436         priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
437         if (priv == NULL) {
438                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
439                 return -1;
440         }
441         if (priv->state == MLX5_VDPA_STATE_PROBED)
442                 mlx5_vdpa_dev_cache_clean(priv);
443         priv->connected = false;
444         return 0;
445 }
446
/* vDPA framework callbacks registered via rte_vdpa_register_device().
 * VFIO group fd and migration_done are not supported by this driver. */
static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
	.get_queue_num = mlx5_vdpa_get_queue_num,
	.get_features = mlx5_vdpa_get_vdpa_features,
	.get_protocol_features = mlx5_vdpa_get_protocol_features,
	.dev_conf = mlx5_vdpa_dev_config,
	.dev_close = mlx5_vdpa_dev_close,
	.dev_cleanup = mlx5_vdpa_dev_cleanup,
	.set_vring_state = mlx5_vdpa_set_vring_state,
	.set_features = mlx5_vdpa_features_set,
	.migration_done = NULL,
	.get_vfio_group_fd = NULL,
	.get_vfio_device_fd = mlx5_vdpa_get_device_fd,
	.get_notify_area = mlx5_vdpa_get_notify_area,
	.get_stats_names = mlx5_vdpa_get_stats_names,
	.get_stats = mlx5_vdpa_get_stats,
	.reset_stats = mlx5_vdpa_reset_stats,
};
464
465 static int
466 mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
467 {
468         struct mlx5_vdpa_priv *priv = opaque;
469         unsigned long tmp;
470         int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
471
472         errno = 0;
473         tmp = strtoul(val, NULL, 0);
474         if (errno) {
475                 DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
476                 return -errno;
477         }
478         if (strcmp(key, "event_mode") == 0) {
479                 if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
480                         priv->event_mode = (int)tmp;
481                 else
482                         DRV_LOG(WARNING, "Invalid event_mode %s.", val);
483         } else if (strcmp(key, "event_us") == 0) {
484                 priv->event_us = (uint32_t)tmp;
485         } else if (strcmp(key, "no_traffic_time") == 0) {
486                 priv->no_traffic_max = (uint32_t)tmp;
487         } else if (strcmp(key, "event_core") == 0) {
488                 if (tmp >= (unsigned long)n_cores)
489                         DRV_LOG(WARNING, "Invalid event_core %s.", val);
490                 else
491                         priv->event_core = tmp;
492         } else if (strcmp(key, "hw_latency_mode") == 0) {
493                 priv->hw_latency_mode = (uint32_t)tmp;
494         } else if (strcmp(key, "hw_max_latency_us") == 0) {
495                 priv->hw_max_latency_us = (uint32_t)tmp;
496         } else if (strcmp(key, "hw_max_pending_comp") == 0) {
497                 priv->hw_max_pending_comp = (uint32_t)tmp;
498         }
499         return 0;
500 }
501
502 static void
503 mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
504                      struct mlx5_vdpa_priv *priv)
505 {
506         const char **params = (const char *[]){
507                 "event_core",
508                 "event_mode",
509                 "event_us",
510                 "hw_latency_mode",
511                 "hw_max_latency_us",
512                 "hw_max_pending_comp",
513                 "no_traffic_time",
514                 NULL,
515         };
516
517         priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
518         priv->event_us = 0;
519         priv->event_core = -1;
520         priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
521         if (mkvlist == NULL)
522                 return;
523         mlx5_kvargs_process(mkvlist, params, mlx5_vdpa_args_check_handler,
524                             priv);
525         if (!priv->event_us &&
526             priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
527                 priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
528         DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
529         DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
530         DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
531 }
532
/*
 * Allocate all the HW resources a device needs before registration:
 * VAR (doorbell region), doorbell mmap, transport domain, one TIS per
 * LAG port, the null MR, the Rx steering domain/table, the error event
 * channel and the global event QP resources.
 *
 * On any failure a negative errno is returned; partially created
 * resources are NOT freed here - the caller's error path runs
 * mlx5_vdpa_dev_release() -> mlx5_vdpa_release_dev_resources(), which
 * handles partially-initialized state.
 */
static int
mlx5_vdpa_create_dev_resources(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_devx_tis_attr tis_attr = {0};
	struct ibv_context *ctx = priv->cdev->ctx;
	uint32_t i;
	int retry;

	for (retry = 0; retry < 7; retry++) {
		priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
		if (priv->var != NULL)
			break;
		DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.", retry);
		/* Wait Qemu release VAR during vdpa restart, 0.1 sec based. */
		usleep(100000U << retry);
	}
	if (!priv->var) {
		DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	/* Always map the entire page. */
	priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
				   PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
				   priv->var->mmap_off);
	if (priv->virtq_db_addr == MAP_FAILED) {
		DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
		/* NULL so the release path skips munmap. */
		priv->virtq_db_addr = NULL;
		rte_errno = errno;
		return -rte_errno;
	}
	/* Add within page offset for 64K page system. */
	priv->virtq_db_addr = (char *)priv->virtq_db_addr +
		((rte_mem_page_size() - 1) & priv->caps.doorbell_bar_offset);
	DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
		priv->virtq_db_addr);
	priv->td = mlx5_devx_cmd_create_td(ctx);
	if (!priv->td) {
		DRV_LOG(ERR, "Failed to create transport domain.");
		rte_errno = errno;
		return -rte_errno;
	}
	tis_attr.transport_domain = priv->td->id;
	for (i = 0; i < priv->num_lag_ports; i++) {
		/* 0 is auto affinity, non-zero value to propose port. */
		tis_attr.lag_tx_port_affinity = i + 1;
		priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
		if (!priv->tiss[i]) {
			DRV_LOG(ERR, "Failed to create TIS %u.", i);
			return -rte_errno;
		}
	}
	priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
	if (!priv->null_mr) {
		DRV_LOG(ERR, "Failed to allocate null MR.");
		rte_errno = errno;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
#ifdef HAVE_MLX5DV_DR
	priv->steer.domain = mlx5_glue->dr_create_domain(ctx,
					MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
	if (!priv->steer.domain) {
		DRV_LOG(ERR, "Failed to create Rx domain.");
		rte_errno = errno;
		return -rte_errno;
	}
#endif
	priv->steer.tbl = mlx5_glue->dr_create_flow_tbl(priv->steer.domain, 0);
	if (!priv->steer.tbl) {
		DRV_LOG(ERR, "Failed to create table 0 with Rx domain.");
		rte_errno = errno;
		return -rte_errno;
	}
	if (mlx5_vdpa_err_event_setup(priv) != 0)
		return -rte_errno;
	if (mlx5_vdpa_event_qp_global_prepare(priv))
		return -rte_errno;
	return 0;
}
613
/*
 * Class-driver probe: create and register one vDPA device for @cdev.
 *
 * Verifies the firmware exposes vDPA capabilities, allocates the private
 * structure (with a trailing virtq array sized by the HW maximum),
 * creates the device resources, registers with the vDPA framework,
 * parses devargs and links the device into the global priv_list.
 * On failure everything is torn down via mlx5_vdpa_dev_release() and
 * -rte_errno is returned.
 */
static int
mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
		    struct mlx5_kvargs_ctrl *mkvlist)
{
	struct mlx5_vdpa_priv *priv = NULL;
	struct mlx5_hca_attr *attr = &cdev->config.hca_attr;

	if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
		DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
			"old FW/OFED version?");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (!attr->vdpa.queue_counters_valid)
		DRV_LOG(DEBUG, "No capability to support virtq statistics.");
	/* Flexible virtq array at the end of priv, one entry per HW virtq. */
	priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
			   sizeof(struct mlx5_vdpa_virtq) *
			   attr->vdpa.max_num_virtio_queues,
			   RTE_CACHE_LINE_SIZE);
	if (!priv) {
		DRV_LOG(ERR, "Failed to allocate private memory.");
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	priv->caps = attr->vdpa;
	priv->log_max_rqt_size = attr->log_max_rqt_size;
	priv->num_lag_ports = attr->num_lag_ports;
	/* 0 LAG ports means a non-LAG device - still one TIS is needed. */
	if (attr->num_lag_ports == 0)
		priv->num_lag_ports = 1;
	pthread_mutex_init(&priv->vq_config_lock, NULL);
	priv->cdev = cdev;
	if (mlx5_vdpa_create_dev_resources(priv))
		goto error;
	priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
	if (priv->vdev == NULL) {
		DRV_LOG(ERR, "Failed to register vDPA device.");
		rte_errno = rte_errno ? rte_errno : EINVAL;
		goto error;
	}
	mlx5_vdpa_config_get(mkvlist, priv);
	SLIST_INIT(&priv->mr_list);
	pthread_mutex_lock(&priv_list_lock);
	TAILQ_INSERT_TAIL(&priv_list, priv, next);
	pthread_mutex_unlock(&priv_list_lock);
	return 0;
error:
	if (priv)
		mlx5_vdpa_dev_release(priv);
	return -rte_errno;
}
664
665 static int
666 mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev)
667 {
668         struct mlx5_vdpa_priv *priv = NULL;
669         int found = 0;
670
671         pthread_mutex_lock(&priv_list_lock);
672         TAILQ_FOREACH(priv, &priv_list, next) {
673                 if (priv->vdev->device == cdev->dev) {
674                         found = 1;
675                         break;
676                 }
677         }
678         if (found)
679                 TAILQ_REMOVE(&priv_list, priv, next);
680         pthread_mutex_unlock(&priv_list_lock);
681         if (found)
682                 mlx5_vdpa_dev_release(priv);
683         return 0;
684 }
685
/*
 * Free everything mlx5_vdpa_create_dev_resources() created, in reverse
 * order. Safe on partially-initialized state: every resource is
 * NULL-checked before destruction, so this also serves as the probe
 * error path.
 */
static void
mlx5_vdpa_release_dev_resources(struct mlx5_vdpa_priv *priv)
{
	uint32_t i;

	mlx5_vdpa_dev_cache_clean(priv);
	for (i = 0; i < priv->caps.max_num_virtio_queues; i++) {
		if (!priv->virtqs[i].counters)
			continue;
		claim_zero(mlx5_devx_cmd_destroy(priv->virtqs[i].counters));
	}
	mlx5_vdpa_event_qp_global_release(priv);
	mlx5_vdpa_err_event_unset(priv);
	if (priv->steer.tbl)
		claim_zero(mlx5_glue->dr_destroy_flow_tbl(priv->steer.tbl));
	if (priv->steer.domain)
		claim_zero(mlx5_glue->dr_destroy_domain(priv->steer.domain));
	if (priv->null_mr)
		claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
	for (i = 0; i < priv->num_lag_ports; i++) {
		if (priv->tiss[i])
			claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
	}
	if (priv->td)
		claim_zero(mlx5_devx_cmd_destroy(priv->td));
	if (priv->virtq_db_addr)
		/* Mask out the within page offset for munmap. */
		claim_zero(munmap((void *)((uintptr_t)priv->virtq_db_addr &
			~(rte_mem_page_size() - 1)), priv->var->length));
	if (priv->var)
		mlx5_glue->dv_free_var(priv->var);
}
718
/* Full device destruction: close if still configured, free HW
 * resources, unregister from the vDPA framework and free @priv.
 * The caller must have already unlinked @priv from priv_list. */
static void
mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
{
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
		mlx5_vdpa_dev_close(priv->vid);
	mlx5_vdpa_release_dev_resources(priv);
	if (priv->vdev)
		rte_vdpa_unregister_device(priv->vdev);
	pthread_mutex_destroy(&priv->vq_config_lock);
	rte_free(priv);
}
730
/* PCI IDs this driver probes (ConnectX-6 and newer, PF/VF/BlueField
 * variants). The zero vendor_id entry terminates the table. */
static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX7)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
	},
	{
		.vendor_id = 0
	}
};
764
/* mlx5 common class-driver descriptor: binds this driver to the VDPA
 * class so the common layer dispatches probe/remove for matching PCI
 * devices. */
static struct mlx5_class_driver mlx5_vdpa_driver = {
	.drv_class = MLX5_CLASS_VDPA,
	.name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
	.id_table = mlx5_vdpa_pci_id_map,
	.probe = mlx5_vdpa_dev_probe,
	.remove = mlx5_vdpa_dev_remove,
};
772
/* Driver log type, default level NOTICE. */
RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)

/**
 * Driver initialization routine.
 */
RTE_INIT(rte_mlx5_vdpa_init)
{
	mlx5_common_init();
	/* Register only when the glue (rdma-core) layer loaded correctly. */
	if (mlx5_glue)
		mlx5_class_driver_register(&mlx5_vdpa_driver);
}

/* PMD export: driver name, probed PCI table and required kernel modules. */
RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");