vdpa/mlx5: support device cleanup callback
[dpdk.git] / drivers / vdpa / mlx5 / mlx5_vdpa.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 #include <unistd.h>
5 #include <net/if.h>
6 #include <sys/socket.h>
7 #include <sys/ioctl.h>
8 #include <sys/mman.h>
9 #include <fcntl.h>
10 #include <netinet/in.h>
11
12 #include <rte_malloc.h>
13 #include <rte_log.h>
14 #include <rte_errno.h>
15 #include <rte_string_fns.h>
16 #include <rte_bus_pci.h>
17
18 #include <mlx5_glue.h>
19 #include <mlx5_common.h>
20 #include <mlx5_common_defs.h>
21 #include <mlx5_devx_cmds.h>
22 #include <mlx5_prm.h>
23 #include <mlx5_nl.h>
24
25 #include "mlx5_vdpa_utils.h"
26 #include "mlx5_vdpa.h"
27
28 #define MLX5_VDPA_DRIVER_NAME vdpa_mlx5
29
30 #define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
31                             (1ULL << VIRTIO_F_ANY_LAYOUT) | \
32                             (1ULL << VIRTIO_NET_F_MQ) | \
33                             (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
34                             (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
35                             (1ULL << VHOST_F_LOG_ALL) | \
36                             (1ULL << VIRTIO_NET_F_MTU))
37
38 #define MLX5_VDPA_PROTOCOL_FEATURES \
39                             ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
40                              (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
41                              (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
42                              (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
43                              (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
44                              (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
45                              (1ULL << VHOST_USER_PROTOCOL_F_STATUS))
46
47 #define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU
48
49 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
50                                               TAILQ_HEAD_INITIALIZER(priv_list);
51 static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
52
53 static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv);
54
55 static struct mlx5_vdpa_priv *
56 mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
57 {
58         struct mlx5_vdpa_priv *priv;
59         int found = 0;
60
61         pthread_mutex_lock(&priv_list_lock);
62         TAILQ_FOREACH(priv, &priv_list, next) {
63                 if (vdev == priv->vdev) {
64                         found = 1;
65                         break;
66                 }
67         }
68         pthread_mutex_unlock(&priv_list_lock);
69         if (!found) {
70                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
71                 rte_errno = EINVAL;
72                 return NULL;
73         }
74         return priv;
75 }
76
77 static int
78 mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
79 {
80         struct mlx5_vdpa_priv *priv =
81                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
82
83         if (priv == NULL) {
84                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
85                 return -1;
86         }
87         *queue_num = priv->caps.max_num_virtio_queues;
88         return 0;
89 }
90
91 static int
92 mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
93 {
94         struct mlx5_vdpa_priv *priv =
95                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
96
97         if (priv == NULL) {
98                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
99                 return -1;
100         }
101         *features = MLX5_VDPA_DEFAULT_FEATURES;
102         if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
103                 *features |= (1ULL << VIRTIO_F_RING_PACKED);
104         if (priv->caps.tso_ipv4)
105                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
106         if (priv->caps.tso_ipv6)
107                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
108         if (priv->caps.tx_csum)
109                 *features |= (1ULL << VIRTIO_NET_F_CSUM);
110         if (priv->caps.rx_csum)
111                 *features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
112         if (priv->caps.virtio_version_1_0)
113                 *features |= (1ULL << VIRTIO_F_VERSION_1);
114         return 0;
115 }
116
117 static int
118 mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
119                 uint64_t *features)
120 {
121         struct mlx5_vdpa_priv *priv =
122                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
123
124         if (priv == NULL) {
125                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
126                 return -1;
127         }
128         *features = MLX5_VDPA_PROTOCOL_FEATURES;
129         return 0;
130 }
131
132 static int
133 mlx5_vdpa_set_vring_state(int vid, int vring, int state)
134 {
135         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
136         struct mlx5_vdpa_priv *priv =
137                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
138         int ret;
139
140         if (priv == NULL) {
141                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
142                 return -EINVAL;
143         }
144         if (vring >= (int)priv->caps.max_num_virtio_queues * 2) {
145                 DRV_LOG(ERR, "Too big vring id: %d.", vring);
146                 return -E2BIG;
147         }
148         pthread_mutex_lock(&priv->vq_config_lock);
149         ret = mlx5_vdpa_virtq_enable(priv, vring, state);
150         pthread_mutex_unlock(&priv->vq_config_lock);
151         return ret;
152 }
153
154 static int
155 mlx5_vdpa_features_set(int vid)
156 {
157         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
158         struct mlx5_vdpa_priv *priv =
159                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
160         uint64_t log_base, log_size;
161         uint64_t features;
162         int ret;
163
164         if (priv == NULL) {
165                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
166                 return -EINVAL;
167         }
168         ret = rte_vhost_get_negotiated_features(vid, &features);
169         if (ret) {
170                 DRV_LOG(ERR, "Failed to get negotiated features.");
171                 return ret;
172         }
173         if (RTE_VHOST_NEED_LOG(features)) {
174                 ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
175                 if (ret) {
176                         DRV_LOG(ERR, "Failed to get log base.");
177                         return ret;
178                 }
179                 ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
180                 if (ret) {
181                         DRV_LOG(ERR, "Failed to set dirty bitmap.");
182                         return ret;
183                 }
184                 DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
185                 ret = mlx5_vdpa_logging_enable(priv, 1);
186                 if (ret) {
187                         DRV_LOG(ERR, "Failed t enable dirty logging.");
188                         return ret;
189                 }
190         }
191         return 0;
192 }
193
/*
 * Propagate the MTU negotiated by the vhost front-end to the kernel
 * netdev backing this device.
 *
 * Reads the vhost MTU, resolves the kernel interface name from the ibv
 * context sysfs entry, then retries up to MLX5_VDPA_MAX_RETRIES times:
 * read the current kernel MTU (SIOCGIFMTU), and if it differs, request
 * the vhost value (SIOCSIFMTU) and re-check after a short sleep.
 *
 * Returns 0 when the kernel MTU matches the vhost MTU on exit, a
 * negative/non-zero value otherwise (including ioctl/socket failures).
 */
static int
mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
{
	struct ifreq request;
	uint16_t vhost_mtu = 0;
	uint16_t kern_mtu = 0;
	int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
	int sock;
	int retries = MLX5_VDPA_MAX_RETRIES;

	if (ret) {
		DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
		return ret;
	}
	/* MTU 0 means the front-end did not provide one - nothing to do. */
	if (!vhost_mtu) {
		DRV_LOG(DEBUG, "Vhost MTU is 0.");
		return ret;
	}
	ret = mlx5_get_ifname_sysfs
				(mlx5_os_get_ctx_device_name(priv->cdev->ctx),
				 request.ifr_name);
	if (ret) {
		DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
		return ret;
	}
	/* A throwaway datagram socket is needed only to issue the ioctls. */
	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	if (sock == -1) {
		DRV_LOG(DEBUG, "Cannot open IF socket.");
		return sock;
	}
	while (retries--) {
		ret = ioctl(sock, SIOCGIFMTU, &request);
		if (ret == -1)
			break;
		kern_mtu = request.ifr_mtu;
		DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
			(int)vhost_mtu);
		if (kern_mtu == vhost_mtu)
			break;
		request.ifr_mtu = vhost_mtu;
		ret = ioctl(sock, SIOCSIFMTU, &request);
		if (ret == -1)
			break;
		/* Clear before the next SIOCGIFMTU round re-fills it. */
		request.ifr_mtu = 0;
		usleep(MLX5_VDPA_USEC);
	}
	close(sock);
	/* Success is defined by the final state, not by the last ioctl. */
	return kern_mtu == vhost_mtu ? 0 : -1;
}
243
/*
 * Drop the per-connection cached resources: destroy the virtqs first,
 * then deregister the guest memory they referenced (order matters).
 * Called when the device is closed without an expected reconnection,
 * or from the dev_cleanup callback.
 */
static void
mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv)
{
	mlx5_vdpa_virtqs_cleanup(priv);
	mlx5_vdpa_mem_dereg(priv);
}
250
/*
 * vDPA op: tear down the datapath for connection @vid.
 *
 * Ordering is significant: stop CQE events, log the final ring state
 * while still CONFIGURED (for live migration), unset steering, release
 * virtqs, then destroy the LM mkey. Cached resources are kept when the
 * front-end is still connected so a quick reconfigure stays cheap.
 *
 * Returns 0 on success; a non-zero OR of intermediate failures otherwise.
 */
static int
mlx5_vdpa_dev_close(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
	int ret = 0;

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -1;
	}
	mlx5_vdpa_cqe_event_unset(priv);
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
		/* Capture final dirty state before the virtqs go away. */
		ret |= mlx5_vdpa_lm_log(priv);
		priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
	}
	mlx5_vdpa_steer_unset(priv);
	mlx5_vdpa_virtqs_release(priv);
	if (priv->lm_mr.addr)
		mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
	priv->state = MLX5_VDPA_STATE_PROBED;
	/* Only drop the caches when no reconnection is expected. */
	if (!priv->connected)
		mlx5_vdpa_dev_cache_clean(priv);
	priv->vid = 0;
	/* The mutex may stay locked after event thread cancel - initiate it. */
	pthread_mutex_init(&priv->vq_config_lock, NULL);
	DRV_LOG(INFO, "vDPA device %d was closed.", vid);
	return ret;
}
281
/*
 * vDPA op: configure the datapath for connection @vid.
 *
 * If the device is already CONFIGURED this is a reconfiguration, so it
 * is closed first. Setup order: register guest memory, prepare virtqs,
 * set up RX steering, then arm CQE events; any failure rolls everything
 * back through mlx5_vdpa_dev_close(). An MTU propagation failure is
 * only a warning. Returns 0 on success, negative on error.
 */
static int
mlx5_vdpa_dev_config(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -EINVAL;
	}
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED &&
	    mlx5_vdpa_dev_close(vid)) {
		DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
		return -1;
	}
	priv->vid = vid;
	/* Mark connected so a later close keeps the resource caches. */
	priv->connected = true;
	if (mlx5_vdpa_mtu_set(priv))
		DRV_LOG(WARNING, "MTU cannot be set on device %s.",
				vdev->device->name);
	if (mlx5_vdpa_mem_register(priv) ||
	    mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
	    mlx5_vdpa_cqe_event_setup(priv)) {
		mlx5_vdpa_dev_close(vid);
		return -1;
	}
	priv->state = MLX5_VDPA_STATE_CONFIGURED;
	DRV_LOG(INFO, "vDPA device %d was configured.", vid);
	return 0;
}
313
314 static int
315 mlx5_vdpa_get_device_fd(int vid)
316 {
317         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
318         struct mlx5_vdpa_priv *priv =
319                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
320
321         if (priv == NULL) {
322                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
323                 return -EINVAL;
324         }
325         return ((struct ibv_context *)priv->cdev->ctx)->cmd_fd;
326 }
327
328 static int
329 mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
330 {
331         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
332         struct mlx5_vdpa_priv *priv =
333                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
334
335         RTE_SET_USED(qid);
336         if (priv == NULL) {
337                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
338                 return -EINVAL;
339         }
340         if (!priv->var) {
341                 DRV_LOG(ERR, "VAR was not created for device %s, is the device"
342                         " configured?.", vdev->device->name);
343                 return -EINVAL;
344         }
345         *offset = priv->var->mmap_off;
346         *size = priv->var->length;
347         return 0;
348 }
349
350 static int
351 mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
352                 struct rte_vdpa_stat_name *stats_names,
353                 unsigned int size)
354 {
355         static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
356                 "received_descriptors",
357                 "completed_descriptors",
358                 "bad descriptor errors",
359                 "exceed max chain",
360                 "invalid buffer",
361                 "completion errors",
362         };
363         struct mlx5_vdpa_priv *priv =
364                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
365         unsigned int i;
366
367         if (priv == NULL) {
368                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
369                 return -ENODEV;
370         }
371         if (!stats_names)
372                 return MLX5_VDPA_STATS_MAX;
373         size = RTE_MIN(size, (unsigned int)MLX5_VDPA_STATS_MAX);
374         for (i = 0; i < size; ++i)
375                 strlcpy(stats_names[i].name, mlx5_vdpa_stats_names[i],
376                         RTE_VDPA_STATS_NAME_SIZE);
377         return size;
378 }
379
380 static int
381 mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
382                 struct rte_vdpa_stat *stats, unsigned int n)
383 {
384         struct mlx5_vdpa_priv *priv =
385                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
386
387         if (priv == NULL) {
388                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
389                 return -ENODEV;
390         }
391         if (priv->state == MLX5_VDPA_STATE_PROBED) {
392                 DRV_LOG(ERR, "Device %s was not configured.",
393                                 vdev->device->name);
394                 return -ENODATA;
395         }
396         if (qid >= (int)priv->nr_virtqs) {
397                 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
398                                 vdev->device->name);
399                 return -E2BIG;
400         }
401         if (!priv->caps.queue_counters_valid) {
402                 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
403                         vdev->device->name);
404                 return -ENOTSUP;
405         }
406         return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
407 }
408
409 static int
410 mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
411 {
412         struct mlx5_vdpa_priv *priv =
413                 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
414
415         if (priv == NULL) {
416                 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
417                 return -ENODEV;
418         }
419         if (priv->state == MLX5_VDPA_STATE_PROBED) {
420                 DRV_LOG(ERR, "Device %s was not configured.",
421                                 vdev->device->name);
422                 return -ENODATA;
423         }
424         if (qid >= (int)priv->nr_virtqs) {
425                 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
426                                 vdev->device->name);
427                 return -E2BIG;
428         }
429         if (!priv->caps.queue_counters_valid) {
430                 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
431                         vdev->device->name);
432                 return -ENOTSUP;
433         }
434         return mlx5_vdpa_virtq_stats_reset(priv, qid);
435 }
436
437 static int
438 mlx5_vdpa_dev_cleanup(int vid)
439 {
440         struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
441         struct mlx5_vdpa_priv *priv;
442
443         if (vdev == NULL)
444                 return -1;
445         priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
446         if (priv == NULL) {
447                 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
448                 return -1;
449         }
450         if (priv->state == MLX5_VDPA_STATE_PROBED)
451                 mlx5_vdpa_dev_cache_clean(priv);
452         priv->connected = false;
453         return 0;
454 }
455
/* vhost vDPA callbacks implemented by this driver. VFIO group fd and
 * migration_done are intentionally not provided.
 */
static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
	.get_queue_num = mlx5_vdpa_get_queue_num,
	.get_features = mlx5_vdpa_get_vdpa_features,
	.get_protocol_features = mlx5_vdpa_get_protocol_features,
	.dev_conf = mlx5_vdpa_dev_config,
	.dev_close = mlx5_vdpa_dev_close,
	.dev_cleanup = mlx5_vdpa_dev_cleanup,
	.set_vring_state = mlx5_vdpa_set_vring_state,
	.set_features = mlx5_vdpa_features_set,
	.migration_done = NULL,
	.get_vfio_group_fd = NULL,
	.get_vfio_device_fd = mlx5_vdpa_get_device_fd,
	.get_notify_area = mlx5_vdpa_get_notify_area,
	.get_stats_names = mlx5_vdpa_get_stats_names,
	.get_stats = mlx5_vdpa_get_stats,
	.reset_stats = mlx5_vdpa_reset_stats,
};
473
474 static int
475 mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
476 {
477         struct mlx5_vdpa_priv *priv = opaque;
478         unsigned long tmp;
479         int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
480
481         errno = 0;
482         tmp = strtoul(val, NULL, 0);
483         if (errno) {
484                 DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
485                 return -errno;
486         }
487         if (strcmp(key, "event_mode") == 0) {
488                 if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
489                         priv->event_mode = (int)tmp;
490                 else
491                         DRV_LOG(WARNING, "Invalid event_mode %s.", val);
492         } else if (strcmp(key, "event_us") == 0) {
493                 priv->event_us = (uint32_t)tmp;
494         } else if (strcmp(key, "no_traffic_time") == 0) {
495                 priv->no_traffic_max = (uint32_t)tmp;
496         } else if (strcmp(key, "event_core") == 0) {
497                 if (tmp >= (unsigned long)n_cores)
498                         DRV_LOG(WARNING, "Invalid event_core %s.", val);
499                 else
500                         priv->event_core = tmp;
501         } else if (strcmp(key, "hw_latency_mode") == 0) {
502                 priv->hw_latency_mode = (uint32_t)tmp;
503         } else if (strcmp(key, "hw_max_latency_us") == 0) {
504                 priv->hw_max_latency_us = (uint32_t)tmp;
505         } else if (strcmp(key, "hw_max_pending_comp") == 0) {
506                 priv->hw_max_pending_comp = (uint32_t)tmp;
507         }
508         return 0;
509 }
510
511 static void
512 mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
513                      struct mlx5_vdpa_priv *priv)
514 {
515         const char **params = (const char *[]){
516                 "event_core",
517                 "event_mode",
518                 "event_us",
519                 "hw_latency_mode",
520                 "hw_max_latency_us",
521                 "hw_max_pending_comp",
522                 "no_traffic_time",
523                 NULL,
524         };
525
526         priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
527         priv->event_us = 0;
528         priv->event_core = -1;
529         priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
530         if (mkvlist == NULL)
531                 return;
532         mlx5_kvargs_process(mkvlist, params, mlx5_vdpa_args_check_handler,
533                             priv);
534         if (!priv->event_us &&
535             priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
536                 priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
537         DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
538         DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
539         DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
540 }
541
/*
 * Allocate the per-device HW resources that live for the whole probe
 * lifetime: the VAR doorbell page (with retries, since QEMU may still
 * hold it across a vdpa restart), its mmap, a transport domain, one TIS
 * per LAG port, a null MR, the RX steering domain/root table, the error
 * event channel and the global event QP resources.
 *
 * On any failure, returns -rte_errno and leaves the partially created
 * resources in @priv; the caller is expected to release them via
 * mlx5_vdpa_dev_release() -> mlx5_vdpa_release_dev_resources().
 */
static int
mlx5_vdpa_create_dev_resources(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_devx_tis_attr tis_attr = {0};
	struct ibv_context *ctx = priv->cdev->ctx;
	uint32_t i;
	int retry;

	for (retry = 0; retry < 7; retry++) {
		priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
		if (priv->var != NULL)
			break;
		DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.", retry);
		/* Wait Qemu release VAR during vdpa restart, 0.1 sec based. */
		usleep(100000U << retry);
	}
	if (!priv->var) {
		DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	/* Always map the entire page. */
	priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
				   PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
				   priv->var->mmap_off);
	if (priv->virtq_db_addr == MAP_FAILED) {
		DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
		/* NULL signals "no mapping" to the release path. */
		priv->virtq_db_addr = NULL;
		rte_errno = errno;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
		priv->virtq_db_addr);
	priv->td = mlx5_devx_cmd_create_td(ctx);
	if (!priv->td) {
		DRV_LOG(ERR, "Failed to create transport domain.");
		rte_errno = errno;
		return -rte_errno;
	}
	tis_attr.transport_domain = priv->td->id;
	for (i = 0; i < priv->num_lag_ports; i++) {
		/* 0 is auto affinity, non-zero value to propose port. */
		tis_attr.lag_tx_port_affinity = i + 1;
		priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
		if (!priv->tiss[i]) {
			DRV_LOG(ERR, "Failed to create TIS %u.", i);
			return -rte_errno;
		}
	}
	priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
	if (!priv->null_mr) {
		DRV_LOG(ERR, "Failed to allocate null MR.");
		rte_errno = errno;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
#ifdef HAVE_MLX5DV_DR
	priv->steer.domain = mlx5_glue->dr_create_domain(ctx,
					MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
	if (!priv->steer.domain) {
		DRV_LOG(ERR, "Failed to create Rx domain.");
		rte_errno = errno;
		return -rte_errno;
	}
#endif
	priv->steer.tbl = mlx5_glue->dr_create_flow_tbl(priv->steer.domain, 0);
	if (!priv->steer.tbl) {
		DRV_LOG(ERR, "Failed to create table 0 with Rx domain.");
		rte_errno = errno;
		return -rte_errno;
	}
	if (mlx5_vdpa_err_event_setup(priv) != 0)
		return -rte_errno;
	if (mlx5_vdpa_event_qp_global_prepare(priv))
		return -rte_errno;
	return 0;
}
619
/*
 * Class-driver probe: create and register a vDPA device on top of the
 * common mlx5 device @cdev.
 *
 * Verifies the HCA exposes vdpa capabilities, allocates the private
 * context (with trailing storage for two virtqs per supported queue
 * pair), creates the HW resources, registers with the vhost vDPA layer,
 * applies devargs and links the context into the global list.
 *
 * Returns 0 on success, -rte_errno on failure (the partially built
 * context is released via mlx5_vdpa_dev_release()).
 */
static int
mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
		    struct mlx5_kvargs_ctrl *mkvlist)
{
	struct mlx5_vdpa_priv *priv = NULL;
	struct mlx5_hca_attr *attr = &cdev->config.hca_attr;

	if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
		DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
			"old FW/OFED version?");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (!attr->vdpa.queue_counters_valid)
		DRV_LOG(DEBUG, "No capability to support virtq statistics.");
	/* Room for 2 rings (RX+TX) per supported virtio queue pair. */
	priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
			   sizeof(struct mlx5_vdpa_virtq) *
			   attr->vdpa.max_num_virtio_queues * 2,
			   RTE_CACHE_LINE_SIZE);
	if (!priv) {
		DRV_LOG(ERR, "Failed to allocate private memory.");
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	priv->caps = attr->vdpa;
	priv->log_max_rqt_size = attr->log_max_rqt_size;
	priv->num_lag_ports = attr->num_lag_ports;
	/* FW reports 0 LAG ports on non-LAG devices - treat as one port. */
	if (attr->num_lag_ports == 0)
		priv->num_lag_ports = 1;
	pthread_mutex_init(&priv->vq_config_lock, NULL);
	priv->cdev = cdev;
	if (mlx5_vdpa_create_dev_resources(priv))
		goto error;
	priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
	if (priv->vdev == NULL) {
		DRV_LOG(ERR, "Failed to register vDPA device.");
		rte_errno = rte_errno ? rte_errno : EINVAL;
		goto error;
	}
	mlx5_vdpa_config_get(mkvlist, priv);
	SLIST_INIT(&priv->mr_list);
	pthread_mutex_lock(&priv_list_lock);
	TAILQ_INSERT_TAIL(&priv_list, priv, next);
	pthread_mutex_unlock(&priv_list_lock);
	return 0;
error:
	if (priv)
		mlx5_vdpa_dev_release(priv);
	return -rte_errno;
}
670
671 static int
672 mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev)
673 {
674         struct mlx5_vdpa_priv *priv = NULL;
675         int found = 0;
676
677         pthread_mutex_lock(&priv_list_lock);
678         TAILQ_FOREACH(priv, &priv_list, next) {
679                 if (priv->vdev->device == cdev->dev) {
680                         found = 1;
681                         break;
682                 }
683         }
684         if (found)
685                 TAILQ_REMOVE(&priv_list, priv, next);
686         pthread_mutex_unlock(&priv_list_lock);
687         if (found)
688                 mlx5_vdpa_dev_release(priv);
689         return 0;
690 }
691
/*
 * Destroy every HW resource created by mlx5_vdpa_create_dev_resources()
 * in reverse creation order. Each step is guarded so this is safe to
 * call on a partially initialized context (probe error path) as well as
 * on a fully built one (remove path).
 */
static void
mlx5_vdpa_release_dev_resources(struct mlx5_vdpa_priv *priv)
{
	uint32_t i;

	mlx5_vdpa_dev_cache_clean(priv);
	mlx5_vdpa_event_qp_global_release(priv);
	mlx5_vdpa_err_event_unset(priv);
	if (priv->steer.tbl)
		claim_zero(mlx5_glue->dr_destroy_flow_tbl(priv->steer.tbl));
	if (priv->steer.domain)
		claim_zero(mlx5_glue->dr_destroy_domain(priv->steer.domain));
	if (priv->null_mr)
		claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
	for (i = 0; i < priv->num_lag_ports; i++) {
		if (priv->tiss[i])
			claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
	}
	if (priv->td)
		claim_zero(mlx5_devx_cmd_destroy(priv->td));
	/* The doorbell mapping spans the whole VAR page. */
	if (priv->virtq_db_addr)
		claim_zero(munmap(priv->virtq_db_addr, priv->var->length));
	if (priv->var)
		mlx5_glue->dv_free_var(priv->var);
}
717
/*
 * Fully release a private context: close the live datapath if still
 * configured, destroy the HW resources, unregister from the vhost vDPA
 * layer and free the memory. The caller must have already unlinked
 * @priv from priv_list.
 */
static void
mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
{
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
		mlx5_vdpa_dev_close(priv->vid);
	mlx5_vdpa_release_dev_resources(priv);
	if (priv->vdev)
		rte_vdpa_unregister_device(priv->vdev);
	pthread_mutex_destroy(&priv->vq_config_lock);
	rte_free(priv);
}
729
/* PCI IDs of the NVIDIA/Mellanox adapters this driver binds to;
 * terminated by a zero vendor_id entry.
 */
static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX7)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
	},
	{
		.vendor_id = 0
	}
};
763
/* mlx5 class-driver descriptor: registers the probe/remove hooks for
 * the vDPA class on the devices listed in mlx5_vdpa_pci_id_map.
 */
static struct mlx5_class_driver mlx5_vdpa_driver = {
	.drv_class = MLX5_CLASS_VDPA,
	.name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
	.id_table = mlx5_vdpa_pci_id_map,
	.probe = mlx5_vdpa_dev_probe,
	.remove = mlx5_vdpa_dev_remove,
};
771
772 RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)
773
/**
 * Driver initialization routine.
 *
 * Runs at DPDK constructor time: initializes the mlx5 common layer and,
 * only if the rdma-core glue was loaded successfully, registers this
 * driver with the mlx5 class framework.
 */
RTE_INIT(rte_mlx5_vdpa_init)
{
	mlx5_common_init();
	if (mlx5_glue)
		mlx5_class_driver_register(&mlx5_vdpa_driver);
}
783
784 RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
785 RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
786 RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");