vdpa/mlx5: add ConnectX-6 LX device ID
drivers/vdpa/mlx5/mlx5_vdpa.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <unistd.h>
#include <net/if.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <netinet/in.h>

#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_bus_pci.h>
#include <rte_eal_paging.h>

#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_common_defs.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_nl.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"

#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5

#define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
			    (1ULL << VIRTIO_F_ANY_LAYOUT) | \
			    (1ULL << VIRTIO_NET_F_MQ) | \
			    (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
			    (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
			    (1ULL << VHOST_F_LOG_ALL) | \
			    (1ULL << VIRTIO_NET_F_MTU))

#define MLX5_VDPA_PROTOCOL_FEATURES \
			    ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
			     (1ULL << VHOST_USER_PROTOCOL_F_STATUS))

#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU

TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
					      TAILQ_HEAD_INITIALIZER(priv_list);
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;

struct mlx5_vdpa_conf_thread_mng conf_thread_mng;

static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv);

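/*
 * Find the driver private structure for a vDPA device by scanning the
 * global priv list under the list lock. Sets rte_errno to EINVAL and
 * returns NULL when the device is unknown.
 */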
static struct mlx5_vdpa_priv *
mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
{
	struct mlx5_vdpa_priv *priv;
	int found = 0;

	pthread_mutex_lock(&priv_list_lock);
	TAILQ_FOREACH(priv, &priv_list, next) {
		if (vdev == priv->vdev) {
			found = 1;
			break;
		}
	}
	pthread_mutex_unlock(&priv_list_lock);
	if (!found) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		rte_errno = EINVAL;
		return NULL;
	}
	return priv;
}

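/* Report the number of queue pairs supported by the device to vhost. */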
static int
mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
{
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -1;
	}
	*queue_num = priv->caps.max_num_virtio_queues / 2;
	return 0;
}

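/*
 * Build the virtio feature set advertised to vhost: the static defaults
 * plus packed ring, TSO, checksum and virtio 1.0 bits, each gated on the
 * corresponding device capability.
 */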
static int
mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
{
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -1;
	}
	*features = MLX5_VDPA_DEFAULT_FEATURES;
	if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
		*features |= (1ULL << VIRTIO_F_RING_PACKED);
	if (priv->caps.tso_ipv4)
		*features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
	if (priv->caps.tso_ipv6)
		*features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
	if (priv->caps.tx_csum)
		*features |= (1ULL << VIRTIO_NET_F_CSUM);
	if (priv->caps.rx_csum)
		*features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
	if (priv->caps.virtio_version_1_0)
		*features |= (1ULL << VIRTIO_F_VERSION_1);
	return 0;
}

static int
mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
		uint64_t *features)
{
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -1;
	}
	*features = MLX5_VDPA_PROTOCOL_FEATURES;
	return 0;
}

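/* Enable or disable a single virtq on behalf of vhost, under its lock. */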
static int
mlx5_vdpa_set_vring_state(int vid, int vring, int state)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
	struct mlx5_vdpa_virtq *virtq;
	int ret;

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -EINVAL;
	}
	if (vring >= (int)priv->caps.max_num_virtio_queues) {
		DRV_LOG(ERR, "Too big vring id: %d.", vring);
		return -E2BIG;
	}
	virtq = &priv->virtqs[vring];
	pthread_mutex_lock(&virtq->virtq_lock);
	ret = mlx5_vdpa_virtq_enable(priv, vring, state);
	pthread_mutex_unlock(&virtq->virtq_lock);
	return ret;
}

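/*
 * Apply the features negotiated for a vhost connection. When live
 * migration logging is requested, set up the dirty bitmap and enable
 * logging in the device.
 */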
static int
mlx5_vdpa_features_set(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
	uint64_t log_base, log_size;
	uint64_t features;
	int ret;

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -EINVAL;
	}
	ret = rte_vhost_get_negotiated_features(vid, &features);
	if (ret) {
		DRV_LOG(ERR, "Failed to get negotiated features.");
		return ret;
	}
	if (RTE_VHOST_NEED_LOG(features)) {
		ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
		if (ret) {
			DRV_LOG(ERR, "Failed to get log base.");
			return ret;
		}
		ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
		if (ret) {
			DRV_LOG(ERR, "Failed to set dirty bitmap.");
			return ret;
		}
		DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
		ret = mlx5_vdpa_logging_enable(priv, 1);
		if (ret) {
			DRV_LOG(ERR, "Failed to enable dirty logging.");
			return ret;
		}
	}
	return 0;
}

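/*
 * Propagate the vhost MTU to the kernel netdev backing this device,
 * retrying the MTU set ioctl until the kernel value matches.
 */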
static int
mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
{
	struct ifreq request;
	uint16_t vhost_mtu = 0;
	uint16_t kern_mtu = 0;
	int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
	int sock;
	int retries = MLX5_VDPA_MAX_RETRIES;

	if (ret) {
		DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
		return ret;
	}
	if (!vhost_mtu) {
		DRV_LOG(DEBUG, "Vhost MTU is 0.");
		return ret;
	}
	ret = mlx5_get_ifname_sysfs
				(mlx5_os_get_ctx_device_name(priv->cdev->ctx),
				 request.ifr_name);
	if (ret) {
		DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
		return ret;
	}
	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	if (sock == -1) {
		DRV_LOG(DEBUG, "Cannot open IF socket.");
		return sock;
	}
	while (retries--) {
		ret = ioctl(sock, SIOCGIFMTU, &request);
		if (ret == -1)
			break;
		kern_mtu = request.ifr_mtu;
		DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
			(int)vhost_mtu);
		if (kern_mtu == vhost_mtu)
			break;
		request.ifr_mtu = vhost_mtu;
		ret = ioctl(sock, SIOCSIFMTU, &request);
		if (ret == -1)
			break;
		request.ifr_mtu = 0;
		usleep(MLX5_VDPA_USEC);
	}
	close(sock);
	return kern_mtu == vhost_mtu ? 0 : -1;
}

void
mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv)
{
	/* Clean pre-created resources on device removal only. */
	if (!priv->queues)
		mlx5_vdpa_virtqs_cleanup(priv);
	mlx5_vdpa_mem_dereg(priv);
}

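/* Wait up to ~10 seconds for in-flight device close tasks to drain. */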
static bool
mlx5_vdpa_wait_dev_close_tasks_done(struct mlx5_vdpa_priv *priv)
{
	uint32_t timeout = 0;

	/* Wait for all device close tasks to finish. */
	while (__atomic_load_n(&priv->dev_close_progress,
		__ATOMIC_RELAXED) != 0 && timeout < 1000) {
		rte_delay_us_sleep(10000);
		timeout++;
	}
	if (priv->dev_close_progress) {
		DRV_LOG(ERR,
		"Failed to wait for device close tasks to finish, vid %d.",
		priv->vid);
		return true;
	}
	return false;
}

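/*
 * Device teardown. When the configuration thread pool is in use and
 * resources are being kept, the heavy part is deferred to an
 * asynchronous DEV_CLOSE_NOWAIT task; otherwise steering and virtqs are
 * released and the event CQs are drained in the calling thread.
 */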
static int
_internal_mlx5_vdpa_dev_close(struct mlx5_vdpa_priv *priv,
		bool release_resource)
{
	int ret = 0;
	int vid = priv->vid;

	mlx5_vdpa_cqe_event_unset(priv);
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
		ret |= mlx5_vdpa_lm_log(priv);
		priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
	}
	if (priv->use_c_thread && !release_resource) {
		if (priv->last_c_thrd_idx >=
			(conf_thread_mng.max_thrds - 1))
			priv->last_c_thrd_idx = 0;
		else
			priv->last_c_thrd_idx++;
		__atomic_store_n(&priv->dev_close_progress,
			1, __ATOMIC_RELAXED);
		if (mlx5_vdpa_task_add(priv,
			priv->last_c_thrd_idx,
			MLX5_VDPA_TASK_DEV_CLOSE_NOWAIT,
			NULL, NULL, NULL, 1)) {
			DRV_LOG(ERR,
			"Failed to add dev close task.");
			goto single_thrd;
		}
		priv->state = MLX5_VDPA_STATE_PROBED;
		DRV_LOG(INFO, "vDPA device %d was closed.", vid);
		return ret;
	}
single_thrd:
	pthread_mutex_lock(&priv->steer_update_lock);
	mlx5_vdpa_steer_unset(priv);
	pthread_mutex_unlock(&priv->steer_update_lock);
	mlx5_vdpa_virtqs_release(priv, release_resource);
	mlx5_vdpa_drain_cq(priv);
	if (priv->lm_mr.addr)
		mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
	if (!priv->connected)
		mlx5_vdpa_dev_cache_clean(priv);
	priv->vid = 0;
	__atomic_store_n(&priv->dev_close_progress, 0,
		__ATOMIC_RELAXED);
	priv->state = MLX5_VDPA_STATE_PROBED;
	DRV_LOG(INFO, "vDPA device %d was closed.", vid);
	return ret;
}

static int
mlx5_vdpa_dev_close(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv;

	if (!vdev) {
		DRV_LOG(ERR, "Invalid vDPA device.");
		return -1;
	}
	priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -1;
	}
	return _internal_mlx5_vdpa_dev_close(priv, false);
}

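/*
 * Configure the device for a vhost connection: close any previous
 * configuration first, then register guest memory, prepare the virtqs
 * and set up steering and CQE events.
 */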
static int
mlx5_vdpa_dev_config(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -EINVAL;
	}
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED &&
	    mlx5_vdpa_dev_close(vid)) {
		DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
		return -1;
	}
	if (mlx5_vdpa_wait_dev_close_tasks_done(priv))
		return -1;
	priv->vid = vid;
	priv->connected = true;
	if (mlx5_vdpa_mtu_set(priv))
		DRV_LOG(WARNING, "MTU cannot be set on device %s.",
				vdev->device->name);
	if (mlx5_vdpa_mem_register(priv) ||
	    mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
	    mlx5_vdpa_cqe_event_setup(priv)) {
		mlx5_vdpa_dev_close(vid);
		return -1;
	}
	priv->state = MLX5_VDPA_STATE_CONFIGURED;
	DRV_LOG(INFO, "vDPA device %d was configured.", vid);
	return 0;
}

static int
mlx5_vdpa_get_device_fd(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -EINVAL;
	}
	return ((struct ibv_context *)priv->cdev->ctx)->cmd_fd;
}

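/* Expose the VAR doorbell mmap offset/length used for queue notification. */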
static int
mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	RTE_SET_USED(qid);
	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -EINVAL;
	}
	if (!priv->var) {
		DRV_LOG(ERR, "VAR was not created for device %s, is the device"
			" configured?", vdev->device->name);
		return -EINVAL;
	}
	*offset = priv->var->mmap_off;
	*size = priv->var->length;
	return 0;
}

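/*
 * Fill in the virtq statistics names, or return the total number of
 * counters when stats_names is NULL.
 */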
static int
mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
		struct rte_vdpa_stat_name *stats_names,
		unsigned int size)
{
	static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
		"received_descriptors",
		"completed_descriptors",
		"bad descriptor errors",
		"exceed max chain",
		"invalid buffer",
		"completion errors",
	};
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);
	unsigned int i;

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
		return -ENODEV;
	}
	if (!stats_names)
		return MLX5_VDPA_STATS_MAX;
	size = RTE_MIN(size, (unsigned int)MLX5_VDPA_STATS_MAX);
	for (i = 0; i < size; ++i)
		strlcpy(stats_names[i].name, mlx5_vdpa_stats_names[i],
			RTE_VDPA_STATS_NAME_SIZE);
	return size;
}

static int
mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
		struct rte_vdpa_stat *stats, unsigned int n)
{
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
		return -ENODEV;
	}
	if (qid >= (int)priv->caps.max_num_virtio_queues) {
		DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
				vdev->device->name);
		return -E2BIG;
	}
	if (!priv->caps.queue_counters_valid) {
		DRV_LOG(ERR, "Virtq statistics are not supported for device %s.",
			vdev->device->name);
		return -ENOTSUP;
	}
	return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
}

static int
mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
{
	struct mlx5_vdpa_priv *priv =
		mlx5_vdpa_find_priv_resource_by_vdev(vdev);

	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
		return -ENODEV;
	}
	if (qid >= (int)priv->caps.max_num_virtio_queues) {
		DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
				vdev->device->name);
		return -E2BIG;
	}
	if (!priv->caps.queue_counters_valid) {
		DRV_LOG(ERR, "Virtq statistics are not supported for device %s.",
			vdev->device->name);
		return -ENOTSUP;
	}
	return mlx5_vdpa_virtq_stats_reset(priv, qid);
}

static int
mlx5_vdpa_dev_cleanup(int vid)
{
	struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
	struct mlx5_vdpa_priv *priv;

	if (vdev == NULL)
		return -1;
	priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
	if (priv == NULL) {
		DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
		return -1;
	}
	if (priv->state == MLX5_VDPA_STATE_PROBED) {
		if (priv->use_c_thread)
			mlx5_vdpa_wait_dev_close_tasks_done(priv);
		mlx5_vdpa_dev_cache_clean(priv);
	}
	priv->connected = false;
	return 0;
}

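/* vDPA driver ops exposed to the vhost library. */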
static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
	.get_queue_num = mlx5_vdpa_get_queue_num,
	.get_features = mlx5_vdpa_get_vdpa_features,
	.get_protocol_features = mlx5_vdpa_get_protocol_features,
	.dev_conf = mlx5_vdpa_dev_config,
	.dev_close = mlx5_vdpa_dev_close,
	.dev_cleanup = mlx5_vdpa_dev_cleanup,
	.set_vring_state = mlx5_vdpa_set_vring_state,
	.set_features = mlx5_vdpa_features_set,
	.migration_done = NULL,
	.get_vfio_group_fd = NULL,
	.get_vfio_device_fd = mlx5_vdpa_get_device_fd,
	.get_notify_area = mlx5_vdpa_get_notify_area,
	.get_stats_names = mlx5_vdpa_get_stats_names,
	.get_stats = mlx5_vdpa_get_stats,
	.reset_stats = mlx5_vdpa_reset_stats,
};

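/* Parse a single devargs key/value pair into the device configuration. */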
static int
mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
{
	struct mlx5_vdpa_priv *priv = opaque;
	unsigned long tmp;
	int n_cores = sysconf(_SC_NPROCESSORS_ONLN);

	errno = 0;
	tmp = strtoul(val, NULL, 0);
	if (errno) {
		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
		return -errno;
	}
	if (strcmp(key, "event_mode") == 0) {
		if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
			priv->event_mode = (int)tmp;
		else
			DRV_LOG(WARNING, "Invalid event_mode %s.", val);
	} else if (strcmp(key, "event_us") == 0) {
		priv->event_us = (uint32_t)tmp;
	} else if (strcmp(key, "no_traffic_time") == 0) {
		priv->no_traffic_max = (uint32_t)tmp;
	} else if (strcmp(key, "event_core") == 0) {
		if (tmp >= (unsigned long)n_cores)
			DRV_LOG(WARNING, "Invalid event_core %s.", val);
		else
			priv->event_core = tmp;
	} else if (strcmp(key, "max_conf_threads") == 0) {
		if (tmp) {
			priv->use_c_thread = true;
			if (!conf_thread_mng.initializer_priv) {
				conf_thread_mng.initializer_priv = priv;
				if (tmp > MLX5_VDPA_MAX_C_THRD) {
					DRV_LOG(WARNING,
				"Invalid max_conf_threads %s, capping it to %d.",
				val, MLX5_VDPA_MAX_C_THRD);
					tmp = MLX5_VDPA_MAX_C_THRD;
				}
				conf_thread_mng.max_thrds = tmp;
			} else if (tmp != conf_thread_mng.max_thrds) {
				DRV_LOG(WARNING,
	"max_conf_threads is a PMD-level argument, not a per-device one; "
	"only the first device configuration sets it. The current value %d "
	"will not be changed to %d.",
				conf_thread_mng.max_thrds, (int)tmp);
			}
		} else {
			priv->use_c_thread = false;
		}
	} else if (strcmp(key, "hw_latency_mode") == 0) {
		priv->hw_latency_mode = (uint32_t)tmp;
	} else if (strcmp(key, "hw_max_latency_us") == 0) {
		priv->hw_max_latency_us = (uint32_t)tmp;
	} else if (strcmp(key, "hw_max_pending_comp") == 0) {
		priv->hw_max_pending_comp = (uint32_t)tmp;
	} else if (strcmp(key, "queue_size") == 0) {
		priv->queue_size = (uint16_t)tmp;
	} else if (strcmp(key, "queues") == 0) {
		priv->queues = (uint16_t)tmp;
	} else {
		DRV_LOG(WARNING, "Invalid key %s.", key);
	}
	return 0;
}

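/*
 * Set the configuration defaults and override them from the devargs
 * list. The queue_size and queues arguments take effect only when both
 * are provided.
 */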
static void
mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
		     struct mlx5_vdpa_priv *priv)
{
	const char **params = (const char *[]){
		"event_core",
		"event_mode",
		"event_us",
		"hw_latency_mode",
		"hw_max_latency_us",
		"hw_max_pending_comp",
		"no_traffic_time",
		"queue_size",
		"queues",
		"max_conf_threads",
		NULL,
	};

	priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
	priv->event_us = 0;
	priv->event_core = -1;
	priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
	if (mkvlist == NULL)
		return;
	mlx5_kvargs_process(mkvlist, params, mlx5_vdpa_args_check_handler,
			    priv);
	if (!priv->event_us &&
	    priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
		priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
	if ((priv->queue_size && !priv->queues) ||
		(!priv->queue_size && priv->queues)) {
		priv->queue_size = 0;
		priv->queues = 0;
		DRV_LOG(WARNING, "Please provide both queue_size and queues.");
	}
	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
	DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
	DRV_LOG(DEBUG, "queues is %u, queue_size is %u.", priv->queues,
		priv->queue_size);
}

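/* Destroy the virtq resources pre-created for the "queues" devarg. */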
void
mlx5_vdpa_prepare_virtq_destroy(struct mlx5_vdpa_priv *priv)
{
	uint32_t max_queues, index;
	struct mlx5_vdpa_virtq *virtq;

	if (!priv->queues || !priv->queue_size)
		return;
	max_queues = ((priv->queues * 2) < priv->caps.max_num_virtio_queues) ?
		(priv->queues * 2) : (priv->caps.max_num_virtio_queues);
	if (mlx5_vdpa_is_modify_virtq_supported(priv))
		mlx5_vdpa_steer_unset(priv);
	for (index = 0; index < max_queues; ++index) {
		virtq = &priv->virtqs[index];
		if (virtq->virtq) {
			pthread_mutex_lock(&virtq->virtq_lock);
			mlx5_vdpa_virtq_unset(virtq);
			pthread_mutex_unlock(&virtq->virtq_lock);
		}
	}
}

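/*
 * Pre-create the virtq resources for the configured number of queues,
 * spreading the work across the configuration threads when they are
 * enabled and falling back to the caller's thread on task-add failure.
 */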
static int
mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv)
{
	uint32_t remaining_cnt = 0, err_cnt = 0, task_num = 0;
	uint32_t max_queues, index, thrd_idx, data[1];
	struct mlx5_vdpa_virtq *virtq;

	for (index = 0; index < priv->caps.max_num_virtio_queues;
		index++) {
		virtq = &priv->virtqs[index];
		pthread_mutex_init(&virtq->virtq_lock, NULL);
	}
	if (!priv->queues || !priv->queue_size)
		return 0;
	max_queues = ((priv->queues * 2) < priv->caps.max_num_virtio_queues) ?
		(priv->queues * 2) : (priv->caps.max_num_virtio_queues);
	if (priv->use_c_thread) {
		uint32_t main_task_idx[max_queues];

		for (index = 0; index < max_queues; ++index) {
			thrd_idx = index % (conf_thread_mng.max_thrds + 1);
			if (!thrd_idx) {
				main_task_idx[task_num] = index;
				task_num++;
				continue;
			}
			thrd_idx = priv->last_c_thrd_idx + 1;
			if (thrd_idx >= conf_thread_mng.max_thrds)
				thrd_idx = 0;
			priv->last_c_thrd_idx = thrd_idx;
			data[0] = index;
			if (mlx5_vdpa_task_add(priv, thrd_idx,
				MLX5_VDPA_TASK_PREPARE_VIRTQ,
				&remaining_cnt, &err_cnt,
				(void **)&data, 1)) {
				DRV_LOG(ERR,
				"Failed to add virtq prepare task (%d).",
				index);
				main_task_idx[task_num] = index;
				task_num++;
			}
		}
		for (index = 0; index < task_num; ++index)
			if (mlx5_vdpa_virtq_single_resource_prepare(priv,
				main_task_idx[index]))
				goto error;
		if (mlx5_vdpa_c_thread_wait_bulk_tasks_done(&remaining_cnt,
			&err_cnt, 2000)) {
			DRV_LOG(ERR,
			"Failed to wait for virtq prepare tasks to complete.");
			goto error;
		}
	} else {
		for (index = 0; index < max_queues; ++index)
			if (mlx5_vdpa_virtq_single_resource_prepare(priv,
				index))
				goto error;
	}
	if (mlx5_vdpa_is_modify_virtq_supported(priv))
		if (mlx5_vdpa_steer_update(priv, true))
			goto error;
	return 0;
error:
	mlx5_vdpa_prepare_virtq_destroy(priv);
	return -1;
}

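/*
 * Create the per-device HW resources: the VAR doorbell area (retried,
 * since QEMU may still hold it across a vdpa restart), its mapping, the
 * transport domain, one TIS per LAG port, the null MR and the steering
 * domain and table.
 */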
static int
mlx5_vdpa_create_dev_resources(struct mlx5_vdpa_priv *priv)
{
	struct mlx5_devx_tis_attr tis_attr = {0};
	struct ibv_context *ctx = priv->cdev->ctx;
	uint32_t i;
	int retry;

	for (retry = 0; retry < 7; retry++) {
		priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
		if (priv->var != NULL)
			break;
		DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.", retry);
		/*
		 * Wait for QEMU to release the VAR during vdpa restart;
		 * the delay starts at 0.1 sec and doubles on each retry.
		 */
		usleep(100000U << retry);
	}
	if (!priv->var) {
		DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	/* Always map the entire page. */
	priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
				   PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
				   priv->var->mmap_off);
	if (priv->virtq_db_addr == MAP_FAILED) {
		DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
		priv->virtq_db_addr = NULL;
		rte_errno = errno;
		return -rte_errno;
	}
	/* Add the within-page offset for 64K-page systems. */
	priv->virtq_db_addr = (char *)priv->virtq_db_addr +
		((rte_mem_page_size() - 1) & priv->caps.doorbell_bar_offset);
	DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
		priv->virtq_db_addr);
	priv->td = mlx5_devx_cmd_create_td(ctx);
	if (!priv->td) {
		DRV_LOG(ERR, "Failed to create transport domain.");
		rte_errno = errno;
		return -rte_errno;
	}
	tis_attr.transport_domain = priv->td->id;
	for (i = 0; i < priv->num_lag_ports; i++) {
		/* 0 means auto affinity; a non-zero value proposes a port. */
		tis_attr.lag_tx_port_affinity = i + 1;
		priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
		if (!priv->tiss[i]) {
			DRV_LOG(ERR, "Failed to create TIS %u.", i);
			return -rte_errno;
		}
	}
	priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
	if (!priv->null_mr) {
		DRV_LOG(ERR, "Failed to allocate null MR.");
		rte_errno = errno;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
#ifdef HAVE_MLX5DV_DR
	priv->steer.domain = mlx5_glue->dr_create_domain(ctx,
					MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
	if (!priv->steer.domain) {
		DRV_LOG(ERR, "Failed to create Rx domain.");
		rte_errno = errno;
		return -rte_errno;
	}
#endif
	priv->steer.tbl = mlx5_glue->dr_create_flow_tbl(priv->steer.domain, 0);
	if (!priv->steer.tbl) {
		DRV_LOG(ERR, "Failed to create table 0 with Rx domain.");
		rte_errno = errno;
		return -rte_errno;
	}
	if (mlx5_vdpa_err_event_setup(priv) != 0)
		return -rte_errno;
	if (mlx5_vdpa_event_qp_global_prepare(priv))
		return -rte_errno;
	if (mlx5_vdpa_virtq_resource_prepare(priv))
		return -rte_errno;
	return 0;
}

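/*
 * Probe callback: validate the vDPA capabilities, allocate and set up
 * the private structure, create the device resources and register the
 * device with the vDPA framework.
 */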
static int
mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
		    struct mlx5_kvargs_ctrl *mkvlist)
{
	struct mlx5_vdpa_priv *priv = NULL;
	struct mlx5_hca_attr *attr = &cdev->config.hca_attr;

	if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
		DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
			"old FW/OFED version?");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (!attr->vdpa.queue_counters_valid)
		DRV_LOG(DEBUG, "No capability to support virtq statistics.");
	priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
			   sizeof(struct mlx5_vdpa_virtq) *
			   attr->vdpa.max_num_virtio_queues,
			   RTE_CACHE_LINE_SIZE);
	if (!priv) {
		DRV_LOG(ERR, "Failed to allocate private memory.");
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	priv->caps = attr->vdpa;
	priv->log_max_rqt_size = attr->log_max_rqt_size;
	priv->num_lag_ports = attr->num_lag_ports;
	if (attr->num_lag_ports == 0)
		priv->num_lag_ports = 1;
	rte_spinlock_init(&priv->db_lock);
	pthread_mutex_init(&priv->steer_update_lock, NULL);
	priv->cdev = cdev;
	mlx5_vdpa_config_get(mkvlist, priv);
	if (priv->use_c_thread) {
		if (conf_thread_mng.initializer_priv == priv)
			if (mlx5_vdpa_mult_threads_create(priv->event_core))
				goto error;
		__atomic_fetch_add(&conf_thread_mng.refcnt, 1,
			__ATOMIC_RELAXED);
	}
	if (mlx5_vdpa_create_dev_resources(priv))
		goto error;
	priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
	if (priv->vdev == NULL) {
		DRV_LOG(ERR, "Failed to register vDPA device.");
		rte_errno = rte_errno ? rte_errno : EINVAL;
		goto error;
	}
	pthread_mutex_lock(&priv_list_lock);
	TAILQ_INSERT_TAIL(&priv_list, priv, next);
	pthread_mutex_unlock(&priv_list_lock);
	return 0;
error:
	if (conf_thread_mng.initializer_priv == priv)
		mlx5_vdpa_mult_threads_destroy(false);
	if (priv)
		mlx5_vdpa_dev_release(priv);
	return -rte_errno;
}

static int
mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev)
{
	struct mlx5_vdpa_priv *priv = NULL;
	int found = 0;

	pthread_mutex_lock(&priv_list_lock);
	TAILQ_FOREACH(priv, &priv_list, next) {
		if (priv->vdev->device == cdev->dev) {
			found = 1;
			break;
		}
	}
	if (found)
		TAILQ_REMOVE(&priv_list, priv, next);
	pthread_mutex_unlock(&priv_list_lock);
	if (found)
		mlx5_vdpa_dev_release(priv);
	return 0;
}

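/* Release everything created by mlx5_vdpa_create_dev_resources(). */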
static void
mlx5_vdpa_release_dev_resources(struct mlx5_vdpa_priv *priv)
{
	uint32_t i;

	if (priv->queues)
		mlx5_vdpa_virtqs_cleanup(priv);
	mlx5_vdpa_dev_cache_clean(priv);
	for (i = 0; i < priv->caps.max_num_virtio_queues; i++) {
		if (!priv->virtqs[i].counters)
			continue;
		claim_zero(mlx5_devx_cmd_destroy(priv->virtqs[i].counters));
	}
	mlx5_vdpa_event_qp_global_release(priv);
	mlx5_vdpa_err_event_unset(priv);
	if (priv->steer.tbl)
		claim_zero(mlx5_glue->dr_destroy_flow_tbl(priv->steer.tbl));
	if (priv->steer.domain)
		claim_zero(mlx5_glue->dr_destroy_domain(priv->steer.domain));
	if (priv->null_mr)
		claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
	for (i = 0; i < priv->num_lag_ports; i++) {
		if (priv->tiss[i])
			claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
	}
	if (priv->td)
		claim_zero(mlx5_devx_cmd_destroy(priv->td));
	if (priv->virtq_db_addr)
		/* Mask out the within page offset for munmap. */
		claim_zero(munmap((void *)((uintptr_t)priv->virtq_db_addr &
			~(rte_mem_page_size() - 1)), priv->var->length));
	if (priv->var)
		mlx5_glue->dv_free_var(priv->var);
}

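/*
 * Close the device if it is still configured, release its resources and
 * unregister it; the configuration thread pool is destroyed with its
 * last user.
 */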
static void
mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
{
	if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
		_internal_mlx5_vdpa_dev_close(priv, true);
	if (priv->use_c_thread)
		mlx5_vdpa_wait_dev_close_tasks_done(priv);
	mlx5_vdpa_release_dev_resources(priv);
	if (priv->vdev)
		rte_vdpa_unregister_device(priv->vdev);
	if (priv->use_c_thread)
		if (__atomic_fetch_sub(&conf_thread_mng.refcnt,
			1, __ATOMIC_RELAXED) == 1)
			mlx5_vdpa_mult_threads_destroy(true);
	rte_free(priv);
}

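/* PCI IDs probed by this driver, including the newly added ConnectX-6 Lx. */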
static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX6LX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX7)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
				PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
	},
	{
		.vendor_id = 0
	}
};

static struct mlx5_class_driver mlx5_vdpa_driver = {
	.drv_class = MLX5_CLASS_VDPA,
	.name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
	.id_table = mlx5_vdpa_pci_id_map,
	.probe = mlx5_vdpa_dev_probe,
	.remove = mlx5_vdpa_dev_remove,
};

RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)

/**
 * Driver initialization routine.
 */
RTE_INIT(rte_mlx5_vdpa_init)
{
	mlx5_common_init();
	if (mlx5_glue)
		mlx5_class_driver_register(&mlx5_vdpa_driver);
}

RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");