vdpa/mlx5: add task ring for multi-thread management
drivers/vdpa/mlx5/mlx5_vdpa.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <unistd.h>
#include <net/if.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <netinet/in.h>

#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_bus_pci.h>
#include <rte_eal_paging.h>

#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_common_defs.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_nl.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"

#define MLX5_VDPA_DRIVER_NAME vdpa_mlx5

#define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
                            (1ULL << VIRTIO_F_ANY_LAYOUT) | \
                            (1ULL << VIRTIO_NET_F_MQ) | \
                            (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
                            (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
                            (1ULL << VHOST_F_LOG_ALL) | \
                            (1ULL << VIRTIO_NET_F_MTU))

#define MLX5_VDPA_PROTOCOL_FEATURES \
                            ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_STATUS))

#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU

TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
                                              TAILQ_HEAD_INITIALIZER(priv_list);
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;

struct mlx5_vdpa_conf_thread_mng conf_thread_mng;

static void mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv);

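/*
 * Look up the driver private structure attached to a vDPA device in the
 * global device list. Sets rte_errno to EINVAL and returns NULL when the
 * device is unknown.
 */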
static struct mlx5_vdpa_priv *
mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
{
        struct mlx5_vdpa_priv *priv;
        int found = 0;

        pthread_mutex_lock(&priv_list_lock);
        TAILQ_FOREACH(priv, &priv_list, next) {
                if (vdev == priv->vdev) {
                        found = 1;
                        break;
                }
        }
        pthread_mutex_unlock(&priv_list_lock);
        if (!found) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                rte_errno = EINVAL;
                return NULL;
        }
        return priv;
}

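/* Report the number of queue pairs: the HW capability counts single virtqs. */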
static int
mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
{
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -1;
        }
        *queue_num = priv->caps.max_num_virtio_queues / 2;
        return 0;
}

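/*
 * Build the virtio feature set from the driver defaults plus the bits the
 * device capabilities allow (packed ring, TSO, checksum offloads, etc.).
 */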
static int
mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
{
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -1;
        }
        *features = MLX5_VDPA_DEFAULT_FEATURES;
        if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
                *features |= (1ULL << VIRTIO_F_RING_PACKED);
        if (priv->caps.tso_ipv4)
                *features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
        if (priv->caps.tso_ipv6)
                *features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
        if (priv->caps.tx_csum)
                *features |= (1ULL << VIRTIO_NET_F_CSUM);
        if (priv->caps.rx_csum)
                *features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
        if (priv->caps.virtio_version_1_0)
                *features |= (1ULL << VIRTIO_F_VERSION_1);
        return 0;
}

static int
mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
                uint64_t *features)
{
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -1;
        }
        *features = MLX5_VDPA_PROTOCOL_FEATURES;
        return 0;
}

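/* Enable or disable a single virtq under its dedicated lock. */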
static int
mlx5_vdpa_set_vring_state(int vid, int vring, int state)
{
        struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
        struct mlx5_vdpa_virtq *virtq;
        int ret;

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -EINVAL;
        }
        if (vring >= (int)priv->caps.max_num_virtio_queues) {
                DRV_LOG(ERR, "Vring id %d is too big.", vring);
                return -E2BIG;
        }
        virtq = &priv->virtqs[vring];
        pthread_mutex_lock(&virtq->virtq_lock);
        ret = mlx5_vdpa_virtq_enable(priv, vring, state);
        pthread_mutex_unlock(&virtq->virtq_lock);
        return ret;
}

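/*
 * Apply the negotiated vhost features. When live-migration logging is
 * requested, set up the dirty bitmap and enable logging in HW.
 */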
static int
mlx5_vdpa_features_set(int vid)
{
        struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
        uint64_t log_base, log_size;
        uint64_t features;
        int ret;

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -EINVAL;
        }
        ret = rte_vhost_get_negotiated_features(vid, &features);
        if (ret) {
                DRV_LOG(ERR, "Failed to get negotiated features.");
                return ret;
        }
        if (RTE_VHOST_NEED_LOG(features)) {
                ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
                if (ret) {
                        DRV_LOG(ERR, "Failed to get log base.");
                        return ret;
                }
                ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
                if (ret) {
                        DRV_LOG(ERR, "Failed to set dirty bitmap.");
                        return ret;
                }
                DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
                ret = mlx5_vdpa_logging_enable(priv, 1);
                if (ret) {
                        DRV_LOG(ERR, "Failed to enable dirty logging.");
                        return ret;
                }
        }
        return 0;
}

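/*
 * Propagate the vhost MTU to the kernel interface backing the device,
 * retrying up to MLX5_VDPA_MAX_RETRIES times until the kernel MTU matches.
 */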
static int
mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
{
        struct ifreq request;
        uint16_t vhost_mtu = 0;
        uint16_t kern_mtu = 0;
        int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
        int sock;
        int retries = MLX5_VDPA_MAX_RETRIES;

        if (ret) {
                DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
                return ret;
        }
        if (!vhost_mtu) {
                DRV_LOG(DEBUG, "Vhost MTU is 0.");
                return ret;
        }
        ret = mlx5_get_ifname_sysfs
                                (mlx5_os_get_ctx_device_name(priv->cdev->ctx),
                                 request.ifr_name);
        if (ret) {
                DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
                return ret;
        }
        sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
        if (sock == -1) {
                DRV_LOG(DEBUG, "Cannot open IF socket.");
                return sock;
        }
        while (retries--) {
                ret = ioctl(sock, SIOCGIFMTU, &request);
                if (ret == -1)
                        break;
                kern_mtu = request.ifr_mtu;
                DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
                        (int)vhost_mtu);
                if (kern_mtu == vhost_mtu)
                        break;
                request.ifr_mtu = vhost_mtu;
                ret = ioctl(sock, SIOCSIFMTU, &request);
                if (ret == -1)
                        break;
                request.ifr_mtu = 0;
                usleep(MLX5_VDPA_USEC);
        }
        close(sock);
        return kern_mtu == vhost_mtu ? 0 : -1;
}

static void
mlx5_vdpa_dev_cache_clean(struct mlx5_vdpa_priv *priv)
{
        /* Pre-created resources are cleaned on device removal only. */
        if (!priv->queues)
                mlx5_vdpa_virtqs_cleanup(priv);
        mlx5_vdpa_mem_dereg(priv);
}

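/*
 * Device close callback: stop CQE events, log the final state for live
 * migration if configured, release steering and virtqs, drain the CQ, and
 * move the device back to the PROBED state.
 */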
static int
mlx5_vdpa_dev_close(int vid)
{
        struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
        int ret = 0;

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -1;
        }
        mlx5_vdpa_cqe_event_unset(priv);
        if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
                ret |= mlx5_vdpa_lm_log(priv);
                priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
        }
        pthread_mutex_lock(&priv->steer_update_lock);
        mlx5_vdpa_steer_unset(priv);
        pthread_mutex_unlock(&priv->steer_update_lock);
        mlx5_vdpa_virtqs_release(priv);
        mlx5_vdpa_drain_cq(priv);
        if (priv->lm_mr.addr)
                mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
        priv->state = MLX5_VDPA_STATE_PROBED;
        if (!priv->connected)
                mlx5_vdpa_dev_cache_clean(priv);
        priv->vid = 0;
        DRV_LOG(INFO, "vDPA device %d was closed.", vid);
        return ret;
}

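/*
 * Device configuration callback: close first on reconfiguration, then set
 * the MTU, register guest memory, prepare virtqs, and set up steering and
 * CQE events.
 */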
static int
mlx5_vdpa_dev_config(int vid)
{
        struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -EINVAL;
        }
        if (priv->state == MLX5_VDPA_STATE_CONFIGURED &&
            mlx5_vdpa_dev_close(vid)) {
                DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
                return -1;
        }
        priv->vid = vid;
        priv->connected = true;
        if (mlx5_vdpa_mtu_set(priv))
                DRV_LOG(WARNING, "MTU cannot be set on device %s.",
                                vdev->device->name);
        if (mlx5_vdpa_mem_register(priv) ||
            mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
            mlx5_vdpa_cqe_event_setup(priv)) {
                mlx5_vdpa_dev_close(vid);
                return -1;
        }
        priv->state = MLX5_VDPA_STATE_CONFIGURED;
        DRV_LOG(INFO, "vDPA device %d was configured.", vid);
        return 0;
}

static int
mlx5_vdpa_get_device_fd(int vid)
{
        struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -EINVAL;
        }
        return ((struct ibv_context *)priv->cdev->ctx)->cmd_fd;
}

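/* Expose the VAR mmap offset and length used for the doorbell mapping. */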
static int
mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
{
        struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);

        RTE_SET_USED(qid);
        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -EINVAL;
        }
        if (!priv->var) {
                DRV_LOG(ERR, "VAR was not created for device %s, is the device"
                        " configured?", vdev->device->name);
                return -EINVAL;
        }
        *offset = priv->var->mmap_off;
        *size = priv->var->length;
        return 0;
}

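/* Fill the array of virtq statistics names exposed through the vDPA API. */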
static int
mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
                struct rte_vdpa_stat_name *stats_names,
                unsigned int size)
{
        static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
                "received_descriptors",
                "completed_descriptors",
                "bad descriptor errors",
                "exceed max chain",
                "invalid buffer",
                "completion errors",
        };
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);
        unsigned int i;

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
                return -ENODEV;
        }
        if (!stats_names)
                return MLX5_VDPA_STATS_MAX;
        size = RTE_MIN(size, (unsigned int)MLX5_VDPA_STATS_MAX);
        for (i = 0; i < size; ++i)
                strlcpy(stats_names[i].name, mlx5_vdpa_stats_names[i],
                        RTE_VDPA_STATS_NAME_SIZE);
        return size;
}

static int
mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
                struct rte_vdpa_stat *stats, unsigned int n)
{
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
                return -ENODEV;
        }
        if (qid >= (int)priv->caps.max_num_virtio_queues) {
                DRV_LOG(ERR, "Vring id %d is too big for device %s.", qid,
                                vdev->device->name);
                return -E2BIG;
        }
        if (!priv->caps.queue_counters_valid) {
                DRV_LOG(ERR, "Virtq statistics are not supported for device %s.",
                        vdev->device->name);
                return -ENOTSUP;
        }
        return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
}

static int
mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
{
        struct mlx5_vdpa_priv *priv =
                mlx5_vdpa_find_priv_resource_by_vdev(vdev);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
                return -ENODEV;
        }
        if (qid >= (int)priv->caps.max_num_virtio_queues) {
                DRV_LOG(ERR, "Vring id %d is too big for device %s.", qid,
                                vdev->device->name);
                return -E2BIG;
        }
        if (!priv->caps.queue_counters_valid) {
                DRV_LOG(ERR, "Virtq statistics are not supported for device %s.",
                        vdev->device->name);
                return -ENOTSUP;
        }
        return mlx5_vdpa_virtq_stats_reset(priv, qid);
}

static int
mlx5_vdpa_dev_cleanup(int vid)
{
        struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
        struct mlx5_vdpa_priv *priv;

        if (vdev == NULL)
                return -1;
        priv = mlx5_vdpa_find_priv_resource_by_vdev(vdev);
        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
                return -1;
        }
        if (priv->state == MLX5_VDPA_STATE_PROBED)
                mlx5_vdpa_dev_cache_clean(priv);
        priv->connected = false;
        return 0;
}

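/* vDPA device operations registered with the vhost library. */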
static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
        .get_queue_num = mlx5_vdpa_get_queue_num,
        .get_features = mlx5_vdpa_get_vdpa_features,
        .get_protocol_features = mlx5_vdpa_get_protocol_features,
        .dev_conf = mlx5_vdpa_dev_config,
        .dev_close = mlx5_vdpa_dev_close,
        .dev_cleanup = mlx5_vdpa_dev_cleanup,
        .set_vring_state = mlx5_vdpa_set_vring_state,
        .set_features = mlx5_vdpa_features_set,
        .migration_done = NULL,
        .get_vfio_group_fd = NULL,
        .get_vfio_device_fd = mlx5_vdpa_get_device_fd,
        .get_notify_area = mlx5_vdpa_get_notify_area,
        .get_stats_names = mlx5_vdpa_get_stats_names,
        .get_stats = mlx5_vdpa_get_stats,
        .reset_stats = mlx5_vdpa_reset_stats,
};

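/*
 * Devargs handler: parse one key/value pair. Note that max_conf_threads is
 * a global PMD argument; the first probed device that sets it wins and
 * differing values from later devices are ignored.
 */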
static int
mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
{
        struct mlx5_vdpa_priv *priv = opaque;
        unsigned long tmp;
        int n_cores = sysconf(_SC_NPROCESSORS_ONLN);

        errno = 0;
        tmp = strtoul(val, NULL, 0);
        if (errno) {
                DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
                return -errno;
        }
        if (strcmp(key, "event_mode") == 0) {
                if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
                        priv->event_mode = (int)tmp;
                else
                        DRV_LOG(WARNING, "Invalid event_mode %s.", val);
        } else if (strcmp(key, "event_us") == 0) {
                priv->event_us = (uint32_t)tmp;
        } else if (strcmp(key, "no_traffic_time") == 0) {
                priv->no_traffic_max = (uint32_t)tmp;
        } else if (strcmp(key, "event_core") == 0) {
                if (tmp >= (unsigned long)n_cores)
                        DRV_LOG(WARNING, "Invalid event_core %s.", val);
                else
                        priv->event_core = tmp;
        } else if (strcmp(key, "max_conf_threads") == 0) {
                if (tmp) {
                        priv->use_c_thread = true;
                        if (!conf_thread_mng.initializer_priv) {
                                conf_thread_mng.initializer_priv = priv;
                                if (tmp > MLX5_VDPA_MAX_C_THRD) {
                                        DRV_LOG(WARNING,
                                                "Invalid max_conf_threads %s, capping to %d.",
                                                val, MLX5_VDPA_MAX_C_THRD);
                                        tmp = MLX5_VDPA_MAX_C_THRD;
                                }
                                conf_thread_mng.max_thrds = tmp;
                        } else if (tmp != conf_thread_mng.max_thrds) {
                                DRV_LOG(WARNING,
                                        "max_conf_threads is a PMD argument, not per device; only the first device configuration sets it. Keeping %d, not changing to %d.",
                                        conf_thread_mng.max_thrds, (int)tmp);
                        }
                } else {
                        priv->use_c_thread = false;
                }
        } else if (strcmp(key, "hw_latency_mode") == 0) {
                priv->hw_latency_mode = (uint32_t)tmp;
        } else if (strcmp(key, "hw_max_latency_us") == 0) {
                priv->hw_max_latency_us = (uint32_t)tmp;
        } else if (strcmp(key, "hw_max_pending_comp") == 0) {
                priv->hw_max_pending_comp = (uint32_t)tmp;
        } else if (strcmp(key, "queue_size") == 0) {
                priv->queue_size = (uint16_t)tmp;
        } else if (strcmp(key, "queues") == 0) {
                priv->queues = (uint16_t)tmp;
        } else {
                DRV_LOG(WARNING, "Invalid key %s.", key);
        }
        return 0;
}

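/* Parse all supported devargs and apply the driver defaults. */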
static void
mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
                     struct mlx5_vdpa_priv *priv)
{
        const char **params = (const char *[]){
                "event_core",
                "event_mode",
                "event_us",
                "hw_latency_mode",
                "hw_max_latency_us",
                "hw_max_pending_comp",
                "no_traffic_time",
                "queue_size",
                "queues",
                "max_conf_threads",
                NULL,
        };

        priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
        priv->event_us = 0;
        priv->event_core = -1;
        priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
        if (mkvlist == NULL)
                return;
        mlx5_kvargs_process(mkvlist, params, mlx5_vdpa_args_check_handler,
                            priv);
        if (!priv->event_us &&
            priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
                priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
        if ((priv->queue_size && !priv->queues) ||
                (!priv->queue_size && priv->queues)) {
                priv->queue_size = 0;
                priv->queues = 0;
                DRV_LOG(WARNING, "Please provide both queue_size and queues.");
        }
        DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
        DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
        DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
        DRV_LOG(DEBUG, "queues is %u, queue_size is %u.", priv->queues,
                priv->queue_size);
}

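/*
 * Initialize the per-virtq locks and, when the queues devarg is set,
 * pre-create the event QPs, virtq counters and umems so that later device
 * configuration is faster.
 */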
static int
mlx5_vdpa_virtq_resource_prepare(struct mlx5_vdpa_priv *priv)
{
        struct mlx5_vdpa_virtq *virtq;
        uint32_t index;
        uint32_t i;

        for (index = 0; index < priv->caps.max_num_virtio_queues;
                index++) {
                virtq = &priv->virtqs[index];
                pthread_mutex_init(&virtq->virtq_lock, NULL);
        }
        if (!priv->queues)
                return 0;
        for (index = 0; index < (priv->queues * 2); ++index) {
                virtq = &priv->virtqs[index];
                int ret = mlx5_vdpa_event_qp_prepare(priv, priv->queue_size,
                                        -1, virtq);

                if (ret) {
                        DRV_LOG(ERR, "Failed to create event QPs for virtq %d.",
                                index);
                        return -1;
                }
                if (priv->caps.queue_counters_valid) {
                        if (!virtq->counters)
                                virtq->counters =
                                        mlx5_devx_cmd_create_virtio_q_counters
                                                (priv->cdev->ctx);
                        if (!virtq->counters) {
                                DRV_LOG(ERR, "Failed to create virtq counters for virtq"
                                        " %d.", index);
                                return -1;
                        }
                }
                for (i = 0; i < RTE_DIM(virtq->umems); ++i) {
                        uint32_t size;
                        void *buf;
                        struct mlx5dv_devx_umem *obj;

                        size = priv->caps.umems[i].a * priv->queue_size +
                                        priv->caps.umems[i].b;
                        buf = rte_zmalloc(__func__, size, 4096);
                        if (buf == NULL) {
                                DRV_LOG(ERR, "Cannot allocate umem %d memory for virtq"
                                                " %u.", i, index);
                                return -1;
                        }
                        obj = mlx5_glue->devx_umem_reg(priv->cdev->ctx, buf,
                                        size, IBV_ACCESS_LOCAL_WRITE);
                        if (obj == NULL) {
                                rte_free(buf);
                                DRV_LOG(ERR, "Failed to register umem %d for virtq %u.",
                                                i, index);
                                return -1;
                        }
                        virtq->umems[i].size = size;
                        virtq->umems[i].buf = buf;
                        virtq->umems[i].obj = obj;
                }
        }
        return 0;
}

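/*
 * Create the device resources that do not depend on a vhost connection:
 * the VAR doorbell area, the transport domain, one TIS per LAG port, a
 * null MR, the steering domain and table, and the event infrastructure.
 */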
static int
mlx5_vdpa_create_dev_resources(struct mlx5_vdpa_priv *priv)
{
        struct mlx5_devx_tis_attr tis_attr = {0};
        struct ibv_context *ctx = priv->cdev->ctx;
        uint32_t i;
        int retry;

        for (retry = 0; retry < 7; retry++) {
                priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
                if (priv->var != NULL)
                        break;
                DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.", retry);
                /* Wait for Qemu to release the VAR during vdpa restart;
                 * the delay starts at 0.1 sec and doubles on each retry.
                 */
                usleep(100000U << retry);
        }
        if (!priv->var) {
                DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
                rte_errno = ENOMEM;
                return -rte_errno;
        }
        /* Always map the entire page. */
        priv->virtq_db_addr = mmap(NULL, priv->var->length, PROT_READ |
                                   PROT_WRITE, MAP_SHARED, ctx->cmd_fd,
                                   priv->var->mmap_off);
        if (priv->virtq_db_addr == MAP_FAILED) {
                DRV_LOG(ERR, "Failed to map doorbell page %u.", errno);
                priv->virtq_db_addr = NULL;
                rte_errno = errno;
                return -rte_errno;
        }
        /* Add the within-page offset for 64K-page systems. */
        priv->virtq_db_addr = (char *)priv->virtq_db_addr +
                ((rte_mem_page_size() - 1) & priv->caps.doorbell_bar_offset);
        DRV_LOG(DEBUG, "VAR address of doorbell mapping is %p.",
                priv->virtq_db_addr);
        priv->td = mlx5_devx_cmd_create_td(ctx);
        if (!priv->td) {
                DRV_LOG(ERR, "Failed to create transport domain.");
                rte_errno = errno;
                return -rte_errno;
        }
        tis_attr.transport_domain = priv->td->id;
        for (i = 0; i < priv->num_lag_ports; i++) {
                /* 0 means auto affinity; a non-zero value proposes a port. */
                tis_attr.lag_tx_port_affinity = i + 1;
                priv->tiss[i] = mlx5_devx_cmd_create_tis(ctx, &tis_attr);
                if (!priv->tiss[i]) {
                        DRV_LOG(ERR, "Failed to create TIS %u.", i);
                        return -rte_errno;
                }
        }
        priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
        if (!priv->null_mr) {
                DRV_LOG(ERR, "Failed to allocate null MR.");
                rte_errno = errno;
                return -rte_errno;
        }
        DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
#ifdef HAVE_MLX5DV_DR
        priv->steer.domain = mlx5_glue->dr_create_domain(ctx,
                                        MLX5DV_DR_DOMAIN_TYPE_NIC_RX);
        if (!priv->steer.domain) {
                DRV_LOG(ERR, "Failed to create Rx domain.");
                rte_errno = errno;
                return -rte_errno;
        }
#endif
        priv->steer.tbl = mlx5_glue->dr_create_flow_tbl(priv->steer.domain, 0);
        if (!priv->steer.tbl) {
                DRV_LOG(ERR, "Failed to create table 0 with Rx domain.");
                rte_errno = errno;
                return -rte_errno;
        }
        if (mlx5_vdpa_err_event_setup(priv) != 0)
                return -rte_errno;
        if (mlx5_vdpa_event_qp_global_prepare(priv))
                return -rte_errno;
        if (mlx5_vdpa_virtq_resource_prepare(priv))
                return -rte_errno;
        return 0;
}

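/*
 * Probe callback: verify the vDPA capabilities, allocate the private
 * structure with its virtq array, parse devargs, create the multi-thread
 * configuration management if requested, create the device resources and
 * register the vDPA device.
 */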
static int
mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
                    struct mlx5_kvargs_ctrl *mkvlist)
{
        struct mlx5_vdpa_priv *priv = NULL;
        struct mlx5_hca_attr *attr = &cdev->config.hca_attr;

        if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
                DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
                        "old FW/OFED version?");
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
        if (!attr->vdpa.queue_counters_valid)
                DRV_LOG(DEBUG, "No capability to support virtq statistics.");
        priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
                           sizeof(struct mlx5_vdpa_virtq) *
                           attr->vdpa.max_num_virtio_queues,
                           RTE_CACHE_LINE_SIZE);
        if (!priv) {
                DRV_LOG(ERR, "Failed to allocate private memory.");
                rte_errno = ENOMEM;
                return -rte_errno;
        }
        priv->caps = attr->vdpa;
        priv->log_max_rqt_size = attr->log_max_rqt_size;
        priv->num_lag_ports = attr->num_lag_ports;
        if (attr->num_lag_ports == 0)
                priv->num_lag_ports = 1;
        rte_spinlock_init(&priv->db_lock);
        pthread_mutex_init(&priv->steer_update_lock, NULL);
        priv->cdev = cdev;
        mlx5_vdpa_config_get(mkvlist, priv);
        if (priv->use_c_thread) {
                if (conf_thread_mng.initializer_priv == priv)
                        if (mlx5_vdpa_mult_threads_create(priv->event_core))
                                goto error;
                __atomic_fetch_add(&conf_thread_mng.refcnt, 1,
                        __ATOMIC_RELAXED);
        }
        if (mlx5_vdpa_create_dev_resources(priv))
                goto error;
        priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
        if (priv->vdev == NULL) {
                DRV_LOG(ERR, "Failed to register vDPA device.");
                rte_errno = rte_errno ? rte_errno : EINVAL;
                goto error;
        }
        SLIST_INIT(&priv->mr_list);
        pthread_mutex_lock(&priv_list_lock);
        TAILQ_INSERT_TAIL(&priv_list, priv, next);
        pthread_mutex_unlock(&priv_list_lock);
        return 0;
error:
        if (conf_thread_mng.initializer_priv == priv)
                mlx5_vdpa_mult_threads_destroy(false);
        if (priv)
                mlx5_vdpa_dev_release(priv);
        return -rte_errno;
}

static int
mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev)
{
        struct mlx5_vdpa_priv *priv = NULL;
        int found = 0;

        pthread_mutex_lock(&priv_list_lock);
        TAILQ_FOREACH(priv, &priv_list, next) {
                if (priv->vdev->device == cdev->dev) {
                        found = 1;
                        break;
                }
        }
        if (found)
                TAILQ_REMOVE(&priv_list, priv, next);
        pthread_mutex_unlock(&priv_list_lock);
        if (found)
                mlx5_vdpa_dev_release(priv);
        return 0;
}

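/* Release all device resources created at probe time, in reverse order. */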
static void
mlx5_vdpa_release_dev_resources(struct mlx5_vdpa_priv *priv)
{
        uint32_t i;

        if (priv->queues)
                mlx5_vdpa_virtqs_cleanup(priv);
        mlx5_vdpa_dev_cache_clean(priv);
        for (i = 0; i < priv->caps.max_num_virtio_queues; i++) {
                if (!priv->virtqs[i].counters)
                        continue;
                claim_zero(mlx5_devx_cmd_destroy(priv->virtqs[i].counters));
        }
        mlx5_vdpa_event_qp_global_release(priv);
        mlx5_vdpa_err_event_unset(priv);
        if (priv->steer.tbl)
                claim_zero(mlx5_glue->dr_destroy_flow_tbl(priv->steer.tbl));
        if (priv->steer.domain)
                claim_zero(mlx5_glue->dr_destroy_domain(priv->steer.domain));
        if (priv->null_mr)
                claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
        for (i = 0; i < priv->num_lag_ports; i++) {
                if (priv->tiss[i])
                        claim_zero(mlx5_devx_cmd_destroy(priv->tiss[i]));
        }
        if (priv->td)
                claim_zero(mlx5_devx_cmd_destroy(priv->td));
        if (priv->virtq_db_addr)
                /* Mask out the within-page offset for munmap. */
                claim_zero(munmap((void *)((uintptr_t)priv->virtq_db_addr &
                        ~(rte_mem_page_size() - 1)), priv->var->length));
        if (priv->var)
                mlx5_glue->dv_free_var(priv->var);
}

static void
mlx5_vdpa_dev_release(struct mlx5_vdpa_priv *priv)
{
        if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
                mlx5_vdpa_dev_close(priv->vid);
        mlx5_vdpa_release_dev_resources(priv);
        if (priv->vdev)
                rte_vdpa_unregister_device(priv->vdev);
        if (priv->use_c_thread)
                if (__atomic_fetch_sub(&conf_thread_mng.refcnt,
                        1, __ATOMIC_RELAXED) == 1)
                        mlx5_vdpa_mult_threads_destroy(true);
        rte_free(priv);
}

static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX7)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
        },
        {
                .vendor_id = 0
        }
};

static struct mlx5_class_driver mlx5_vdpa_driver = {
        .drv_class = MLX5_CLASS_VDPA,
        .name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
        .id_table = mlx5_vdpa_pci_id_map,
        .probe = mlx5_vdpa_dev_probe,
        .remove = mlx5_vdpa_dev_remove,
};

RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)

/**
 * Driver initialization routine.
 */
RTE_INIT(rte_mlx5_vdpa_init)
{
        mlx5_common_init();
        if (mlx5_glue)
                mlx5_class_driver_register(&mlx5_vdpa_driver);
}

RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");