vhost: prefix vDPA enum value for PCI address type
[dpdk.git] / drivers / vdpa / mlx5 / mlx5_vdpa.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 #include <unistd.h>
5
6 #include <rte_malloc.h>
7 #include <rte_log.h>
8 #include <rte_errno.h>
9 #include <rte_bus_pci.h>
10 #include <rte_pci.h>
11
12 #include <mlx5_glue.h>
13 #include <mlx5_common.h>
14 #include <mlx5_devx_cmds.h>
15 #include <mlx5_prm.h>
16 #include <mlx5_nl.h>
17
18 #include "mlx5_vdpa_utils.h"
19 #include "mlx5_vdpa.h"
20
21
/*
 * Virtio feature bits always reported by mlx5_vdpa_get_vdpa_features();
 * capability-dependent bits are OR-ed on top of this mask there.
 */
#define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
                            (1ULL << VIRTIO_F_ANY_LAYOUT) | \
                            (1ULL << VIRTIO_NET_F_MQ) | \
                            (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
                            (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
                            (1ULL << VHOST_F_LOG_ALL))

/* Vhost-user protocol feature bits reported by mlx5_vdpa_get_protocol_features(). */
#define MLX5_VDPA_PROTOCOL_FEATURES \
                            ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_MQ))
35
/*
 * List of the private contexts created by mlx5_vdpa_pci_probe().
 * Every access in this file is done with priv_list_lock held.
 */
TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
                                              TAILQ_HEAD_INITIALIZER(priv_list);
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
/* Log type id, assigned from rte_log_register() in rte_mlx5_vdpa_init(). */
int mlx5_vdpa_logtype;
40
41 static struct mlx5_vdpa_priv *
42 mlx5_vdpa_find_priv_resource_by_did(int did)
43 {
44         struct mlx5_vdpa_priv *priv;
45         int found = 0;
46
47         pthread_mutex_lock(&priv_list_lock);
48         TAILQ_FOREACH(priv, &priv_list, next) {
49                 if (did == priv->id) {
50                         found = 1;
51                         break;
52                 }
53         }
54         pthread_mutex_unlock(&priv_list_lock);
55         if (!found) {
56                 DRV_LOG(ERR, "Invalid device id: %d.", did);
57                 rte_errno = EINVAL;
58                 return NULL;
59         }
60         return priv;
61 }
62
63 static int
64 mlx5_vdpa_get_queue_num(int did, uint32_t *queue_num)
65 {
66         struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
67
68         if (priv == NULL) {
69                 DRV_LOG(ERR, "Invalid device id: %d.", did);
70                 return -1;
71         }
72         *queue_num = priv->caps.max_num_virtio_queues;
73         return 0;
74 }
75
76 static int
77 mlx5_vdpa_get_vdpa_features(int did, uint64_t *features)
78 {
79         struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
80
81         if (priv == NULL) {
82                 DRV_LOG(ERR, "Invalid device id: %d.", did);
83                 return -1;
84         }
85         *features = MLX5_VDPA_DEFAULT_FEATURES;
86         if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
87                 *features |= (1ULL << VIRTIO_F_RING_PACKED);
88         if (priv->caps.tso_ipv4)
89                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
90         if (priv->caps.tso_ipv6)
91                 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
92         if (priv->caps.tx_csum)
93                 *features |= (1ULL << VIRTIO_NET_F_CSUM);
94         if (priv->caps.rx_csum)
95                 *features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
96         if (priv->caps.virtio_version_1_0)
97                 *features |= (1ULL << VIRTIO_F_VERSION_1);
98         return 0;
99 }
100
101 static int
102 mlx5_vdpa_get_protocol_features(int did, uint64_t *features)
103 {
104         struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
105
106         if (priv == NULL) {
107                 DRV_LOG(ERR, "Invalid device id: %d.", did);
108                 return -1;
109         }
110         *features = MLX5_VDPA_PROTOCOL_FEATURES;
111         return 0;
112 }
113
114 static int
115 mlx5_vdpa_set_vring_state(int vid, int vring, int state)
116 {
117         int did = rte_vhost_get_vdpa_device_id(vid);
118         struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
119         struct mlx5_vdpa_virtq *virtq = NULL;
120
121         if (priv == NULL) {
122                 DRV_LOG(ERR, "Invalid device id: %d.", did);
123                 return -EINVAL;
124         }
125         SLIST_FOREACH(virtq, &priv->virtq_list, next)
126                 if (virtq->index == vring)
127                         break;
128         if (!virtq) {
129                 DRV_LOG(ERR, "Invalid or unconfigured vring id: %d.", vring);
130                 return -EINVAL;
131         }
132         return mlx5_vdpa_virtq_enable(virtq, state);
133 }
134
135 static int
136 mlx5_vdpa_features_set(int vid)
137 {
138         int did = rte_vhost_get_vdpa_device_id(vid);
139         struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
140         uint64_t log_base, log_size;
141         uint64_t features;
142         int ret;
143
144         if (priv == NULL) {
145                 DRV_LOG(ERR, "Invalid device id: %d.", did);
146                 return -EINVAL;
147         }
148         ret = rte_vhost_get_negotiated_features(vid, &features);
149         if (ret) {
150                 DRV_LOG(ERR, "Failed to get negotiated features.");
151                 return ret;
152         }
153         if (RTE_VHOST_NEED_LOG(features)) {
154                 ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
155                 if (ret) {
156                         DRV_LOG(ERR, "Failed to get log base.");
157                         return ret;
158                 }
159                 ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
160                 if (ret) {
161                         DRV_LOG(ERR, "Failed to set dirty bitmap.");
162                         return ret;
163                 }
164                 DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
165                 ret = mlx5_vdpa_logging_enable(priv, 1);
166                 if (ret) {
167                         DRV_LOG(ERR, "Failed t enable dirty logging.");
168                         return ret;
169                 }
170         }
171         return 0;
172 }
173
174 static int
175 mlx5_vdpa_dev_close(int vid)
176 {
177         int did = rte_vhost_get_vdpa_device_id(vid);
178         struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
179         int ret = 0;
180
181         if (priv == NULL) {
182                 DRV_LOG(ERR, "Invalid device id: %d.", did);
183                 return -1;
184         }
185         if (priv->configured)
186                 ret |= mlx5_vdpa_lm_log(priv);
187         mlx5_vdpa_cqe_event_unset(priv);
188         ret |= mlx5_vdpa_steer_unset(priv);
189         mlx5_vdpa_virtqs_release(priv);
190         mlx5_vdpa_event_qp_global_release(priv);
191         mlx5_vdpa_mem_dereg(priv);
192         priv->configured = 0;
193         priv->vid = 0;
194         return ret;
195 }
196
197 static int
198 mlx5_vdpa_dev_config(int vid)
199 {
200         int did = rte_vhost_get_vdpa_device_id(vid);
201         struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
202
203         if (priv == NULL) {
204                 DRV_LOG(ERR, "Invalid device id: %d.", did);
205                 return -EINVAL;
206         }
207         if (priv->configured && mlx5_vdpa_dev_close(vid)) {
208                 DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
209                 return -1;
210         }
211         priv->vid = vid;
212         if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_virtqs_prepare(priv) ||
213             mlx5_vdpa_steer_setup(priv) || mlx5_vdpa_cqe_event_setup(priv)) {
214                 mlx5_vdpa_dev_close(vid);
215                 return -1;
216         }
217         priv->configured = 1;
218         return 0;
219 }
220
/*
 * vDPA device operations passed to rte_vdpa_register_device() at probe.
 * Callbacks left NULL are not implemented by this driver.
 */
static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
        .get_queue_num = mlx5_vdpa_get_queue_num,
        .get_features = mlx5_vdpa_get_vdpa_features,
        .get_protocol_features = mlx5_vdpa_get_protocol_features,
        .dev_conf = mlx5_vdpa_dev_config,
        .dev_close = mlx5_vdpa_dev_close,
        .set_vring_state = mlx5_vdpa_set_vring_state,
        .set_features = mlx5_vdpa_features_set,
        .migration_done = NULL,
        .get_vfio_group_fd = NULL,
        .get_vfio_device_fd = NULL,
        .get_notify_area = NULL,
};
234
235 static struct ibv_device *
236 mlx5_vdpa_get_ib_device_match(struct rte_pci_addr *addr)
237 {
238         int n;
239         struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
240         struct ibv_device *ibv_match = NULL;
241
242         if (!ibv_list) {
243                 rte_errno = ENOSYS;
244                 return NULL;
245         }
246         while (n-- > 0) {
247                 struct rte_pci_addr pci_addr;
248
249                 DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
250                 if (mlx5_dev_to_pci_addr(ibv_list[n]->ibdev_path, &pci_addr))
251                         continue;
252                 if (memcmp(addr, &pci_addr, sizeof(pci_addr)))
253                         continue;
254                 ibv_match = ibv_list[n];
255                 break;
256         }
257         if (!ibv_match)
258                 rte_errno = ENOENT;
259         mlx5_glue->free_device_list(ibv_list);
260         return ibv_match;
261 }
262
263 /* Try to disable ROCE by Netlink\Devlink. */
264 static int
265 mlx5_vdpa_nl_roce_disable(const char *addr)
266 {
267         int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
268         int devlink_id;
269         int enable;
270         int ret;
271
272         if (nlsk_fd < 0)
273                 return nlsk_fd;
274         devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
275         if (devlink_id < 0) {
276                 ret = devlink_id;
277                 DRV_LOG(DEBUG, "Failed to get devlink id for ROCE operations by"
278                         " Netlink.");
279                 goto close;
280         }
281         ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
282         if (ret) {
283                 DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
284                         ret);
285                 goto close;
286         } else if (!enable) {
287                 DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
288                 goto close;
289         }
290         ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
291         if (ret)
292                 DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
293         else
294                 DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
295 close:
296         close(nlsk_fd);
297         return ret;
298 }
299
300 /* Try to disable ROCE by sysfs. */
301 static int
302 mlx5_vdpa_sys_roce_disable(const char *addr)
303 {
304         FILE *file_o;
305         int enable;
306         int ret;
307
308         MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
309         file_o = fopen(file_p, "rb");
310         if (!file_o) {
311                 rte_errno = ENOTSUP;
312                 return -ENOTSUP;
313         }
314         ret = fscanf(file_o, "%d", &enable);
315         if (ret != 1) {
316                 rte_errno = EINVAL;
317                 ret = EINVAL;
318                 goto close;
319         } else if (!enable) {
320                 ret = 0;
321                 DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
322                 goto close;
323         }
324         fclose(file_o);
325         file_o = fopen(file_p, "wb");
326         if (!file_o) {
327                 rte_errno = ENOTSUP;
328                 return -ENOTSUP;
329         }
330         fprintf(file_o, "0\n");
331         ret = 0;
332 close:
333         if (ret)
334                 DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
335         else
336                 DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
337         fclose(file_o);
338         return ret;
339 }
340
341 #define MLX5_VDPA_MAX_RETRIES 20
342 #define MLX5_VDPA_USEC 1000
343 static int
344 mlx5_vdpa_roce_disable(struct rte_pci_addr *addr, struct ibv_device **ibv)
345 {
346         char addr_name[64] = {0};
347
348         rte_pci_device_name(addr, addr_name, sizeof(addr_name));
349         /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
350         if (mlx5_vdpa_nl_roce_disable(addr_name) == 0 ||
351             mlx5_vdpa_sys_roce_disable(addr_name) == 0) {
352                 /*
353                  * Succeed to disable ROCE, wait for the IB device to appear
354                  * again after reload.
355                  */
356                 int r;
357                 struct ibv_device *ibv_new;
358
359                 for (r = MLX5_VDPA_MAX_RETRIES; r; r--) {
360                         ibv_new = mlx5_vdpa_get_ib_device_match(addr);
361                         if (ibv_new) {
362                                 *ibv = ibv_new;
363                                 return 0;
364                         }
365                         usleep(MLX5_VDPA_USEC);
366                 }
367                 DRV_LOG(ERR, "Cannot much device %s after ROCE disable, "
368                         "retries exceed %d", addr_name, MLX5_VDPA_MAX_RETRIES);
369                 rte_errno = EAGAIN;
370         }
371         return -rte_errno;
372 }
373
374 /**
375  * DPDK callback to register a PCI device.
376  *
377  * This function spawns vdpa device out of a given PCI device.
378  *
379  * @param[in] pci_drv
380  *   PCI driver structure (mlx5_vpda_driver).
381  * @param[in] pci_dev
382  *   PCI device information.
383  *
384  * @return
385  *   0 on success, 1 to skip this driver, a negative errno value otherwise
386  *   and rte_errno is set.
387  */
388 static int
389 mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
390                     struct rte_pci_device *pci_dev __rte_unused)
391 {
392         struct ibv_device *ibv;
393         struct mlx5_vdpa_priv *priv = NULL;
394         struct ibv_context *ctx = NULL;
395         struct mlx5_hca_attr attr;
396         int ret;
397
398         if (mlx5_class_get(pci_dev->device.devargs) != MLX5_CLASS_VDPA) {
399                 DRV_LOG(DEBUG, "Skip probing - should be probed by other mlx5"
400                         " driver.");
401                 return 1;
402         }
403         ibv = mlx5_vdpa_get_ib_device_match(&pci_dev->addr);
404         if (!ibv) {
405                 DRV_LOG(ERR, "No matching IB device for PCI slot "
406                         PCI_PRI_FMT ".", pci_dev->addr.domain,
407                         pci_dev->addr.bus, pci_dev->addr.devid,
408                         pci_dev->addr.function);
409                 return -rte_errno;
410         } else {
411                 DRV_LOG(INFO, "PCI information matches for device \"%s\".",
412                         ibv->name);
413         }
414         if (mlx5_vdpa_roce_disable(&pci_dev->addr, &ibv) != 0) {
415                 DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
416                         ibv->name);
417                 return -rte_errno;
418         }
419         ctx = mlx5_glue->dv_open_device(ibv);
420         if (!ctx) {
421                 DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
422                 rte_errno = ENODEV;
423                 return -rte_errno;
424         }
425         priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv),
426                            RTE_CACHE_LINE_SIZE);
427         if (!priv) {
428                 DRV_LOG(ERR, "Failed to allocate private memory.");
429                 rte_errno = ENOMEM;
430                 goto error;
431         }
432         ret = mlx5_devx_cmd_query_hca_attr(ctx, &attr);
433         if (ret) {
434                 DRV_LOG(ERR, "Unable to read HCA capabilities.");
435                 rte_errno = ENOTSUP;
436                 goto error;
437         } else {
438                 if (!attr.vdpa.valid || !attr.vdpa.max_num_virtio_queues) {
439                         DRV_LOG(ERR, "Not enough capabilities to support vdpa,"
440                                 " maybe old FW/OFED version?");
441                         rte_errno = ENOTSUP;
442                         goto error;
443                 }
444                 priv->caps = attr.vdpa;
445                 priv->log_max_rqt_size = attr.log_max_rqt_size;
446         }
447         priv->ctx = ctx;
448         priv->dev_addr.pci_addr = pci_dev->addr;
449         priv->dev_addr.type = VDPA_ADDR_PCI;
450         priv->id = rte_vdpa_register_device(&priv->dev_addr, &mlx5_vdpa_ops);
451         if (priv->id < 0) {
452                 DRV_LOG(ERR, "Failed to register vDPA device.");
453                 rte_errno = rte_errno ? rte_errno : EINVAL;
454                 goto error;
455         }
456         SLIST_INIT(&priv->mr_list);
457         SLIST_INIT(&priv->virtq_list);
458         pthread_mutex_lock(&priv_list_lock);
459         TAILQ_INSERT_TAIL(&priv_list, priv, next);
460         pthread_mutex_unlock(&priv_list_lock);
461         return 0;
462
463 error:
464         if (priv)
465                 rte_free(priv);
466         if (ctx)
467                 mlx5_glue->close_device(ctx);
468         return -rte_errno;
469 }
470
471 /**
472  * DPDK callback to remove a PCI device.
473  *
474  * This function removes all vDPA devices belong to a given PCI device.
475  *
476  * @param[in] pci_dev
477  *   Pointer to the PCI device.
478  *
479  * @return
480  *   0 on success, the function cannot fail.
481  */
482 static int
483 mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev)
484 {
485         struct mlx5_vdpa_priv *priv = NULL;
486         int found = 0;
487
488         pthread_mutex_lock(&priv_list_lock);
489         TAILQ_FOREACH(priv, &priv_list, next) {
490                 if (memcmp(&priv->dev_addr.pci_addr, &pci_dev->addr,
491                            sizeof(pci_dev->addr)) == 0) {
492                         found = 1;
493                         break;
494                 }
495         }
496         if (found)
497                 TAILQ_REMOVE(&priv_list, priv, next);
498         pthread_mutex_unlock(&priv_list_lock);
499         if (found) {
500                 if (priv->configured)
501                         mlx5_vdpa_dev_close(priv->vid);
502                 mlx5_glue->close_device(priv->ctx);
503                 rte_free(priv);
504         }
505         return 0;
506 }
507
/*
 * PCI ids used as the id_table of mlx5_vdpa_driver.
 * The list ends with an entry whose vendor_id is 0.
 */
static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
        },
        {
                /* End-of-table marker. */
                .vendor_id = 0
        }
};
541
/*
 * PCI driver descriptor registered by rte_mlx5_vdpa_init(); it ties the id
 * table above to the probe and remove callbacks of this file.
 */
static struct rte_pci_driver mlx5_vdpa_driver = {
        .driver = {
                .name = "mlx5_vdpa",
        },
        .id_table = mlx5_vdpa_pci_id_map,
        .probe = mlx5_vdpa_pci_probe,
        .remove = mlx5_vdpa_pci_remove,
        .drv_flags = 0,
};
551
552 /**
553  * Driver initialization routine.
554  */
555 RTE_INIT(rte_mlx5_vdpa_init)
556 {
557         /* Initialize common log type. */
558         mlx5_vdpa_logtype = rte_log_register("pmd.vdpa.mlx5");
559         if (mlx5_vdpa_logtype >= 0)
560                 rte_log_set_level(mlx5_vdpa_logtype, RTE_LOG_NOTICE);
561         if (mlx5_glue)
562                 rte_pci_register(&mlx5_vdpa_driver);
563 }
564
/*
 * PMD metadata for net_mlx5_vdpa: exported name, PCI id table and the
 * kernel module dependency string.
 */
RTE_PMD_EXPORT_NAME(net_mlx5_vdpa, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(net_mlx5_vdpa, mlx5_vdpa_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(net_mlx5_vdpa, "* ib_uverbs & mlx5_core & mlx5_ib");