/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_bus_pci.h>

#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"

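/*
 * Virtio/vhost feature bits unconditionally advertised by this driver.
 * Hardware dependent features (checksum, TSO, packed virtqueue, virtio 1.0)
 * are added at runtime by mlx5_vdpa_get_vdpa_features() according to the
 * DevX capabilities of the device.
 */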
#define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
                            (1ULL << VIRTIO_F_ANY_LAYOUT) | \
                            (1ULL << VIRTIO_NET_F_MQ) | \
                            (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
                            (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
                            (1ULL << VHOST_F_LOG_ALL))

#define MLX5_VDPA_PROTOCOL_FEATURES \
                            ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
                             (1ULL << VHOST_USER_PROTOCOL_F_MQ))

TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
                                              TAILQ_HEAD_INITIALIZER(priv_list);
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
int mlx5_vdpa_logtype;

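/* Look up the driver private structure by vDPA device ID. The private
 * structures list is protected by priv_list_lock. Returns NULL and sets
 * rte_errno to EINVAL when the ID is unknown.
 */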
static struct mlx5_vdpa_priv *
mlx5_vdpa_find_priv_resource_by_did(int did)
{
        struct mlx5_vdpa_priv *priv;
        int found = 0;

        pthread_mutex_lock(&priv_list_lock);
        TAILQ_FOREACH(priv, &priv_list, next) {
                if (did == priv->id) {
                        found = 1;
                        break;
                }
        }
        pthread_mutex_unlock(&priv_list_lock);
        if (!found) {
                DRV_LOG(ERR, "Invalid device id: %d.", did);
                rte_errno = EINVAL;
                return NULL;
        }
        return priv;
}

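/* vDPA .get_queue_num callback: report the maximum number of virtqueues
 * supported by the device, as read from the DevX HCA capabilities.
 */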
static int
mlx5_vdpa_get_queue_num(int did, uint32_t *queue_num)
{
        struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device id: %d.", did);
                return -1;
        }
        *queue_num = priv->caps.max_num_virtio_queues;
        return 0;
}

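/* vDPA .get_features callback: build the virtio feature set from the default
 * mask plus the capabilities reported by the device (packed virtqueue, TSO,
 * checksum offloads, virtio 1.0).
 */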
static int
mlx5_vdpa_get_vdpa_features(int did, uint64_t *features)
{
        struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device id: %d.", did);
                return -1;
        }
        *features = MLX5_VDPA_DEFAULT_FEATURES;
        if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
                *features |= (1ULL << VIRTIO_F_RING_PACKED);
        if (priv->caps.tso_ipv4)
                *features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
        if (priv->caps.tso_ipv6)
                *features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
        if (priv->caps.tx_csum)
                *features |= (1ULL << VIRTIO_NET_F_CSUM);
        if (priv->caps.rx_csum)
                *features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
        if (priv->caps.virtio_version_1_0)
                *features |= (1ULL << VIRTIO_F_VERSION_1);
        return 0;
}

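/* vDPA .get_protocol_features callback: report the vhost-user protocol
 * features supported by this driver.
 */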
static int
mlx5_vdpa_get_protocol_features(int did, uint64_t *features)
{
        struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device id: %d.", did);
                return -1;
        }
        *features = MLX5_VDPA_PROTOCOL_FEATURES;
        return 0;
}

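/* vDPA .set_vring_state callback: enable or disable a single virtqueue.
 * The virtqueue must have been created by a previous dev_conf call.
 */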
static int
mlx5_vdpa_set_vring_state(int vid, int vring, int state)
{
        int did = rte_vhost_get_vdpa_device_id(vid);
        struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
        struct mlx5_vdpa_virtq *virtq = NULL;

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device id: %d.", did);
                return -EINVAL;
        }
        SLIST_FOREACH(virtq, &priv->virtq_list, next)
                if (virtq->index == vring)
                        break;
        if (!virtq) {
                DRV_LOG(ERR, "Invalid or unconfigured vring id: %d.", vring);
                return -EINVAL;
        }
        return mlx5_vdpa_virtq_enable(virtq, state);
}

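/* vDPA .set_features callback: apply the negotiated features. When live
 * migration logging is requested (VHOST_F_LOG_ALL), register the dirty
 * bitmap provided by the vhost library and enable hardware dirty-page
 * logging.
 */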
static int
mlx5_vdpa_features_set(int vid)
{
        int did = rte_vhost_get_vdpa_device_id(vid);
        struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
        uint64_t log_base, log_size;
        uint64_t features;
        int ret;

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device id: %d.", did);
                return -EINVAL;
        }
        ret = rte_vhost_get_negotiated_features(vid, &features);
        if (ret) {
                DRV_LOG(ERR, "Failed to get negotiated features.");
                return ret;
        }
        if (RTE_VHOST_NEED_LOG(features)) {
                ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
                if (ret) {
                        DRV_LOG(ERR, "Failed to get log base.");
                        return ret;
                }
                ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
                if (ret) {
                        DRV_LOG(ERR, "Failed to set dirty bitmap.");
                        return ret;
                }
                DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
                ret = mlx5_vdpa_logging_enable(priv, 1);
                if (ret) {
                        DRV_LOG(ERR, "Failed to enable dirty logging.");
                        return ret;
                }
        }
        return 0;
}

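/* vDPA .dev_close callback: tear down everything set up by dev_conf -
 * CQE event handling, steering rules, virtqueues, event QPs and guest
 * memory registrations - and mark the device as not configured.
 */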
static int
mlx5_vdpa_dev_close(int vid)
{
        int did = rte_vhost_get_vdpa_device_id(vid);
        struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);
        int ret = 0;

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device id: %d.", did);
                return -1;
        }
        if (priv->configured)
                ret |= mlx5_vdpa_lm_log(priv);
        mlx5_vdpa_cqe_event_unset(priv);
        ret |= mlx5_vdpa_steer_unset(priv);
        mlx5_vdpa_virtqs_release(priv);
        mlx5_vdpa_event_qp_global_release(priv);
        mlx5_vdpa_mem_dereg(priv);
        priv->configured = 0;
        priv->vid = 0;
        return ret;
}

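/* vDPA .dev_conf callback: (re)configure the device for a vhost connection -
 * register the guest memory, create the virtqueues, set up steering and CQE
 * events. Any failure rolls back through mlx5_vdpa_dev_close().
 */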
static int
mlx5_vdpa_dev_config(int vid)
{
        int did = rte_vhost_get_vdpa_device_id(vid);
        struct mlx5_vdpa_priv *priv = mlx5_vdpa_find_priv_resource_by_did(did);

        if (priv == NULL) {
                DRV_LOG(ERR, "Invalid device id: %d.", did);
                return -EINVAL;
        }
        if (priv->configured && mlx5_vdpa_dev_close(vid)) {
                DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
                return -1;
        }
        priv->vid = vid;
        if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_virtqs_prepare(priv) ||
            mlx5_vdpa_steer_setup(priv) || mlx5_vdpa_cqe_event_setup(priv)) {
                mlx5_vdpa_dev_close(vid);
                return -1;
        }
        priv->configured = 1;
        return 0;
}

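/* vDPA driver operations exposed to the vhost library. The VFIO and
 * notify-area callbacks are left unimplemented by this driver.
 */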
static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
        .get_queue_num = mlx5_vdpa_get_queue_num,
        .get_features = mlx5_vdpa_get_vdpa_features,
        .get_protocol_features = mlx5_vdpa_get_protocol_features,
        .dev_conf = mlx5_vdpa_dev_config,
        .dev_close = mlx5_vdpa_dev_close,
        .set_vring_state = mlx5_vdpa_set_vring_state,
        .set_features = mlx5_vdpa_features_set,
        .migration_done = NULL,
        .get_vfio_group_fd = NULL,
        .get_vfio_device_fd = NULL,
        .get_notify_area = NULL,
};

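/*
 * The probe below only claims a PCI device when the "class=vdpa" device
 * argument is present, so mlx5_class_get() hands it to this driver instead
 * of the mlx5 net PMD, e.g. (illustrative PCI address):
 *     -w 0000:01:00.2,class=vdpa
 */
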
/**
 * DPDK callback to register a PCI device.
 *
 * This function spawns a vdpa device out of a given PCI device.
 *
 * @param[in] pci_drv
 *   PCI driver structure (mlx5_vdpa_driver).
 * @param[in] pci_dev
 *   PCI device information.
 *
 * @return
 *   0 on success, 1 to skip this driver, a negative errno value otherwise
 *   and rte_errno is set.
 */
static int
mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                    struct rte_pci_device *pci_dev __rte_unused)
{
        struct ibv_device **ibv_list;
        struct ibv_device *ibv_match = NULL;
        struct mlx5_vdpa_priv *priv = NULL;
        struct ibv_context *ctx = NULL;
        struct mlx5_hca_attr attr;
        int ret;

        if (mlx5_class_get(pci_dev->device.devargs) != MLX5_CLASS_VDPA) {
                DRV_LOG(DEBUG, "Skip probing - should be probed by other mlx5"
                        " driver.");
                return 1;
        }
        errno = 0;
        ibv_list = mlx5_glue->get_device_list(&ret);
        if (!ibv_list) {
                rte_errno = ENOSYS;
                DRV_LOG(ERR, "Failed to get device list, is ib_uverbs loaded?");
                return -rte_errno;
        }
        while (ret-- > 0) {
                struct rte_pci_addr pci_addr;

                DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[ret]->name);
                if (mlx5_dev_to_pci_addr(ibv_list[ret]->ibdev_path, &pci_addr))
                        continue;
                if (pci_dev->addr.domain != pci_addr.domain ||
                    pci_dev->addr.bus != pci_addr.bus ||
                    pci_dev->addr.devid != pci_addr.devid ||
                    pci_dev->addr.function != pci_addr.function)
                        continue;
                DRV_LOG(INFO, "PCI information matches for device \"%s\".",
                        ibv_list[ret]->name);
                ibv_match = ibv_list[ret];
                break;
        }
        mlx5_glue->free_device_list(ibv_list);
        if (!ibv_match) {
                DRV_LOG(ERR, "No matching IB device for PCI slot "
                        "%" PRIx32 ":%" PRIx8 ":%" PRIx8 ".%" PRIx8 ".",
                        pci_dev->addr.domain, pci_dev->addr.bus,
                        pci_dev->addr.devid, pci_dev->addr.function);
                rte_errno = ENOENT;
                return -rte_errno;
        }
        ctx = mlx5_glue->dv_open_device(ibv_match);
        if (!ctx) {
                DRV_LOG(ERR, "Failed to open IB device \"%s\".",
                        ibv_match->name);
                rte_errno = ENODEV;
                return -rte_errno;
        }
        priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv),
                           RTE_CACHE_LINE_SIZE);
        if (!priv) {
                DRV_LOG(ERR, "Failed to allocate private memory.");
                rte_errno = ENOMEM;
                goto error;
        }
        ret = mlx5_devx_cmd_query_hca_attr(ctx, &attr);
        if (ret) {
                DRV_LOG(ERR, "Unable to read HCA capabilities.");
                rte_errno = ENOTSUP;
                goto error;
        } else {
                if (!attr.vdpa.valid || !attr.vdpa.max_num_virtio_queues) {
                        DRV_LOG(ERR, "Not enough capabilities to support vdpa,"
                                " maybe old FW/OFED version?");
                        rte_errno = ENOTSUP;
                        goto error;
                }
                priv->caps = attr.vdpa;
                priv->log_max_rqt_size = attr.log_max_rqt_size;
        }
        priv->ctx = ctx;
        priv->dev_addr.pci_addr = pci_dev->addr;
        priv->dev_addr.type = PCI_ADDR;
        priv->id = rte_vdpa_register_device(&priv->dev_addr, &mlx5_vdpa_ops);
        if (priv->id < 0) {
                DRV_LOG(ERR, "Failed to register vDPA device.");
                rte_errno = rte_errno ? rte_errno : EINVAL;
                goto error;
        }
        SLIST_INIT(&priv->mr_list);
        SLIST_INIT(&priv->virtq_list);
        pthread_mutex_lock(&priv_list_lock);
        TAILQ_INSERT_TAIL(&priv_list, priv, next);
        pthread_mutex_unlock(&priv_list_lock);
        return 0;

error:
        if (priv)
                rte_free(priv);
        if (ctx)
                mlx5_glue->close_device(ctx);
        return -rte_errno;
}

/**
 * DPDK callback to remove a PCI device.
 *
 * This function removes all vDPA devices belonging to a given PCI device.
 *
 * @param[in] pci_dev
 *   Pointer to the PCI device.
 *
 * @return
 *   0 on success, the function cannot fail.
 */
static int
mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev)
{
        struct mlx5_vdpa_priv *priv = NULL;
        int found = 0;

        pthread_mutex_lock(&priv_list_lock);
        TAILQ_FOREACH(priv, &priv_list, next) {
                if (memcmp(&priv->dev_addr.pci_addr, &pci_dev->addr,
                           sizeof(pci_dev->addr)) == 0) {
                        found = 1;
                        break;
                }
        }
        if (found)
                TAILQ_REMOVE(&priv_list, priv, next);
        pthread_mutex_unlock(&priv_list_lock);
        if (found) {
                if (priv->configured)
                        mlx5_vdpa_dev_close(priv->vid);
                mlx5_glue->close_device(priv->ctx);
                rte_free(priv);
        }
        return 0;
}

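/* PCI IDs handled by this driver: BlueField, ConnectX-6 and ConnectX-6 Dx
 * devices and their virtual functions.
 */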
static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX6)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
        },
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX6DXVF)
        },
        {
                .vendor_id = 0
        }
};

static struct rte_pci_driver mlx5_vdpa_driver = {
        .driver = {
                .name = "mlx5_vdpa",
        },
        .id_table = mlx5_vdpa_pci_id_map,
        .probe = mlx5_vdpa_pci_probe,
        .remove = mlx5_vdpa_pci_remove,
        .drv_flags = 0,
};

/**
 * Driver initialization routine.
 */
RTE_INIT(rte_mlx5_vdpa_init)
{
        /* Initialize common log type. */
        mlx5_vdpa_logtype = rte_log_register("pmd.vdpa.mlx5");
        if (mlx5_vdpa_logtype >= 0)
                rte_log_set_level(mlx5_vdpa_logtype, RTE_LOG_NOTICE);
        if (mlx5_glue)
                rte_pci_register(&mlx5_vdpa_driver);
}

RTE_PMD_EXPORT_NAME(net_mlx5_vdpa, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(net_mlx5_vdpa, mlx5_vdpa_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(net_mlx5_vdpa, "* ib_uverbs & mlx5_core & mlx5_ib");