1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2019 Mellanox Technologies, Ltd
6 #include <sys/socket.h>
9 #include <netinet/in.h>
11 #include <rte_malloc.h>
13 #include <rte_errno.h>
14 #include <rte_string_fns.h>
15 #include <rte_bus_pci.h>
17 #include <mlx5_glue.h>
18 #include <mlx5_common.h>
19 #include <mlx5_common_defs.h>
20 #include <mlx5_devx_cmds.h>
24 #include "mlx5_vdpa_utils.h"
25 #include "mlx5_vdpa.h"
27 #define MLX5_VDPA_DRIVER_NAME vdpa_mlx5
/*
 * Baseline feature bits that mlx5_vdpa_get_vdpa_features() always reports.
 * That function ORs in further bits (packed ring, host TSO4/TSO6, CSUM,
 * GUEST_CSUM, VERSION_1) depending on the device capabilities in priv->caps.
 */
29 #define MLX5_VDPA_DEFAULT_FEATURES ((1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
30 (1ULL << VIRTIO_F_ANY_LAYOUT) | \
31 (1ULL << VIRTIO_NET_F_MQ) | \
32 (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
33 (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
34 (1ULL << VHOST_F_LOG_ALL) | \
35 (1ULL << VIRTIO_NET_F_MTU))
/*
 * vhost-user protocol feature bits reported as-is by
 * mlx5_vdpa_get_protocol_features(); no per-device adjustment is applied.
 */
37 #define MLX5_VDPA_PROTOCOL_FEATURES \
38 ((1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
39 (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
40 (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
41 (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
42 (1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
43 (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
44 (1ULL << VHOST_USER_PROTOCOL_F_STATUS))
/*
 * Value mlx5_vdpa_config_get() stores in priv->no_traffic_max before the
 * device arguments are processed; the "no_traffic_time" argument overrides it.
 */
46 #define MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX 16LLU
/*
 * List of mlx5 vDPA private structures: entries are appended by
 * mlx5_vdpa_dev_probe() and unlinked by mlx5_vdpa_dev_remove().
 */
48 TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
49 TAILQ_HEAD_INITIALIZER(priv_list);
/* Held around the priv_list lookups, insertions and removals in this file. */
50 static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
52 static struct mlx5_vdpa_priv *
53 mlx5_vdpa_find_priv_resource_by_vdev(struct rte_vdpa_device *vdev)
55 struct mlx5_vdpa_priv *priv;
58 pthread_mutex_lock(&priv_list_lock);
59 TAILQ_FOREACH(priv, &priv_list, next) {
60 if (vdev == priv->vdev) {
65 pthread_mutex_unlock(&priv_list_lock);
67 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
75 mlx5_vdpa_get_queue_num(struct rte_vdpa_device *vdev, uint32_t *queue_num)
77 struct mlx5_vdpa_priv *priv =
78 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
81 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
84 *queue_num = priv->caps.max_num_virtio_queues;
89 mlx5_vdpa_get_vdpa_features(struct rte_vdpa_device *vdev, uint64_t *features)
91 struct mlx5_vdpa_priv *priv =
92 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
95 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
98 *features = MLX5_VDPA_DEFAULT_FEATURES;
99 if (priv->caps.virtio_queue_type & (1 << MLX5_VIRTQ_TYPE_PACKED))
100 *features |= (1ULL << VIRTIO_F_RING_PACKED);
101 if (priv->caps.tso_ipv4)
102 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
103 if (priv->caps.tso_ipv6)
104 *features |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
105 if (priv->caps.tx_csum)
106 *features |= (1ULL << VIRTIO_NET_F_CSUM);
107 if (priv->caps.rx_csum)
108 *features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
109 if (priv->caps.virtio_version_1_0)
110 *features |= (1ULL << VIRTIO_F_VERSION_1);
115 mlx5_vdpa_get_protocol_features(struct rte_vdpa_device *vdev,
118 struct mlx5_vdpa_priv *priv =
119 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
122 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
125 *features = MLX5_VDPA_PROTOCOL_FEATURES;
130 mlx5_vdpa_set_vring_state(int vid, int vring, int state)
132 struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
133 struct mlx5_vdpa_priv *priv =
134 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
138 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
141 if (vring >= (int)priv->caps.max_num_virtio_queues * 2) {
142 DRV_LOG(ERR, "Too big vring id: %d.", vring);
145 pthread_mutex_lock(&priv->vq_config_lock);
146 ret = mlx5_vdpa_virtq_enable(priv, vring, state);
147 pthread_mutex_unlock(&priv->vq_config_lock);
152 mlx5_vdpa_features_set(int vid)
154 struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
155 struct mlx5_vdpa_priv *priv =
156 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
157 uint64_t log_base, log_size;
162 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
165 ret = rte_vhost_get_negotiated_features(vid, &features);
167 DRV_LOG(ERR, "Failed to get negotiated features.");
170 if (RTE_VHOST_NEED_LOG(features)) {
171 ret = rte_vhost_get_log_base(vid, &log_base, &log_size);
173 DRV_LOG(ERR, "Failed to get log base.");
176 ret = mlx5_vdpa_dirty_bitmap_set(priv, log_base, log_size);
178 DRV_LOG(ERR, "Failed to set dirty bitmap.");
181 DRV_LOG(INFO, "mlx5 vdpa: enabling dirty logging...");
182 ret = mlx5_vdpa_logging_enable(priv, 1);
184 DRV_LOG(ERR, "Failed t enable dirty logging.");
192 mlx5_vdpa_mtu_set(struct mlx5_vdpa_priv *priv)
194 struct ifreq request;
195 uint16_t vhost_mtu = 0;
196 uint16_t kern_mtu = 0;
197 int ret = rte_vhost_get_mtu(priv->vid, &vhost_mtu);
199 int retries = MLX5_VDPA_MAX_RETRIES;
202 DRV_LOG(DEBUG, "Cannot get vhost MTU - %d.", ret);
206 DRV_LOG(DEBUG, "Vhost MTU is 0.");
209 ret = mlx5_get_ifname_sysfs
210 (mlx5_os_get_ctx_device_name(priv->cdev->ctx),
213 DRV_LOG(DEBUG, "Cannot get kernel IF name - %d.", ret);
216 sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
218 DRV_LOG(DEBUG, "Cannot open IF socket.");
222 ret = ioctl(sock, SIOCGIFMTU, &request);
225 kern_mtu = request.ifr_mtu;
226 DRV_LOG(DEBUG, "MTU: current %d requested %d.", (int)kern_mtu,
228 if (kern_mtu == vhost_mtu)
230 request.ifr_mtu = vhost_mtu;
231 ret = ioctl(sock, SIOCSIFMTU, &request);
235 usleep(MLX5_VDPA_USEC);
238 return kern_mtu == vhost_mtu ? 0 : -1;
242 mlx5_vdpa_dev_close(int vid)
244 struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
245 struct mlx5_vdpa_priv *priv =
246 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
250 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
253 mlx5_vdpa_err_event_unset(priv);
254 mlx5_vdpa_cqe_event_unset(priv);
255 if (priv->state == MLX5_VDPA_STATE_CONFIGURED) {
256 ret |= mlx5_vdpa_lm_log(priv);
257 priv->state = MLX5_VDPA_STATE_IN_PROGRESS;
259 mlx5_vdpa_steer_unset(priv);
260 mlx5_vdpa_virtqs_release(priv);
261 mlx5_vdpa_event_qp_global_release(priv);
262 mlx5_vdpa_mem_dereg(priv);
263 priv->state = MLX5_VDPA_STATE_PROBED;
265 /* The mutex may stay locked after event thread cancel - reinitialize it. */
266 pthread_mutex_init(&priv->vq_config_lock, NULL);
267 DRV_LOG(INFO, "vDPA device %d was closed.", vid);
272 mlx5_vdpa_dev_config(int vid)
274 struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
275 struct mlx5_vdpa_priv *priv =
276 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
279 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
282 if (priv->state == MLX5_VDPA_STATE_CONFIGURED &&
283 mlx5_vdpa_dev_close(vid)) {
284 DRV_LOG(ERR, "Failed to reconfigure vid %d.", vid);
288 if (mlx5_vdpa_mtu_set(priv))
289 DRV_LOG(WARNING, "MTU cannot be set on device %s.",
291 if (mlx5_vdpa_mem_register(priv) || mlx5_vdpa_err_event_setup(priv) ||
292 mlx5_vdpa_virtqs_prepare(priv) || mlx5_vdpa_steer_setup(priv) ||
293 mlx5_vdpa_cqe_event_setup(priv)) {
294 mlx5_vdpa_dev_close(vid);
297 priv->state = MLX5_VDPA_STATE_CONFIGURED;
298 DRV_LOG(INFO, "vDPA device %d was configured.", vid);
303 mlx5_vdpa_get_device_fd(int vid)
305 struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
306 struct mlx5_vdpa_priv *priv =
307 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
310 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
313 return ((struct ibv_context *)priv->cdev->ctx)->cmd_fd;
317 mlx5_vdpa_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
319 struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
320 struct mlx5_vdpa_priv *priv =
321 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
325 DRV_LOG(ERR, "Invalid vDPA device: %s.", vdev->device->name);
329 DRV_LOG(ERR, "VAR was not created for device %s, is the device"
330 " configured?.", vdev->device->name);
333 *offset = priv->var->mmap_off;
334 *size = priv->var->length;
339 mlx5_vdpa_get_stats_names(struct rte_vdpa_device *vdev,
340 struct rte_vdpa_stat_name *stats_names,
343 static const char *mlx5_vdpa_stats_names[MLX5_VDPA_STATS_MAX] = {
344 "received_descriptors",
345 "completed_descriptors",
346 "bad descriptor errors",
351 struct mlx5_vdpa_priv *priv =
352 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
356 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
360 return MLX5_VDPA_STATS_MAX;
361 size = RTE_MIN(size, (unsigned int)MLX5_VDPA_STATS_MAX);
362 for (i = 0; i < size; ++i)
363 strlcpy(stats_names[i].name, mlx5_vdpa_stats_names[i],
364 RTE_VDPA_STATS_NAME_SIZE);
369 mlx5_vdpa_get_stats(struct rte_vdpa_device *vdev, int qid,
370 struct rte_vdpa_stat *stats, unsigned int n)
372 struct mlx5_vdpa_priv *priv =
373 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
376 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
379 if (priv->state == MLX5_VDPA_STATE_PROBED) {
380 DRV_LOG(ERR, "Device %s was not configured.",
384 if (qid >= (int)priv->nr_virtqs) {
385 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
389 if (!priv->caps.queue_counters_valid) {
390 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
394 return mlx5_vdpa_virtq_stats_get(priv, qid, stats, n);
398 mlx5_vdpa_reset_stats(struct rte_vdpa_device *vdev, int qid)
400 struct mlx5_vdpa_priv *priv =
401 mlx5_vdpa_find_priv_resource_by_vdev(vdev);
404 DRV_LOG(ERR, "Invalid device: %s.", vdev->device->name);
407 if (priv->state == MLX5_VDPA_STATE_PROBED) {
408 DRV_LOG(ERR, "Device %s was not configured.",
412 if (qid >= (int)priv->nr_virtqs) {
413 DRV_LOG(ERR, "Too big vring id: %d for device %s.", qid,
417 if (!priv->caps.queue_counters_valid) {
418 DRV_LOG(ERR, "Virtq statistics is not supported for device %s.",
422 return mlx5_vdpa_virtq_stats_reset(priv, qid);
425 static struct rte_vdpa_dev_ops mlx5_vdpa_ops = {
426 .get_queue_num = mlx5_vdpa_get_queue_num,
427 .get_features = mlx5_vdpa_get_vdpa_features,
428 .get_protocol_features = mlx5_vdpa_get_protocol_features,
429 .dev_conf = mlx5_vdpa_dev_config,
430 .dev_close = mlx5_vdpa_dev_close,
431 .set_vring_state = mlx5_vdpa_set_vring_state,
432 .set_features = mlx5_vdpa_features_set,
433 .migration_done = NULL,
434 .get_vfio_group_fd = NULL,
435 .get_vfio_device_fd = mlx5_vdpa_get_device_fd,
436 .get_notify_area = mlx5_vdpa_get_notify_area,
437 .get_stats_names = mlx5_vdpa_get_stats_names,
438 .get_stats = mlx5_vdpa_get_stats,
439 .reset_stats = mlx5_vdpa_reset_stats,
443 mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
445 struct mlx5_vdpa_priv *priv = opaque;
447 int n_cores = sysconf(_SC_NPROCESSORS_ONLN);
450 tmp = strtoul(val, NULL, 0);
452 DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
455 if (strcmp(key, "event_mode") == 0) {
456 if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
457 priv->event_mode = (int)tmp;
459 DRV_LOG(WARNING, "Invalid event_mode %s.", val);
460 } else if (strcmp(key, "event_us") == 0) {
461 priv->event_us = (uint32_t)tmp;
462 } else if (strcmp(key, "no_traffic_time") == 0) {
463 priv->no_traffic_max = (uint32_t)tmp;
464 } else if (strcmp(key, "event_core") == 0) {
465 if (tmp >= (unsigned long)n_cores)
466 DRV_LOG(WARNING, "Invalid event_core %s.", val);
468 priv->event_core = tmp;
469 } else if (strcmp(key, "hw_latency_mode") == 0) {
470 priv->hw_latency_mode = (uint32_t)tmp;
471 } else if (strcmp(key, "hw_max_latency_us") == 0) {
472 priv->hw_max_latency_us = (uint32_t)tmp;
473 } else if (strcmp(key, "hw_max_pending_comp") == 0) {
474 priv->hw_max_pending_comp = (uint32_t)tmp;
480 mlx5_vdpa_config_get(struct mlx5_kvargs_ctrl *mkvlist,
481 struct mlx5_vdpa_priv *priv)
483 const char **params = (const char *[]){
489 "hw_max_pending_comp",
494 priv->event_mode = MLX5_VDPA_EVENT_MODE_FIXED_TIMER;
496 priv->event_core = -1;
497 priv->no_traffic_max = MLX5_VDPA_DEFAULT_NO_TRAFFIC_MAX;
500 mlx5_kvargs_process(mkvlist, params, mlx5_vdpa_args_check_handler,
502 if (!priv->event_us &&
503 priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
504 priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
505 DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
506 DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
507 DRV_LOG(DEBUG, "no traffic max is %u.", priv->no_traffic_max);
511 mlx5_vdpa_dev_probe(struct mlx5_common_device *cdev,
512 struct mlx5_kvargs_ctrl *mkvlist)
514 struct mlx5_vdpa_priv *priv = NULL;
515 struct mlx5_hca_attr *attr = &cdev->config.hca_attr;
518 if (!attr->vdpa.valid || !attr->vdpa.max_num_virtio_queues) {
519 DRV_LOG(ERR, "Not enough capabilities to support vdpa, maybe "
520 "old FW/OFED version?");
524 if (!attr->vdpa.queue_counters_valid)
525 DRV_LOG(DEBUG, "No capability to support virtq statistics.");
526 priv = rte_zmalloc("mlx5 vDPA device private", sizeof(*priv) +
527 sizeof(struct mlx5_vdpa_virtq) *
528 attr->vdpa.max_num_virtio_queues * 2,
529 RTE_CACHE_LINE_SIZE);
531 DRV_LOG(ERR, "Failed to allocate private memory.");
535 priv->caps = attr->vdpa;
536 priv->log_max_rqt_size = attr->log_max_rqt_size;
537 priv->num_lag_ports = attr->num_lag_ports;
538 if (attr->num_lag_ports == 0)
539 priv->num_lag_ports = 1;
541 for (retry = 0; retry < 7; retry++) {
542 priv->var = mlx5_glue->dv_alloc_var(priv->cdev->ctx, 0);
543 if (priv->var != NULL)
545 DRV_LOG(WARNING, "Failed to allocate VAR, retry %d.\n", retry);
546 /* Wait for QEMU to release VAR during vDPA restart: 0.1 sec, doubled per retry. */
547 usleep(100000U << retry);
550 DRV_LOG(ERR, "Failed to allocate VAR %u.", errno);
553 priv->err_intr_handle =
554 rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
555 if (priv->err_intr_handle == NULL) {
556 DRV_LOG(ERR, "Fail to allocate intr_handle");
559 priv->vdev = rte_vdpa_register_device(cdev->dev, &mlx5_vdpa_ops);
560 if (priv->vdev == NULL) {
561 DRV_LOG(ERR, "Failed to register vDPA device.");
562 rte_errno = rte_errno ? rte_errno : EINVAL;
565 mlx5_vdpa_config_get(mkvlist, priv);
566 SLIST_INIT(&priv->mr_list);
567 pthread_mutex_init(&priv->vq_config_lock, NULL);
568 pthread_mutex_lock(&priv_list_lock);
569 TAILQ_INSERT_TAIL(&priv_list, priv, next);
570 pthread_mutex_unlock(&priv_list_lock);
576 mlx5_glue->dv_free_var(priv->var);
577 rte_intr_instance_free(priv->err_intr_handle);
584 mlx5_vdpa_dev_remove(struct mlx5_common_device *cdev)
586 struct mlx5_vdpa_priv *priv = NULL;
589 pthread_mutex_lock(&priv_list_lock);
590 TAILQ_FOREACH(priv, &priv_list, next) {
591 if (priv->vdev->device == cdev->dev) {
597 TAILQ_REMOVE(&priv_list, priv, next);
598 pthread_mutex_unlock(&priv_list_lock);
600 if (priv->state == MLX5_VDPA_STATE_CONFIGURED)
601 mlx5_vdpa_dev_close(priv->vid);
603 mlx5_glue->dv_free_var(priv->var);
607 rte_vdpa_unregister_device(priv->vdev);
608 pthread_mutex_destroy(&priv->vq_config_lock);
609 rte_intr_instance_free(priv->err_intr_handle);
615 static const struct rte_pci_id mlx5_vdpa_pci_id_map[] = {
617 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
618 PCI_DEVICE_ID_MELLANOX_CONNECTX6)
621 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
622 PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
625 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
626 PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
629 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
630 PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
633 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
634 PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
637 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
638 PCI_DEVICE_ID_MELLANOX_CONNECTX7)
641 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
642 PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
649 static struct mlx5_class_driver mlx5_vdpa_driver = {
650 .drv_class = MLX5_CLASS_VDPA,
651 .name = RTE_STR(MLX5_VDPA_DRIVER_NAME),
652 .id_table = mlx5_vdpa_pci_id_map,
653 .probe = mlx5_vdpa_dev_probe,
654 .remove = mlx5_vdpa_dev_remove,
657 RTE_LOG_REGISTER_DEFAULT(mlx5_vdpa_logtype, NOTICE)
660 * Driver initialization routine.
662 RTE_INIT(rte_mlx5_vdpa_init)
666 mlx5_class_driver_register(&mlx5_vdpa_driver);
669 RTE_PMD_EXPORT_NAME(MLX5_VDPA_DRIVER_NAME, __COUNTER__);
670 RTE_PMD_REGISTER_PCI_TABLE(MLX5_VDPA_DRIVER_NAME, mlx5_vdpa_pci_id_map);
671 RTE_PMD_REGISTER_KMOD_DEP(MLX5_VDPA_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");