/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Intel Corporation
 */

#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <sys/epoll.h>

#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_bus_pci.h>
#include <rte_vhost.h>
#include <rte_vdpa.h>
#include <rte_vfio.h>
#include <rte_spinlock.h>
#include <rte_log.h>

#include "base/ifcvf.h"

#define DRV_LOG(level, fmt, args...) \
	rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \
		"%s(): " fmt "\n", __func__, ##args)

#define PAGE_SIZE 4096

static int ifcvf_vdpa_logtype;
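/*
 * Per-device driver state: the PCI device, its VFIO descriptors, the
 * notify-relay thread, and the atomic flags that drive update_datapath().
 */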
struct ifcvf_internal {
	struct rte_vdpa_dev_addr dev_addr;
	struct rte_pci_device *pdev;
	struct ifcvf_hw hw;
	int vfio_container_fd;
	int vfio_group_fd;
	int vfio_dev_fd;
	pthread_t tid;	/* thread for notify relay */
	int epfd;
	int vid;
	int did;
	int max_queues;
	uint64_t features;
	rte_atomic32_t started;
	rte_atomic32_t dev_attached;
	rte_atomic32_t running;
	rte_spinlock_t lock;
};

struct internal_list {
	TAILQ_ENTRY(internal_list) next;
	struct ifcvf_internal *internal;
};

TAILQ_HEAD(internal_list_head, internal_list);
static struct internal_list_head internal_list =
	TAILQ_HEAD_INITIALIZER(internal_list);

static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
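/*
 * Look up a registered device either by the vDPA device id returned by
 * rte_vdpa_register_device() or by its PCI device handle.
 */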
static struct internal_list *
find_internal_resource_by_did(int did)
{
	struct internal_list *list;

	pthread_mutex_lock(&internal_list_lock);
	TAILQ_FOREACH(list, &internal_list, next) {
		if (did == list->internal->did)
			break;
	}
	pthread_mutex_unlock(&internal_list_lock);

	return list;
}
static struct internal_list *
find_internal_resource_by_dev(struct rte_pci_device *pdev)
{
	struct internal_list *list;

	pthread_mutex_lock(&internal_list_lock);
	TAILQ_FOREACH(list, &internal_list, next) {
		if (pdev == list->internal->pdev)
			break;
	}
	pthread_mutex_unlock(&internal_list_lock);

	return list;
}
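/*
 * Create a dedicated VFIO container for the VF, bind its IOMMU group to
 * it, map the PCI BARs and hand them to the base ifcvf layer.
 */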
static int
ifcvf_vfio_setup(struct ifcvf_internal *internal)
{
	struct rte_pci_device *dev = internal->pdev;
	char devname[RTE_DEV_NAME_MAX_LEN] = {0};
	int iommu_group_num;
	int i, ret;

	internal->vfio_dev_fd = -1;
	internal->vfio_group_fd = -1;
	internal->vfio_container_fd = -1;

	rte_pci_device_name(&dev->addr, devname, RTE_DEV_NAME_MAX_LEN);
	rte_vfio_get_group_num(rte_pci_get_sysfs_path(), devname,
			&iommu_group_num);

	internal->vfio_container_fd = rte_vfio_container_create();
	if (internal->vfio_container_fd < 0)
		return -1;

	internal->vfio_group_fd = rte_vfio_container_group_bind(
			internal->vfio_container_fd, iommu_group_num);
	if (internal->vfio_group_fd < 0)
		goto err;

	if (rte_pci_map_device(dev))
		goto err;

	internal->vfio_dev_fd = dev->intr_handle.vfio_dev_fd;

	for (i = 0; i < RTE_MIN(PCI_MAX_RESOURCE, IFCVF_PCI_MAX_RESOURCE);
			i++) {
		internal->hw.mem_resource[i].addr =
			internal->pdev->mem_resource[i].addr;
		internal->hw.mem_resource[i].phys_addr =
			internal->pdev->mem_resource[i].phys_addr;
		internal->hw.mem_resource[i].len =
			internal->pdev->mem_resource[i].len;
	}

	ret = ifcvf_init_hw(&internal->hw, internal->pdev);
	if (ret)
		goto err;

	return 0;

err:
	rte_vfio_container_destroy(internal->vfio_container_fd);
	return -1;
}
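/*
 * DMA-map (do_map != 0) or unmap the guest memory regions reported by
 * vhost into the VFIO container, so the VF can DMA to guest memory.
 */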
static int
ifcvf_dma_map(struct ifcvf_internal *internal, int do_map)
{
	uint32_t i;
	int ret;
	struct rte_vhost_memory *mem = NULL;
	int vfio_container_fd;

	ret = rte_vhost_get_mem_table(internal->vid, &mem);
	if (ret < 0) {
		DRV_LOG(ERR, "failed to get VM memory layout.");
		goto exit;
	}

	vfio_container_fd = internal->vfio_container_fd;

	for (i = 0; i < mem->nregions; i++) {
		struct rte_vhost_mem_region *reg;

		reg = &mem->regions[i];
		DRV_LOG(INFO, "%s, region %u: HVA 0x%" PRIx64 ", "
			"GPA 0x%" PRIx64 ", size 0x%" PRIx64 ".",
			do_map ? "DMA map" : "DMA unmap", i,
			reg->host_user_addr, reg->guest_phys_addr, reg->size);

		if (do_map) {
			ret = rte_vfio_container_dma_map(vfio_container_fd,
				reg->host_user_addr, reg->guest_phys_addr,
				reg->size);
			if (ret < 0) {
				DRV_LOG(ERR, "DMA map failed.");
				goto exit;
			}
		} else {
			ret = rte_vfio_container_dma_unmap(vfio_container_fd,
				reg->host_user_addr, reg->guest_phys_addr,
				reg->size);
			if (ret < 0) {
				DRV_LOG(ERR, "DMA unmap failed.");
				goto exit;
			}
		}
	}

exit:
	free(mem);
	return ret;
}
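/* Translate a vhost/QEMU virtual address to a guest physical address. */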
static uint64_t
qva_to_gpa(int vid, uint64_t qva)
{
	struct rte_vhost_memory *mem = NULL;
	struct rte_vhost_mem_region *reg;
	uint32_t i;
	uint64_t gpa = 0;

	if (rte_vhost_get_mem_table(vid, &mem) < 0)
		return 0;

	for (i = 0; i < mem->nregions; i++) {
		reg = &mem->regions[i];
		if (qva >= reg->host_user_addr &&
				qva < reg->host_user_addr + reg->size) {
			gpa = qva - reg->host_user_addr + reg->guest_phys_addr;
			break;
		}
	}

	free(mem);
	return gpa;
}
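/*
 * Program the negotiated virtqueues into the VF: descriptor, available
 * and used ring GPAs, ring sizes and last ring indexes, then start the
 * hardware datapath.
 */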
static int
vdpa_ifcvf_start(struct ifcvf_internal *internal)
{
	struct ifcvf_hw *hw = &internal->hw;
	int i, nr_vring;
	int vid;
	struct rte_vhost_vring vq;
	uint64_t gpa;

	vid = internal->vid;
	nr_vring = rte_vhost_get_vring_num(vid);
	rte_vhost_get_negotiated_features(vid, &hw->req_features);

	for (i = 0; i < nr_vring; i++) {
		rte_vhost_get_vhost_vring(vid, i, &vq);

		gpa = qva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
		if (gpa == 0) {
			DRV_LOG(ERR, "Failed to get GPA for descriptor ring.");
			return -1;
		}
		hw->vring[i].desc = gpa;

		gpa = qva_to_gpa(vid, (uint64_t)(uintptr_t)vq.avail);
		if (gpa == 0) {
			DRV_LOG(ERR, "Failed to get GPA for available ring.");
			return -1;
		}
		hw->vring[i].avail = gpa;

		gpa = qva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
		if (gpa == 0) {
			DRV_LOG(ERR, "Failed to get GPA for used ring.");
			return -1;
		}
		hw->vring[i].used = gpa;

		hw->vring[i].size = vq.size;
		rte_vhost_get_vring_base(vid, i, &hw->vring[i].last_avail_idx,
				&hw->vring[i].last_used_idx);
	}
	hw->nr_vring = nr_vring;

	return ifcvf_start_hw(&internal->hw);
}
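/* Stop the hardware datapath and report the ring indexes back to vhost. */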
static void
vdpa_ifcvf_stop(struct ifcvf_internal *internal)
{
	struct ifcvf_hw *hw = &internal->hw;
	int vid = internal->vid;
	int i;

	ifcvf_stop_hw(hw);

	for (i = 0; i < hw->nr_vring; i++)
		rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
				hw->vring[i].last_used_idx);
}
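/*
 * Bind the device's MSI-X vectors to eventfds: vector 0 to the device's
 * own interrupt handle, the remaining vectors to the per-virtqueue
 * callfds supplied by vhost.
 */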
#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
		sizeof(int) * (IFCVF_MAX_QUEUES * 2 + 1))

static int
vdpa_enable_vfio_intr(struct ifcvf_internal *internal)
{
	int ret;
	uint32_t i, nr_vring;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr;
	struct rte_vhost_vring vring;

	nr_vring = rte_vhost_get_vring_num(internal->vid);

	irq_set = (struct vfio_irq_set *)irq_set_buf;
	irq_set->argsz = sizeof(irq_set_buf);
	irq_set->count = nr_vring + 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *)&irq_set->data;
	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = internal->pdev->intr_handle.fd;

	for (i = 0; i < nr_vring; i++) {
		rte_vhost_get_vhost_vring(internal->vid, i, &vring);
		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
	}

	ret = ioctl(internal->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret) {
		DRV_LOG(ERR, "Error enabling MSI-X interrupts: %s",
				strerror(errno));
		return -1;
	}

	return 0;
}
static int
vdpa_disable_vfio_intr(struct ifcvf_internal *internal)
{
	int ret;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;

	irq_set = (struct vfio_irq_set *)irq_set_buf;
	irq_set->argsz = sizeof(irq_set_buf);
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(internal->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret) {
		DRV_LOG(ERR, "Error disabling MSI-X interrupts: %s",
				strerror(errno));
		return -1;
	}

	return 0;
}
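/*
 * Notify-relay thread: wait on the virtqueue kickfds with epoll and turn
 * every guest kick into a write to the VF's queue notify register.
 */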
static void *
notify_relay(void *arg)
{
	int i, kickfd, epfd, nfds = 0;
	uint32_t qid, q_num;
	struct epoll_event events[IFCVF_MAX_QUEUES * 2];
	struct epoll_event ev;
	uint64_t buf;
	int nbytes;
	struct rte_vhost_vring vring;
	struct ifcvf_internal *internal = (struct ifcvf_internal *)arg;
	struct ifcvf_hw *hw = &internal->hw;

	q_num = rte_vhost_get_vring_num(internal->vid);

	epfd = epoll_create(IFCVF_MAX_QUEUES * 2);
	if (epfd < 0) {
		DRV_LOG(ERR, "failed to create epoll instance.");
		return NULL;
	}
	internal->epfd = epfd;

	for (qid = 0; qid < q_num; qid++) {
		ev.events = EPOLLIN | EPOLLPRI;
		rte_vhost_get_vhost_vring(internal->vid, qid, &vring);
		ev.data.u64 = qid | (uint64_t)vring.kickfd << 32;
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, vring.kickfd, &ev) < 0) {
			DRV_LOG(ERR, "epoll add error: %s", strerror(errno));
			return NULL;
		}
	}

	for (;;) {
		nfds = epoll_wait(epfd, events, q_num, -1);
		if (nfds < 0) {
			if (errno == EINTR)
				continue;
			DRV_LOG(ERR, "epoll_wait failed: %s", strerror(errno));
			return NULL;
		}

		for (i = 0; i < nfds; i++) {
			qid = events[i].data.u32;
			kickfd = (uint32_t)(events[i].data.u64 >> 32);
			do {
				nbytes = read(kickfd, &buf, 8);
				if (nbytes < 0) {
					if (errno == EINTR ||
					    errno == EWOULDBLOCK ||
					    errno == EAGAIN)
						continue;
					DRV_LOG(INFO, "Error reading "
						"kickfd: %s",
						strerror(errno));
				}
				break;
			} while (1);

			ifcvf_notify_queue(hw, qid);
		}
	}

	return NULL;
}
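/* Start and stop the notify-relay thread. */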
static int
setup_notify_relay(struct ifcvf_internal *internal)
{
	int ret;

	ret = pthread_create(&internal->tid, NULL, notify_relay,
			(void *)internal);
	if (ret) {
		DRV_LOG(ERR, "failed to create notify relay pthread.");
		return -1;
	}

	return 0;
}
static int
unset_notify_relay(struct ifcvf_internal *internal)
{
	void *status;

	if (internal->tid) {
		pthread_cancel(internal->tid);
		pthread_join(internal->tid, &status);
	}
	internal->tid = 0;

	if (internal->epfd >= 0)
		close(internal->epfd);
	internal->epfd = -1;

	return 0;
}
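/*
 * Reconcile the datapath with the started/dev_attached flags: when both
 * are set, map guest memory, enable interrupts, start the relay thread
 * and the hardware; otherwise tear everything down in reverse order.
 */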
static int
update_datapath(struct ifcvf_internal *internal)
{
	int ret;

	rte_spinlock_lock(&internal->lock);

	if (!rte_atomic32_read(&internal->running) &&
	    (rte_atomic32_read(&internal->started) &&
	     rte_atomic32_read(&internal->dev_attached))) {
		ret = ifcvf_dma_map(internal, 1);
		if (ret)
			goto err;

		ret = vdpa_enable_vfio_intr(internal);
		if (ret)
			goto err;

		ret = setup_notify_relay(internal);
		if (ret)
			goto err;

		ret = vdpa_ifcvf_start(internal);
		if (ret)
			goto err;

		rte_atomic32_set(&internal->running, 1);
	} else if (rte_atomic32_read(&internal->running) &&
		   (!rte_atomic32_read(&internal->started) ||
		    !rte_atomic32_read(&internal->dev_attached))) {
		vdpa_ifcvf_stop(internal);

		ret = unset_notify_relay(internal);
		if (ret)
			goto err;

		ret = vdpa_disable_vfio_intr(internal);
		if (ret)
			goto err;

		ret = ifcvf_dma_map(internal, 0);
		if (ret)
			goto err;

		rte_atomic32_set(&internal->running, 0);
	}

	rte_spinlock_unlock(&internal->lock);
	return 0;
err:
	rte_spinlock_unlock(&internal->lock);
	return ret;
}
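/*
 * vDPA callbacks invoked by the vhost library when a virtio device is
 * attached to, or detached from, this VF.
 */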
static int
ifcvf_dev_config(int vid)
{
	int did;
	struct internal_list *list;
	struct ifcvf_internal *internal;

	did = rte_vhost_get_vdpa_device_id(vid);
	list = find_internal_resource_by_did(did);
	if (list == NULL) {
		DRV_LOG(ERR, "Invalid device id: %d", did);
		return -1;
	}

	internal = list->internal;
	internal->vid = vid;
	rte_atomic32_set(&internal->dev_attached, 1);
	update_datapath(internal);

	return 0;
}
static int
ifcvf_dev_close(int vid)
{
	int did;
	struct internal_list *list;
	struct ifcvf_internal *internal;

	did = rte_vhost_get_vdpa_device_id(vid);
	list = find_internal_resource_by_did(did);
	if (list == NULL) {
		DRV_LOG(ERR, "Invalid device id: %d", did);
		return -1;
	}

	internal = list->internal;
	rte_atomic32_set(&internal->dev_attached, 0);
	update_datapath(internal);

	return 0;
}
static int
ifcvf_get_vfio_group_fd(int vid)
{
	int did;
	struct internal_list *list;

	did = rte_vhost_get_vdpa_device_id(vid);
	list = find_internal_resource_by_did(did);
	if (list == NULL) {
		DRV_LOG(ERR, "Invalid device id: %d", did);
		return -1;
	}

	return list->internal->vfio_group_fd;
}
static int
ifcvf_get_vfio_device_fd(int vid)
{
	int did;
	struct internal_list *list;

	did = rte_vhost_get_vdpa_device_id(vid);
	list = find_internal_resource_by_did(did);
	if (list == NULL) {
		DRV_LOG(ERR, "Invalid device id: %d", did);
		return -1;
	}

	return list->internal->vfio_dev_fd;
}
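/*
 * Report the offset and size of a queue's notify (doorbell) area within
 * the device's VFIO region, so vhost can map it for the guest.
 */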
static int
ifcvf_get_notify_area(int vid, int qid, uint64_t *offset, uint64_t *size)
{
	int did;
	struct internal_list *list;
	struct ifcvf_internal *internal;
	struct vfio_region_info reg = { .argsz = sizeof(reg) };
	int ret;

	did = rte_vhost_get_vdpa_device_id(vid);
	list = find_internal_resource_by_did(did);
	if (list == NULL) {
		DRV_LOG(ERR, "Invalid device id: %d", did);
		return -1;
	}

	internal = list->internal;

	reg.index = ifcvf_get_notify_region(&internal->hw);
	ret = ioctl(internal->vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
	if (ret) {
		DRV_LOG(ERR, "Can not get device region info: %s",
				strerror(errno));
		return -1;
	}

	*offset = ifcvf_get_queue_notify_off(&internal->hw, qid) + reg.offset;
	*size = PAGE_SIZE;

	return 0;
}
static int
ifcvf_get_queue_num(int did, uint32_t *queue_num)
{
	struct internal_list *list;

	list = find_internal_resource_by_did(did);
	if (list == NULL) {
		DRV_LOG(ERR, "Invalid device id: %d", did);
		return -1;
	}

	*queue_num = list->internal->max_queues;

	return 0;
}
static int
ifcvf_get_vdpa_features(int did, uint64_t *features)
{
	struct internal_list *list;

	list = find_internal_resource_by_did(did);
	if (list == NULL) {
		DRV_LOG(ERR, "Invalid device id: %d", did);
		return -1;
	}

	*features = list->internal->features;

	return 0;
}
#define VDPA_SUPPORTED_PROTOCOL_FEATURES \
		(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK | \
		 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD)

static int
ifcvf_get_protocol_features(int did __rte_unused, uint64_t *features)
{
	*features = VDPA_SUPPORTED_PROTOCOL_FEATURES;

	return 0;
}
struct rte_vdpa_dev_ops ifcvf_ops = {
	.get_queue_num = ifcvf_get_queue_num,
	.get_features = ifcvf_get_vdpa_features,
	.get_protocol_features = ifcvf_get_protocol_features,
	.dev_conf = ifcvf_dev_config,
	.dev_close = ifcvf_dev_close,
	.set_vring_state = NULL,
	.set_features = NULL,
	.migration_done = NULL,
	.get_vfio_group_fd = ifcvf_get_vfio_group_fd,
	.get_vfio_device_fd = ifcvf_get_vfio_device_fd,
	.get_notify_area = ifcvf_get_notify_area,
};
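/*
 * PCI probe: allocate per-device state, set up VFIO, derive the feature
 * set advertised to vhost and register the VF as a vDPA device.
 */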
static int
ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
		struct rte_pci_device *pci_dev)
{
	uint64_t features;
	struct ifcvf_internal *internal = NULL;
	struct internal_list *list = NULL;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	list = rte_zmalloc("ifcvf", sizeof(*list), 0);
	if (list == NULL)
		goto error;

	internal = rte_zmalloc("ifcvf", sizeof(*internal), 0);
	if (internal == NULL)
		goto error;

	internal->pdev = pci_dev;
	rte_spinlock_init(&internal->lock);
	if (ifcvf_vfio_setup(internal) < 0)
		goto error;

	internal->max_queues = IFCVF_MAX_QUEUES;
	features = ifcvf_get_features(&internal->hw);
	internal->features = (features &
		~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES);

	internal->dev_addr.pci_addr = pci_dev->addr;
	internal->dev_addr.type = PCI_ADDR;
	list->internal = internal;

	pthread_mutex_lock(&internal_list_lock);
	TAILQ_INSERT_TAIL(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);

	internal->did = rte_vdpa_register_device(&internal->dev_addr,
			&ifcvf_ops);
	if (internal->did < 0)
		goto error;

	rte_atomic32_set(&internal->started, 1);
	update_datapath(internal);

	return 0;

error:
	rte_free(list);
	rte_free(internal);
	return -1;
}
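/* Tear down the datapath, release VFIO resources and unregister the device. */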
static int
ifcvf_pci_remove(struct rte_pci_device *pci_dev)
{
	struct ifcvf_internal *internal;
	struct internal_list *list;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	list = find_internal_resource_by_dev(pci_dev);
	if (list == NULL) {
		DRV_LOG(ERR, "Invalid device: %s", pci_dev->name);
		return -1;
	}

	internal = list->internal;
	rte_atomic32_set(&internal->started, 0);
	update_datapath(internal);

	rte_pci_unmap_device(internal->pdev);
	rte_vfio_container_destroy(internal->vfio_container_fd);
	rte_vdpa_unregister_device(internal->did);

	pthread_mutex_lock(&internal_list_lock);
	TAILQ_REMOVE(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);

	rte_free(list);
	rte_free(internal);

	return 0;
}
/*
 * IFCVF has the same vendor ID and device ID as virtio net PCI
 * device, with its specific subsystem vendor ID and device ID.
 */
static const struct rte_pci_id pci_id_ifcvf_map[] = {
	{ .class_id = RTE_CLASS_ANY_ID,
	  .vendor_id = IFCVF_VENDOR_ID,
	  .device_id = IFCVF_DEVICE_ID,
	  .subsystem_vendor_id = IFCVF_SUBSYS_VENDOR_ID,
	  .subsystem_device_id = IFCVF_SUBSYS_DEVICE_ID,
	},

	{ .vendor_id = 0, /* sentinel */
	},
};
static struct rte_pci_driver rte_ifcvf_vdpa = {
	.id_table = pci_id_ifcvf_map,
	.probe = ifcvf_pci_probe,
	.remove = ifcvf_pci_remove,
};
RTE_PMD_REGISTER_PCI(net_ifcvf, rte_ifcvf_vdpa);
RTE_PMD_REGISTER_PCI_TABLE(net_ifcvf, pci_id_ifcvf_map);
RTE_PMD_REGISTER_KMOD_DEP(net_ifcvf, "* vfio-pci");
RTE_INIT(ifcvf_vdpa_init_log);
static void
ifcvf_vdpa_init_log(void)
{
	ifcvf_vdpa_logtype = rte_log_register("pmd.net.ifcvf_vdpa");
	if (ifcvf_vdpa_logtype >= 0)
		rte_log_set_level(ifcvf_vdpa_logtype, RTE_LOG_NOTICE);
}