-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
*/
#include <string.h>
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
#include <rte_vfio.h>
+#include <rte_eal.h>
+#include <rte_bus.h>
#include "eal_filesystem.h"
return -1;
}
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+static void
+pci_vfio_req_handler(void *param)
+{
+ struct rte_bus *bus;
+ int ret;
+ struct rte_device *device = (struct rte_device *)param;
+
+ bus = rte_bus_find_by_device(device);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
+ device->name);
+ return;
+ }
+
+ /*
+ * vfio kernel module request user space to release allocated
+ * resources before device be deleted in kernel, so it can directly
+ * call the vfio bus hot-unplug handler to process it.
+ */
+ ret = bus->hot_unplug_handler(device);
+ if (ret)
+ RTE_LOG(ERR, EAL,
+ "Can not handle hot-unplug for device (%s)\n",
+ device->name);
+}
+
+/* enable notifier (only enable req now) */
+static int
+pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+ int ret;
+ int fd = -1;
+
+ /* set up an eventfd for req notifier */
+ fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Cannot set up eventfd, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ dev->vfio_req_intr_handle.fd = fd;
+ dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_VFIO_REQ;
+ dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd;
+
+ ret = rte_intr_callback_register(&dev->vfio_req_intr_handle,
+ pci_vfio_req_handler,
+ (void *)&dev->device);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n");
+ goto error;
+ }
+
+ ret = rte_intr_enable(&dev->vfio_req_intr_handle);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n");
+ ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle,
+ pci_vfio_req_handler,
+ (void *)&dev->device);
+ if (ret)
+ RTE_LOG(ERR, EAL,
+ "Fail to unregister req notifier handler.\n");
+ goto error;
+ }
+
+ return 0;
+error:
+ close(fd);
+
+ dev->vfio_req_intr_handle.fd = -1;
+ dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ dev->vfio_req_intr_handle.vfio_dev_fd = -1;
+
+ return -1;
+}
+
+/* disable notifier (only disable req now) */
+static int
+pci_vfio_disable_notifier(struct rte_pci_device *dev)
+{
+ int ret;
+
+ ret = rte_intr_disable(&dev->vfio_req_intr_handle);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "fail to disable req notifier.\n");
+ return -1;
+ }
+
+ ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle,
+ pci_vfio_req_handler,
+ (void *)&dev->device);
+ if (ret) {
+ RTE_LOG(ERR, EAL,
+ "fail to unregister req notifier handler.\n");
+ return -1;
+ }
+
+ close(dev->vfio_req_intr_handle.fd);
+
+ dev->vfio_req_intr_handle.fd = -1;
+ dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ dev->vfio_req_intr_handle.vfio_dev_fd = -1;
+
+ return 0;
+}
+#endif
+
static int
pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
{
}
static int
-pci_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
+pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd)
{
if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) {
RTE_LOG(ERR, EAL, "Error setting up interrupts!\n");
return -1;
}
- /* Reset the device */
- if (ioctl(vfio_dev_fd, VFIO_DEVICE_RESET)) {
+ /*
+ * Reset the device. If the device is not capable of resetting,
+ * then it updates errno as EINVAL.
+ */
+ if (ioctl(vfio_dev_fd, VFIO_DEVICE_RESET) && errno != EINVAL) {
RTE_LOG(ERR, EAL, "Unable to reset device! Error: %d (%s)\n",
errno, strerror(errno));
return -1;
return 0;
}
+/*
+ * region info may contain capability headers, so we need to keep reallocating
+ * the memory until we match allocated memory size with argsz.
+ */
+static int
+pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
+ int region)
+{
+ struct vfio_region_info *ri;
+ size_t argsz = sizeof(*ri);
+ int ret;
+
+ ri = malloc(sizeof(*ri));
+ if (ri == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
+ return -1;
+ }
+again:
+ memset(ri, 0, argsz);
+ ri->argsz = argsz;
+ ri->index = region;
+
+ ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
+ if (ret < 0) {
+ free(ri);
+ return ret;
+ }
+ if (ri->argsz != argsz) {
+ struct vfio_region_info *tmp;
+
+ argsz = ri->argsz;
+ tmp = realloc(ri, argsz);
+
+ if (tmp == NULL) {
+ /* realloc failed but the ri is still there */
+ free(ri);
+ RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
+ return -1;
+ }
+ ri = tmp;
+ goto again;
+ }
+ *info = ri;
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *
+pci_vfio_info_cap(struct vfio_region_info *info, int cap)
+{
+ struct vfio_info_cap_header *h;
+ size_t offset;
+
+ if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
+ /* VFIO info does not advertise capabilities */
+ return NULL;
+ }
+
+ offset = VFIO_CAP_OFFSET(info);
+ while (offset != 0) {
+ h = RTE_PTR_ADD(info, offset);
+ if (h->id == cap)
+ return h;
+ offset = h->next;
+ }
+ return NULL;
+}
+
+static int
+pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
+{
+ struct vfio_region_info *info;
+ int ret;
+
+ ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
+ if (ret < 0)
+ return -1;
+
+ ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
+
+ /* cleanup */
+ free(info);
+
+ return ret;
+}
+
+
static int
pci_vfio_map_resource_primary(struct rte_pci_device *dev)
{
struct pci_map *maps;
dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ dev->vfio_req_intr_handle.fd = -1;
+#endif
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
- ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
+ ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
&vfio_dev_fd, &device_info);
if (ret)
return ret;
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n",
pci_addr);
- goto err_vfio_dev_fd;
+ goto err_vfio_res;
+ }
+ /* if we found our MSI-X BAR region, check if we can mmap it */
+ if (vfio_res->msix_table.bar_index != -1) {
+ int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
+ vfio_res->msix_table.bar_index);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
+ goto err_vfio_res;
+ } else if (ret != 0) {
+ /* we can map it, so we don't care where it is */
+ RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
+ vfio_res->msix_table.bar_index = -1;
+ }
}
for (i = 0; i < (int) vfio_res->nb_maps; i++) {
- struct vfio_region_info reg = { .argsz = sizeof(reg) };
+ struct vfio_region_info *reg = NULL;
void *bar_addr;
- reg.index = i;
-
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®);
- if (ret) {
+ ret = pci_vfio_get_region_info(vfio_dev_fd, ®, i);
+ if (ret < 0) {
RTE_LOG(ERR, EAL, " %s cannot get device region info "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
+ "error %i (%s)\n", pci_addr, errno,
+ strerror(errno));
goto err_vfio_res;
}
/* chk for io port region */
ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
- if (ret < 0)
+ if (ret < 0) {
+ free(reg);
goto err_vfio_res;
- else if (ret) {
+ } else if (ret) {
RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
i);
+ free(reg);
continue;
}
/* skip non-mmapable BARs */
- if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+ if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
+ free(reg);
continue;
+ }
/* try mapping somewhere close to the end of hugepages */
if (pci_map_addr == NULL)
pci_map_addr = pci_find_max_end_va();
bar_addr = pci_map_addr;
- pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+ pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
maps[i].addr = bar_addr;
- maps[i].offset = reg.offset;
- maps[i].size = reg.size;
+ maps[i].offset = reg->offset;
+ maps[i].size = reg->size;
maps[i].path = NULL; /* vfio doesn't have per-resource paths */
ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
if (ret < 0) {
RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n",
pci_addr, i, strerror(errno));
+ free(reg);
goto err_vfio_res;
}
dev->mem_resource[i].addr = maps[i].addr;
+
+ free(reg);
}
- if (pci_vfio_setup_device(dev, vfio_dev_fd) < 0) {
+ if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
RTE_LOG(ERR, EAL, " %s setup device failed\n", pci_addr);
goto err_vfio_res;
}
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) {
+ RTE_LOG(ERR, EAL, "Error setting up notifier!\n");
+ goto err_vfio_res;
+ }
+
+#endif
TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
return 0;
struct pci_map *maps;
dev->intr_handle.fd = -1;
- dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ dev->vfio_req_intr_handle.fd = -1;
+#endif
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
- ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
+ ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
&vfio_dev_fd, &device_info);
if (ret)
return ret;
/* if we're in a secondary process, just find our tailq entry */
TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (pci_addr_cmp(&vfio_res->pci_addr,
+ if (rte_pci_addr_cmp(&vfio_res->pci_addr,
&dev->addr))
continue;
break;
dev->mem_resource[i].addr = maps[i].addr;
}
+ /* we need save vfio_dev_fd, so it can be used during release */
+ dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd;
+#endif
+
return 0;
err_vfio_dev_fd:
close(vfio_dev_fd);
return pci_vfio_map_resource_secondary(dev);
}
-int
-pci_vfio_unmap_resource(struct rte_pci_device *dev)
+static struct mapped_pci_resource *
+find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list,
+ struct rte_pci_device *dev,
+ const char *pci_addr)
+{
+ struct mapped_pci_resource *vfio_res = NULL;
+ struct pci_map *maps;
+ int i;
+
+ /* Get vfio_res */
+ TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+ if (rte_pci_addr_cmp(&vfio_res->pci_addr, &dev->addr))
+ continue;
+ break;
+ }
+
+ if (vfio_res == NULL)
+ return vfio_res;
+
+ RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
+ pci_addr);
+
+ maps = vfio_res->maps;
+ for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+
+ /*
+ * We do not need to be aware of MSI-X table BAR mappings as
+ * when mapping. Just using current maps array is enough
+ */
+ if (maps[i].addr) {
+ RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
+ pci_addr, maps[i].addr);
+ pci_unmap_resource(maps[i].addr, maps[i].size);
+ }
+ }
+
+ return vfio_res;
+}
+
+static int
+pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
{
char pci_addr[PATH_MAX] = {0};
struct rte_pci_addr *loc = &dev->addr;
- int i, ret;
struct mapped_pci_resource *vfio_res = NULL;
struct mapped_pci_res_list *vfio_res_list;
-
- struct pci_map *maps;
+ int ret;
/* store PCI address string */
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+ ret = pci_vfio_disable_notifier(dev);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "fail to disable req notifier.\n");
+ return -1;
+ }
+#endif
if (close(dev->intr_handle.fd) < 0) {
RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
pci_addr);
return -1;
}
- ret = vfio_release_device(pci_get_sysfs_path(), pci_addr,
+ ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr,
dev->intr_handle.vfio_dev_fd);
if (ret < 0) {
RTE_LOG(ERR, EAL,
return ret;
}
- vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
- /* Get vfio_res */
- TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
- continue;
- break;
- }
+ vfio_res_list =
+ RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+ vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr);
+
/* if we haven't found our tailq entry, something's wrong */
if (vfio_res == NULL) {
RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
return -1;
}
- /* unmap BARs */
- maps = vfio_res->maps;
+ TAILQ_REMOVE(vfio_res_list, vfio_res, next);
- RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
- pci_addr);
- for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+ return 0;
+}
- /*
- * We do not need to be aware of MSI-X table BAR mappings as
- * when mapping. Just using current maps array is enough
- */
- if (maps[i].addr) {
- RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
- pci_addr, maps[i].addr);
- pci_unmap_resource(maps[i].addr, maps[i].size);
- }
+static int
+pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev)
+{
+ char pci_addr[PATH_MAX] = {0};
+ struct rte_pci_addr *loc = &dev->addr;
+ struct mapped_pci_resource *vfio_res = NULL;
+ struct mapped_pci_res_list *vfio_res_list;
+ int ret;
+
+ /* store PCI address string */
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+ ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr,
+ dev->intr_handle.vfio_dev_fd);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot release device\n", __func__);
+ return ret;
}
- TAILQ_REMOVE(vfio_res_list, vfio_res, next);
+ vfio_res_list =
+ RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+ vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr);
+
+ /* if we haven't found our tailq entry, something's wrong */
+ if (vfio_res == NULL) {
+ RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
+ pci_addr);
+ return -1;
+ }
return 0;
}
+int
+pci_vfio_unmap_resource(struct rte_pci_device *dev)
+{
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ return pci_vfio_unmap_resource_primary(dev);
+ else
+ return pci_vfio_unmap_resource_secondary(dev);
+}
+
int
pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
int
pci_vfio_is_enabled(void)
{
- return vfio_is_enabled("vfio_pci");
+ return rte_vfio_is_enabled("vfio_pci");
}
#endif