#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <stdbool.h>
#include <rte_log.h>
#include <rte_pci.h>
/* set PCI bus mastering */
static int
-pci_vfio_set_bus_master(int dev_fd)
+pci_vfio_set_bus_master(int dev_fd, bool op)
{
uint16_t reg;
int ret;
return -1;
}
- /* set the master bit */
- reg |= PCI_COMMAND_MASTER;
+ if (op)
+ /* set the master bit */
+ reg |= PCI_COMMAND_MASTER;
+ else
+ reg &= ~(PCI_COMMAND_MASTER);
ret = pwrite64(dev_fd, ®, sizeof(reg),
VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
}
/* set bus mastering for the device */
- if (pci_vfio_set_bus_master(vfio_dev_fd)) {
+ if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr);
close(vfio_dev_fd);
rte_free(vfio_res);
return 0;
}
+int
+pci_vfio_unmap_resource(struct rte_pci_device *dev)
+{
+ char pci_addr[PATH_MAX] = {0};
+ struct rte_pci_addr *loc = &dev->addr;
+ int i, ret;
+ struct mapped_pci_resource *vfio_res = NULL;
+ struct mapped_pci_res_list *vfio_res_list;
+
+ struct pci_map *maps;
+
+ /* store PCI address string */
+ snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+ loc->domain, loc->bus, loc->devid, loc->function);
+
+
+ if (close(dev->intr_handle.fd) < 0) {
+ RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
+ pci_addr);
+ return -1;
+ }
+
+ if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) {
+ RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n",
+ pci_addr);
+ return -1;
+ }
+
+ ret = vfio_release_device(pci_get_sysfs_path(), pci_addr,
+ dev->intr_handle.vfio_dev_fd);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL,
+ "%s(): cannot release device\n", __func__);
+ return ret;
+ }
+
+ vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+ /* Get vfio_res */
+ TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+ if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
+ continue;
+ break;
+ }
+ /* if we haven't found our tailq entry, something's wrong */
+ if (vfio_res == NULL) {
+ RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
+ pci_addr);
+ return -1;
+ }
+
+ /* unmap BARs */
+ maps = vfio_res->maps;
+
+ RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
+ pci_addr);
+ for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+
+ /*
+ * We do not need to be aware of MSI-X table BAR mappings as
+ * when mapping. Just using current maps array is enough
+ */
+ if (maps[i].addr) {
+ RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
+ pci_addr, maps[i].addr);
+ pci_unmap_resource(maps[i].addr, maps[i].size);
+ }
+ }
+
+ TAILQ_REMOVE(vfio_res_list, vfio_res, next);
+
+ return 0;
+}
+
int
pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
{
int i;
int vfio_group_fd;
+ int group_idx = -1;
char filename[PATH_MAX];
/* check if we already have the group descriptor open */
- for (i = 0; i < vfio_cfg.vfio_group_idx; i++)
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
return vfio_cfg.vfio_groups[i].fd;
+ /* Lets see first if there is room for a new group */
+ if (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+ return -1;
+ }
+
+ /* Now lets get an index for the new group */
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg.vfio_groups[i].group_no == -1) {
+ group_idx = i;
+ break;
+ }
+
+ /* This should not happen */
+ if (group_idx == -1) {
+ RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
+ return -1;
+ }
/* if primary, try to open the group */
if (internal_config.process_type == RTE_PROC_PRIMARY) {
/* try regular group format */
/* noiommu group found */
}
- /* if the fd is valid, create a new group for it */
- if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) {
- RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
- close(vfio_group_fd);
- return -1;
- }
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
+ vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no;
+ vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd;
+ vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
/* if we're in a secondary process, request group fd from the primary
return -1;
}
-static void
-clear_current_group(void)
+int
+clear_group(int vfio_group_fd)
{
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0;
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1;
+ int i;
+ int socket_fd, ret;
+
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd) {
+ vfio_cfg.vfio_groups[i].group_no = -1;
+ vfio_cfg.vfio_groups[i].fd = -1;
+ vfio_cfg.vfio_active_groups--;
+ return 0;
+ }
+ return -1;
+ }
+
+ /* This is just for SECONDARY processes */
+ socket_fd = vfio_mp_sync_connect_to_primary();
+
+ if (socket_fd < 0) {
+ RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
+ return -1;
+ }
+
+ if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) {
+ RTE_LOG(ERR, EAL, " cannot request container fd!\n");
+ close(socket_fd);
+ return -1;
+ }
+
+ if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
+ RTE_LOG(ERR, EAL, " cannot send group fd!\n");
+ close(socket_fd);
+ return -1;
+ }
+
+ ret = vfio_mp_sync_receive_request(socket_fd);
+ switch (ret) {
+ case SOCKET_NO_FD:
+ RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n");
+ close(socket_fd);
+ break;
+ case SOCKET_OK:
+ close(socket_fd);
+ return 0;
+ case SOCKET_ERR:
+ RTE_LOG(ERR, EAL, " Socket error\n");
+ close(socket_fd);
+ break;
+ default:
+ RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret);
+ close(socket_fd);
+ }
+ return -1;
}
-int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+int
+vfio_setup_device(const char *sysfs_base, const char *dev_addr,
int *vfio_dev_fd, struct vfio_device_info *device_info)
{
struct vfio_group_status group_status = {
if (vfio_group_fd < 0)
return -1;
- /* store group fd */
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
-
/* if group_fd == 0, that means the device isn't managed by VFIO */
if (vfio_group_fd == 0) {
- RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
dev_addr);
- /* we store 0 as group fd to distinguish between existing but
- * unbound VFIO groups, and groups that don't exist at all.
- */
- vfio_cfg.vfio_group_idx++;
return 1;
}
RTE_LOG(ERR, EAL, " %s cannot get group status, "
"error %i (%s)\n", dev_addr, errno, strerror(errno));
close(vfio_group_fd);
- clear_current_group();
+ clear_group(vfio_group_fd);
return -1;
} else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", dev_addr);
close(vfio_group_fd);
- clear_current_group();
+ clear_group(vfio_group_fd);
return -1;
}
RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, "
"error %i (%s)\n", dev_addr, errno, strerror(errno));
close(vfio_group_fd);
- clear_current_group();
+ clear_group(vfio_group_fd);
return -1;
}
- /*
- * at this point we know that this group has been successfully
- * initialized, so we increment vfio_group_idx to indicate that we can
- * add new groups.
- */
- vfio_cfg.vfio_group_idx++;
}
/*
* pick an IOMMU type and set up DMA mappings for container
*
- * needs to be done only once, only when at least one group is assigned to
- * a container and only in primary process
+ * needs to be done only once, only when first group is assigned to
+ * a container and only in primary process. Note this can happen several
+ * times with the hotplug functionality.
*/
if (internal_config.process_type == RTE_PROC_PRIMARY &&
- vfio_cfg.vfio_container_has_dma == 0) {
+ vfio_cfg.vfio_active_groups == 1) {
/* select an IOMMU type which we will be using */
const struct vfio_iommu_type *t =
vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
if (!t) {
RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", dev_addr);
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
return -1;
}
ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
if (ret) {
RTE_LOG(ERR, EAL, " %s DMA remapping failed, "
"error %i (%s)\n", dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
return -1;
}
- vfio_cfg.vfio_container_has_dma = 1;
}
/* get a file descriptor for the device */
*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
if (*vfio_dev_fd < 0) {
- /* if we cannot get a device fd, this simply means that this
- * particular port is not bound to VFIO
- */
- RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ /* if we cannot get a device fd, this implies a problem with
+ * the VFIO group or the container not having IOMMU configured.
+ */
+
+ RTE_LOG(WARNING, EAL, "Getting a vfio_dev_fd for %s failed\n",
dev_addr);
- return 1;
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
+ return -1;
}
/* test and setup the device */
ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
if (ret) {
RTE_LOG(ERR, EAL, " %s cannot get device info, "
- "error %i (%s)\n", dev_addr, errno, strerror(errno));
+ "error %i (%s)\n", dev_addr, errno,
+ strerror(errno));
close(*vfio_dev_fd);
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
return -1;
}
return 0;
}
+int
+vfio_release_device(const char *sysfs_base, const char *dev_addr,
+ int vfio_dev_fd)
+{
+ struct vfio_group_status group_status = {
+ .argsz = sizeof(group_status)
+ };
+ int vfio_group_fd;
+ int iommu_group_no;
+ int ret;
+
+ /* get group number */
+ ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+ if (ret <= 0) {
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n",
+ dev_addr);
+ /* This is an error at this point. */
+ return -1;
+ }
+
+ /* get the actual group fd */
+ vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+ if (vfio_group_fd <= 0) {
+ RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n",
+ dev_addr);
+ return -1;
+ }
+
+ /* At this point we got an active group. Closing it will make the
+ * container detachment. If this is the last active group, VFIO kernel
+ * code will unset the container and the IOMMU mappings.
+ */
+
+ if (close(vfio_group_fd) < 0)
+ RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
+ dev_addr);
+
+ if (close(vfio_dev_fd) < 0)
+ RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n",
+ dev_addr);
+
+ if (clear_group(vfio_group_fd) < 0)
+ RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
+ dev_addr);
+
+ return 0;
+}
+
int
vfio_enable(const char *modname)
{
if (ret) {
RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
- "error %i (%s)\n", errno, strerror(errno));
+ "error %i (%s)\n", errno,
+ strerror(errno));
return -1;
}
}