X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal_vfio.c;h=58f0123e29aee6ee63fcf634a88d9303c838772b;hb=7ba49d39f14c;hp=6e2e84ca7d3b247eb7349dbcb52901691e32be0a;hpb=94c0776b1badd1ee715d60f07391058f23494365;p=dpdk.git diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 6e2e84ca7d..58f0123e29 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "eal_filesystem.h" #include "eal_vfio.h" @@ -68,8 +69,8 @@ vfio_get_group_fd(int iommu_group_no) { int i; int vfio_group_fd; - int group_idx = -1; char filename[PATH_MAX]; + struct vfio_group *cur_grp; /* check if we already have the group descriptor open */ for (i = 0; i < VFIO_MAX_GROUPS; i++) @@ -85,12 +86,12 @@ vfio_get_group_fd(int iommu_group_no) /* Now lets get an index for the new group */ for (i = 0; i < VFIO_MAX_GROUPS; i++) if (vfio_cfg.vfio_groups[i].group_no == -1) { - group_idx = i; + cur_grp = &vfio_cfg.vfio_groups[i]; break; } /* This should not happen */ - if (group_idx == -1) { + if (i == VFIO_MAX_GROUPS) { RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); return -1; } @@ -123,8 +124,8 @@ vfio_get_group_fd(int iommu_group_no) /* noiommu group found */ } - vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no; - vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd; + cur_grp->group_no = iommu_group_no; + cur_grp->fd = vfio_group_fd; vfio_cfg.vfio_active_groups++; return vfio_group_fd; } @@ -157,9 +158,12 @@ vfio_get_group_fd(int iommu_group_no) return 0; case SOCKET_OK: vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); - /* if we got the fd, return it */ + /* if we got the fd, store it and return it */ if (vfio_group_fd > 0) { close(socket_fd); + cur_grp->group_no = iommu_group_no; + cur_grp->fd = vfio_group_fd; + vfio_cfg.vfio_active_groups++; return vfio_group_fd; } /* fall-through on error */ @@ -172,6 +176,55 @@ vfio_get_group_fd(int iommu_group_no) return -1; } + +static int +get_vfio_group_idx(int vfio_group_fd) +{ + int i; + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd) + return i; + return -1; +} + +static void +vfio_group_device_get(int vfio_group_fd) +{ + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + else + vfio_cfg.vfio_groups[i].devices++; +} + +static void +vfio_group_device_put(int vfio_group_fd) +{ + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + else + vfio_cfg.vfio_groups[i].devices--; +} + +static int +vfio_group_device_count(int vfio_group_fd) +{ + int i; + + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) { + RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); + return -1; + } + + return vfio_cfg.vfio_groups[i].devices; +} + int clear_group(int vfio_group_fd) { @@ -180,14 +233,14 @@ clear_group(int vfio_group_fd) if (internal_config.process_type == RTE_PROC_PRIMARY) { - for (i = 0; i < VFIO_MAX_GROUPS; i++) - if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd) { - vfio_cfg.vfio_groups[i].group_no = -1; - vfio_cfg.vfio_groups[i].fd = -1; - vfio_cfg.vfio_active_groups--; - return 0; - } - return -1; + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0) + return -1; + vfio_cfg.vfio_groups[i].group_no = -1; + vfio_cfg.vfio_groups[i].fd = -1; + vfio_cfg.vfio_groups[i].devices = 0; + vfio_cfg.vfio_active_groups--; + return 0; } /* This is just for SECONDARY processes */ @@ -231,7 +284,7 @@ clear_group(int vfio_group_fd) } int -vfio_setup_device(const char *sysfs_base, const char *dev_addr, +rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, int *vfio_dev_fd, struct vfio_device_info *device_info) { struct vfio_group_status group_status = { @@ -298,33 +351,37 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr, clear_group(vfio_group_fd); return -1; } - } - /* - * pick an IOMMU type and set up DMA mappings for container - * - * needs to be done only once, only when first group is assigned to - * a container and only in primary process. Note this can happen several - * times with the hotplug functionality. - */ - if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_cfg.vfio_active_groups == 1) { - /* select an IOMMU type which we will be using */ - const struct vfio_iommu_type *t = + /* + * pick an IOMMU type and set up DMA mappings for container + * + * needs to be done only once, only when first group is + * assigned to a container and only in primary process. + * Note this can happen several times with the hotplug + * functionality. + */ + if (internal_config.process_type == RTE_PROC_PRIMARY && + vfio_cfg.vfio_active_groups == 1) { + /* select an IOMMU type which we will be using */ + const struct vfio_iommu_type *t = vfio_set_iommu_type(vfio_cfg.vfio_container_fd); - if (!t) { - RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", dev_addr); - close(vfio_group_fd); - clear_group(vfio_group_fd); - return -1; - } - ret = t->dma_map_func(vfio_cfg.vfio_container_fd); - if (ret) { - RTE_LOG(ERR, EAL, " %s DMA remapping failed, " - "error %i (%s)\n", dev_addr, errno, strerror(errno)); - close(vfio_group_fd); - clear_group(vfio_group_fd); - return -1; + if (!t) { + RTE_LOG(ERR, EAL, + " %s failed to select IOMMU type\n", + dev_addr); + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; + } + ret = t->dma_map_func(vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, + " %s DMA remapping failed, error %i (%s)\n", + dev_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_group(vfio_group_fd); + return -1; + } } } @@ -353,12 +410,13 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr, clear_group(vfio_group_fd); return -1; } + vfio_group_device_get(vfio_group_fd); return 0; } int -vfio_release_device(const char *sysfs_base, const char *dev_addr, +rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int vfio_dev_fd) { struct vfio_group_status group_status = { @@ -390,23 +448,37 @@ vfio_release_device(const char *sysfs_base, const char *dev_addr, * code will unset the container and the IOMMU mappings. */ - if (close(vfio_group_fd) < 0) - RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n", - dev_addr); - - if (close(vfio_dev_fd) < 0) + /* Closing a device */ + if (close(vfio_dev_fd) < 0) { RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n", dev_addr); + return -1; + } - if (clear_group(vfio_group_fd) < 0) - RTE_LOG(INFO, EAL, "Error when clearing group for %s\n", - dev_addr); + /* An VFIO group can have several devices attached. Just when there is + * no devices remaining should the group be closed. + */ + vfio_group_device_put(vfio_group_fd); + if (!vfio_group_device_count(vfio_group_fd)) { + + if (close(vfio_group_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n", + dev_addr); + return -1; + } + + if (clear_group(vfio_group_fd) < 0) { + RTE_LOG(INFO, EAL, "Error when clearing group for %s\n", + dev_addr); + return -1; + } + } return 0; } int -vfio_enable(const char *modname) +rte_vfio_enable(const char *modname) { /* initialize group list */ int i; @@ -415,12 +487,13 @@ vfio_enable(const char *modname) for (i = 0; i < VFIO_MAX_GROUPS; i++) { vfio_cfg.vfio_groups[i].fd = -1; vfio_cfg.vfio_groups[i].group_no = -1; + vfio_cfg.vfio_groups[i].devices = 0; } /* inform the user that we are probing for VFIO */ RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); - /* check if vfio-pci module is loaded */ + /* check if vfio module is loaded */ vfio_available = rte_eal_check_module(modname); /* return error directly */ @@ -450,7 +523,7 @@ vfio_enable(const char *modname) } int -vfio_is_enabled(const char *modname) +rte_vfio_is_enabled(const char *modname) { const int mod_available = rte_eal_check_module(modname); return vfio_cfg.vfio_enabled && mod_available; @@ -637,7 +710,10 @@ vfio_type1_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + dma_map.iova = dma_map.vaddr; + else + dma_map.iova = ms[i].iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); @@ -690,10 +766,19 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } - /* calculate window size based on number of hugepages configured */ - create.window_size = rte_eal_get_physmem_size(); + /* create DMA window from 0 to max(phys_addr + len) */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if (ms[i].addr == NULL) + break; + + create.window_size = RTE_MAX(create.window_size, + ms[i].iova + ms[i].len); + } + + /* sPAPR requires window size to be a power of 2 */ + create.window_size = rte_align64pow2(create.window_size); create.page_shift = __builtin_ctzll(ms->hugepage_sz); - create.levels = 2; + create.levels = 1; ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create); if (ret) { @@ -702,6 +787,11 @@ vfio_spapr_dma_map(int vfio_container_fd) return -1; } + if (create.start_addr != 0) { + RTE_LOG(ERR, EAL, " DMA window start address != 0\n"); + return -1; + } + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ for (i = 0; i < RTE_MAX_MEMSEG; i++) { struct vfio_iommu_type1_dma_map dma_map; @@ -723,7 +813,10 @@ vfio_spapr_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + if (rte_eal_iova_mode() == RTE_IOVA_VA) + dma_map.iova = dma_map.vaddr; + else + dma_map.iova = ms[i].iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; @@ -747,4 +840,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) return 0; } +int +rte_vfio_noiommu_is_enabled(void) +{ + int fd, ret, cnt __rte_unused; + char c; + + ret = -1; + fd = open(VFIO_NOIOMMU_MODE, O_RDONLY); + if (fd < 0) + return -1; + + cnt = read(fd, &c, 1); + if (c == 'Y') + ret = 1; + + close(fd); + return ret; +} + #endif