From ea2dc1066870906fba326837d2efe8a37a99eebe Mon Sep 17 00:00:00 2001 From: Xiao Wang Date: Tue, 17 Apr 2018 15:06:21 +0800 Subject: [PATCH] vfio: add multi container support This patch adds APIs to support container create/destroy and device bind/unbind with a container. It also provides an API for IOMMU programming on a specified container. A driver could use "rte_vfio_container_create" helper to create a new container from eal, use "rte_vfio_container_group_bind" to bind a device to the newly created container. During rte_vfio_setup_device the container bound with the device will be used for IOMMU setup. Signed-off-by: Junjie Chen Signed-off-by: Xiao Wang Reviewed-by: Maxime Coquelin Reviewed-by: Ferruh Yigit Acked-by: Anatoly Burakov --- lib/librte_eal/bsdapp/eal/eal.c | 44 ++++ lib/librte_eal/common/include/rte_vfio.h | 128 ++++++++++- lib/librte_eal/linuxapp/eal/eal_vfio.c | 269 +++++++++++++++++++++-- lib/librte_eal/rte_eal_version.map | 6 + 4 files changed, 428 insertions(+), 19 deletions(-) diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 10d8dc03f0..a63f11f313 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -828,3 +828,47 @@ rte_vfio_get_group_fd(__rte_unused int iommu_group_num) { return -1; } + +int __rte_experimental +rte_vfio_container_create(void) +{ + return -1; +} + +int __rte_experimental +rte_vfio_container_destroy(__rte_unused int container_fd) +{ + return -1; +} + +int __rte_experimental +rte_vfio_container_group_bind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int __rte_experimental +rte_vfio_container_group_unbind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int __rte_experimental +rte_vfio_container_dma_map(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int __rte_experimental 
+rte_vfio_container_dma_unmap(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h index 8900064844..f90972faa3 100644 --- a/lib/librte_eal/common/include/rte_vfio.h +++ b/lib/librte_eal/common/include/rte_vfio.h @@ -161,7 +161,10 @@ rte_vfio_clear_group(int vfio_group_fd); /** * Map memory region for use with VFIO. * - * @note requires at least one device to be attached at the time of mapping. + * @note Requires at least one device to be attached at the time of + * mapping. DMA maps done via this API will only apply to the default + * container and will not apply to any of the containers created + * via rte_vfio_container_create(). * * @param vaddr * Starting virtual address of memory to be mapped. @@ -252,6 +255,129 @@ rte_vfio_get_container_fd(void); int __rte_experimental rte_vfio_get_group_fd(int iommu_group_num); +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Create a new container for device binding. + * + * @note Any newly allocated DPDK memory will not be mapped into these + * containers by default; the user needs to manage DMA mappings for + * any container created by this API. + * + * @return + * the container fd if successful + * <0 if failed + */ +int __rte_experimental +rte_vfio_container_create(void); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Destroy the container and unbind all vfio groups within it. + * + * @param container_fd + * the container fd to destroy + * + * @return + * 0 if successful + * <0 if failed + */ +int __rte_experimental +rte_vfio_container_destroy(int container_fd); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Bind an IOMMU group to a container. 
+ * + * @param container_fd + * the container's fd + * + * @param iommu_group_num + * the iommu group number to bind to the container + * + * @return + * group fd if successful + * <0 if failed + */ +int __rte_experimental +rte_vfio_container_group_bind(int container_fd, int iommu_group_num); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Unbind an IOMMU group from a container. + * + * @param container_fd + * the container's fd + * + * @param iommu_group_num + * the iommu group number to unbind from the container + * + * @return + * 0 if successful + * <0 if failed + */ +int __rte_experimental +rte_vfio_container_group_unbind(int container_fd, int iommu_group_num); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Perform DMA mapping for devices in a container. + * + * @param container_fd + * the specified container fd + * + * @param vaddr + * Starting virtual address of memory to be mapped. + * + * @param iova + * Starting IOVA address of memory to be mapped. + * + * @param len + * Length of memory segment being mapped. + * + * @return + * 0 if successful + * <0 if failed + */ +int __rte_experimental +rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, + uint64_t iova, uint64_t len); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Perform DMA unmapping for devices in a container. + * + * @param container_fd + * the specified container fd + * + * @param vaddr + * Starting virtual address of memory to be unmapped. + * + * @param iova + * Starting IOVA address of memory to be unmapped. + * + * @param len + * Length of memory segment being unmapped. 
+ * + * @return + * 0 if successful + * <0 if failed + */ +int __rte_experimental +rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, + uint64_t iova, uint64_t len); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 261b104449..7afa33d3a0 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -1509,19 +1509,15 @@ vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, len, do_map); } -int __rte_experimental -rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len) +static int +container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, + uint64_t len) { struct user_mem_map *new_map; struct user_mem_maps *user_mem_maps; int ret = 0; - if (len == 0) { - rte_errno = EINVAL; - return -1; - } - - user_mem_maps = &default_vfio_cfg->mem_maps; + user_mem_maps = &vfio_cfg->mem_maps; rte_spinlock_recursive_lock(&user_mem_maps->lock); if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) { RTE_LOG(ERR, EAL, "No more space for user mem maps\n"); @@ -1530,7 +1526,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len) goto out; } /* map the entry */ - if (vfio_dma_mem_map(default_vfio_cfg, vaddr, iova, len, 1)) { + if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) { /* technically, this will fail if there are currently no devices * plugged in, even if a device were added later, this mapping * might have succeeded. 
however, since we cannot verify if this @@ -1554,19 +1550,15 @@ out: return ret; } -int __rte_experimental -rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len) +static int +container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, + uint64_t len) { struct user_mem_map *map, *new_map = NULL; struct user_mem_maps *user_mem_maps; int ret = 0; - if (len == 0) { - rte_errno = EINVAL; - return -1; - } - - user_mem_maps = &default_vfio_cfg->mem_maps; + user_mem_maps = &vfio_cfg->mem_maps; rte_spinlock_recursive_lock(&user_mem_maps->lock); /* find our mapping */ @@ -1591,7 +1583,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len) } /* unmap the entry */ - if (vfio_dma_mem_map(default_vfio_cfg, vaddr, iova, len, 0)) { + if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) { /* there may not be any devices plugged in, so unmapping will * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't * stop us from removing the mapping, as the assumption is we @@ -1630,6 +1622,28 @@ out: return ret; } +int __rte_experimental +rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len) +{ + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + return container_dma_map(default_vfio_cfg, vaddr, iova, len); +} + +int __rte_experimental +rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len) +{ + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + return container_dma_unmap(default_vfio_cfg, vaddr, iova, len); +} + int rte_vfio_noiommu_is_enabled(void) { @@ -1662,6 +1676,181 @@ rte_vfio_noiommu_is_enabled(void) return c == 'Y'; } +int __rte_experimental +rte_vfio_container_create(void) +{ + int i; + + /* Find an empty slot to store new vfio config */ + for (i = 1; i < VFIO_MAX_CONTAINERS; i++) { + if (vfio_cfgs[i].vfio_container_fd == -1) + break; + } + + if (i == VFIO_MAX_CONTAINERS) { + RTE_LOG(ERR, EAL, "exceed max vfio container limit\n"); + return -1; + } + + vfio_cfgs[i].vfio_container_fd = 
rte_vfio_get_container_fd(); + if (vfio_cfgs[i].vfio_container_fd < 0) { + RTE_LOG(NOTICE, EAL, "fail to create a new container\n"); + return -1; + } + + return vfio_cfgs[i].vfio_container_fd; +} + +int __rte_experimental +rte_vfio_container_destroy(int container_fd) +{ + struct vfio_config *vfio_cfg; + int i; + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg->vfio_groups[i].group_num != -1) + rte_vfio_container_group_unbind(container_fd, + vfio_cfg->vfio_groups[i].group_num); + + close(container_fd); + vfio_cfg->vfio_container_fd = -1; + vfio_cfg->vfio_active_groups = 0; + vfio_cfg->vfio_iommu_type = NULL; + + return 0; +} + +int __rte_experimental +rte_vfio_container_group_bind(int container_fd, int iommu_group_num) +{ + struct vfio_config *vfio_cfg; + struct vfio_group *cur_grp; + int vfio_group_fd; + int i; + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + /* Check room for new group */ + if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + return -1; + } + + /* Get an index for the new group */ + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg->vfio_groups[i].group_num == -1) { + cur_grp = &vfio_cfg->vfio_groups[i]; + break; + } + + /* This should not happen */ + if (i == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); + return -1; + } + + vfio_group_fd = vfio_open_group_fd(iommu_group_num); + if (vfio_group_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); + return -1; + } + cur_grp->group_num = iommu_group_num; + cur_grp->fd = vfio_group_fd; + cur_grp->devices = 0; + vfio_cfg->vfio_active_groups++; + + return vfio_group_fd; +} + +int __rte_experimental 
+rte_vfio_container_group_unbind(int container_fd, int iommu_group_num) +{ + struct vfio_config *vfio_cfg; + struct vfio_group *cur_grp; + int i; + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + for (i = 0; i < VFIO_MAX_GROUPS; i++) { + if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) { + cur_grp = &vfio_cfg->vfio_groups[i]; + break; + } + } + + /* This should not happen */ + if (i == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Specified group number not found\n"); + return -1; + } + + if (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) { + RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for" + " iommu_group_num %d\n", iommu_group_num); + return -1; + } + cur_grp->group_num = -1; + cur_grp->fd = -1; + cur_grp->devices = 0; + vfio_cfg->vfio_active_groups--; + + return 0; +} + +int __rte_experimental +rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len) +{ + struct vfio_config *vfio_cfg; + + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + return container_dma_map(vfio_cfg, vaddr, iova, len); +} + +int __rte_experimental +rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len) +{ + struct vfio_config *vfio_cfg; + + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + return container_dma_unmap(vfio_cfg, vaddr, iova, len); +} + #else int __rte_experimental @@ -1738,4 +1927,48 @@ rte_vfio_get_group_fd(__rte_unused int iommu_group_num) return -1; } +int __rte_experimental +rte_vfio_container_create(void) +{ + return -1; +} + +int __rte_experimental 
+rte_vfio_container_destroy(__rte_unused int container_fd) +{ + return -1; +} + +int __rte_experimental +rte_vfio_container_group_bind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int __rte_experimental +rte_vfio_container_group_unbind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int __rte_experimental +rte_vfio_container_dma_map(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int __rte_experimental +rte_vfio_container_dma_unmap(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + #endif /* VFIO_PRESENT */ diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index b329d34c0f..8597239743 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -304,5 +304,11 @@ EXPERIMENTAL { rte_vfio_get_container_fd; rte_vfio_get_group_fd; rte_vfio_get_group_num; + rte_vfio_container_create; + rte_vfio_container_destroy; + rte_vfio_container_dma_map; + rte_vfio_container_dma_unmap; + rte_vfio_container_group_bind; + rte_vfio_container_group_unbind; } DPDK_18.02; -- 2.20.1