+
+/*
+ * Obtain a VFIO container file descriptor.
+ *
+ * In a primary process the container (VFIO_CONTAINER_PATH) is opened
+ * directly, then its API version and IOMMU extension support are checked.
+ * In any other process type the descriptor is requested from the primary
+ * process over the EAL_VFIO_MP channel, passing a 5 second timespec to
+ * rte_mp_request_sync().
+ *
+ * Returns the container fd on success, -1 on failure (the reason is logged).
+ */
+int
+rte_vfio_get_container_fd(void)
+{
+	int ret, vfio_container_fd;
+	struct rte_mp_msg mp_req, *mp_rep;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+
+
+	/* if we're in a primary process, try to open the container */
+	if (internal_config.process_type == RTE_PROC_PRIMARY) {
+		vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR);
+		if (vfio_container_fd < 0) {
+			RTE_LOG(ERR, EAL, " cannot open VFIO container, "
+					"error %i (%s)\n", errno, strerror(errno));
+			return -1;
+		}
+
+		/* check VFIO API version */
+		ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION);
+		if (ret != VFIO_API_VERSION) {
+			if (ret < 0)
+				RTE_LOG(ERR, EAL, " could not get VFIO API version, "
+						"error %i (%s)\n", errno, strerror(errno));
+			else
+				RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n");
+			close(vfio_container_fd);
+			return -1;
+		}
+
+		ret = vfio_has_supported_extensions(vfio_container_fd);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " no supported IOMMU "
+					"extensions found!\n");
+			/* do not leak the container fd on this error path */
+			close(vfio_container_fd);
+			return -1;
+		}
+
+		return vfio_container_fd;
+	}
+	/*
+	 * if we're in a secondary process, request container fd from the
+	 * primary process via mp channel
+	 */
+	p->req = SOCKET_REQ_CONTAINER;
+	strcpy(mp_req.name, EAL_VFIO_MP);
+	mp_req.len_param = sizeof(*p);
+	mp_req.num_fds = 0;
+
+	vfio_container_fd = -1;
+	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+			mp_reply.nb_received == 1) {
+		mp_rep = &mp_reply.msgs[0];
+		p = (struct vfio_mp_param *)mp_rep->param;
+		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+			/* mp_rep points into mp_reply.msgs, so fetch the fd
+			 * before that buffer is released
+			 */
+			vfio_container_fd = mp_rep->fds[0];
+			free(mp_reply.msgs);
+			return vfio_container_fd;
+		}
+		free(mp_reply.msgs);
+	}
+
+	RTE_LOG(ERR, EAL, " cannot request container fd\n");
+	return -1;
+}
+
+/*
+ * Look up the IOMMU group number of a device by reading the symlink
+ * "<sysfs_base>/<dev_addr>/iommu_group" and parsing its last path component.
+ *
+ * Returns 1 and stores the number in *iommu_group_num on success, 0 if the
+ * link cannot be read (no VFIO for this device), -1 if the link target
+ * cannot be split or its last component is not a valid int.
+ */
+int
+rte_vfio_get_group_num(const char *sysfs_base,
+		const char *dev_addr, int *iommu_group_num)
+{
+	char linkname[PATH_MAX];
+	char filename[PATH_MAX];
+	char *tok[16], *group_tok, *end;
+	long group;
+	int ret;
+
+	memset(linkname, 0, sizeof(linkname));
+	memset(filename, 0, sizeof(filename));
+
+	/* try to find out IOMMU group for this device */
+	snprintf(linkname, sizeof(linkname),
+			"%s/%s/iommu_group", sysfs_base, dev_addr);
+
+	/* readlink() does not NUL-terminate; keep the last byte (zeroed by
+	 * the memset above) as the terminator
+	 */
+	ret = readlink(linkname, filename, sizeof(filename) - 1);
+
+	/* if the link doesn't exist, no VFIO for us */
+	if (ret < 0)
+		return 0;
+
+	ret = rte_strsplit(filename, sizeof(filename),
+			tok, RTE_DIM(tok), '/');
+
+	if (ret <= 0) {
+		RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", dev_addr);
+		return -1;
+	}
+
+	/* IOMMU group is always the last token */
+	errno = 0;
+	group_tok = tok[ret - 1];
+	end = group_tok;
+	group = strtol(group_tok, &end, 10);
+	/* reject: no digits at all, trailing garbage, range error reported
+	 * by strtol, or a value that does not survive narrowing to int
+	 */
+	if (end == group_tok || *end != '\0' || errno != 0 ||
+			(long)(int)group != group) {
+		RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", dev_addr);
+		return -1;
+	}
+	*iommu_group_num = (int)group;
+
+	return 1;
+}
+
+/*
+ * Memseg walk callback for type1 IOMMU: maps one memseg for DMA in the
+ * container whose fd is pointed to by arg. Segments belonging to an
+ * external memseg list are skipped (0 is returned for them).
+ */
+static int
+type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+		void *arg)
+{
+	if (!msl->external) {
+		const int *fd_ptr = arg;
+
+		return vfio_type1_dma_mem_map(*fd_ptr, ms->addr_64,
+				ms->iova, ms->len, 1);
+	}
+
+	return 0;
+}
+
+/*
+ * Issue a type1 DMA map (do_map != 0) or unmap (do_map == 0) ioctl on the
+ * container for the region described by vaddr/iova/len. Mappings are
+ * requested with both read and write flags; vaddr is not used for unmap.
+ *
+ * Returns 0 on success, -1 if the ioctl fails (errno is logged).
+ */
+static int
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+		uint64_t len, int do_map)
+{
+	if (do_map == 0) {
+		struct vfio_iommu_type1_dma_unmap unmap_req;
+
+		memset(&unmap_req, 0, sizeof(unmap_req));
+		unmap_req.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+		unmap_req.size = len;
+		unmap_req.iova = iova;
+
+		if (ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+				&unmap_req)) {
+			RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
+	} else {
+		struct vfio_iommu_type1_dma_map map_req;
+
+		memset(&map_req, 0, sizeof(map_req));
+		map_req.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+		map_req.vaddr = vaddr;
+		map_req.size = len;
+		map_req.iova = iova;
+		map_req.flags = VFIO_DMA_MAP_FLAG_READ |
+				VFIO_DMA_MAP_FLAG_WRITE;
+
+		if (ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &map_req)) {
+			RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Walk all memsegs with type1_map(), handing it a pointer to the container
+ * fd; the walk's return value is passed straight back to the caller.
+ */
+static int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+	int *fd_arg = &vfio_container_fd;
+
+	return rte_memseg_walk(type1_map, fd_arg);
+}
+
+/*
+ * Perform one sPAPR DMA map or unmap on the container.
+ *
+ * Map (do_map != 0): the region [vaddr, vaddr + len) is first registered
+ * with VFIO_IOMMU_SPAPR_REGISTER_MEMORY, then mapped read/write at iova
+ * with VFIO_IOMMU_MAP_DMA.
+ * Unmap (do_map == 0): the region is unregistered with
+ * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, then unmapped with
+ * VFIO_IOMMU_UNMAP_DMA.
+ *
+ * Returns 0 on success, -1 as soon as any ioctl fails (errno is logged).
+ */
+static int
+vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+		uint64_t len, int do_map)
+{
+	struct vfio_iommu_type1_dma_map dma_map;
+	struct vfio_iommu_type1_dma_unmap dma_unmap;
+	int ret;
+	struct vfio_iommu_spapr_register_memory reg = {
+		.argsz = sizeof(reg),
+		.flags = 0
+	};
+	reg.vaddr = (uintptr_t) vaddr;
+	reg.size = len;
+
+	if (do_map != 0) {
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, "
+					"error %i (%s)\n", errno, strerror(errno));
+			return -1;
+		}
+
+		memset(&dma_map, 0, sizeof(dma_map));
+		dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+		dma_map.vaddr = vaddr;
+		dma_map.size = len;
+		dma_map.iova = iova;
+		dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+				VFIO_DMA_MAP_FLAG_WRITE;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
+
+	} else {
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
+
+		memset(&dma_unmap, 0, sizeof(dma_unmap));
+		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+		dma_unmap.size = len;
+		dma_unmap.iova = iova;
+
+		ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+				&dma_unmap);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Memseg walk callback for sPAPR: maps one memseg for DMA in the container
+ * whose fd is pointed to by arg. Segments belonging to an external memseg
+ * list are skipped (0 is returned for them).
+ */
+static int
+vfio_spapr_map_walk(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, void *arg)
+{
+	if (!msl->external) {
+		const int *fd_ptr = arg;
+
+		return vfio_spapr_dma_do_map(*fd_ptr, ms->addr_64,
+				ms->iova, ms->len, 1);
+	}
+
+	return 0;
+}
+
+/* Accumulator passed as the walk argument when sizing the sPAPR DMA window. */
+struct spapr_walk_param {
+	/* largest (iova + len) seen so far among the walked memsegs */
+	uint64_t window_size;
+	/* hugepage_sz of the memseg that produced the current window_size */
+	uint64_t hugepage_sz;
+};
+/*
+ * Memseg walk callback: tracks in the spapr_walk_param pointed to by arg
+ * the highest end address (iova + len) among non-external memsegs, together
+ * with the hugepage size of the memseg that reached it. Always returns 0.
+ */
+static int
+vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, void *arg)
+{
+	struct spapr_walk_param *walk = arg;
+	uint64_t seg_end;
+
+	if (msl->external)
+		return 0;
+
+	seg_end = ms->iova + ms->len;
+	if (seg_end > walk->window_size) {
+		walk->window_size = seg_end;
+		walk->hugepage_sz = ms->hugepage_sz;
+	}
+
+	return 0;
+}
+
+/*
+ * Replace the container's default DMA window with the one described by
+ * *create: query the sPAPR TCE info, remove the window starting at
+ * dma32_window_start, then create the new window. The new window is only
+ * accepted if the kernel placed it at start address 0.
+ *
+ * Returns 0 on success, -1 on any failure (the reason is logged).
+ */
+static int
+vfio_spapr_create_new_dma_window(int vfio_container_fd,
+		struct vfio_iommu_spapr_tce_create *create) {
+	struct vfio_iommu_spapr_tce_info tce_info = {
+		.argsz = sizeof(tce_info),
+	};
+	struct vfio_iommu_spapr_tce_remove tce_remove = {
+		.argsz = sizeof(tce_remove),
+	};
+
+	/* step 1: fetch the current IOMMU information */
+	if (ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO,
+			&tce_info)) {
+		RTE_LOG(ERR, EAL, " cannot get iommu info, error %i (%s)\n",
+				errno, strerror(errno));
+		return -1;
+	}
+
+	/* step 2: drop the default 32 bit DMA window */
+	tce_remove.start_addr = tce_info.dma32_window_start;
+	if (ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE,
+			&tce_remove)) {
+		RTE_LOG(ERR, EAL, " cannot remove default DMA window, error %i (%s)\n",
+				errno, strerror(errno));
+		return -1;
+	}
+
+	/* step 3: ask for the replacement window */
+	if (ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create)) {
+		RTE_LOG(ERR, EAL, " cannot create new DMA window, error %i (%s)\n",
+				errno, strerror(errno));
+		return -1;
+	}
+
+	if (create->start_addr == 0)
+		return 0;
+
+	RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
+	return -1;
+}
+
+/*
+ * Map (do_map != 0) or unmap (do_map == 0) a user region for DMA in an
+ * sPAPR container, holding the container's user_mem_maps lock throughout.
+ *
+ * The required DMA window size is computed as the highest end address over
+ * all memsegs and all recorded user maps, rounded up with rte_align64pow2().
+ * On map, if iova lies beyond that size the window is re-created and every
+ * memseg and user map is mapped again before the requested region is
+ * mapped. On unmap, an iova beyond that size is rejected.
+ *
+ * Returns 0 on success, -1 on failure (the reason is logged).
+ */
+static int
+vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+		uint64_t len, int do_map)
+{
+	struct spapr_walk_param param;
+	struct vfio_iommu_spapr_tce_create create = {
+		.argsz = sizeof(create),
+	};
+	struct vfio_config *vfio_cfg;
+	struct user_mem_maps *user_mem_maps;
+	int i, ret = 0;
+
+	vfio_cfg = get_vfio_cfg_by_container_fd(vfio_container_fd);
+	if (vfio_cfg == NULL) {
+		RTE_LOG(ERR, EAL, " invalid container fd!\n");
+		return -1;
+	}
+
+	user_mem_maps = &vfio_cfg->mem_maps;
+	rte_spinlock_recursive_lock(&user_mem_maps->lock);
+
+	/* check if window size needs to be adjusted */
+	memset(&param, 0, sizeof(param));
+
+	/* we're inside a callback so use thread-unsafe version */
+	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
+				&param) < 0) {
+		RTE_LOG(ERR, EAL, "Could not get window size\n");
+		ret = -1;
+		goto out;
+	}
+
+	/* also check user maps. accumulate into param.window_size: that is
+	 * the value the window is sized from just below, so writing the
+	 * maximum anywhere else would be lost.
+	 */
+	for (i = 0; i < user_mem_maps->n_maps; i++) {
+		uint64_t max = user_mem_maps->maps[i].iova +
+				user_mem_maps->maps[i].len;
+		param.window_size = RTE_MAX(param.window_size, max);
+	}
+
+	/* sPAPR requires window size to be a power of 2 */
+	create.window_size = rte_align64pow2(param.window_size);
+	create.page_shift = __builtin_ctzll(param.hugepage_sz);
+	create.levels = 1;
+
+	if (do_map) {
+		void *addr;
+		/* re-create window and remap the entire memory */
+		if (iova > create.window_size) {
+			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
+					&create) < 0) {
+				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
+				ret = -1;
+				goto out;
+			}
+			/* we're inside a callback, so use thread-unsafe version
+			 */
+			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
+					&vfio_container_fd) < 0) {
+				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
+				ret = -1;
+				goto out;
+			}
+			/* remap all user maps */
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
+				struct user_mem_map *map =
+						&user_mem_maps->maps[i];
+				if (vfio_spapr_dma_do_map(vfio_container_fd,
+						map->addr, map->iova, map->len,
+						1)) {
+					RTE_LOG(ERR, EAL, "Could not recreate user DMA maps\n");
+					ret = -1;
+					goto out;
+				}
+			}
+		}
+
+		/* now that we've remapped all of the memory that was present
+		 * before, map the segment that we were requested to map.
+		 *
+		 * however, if we were called by the callback, the memory we
+		 * were called with was already in the memseg list, so previous
+		 * mapping should've mapped that segment already.
+		 *
+		 * virt2memseg_list is a relatively cheap check, so use that. if
+		 * memory is within any memseg list, it's a memseg, so it's
+		 * already mapped.
+		 */
+		addr = (void *)(uintptr_t)vaddr;
+		if (rte_mem_virt2memseg_list(addr) == NULL &&
+				vfio_spapr_dma_do_map(vfio_container_fd,
+					vaddr, iova, len, 1) < 0) {
+			RTE_LOG(ERR, EAL, "Could not map segment\n");
+			ret = -1;
+			goto out;
+		}
+	} else {
+		/* for unmap, check if iova within DMA window */
+		if (iova > create.window_size) {
+			RTE_LOG(ERR, EAL, "iova beyond DMA window for unmap\n");
+			ret = -1;
+			goto out;
+		}
+
+		/* NOTE(review): the result of the unmap itself is not
+		 * propagated to the caller; kept as in the original.
+		 */
+		vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);
+	}
+out:
+	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+	return ret;
+}
+
+/*
+ * Set up DMA for all memsegs in an sPAPR container: size a DMA window from
+ * the highest memseg end address (rounded up with rte_align64pow2()),
+ * create that window, then map every memseg into it.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+vfio_spapr_dma_map(int vfio_container_fd)
+{
+	struct vfio_iommu_spapr_tce_create create = {
+		.argsz = sizeof(create),
+	};
+	struct spapr_walk_param param;
+
+	memset(&param, 0, sizeof(param));
+
+	/* create DMA window from 0 to max(phys_addr + len) */
+	if (rte_memseg_walk(vfio_spapr_window_size_walk, &param) < 0) {
+		/* a failed walk leaves param unreliable, do not size from it */
+		RTE_LOG(ERR, EAL, "Could not get window size\n");
+		return -1;
+	}
+
+	/* sPAPR requires window size to be a power of 2 */
+	create.window_size = rte_align64pow2(param.window_size);
+	create.page_shift = __builtin_ctzll(param.hugepage_sz);
+	create.levels = 1;
+
+	if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0) {
+		RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
+		return -1;
+	}
+
+	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+	if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * DMA setup hook for No-IOMMU mode: nothing has to be mapped, so the
+ * container fd is ignored and success (0) is reported unconditionally.
+ */
+static int
+vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
+{
+	return 0;
+}
+
+/*
+ * User-region map/unmap hook for No-IOMMU mode: every argument is ignored
+ * and success (0) is reported unconditionally, since this mode needs no
+ * DMA mapping.
+ */
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+		uint64_t __rte_unused vaddr,
+		uint64_t __rte_unused iova,
+		uint64_t __rte_unused len,
+		int __rte_unused do_map)
+{
+	return 0;
+}
+
+/*
+ * Dispatch a user-region DMA map/unmap to the IOMMU type attached to
+ * vfio_cfg.
+ *
+ * Fails with -1 and rte_errno = ENODEV when no IOMMU type is set, or
+ * rte_errno = ENOTSUP when the type has no dma_user_map_func. Otherwise
+ * returns whatever that function returns.
+ */
+static int
+vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+		uint64_t len, int do_map)
+{
+	const struct vfio_iommu_type *iommu = vfio_cfg->vfio_iommu_type;
+
+	if (iommu == NULL) {
+		RTE_LOG(ERR, EAL, " VFIO support not initialized\n");
+		rte_errno = ENODEV;
+		return -1;
+	}
+
+	if (iommu->dma_user_map_func == NULL) {
+		RTE_LOG(ERR, EAL,
+			" VFIO custom DMA region maping not supported by IOMMU %s\n",
+			iommu->name);
+		rte_errno = ENOTSUP;
+		return -1;
+	}
+
+	return iommu->dma_user_map_func(vfio_cfg->vfio_container_fd,
+			vaddr, iova, len, do_map);
+}
+
+/*
+ * Map a user region for DMA in the given container and record it in the
+ * container's list of user mem maps, all under the list's recursive lock.
+ *
+ * Returns 0 on success; -1 if the list is already full (rte_errno is set to
+ * ENOMEM) or if the mapping itself fails.
+ */
+static int
+container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+		uint64_t len)
+{
+	struct user_mem_maps *maps = &vfio_cfg->mem_maps;
+	struct user_mem_map *slot;
+	int rc = -1;
+
+	rte_spinlock_recursive_lock(&maps->lock);
+
+	if (maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
+		RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
+		rte_errno = ENOMEM;
+	} else if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) {
+		/* the mapping is attempted before anything is recorded: a
+		 * region that could not be mapped (which includes the case
+		 * where no device is attached yet) is treated as unsupported
+		 * and never enters the list of active mappings.
+		 */
+		RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n");
+	} else {
+		/* append the new entry, then let the list be compacted */
+		slot = &maps->maps[maps->n_maps++];
+		slot->addr = vaddr;
+		slot->iova = iova;
+		slot->len = len;
+
+		compact_user_maps(maps);
+		rc = 0;
+	}
+
+	rte_spinlock_recursive_unlock(&maps->lock);
+	return rc;
+}
+
+/*
+ * Unmap a user region from DMA in the given container and remove it from
+ * the container's list of user mem maps, all under the list's recursive
+ * lock. The region may be a whole recorded mapping or only part of one; in
+ * the latter case the recorded entry is split and an extra slot is used.
+ *
+ * Returns 0 on success. Returns -1 when no recorded mapping matches
+ * (rte_errno = EINVAL), when a split is needed but the list is full
+ * (rte_errno = ENOMEM), or when the unmap fails with an rte_errno other
+ * than ENODEV/ENOTSUP. On every failure the list is left as it was.
+ */
+static int
+container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
+		uint64_t len)
+{
+	struct user_mem_map *map, *new_map = NULL;
+	struct user_mem_maps *user_mem_maps;
+	int ret = 0;
+
+	user_mem_maps = &vfio_cfg->mem_maps;
+	rte_spinlock_recursive_lock(&user_mem_maps->lock);
+
+	/* find our mapping */
+	map = find_user_mem_map(user_mem_maps, vaddr, iova, len);
+	if (!map) {
+		RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n");
+		rte_errno = EINVAL;
+		ret = -1;
+		goto out;
+	}
+	if (map->addr != vaddr || map->iova != iova || map->len != len) {
+		/* we're partially unmapping a previously mapped region, so we
+		 * need to split entry into two.
+		 */
+		if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
+			RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n");
+			rte_errno = ENOMEM;
+			ret = -1;
+			goto out;
+		}
+		new_map = &user_mem_maps->maps[user_mem_maps->n_maps++];
+	}
+
+	/* unmap the entry */
+	if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) {
+		/* there may not be any devices plugged in, so unmapping will
+		 * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't
+		 * stop us from removing the mapping, as the assumption is we
+		 * won't be needing this memory any more and thus will want to
+		 * prevent it from being remapped again on hotplug. so, only
+		 * fail if we indeed failed to unmap (e.g. if the mapping was
+		 * within our mapped range but had invalid alignment).
+		 */
+		if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
+			RTE_LOG(ERR, EAL, "Couldn't unmap region for DMA\n");
+			/* give back the slot reserved for the split, which
+			 * was never filled in
+			 */
+			if (new_map != NULL)
+				user_mem_maps->n_maps--;
+			ret = -1;
+			goto out;
+		} else {
+			RTE_LOG(DEBUG, EAL, "DMA unmapping failed, but removing mappings anyway\n");
+		}
+	}
+	/* remove map from the list of active mappings */
+	if (new_map != NULL) {
+		adjust_map(map, new_map, vaddr, len);
+
+		/* if we've created a new map by splitting, sort everything */
+		if (!is_null_map(new_map)) {
+			compact_user_maps(user_mem_maps);
+		} else {
+			/* we've created a new mapping, but it was unused */
+			user_mem_maps->n_maps--;
+		}
+	} else {
+		memset(map, 0, sizeof(*map));
+		compact_user_maps(user_mem_maps);
+		user_mem_maps->n_maps--;
+	}
+
+out:
+	rte_spinlock_recursive_unlock(&user_mem_maps->lock);
+	return ret;
+}
+
+/*
+ * Map a region for DMA in the default VFIO container (default_vfio_cfg).
+ * A zero length is rejected with -1 and rte_errno = EINVAL; otherwise the
+ * result of container_dma_map() is returned.
+ */
+int
+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	if (len != 0)
+		return container_dma_map(default_vfio_cfg, vaddr, iova, len);
+
+	rte_errno = EINVAL;
+	return -1;
+}
+
+/*
+ * Unmap a region from DMA in the default VFIO container (default_vfio_cfg).
+ * A zero length is rejected with -1 and rte_errno = EINVAL; otherwise the
+ * result of container_dma_unmap() is returned.
+ */
+int
+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+	if (len != 0)
+		return container_dma_unmap(default_vfio_cfg, vaddr, iova, len);
+
+	rte_errno = EINVAL;
+	return -1;
+}
+
+/*
+ * Report whether VFIO No-IOMMU mode is enabled by reading the first byte
+ * of the VFIO_NOIOMMU_MODE file.
+ *
+ * Returns 1 if that byte is 'Y', 0 if it is anything else or if the file
+ * does not exist (ENOENT), and -1 if the file cannot be opened for another
+ * reason or a single byte cannot be read from it.
+ */
+int
+rte_vfio_noiommu_is_enabled(void)
+{
+	int fd;
+	int read_errno;
+	ssize_t cnt;
+	char c;
+
+	fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
+	if (fd < 0) {
+		if (errno != ENOENT) {
+			RTE_LOG(ERR, EAL, " cannot open vfio noiommu file %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
+		/*
+		 * else the file does not exists
+		 * i.e. noiommu is not enabled
+		 */
+		return 0;
+	}
+
+	cnt = read(fd, &c, 1);
+	/* close() may overwrite errno, so capture read()'s value first */
+	read_errno = errno;
+	close(fd);
+	if (cnt != 1) {
+		RTE_LOG(ERR, EAL, " unable to read from vfio noiommu "
+				"file %i (%s)\n", read_errno,
+				strerror(read_errno));
+		return -1;
+	}
+
+	return c == 'Y';
+}
+
+/*
+ * Create a new VFIO container: pick the first entry of vfio_cfgs[], from
+ * index 1 upwards, whose container fd is -1, and store a freshly obtained
+ * container fd in it.
+ *
+ * Returns the new container fd, or -1 if every entry is in use or the fd
+ * could not be obtained.
+ */
+int
+rte_vfio_container_create(void)
+{
+	int slot = 1;
+	int fd;
+
+	/* the search deliberately starts at index 1, not 0 */
+	while (slot < VFIO_MAX_CONTAINERS &&
+			vfio_cfgs[slot].vfio_container_fd != -1)
+		slot++;
+
+	if (slot == VFIO_MAX_CONTAINERS) {
+		RTE_LOG(ERR, EAL, "exceed max vfio container limit\n");
+		return -1;
+	}
+
+	fd = rte_vfio_get_container_fd();
+	vfio_cfgs[slot].vfio_container_fd = fd;
+	if (fd < 0) {
+		RTE_LOG(NOTICE, EAL, "fail to create a new container\n");
+		return -1;
+	}
+
+	return fd;
+}
+
+/*
+ * Destroy a container: unbind every group still recorded in it, close the
+ * container fd and reset the config entry (fd = -1, no active groups, no
+ * IOMMU type).
+ *
+ * Returns 0 on success, -1 if container_fd matches no known container.
+ */
+int __rte_experimental
+rte_vfio_container_destroy(int container_fd)
+{
+	struct vfio_config *cfg = get_vfio_cfg_by_container_fd(container_fd);
+	int idx;
+
+	if (cfg == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid container fd\n");
+		return -1;
+	}
+
+	for (idx = 0; idx < VFIO_MAX_GROUPS; idx++) {
+		/* entries with group_num == -1 hold no group */
+		if (cfg->vfio_groups[idx].group_num == -1)
+			continue;
+		rte_vfio_container_group_unbind(container_fd,
+				cfg->vfio_groups[idx].group_num);
+	}
+
+	close(container_fd);
+	cfg->vfio_iommu_type = NULL;
+	cfg->vfio_active_groups = 0;
+	cfg->vfio_container_fd = -1;
+
+	return 0;
+}
+
+/*
+ * Bind an IOMMU group to a container: find a free group entry in the
+ * container's config, open the group fd and record it there.
+ *
+ * Returns the group fd on success; -1 if the container is unknown, the
+ * container already holds VFIO_MAX_GROUPS groups, no free entry exists, or
+ * the group fd cannot be opened.
+ */
+int
+rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
+{
+	struct vfio_config *cfg = get_vfio_cfg_by_container_fd(container_fd);
+	struct vfio_group *slot = NULL;
+	int group_fd;
+	int idx;
+
+	if (cfg == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid container fd\n");
+		return -1;
+	}
+
+	/* Check room for new group */
+	if (cfg->vfio_active_groups == VFIO_MAX_GROUPS) {
+		RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+		return -1;
+	}
+
+	/* first entry whose group_num is -1 is the one to use */
+	for (idx = 0; idx < VFIO_MAX_GROUPS && slot == NULL; idx++) {
+		if (cfg->vfio_groups[idx].group_num == -1)
+			slot = &cfg->vfio_groups[idx];
+	}
+
+	/* not expected given the active-groups check above */
+	if (slot == NULL) {
+		RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
+		return -1;
+	}
+
+	group_fd = vfio_open_group_fd(iommu_group_num);
+	if (group_fd < 0) {
+		RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num);
+		return -1;
+	}
+
+	slot->group_num = iommu_group_num;
+	slot->fd = group_fd;
+	slot->devices = 0;
+	cfg->vfio_active_groups++;
+
+	return group_fd;
+}
+
+/*
+ * Unbind an IOMMU group from a container: close the group's fd (when it is
+ * non-negative) and reset its entry in the container's config.
+ *
+ * Returns 0 on success; -1 if the container is unknown, the group is not
+ * recorded in it, or closing the group fd fails (the entry is then left
+ * untouched).
+ */
+int
+rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
+{
+	struct vfio_config *cfg = get_vfio_cfg_by_container_fd(container_fd);
+	struct vfio_group *grp = NULL;
+	int idx;
+
+	if (cfg == NULL) {
+		RTE_LOG(ERR, EAL, "Invalid container fd\n");
+		return -1;
+	}
+
+	/* locate the first entry recorded for this group number */
+	for (idx = 0; idx < VFIO_MAX_GROUPS && grp == NULL; idx++) {
+		if (cfg->vfio_groups[idx].group_num == iommu_group_num)
+			grp = &cfg->vfio_groups[idx];
+	}
+
+	if (grp == NULL) {
+		RTE_LOG(ERR, EAL, "Specified group number not found\n");
+		return -1;
+	}
+
+	if (grp->fd >= 0 && close(grp->fd) < 0) {
+		RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for"
+				" iommu_group_num %d\n", iommu_group_num);
+		return -1;
+	}
+
+	grp->devices = 0;
+	grp->fd = -1;
+	grp->group_num = -1;
+	cfg->vfio_active_groups--;
+
+	return 0;
+}
+
+/*
+ * Map a region for DMA in the container identified by container_fd.
+ * Returns -1 with rte_errno = EINVAL for a zero length, -1 for an unknown
+ * container fd, otherwise the result of container_dma_map().
+ */
+int
+rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
+		uint64_t len)
+{
+	struct vfio_config *cfg;
+
+	if (len == 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	cfg = get_vfio_cfg_by_container_fd(container_fd);
+	if (cfg != NULL)
+		return container_dma_map(cfg, vaddr, iova, len);
+
+	RTE_LOG(ERR, EAL, "Invalid container fd\n");
+	return -1;
+}
+
+/*
+ * Unmap a region from DMA in the container identified by container_fd.
+ * Returns -1 with rte_errno = EINVAL for a zero length, -1 for an unknown
+ * container fd, otherwise the result of container_dma_unmap().
+ */
+int
+rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
+		uint64_t len)
+{
+	struct vfio_config *cfg;
+
+	if (len == 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	cfg = get_vfio_cfg_by_container_fd(container_fd);
+	if (cfg != NULL)
+		return container_dma_unmap(cfg, vaddr, iova, len);
+
+	RTE_LOG(ERR, EAL, "Invalid container fd\n");
+	return -1;
+}
+
+#else
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): ignores its arguments and always returns -1.
+ */
+int
+rte_vfio_dma_map(__rte_unused uint64_t vaddr,
+		__rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): ignores its arguments and always returns -1.
+ */
+int
+rte_vfio_dma_unmap(__rte_unused uint64_t vaddr,
+		__rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): no device is set up, nothing is written through the pointer
+ * arguments, and -1 is always returned.
+ */
+int
+rte_vfio_setup_device(__rte_unused const char *sysfs_base,
+		__rte_unused const char *dev_addr,
+		__rte_unused int *vfio_dev_fd,
+		__rte_unused struct vfio_device_info *device_info)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): ignores its arguments and always returns -1.
+ */
+int
+rte_vfio_release_device(__rte_unused const char *sysfs_base,
+		__rte_unused const char *dev_addr,
+		__rte_unused int fd)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): the module name is ignored and -1 is always returned.
+ */
+int
+rte_vfio_enable(__rte_unused const char *modname)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): the module name is ignored and -1 is always returned.
+ */
+int
+rte_vfio_is_enabled(__rte_unused const char *modname)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): no file is consulted and -1 is always returned.
+ */
+int
+rte_vfio_noiommu_is_enabled(void)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): the group fd is ignored and -1 is always returned.
+ */
+int
+rte_vfio_clear_group(__rte_unused int vfio_group_fd)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): nothing is looked up, *iommu_group_num is not written, and -1
+ * is always returned.
+ */
+int
+rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
+		__rte_unused const char *dev_addr,
+		__rte_unused int *iommu_group_num)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): no container is opened and -1 is always returned.
+ */
+int
+rte_vfio_get_container_fd(void)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): the group number is ignored and -1 is always returned.
+ */
+int
+rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): no container is created and -1 is always returned.
+ */
+int
+rte_vfio_container_create(void)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): the container fd is ignored and -1 is always returned.
+ */
+int
+rte_vfio_container_destroy(__rte_unused int container_fd)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): ignores its arguments and always returns -1.
+ */
+int
+rte_vfio_container_group_bind(
+		__rte_unused int container_fd,
+		__rte_unused int iommu_group_num)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): ignores its arguments and always returns -1.
+ */
+int
+rte_vfio_container_group_unbind(
+		__rte_unused int container_fd,
+		__rte_unused int iommu_group_num)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): ignores its arguments and always returns -1.
+ */
+int
+rte_vfio_container_dma_map(
+		__rte_unused int container_fd,
+		__rte_unused uint64_t vaddr,
+		__rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
+/*
+ * Stub from the #else branch (presumably built when VFIO_PRESENT is not
+ * defined): ignores its arguments and always returns -1.
+ */
+int
+rte_vfio_container_dma_unmap(
+		__rte_unused int container_fd,
+		__rte_unused uint64_t vaddr,
+		__rte_unused uint64_t iova,
+		__rte_unused uint64_t len)
+{
+	return -1;
+}
+
+#endif /* VFIO_PRESENT */