X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal_vfio.c;h=95cd343b7f35a40b89896e2b6a5556650d2a64bc;hb=b3a022b17c94196149ec1665584fe81341020a18;hp=c1f0f87494e309929bcf3490a24e17d35c74b583;hpb=73a6390859385255510ca316ca5894a43a7d0c79;p=dpdk.git

diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index c1f0f87494..95cd343b7f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #include <string.h>
@@ -20,6 +20,8 @@
 
 #ifdef VFIO_PRESENT
 
+#define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb"
+
 /* per-process VFIO config */
 static struct vfio_config vfio_cfg;
 
@@ -69,13 +71,49 @@ struct user_mem_map {
 	uint64_t len;
 };
 static struct {
-	rte_spinlock_t lock;
+	rte_spinlock_recursive_t lock;
 	int n_maps;
 	struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS];
 } user_mem_maps = {
-	.lock = RTE_SPINLOCK_INITIALIZER
+	.lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER
 };
 
+/* for sPAPR IOMMU, we will need to walk memseg list, but we cannot use
+ * rte_memseg_walk() because by the time we enter callback we will be holding a
+ * write lock, so regular rte_memseg_walk() will deadlock. copying the same
+ * iteration code everywhere is not ideal as well. so, use a lockless copy of
+ * memseg walk here.
+ */
+static int
+memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int i, ms_idx, ret = 0;
+
+	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+		struct rte_memseg_list *msl = &mcfg->memsegs[i];
+		const struct rte_memseg *ms;
+		struct rte_fbarray *arr;
+
+		if (msl->memseg_arr.count == 0)
+			continue;
+
+		arr = &msl->memseg_arr;
+
+		ms_idx = rte_fbarray_find_next_used(arr, 0);
+		while (ms_idx >= 0) {
+			ms = rte_fbarray_get(arr, ms_idx);
+			ret = func(msl, ms, arg);
+			if (ret < 0)
+				return -1;
+			if (ret > 0)
+				return 1;
+			ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
+		}
+	}
+	return 0;
+}
+
 static int
 is_null_map(const struct user_mem_map *map)
 {
@@ -182,15 +220,15 @@ find_user_mem_map(uint64_t addr, uint64_t iova, uint64_t len)
 		/* check start VA */
 		if (addr < map->addr || addr >= map_va_end)
 			continue;
-		/* check if IOVA end is within boundaries */
-		if (va_end <= map->addr || va_end >= map_va_end)
+		/* check if VA end is within boundaries */
+		if (va_end <= map->addr || va_end > map_va_end)
 			continue;
-		/* check start PA */
+		/* check start IOVA */
 		if (iova < map->iova || iova >= map_iova_end)
 			continue;
 		/* check if IOVA end is within boundaries */
-		if (iova_end <= map->iova || iova_end >= map_iova_end)
+		if (iova_end <= map->iova || iova_end > map_iova_end)
 			continue;
 		/* we've found our map */
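The find_user_mem_map() change above is an off-by-one fix: map boundaries are half-open intervals, so a request whose end address equals the map's end address is still fully contained, and the end-of-range comparison must be ">" rather than ">=". A minimal, self-contained sketch of the corrected containment check (the struct and function names here are illustrative, not part of the patch):

```c
#include <assert.h>
#include <stdint.h>

struct range { uint64_t start, len; };

/* 1 if [addr, addr + len) lies entirely within [r->start, r->start + r->len) */
static int
range_contains(const struct range *r, uint64_t addr, uint64_t len)
{
	uint64_t r_end = r->start + r->len;
	uint64_t end = addr + len;

	return addr >= r->start && addr < r_end &&
			end > r->start && end <= r_end;
}

int main(void)
{
	struct range r = { .start = 0x1000, .len = 0x1000 };

	/* a request ending exactly at the map's end must be found; with the
	 * old ">=" comparison this lookup was incorrectly rejected */
	assert(range_contains(&r, 0x1800, 0x800));
	return 0;
}
```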
@@ -246,16 +284,20 @@ compact_user_maps(void)
 }
 
 int
-vfio_get_group_fd(int iommu_group_no)
+rte_vfio_get_group_fd(int iommu_group_num)
 {
 	int i;
 	int vfio_group_fd;
 	char filename[PATH_MAX];
 	struct vfio_group *cur_grp;
+	struct rte_mp_msg mp_req, *mp_rep;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
 	/* check if we already have the group descriptor open */
 	for (i = 0; i < VFIO_MAX_GROUPS; i++)
-		if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
+		if (vfio_cfg.vfio_groups[i].group_num == iommu_group_num)
 			return vfio_cfg.vfio_groups[i].fd;
 
 	/* Lets see first if there is room for a new group */
@@ -266,7 +308,7 @@ vfio_get_group_fd(int iommu_group_no)
 
 	/* Now lets get an index for the new group */
 	for (i = 0; i < VFIO_MAX_GROUPS; i++)
-		if (vfio_cfg.vfio_groups[i].group_no == -1) {
+		if (vfio_cfg.vfio_groups[i].group_num == -1) {
 			cur_grp = &vfio_cfg.vfio_groups[i];
 			break;
 		}
@@ -280,7 +322,7 @@ vfio_get_group_fd(int iommu_group_no)
 	if (internal_config.process_type == RTE_PROC_PRIMARY) {
 		/* try regular group format */
 		snprintf(filename, sizeof(filename),
-				 VFIO_GROUP_FMT, iommu_group_no);
+				 VFIO_GROUP_FMT, iommu_group_num);
 		vfio_group_fd = open(filename, O_RDWR);
 		if (vfio_group_fd < 0) {
 			/* if file not found, it's not an error */
@@ -292,7 +334,8 @@ vfio_get_group_fd(int iommu_group_no)
 
 			/* special case: try no-IOMMU path as well */
 			snprintf(filename, sizeof(filename),
-					VFIO_NOIOMMU_GROUP_FMT, iommu_group_no);
+					VFIO_NOIOMMU_GROUP_FMT,
+					iommu_group_num);
 			vfio_group_fd = open(filename, O_RDWR);
 			if (vfio_group_fd < 0) {
 				if (errno != ENOENT) {
@@ -305,56 +348,40 @@ vfio_get_group_fd(int iommu_group_no)
 			/* noiommu group found */
 		}
 
-		cur_grp->group_no = iommu_group_no;
+		cur_grp->group_num = iommu_group_num;
 		cur_grp->fd = vfio_group_fd;
 		vfio_cfg.vfio_active_groups++;
 		return vfio_group_fd;
 	}
 	/* if we're in a secondary process, request group fd from the primary
-	 * process via our socket
+	 * process via mp channel.
 	 */
-	else {
-		int socket_fd, ret;
-
-		socket_fd = vfio_mp_sync_connect_to_primary();
-
-		if (socket_fd < 0) {
-			RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
-			return -1;
-		}
-		if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
-			RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-			close(socket_fd);
-			return -1;
-		}
-		if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
-			RTE_LOG(ERR, EAL, "  cannot send group number!\n");
-			close(socket_fd);
-			return -1;
-		}
-		ret = vfio_mp_sync_receive_request(socket_fd);
-		switch (ret) {
-		case SOCKET_NO_FD:
-			close(socket_fd);
-			return 0;
-		case SOCKET_OK:
-			vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
-			/* if we got the fd, store it and return it */
-			if (vfio_group_fd > 0) {
-				close(socket_fd);
-				cur_grp->group_no = iommu_group_no;
-				cur_grp->fd = vfio_group_fd;
-				vfio_cfg.vfio_active_groups++;
-				return vfio_group_fd;
-			}
-			/* fall-through on error */
-		default:
-			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
-			close(socket_fd);
-			return -1;
+	p->req = SOCKET_REQ_GROUP;
+	p->group_num = iommu_group_num;
+	strcpy(mp_req.name, EAL_VFIO_MP);
+	mp_req.len_param = sizeof(*p);
+	mp_req.num_fds = 0;
+
+	vfio_group_fd = -1;
+	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+			mp_reply.nb_received == 1) {
+		mp_rep = &mp_reply.msgs[0];
+		p = (struct vfio_mp_param *)mp_rep->param;
+		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+			cur_grp->group_num = iommu_group_num;
+			vfio_group_fd = mp_rep->fds[0];
+			cur_grp->fd = vfio_group_fd;
+			vfio_cfg.vfio_active_groups++;
+		} else if (p->result == SOCKET_NO_FD) {
+			RTE_LOG(ERR, EAL, "  bad VFIO group fd\n");
+			vfio_group_fd = 0;
		}
+		free(mp_reply.msgs);
 	}
-	return -1;
+
+	if (vfio_group_fd < 0)
+		RTE_LOG(ERR, EAL, "  cannot request group fd\n");
+	return vfio_group_fd;
 }
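Both secondary-process paths in this patch follow the same rte_mp_request_sync() pattern: fill in a request, wait up to the timeout for the primary's reply, pull the fd out of the reply message, and free the reply array (the library allocates it, the caller owns it). A condensed sketch of that pattern, assuming the DPDK 18.05 mp-channel API; the action name is hypothetical:

```c
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <rte_eal.h>

static int
request_fd_from_primary(void)
{
	struct rte_mp_msg req;
	struct rte_mp_reply reply;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int fd = -1;

	memset(&req, 0, sizeof(req));
	strcpy(req.name, "example_action");	/* hypothetical action name */
	req.len_param = 0;
	req.num_fds = 0;

	if (rte_mp_request_sync(&req, &reply, &ts) == 0 &&
			reply.nb_received == 1) {
		if (reply.msgs[0].num_fds == 1)
			fd = reply.msgs[0].fds[0];
		free(reply.msgs);	/* reply array is caller-owned */
	}
	return fd;
}
```

Compared with the removed hand-rolled socket code, one request/reply round trip replaces the whole connect/send/receive/close sequence, and fd passing is handled by the channel itself.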
@@ -406,61 +433,80 @@ vfio_group_device_count(int vfio_group_fd)
 	return vfio_cfg.vfio_groups[i].devices;
 }
 
+static void
+vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
+{
+	struct rte_memseg_list *msl;
+	struct rte_memseg *ms;
+	size_t cur_len = 0;
+
+	msl = rte_mem_virt2memseg_list(addr);
+
+	/* for IOVA as VA mode, no need to care for IOVA addresses */
+	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+		uint64_t vfio_va = (uint64_t)(uintptr_t)addr;
+		if (type == RTE_MEM_EVENT_ALLOC)
+			vfio_dma_mem_map(vfio_va, vfio_va, len, 1);
+		else
+			vfio_dma_mem_map(vfio_va, vfio_va, len, 0);
+		return;
+	}
+
+	/* memsegs are contiguous in memory */
+	ms = rte_mem_virt2memseg(addr, msl);
+	while (cur_len < len) {
+		if (type == RTE_MEM_EVENT_ALLOC)
+			vfio_dma_mem_map(ms->addr_64, ms->iova, ms->len, 1);
+		else
+			vfio_dma_mem_map(ms->addr_64, ms->iova, ms->len, 0);
+
+		cur_len += ms->len;
+		++ms;
+	}
+}
+
 int
 rte_vfio_clear_group(int vfio_group_fd)
 {
 	int i;
-	int socket_fd, ret;
+	struct rte_mp_msg mp_req, *mp_rep;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
 	if (internal_config.process_type == RTE_PROC_PRIMARY) {
 
 		i = get_vfio_group_idx(vfio_group_fd);
 		if (i < 0)
 			return -1;
-		vfio_cfg.vfio_groups[i].group_no = -1;
+		vfio_cfg.vfio_groups[i].group_num = -1;
 		vfio_cfg.vfio_groups[i].fd = -1;
 		vfio_cfg.vfio_groups[i].devices = 0;
 		vfio_cfg.vfio_active_groups--;
 		return 0;
 	}
 
-	/* This is just for SECONDARY processes */
-	socket_fd = vfio_mp_sync_connect_to_primary();
-
-	if (socket_fd < 0) {
-		RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
-		return -1;
-	}
-
-	if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) {
-		RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-		close(socket_fd);
-		return -1;
-	}
+	p->req = SOCKET_CLR_GROUP;
+	p->group_num = vfio_group_fd;
+	strcpy(mp_req.name, EAL_VFIO_MP);
+	mp_req.len_param = sizeof(*p);
+	mp_req.num_fds = 0;
+
+	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+			mp_reply.nb_received == 1) {
+		mp_rep = &mp_reply.msgs[0];
+		p = (struct vfio_mp_param *)mp_rep->param;
+		if (p->result == SOCKET_OK) {
+			free(mp_reply.msgs);
+			return 0;
+		} else if (p->result == SOCKET_NO_FD)
+			RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
+		else
+			RTE_LOG(ERR, EAL, "  no such VFIO group fd!\n");
 
-	if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
-		RTE_LOG(ERR, EAL, "  cannot send group fd!\n");
-		close(socket_fd);
-		return -1;
+		free(mp_reply.msgs);
 	}
 
-	ret = vfio_mp_sync_receive_request(socket_fd);
-	switch (ret) {
-	case SOCKET_NO_FD:
-		RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
-		close(socket_fd);
-		break;
-	case SOCKET_OK:
-		close(socket_fd);
-		return 0;
-	case SOCKET_ERR:
-		RTE_LOG(ERR, EAL, "  Socket error\n");
-		close(socket_fd);
-		break;
-	default:
-		RTE_LOG(ERR, EAL, "  UNKNOWN reply, %d\n", ret);
-		close(socket_fd);
-	}
 	return -1;
 }
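vfio_mem_event_callback() above is what ties VFIO into memory hotplug: every segment allocated after device setup gets a DMA mapping, and every freed segment gets unmapped. In IOVA-as-VA mode the callback can map the whole event range in one call because VA and IOVA coincide; otherwise it walks the contiguous memsegs and maps each with its own IOVA. A minimal sketch of a consumer of the same callback API (the callback and its name are illustrative; this uses the two-argument register call of this DPDK release):

```c
#include <rte_log.h>
#include <rte_memory.h>

static void
demo_mem_event_cb(enum rte_mem_event type, const void *addr, size_t len)
{
	RTE_LOG(DEBUG, EAL, "mem %s: va=%p len=%zu\n",
		type == RTE_MEM_EVENT_ALLOC ? "alloc" : "free", addr, len);
}

static int
demo_register(void)
{
	/* returns 0 on success; fails with rte_errno == ENOTSUP when the
	 * memory subsystem cannot deliver events (e.g. legacy mem mode) */
	return rte_mem_event_callback_register("demo_mem_event_clb",
			demo_mem_event_cb);
}
```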
@@ -468,15 +514,17 @@ int
 rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 		int *vfio_dev_fd, struct vfio_device_info *device_info)
 {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
 	struct vfio_group_status group_status = {
 			.argsz = sizeof(group_status)
 	};
 	int vfio_group_fd;
-	int iommu_group_no;
+	int iommu_group_num;
 	int i, ret;
 
 	/* get group number */
-	ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+	ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
 	if (ret == 0) {
 		RTE_LOG(WARNING, EAL, "  %s not managed by VFIO driver, skipping\n",
 			dev_addr);
@@ -488,7 +536,7 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 		return -1;
 
 	/* get the actual group fd */
-	vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+	vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
 	if (vfio_group_fd < 0)
 		return -1;
 
@@ -542,7 +590,8 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 	 * functionality.
 	 */
 	if (internal_config.process_type == RTE_PROC_PRIMARY &&
-			vfio_cfg.vfio_active_groups == 1) {
+			vfio_cfg.vfio_active_groups == 1 &&
+			vfio_group_device_count(vfio_group_fd) == 0) {
 		const struct vfio_iommu_type *t;
 
 		/* select an IOMMU type which we will be using */
@@ -555,6 +604,10 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 			rte_vfio_clear_group(vfio_group_fd);
 			return -1;
 		}
+		/* lock memory hotplug before mapping and release it
+		 * after registering callback, to prevent races
+		 */
+		rte_rwlock_read_lock(mem_lock);
 		ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
 		if (ret) {
 			RTE_LOG(ERR, EAL,
@@ -562,13 +615,14 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 				dev_addr, errno, strerror(errno));
 			close(vfio_group_fd);
 			rte_vfio_clear_group(vfio_group_fd);
+			rte_rwlock_read_unlock(mem_lock);
 			return -1;
 		}
 
 		vfio_cfg.vfio_iommu_type = t;
 
 		/* re-map all user-mapped segments */
-		rte_spinlock_lock(&user_mem_maps.lock);
+		rte_spinlock_recursive_lock(&user_mem_maps.lock);
 
 		/* this IOMMU type may not support DMA mapping, but
 		 * if we have mappings in the list - that means we have
@@ -590,12 +644,29 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 					"len: 0x%" PRIu64 "\n",
 					map->addr, map->iova,
 					map->len);
-				rte_spinlock_unlock(
+				rte_spinlock_recursive_unlock(
 						&user_mem_maps.lock);
+				rte_rwlock_read_unlock(mem_lock);
 				return -1;
 			}
 		}
-		rte_spinlock_unlock(&user_mem_maps.lock);
+		rte_spinlock_recursive_unlock(&user_mem_maps.lock);
+
+		/* register callback for mem events */
+		ret = rte_mem_event_callback_register(
+				VFIO_MEM_EVENT_CLB_NAME,
+				vfio_mem_event_callback);
+		/* unlock memory hotplug */
+		rte_rwlock_read_unlock(mem_lock);
+
+		if (ret && rte_errno != ENOTSUP) {
+			RTE_LOG(ERR, EAL, "Could not install memory event callback for VFIO\n");
+			return -1;
+		}
+		if (ret)
+			RTE_LOG(DEBUG, EAL, "Memory event callbacks not supported\n");
+		else
+			RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n");
 	}
 }
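The locking order in rte_vfio_setup_device() is the crux of the hotplug race fix: the memory-hotplug read lock is taken before the initial DMA mapping and released only after the callback is registered, so no allocation (which needs the write lock) can slip in between and end up unmapped. The shape of that pattern, reduced to its essentials (all names here are illustrative):

```c
#include <rte_rwlock.h>

static rte_rwlock_t demo_hotplug_lock = RTE_RWLOCK_INITIALIZER;

static int demo_map_existing_memory(void) { return 0; }	/* stand-in */
static int demo_register_callback(void) { return 0; }	/* stand-in */

static int
demo_setup(void)
{
	int ret;

	/* writers (allocations) block until both steps are done, so no
	 * segment can appear between mapping and callback registration */
	rte_rwlock_read_lock(&demo_hotplug_lock);
	ret = demo_map_existing_memory();
	if (ret == 0)
		ret = demo_register_callback();
	rte_rwlock_read_unlock(&demo_hotplug_lock);

	return ret;
}
```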
@@ -633,28 +704,38 @@ int
 rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 		    int vfio_dev_fd)
 {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
 	struct vfio_group_status group_status = {
 			.argsz = sizeof(group_status)
 	};
 	int vfio_group_fd;
-	int iommu_group_no;
+	int iommu_group_num;
 	int ret;
 
+	/* we don't want any DMA mapping messages to come while we're detaching
+	 * VFIO device, because this might be the last device and we might need
+	 * to unregister the callback.
+	 */
+	rte_rwlock_read_lock(mem_lock);
+
 	/* get group number */
-	ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+	ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
 	if (ret <= 0) {
 		RTE_LOG(WARNING, EAL, "  %s not managed by VFIO driver\n",
 			dev_addr);
 		/* This is an error at this point. */
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	/* get the actual group fd */
-	vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+	vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
 	if (vfio_group_fd <= 0) {
-		RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n",
+		RTE_LOG(INFO, EAL, "rte_vfio_get_group_fd failed for %s\n",
 			dev_addr);
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	/* At this point we got an active group. Closing it will make the
@@ -666,7 +747,8 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 	if (close(vfio_dev_fd) < 0) {
 		RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n",
 			dev_addr);
-		return -1;
+		ret = -1;
+		goto out;
 	}
 
 	/* An VFIO group can have several devices attached. Just when there is
@@ -678,17 +760,30 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
 		if (close(vfio_group_fd) < 0) {
 			RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
 				dev_addr);
-			return -1;
+			ret = -1;
+			goto out;
 		}
 
 		if (rte_vfio_clear_group(vfio_group_fd) < 0) {
 			RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
 				dev_addr);
-			return -1;
+			ret = -1;
+			goto out;
 		}
 	}
 
-	return 0;
+	/* if there are no active device groups, unregister the callback to
+	 * avoid spurious attempts to map/unmap memory from VFIO.
+	 */
+	if (vfio_cfg.vfio_active_groups == 0)
+		rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME);
+
+	/* success */
+	ret = 0;
+
+out:
+	rte_rwlock_read_unlock(mem_lock);
+	return ret;
 }
 
 int
@@ -700,7 +795,7 @@ rte_vfio_enable(const char *modname)
 
 	for (i = 0; i < VFIO_MAX_GROUPS; i++) {
 		vfio_cfg.vfio_groups[i].fd = -1;
-		vfio_cfg.vfio_groups[i].group_no = -1;
+		vfio_cfg.vfio_groups[i].group_num = -1;
 		vfio_cfg.vfio_groups[i].devices = 0;
 	}
 
@@ -723,7 +818,7 @@ rte_vfio_enable(const char *modname)
 		return 0;
 	}
 
-	vfio_cfg.vfio_container_fd = vfio_get_container_fd();
+	vfio_cfg.vfio_container_fd = rte_vfio_get_container_fd();
 
 	/* check if we have VFIO driver enabled */
 	if (vfio_cfg.vfio_container_fd != -1) {
@@ -801,9 +896,14 @@ vfio_has_supported_extensions(int vfio_container_fd)
 }
 
 int
-vfio_get_container_fd(void)
+rte_vfio_get_container_fd(void)
 {
 	int ret, vfio_container_fd;
+	struct rte_mp_msg mp_req, *mp_rep;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+
 
 	/* if we're in a primary process, try to open the container */
 	if (internal_config.process_type == RTE_PROC_PRIMARY) {
@@ -834,39 +934,35 @@ vfio_get_container_fd(void)
 		}
 
 		return vfio_container_fd;
-	} else {
-		/*
-		 * if we're in a secondary process, request container fd from the
-		 * primary process via our socket
-		 */
-		int socket_fd;
-
-		socket_fd = vfio_mp_sync_connect_to_primary();
-		if (socket_fd < 0) {
-			RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
-			return -1;
-		}
-		if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
-			RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-			close(socket_fd);
-			return -1;
-		}
-		vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
-		if (vfio_container_fd < 0) {
-			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
-			close(socket_fd);
-			return -1;
+	}
+	/*
+	 * if we're in a secondary process, request container fd from the
+	 * primary process via mp channel
+	 */
+	p->req = SOCKET_REQ_CONTAINER;
+	strcpy(mp_req.name, EAL_VFIO_MP);
+	mp_req.len_param = sizeof(*p);
+	mp_req.num_fds = 0;
+
+	vfio_container_fd = -1;
+	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+			mp_reply.nb_received == 1) {
+		mp_rep = &mp_reply.msgs[0];
+		p = (struct vfio_mp_param *)mp_rep->param;
+		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+			free(mp_reply.msgs);
+			return mp_rep->fds[0];
		}
-		close(socket_fd);
-		return vfio_container_fd;
+		free(mp_reply.msgs);
 	}
 
+	RTE_LOG(ERR, EAL, "  cannot request container fd\n");
 	return -1;
 }
 
 int
-vfio_get_group_no(const char *sysfs_base,
-		const char *dev_addr, int *iommu_group_no)
+rte_vfio_get_group_num(const char *sysfs_base,
+		const char *dev_addr, int *iommu_group_num)
 {
 	char linkname[PATH_MAX];
 	char filename[PATH_MAX];
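With this patch the group helpers become part of the public (experimental) API: vfio_get_group_no() becomes rte_vfio_get_group_num(), and vfio_get_group_fd()/vfio_get_container_fd() gain rte_ prefixes. A sketch of how a caller outside EAL might combine them (the sysfs base and device address are example values):

```c
#include <rte_vfio.h>

static int
demo_open_group(const char *pci_addr)
{
	int iommu_group_num;
	int ret;

	ret = rte_vfio_get_group_num("/sys/bus/pci/devices", pci_addr,
			&iommu_group_num);
	if (ret <= 0)
		return -1;	/* error, or device not bound to VFIO */

	/* opens (or reuses) the group fd for /dev/vfio */
	return rte_vfio_get_group_fd(iommu_group_num);
}
```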
@@ -898,7 +994,7 @@ vfio_get_group_no(const char *sysfs_base,
 	errno = 0;
 	group_tok = tok[ret - 1];
 	end = group_tok;
-	*iommu_group_no = strtol(group_tok, &end, 10);
+	*iommu_group_num = strtol(group_tok, &end, 10);
 	if ((end != group_tok && *end != '\0') || errno != 0) {
 		RTE_LOG(ERR, EAL, "  %s error parsing IOMMU number!\n", dev_addr);
 		return -1;
@@ -908,7 +1004,8 @@ vfio_get_group_no(const char *sysfs_base,
 }
 
 static int
-type1_map(const struct rte_memseg *ms, void *arg)
+type1_map(const struct rte_memseg_list *msl __rte_unused,
+		const struct rte_memseg *ms, void *arg)
 {
 	int *vfio_container_fd = arg;
 
@@ -1021,7 +1118,8 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 }
 
 static int
-vfio_spapr_map_walk(const struct rte_memseg *ms, void *arg)
+vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused,
+		const struct rte_memseg *ms, void *arg)
 {
 	int *vfio_container_fd = arg;
 
@@ -1034,7 +1132,8 @@ struct spapr_walk_param {
 	uint64_t hugepage_sz;
 };
 static int
-vfio_spapr_window_size_walk(const struct rte_memseg *ms, void *arg)
+vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused,
+		const struct rte_memseg *ms, void *arg)
 {
 	struct spapr_walk_param *param = arg;
 	uint64_t max = ms->iova + ms->len;
@@ -1101,12 +1200,13 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	};
 	int i, ret = 0;
 
-	rte_spinlock_lock(&user_mem_maps.lock);
+	rte_spinlock_recursive_lock(&user_mem_maps.lock);
 
 	/* check if window size needs to be adjusted */
 	memset(&param, 0, sizeof(param));
 
-	if (rte_memseg_walk(vfio_spapr_window_size_walk, &param) < 0) {
+	if (memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
+				&param) < 0) {
 		RTE_LOG(ERR, EAL, "Could not get window size\n");
 		ret = -1;
 		goto out;
@@ -1125,6 +1225,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	create.levels = 1;
 
 	if (do_map) {
+		void *addr;
 		/* re-create window and remap the entire memory */
 		if (iova > create.window_size) {
 			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
@@ -1133,7 +1234,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				ret = -1;
 				goto out;
 			}
-			if (rte_memseg_walk(vfio_spapr_map_walk,
+			if (memseg_walk_thread_unsafe(vfio_spapr_map_walk,
 					&vfio_container_fd) < 0) {
 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
 				ret = -1;
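Two deadlock-avoidance measures meet in vfio_spapr_dma_mem_map(): the memseg walks use the thread-unsafe variant because the mem-event callback already runs under the memory write lock, and user_mem_maps.lock is now recursive because this function is reached both from rte_vfio_dma_map(), which already holds that lock, and from the callback. A sketch of why the recursive spinlock is needed (names are illustrative):

```c
#include <rte_spinlock.h>

static rte_spinlock_recursive_t demo_lock =
		RTE_SPINLOCK_RECURSIVE_INITIALIZER;

static void
demo_remap_all(void)
{
	/* re-entry by the same thread: a plain rte_spinlock_t would
	 * deadlock here, a recursive one just bumps its nesting count */
	rte_spinlock_recursive_lock(&demo_lock);
	/* ... walk and remap user mem maps ... */
	rte_spinlock_recursive_unlock(&demo_lock);
}

static void
demo_dma_map(void)
{
	rte_spinlock_recursive_lock(&demo_lock);
	demo_remap_all();	/* takes the same lock again */
	rte_spinlock_recursive_unlock(&demo_lock);
}
```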
@@ -1155,9 +1256,19 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 
 		/* now that we've remapped all of the memory that was present
 		 * before, map the segment that we were requested to map.
+		 *
+		 * however, if we were called by the callback, the memory we
+		 * were called with was already in the memseg list, so previous
+		 * mapping should've mapped that segment already.
+		 *
+		 * virt2memseg_list is a relatively cheap check, so use that. if
+		 * memory is within any memseg list, it's a memseg, so it's
+		 * already mapped.
 		 */
-		if (vfio_spapr_dma_do_map(vfio_container_fd,
-				vaddr, iova, len, 1) < 0) {
+		addr = (void *)(uintptr_t)vaddr;
+		if (rte_mem_virt2memseg_list(addr) == NULL &&
+				vfio_spapr_dma_do_map(vfio_container_fd,
+					vaddr, iova, len, 1) < 0) {
 			RTE_LOG(ERR, EAL, "Could not map segment\n");
 			ret = -1;
 			goto out;
@@ -1173,7 +1284,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);
 	}
 out:
-	rte_spinlock_unlock(&user_mem_maps.lock);
+	rte_spinlock_recursive_unlock(&user_mem_maps.lock);
 	return ret;
 }
 
@@ -1258,7 +1369,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
 		return -1;
 	}
 
-	rte_spinlock_lock(&user_mem_maps.lock);
+	rte_spinlock_recursive_lock(&user_mem_maps.lock);
 	if (user_mem_maps.n_maps == VFIO_MAX_USER_MEM_MAPS) {
 		RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
 		rte_errno = ENOMEM;
@@ -1286,7 +1397,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
 	compact_user_maps();
 out:
-	rte_spinlock_unlock(&user_mem_maps.lock);
+	rte_spinlock_recursive_unlock(&user_mem_maps.lock);
 	return ret;
 }
 
@@ -1301,7 +1412,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
 		return -1;
 	}
 
-	rte_spinlock_lock(&user_mem_maps.lock);
+	rte_spinlock_recursive_lock(&user_mem_maps.lock);
 
 	/* find our mapping */
 	map = find_user_mem_map(vaddr, iova, len);
@@ -1360,7 +1471,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
 	}
 
 out:
-	rte_spinlock_unlock(&user_mem_maps.lock);
+	rte_spinlock_recursive_unlock(&user_mem_maps.lock);
 	return ret;
 }
 
@@ -1412,4 +1523,64 @@ rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
 	return -1;
 }
 
-#endif
+int
+rte_vfio_setup_device(__rte_unused const char *sysfs_base,
+		__rte_unused const char *dev_addr,
+		__rte_unused int *vfio_dev_fd,
+		__rte_unused struct vfio_device_info *device_info)
+{
+	return -1;
+}
+
+int
+rte_vfio_release_device(__rte_unused const char *sysfs_base,
+		__rte_unused const char *dev_addr, __rte_unused int fd)
+{
+	return -1;
+}
+
+int
+rte_vfio_enable(__rte_unused const char *modname)
+{
+	return -1;
+}
+
+int
+rte_vfio_is_enabled(__rte_unused const char *modname)
+{
+	return -1;
+}
+
+int
+rte_vfio_noiommu_is_enabled(void)
+{
+	return -1;
+}
+
+int
+rte_vfio_clear_group(__rte_unused int vfio_group_fd)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
+		__rte_unused const char *dev_addr,
+		__rte_unused int *iommu_group_num)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_get_container_fd(void)
+{
+	return -1;
+}
+
+int __rte_experimental
+rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
+{
+	return -1;
+}
+
+#endif /* VFIO_PRESENT */
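Everything in this file ultimately serves the two experimental user-facing calls, rte_vfio_dma_map() and rte_vfio_dma_unmap(), which let an application make memory that EAL does not manage visible to the IOMMU. A usage sketch under the assumption of IOVA-as-VA mode (so the VA doubles as the IOVA); in practice the mapping length should respect the IOMMU's page-size granularity:

```c
#include <stdint.h>
#include <sys/mman.h>
#include <rte_vfio.h>

static int
demo_map_external(size_t len)
{
	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	uint64_t addr;

	if (va == MAP_FAILED)
		return -1;
	addr = (uint64_t)(uintptr_t)va;

	/* record the user map and program it into the IOMMU */
	if (rte_vfio_dma_map(addr, addr, len) < 0) {
		munmap(va, len);
		return -1;
	}

	/* ... DMA into/out of the buffer ... */

	rte_vfio_dma_unmap(addr, addr, len);
	munmap(va, len);
	return 0;
}
```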