vfio: fix boundary check in region search
[dpdk.git] / lib / librte_eal / linuxapp / eal / eal_vfio.c
index c1f0f87..95cd343 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #include <inttypes.h>
@@ -20,6 +20,8 @@
 
 #ifdef VFIO_PRESENT
 
+#define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb"
+
 /* per-process VFIO config */
 static struct vfio_config vfio_cfg;
 
@@ -69,13 +71,49 @@ struct user_mem_map {
        uint64_t len;
 };
 static struct {
-       rte_spinlock_t lock;
+       /* recursive, so a thread already holding it may lock it again;
+        * rte_vfio_dma_map() and rte_vfio_dma_unmap() hold it while they
+        * work on n_maps and maps[]
+        */
+       rte_spinlock_recursive_t lock;
        int n_maps;
        struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS];
 } user_mem_maps = {
-       .lock = RTE_SPINLOCK_INITIALIZER
+       .lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER
 };
 
+/* Lockless variant of rte_memseg_walk(), needed by the sPAPR IOMMU code: by
+ * the time we enter the callback we are already holding a write lock, so the
+ * regular rte_memseg_walk() would deadlock. Keeping one private copy of the
+ * iteration avoids duplicating it at every call site.
+ * Returns 0 after a full walk, 1 or -1 as soon as func returns >0 or <0.
+ */
+static int
+memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       int list_idx;
+
+       for (list_idx = 0; list_idx < RTE_MAX_MEMSEG_LISTS; list_idx++) {
+               struct rte_memseg_list *msl = &mcfg->memsegs[list_idx];
+               struct rte_fbarray *arr = &msl->memseg_arr;
+               int seg_idx;
+
+               if (arr->count == 0)
+                       continue;
+
+               for (seg_idx = rte_fbarray_find_next_used(arr, 0);
+                               seg_idx >= 0;
+                               seg_idx = rte_fbarray_find_next_used(arr,
+                                       seg_idx + 1)) {
+                       const struct rte_memseg *ms;
+                       int rc;
+
+                       ms = rte_fbarray_get(arr, seg_idx);
+                       rc = func(msl, ms, arg);
+                       if (rc != 0)
+                               return rc < 0 ? -1 : 1;
+               }
+       }
+       return 0;
+}
+
 static int
 is_null_map(const struct user_mem_map *map)
 {
@@ -182,15 +220,15 @@ find_user_mem_map(uint64_t addr, uint64_t iova, uint64_t len)
                /* check start VA */
                if (addr < map->addr || addr >= map_va_end)
                        continue;
-               /* check if IOVA end is within boundaries */
-               if (va_end <= map->addr || va_end >= map_va_end)
+               /* check if VA end is within boundaries */
+               if (va_end <= map->addr || va_end > map_va_end)
                        continue;
 
-               /* check start PA */
+               /* check start IOVA */
                if (iova < map->iova || iova >= map_iova_end)
                        continue;
                /* check if IOVA end is within boundaries */
-               if (iova_end <= map->iova || iova_end >= map_iova_end)
+               if (iova_end <= map->iova || iova_end > map_iova_end)
                        continue;
 
                /* we've found our map */
@@ -246,16 +284,20 @@ compact_user_maps(void)
 }
 
 int
-vfio_get_group_fd(int iommu_group_no)
+rte_vfio_get_group_fd(int iommu_group_num)
 {
        int i;
        int vfio_group_fd;
        char filename[PATH_MAX];
        struct vfio_group *cur_grp;
+       struct rte_mp_msg mp_req, *mp_rep;
+       struct rte_mp_reply mp_reply;
+       struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+       struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
        /* check if we already have the group descriptor open */
        for (i = 0; i < VFIO_MAX_GROUPS; i++)
-               if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
+               if (vfio_cfg.vfio_groups[i].group_num == iommu_group_num)
                        return vfio_cfg.vfio_groups[i].fd;
 
        /* Lets see first if there is room for a new group */
@@ -266,7 +308,7 @@ vfio_get_group_fd(int iommu_group_no)
 
        /* Now lets get an index for the new group */
        for (i = 0; i < VFIO_MAX_GROUPS; i++)
-               if (vfio_cfg.vfio_groups[i].group_no == -1) {
+               if (vfio_cfg.vfio_groups[i].group_num == -1) {
                        cur_grp = &vfio_cfg.vfio_groups[i];
                        break;
                }
@@ -280,7 +322,7 @@ vfio_get_group_fd(int iommu_group_no)
        if (internal_config.process_type == RTE_PROC_PRIMARY) {
                /* try regular group format */
                snprintf(filename, sizeof(filename),
-                                VFIO_GROUP_FMT, iommu_group_no);
+                                VFIO_GROUP_FMT, iommu_group_num);
                vfio_group_fd = open(filename, O_RDWR);
                if (vfio_group_fd < 0) {
                        /* if file not found, it's not an error */
@@ -292,7 +334,8 @@ vfio_get_group_fd(int iommu_group_no)
 
                        /* special case: try no-IOMMU path as well */
                        snprintf(filename, sizeof(filename),
-                                       VFIO_NOIOMMU_GROUP_FMT, iommu_group_no);
+                                       VFIO_NOIOMMU_GROUP_FMT,
+                                       iommu_group_num);
                        vfio_group_fd = open(filename, O_RDWR);
                        if (vfio_group_fd < 0) {
                                if (errno != ENOENT) {
@@ -305,56 +348,40 @@ vfio_get_group_fd(int iommu_group_no)
                        /* noiommu group found */
                }
 
-               cur_grp->group_no = iommu_group_no;
+               cur_grp->group_num = iommu_group_num;
                cur_grp->fd = vfio_group_fd;
                vfio_cfg.vfio_active_groups++;
                return vfio_group_fd;
        }
        /* if we're in a secondary process, request group fd from the primary
-        * process via our socket
+        * process via mp channel.
         */
-       else {
-               int socket_fd, ret;
-
-               socket_fd = vfio_mp_sync_connect_to_primary();
-
-               if (socket_fd < 0) {
-                       RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
-                       return -1;
-               }
-               if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
-                       RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-                       close(socket_fd);
-                       return -1;
-               }
-               if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
-                       RTE_LOG(ERR, EAL, "  cannot send group number!\n");
-                       close(socket_fd);
-                       return -1;
-               }
-               ret = vfio_mp_sync_receive_request(socket_fd);
-               switch (ret) {
-               case SOCKET_NO_FD:
-                       close(socket_fd);
-                       return 0;
-               case SOCKET_OK:
-                       vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
-                       /* if we got the fd, store it and return it */
-                       if (vfio_group_fd > 0) {
-                               close(socket_fd);
-                               cur_grp->group_no = iommu_group_no;
-                               cur_grp->fd = vfio_group_fd;
-                               vfio_cfg.vfio_active_groups++;
-                               return vfio_group_fd;
-                       }
-                       /* fall-through on error */
-               default:
-                       RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
-                       close(socket_fd);
-                       return -1;
+       p->req = SOCKET_REQ_GROUP;
+       p->group_num = iommu_group_num;
+       strcpy(mp_req.name, EAL_VFIO_MP);
+       mp_req.len_param = sizeof(*p);
+       mp_req.num_fds = 0;
+
+       vfio_group_fd = -1;
+       if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+           mp_reply.nb_received == 1) {
+               mp_rep = &mp_reply.msgs[0];
+               p = (struct vfio_mp_param *)mp_rep->param;
+               if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+                       cur_grp->group_num = iommu_group_num;
+                       vfio_group_fd = mp_rep->fds[0];
+                       cur_grp->fd = vfio_group_fd;
+                       vfio_cfg.vfio_active_groups++;
+               } else if (p->result == SOCKET_NO_FD) {
+                       RTE_LOG(ERR, EAL, "  bad VFIO group fd\n");
+                       vfio_group_fd = 0;
                }
+               free(mp_reply.msgs);
        }
-       return -1;
+
+       if (vfio_group_fd < 0)
+               RTE_LOG(ERR, EAL, "  cannot request group fd\n");
+       return vfio_group_fd;
 }
 
 
@@ -406,61 +433,80 @@ vfio_group_device_count(int vfio_group_fd)
        return vfio_cfg.vfio_groups[i].devices;
 }
 
+/* Memory event hook: forwards [addr, addr + len) to vfio_dma_mem_map(), with
+ * its last argument set to 1 for RTE_MEM_EVENT_ALLOC and 0 for anything else.
+ */
+static void
+vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
+{
+       const int do_map = (type == RTE_MEM_EVENT_ALLOC);
+       struct rte_memseg_list *msl;
+       struct rte_memseg *seg;
+       size_t done;
+
+       msl = rte_mem_virt2memseg_list(addr);
+
+       if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+               /* IOVA == VA: the whole range goes down in a single call */
+               uint64_t va = (uint64_t)(uintptr_t)addr;
+
+               vfio_dma_mem_map(va, va, len, do_map);
+               return;
+       }
+
+       /* otherwise go segment by segment, each with its own IOVA; memsegs
+        * are contiguous in memory, so a plain pointer increment reaches
+        * the next one
+        */
+       seg = rte_mem_virt2memseg(addr, msl);
+       for (done = 0; done < len; done += seg->len, seg++) {
+               vfio_dma_mem_map(seg->addr_64, seg->iova, seg->len, do_map);
+       }
+}
+
+
 int
 rte_vfio_clear_group(int vfio_group_fd)
 {
        int i;
-       int socket_fd, ret;
+       struct rte_mp_msg mp_req, *mp_rep;
+       struct rte_mp_reply mp_reply;
+       struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+       struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
+       /* primary: reset the group's slot in vfio_cfg and return 0, or
+        * return -1 when get_vfio_group_idx() reports a negative index
+        */
        if (internal_config.process_type == RTE_PROC_PRIMARY) {
 
                i = get_vfio_group_idx(vfio_group_fd);
                if (i < 0)
                        return -1;
-               vfio_cfg.vfio_groups[i].group_no = -1;
+               vfio_cfg.vfio_groups[i].group_num = -1;
                vfio_cfg.vfio_groups[i].fd = -1;
                vfio_cfg.vfio_groups[i].devices = 0;
                vfio_cfg.vfio_active_groups--;
                return 0;
        }
 
-       /* This is just for SECONDARY processes */
-       socket_fd = vfio_mp_sync_connect_to_primary();
-
-       if (socket_fd < 0) {
-               RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
-               return -1;
-       }
-
-       if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) {
-               RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-               close(socket_fd);
-               return -1;
-       }
+       /* any other process type: send a SOCKET_CLR_GROUP request named
+        * EAL_VFIO_MP; note that the group fd travels in the request's
+        * group_num field
+        */
+       p->req = SOCKET_CLR_GROUP;
+       p->group_num = vfio_group_fd;
+       strcpy(mp_req.name, EAL_VFIO_MP);
+       mp_req.len_param = sizeof(*p);
+       mp_req.num_fds = 0;
+
+       /* 0 is returned only when exactly one reply arrived and its result
+        * is SOCKET_OK; the reply array is freed before leaving this branch
+        */
+       if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+           mp_reply.nb_received == 1) {
+               mp_rep = &mp_reply.msgs[0];
+               p = (struct vfio_mp_param *)mp_rep->param;
+               if (p->result == SOCKET_OK) {
+                       free(mp_reply.msgs);
+                       return 0;
+               } else if (p->result == SOCKET_NO_FD)
+                       RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
+               else
+                       RTE_LOG(ERR, EAL, "  no such VFIO group fd!\n");
 
-       if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
-               RTE_LOG(ERR, EAL, "  cannot send group fd!\n");
-               close(socket_fd);
-               return -1;
+               free(mp_reply.msgs);
        }
 
-       ret = vfio_mp_sync_receive_request(socket_fd);
-       switch (ret) {
-       case SOCKET_NO_FD:
-               RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
-               close(socket_fd);
-               break;
-       case SOCKET_OK:
-               close(socket_fd);
-               return 0;
-       case SOCKET_ERR:
-               RTE_LOG(ERR, EAL, "  Socket error\n");
-               close(socket_fd);
-               break;
-       default:
-               RTE_LOG(ERR, EAL, "  UNKNOWN reply, %d\n", ret);
-               close(socket_fd);
-       }
        return -1;
 }
 
@@ -468,15 +514,17 @@ int
 rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
                int *vfio_dev_fd, struct vfio_device_info *device_info)
 {
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
        struct vfio_group_status group_status = {
                        .argsz = sizeof(group_status)
        };
        int vfio_group_fd;
-       int iommu_group_no;
+       int iommu_group_num;
        int i, ret;
 
        /* get group number */
-       ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+       ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
        if (ret == 0) {
                RTE_LOG(WARNING, EAL, "  %s not managed by VFIO driver, skipping\n",
                        dev_addr);
@@ -488,7 +536,7 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
                return -1;
 
        /* get the actual group fd */
-       vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+       vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
        if (vfio_group_fd < 0)
                return -1;
 
@@ -542,7 +590,8 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
                 * functionality.
                 */
                if (internal_config.process_type == RTE_PROC_PRIMARY &&
-                               vfio_cfg.vfio_active_groups == 1) {
+                               vfio_cfg.vfio_active_groups == 1 &&
+                               vfio_group_device_count(vfio_group_fd) == 0) {
                        const struct vfio_iommu_type *t;
 
                        /* select an IOMMU type which we will be using */
@@ -555,6 +604,10 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
                                rte_vfio_clear_group(vfio_group_fd);
                                return -1;
                        }
+                       /* lock memory hotplug before mapping and release it
+                        * after registering callback, to prevent races
+                        */
+                       rte_rwlock_read_lock(mem_lock);
                        ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
                        if (ret) {
                                RTE_LOG(ERR, EAL,
@@ -562,13 +615,14 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
                                        dev_addr, errno, strerror(errno));
                                close(vfio_group_fd);
                                rte_vfio_clear_group(vfio_group_fd);
+                               rte_rwlock_read_unlock(mem_lock);
                                return -1;
                        }
 
                        vfio_cfg.vfio_iommu_type = t;
 
                        /* re-map all user-mapped segments */
-                       rte_spinlock_lock(&user_mem_maps.lock);
+                       rte_spinlock_recursive_lock(&user_mem_maps.lock);
 
                        /* this IOMMU type may not support DMA mapping, but
                         * if we have mappings in the list - that means we have
@@ -590,12 +644,29 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
                                                        "len: 0x%" PRIu64 "\n",
                                                        map->addr, map->iova,
                                                        map->len);
-                                       rte_spinlock_unlock(
+                                       rte_spinlock_recursive_unlock(
                                                        &user_mem_maps.lock);
+                                       rte_rwlock_read_unlock(mem_lock);
                                        return -1;
                                }
                        }
-                       rte_spinlock_unlock(&user_mem_maps.lock);
+                       rte_spinlock_recursive_unlock(&user_mem_maps.lock);
+
+                       /* register callback for mem events */
+                       ret = rte_mem_event_callback_register(
+                                       VFIO_MEM_EVENT_CLB_NAME,
+                                       vfio_mem_event_callback);
+                       /* unlock memory hotplug */
+                       rte_rwlock_read_unlock(mem_lock);
+
+                       if (ret && rte_errno != ENOTSUP) {
+                               RTE_LOG(ERR, EAL, "Could not install memory event callback for VFIO\n");
+                               return -1;
+                       }
+                       if (ret)
+                               RTE_LOG(DEBUG, EAL, "Memory event callbacks not supported\n");
+                       else
+                               RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n");
                }
        }
 
@@ -633,28 +704,38 @@ int
 rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
                    int vfio_dev_fd)
 {
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
        struct vfio_group_status group_status = {
                        .argsz = sizeof(group_status)
        };
        int vfio_group_fd;
-       int iommu_group_no;
+       int iommu_group_num;
        int ret;
 
+       /* we don't want any DMA mapping messages to come while we're detaching
+        * VFIO device, because this might be the last device and we might need
+        * to unregister the callback.
+        */
+       rte_rwlock_read_lock(mem_lock);
+
        /* get group number */
-       ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+       ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num);
        if (ret <= 0) {
                RTE_LOG(WARNING, EAL, "  %s not managed by VFIO driver\n",
                        dev_addr);
                /* This is an error at this point. */
-               return -1;
+               ret = -1;
+               goto out;
        }
 
        /* get the actual group fd */
-       vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+       vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num);
        if (vfio_group_fd <= 0) {
-               RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n",
+               RTE_LOG(INFO, EAL, "rte_vfio_get_group_fd failed for %s\n",
                                   dev_addr);
-               return -1;
+               ret = -1;
+               goto out;
        }
 
        /* At this point we got an active group. Closing it will make the
@@ -666,7 +747,8 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
        if (close(vfio_dev_fd) < 0) {
                RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n",
                                   dev_addr);
-               return -1;
+               ret = -1;
+               goto out;
        }
 
        /* An VFIO group can have several devices attached. Just when there is
@@ -678,17 +760,30 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,
                if (close(vfio_group_fd) < 0) {
                        RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
                                dev_addr);
-                       return -1;
+                       ret = -1;
+                       goto out;
                }
 
                if (rte_vfio_clear_group(vfio_group_fd) < 0) {
                        RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
                                           dev_addr);
-                       return -1;
+                       ret = -1;
+                       goto out;
                }
        }
 
-       return 0;
+       /* if there are no active device groups, unregister the callback to
+        * avoid spurious attempts to map/unmap memory from VFIO.
+        */
+       if (vfio_cfg.vfio_active_groups == 0)
+               rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME);
+
+       /* success */
+       ret = 0;
+
+out:
+       rte_rwlock_read_unlock(mem_lock);
+       return ret;
 }
 
 int
@@ -700,7 +795,7 @@ rte_vfio_enable(const char *modname)
 
        for (i = 0; i < VFIO_MAX_GROUPS; i++) {
                vfio_cfg.vfio_groups[i].fd = -1;
-               vfio_cfg.vfio_groups[i].group_no = -1;
+               vfio_cfg.vfio_groups[i].group_num = -1;
                vfio_cfg.vfio_groups[i].devices = 0;
        }
 
@@ -723,7 +818,7 @@ rte_vfio_enable(const char *modname)
                return 0;
        }
 
-       vfio_cfg.vfio_container_fd = vfio_get_container_fd();
+       vfio_cfg.vfio_container_fd = rte_vfio_get_container_fd();
 
        /* check if we have VFIO driver enabled */
        if (vfio_cfg.vfio_container_fd != -1) {
@@ -801,9 +896,14 @@ vfio_has_supported_extensions(int vfio_container_fd)
 }
 
 int
-vfio_get_container_fd(void)
+rte_vfio_get_container_fd(void)
 {
        int ret, vfio_container_fd;
+       struct rte_mp_msg mp_req, *mp_rep;
+       struct rte_mp_reply mp_reply;
+       struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+       struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+
 
        /* if we're in a primary process, try to open the container */
        if (internal_config.process_type == RTE_PROC_PRIMARY) {
@@ -834,39 +934,35 @@ vfio_get_container_fd(void)
                }
 
                return vfio_container_fd;
-       } else {
-               /*
-                * if we're in a secondary process, request container fd from the
-                * primary process via our socket
-                */
-               int socket_fd;
-
-               socket_fd = vfio_mp_sync_connect_to_primary();
-               if (socket_fd < 0) {
-                       RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
-                       return -1;
-               }
-               if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
-                       RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
-                       close(socket_fd);
-                       return -1;
-               }
-               vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
-               if (vfio_container_fd < 0) {
-                       RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
-                       close(socket_fd);
-                       return -1;
+       }
+       /*
+        * if we're in a secondary process, request container fd from the
+        * primary process via mp channel
+        */
+       p->req = SOCKET_REQ_CONTAINER;
+       strcpy(mp_req.name, EAL_VFIO_MP);
+       mp_req.len_param = sizeof(*p);
+       mp_req.num_fds = 0;
+
+       vfio_container_fd = -1;
+       if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+           mp_reply.nb_received == 1) {
+               mp_rep = &mp_reply.msgs[0];
+               p = (struct vfio_mp_param *)mp_rep->param;
+               if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+                       free(mp_reply.msgs);
+                       return mp_rep->fds[0];
                }
-               close(socket_fd);
-               return vfio_container_fd;
+               free(mp_reply.msgs);
        }
 
+       RTE_LOG(ERR, EAL, "  cannot request container fd\n");
        return -1;
 }
 
 int
-vfio_get_group_no(const char *sysfs_base,
-               const char *dev_addr, int *iommu_group_no)
+rte_vfio_get_group_num(const char *sysfs_base,
+               const char *dev_addr, int *iommu_group_num)
 {
        char linkname[PATH_MAX];
        char filename[PATH_MAX];
@@ -898,7 +994,7 @@ vfio_get_group_no(const char *sysfs_base,
        errno = 0;
        group_tok = tok[ret - 1];
        end = group_tok;
-       *iommu_group_no = strtol(group_tok, &end, 10);
+       *iommu_group_num = strtol(group_tok, &end, 10);
        if ((end != group_tok && *end != '\0') || errno != 0) {
                RTE_LOG(ERR, EAL, "  %s error parsing IOMMU number!\n", dev_addr);
                return -1;
@@ -908,7 +1004,8 @@ vfio_get_group_no(const char *sysfs_base,
 }
 
 static int
-type1_map(const struct rte_memseg *ms, void *arg)
+type1_map(const struct rte_memseg_list *msl __rte_unused,
+               const struct rte_memseg *ms, void *arg)
 {
        int *vfio_container_fd = arg;
 
@@ -1021,7 +1118,8 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 }
 
 static int
-vfio_spapr_map_walk(const struct rte_memseg *ms, void *arg)
+vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused,
+               const struct rte_memseg *ms, void *arg)
 {
        int *vfio_container_fd = arg;
 
@@ -1034,7 +1132,8 @@ struct spapr_walk_param {
        uint64_t hugepage_sz;
 };
 static int
-vfio_spapr_window_size_walk(const struct rte_memseg *ms, void *arg)
+vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused,
+               const struct rte_memseg *ms, void *arg)
 {
        struct spapr_walk_param *param = arg;
        uint64_t max = ms->iova + ms->len;
@@ -1101,12 +1200,13 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
        };
        int i, ret = 0;
 
-       rte_spinlock_lock(&user_mem_maps.lock);
+       rte_spinlock_recursive_lock(&user_mem_maps.lock);
 
        /* check if window size needs to be adjusted */
        memset(&param, 0, sizeof(param));
 
-       if (rte_memseg_walk(vfio_spapr_window_size_walk, &param) < 0) {
+       if (memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
+                               &param) < 0) {
                RTE_LOG(ERR, EAL, "Could not get window size\n");
                ret = -1;
                goto out;
@@ -1125,6 +1225,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
        create.levels = 1;
 
        if (do_map) {
+               void *addr;
                /* re-create window and remap the entire memory */
                if (iova > create.window_size) {
                        if (vfio_spapr_create_new_dma_window(vfio_container_fd,
@@ -1133,7 +1234,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
                                ret = -1;
                                goto out;
                        }
-                       if (rte_memseg_walk(vfio_spapr_map_walk,
+                       if (memseg_walk_thread_unsafe(vfio_spapr_map_walk,
                                        &vfio_container_fd) < 0) {
                                RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
                                ret = -1;
@@ -1155,9 +1256,19 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 
                /* now that we've remapped all of the memory that was present
                 * before, map the segment that we were requested to map.
+                *
+                * however, if we were called by the callback, the memory we
+                * were called with was already in the memseg list, so previous
+                * mapping should've mapped that segment already.
+                *
+                * virt2memseg_list is a relatively cheap check, so use that. if
+                * memory is within any memseg list, it's a memseg, so it's
+                * already mapped.
                 */
-               if (vfio_spapr_dma_do_map(vfio_container_fd,
-                               vaddr, iova, len, 1) < 0) {
+               addr = (void *)(uintptr_t)vaddr;
+               if (rte_mem_virt2memseg_list(addr) == NULL &&
+                               vfio_spapr_dma_do_map(vfio_container_fd,
+                                       vaddr, iova, len, 1) < 0) {
                        RTE_LOG(ERR, EAL, "Could not map segment\n");
                        ret = -1;
                        goto out;
@@ -1173,7 +1284,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
                vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);
        }
 out:
-       rte_spinlock_unlock(&user_mem_maps.lock);
+       rte_spinlock_recursive_unlock(&user_mem_maps.lock);
        return ret;
 }
 
@@ -1258,7 +1369,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
                return -1;
        }
 
-       rte_spinlock_lock(&user_mem_maps.lock);
+       rte_spinlock_recursive_lock(&user_mem_maps.lock);
        if (user_mem_maps.n_maps == VFIO_MAX_USER_MEM_MAPS) {
                RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
                rte_errno = ENOMEM;
@@ -1286,7 +1397,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
 
        compact_user_maps();
 out:
-       rte_spinlock_unlock(&user_mem_maps.lock);
+       rte_spinlock_recursive_unlock(&user_mem_maps.lock);
        return ret;
 }
 
@@ -1301,7 +1412,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
                return -1;
        }
 
-       rte_spinlock_lock(&user_mem_maps.lock);
+       rte_spinlock_recursive_lock(&user_mem_maps.lock);
 
        /* find our mapping */
        map = find_user_mem_map(vaddr, iova, len);
@@ -1360,7 +1471,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
        }
 
 out:
-       rte_spinlock_unlock(&user_mem_maps.lock);
+       rte_spinlock_recursive_unlock(&user_mem_maps.lock);
        return ret;
 }
 
@@ -1412,4 +1523,64 @@ rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
        return -1;
 }
 
-#endif
+/* Stub implementations: every entry point below ignores its arguments and
+ * returns -1. NOTE(review): presumably these sit in the #else branch of
+ * VFIO_PRESENT - the opening #else is outside this hunk; confirm.
+ */
+int
+rte_vfio_setup_device(__rte_unused const char *sysfs_base,
+               __rte_unused const char *dev_addr,
+               __rte_unused int *vfio_dev_fd,
+               __rte_unused struct vfio_device_info *device_info)
+{
+       return -1;
+}
+
+int
+rte_vfio_release_device(__rte_unused const char *sysfs_base,
+               __rte_unused const char *dev_addr, __rte_unused int fd)
+{
+       return -1;
+}
+
+int
+rte_vfio_enable(__rte_unused const char *modname)
+{
+       return -1;
+}
+
+int
+rte_vfio_is_enabled(__rte_unused const char *modname)
+{
+       return -1;
+}
+
+int
+rte_vfio_noiommu_is_enabled(void)
+{
+       return -1;
+}
+
+int
+rte_vfio_clear_group(__rte_unused int vfio_group_fd)
+{
+       return -1;
+}
+
+int __rte_experimental
+rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
+               __rte_unused const char *dev_addr,
+               __rte_unused int *iommu_group_num)
+{
+       return -1;
+}
+
+int __rte_experimental
+rte_vfio_get_container_fd(void)
+{
+       return -1;
+}
+
+int __rte_experimental
+rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
+{
+       return -1;
+}
+
+#endif /* VFIO_PRESENT */