* Copyright(c) 2010-2014 Intel Corporation
*/
+#include <inttypes.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_eal_memconfig.h>
static struct vfio_config vfio_cfg;
static int vfio_type1_dma_map(int);
+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
static int vfio_spapr_dma_map(int);
+static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
static int vfio_noiommu_dma_map(int);
+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
+static int vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len,
+ int do_map);
/* IOMMU types we support */
static const struct vfio_iommu_type iommu_types[] = {
/* x86 IOMMU, otherwise known as type 1 */
- { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+ {
+ .type_id = RTE_VFIO_TYPE1,
+ .name = "Type 1",
+ .dma_map_func = &vfio_type1_dma_map,
+ .dma_user_map_func = &vfio_type1_dma_mem_map
+ },
/* ppc64 IOMMU, otherwise known as spapr */
- { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
+ {
+ .type_id = RTE_VFIO_SPAPR,
+ .name = "sPAPR",
+ .dma_map_func = &vfio_spapr_dma_map,
+ .dma_user_map_func = &vfio_spapr_dma_mem_map
+ },
/* IOMMU-less mode */
- { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+ {
+ .type_id = RTE_VFIO_NOIOMMU,
+ .name = "No-IOMMU",
+ .dma_map_func = &vfio_noiommu_dma_map,
+ .dma_user_map_func = &vfio_noiommu_dma_mem_map
+ },
};
+/* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can
+ * recreate the mappings for DPDK segments, but we cannot do so for memory that
+ * was registered by the user themselves, so we need to store the user mappings
+ * somewhere, to recreate them later.
+ */
+#define VFIO_MAX_USER_MEM_MAPS 256
+struct user_mem_map {
+ uint64_t addr;
+ uint64_t iova;
+ uint64_t len;
+};
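+/* all user maps live in this fixed-size, spinlock-protected table */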
+static struct {
+ rte_spinlock_t lock;
+ int n_maps;
+ struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS];
+} user_mem_maps = {
+ .lock = RTE_SPINLOCK_INITIALIZER
+};
+
+static int
+is_null_map(const struct user_mem_map *map)
+{
+ return map->addr == 0 && map->iova == 0 && map->len == 0;
+}
+
+/* we may need to merge user mem maps together in case of user mapping/unmapping
+ * chunks of memory, so we'll need a comparator function to sort segments.
+ */
+static int
+user_mem_map_cmp(const void *a, const void *b)
+{
+ const struct user_mem_map *umm_a = a;
+ const struct user_mem_map *umm_b = b;
+
+ /* move null entries to end */
+ if (is_null_map(umm_a))
+ return 1;
+ if (is_null_map(umm_b))
+ return -1;
+
+ /* sort by iova first */
+ if (umm_a->iova < umm_b->iova)
+ return -1;
+ if (umm_a->iova > umm_b->iova)
+ return 1;
+
+ if (umm_a->addr < umm_b->addr)
+ return -1;
+ if (umm_a->addr > umm_b->addr)
+ return 1;
+
+ if (umm_a->len < umm_b->len)
+ return -1;
+ if (umm_a->len > umm_b->len)
+ return 1;
+
+ return 0;
+}
+
+/* adjust user map entry. this may result in shortening of the existing map,
+ * or in splitting the existing map into two pieces.
+ */
+static void
+adjust_map(struct user_mem_map *src, struct user_mem_map *end,
+ uint64_t remove_va_start, uint64_t remove_len)
+{
+ /* if va start is same as start address, we're simply moving start */
+ if (remove_va_start == src->addr) {
+ src->addr += remove_len;
+ src->iova += remove_len;
+ src->len -= remove_len;
+ } else if (remove_va_start + remove_len == src->addr + src->len) {
+ /* we're shrinking mapping from the end */
+ src->len -= remove_len;
+ } else {
+ /* we're blowing a hole in the middle */
+ struct user_mem_map tmp;
+ uint64_t total_len = src->len;
+
+ /* adjust source segment length */
+ src->len = remove_va_start - src->addr;
+
+ /* create temporary segment in the middle */
+ tmp.addr = src->addr + src->len;
+ tmp.iova = src->iova + src->len;
+ tmp.len = remove_len;
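+ /* note: tmp only describes the hole being removed; it is never stored */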
+
+ /* populate end segment - this one we will be keeping */
+ end->addr = tmp.addr + tmp.len;
+ end->iova = tmp.iova + tmp.len;
+ end->len = total_len - src->len - tmp.len;
+ }
+}
+
+/* try merging two maps into one, return 1 if succeeded */
+static int
+merge_map(struct user_mem_map *left, struct user_mem_map *right)
+{
+ if (left->addr + left->len != right->addr)
+ return 0;
+ if (left->iova + left->len != right->iova)
+ return 0;
+
+ left->len += right->len;
+
+ memset(right, 0, sizeof(*right));
+
+ return 1;
+}
+
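+/* find a stored user map that fully contains the given VA and IOVA range */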
+static struct user_mem_map *
+find_user_mem_map(uint64_t addr, uint64_t iova, uint64_t len)
+{
+ uint64_t va_end = addr + len;
+ uint64_t iova_end = iova + len;
+ int i;
+
+ for (i = 0; i < user_mem_maps.n_maps; i++) {
+ struct user_mem_map *map = &user_mem_maps.maps[i];
+ uint64_t map_va_end = map->addr + map->len;
+ uint64_t map_iova_end = map->iova + map->len;
+
+ /* check start VA */
+ if (addr < map->addr || addr >= map_va_end)
+ continue;
+ /* check if VA end is within boundaries */
+ if (va_end <= map->addr || va_end >= map_va_end)
+ continue;
+
+ /* check start IOVA */
+ if (iova < map->iova || iova >= map_iova_end)
+ continue;
+ /* check if IOVA end is within boundaries */
+ if (iova_end <= map->iova || iova_end >= map_iova_end)
+ continue;
+
+ /* we've found our map */
+ return map;
+ }
+ return NULL;
+}
+
+/* this will sort all user maps, and merge/compact any adjacent maps */
+static void
+compact_user_maps(void)
+{
+ int i, n_merged, cur_idx;
+
+ qsort(user_mem_maps.maps, user_mem_maps.n_maps,
+ sizeof(user_mem_maps.maps[0]), user_mem_map_cmp);
+
+ /* we'll go over the list backwards when merging, so that an entry can
+ * absorb a right neighbour that has itself already absorbed entries to
+ * its right
+ */
+ n_merged = 0;
+ for (i = user_mem_maps.n_maps - 2; i >= 0; i--) {
+ struct user_mem_map *l, *r;
+
+ l = &user_mem_maps.maps[i];
+ r = &user_mem_maps.maps[i + 1];
+
+ if (is_null_map(l) || is_null_map(r))
+ continue;
+
+ if (merge_map(l, r))
+ n_merged++;
+ }
+
+ /* the entries are still sorted, but now they have holes in them, so
+ * walk through the list and remove the holes
+ */
+ if (n_merged > 0) {
+ cur_idx = 0;
+ for (i = 0; i < user_mem_maps.n_maps; i++) {
+ if (!is_null_map(&user_mem_maps.maps[i])) {
+ struct user_mem_map *src, *dst;
+
+ src = &user_mem_maps.maps[i];
+ dst = &user_mem_maps.maps[cur_idx++];
+
+ if (src != dst) {
+ memcpy(dst, src, sizeof(*src));
+ memset(src, 0, sizeof(*src));
+ }
+ }
+ }
+ user_mem_maps.n_maps = cur_idx;
+ }
+}
+
int
vfio_get_group_fd(int iommu_group_no)
{
};
int vfio_group_fd;
int iommu_group_no;
- int ret;
+ int i, ret;
/* get group number */
ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
*/
if (internal_config.process_type == RTE_PROC_PRIMARY &&
vfio_cfg.vfio_active_groups == 1) {
+ const struct vfio_iommu_type *t;
+
/* select an IOMMU type which we will be using */
- const struct vfio_iommu_type *t =
- vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
+ t = vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
if (!t) {
RTE_LOG(ERR, EAL,
" %s failed to select IOMMU type\n",
rte_vfio_clear_group(vfio_group_fd);
return -1;
}
+
+ vfio_cfg.vfio_iommu_type = t;
+
+ /* re-map all user-mapped segments */
+ rte_spinlock_lock(&user_mem_maps.lock);
+
+ /* this IOMMU type may not support DMA mapping, but
+ * if we have mappings in the list - that means we have
+ * previously mapped something successfully, so we can
+ * be sure that DMA mapping is supported.
+ */
+ for (i = 0; i < user_mem_maps.n_maps; i++) {
+ struct user_mem_map *map;
+ map = &user_mem_maps.maps[i];
+
+ ret = t->dma_user_map_func(
+ vfio_cfg.vfio_container_fd,
+ map->addr, map->iova, map->len,
+ 1);
+ if (ret) {
+ RTE_LOG(ERR, EAL, "Couldn't map user memory for DMA: "
+ "va: 0x%" PRIx64 " "
+ "iova: 0x%" PRIx64 " "
+ "len: 0x%" PRIu64 "\n",
+ map->addr, map->iova,
+ map->len);
+ rte_spinlock_unlock(
+ &user_mem_maps.lock);
+ return -1;
+ }
+ }
+ rte_spinlock_unlock(&user_mem_maps.lock);
}
}
type1_map(const struct rte_memseg *ms, void *arg)
{
int *vfio_container_fd = arg;
+
+ return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ ms->len, 1);
+}
+
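+/* map or unmap one memory region via the type1 IOMMU MAP/UNMAP_DMA ioctls */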
+static int
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
struct vfio_iommu_type1_dma_map dma_map;
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
int ret;
- memset(&dma_map, 0, sizeof(dma_map));
- dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
- dma_map.vaddr = ms->addr_64;
- dma_map.size = ms->len;
- dma_map.iova = ms->iova;
- dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+ if (do_map != 0) {
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = vaddr;
+ dma_map.size = len;
+ dma_map.iova = iova;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+ VFIO_DMA_MAP_FLAG_WRITE;
- ret = ioctl(*vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
-
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
errno, strerror(errno));
- return -1;
+ return -1;
+ }
+ } else {
+ memset(&dma_unmap, 0, sizeof(dma_unmap));
+ dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ dma_unmap.size = len;
+ dma_unmap.iova = iova;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+ &dma_unmap);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
}
+
return 0;
}
return rte_memseg_walk(type1_map, &vfio_container_fd);
}
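+/* map or unmap one region for the sPAPR IOMMU; on unmap, the virtual address
+ * range is also unregistered from the IOMMU
+ */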
+static int
+vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ struct vfio_iommu_type1_dma_map dma_map;
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
+ int ret;
+
+ if (do_map != 0) {
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = vaddr;
+ dma_map.size = len;
+ dma_map.iova = iova;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+ VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ } else {
+ struct vfio_iommu_spapr_register_memory reg = {
+ .argsz = sizeof(reg),
+ .flags = 0
+ };
+ reg.vaddr = (uintptr_t) vaddr;
+ reg.size = len;
+
+ ret = ioctl(vfio_container_fd,
+ VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ memset(&dma_unmap, 0, sizeof(dma_unmap));
+ dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ dma_unmap.size = len;
+ dma_unmap.iova = iova;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,
+ &dma_unmap);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
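+/* rte_memseg_walk() callback: (re)map a single DPDK memseg for DMA */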
+static int
+vfio_spapr_map_walk(const struct rte_memseg *ms, void *arg)
+{
+ int *vfio_container_fd = arg;
+
+ return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ ms->len, 1);
+}
+
struct spapr_walk_param {
uint64_t window_size;
uint64_t hugepage_sz;
};
static int
-spapr_window_size(const struct rte_memseg *ms, void *arg)
+vfio_spapr_window_size_walk(const struct rte_memseg *ms, void *arg)
{
struct spapr_walk_param *param = arg;
uint64_t max = ms->iova + ms->len;
}
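+/* remove the default 32-bit DMA window and create a new one of the size
+ * requested by the caller
+ */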
static int
-spapr_map(const struct rte_memseg *ms, void *arg)
-{
- struct vfio_iommu_type1_dma_map dma_map;
- struct vfio_iommu_spapr_register_memory reg = {
- .argsz = sizeof(reg),
- .flags = 0
+vfio_spapr_create_new_dma_window(int vfio_container_fd,
+ struct vfio_iommu_spapr_tce_create *create) {
+ struct vfio_iommu_spapr_tce_remove remove = {
+ .argsz = sizeof(remove),
+ };
+ struct vfio_iommu_spapr_tce_info info = {
+ .argsz = sizeof(info),
};
- int *vfio_container_fd = arg;
int ret;
- reg.vaddr = (uintptr_t) ms->addr;
- reg.size = ms->len;
- ret = ioctl(*vfio_container_fd,
- VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
+ /* query spapr iommu info */
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
if (ret) {
- RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, error %i (%s)\n",
- errno, strerror(errno));
+ RTE_LOG(ERR, EAL, " cannot get iommu info, "
+ "error %i (%s)\n", errno, strerror(errno));
return -1;
}
- memset(&dma_map, 0, sizeof(dma_map));
- dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
- dma_map.vaddr = ms->addr_64;
- dma_map.size = ms->len;
- dma_map.iova = ms->iova;
- dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
- VFIO_DMA_MAP_FLAG_WRITE;
-
- ret = ioctl(*vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+ /* remove default DMA of 32 bit window */
+ remove.start_addr = info.dma32_window_start;
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot remove default DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+ /* create new DMA window */
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create);
if (ret) {
- RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n",
- errno, strerror(errno));
+ RTE_LOG(ERR, EAL, " cannot create new DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ if (create->start_addr != 0) {
+ RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
return -1;
}
}
static int
-vfio_spapr_dma_map(int vfio_container_fd)
+vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
{
struct spapr_walk_param param;
- int ret;
- struct vfio_iommu_spapr_tce_info info = {
- .argsz = sizeof(info),
- };
struct vfio_iommu_spapr_tce_create create = {
.argsz = sizeof(create),
};
- struct vfio_iommu_spapr_tce_remove remove = {
- .argsz = sizeof(remove),
- };
+ int i, ret = 0;
+
+ rte_spinlock_lock(&user_mem_maps.lock);
+ /* check if window size needs to be adjusted */
memset(¶m, 0, sizeof(param));
- /* query spapr iommu info */
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot get iommu info, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
+ if (rte_memseg_walk(vfio_spapr_window_size_walk, ¶m) < 0) {
+ RTE_LOG(ERR, EAL, "Could not get window size\n");
+ ret = -1;
+ goto out;
}
- /* remove default DMA of 32 bit window */
- remove.start_addr = info.dma32_window_start;
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot remove default DMA window, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
+ /* also check user maps */
+ for (i = 0; i < user_mem_maps.n_maps; i++) {
+ uint64_t max = user_mem_maps.maps[i].iova +
+ user_mem_maps.maps[i].len;
+ param.window_size = RTE_MAX(param.window_size, max);
}
- /* create DMA window from 0 to max(phys_addr + len) */
- rte_memseg_walk(spapr_window_size, &param);
-
/* sPAPR requires window size to be a power of 2 */
create.window_size = rte_align64pow2(param.window_size);
create.page_shift = __builtin_ctzll(param.hugepage_sz);
create.levels = 1;
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot create new DMA window, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
+ if (do_map) {
+ /* re-create window and remap the entire memory */
+ if (iova > create.window_size) {
+ if (vfio_spapr_create_new_dma_window(vfio_container_fd,
+ &create) < 0) {
+ RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
+ ret = -1;
+ goto out;
+ }
+ if (rte_memseg_walk(vfio_spapr_map_walk,
+ &vfio_container_fd) < 0) {
+ RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
+ ret = -1;
+ goto out;
+ }
+ /* remap all user maps */
+ for (i = 0; i < user_mem_maps.n_maps; i++) {
+ struct user_mem_map *map =
+ &user_mem_maps.maps[i];
+ if (vfio_spapr_dma_do_map(vfio_container_fd,
+ map->addr, map->iova, map->len,
+ 1)) {
+ RTE_LOG(ERR, EAL, "Could not recreate user DMA maps\n");
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ /* now that we've remapped all of the memory that was present
+ * before, map the segment that we were requested to map.
+ */
+ if (vfio_spapr_dma_do_map(vfio_container_fd,
+ vaddr, iova, len, 1) < 0) {
+ RTE_LOG(ERR, EAL, "Could not map segment\n");
+ ret = -1;
+ goto out;
+ }
+ } else {
+ /* for unmap, check if iova within DMA window */
+ if (iova > create.window_size) {
+ RTE_LOG(ERR, EAL, "iova beyond DMA window for unmap");
+ ret = -1;
+ goto out;
+ }
+
+ vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);
}
+out:
+ rte_spinlock_unlock(&user_mem_maps.lock);
+ return ret;
+}
- if (create.start_addr != 0) {
- RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
+static int
+vfio_spapr_dma_map(int vfio_container_fd)
+{
+ struct vfio_iommu_spapr_tce_create create = {
+ .argsz = sizeof(create),
+ };
+ struct spapr_walk_param param;
+
+ memset(¶m, 0, sizeof(param));
+
+ /* create DMA window from 0 to max(phys_addr + len) */
+ rte_memseg_walk(vfio_spapr_window_size_walk, &param);
+
+ /* sPAPR requires window size to be a power of 2 */
+ create.window_size = rte_align64pow2(param.window_size);
+ create.page_shift = __builtin_ctzll(param.hugepage_sz);
+ create.levels = 1;
+
+ if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0) {
+ RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
return -1;
}
/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
- if (rte_memseg_walk(spapr_map, &vfio_container_fd) < 0)
+ if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)
return -1;
return 0;
return 0;
}
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+ uint64_t __rte_unused vaddr,
+ uint64_t __rte_unused iova, uint64_t __rte_unused len,
+ int __rte_unused do_map)
+{
+ /* No-IOMMU mode does not need DMA mapping */
+ return 0;
+}
+
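+/* dispatch a user DMA map/unmap request to the currently active IOMMU type */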
+static int
+vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len, int do_map)
+{
+ const struct vfio_iommu_type *t = vfio_cfg.vfio_iommu_type;
+
+ if (!t) {
+ RTE_LOG(ERR, EAL, " VFIO support not initialized\n");
+ rte_errno = ENODEV;
+ return -1;
+ }
+
+ if (!t->dma_user_map_func) {
+ RTE_LOG(ERR, EAL,
+ " VFIO custom DMA region maping not supported by IOMMU %s\n",
+ t->name);
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+
+ return t->dma_user_map_func(vfio_cfg.vfio_container_fd, vaddr, iova,
+ len, do_map);
+}
+
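+/* map a region of external memory for DMA and record the mapping, so that it
+ * can be recreated if VFIO groups are hotplugged later
+ */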
+int __rte_experimental
+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+ struct user_mem_map *new_map;
+ int ret = 0;
+
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ rte_spinlock_lock(&user_mem_maps.lock);
+ if (user_mem_maps.n_maps == VFIO_MAX_USER_MEM_MAPS) {
+ RTE_LOG(ERR, EAL, "No more space for user mem maps\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ /* map the entry */
+ if (vfio_dma_mem_map(vaddr, iova, len, 1)) {
+ /* technically, this will fail if there are currently no devices
+ * plugged in, even though the mapping might have succeeded had a
+ * device been added later. however, since we cannot verify whether
+ * this is a valid mapping without having a device attached,
+ * consider it unsupported: we can't just store any old mapping and
+ * pollute the list of active mappings.
+ */
+ RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n");
+ ret = -1;
+ goto out;
+ }
+ /* create new user mem map entry */
+ new_map = &user_mem_maps.maps[user_mem_maps.n_maps++];
+ new_map->addr = vaddr;
+ new_map->iova = iova;
+ new_map->len = len;
+
+ compact_user_maps();
+out:
+ rte_spinlock_unlock(&user_mem_maps.lock);
+ return ret;
+}
+
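+/* unmap a region previously mapped with rte_vfio_dma_map(); partially
+ * unmapping a stored region splits its entry in two
+ */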
+int __rte_experimental
+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+ struct user_mem_map *map, *new_map = NULL;
+ int ret = 0;
+
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ rte_spinlock_lock(&user_mem_maps.lock);
+
+ /* find our mapping */
+ map = find_user_mem_map(vaddr, iova, len);
+ if (!map) {
+ RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n");
+ rte_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+ if (map->addr != vaddr || map->iova != iova || map->len != len) {
+ /* we're partially unmapping a previously mapped region, so we
+ * need to split entry into two.
+ */
+ if (user_mem_maps.n_maps == VFIO_MAX_USER_MEM_MAPS) {
+ RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ new_map = &user_mem_maps.maps[user_mem_maps.n_maps++];
+ }
+
+ /* unmap the entry */
+ if (vfio_dma_mem_map(vaddr, iova, len, 0)) {
+ /* there may not be any devices plugged in, so unmapping will
+ * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't
+ * stop us from removing the mapping, as the assumption is we
+ * won't be needing this memory any more and thus will want to
+ * prevent it from being remapped again on hotplug. so, only
+ * fail if we indeed failed to unmap (e.g. if the mapping was
+ * within our mapped range but had invalid alignment).
+ */
+ if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
+ RTE_LOG(ERR, EAL, "Couldn't unmap region for DMA\n");
+ ret = -1;
+ goto out;
+ } else {
+ RTE_LOG(DEBUG, EAL, "DMA unmapping failed, but removing mappings anyway\n");
+ }
+ }
+ /* remove map from the list of active mappings */
+ if (new_map != NULL) {
+ adjust_map(map, new_map, vaddr, len);
+
+ /* if we've created a new map by splitting, sort everything */
+ if (!is_null_map(new_map)) {
+ compact_user_maps();
+ } else {
+ /* we've created a new mapping, but it was unused */
+ user_mem_maps.n_maps--;
+ }
+ } else {
+ memset(map, 0, sizeof(*map));
+ compact_user_maps();
+ user_mem_maps.n_maps--;
+ }
+
+out:
+ rte_spinlock_unlock(&user_mem_maps.lock);
+ return ret;
+}
+
int
rte_vfio_noiommu_is_enabled(void)
{
return c == 'Y';
}
+#else
+
+int __rte_experimental
+rte_vfio_dma_map(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+ uint64_t __rte_unused len)
+{
+ return -1;
+}
+
+int __rte_experimental
+rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+ uint64_t __rte_unused len)
+{
+ return -1;
+}
+
#endif