X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal_vfio.c;h=ebecde12ca8ab523967a71317b0449f2544b6d33;hb=998c89f148ee31564ccde958056e54418c18f10c;hp=984b1a123a7cd9cc68c8e1e70c582e24125fd29c;hpb=964b2f3bfb07ae95fcf4570269a6bc0e1c0affec;p=dpdk.git diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 984b1a123a..ebecde12ca 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2014 Intel Corporation + * Copyright(c) 2010-2018 Intel Corporation */ #include @@ -22,8 +22,36 @@ #define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb" +/* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can + * recreate the mappings for DPDK segments, but we cannot do so for memory that + * was registered by the user themselves, so we need to store the user mappings + * somewhere, to recreate them later. + */ +#define VFIO_MAX_USER_MEM_MAPS 256 +struct user_mem_map { + uint64_t addr; + uint64_t iova; + uint64_t len; +}; + +struct user_mem_maps { + rte_spinlock_recursive_t lock; + int n_maps; + struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS]; +}; + +struct vfio_config { + int vfio_enabled; + int vfio_container_fd; + int vfio_active_groups; + const struct vfio_iommu_type *vfio_iommu_type; + struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; + struct user_mem_maps mem_maps; +}; + /* per-process VFIO config */ -static struct vfio_config vfio_cfg; +static struct vfio_config vfio_cfgs[VFIO_MAX_CONTAINERS]; +static struct vfio_config *default_vfio_cfg = &vfio_cfgs[0]; static int vfio_type1_dma_map(int); static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); @@ -31,8 +59,8 @@ static int vfio_spapr_dma_map(int); static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); static int vfio_noiommu_dma_map(int); static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); -static int vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len, - int do_map); +static int vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, + uint64_t iova, uint64_t len, int do_map); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { @@ -59,61 +87,6 @@ static const struct vfio_iommu_type iommu_types[] = { }, }; -/* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can - * recreate the mappings for DPDK segments, but we cannot do so for memory that - * was registered by the user themselves, so we need to store the user mappings - * somewhere, to recreate them later. - */ -#define VFIO_MAX_USER_MEM_MAPS 256 -struct user_mem_map { - uint64_t addr; - uint64_t iova; - uint64_t len; -}; -static struct { - rte_spinlock_recursive_t lock; - int n_maps; - struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS]; -} user_mem_maps = { - .lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER -}; - -/* for sPAPR IOMMU, we will need to walk memseg list, but we cannot use - * rte_memseg_walk() because by the time we enter callback we will be holding a - * write lock, so regular rte-memseg_walk will deadlock. copying the same - * iteration code everywhere is not ideal as well. so, use a lockless copy of - * memseg walk here. 
- */ -static int -memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg) -{ - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int i, ms_idx, ret = 0; - - for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { - struct rte_memseg_list *msl = &mcfg->memsegs[i]; - const struct rte_memseg *ms; - struct rte_fbarray *arr; - - if (msl->memseg_arr.count == 0) - continue; - - arr = &msl->memseg_arr; - - ms_idx = rte_fbarray_find_next_used(arr, 0); - while (ms_idx >= 0) { - ms = rte_fbarray_get(arr, ms_idx); - ret = func(msl, ms, arg); - if (ret < 0) - return -1; - if (ret > 0) - return 1; - ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1); - } - } - return 0; -} - static int is_null_map(const struct user_mem_map *map) { @@ -206,29 +179,30 @@ merge_map(struct user_mem_map *left, struct user_mem_map *right) } static struct user_mem_map * -find_user_mem_map(uint64_t addr, uint64_t iova, uint64_t len) +find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr, + uint64_t iova, uint64_t len) { uint64_t va_end = addr + len; uint64_t iova_end = iova + len; int i; - for (i = 0; i < user_mem_maps.n_maps; i++) { - struct user_mem_map *map = &user_mem_maps.maps[i]; + for (i = 0; i < user_mem_maps->n_maps; i++) { + struct user_mem_map *map = &user_mem_maps->maps[i]; uint64_t map_va_end = map->addr + map->len; uint64_t map_iova_end = map->iova + map->len; /* check start VA */ if (addr < map->addr || addr >= map_va_end) continue; - /* check if IOVA end is within boundaries */ - if (va_end <= map->addr || va_end >= map_va_end) + /* check if VA end is within boundaries */ + if (va_end <= map->addr || va_end > map_va_end) continue; - /* check start PA */ + /* check start IOVA */ if (iova < map->iova || iova >= map_iova_end) continue; /* check if IOVA end is within boundaries */ - if (iova_end <= map->iova || iova_end >= map_iova_end) + if (iova_end <= map->iova || iova_end > map_iova_end) continue; /* we've found our map */ @@ -239,20 +213,20 @@ find_user_mem_map(uint64_t addr, uint64_t iova, uint64_t len) /* this will sort all user maps, and merge/compact any adjacent maps */ static void -compact_user_maps(void) +compact_user_maps(struct user_mem_maps *user_mem_maps) { int i, n_merged, cur_idx; - qsort(user_mem_maps.maps, user_mem_maps.n_maps, - sizeof(user_mem_maps.maps[0]), user_mem_map_cmp); + qsort(user_mem_maps->maps, user_mem_maps->n_maps, + sizeof(user_mem_maps->maps[0]), user_mem_map_cmp); /* we'll go over the list backwards when merging */ n_merged = 0; - for (i = user_mem_maps.n_maps - 2; i >= 0; i--) { + for (i = user_mem_maps->n_maps - 2; i >= 0; i--) { struct user_mem_map *l, *r; - l = &user_mem_maps.maps[i]; - r = &user_mem_maps.maps[i + 1]; + l = &user_mem_maps->maps[i]; + r = &user_mem_maps->maps[i + 1]; if (is_null_map(l) || is_null_map(r)) continue; @@ -266,12 +240,12 @@ compact_user_maps(void) */ if (n_merged > 0) { cur_idx = 0; - for (i = 0; i < user_mem_maps.n_maps; i++) { - if (!is_null_map(&user_mem_maps.maps[i])) { + for (i = 0; i < user_mem_maps->n_maps; i++) { + if (!is_null_map(&user_mem_maps->maps[i])) { struct user_mem_map *src, *dst; - src = &user_mem_maps.maps[i]; - dst = &user_mem_maps.maps[cur_idx++]; + src = &user_mem_maps->maps[i]; + dst = &user_mem_maps->maps[cur_idx++]; if (src != dst) { memcpy(dst, src, sizeof(*src)); @@ -279,41 +253,20 @@ compact_user_maps(void) } } } - user_mem_maps.n_maps = cur_idx; + user_mem_maps->n_maps = cur_idx; } } -int -rte_vfio_get_group_fd(int iommu_group_num) +static int +vfio_open_group_fd(int iommu_group_num) 
{ - int i; int vfio_group_fd; char filename[PATH_MAX]; - struct vfio_group *cur_grp; - - /* check if we already have the group descriptor open */ - for (i = 0; i < VFIO_MAX_GROUPS; i++) - if (vfio_cfg.vfio_groups[i].group_num == iommu_group_num) - return vfio_cfg.vfio_groups[i].fd; - - /* Lets see first if there is room for a new group */ - if (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); - return -1; - } + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; - /* Now lets get an index for the new group */ - for (i = 0; i < VFIO_MAX_GROUPS; i++) - if (vfio_cfg.vfio_groups[i].group_num == -1) { - cur_grp = &vfio_cfg.vfio_groups[i]; - break; - } - - /* This should not happen */ - if (i == VFIO_MAX_GROUPS) { - RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); - return -1; - } /* if primary, try to open the group */ if (internal_config.process_type == RTE_PROC_PRIMARY) { /* try regular group format */ @@ -344,109 +297,210 @@ rte_vfio_get_group_fd(int iommu_group_num) /* noiommu group found */ } - cur_grp->group_num = iommu_group_num; - cur_grp->fd = vfio_group_fd; - vfio_cfg.vfio_active_groups++; return vfio_group_fd; } /* if we're in a secondary process, request group fd from the primary - * process via our socket + * process via mp channel. */ - else { - int socket_fd, ret; + p->req = SOCKET_REQ_GROUP; + p->group_num = iommu_group_num; + strcpy(mp_req.name, EAL_VFIO_MP); + mp_req.len_param = sizeof(*p); + mp_req.num_fds = 0; + + vfio_group_fd = -1; + if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && + mp_reply.nb_received == 1) { + mp_rep = &mp_reply.msgs[0]; + p = (struct vfio_mp_param *)mp_rep->param; + if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { + vfio_group_fd = mp_rep->fds[0]; + } else if (p->result == SOCKET_NO_FD) { + RTE_LOG(ERR, EAL, " bad VFIO group fd\n"); + vfio_group_fd = 0; + } + free(mp_reply.msgs); + } - socket_fd = vfio_mp_sync_connect_to_primary(); + if (vfio_group_fd < 0) + RTE_LOG(ERR, EAL, " cannot request group fd\n"); + return vfio_group_fd; +} - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, iommu_group_num) < 0) { - RTE_LOG(ERR, EAL, " cannot send group number!\n"); - close(socket_fd); - return -1; - } - ret = vfio_mp_sync_receive_request(socket_fd); - switch (ret) { - case SOCKET_NO_FD: - close(socket_fd); - return 0; - case SOCKET_OK: - vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); - /* if we got the fd, store it and return it */ - if (vfio_group_fd > 0) { - close(socket_fd); - cur_grp->group_num = iommu_group_num; - cur_grp->fd = vfio_group_fd; - vfio_cfg.vfio_active_groups++; - return vfio_group_fd; - } - /* fall-through on error */ - default: - RTE_LOG(ERR, EAL, " cannot get container fd!\n"); - close(socket_fd); - return -1; +static struct vfio_config * +get_vfio_cfg_by_group_num(int iommu_group_num) +{ + struct vfio_config *vfio_cfg; + int i, j; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + vfio_cfg = &vfio_cfgs[i]; + for (j = 0; j < VFIO_MAX_GROUPS; j++) { + if (vfio_cfg->vfio_groups[j].group_num == + iommu_group_num) + return vfio_cfg; } } - return -1; + + return NULL; } 
+static struct vfio_config * +get_vfio_cfg_by_group_fd(int vfio_group_fd) +{ + struct vfio_config *vfio_cfg; + int i, j; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + vfio_cfg = &vfio_cfgs[i]; + for (j = 0; j < VFIO_MAX_GROUPS; j++) + if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) + return vfio_cfg; + } -static int -get_vfio_group_idx(int vfio_group_fd) + return NULL; +} + +static struct vfio_config * +get_vfio_cfg_by_container_fd(int container_fd) { int i; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + if (vfio_cfgs[i].vfio_container_fd == container_fd) + return &vfio_cfgs[i]; + } + + return NULL; +} + +int +rte_vfio_get_group_fd(int iommu_group_num) +{ + int i; + int vfio_group_fd; + struct vfio_group *cur_grp; + struct vfio_config *vfio_cfg; + + /* get the vfio_config it belongs to */ + vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); + vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; + + /* check if we already have the group descriptor open */ + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) + return vfio_cfg->vfio_groups[i].fd; + + /* Lets see first if there is room for a new group */ + if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + return -1; + } + + /* Now lets get an index for the new group */ for (i = 0; i < VFIO_MAX_GROUPS; i++) - if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd) - return i; + if (vfio_cfg->vfio_groups[i].group_num == -1) { + cur_grp = &vfio_cfg->vfio_groups[i]; + break; + } + + /* This should not happen */ + if (i == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); + return -1; + } + + vfio_group_fd = vfio_open_group_fd(iommu_group_num); + if (vfio_group_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); + return -1; + } + + cur_grp->group_num = iommu_group_num; + cur_grp->fd = vfio_group_fd; + vfio_cfg->vfio_active_groups++; + + return vfio_group_fd; +} + +static int +get_vfio_group_idx(int vfio_group_fd) +{ + struct vfio_config *vfio_cfg; + int i, j; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + vfio_cfg = &vfio_cfgs[i]; + for (j = 0; j < VFIO_MAX_GROUPS; j++) + if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) + return j; + } + return -1; } static void vfio_group_device_get(int vfio_group_fd) { + struct vfio_config *vfio_cfg; int i; + vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid group fd!\n"); + return; + } + i = get_vfio_group_idx(vfio_group_fd); if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); else - vfio_cfg.vfio_groups[i].devices++; + vfio_cfg->vfio_groups[i].devices++; } static void vfio_group_device_put(int vfio_group_fd) { + struct vfio_config *vfio_cfg; int i; + vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid group fd!\n"); + return; + } + i = get_vfio_group_idx(vfio_group_fd); if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); else - vfio_cfg.vfio_groups[i].devices--; + vfio_cfg->vfio_groups[i].devices--; } static int vfio_group_device_count(int vfio_group_fd) { + struct vfio_config *vfio_cfg; int i; + vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid group fd!\n"); + return -1; + } + i = get_vfio_group_idx(vfio_group_fd); if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) { RTE_LOG(ERR, EAL, " 
wrong vfio_group index (%d)\n", i); return -1; } - return vfio_cfg.vfio_groups[i].devices; + return vfio_cfg->vfio_groups[i].devices; } static void -vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len) +vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len, + void *arg __rte_unused) { struct rte_memseg_list *msl; struct rte_memseg *ms; @@ -455,23 +509,33 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len) msl = rte_mem_virt2memseg_list(addr); /* for IOVA as VA mode, no need to care for IOVA addresses */ - if (rte_eal_iova_mode() == RTE_IOVA_VA) { + if (rte_eal_iova_mode() == RTE_IOVA_VA && msl->external == 0) { uint64_t vfio_va = (uint64_t)(uintptr_t)addr; if (type == RTE_MEM_EVENT_ALLOC) - vfio_dma_mem_map(vfio_va, vfio_va, len, 1); + vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, + len, 1); else - vfio_dma_mem_map(vfio_va, vfio_va, len, 0); + vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, + len, 0); return; } /* memsegs are contiguous in memory */ ms = rte_mem_virt2memseg(addr, msl); while (cur_len < len) { + /* some memory segments may have invalid IOVA */ + if (ms->iova == RTE_BAD_IOVA) { + RTE_LOG(DEBUG, EAL, "Memory segment at %p has bad IOVA, skipping\n", + ms->addr); + goto next; + } if (type == RTE_MEM_EVENT_ALLOC) - vfio_dma_mem_map(ms->addr_64, ms->iova, ms->len, 1); + vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, + ms->iova, ms->len, 1); else - vfio_dma_mem_map(ms->addr_64, ms->iova, ms->len, 0); - + vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, + ms->iova, ms->len, 0); +next: cur_len += ms->len; ++ms; } @@ -481,58 +545,23 @@ int rte_vfio_clear_group(int vfio_group_fd) { int i; - int socket_fd, ret; - - if (internal_config.process_type == RTE_PROC_PRIMARY) { + struct vfio_config *vfio_cfg; - i = get_vfio_group_idx(vfio_group_fd); - if (i < 0) - return -1; - vfio_cfg.vfio_groups[i].group_num = -1; - vfio_cfg.vfio_groups[i].fd = -1; - vfio_cfg.vfio_groups[i].devices = 0; - vfio_cfg.vfio_active_groups--; - return 0; - } - - /* This is just for SECONDARY processes */ - socket_fd = vfio_mp_sync_connect_to_primary(); - - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - - if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); + vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid group fd!\n"); return -1; } - if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) { - RTE_LOG(ERR, EAL, " cannot send group fd!\n"); - close(socket_fd); + i = get_vfio_group_idx(vfio_group_fd); + if (i < 0) return -1; - } + vfio_cfg->vfio_groups[i].group_num = -1; + vfio_cfg->vfio_groups[i].fd = -1; + vfio_cfg->vfio_groups[i].devices = 0; + vfio_cfg->vfio_active_groups--; - ret = vfio_mp_sync_receive_request(socket_fd); - switch (ret) { - case SOCKET_NO_FD: - RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n"); - close(socket_fd); - break; - case SOCKET_OK: - close(socket_fd); - return 0; - case SOCKET_ERR: - RTE_LOG(ERR, EAL, " Socket error\n"); - close(socket_fd); - break; - default: - RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret); - close(socket_fd); - } - return -1; + return 0; } int @@ -544,6 +573,9 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, struct vfio_group_status group_status = { .argsz = sizeof(group_status) }; + struct vfio_config *vfio_cfg; + struct user_mem_maps *user_mem_maps; + int 
vfio_container_fd; int vfio_group_fd; int iommu_group_num; int i, ret; @@ -592,12 +624,18 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, return -1; } + /* get the vfio_config it belongs to */ + vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); + vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; + vfio_container_fd = vfio_cfg->vfio_container_fd; + user_mem_maps = &vfio_cfg->mem_maps; + /* check if group does not have a container yet */ if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) { /* add group to a container */ ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER, - &vfio_cfg.vfio_container_fd); + &vfio_container_fd); if (ret) { RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " "error %i (%s)\n", dev_addr, errno, strerror(errno)); @@ -615,11 +653,12 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, * functionality. */ if (internal_config.process_type == RTE_PROC_PRIMARY && - vfio_cfg.vfio_active_groups == 1) { + vfio_cfg->vfio_active_groups == 1 && + vfio_group_device_count(vfio_group_fd) == 0) { const struct vfio_iommu_type *t; /* select an IOMMU type which we will be using */ - t = vfio_set_iommu_type(vfio_cfg.vfio_container_fd); + t = vfio_set_iommu_type(vfio_container_fd); if (!t) { RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", @@ -632,7 +671,10 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, * after registering callback, to prevent races */ rte_rwlock_read_lock(mem_lock); - ret = t->dma_map_func(vfio_cfg.vfio_container_fd); + if (vfio_cfg == default_vfio_cfg) + ret = t->dma_map_func(vfio_container_fd); + else + ret = 0; if (ret) { RTE_LOG(ERR, EAL, " %s DMA remapping failed, error %i (%s)\n", @@ -643,22 +685,22 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, return -1; } - vfio_cfg.vfio_iommu_type = t; + vfio_cfg->vfio_iommu_type = t; /* re-map all user-mapped segments */ - rte_spinlock_recursive_lock(&user_mem_maps.lock); + rte_spinlock_recursive_lock(&user_mem_maps->lock); /* this IOMMU type may not support DMA mapping, but * if we have mappings in the list - that means we have * previously mapped something successfully, so we can * be sure that DMA mapping is supported. 
*/ - for (i = 0; i < user_mem_maps.n_maps; i++) { + for (i = 0; i < user_mem_maps->n_maps; i++) { struct user_mem_map *map; - map = &user_mem_maps.maps[i]; + map = &user_mem_maps->maps[i]; ret = t->dma_user_map_func( - vfio_cfg.vfio_container_fd, + vfio_container_fd, map->addr, map->iova, map->len, 1); if (ret) { @@ -669,17 +711,20 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, map->addr, map->iova, map->len); rte_spinlock_recursive_unlock( - &user_mem_maps.lock); + &user_mem_maps->lock); rte_rwlock_read_unlock(mem_lock); return -1; } } - rte_spinlock_recursive_unlock(&user_mem_maps.lock); + rte_spinlock_recursive_unlock(&user_mem_maps->lock); /* register callback for mem events */ - ret = rte_mem_event_callback_register( + if (vfio_cfg == default_vfio_cfg) + ret = rte_mem_event_callback_register( VFIO_MEM_EVENT_CLB_NAME, - vfio_mem_event_callback); + vfio_mem_event_callback, NULL); + else + ret = 0; /* unlock memory hotplug */ rte_rwlock_read_unlock(mem_lock); @@ -733,6 +778,7 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, struct vfio_group_status group_status = { .argsz = sizeof(group_status) }; + struct vfio_config *vfio_cfg; int vfio_group_fd; int iommu_group_num; int ret; @@ -762,6 +808,10 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, goto out; } + /* get the vfio_config it belongs to */ + vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); + vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; + /* At this point we got an active group. Closing it will make the * container detachment. If this is the last active group, VFIO kernel * code will unset the container and the IOMMU mappings. @@ -799,8 +849,9 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, /* if there are no active device groups, unregister the callback to * avoid spurious attempts to map/unmap memory from VFIO. 
*/ - if (vfio_cfg.vfio_active_groups == 0) - rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME); + if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0) + rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME, + NULL); /* success */ ret = 0; @@ -814,13 +865,22 @@ int rte_vfio_enable(const char *modname) { /* initialize group list */ - int i; + int i, j; int vfio_available; - for (i = 0; i < VFIO_MAX_GROUPS; i++) { - vfio_cfg.vfio_groups[i].fd = -1; - vfio_cfg.vfio_groups[i].group_num = -1; - vfio_cfg.vfio_groups[i].devices = 0; + rte_spinlock_recursive_t lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER; + + for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { + vfio_cfgs[i].vfio_container_fd = -1; + vfio_cfgs[i].vfio_active_groups = 0; + vfio_cfgs[i].vfio_iommu_type = NULL; + vfio_cfgs[i].mem_maps.lock = lock; + + for (j = 0; j < VFIO_MAX_GROUPS; j++) { + vfio_cfgs[i].vfio_groups[j].fd = -1; + vfio_cfgs[i].vfio_groups[j].group_num = -1; + vfio_cfgs[i].vfio_groups[j].devices = 0; + } } /* inform the user that we are probing for VFIO */ @@ -842,12 +902,12 @@ rte_vfio_enable(const char *modname) return 0; } - vfio_cfg.vfio_container_fd = rte_vfio_get_container_fd(); + default_vfio_cfg->vfio_container_fd = rte_vfio_get_container_fd(); /* check if we have VFIO driver enabled */ - if (vfio_cfg.vfio_container_fd != -1) { + if (default_vfio_cfg->vfio_container_fd != -1) { RTE_LOG(NOTICE, EAL, "VFIO support initialized\n"); - vfio_cfg.vfio_enabled = 1; + default_vfio_cfg->vfio_enabled = 1; } else { RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n"); } @@ -859,7 +919,7 @@ int rte_vfio_is_enabled(const char *modname) { const int mod_available = rte_eal_check_module(modname) > 0; - return vfio_cfg.vfio_enabled && mod_available; + return default_vfio_cfg->vfio_enabled && mod_available; } const struct vfio_iommu_type * @@ -923,6 +983,11 @@ int rte_vfio_get_container_fd(void) { int ret, vfio_container_fd; + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; + /* if we're in a primary process, try to open the container */ if (internal_config.process_type == RTE_PROC_PRIMARY) { @@ -953,33 +1018,29 @@ rte_vfio_get_container_fd(void) } return vfio_container_fd; - } else { - /* - * if we're in a secondary process, request container fd from the - * primary process via our socket - */ - int socket_fd; - - socket_fd = vfio_mp_sync_connect_to_primary(); - if (socket_fd < 0) { - RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); - return -1; - } - if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) { - RTE_LOG(ERR, EAL, " cannot request container fd!\n"); - close(socket_fd); - return -1; - } - vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd); - if (vfio_container_fd < 0) { - RTE_LOG(ERR, EAL, " cannot get container fd!\n"); - close(socket_fd); - return -1; + } + /* + * if we're in a secondary process, request container fd from the + * primary process via mp channel + */ + p->req = SOCKET_REQ_CONTAINER; + strcpy(mp_req.name, EAL_VFIO_MP); + mp_req.len_param = sizeof(*p); + mp_req.num_fds = 0; + + vfio_container_fd = -1; + if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && + mp_reply.nb_received == 1) { + mp_rep = &mp_reply.msgs[0]; + p = (struct vfio_mp_param *)mp_rep->param; + if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { + free(mp_reply.msgs); + return mp_rep->fds[0]; } - close(socket_fd); - return 
vfio_container_fd; + free(mp_reply.msgs); } + RTE_LOG(ERR, EAL, " cannot request container fd\n"); return -1; } @@ -1027,11 +1088,14 @@ rte_vfio_get_group_num(const char *sysfs_base, } static int -type1_map(const struct rte_memseg_list *msl __rte_unused, - const struct rte_memseg *ms, void *arg) +type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms, + void *arg) { int *vfio_container_fd = arg; + if (msl->external) + return 0; + return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, ms->len, 1); } @@ -1090,8 +1154,22 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, struct vfio_iommu_type1_dma_map dma_map; struct vfio_iommu_type1_dma_unmap dma_unmap; int ret; + struct vfio_iommu_spapr_register_memory reg = { + .argsz = sizeof(reg), + .flags = 0 + }; + reg.vaddr = (uintptr_t) vaddr; + reg.size = len; if (do_map != 0) { + ret = ioctl(vfio_container_fd, + VFIO_IOMMU_SPAPR_REGISTER_MEMORY, ®); + if (ret) { + RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + memset(&dma_map, 0, sizeof(dma_map)); dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = vaddr; @@ -1108,13 +1186,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, } } else { - struct vfio_iommu_spapr_register_memory reg = { - .argsz = sizeof(reg), - .flags = 0 - }; - reg.vaddr = (uintptr_t) vaddr; - reg.size = len; - ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, ®); if (ret) { @@ -1141,12 +1212,15 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, } static int -vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused, +vfio_spapr_map_walk(const struct rte_memseg_list *msl, const struct rte_memseg *ms, void *arg) { int *vfio_container_fd = arg; - return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, + if (msl->external) + return 0; + + return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova, ms->len, 1); } @@ -1155,12 +1229,15 @@ struct spapr_walk_param { uint64_t hugepage_sz; }; static int -vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused, +vfio_spapr_window_size_walk(const struct rte_memseg_list *msl, const struct rte_memseg *ms, void *arg) { struct spapr_walk_param *param = arg; uint64_t max = ms->iova + ms->len; + if (msl->external) + return 0; + if (max > param->window_size) { param->hugepage_sz = ms->hugepage_sz; param->window_size = max; @@ -1221,14 +1298,24 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, struct vfio_iommu_spapr_tce_create create = { .argsz = sizeof(create), }; + struct vfio_config *vfio_cfg; + struct user_mem_maps *user_mem_maps; int i, ret = 0; - rte_spinlock_recursive_lock(&user_mem_maps.lock); + vfio_cfg = get_vfio_cfg_by_container_fd(vfio_container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, " invalid container fd!\n"); + return -1; + } + + user_mem_maps = &vfio_cfg->mem_maps; + rte_spinlock_recursive_lock(&user_mem_maps->lock); /* check if window size needs to be adjusted */ memset(¶m, 0, sizeof(param)); - if (memseg_walk_thread_unsafe(vfio_spapr_window_size_walk, + /* we're inside a callback so use thread-unsafe version */ + if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk, ¶m) < 0) { RTE_LOG(ERR, EAL, "Could not get window size\n"); ret = -1; @@ -1236,9 +1323,9 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, } /* also check 
user maps */ - for (i = 0; i < user_mem_maps.n_maps; i++) { - uint64_t max = user_mem_maps.maps[i].iova + - user_mem_maps.maps[i].len; + for (i = 0; i < user_mem_maps->n_maps; i++) { + uint64_t max = user_mem_maps->maps[i].iova + + user_mem_maps->maps[i].len; create.window_size = RTE_MAX(create.window_size, max); } @@ -1257,16 +1344,18 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, ret = -1; goto out; } - if (memseg_walk_thread_unsafe(vfio_spapr_map_walk, + /* we're inside a callback, so use thread-unsafe version + */ + if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk, &vfio_container_fd) < 0) { RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n"); ret = -1; goto out; } /* remap all user maps */ - for (i = 0; i < user_mem_maps.n_maps; i++) { + for (i = 0; i < user_mem_maps->n_maps; i++) { struct user_mem_map *map = - &user_mem_maps.maps[i]; + &user_mem_maps->maps[i]; if (vfio_spapr_dma_do_map(vfio_container_fd, map->addr, map->iova, map->len, 1)) { @@ -1307,7 +1396,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0); } out: - rte_spinlock_recursive_unlock(&user_mem_maps.lock); + rte_spinlock_recursive_unlock(&user_mem_maps->lock); return ret; } @@ -1359,9 +1448,10 @@ vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd, } static int -vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len, int do_map) +vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map) { - const struct vfio_iommu_type *t = vfio_cfg.vfio_iommu_type; + const struct vfio_iommu_type *t = vfio_cfg->vfio_iommu_type; if (!t) { RTE_LOG(ERR, EAL, " VFIO support not initialized\n"); @@ -1377,30 +1467,28 @@ vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len, int do_map) return -1; } - return t->dma_user_map_func(vfio_cfg.vfio_container_fd, vaddr, iova, + return t->dma_user_map_func(vfio_cfg->vfio_container_fd, vaddr, iova, len, do_map); } -int __rte_experimental -rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len) +static int +container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, + uint64_t len) { struct user_mem_map *new_map; + struct user_mem_maps *user_mem_maps; int ret = 0; - if (len == 0) { - rte_errno = EINVAL; - return -1; - } - - rte_spinlock_recursive_lock(&user_mem_maps.lock); - if (user_mem_maps.n_maps == VFIO_MAX_USER_MEM_MAPS) { + user_mem_maps = &vfio_cfg->mem_maps; + rte_spinlock_recursive_lock(&user_mem_maps->lock); + if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) { RTE_LOG(ERR, EAL, "No more space for user mem maps\n"); rte_errno = ENOMEM; ret = -1; goto out; } /* map the entry */ - if (vfio_dma_mem_map(vaddr, iova, len, 1)) { + if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) { /* technically, this will fail if there are currently no devices * plugged in, even if a device were added later, this mapping * might have succeeded. 
however, since we cannot verify if this @@ -1413,32 +1501,30 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len) goto out; } /* create new user mem map entry */ - new_map = &user_mem_maps.maps[user_mem_maps.n_maps++]; + new_map = &user_mem_maps->maps[user_mem_maps->n_maps++]; new_map->addr = vaddr; new_map->iova = iova; new_map->len = len; - compact_user_maps(); + compact_user_maps(user_mem_maps); out: - rte_spinlock_recursive_unlock(&user_mem_maps.lock); + rte_spinlock_recursive_unlock(&user_mem_maps->lock); return ret; } -int __rte_experimental -rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len) +static int +container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, + uint64_t len) { struct user_mem_map *map, *new_map = NULL; + struct user_mem_maps *user_mem_maps; int ret = 0; - if (len == 0) { - rte_errno = EINVAL; - return -1; - } - - rte_spinlock_recursive_lock(&user_mem_maps.lock); + user_mem_maps = &vfio_cfg->mem_maps; + rte_spinlock_recursive_lock(&user_mem_maps->lock); /* find our mapping */ - map = find_user_mem_map(vaddr, iova, len); + map = find_user_mem_map(user_mem_maps, vaddr, iova, len); if (!map) { RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n"); rte_errno = EINVAL; @@ -1449,17 +1535,17 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len) /* we're partially unmapping a previously mapped region, so we * need to split entry into two. */ - if (user_mem_maps.n_maps == VFIO_MAX_USER_MEM_MAPS) { + if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) { RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n"); rte_errno = ENOMEM; ret = -1; goto out; } - new_map = &user_mem_maps.maps[user_mem_maps.n_maps++]; + new_map = &user_mem_maps->maps[user_mem_maps->n_maps++]; } /* unmap the entry */ - if (vfio_dma_mem_map(vaddr, iova, len, 0)) { + if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) { /* there may not be any devices plugged in, so unmapping will * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't * stop us from removing the mapping, as the assumption is we @@ -1482,22 +1568,44 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len) /* if we've created a new map by splitting, sort everything */ if (!is_null_map(new_map)) { - compact_user_maps(); + compact_user_maps(user_mem_maps); } else { /* we've created a new mapping, but it was unused */ - user_mem_maps.n_maps--; + user_mem_maps->n_maps--; } } else { memset(map, 0, sizeof(*map)); - compact_user_maps(); - user_mem_maps.n_maps--; + compact_user_maps(user_mem_maps); + user_mem_maps->n_maps--; } out: - rte_spinlock_recursive_unlock(&user_mem_maps.lock); + rte_spinlock_recursive_unlock(&user_mem_maps->lock); return ret; } +int +rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len) +{ + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + return container_dma_map(default_vfio_cfg, vaddr, iova, len); +} + +int +rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len) +{ + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + return container_dma_unmap(default_vfio_cfg, vaddr, iova, len); +} + int rte_vfio_noiommu_is_enabled(void) { @@ -1530,20 +1638,299 @@ rte_vfio_noiommu_is_enabled(void) return c == 'Y'; } -#else +int +rte_vfio_container_create(void) +{ + int i; + + /* Find an empty slot to store new vfio config */ + for (i = 1; i < VFIO_MAX_CONTAINERS; i++) { + if (vfio_cfgs[i].vfio_container_fd == -1) + break; + } + + if (i == VFIO_MAX_CONTAINERS) { + RTE_LOG(ERR, EAL, "exceed max vfio container 
limit\n"); + return -1; + } + + vfio_cfgs[i].vfio_container_fd = rte_vfio_get_container_fd(); + if (vfio_cfgs[i].vfio_container_fd < 0) { + RTE_LOG(NOTICE, EAL, "fail to create a new container\n"); + return -1; + } + + return vfio_cfgs[i].vfio_container_fd; +} int __rte_experimental +rte_vfio_container_destroy(int container_fd) +{ + struct vfio_config *vfio_cfg; + int i; + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg->vfio_groups[i].group_num != -1) + rte_vfio_container_group_unbind(container_fd, + vfio_cfg->vfio_groups[i].group_num); + + close(container_fd); + vfio_cfg->vfio_container_fd = -1; + vfio_cfg->vfio_active_groups = 0; + vfio_cfg->vfio_iommu_type = NULL; + + return 0; +} + +int +rte_vfio_container_group_bind(int container_fd, int iommu_group_num) +{ + struct vfio_config *vfio_cfg; + struct vfio_group *cur_grp; + int vfio_group_fd; + int i; + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + /* Check room for new group */ + if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + return -1; + } + + /* Get an index for the new group */ + for (i = 0; i < VFIO_MAX_GROUPS; i++) + if (vfio_cfg->vfio_groups[i].group_num == -1) { + cur_grp = &vfio_cfg->vfio_groups[i]; + break; + } + + /* This should not happen */ + if (i == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); + return -1; + } + + vfio_group_fd = vfio_open_group_fd(iommu_group_num); + if (vfio_group_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); + return -1; + } + cur_grp->group_num = iommu_group_num; + cur_grp->fd = vfio_group_fd; + cur_grp->devices = 0; + vfio_cfg->vfio_active_groups++; + + return vfio_group_fd; +} + +int +rte_vfio_container_group_unbind(int container_fd, int iommu_group_num) +{ + struct vfio_config *vfio_cfg; + struct vfio_group *cur_grp = NULL; + int i; + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + for (i = 0; i < VFIO_MAX_GROUPS; i++) { + if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) { + cur_grp = &vfio_cfg->vfio_groups[i]; + break; + } + } + + /* This should not happen */ + if (i == VFIO_MAX_GROUPS || cur_grp == NULL) { + RTE_LOG(ERR, EAL, "Specified group number not found\n"); + return -1; + } + + if (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) { + RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for" + " iommu_group_num %d\n", iommu_group_num); + return -1; + } + cur_grp->group_num = -1; + cur_grp->fd = -1; + cur_grp->devices = 0; + vfio_cfg->vfio_active_groups--; + + return 0; +} + +int +rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len) +{ + struct vfio_config *vfio_cfg; + + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + return container_dma_map(vfio_cfg, vaddr, iova, len); +} + +int +rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len) +{ + struct vfio_config *vfio_cfg; + + if (len == 0) { + rte_errno = EINVAL; + return -1; + } + + vfio_cfg = 
get_vfio_cfg_by_container_fd(container_fd); + if (vfio_cfg == NULL) { + RTE_LOG(ERR, EAL, "Invalid container fd\n"); + return -1; + } + + return container_dma_unmap(vfio_cfg, vaddr, iova, len); +} + +#else + +int rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova, __rte_unused uint64_t len) { return -1; } -int __rte_experimental +int rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova, __rte_unused uint64_t len) { return -1; } -#endif +int +rte_vfio_setup_device(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int *vfio_dev_fd, + __rte_unused struct vfio_device_info *device_info) +{ + return -1; +} + +int +rte_vfio_release_device(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, __rte_unused int fd) +{ + return -1; +} + +int +rte_vfio_enable(__rte_unused const char *modname) +{ + return -1; +} + +int +rte_vfio_is_enabled(__rte_unused const char *modname) +{ + return -1; +} + +int +rte_vfio_noiommu_is_enabled(void) +{ + return -1; +} + +int +rte_vfio_clear_group(__rte_unused int vfio_group_fd) +{ + return -1; +} + +int +rte_vfio_get_group_num(__rte_unused const char *sysfs_base, + __rte_unused const char *dev_addr, + __rte_unused int *iommu_group_num) +{ + return -1; +} + +int +rte_vfio_get_container_fd(void) +{ + return -1; +} + +int +rte_vfio_get_group_fd(__rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_create(void) +{ + return -1; +} + +int +rte_vfio_container_destroy(__rte_unused int container_fd) +{ + return -1; +} + +int +rte_vfio_container_group_bind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_group_unbind(__rte_unused int container_fd, + __rte_unused int iommu_group_num) +{ + return -1; +} + +int +rte_vfio_container_dma_map(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +int +rte_vfio_container_dma_unmap(__rte_unused int container_fd, + __rte_unused uint64_t vaddr, + __rte_unused uint64_t iova, + __rte_unused uint64_t len) +{ + return -1; +} + +#endif /* VFIO_PRESENT */
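
The patch above replaces the single static vfio_cfg with an array of per-container configurations and exposes them through the rte_vfio_container_* calls. As an editorial illustration only (not part of the commit), the sketch below shows how those calls are meant to be combined; the IOMMU group number (31), the caller-supplied buffer, and the IOVA-as-VA assumption are all hypothetical, and the declarations are assumed to come from rte_vfio.h.

/*
 * Hypothetical usage sketch for the container API introduced by this
 * patch (not part of the commit). Assumptions: the device sits in IOMMU
 * group 31, the caller supplies a DMA-able buffer, and IOVA-as-VA mode
 * is in use so the IOVA equals the virtual address.
 */
#include <stdint.h>

#include <rte_vfio.h>

static int
setup_isolated_container(void *buf, uint64_t len)
{
	int container_fd;
	uint64_t va = (uint64_t)(uintptr_t)buf;

	/* create a container separate from the default one managed by EAL */
	container_fd = rte_vfio_container_create();
	if (container_fd < 0)
		return -1;

	/* bind the (assumed) IOMMU group 31 to the new container */
	if (rte_vfio_container_group_bind(container_fd, 31) < 0)
		goto err;

	/*
	 * map the buffer for DMA; the mapping is recorded in this
	 * container's user_mem_maps so it can be replayed if the group
	 * is re-attached later.
	 */
	if (rte_vfio_container_dma_map(container_fd, va, va, len) < 0)
		goto err;

	return container_fd;

err:
	rte_vfio_container_destroy(container_fd);
	return -1;
}

Note that rte_vfio_dma_map() and rte_vfio_dma_unmap() keep operating on default_vfio_cfg (vfio_cfgs[0]), so existing users of the default container are unaffected; only groups explicitly bound with rte_vfio_container_group_bind() have their DMA mappings tracked in a separate container.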