#include <rte_log.h>
#include <rte_memory.h>
#include <rte_eal_memconfig.h>
+#include <rte_vfio.h>
#include "eal_filesystem.h"
#include "eal_vfio.h"
{
int i;
int vfio_group_fd;
- int group_idx = -1;
char filename[PATH_MAX];
+ struct vfio_group *cur_grp;
/* check if we already have the group descriptor open */
for (i = 0; i < VFIO_MAX_GROUPS; i++)
/* Now lets get an index for the new group */
for (i = 0; i < VFIO_MAX_GROUPS; i++)
if (vfio_cfg.vfio_groups[i].group_no == -1) {
- group_idx = i;
+ cur_grp = &vfio_cfg.vfio_groups[i];
break;
}
/* This should not happen */
- if (group_idx == -1) {
+ if (i == VFIO_MAX_GROUPS) {
RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
return -1;
}
/* noiommu group found */
}
- vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no;
- vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd;
+ cur_grp->group_no = iommu_group_no;
+ cur_grp->fd = vfio_group_fd;
vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
return 0;
case SOCKET_OK:
vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
- /* if we got the fd, return it */
+ /* if we got the fd, store it and return it */
if (vfio_group_fd > 0) {
close(socket_fd);
+ cur_grp->group_no = iommu_group_no;
+ cur_grp->fd = vfio_group_fd;
+ vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
/* fall-through on error */
return -1;
}
+
+static int
+get_vfio_group_idx(int vfio_group_fd)
+{
+ int i;
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd)
+ return i;
+ return -1;
+}
+
+static void
+vfio_group_device_get(int vfio_group_fd)
+{
+ int i;
+
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
+ RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
+ else
+ vfio_cfg.vfio_groups[i].devices++;
+}
+
+static void
+vfio_group_device_put(int vfio_group_fd)
+{
+ int i;
+
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0 || i > (VFIO_MAX_GROUPS - 1))
+ RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
+ else
+ vfio_cfg.vfio_groups[i].devices--;
+}
+
+static int
+vfio_group_device_count(int vfio_group_fd)
+{
+ int i;
+
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) {
+ RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
+ return -1;
+ }
+
+ return vfio_cfg.vfio_groups[i].devices;
+}
+
int
clear_group(int vfio_group_fd)
{
if (internal_config.process_type == RTE_PROC_PRIMARY) {
- for (i = 0; i < VFIO_MAX_GROUPS; i++)
- if (vfio_cfg.vfio_groups[i].fd == vfio_group_fd) {
- vfio_cfg.vfio_groups[i].group_no = -1;
- vfio_cfg.vfio_groups[i].fd = -1;
- vfio_cfg.vfio_active_groups--;
- return 0;
- }
- return -1;
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0)
+ return -1;
+ vfio_cfg.vfio_groups[i].group_no = -1;
+ vfio_cfg.vfio_groups[i].fd = -1;
+ vfio_cfg.vfio_groups[i].devices = 0;
+ vfio_cfg.vfio_active_groups--;
+ return 0;
}
/* This is just for SECONDARY processes */
clear_group(vfio_group_fd);
return -1;
}
- }
- /*
- * pick an IOMMU type and set up DMA mappings for container
- *
- * needs to be done only once, only when first group is assigned to
- * a container and only in primary process. Note this can happen several
- * times with the hotplug functionality.
- */
- if (internal_config.process_type == RTE_PROC_PRIMARY &&
- vfio_cfg.vfio_active_groups == 1) {
- /* select an IOMMU type which we will be using */
- const struct vfio_iommu_type *t =
+ /*
+ * pick an IOMMU type and set up DMA mappings for container
+ *
+ * needs to be done only once, only when first group is
+ * assigned to a container and only in primary process.
+ * Note this can happen several times with the hotplug
+ * functionality.
+ */
+ if (internal_config.process_type == RTE_PROC_PRIMARY &&
+ vfio_cfg.vfio_active_groups == 1) {
+ /* select an IOMMU type which we will be using */
+ const struct vfio_iommu_type *t =
vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
- if (!t) {
- RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", dev_addr);
- close(vfio_group_fd);
- clear_group(vfio_group_fd);
- return -1;
- }
- ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
- if (ret) {
- RTE_LOG(ERR, EAL, " %s DMA remapping failed, "
- "error %i (%s)\n", dev_addr, errno, strerror(errno));
- close(vfio_group_fd);
- clear_group(vfio_group_fd);
- return -1;
+ if (!t) {
+ RTE_LOG(ERR, EAL,
+ " %s failed to select IOMMU type\n",
+ dev_addr);
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
+ return -1;
+ }
+ ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL,
+ " %s DMA remapping failed, error %i (%s)\n",
+ dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
+ return -1;
+ }
}
}
clear_group(vfio_group_fd);
return -1;
}
+ vfio_group_device_get(vfio_group_fd);
return 0;
}
* code will unset the container and the IOMMU mappings.
*/
- if (close(vfio_group_fd) < 0)
- RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
- dev_addr);
-
- if (close(vfio_dev_fd) < 0)
+ /* Closing a device */
+ if (close(vfio_dev_fd) < 0) {
RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n",
dev_addr);
+ return -1;
+ }
- if (clear_group(vfio_group_fd) < 0)
- RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
- dev_addr);
+ /* An VFIO group can have several devices attached. Just when there is
+ * no devices remaining should the group be closed.
+ */
+ vfio_group_device_put(vfio_group_fd);
+ if (!vfio_group_device_count(vfio_group_fd)) {
+
+ if (close(vfio_group_fd) < 0) {
+ RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
+ dev_addr);
+ return -1;
+ }
+
+ if (clear_group(vfio_group_fd) < 0) {
+ RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
+ dev_addr);
+ return -1;
+ }
+ }
return 0;
}
for (i = 0; i < VFIO_MAX_GROUPS; i++) {
vfio_cfg.vfio_groups[i].fd = -1;
vfio_cfg.vfio_groups[i].group_no = -1;
+ vfio_cfg.vfio_groups[i].devices = 0;
}
/* inform the user that we are probing for VFIO */
RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
- /* check if vfio-pci module is loaded */
+ /* check if vfio module is loaded */
vfio_available = rte_eal_check_module(modname);
/* return error directly */
dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
dma_map.vaddr = ms[i].addr_64;
dma_map.size = ms[i].len;
- dma_map.iova = ms[i].phys_addr;
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ dma_map.iova = dma_map.vaddr;
+ else
+ dma_map.iova = ms[i].phys_addr;
dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
return -1;
}
- /* calculate window size based on number of hugepages configured */
- create.window_size = rte_eal_get_physmem_size();
+ /* create DMA window from 0 to max(phys_addr + len) */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (ms[i].addr == NULL)
+ break;
+
+ create.window_size = RTE_MAX(create.window_size,
+ ms[i].phys_addr + ms[i].len);
+ }
+
+ /* sPAPR requires window size to be a power of 2 */
+ create.window_size = rte_align64pow2(create.window_size);
create.page_shift = __builtin_ctzll(ms->hugepage_sz);
- create.levels = 2;
+ create.levels = 1;
ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
if (ret) {
return -1;
}
+ if (create.start_addr != 0) {
+ RTE_LOG(ERR, EAL, " DMA window start address != 0\n");
+ return -1;
+ }
+
/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
struct vfio_iommu_type1_dma_map dma_map;
dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
dma_map.vaddr = ms[i].addr_64;
dma_map.size = ms[i].len;
- dma_map.iova = ms[i].phys_addr;
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ dma_map.iova = dma_map.vaddr;
+ else
+ dma_map.iova = ms[i].phys_addr;
dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
VFIO_DMA_MAP_FLAG_WRITE;
return 0;
}
+int
+vfio_noiommu_is_enabled(void)
+{
+ int fd, ret, cnt __rte_unused;
+ char c;
+
+ ret = -1;
+ fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ cnt = read(fd, &c, 1);
+ if (c == 'Y')
+ ret = 1;
+
+ close(fd);
+ return ret;
+}
+
#endif