vfio: fix DMA mapping failures on ppc64le
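
On ppc64le, the sPAPR IOMMU cannot grow an existing DMA window in
place: expanding DMA coverage means unmapping every mapped segment,
removing the window, creating a larger one, and replaying all of the
maps. The remap walks used to carry the address of the segment
currently being mapped in spapr_remap_walk_param and skipped it
explicitly. Drop that bookkeeping and instead temporarily mark the
memsegs covered by the event as free in the memseg fbarray, so that
any walk performed while the mem event callback is mapping them skips
them; mark them as used again once the callback's mapping loop is
done.

Further changes in this patch:

- the type1 and sPAPR walk callbacks now skip external memory that is
  not an EAL heap, as well as any segment whose IOVA is RTE_BAD_IOVA;
- in IOVA-as-VA mode, type1 mapping continues with a page-by-page walk
  after the contiguous walk, since external segments are skipped
  during the contig walk;
- the DDW creation workaround now retries only with more TCE levels
  instead of iterating over every supported page size;
- the deprecated rte_vfio_dma_map()/rte_vfio_dma_unmap() wrappers are
  removed in favour of the container API.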
diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
index d788738..01b5ef3 100644
--- a/lib/librte_eal/linux/eal/eal_vfio.c
+++ b/lib/librte_eal/linux/eal/eal_vfio.c
@@ -532,6 +532,17 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len,
                return;
        }
 
+#ifdef RTE_ARCH_PPC_64
+       ms = rte_mem_virt2memseg(addr, msl);
+       while (cur_len < len) {
+               int idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+
+               rte_fbarray_set_free(&msl->memseg_arr, idx);
+               cur_len += ms->len;
+               ++ms;
+       }
+       cur_len = 0;
+#endif
        /* memsegs are contiguous in memory */
        ms = rte_mem_virt2memseg(addr, msl);
        while (cur_len < len) {
@@ -551,6 +562,17 @@ next:
                cur_len += ms->len;
                ++ms;
        }
+#ifdef RTE_ARCH_PPC_64
+       cur_len = 0;
+       ms = rte_mem_virt2memseg(addr, msl);
+       while (cur_len < len) {
+               int idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+
+               rte_fbarray_set_used(&msl->memseg_arr, idx);
+               cur_len += ms->len;
+               ++ms;
+       }
+#endif
 }
 
 static int
@@ -1253,7 +1275,16 @@ type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
 {
        int *vfio_container_fd = arg;
 
-       if (msl->external)
+       /* skip external memory that isn't a heap */
+       if (msl->external && !msl->heap)
+               return 0;
+
+       /* skip any segments with invalid IOVA addresses */
+       if (ms->iova == RTE_BAD_IOVA)
+               return 0;
+
+       /* if IOVA mode is VA, we've already mapped the internal segments */
+       if (!msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
                return 0;
 
        return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
@@ -1285,7 +1316,7 @@ vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
                         */
                        if (errno == EEXIST) {
                                RTE_LOG(DEBUG, EAL,
-                                       " Memory segment is allready mapped,"
+                                       " Memory segment is already mapped,"
                                        " skipping");
                        } else {
                                RTE_LOG(ERR, EAL,
@@ -1320,8 +1351,13 @@ vfio_type1_dma_map(int vfio_container_fd)
                /* with IOVA as VA mode, we can get away with mapping contiguous
                 * chunks rather than going page-by-page.
                 */
-               return rte_memseg_contig_walk(type1_map_contig,
+               int ret = rte_memseg_contig_walk(type1_map_contig,
                                &vfio_container_fd);
+               if (ret)
+                       return ret;
+               /* we have to continue the walk because we've skipped the
+                * external segments during the contig walk.
+                */
        }
        return rte_memseg_walk(type1_map, &vfio_container_fd);
 }
@@ -1365,7 +1401,7 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
                         */
                        if (errno == EBUSY) {
                                RTE_LOG(DEBUG, EAL,
-                                       " Memory segment is allready mapped,"
+                                       " Memory segment is already mapped,"
                                        " skipping");
                        } else {
                                RTE_LOG(ERR, EAL,
@@ -1402,21 +1438,21 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
        return 0;
 }
 
-struct spapr_remap_walk_param {
-       int vfio_container_fd;
-       uint64_t addr_64;
-};
-
 static int
 vfio_spapr_map_walk(const struct rte_memseg_list *msl,
                const struct rte_memseg *ms, void *arg)
 {
-       struct spapr_remap_walk_param *param = arg;
+       int *vfio_container_fd = arg;
+
+       /* skip external memory that isn't a heap */
+       if (msl->external && !msl->heap)
+               return 0;
 
-       if (msl->external || ms->addr_64 == param->addr_64)
+       /* skip any segments with invalid IOVA addresses */
+       if (ms->iova == RTE_BAD_IOVA)
                return 0;
 
-       return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
+       return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
                        ms->len, 1);
 }
 
@@ -1424,19 +1460,23 @@ static int
 vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
                const struct rte_memseg *ms, void *arg)
 {
-       struct spapr_remap_walk_param *param = arg;
+       int *vfio_container_fd = arg;
 
-       if (msl->external || ms->addr_64 == param->addr_64)
+       /* skip external memory that isn't a heap */
+       if (msl->external && !msl->heap)
                return 0;
 
-       return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
+       /* skip any segments with invalid IOVA addresses */
+       if (ms->iova == RTE_BAD_IOVA)
+               return 0;
+
+       return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
                        ms->len, 0);
 }
 
 struct spapr_walk_param {
        uint64_t window_size;
        uint64_t hugepage_sz;
-       uint64_t addr_64;
 };
 
 static int
@@ -1446,11 +1486,12 @@ vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
        struct spapr_walk_param *param = arg;
        uint64_t max = ms->iova + ms->len;
 
-       if (msl->external)
+       /* skip external memory that isn't a heap */
+       if (msl->external && !msl->heap)
                return 0;
 
-       /* do not iterate ms we haven't mapped yet  */
-       if (param->addr_64 && ms->addr_64 == param->addr_64)
+       /* skip any segments with invalid IOVA addresses */
+       if (ms->iova == RTE_BAD_IOVA)
                return 0;
 
        if (max > param->window_size) {
@@ -1496,20 +1537,11 @@ vfio_spapr_create_new_dma_window(int vfio_container_fd,
                /* try possible page_shift and levels for workaround */
                uint32_t levels;
 
-               for (levels = 1; levels <= info.ddw.levels; levels++) {
-                       uint32_t pgsizes = info.ddw.pgsizes;
-
-                       while (pgsizes != 0) {
-                               create->page_shift = 31 - __builtin_clz(pgsizes);
-                               create->levels = levels;
-                               ret = ioctl(vfio_container_fd,
-                                       VFIO_IOMMU_SPAPR_TCE_CREATE, create);
-                               if (!ret)
-                                       break;
-                               pgsizes &= ~(1 << create->page_shift);
-                       }
-                       if (!ret)
-                               break;
+               for (levels = create->levels + 1;
+                       ret && levels <= info.ddw.levels; levels++) {
+                       create->levels = levels;
+                       ret = ioctl(vfio_container_fd,
+                               VFIO_IOMMU_SPAPR_TCE_CREATE, create);
                }
 #endif
                if (ret) {
@@ -1550,7 +1582,6 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 
        /* check if window size needs to be adjusted */
        memset(&param, 0, sizeof(param));
-       param.addr_64 = vaddr;
 
        /* we're inside a callback so use thread-unsafe version */
        if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
@@ -1575,14 +1606,9 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
        if (do_map) {
                /* re-create window and remap the entire memory */
                if (iova + len > create.window_size) {
-                       struct spapr_remap_walk_param remap_param = {
-                               .vfio_container_fd = vfio_container_fd,
-                               .addr_64 = vaddr,
-                       };
-
                        /* release all maps before recreating the window */
                        if (rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
-                                       &remap_param) < 0) {
+                                       &vfio_container_fd) < 0) {
                                RTE_LOG(ERR, EAL, "Could not release DMA maps\n");
                                ret = -1;
                                goto out;
@@ -1609,7 +1635,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
                        /* we're inside a callback, so use thread-unsafe version
                         */
                        if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
-                                       &remap_param) < 0) {
+                                       &vfio_container_fd) < 0) {
                                RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
                                ret = -1;
                                goto out;
@@ -1656,7 +1682,6 @@ vfio_spapr_dma_map(int vfio_container_fd)
        struct spapr_walk_param param;
 
        memset(&param, 0, sizeof(param));
-       param.addr_64 = 0UL;
 
        /* create DMA window from 0 to max(phys_addr + len) */
        rte_memseg_walk(vfio_spapr_window_size_walk, &param);
@@ -1832,28 +1857,6 @@ out:
        return ret;
 }
 
-int
-rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
-{
-       if (len == 0) {
-               rte_errno = EINVAL;
-               return -1;
-       }
-
-       return container_dma_map(default_vfio_cfg, vaddr, iova, len);
-}
-
-int
-rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
-{
-       if (len == 0) {
-               rte_errno = EINVAL;
-               return -1;
-       }
-
-       return container_dma_unmap(default_vfio_cfg, vaddr, iova, len);
-}
-
 int
 rte_vfio_noiommu_is_enabled(void)
 {
@@ -2031,20 +2034,6 @@ rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
 
 #else
 
-int
-rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
-                 __rte_unused uint64_t len)
-{
-       return -1;
-}
-
-int
-rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
-                   __rte_unused uint64_t len)
-{
-       return -1;
-}
-
 int
 rte_vfio_setup_device(__rte_unused const char *sysfs_base,
                __rte_unused const char *dev_addr,
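
Note: with rte_vfio_dma_map()/rte_vfio_dma_unmap() removed, callers
must go through the container API instead. Below is a minimal sketch
of the replacement, assuming the memory was already registered with
EAL (e.g. via rte_extmem_register()); the two helper names are
hypothetical, only the rte_vfio_container_dma_* calls and
RTE_VFIO_DEFAULT_CONTAINER_FD come from the VFIO API:

#include <stdint.h>

#include <rte_errno.h>
#include <rte_vfio.h>

/* map an externally allocated buffer for DMA via the default container */
static int
extmem_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
{
	/* replaces the removed rte_vfio_dma_map(vaddr, iova, len) */
	if (rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
			vaddr, iova, len) < 0)
		return -rte_errno;
	return 0;
}

/* undo the mapping created above */
static int
extmem_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
{
	/* replaces the removed rte_vfio_dma_unmap(vaddr, iova, len) */
	if (rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
			vaddr, iova, len) < 0)
		return -rte_errno;
	return 0;
}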