int vfio_group_fd;
char filename[PATH_MAX];
struct rte_mp_msg mp_req, *mp_rep;
- struct rte_mp_reply mp_reply;
+ struct rte_mp_reply mp_reply = {0};
struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
RTE_LOG(ERR, EAL, " bad VFIO group fd\n");
vfio_group_fd = 0;
}
- free(mp_reply.msgs);
}
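+ /* always free the reply buffer: it may have been allocated even if
+ * fewer replies than expected were received, and mp_reply is
+ * zero-initialized above, so freeing a never-filled reply is safe.
+ */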
+ free(mp_reply.msgs);
if (vfio_group_fd < 0)
RTE_LOG(ERR, EAL, " cannot request group fd\n");
return vfio_group_fd;
{
int i;
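+
+ /* RTE_VFIO_DEFAULT_CONTAINER_FD is a special value (not a real fd)
+ * that selects the default VFIO configuration.
+ */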
+ if (container_fd == RTE_VFIO_DEFAULT_CONTAINER_FD)
+ return default_vfio_cfg;
+
for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
if (vfio_cfgs[i].vfio_container_fd == container_fd)
return &vfio_cfgs[i];
vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len,
void *arg __rte_unused)
{
+ rte_iova_t iova_start, iova_expected;
struct rte_memseg_list *msl;
struct rte_memseg *ms;
size_t cur_len = 0;
+ uint64_t va_start;
msl = rte_mem_virt2memseg_list(addr);
return;
}
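+ /* PPC64 (sPAPR IOMMU): temporarily mark this range's segments as free
+ * so that the memseg walks performed while the DMA window is re-created
+ * skip segments that have not been mapped here yet; the segments are
+ * marked used again at the end of this callback.
+ */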
+#ifdef RTE_ARCH_PPC_64
+ ms = rte_mem_virt2memseg(addr, msl);
+ while (cur_len < len) {
+ int idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+
+ rte_fbarray_set_free(&msl->memseg_arr, idx);
+ cur_len += ms->len;
+ ++ms;
+ }
+ cur_len = 0;
+#endif
/* memsegs are contiguous in memory */
ms = rte_mem_virt2memseg(addr, msl);
+
+ /*
+ * This memory is not guaranteed to be contiguous, but it still could
+ * be, or it could have some small contiguous chunks. Since the number
+ * of VFIO mappings is limited, and VFIO appears to not concatenate
+ * adjacent mappings, we have to do this ourselves.
+ *
+ * So, find contiguous chunks, then map them.
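+ *
+ * As a purely illustrative example: two segments at IOVA 0x0-0x200000
+ * and 0x200000-0x400000 form one chunk and get a single DMA mapping,
+ * while a following segment at IOVA 0x800000 starts a new chunk and is
+ * mapped separately.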
+ */
+ va_start = ms->addr_64;
+ iova_start = iova_expected = ms->iova;
while (cur_len < len) {
+ bool new_contig_area = ms->iova != iova_expected;
+ bool last_seg = (len - cur_len) == ms->len;
+ bool skip_last = false;
+
+ /* only do mappings when current contiguous area ends */
+ if (new_contig_area) {
+ if (type == RTE_MEM_EVENT_ALLOC)
+ vfio_dma_mem_map(default_vfio_cfg, va_start,
+ iova_start,
+ iova_expected - iova_start, 1);
+ else
+ vfio_dma_mem_map(default_vfio_cfg, va_start,
+ iova_start,
+ iova_expected - iova_start, 0);
+ va_start = ms->addr_64;
+ iova_start = ms->iova;
+ }
/* some memory segments may have invalid IOVA */
if (ms->iova == RTE_BAD_IOVA) {
RTE_LOG(DEBUG, EAL, "Memory segment at %p has bad IOVA, skipping\n",
ms->addr);
- goto next;
+ skip_last = true;
}
- if (type == RTE_MEM_EVENT_ALLOC)
- vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
- ms->iova, ms->len, 1);
- else
- vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
- ms->iova, ms->len, 0);
-next:
+ iova_expected = ms->iova + ms->len;
cur_len += ms->len;
++ms;
+
+ /*
+ * don't count previous segment, and don't attempt to
+ * dereference a potentially invalid pointer.
+ */
+ if (skip_last && !last_seg) {
+ iova_expected = iova_start = ms->iova;
+ va_start = ms->addr_64;
+ } else if (!skip_last && last_seg) {
+ /* this is the last segment and we're not skipping */
+ if (type == RTE_MEM_EVENT_ALLOC)
+ vfio_dma_mem_map(default_vfio_cfg, va_start,
+ iova_start,
+ iova_expected - iova_start, 1);
+ else
+ vfio_dma_mem_map(default_vfio_cfg, va_start,
+ iova_start,
+ iova_expected - iova_start, 0);
+ }
}
+#ifdef RTE_ARCH_PPC_64
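+ /* return the segments we temporarily marked free above to used state */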
+ cur_len = 0;
+ ms = rte_mem_virt2memseg(addr, msl);
+ while (cur_len < len) {
+ int idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+
+ rte_fbarray_set_used(&msl->memseg_arr, idx);
+ cur_len += ms->len;
+ ++ms;
+ }
+#endif
}
static int
vfio_sync_default_container(void)
{
struct rte_mp_msg mp_req, *mp_rep;
- struct rte_mp_reply mp_reply;
+ struct rte_mp_reply mp_reply = {0};
struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
int iommu_type_id;
p = (struct vfio_mp_param *)mp_rep->param;
if (p->result == SOCKET_OK)
iommu_type_id = p->iommu_type_id;
- free(mp_reply.msgs);
}
+ free(mp_reply.msgs);
if (iommu_type_id < 0) {
RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n");
return -1;
vfio_get_default_container_fd(void)
{
struct rte_mp_msg mp_req, *mp_rep;
- struct rte_mp_reply mp_reply;
+ struct rte_mp_reply mp_reply = {0};
struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
free(mp_reply.msgs);
return mp_rep->fds[0];
}
- free(mp_reply.msgs);
}
+ free(mp_reply.msgs);
RTE_LOG(ERR, EAL, " cannot request default container fd\n");
return -1;
}
{
int ret, vfio_container_fd;
struct rte_mp_msg mp_req, *mp_rep;
- struct rte_mp_reply mp_reply;
+ struct rte_mp_reply mp_reply = {0};
struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
free(mp_reply.msgs);
return vfio_container_fd;
}
- free(mp_reply.msgs);
}
+ free(mp_reply.msgs);
RTE_LOG(ERR, EAL, " cannot request container fd\n");
return -1;
}
return 1;
}
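+/* map one contiguous chunk of internal memory with a single DMA mapping;
+ * used by vfio_type1_dma_map() below when IOVA mode is VA.
+ */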
+static int
+type1_map_contig(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ size_t len, void *arg)
+{
+ int *vfio_container_fd = arg;
+
+ if (msl->external)
+ return 0;
+
+ return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
+ len, 1);
+}
+
static int
type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
void *arg)
{
int *vfio_container_fd = arg;
- if (msl->external)
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
+
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
+ return 0;
+
+ /* if IOVA mode is VA, we've already mapped the internal segments */
+ if (!msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
return 0;
return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
*/
if (errno == EEXIST) {
RTE_LOG(DEBUG, EAL,
- " Memory segment is allready mapped,"
+ " Memory segment is already mapped,"
" skipping");
} else {
RTE_LOG(ERR, EAL,
static int
vfio_type1_dma_map(int vfio_container_fd)
{
+ if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+ /* with IOVA as VA mode, we can get away with mapping contiguous
+ * chunks rather than going page-by-page.
+ */
+ int ret = rte_memseg_contig_walk(type1_map_contig,
+ &vfio_container_fd);
+ if (ret)
+ return ret;
+ /* we have to continue the walk because we've skipped the
+ * external segments during the contig walk.
+ */
+ }
return rte_memseg_walk(type1_map, &vfio_container_fd);
}
*/
if (errno == EBUSY) {
RTE_LOG(DEBUG, EAL,
- " Memory segment is allready mapped,"
+ " Memory segment is already mapped,"
" skipping");
} else {
RTE_LOG(ERR, EAL,
return 0;
}
-struct spapr_remap_walk_param {
- int vfio_container_fd;
- uint64_t addr_64;
-};
-
static int
vfio_spapr_map_walk(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, void *arg)
{
- struct spapr_remap_walk_param *param = arg;
+ int *vfio_container_fd = arg;
+
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
- if (msl->external || ms->addr_64 == param->addr_64)
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
return 0;
- return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
+ return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
ms->len, 1);
}
vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, void *arg)
{
- struct spapr_remap_walk_param *param = arg;
+ int *vfio_container_fd = arg;
+
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
+ return 0;
- if (msl->external || ms->addr_64 == param->addr_64)
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
return 0;
- return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
+ return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
ms->len, 0);
}
struct spapr_walk_param {
uint64_t window_size;
uint64_t hugepage_sz;
- uint64_t addr_64;
};
static int
struct spapr_walk_param *param = arg;
uint64_t max = ms->iova + ms->len;
- if (msl->external)
+ /* skip external memory that isn't a heap */
+ if (msl->external && !msl->heap)
return 0;
- /* do not iterate ms we haven't mapped yet */
- if (param->addr_64 && ms->addr_64 == param->addr_64)
+ /* skip any segments with invalid IOVA addresses */
+ if (ms->iova == RTE_BAD_IOVA)
return 0;
if (max > param->window_size) {
/* try possible page_shift and levels for workaround */
uint32_t levels;
- for (levels = 1; levels <= info.ddw.levels; levels++) {
- uint32_t pgsizes = info.ddw.pgsizes;
-
- while (pgsizes != 0) {
- create->page_shift = 31 - __builtin_clz(pgsizes);
- create->levels = levels;
- ret = ioctl(vfio_container_fd,
- VFIO_IOMMU_SPAPR_TCE_CREATE, create);
- if (!ret)
- break;
- pgsizes &= ~(1 << create->page_shift);
- }
- if (!ret)
- break;
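+ /* keep the page shift already chosen and retry with more TCE levels */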
+ for (levels = create->levels + 1;
+ ret && levels <= info.ddw.levels; levels++) {
+ create->levels = levels;
+ ret = ioctl(vfio_container_fd,
+ VFIO_IOMMU_SPAPR_TCE_CREATE, create);
}
#endif
if (ret) {
/* check if window size needs to be adjusted */
memset(&param, 0, sizeof(param));
- param.addr_64 = vaddr;
/* we're inside a callback so use thread-unsafe version */
if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
if (do_map) {
/* re-create window and remap the entire memory */
if (iova + len > create.window_size) {
- struct spapr_remap_walk_param remap_param = {
- .vfio_container_fd = vfio_container_fd,
- .addr_64 = vaddr,
- };
-
/* release all maps before recreating the window */
if (rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
- &remap_param) < 0) {
+ &vfio_container_fd) < 0) {
RTE_LOG(ERR, EAL, "Could not release DMA maps\n");
ret = -1;
goto out;
/* we're inside a callback, so use thread-unsafe version
*/
if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
- &remap_param) < 0) {
+ &vfio_container_fd) < 0) {
RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
ret = -1;
goto out;
struct spapr_walk_param param;
memset(&param, 0, sizeof(param));
- param.addr_64 = 0UL;
/* create DMA window from 0 to max(phys_addr + len) */
rte_memseg_walk(vfio_spapr_window_size_walk, ¶m);
return ret;
}
-int
-rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
-{
- if (len == 0) {
- rte_errno = EINVAL;
- return -1;
- }
-
- return container_dma_map(default_vfio_cfg, vaddr, iova, len);
-}
-
-int
-rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
-{
- if (len == 0) {
- rte_errno = EINVAL;
- return -1;
- }
-
- return container_dma_unmap(default_vfio_cfg, vaddr, iova, len);
-}
-
int
rte_vfio_noiommu_is_enabled(void)
{
#else
-int
-rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
- __rte_unused uint64_t len)
-{
- return -1;
-}
-
-int
-rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
- __rte_unused uint64_t len)
-{
- return -1;
-}
-
int
rte_vfio_setup_device(__rte_unused const char *sysfs_base,
__rte_unused const char *dev_addr,