malloc: enable event callbacks for external memory
[dpdk.git] / drivers / bus / fslmc / fslmc_vfio.c
index 8b15312..493b6e5 100644 (file)
@@ -30,6 +30,7 @@
 #include <rte_kvargs.h>
 #include <rte_dev.h>
 #include <rte_bus.h>
+#include <rte_eal_memconfig.h>
 
 #include "rte_fslmc.h"
 #include "fslmc_vfio.h"
@@ -51,7 +52,6 @@ static int container_device_fd;
 static char *g_container;
 static uint32_t *msi_intr_vaddr;
 void *(*rte_mcp_ptr_list);
-static int is_dma_done;
 
 static struct rte_dpaa2_object_list dpaa2_obj_list =
        TAILQ_HEAD_INITIALIZER(dpaa2_obj_list);
@@ -74,7 +74,7 @@ fslmc_get_container_group(int *groupid)
        if (!g_container) {
                container = getenv("DPRC");
                if (container == NULL) {
-                       DPAA2_BUS_INFO("DPAA2: DPRC not available");
+                       DPAA2_BUS_DEBUG("DPAA2: DPRC not available");
                        return -EINVAL;
                }
 
@@ -91,7 +91,8 @@ fslmc_get_container_group(int *groupid)
        }
 
        /* get group number */
-       ret = vfio_get_group_no(SYSFS_FSL_MC_DEVICES, g_container, groupid);
+       ret = rte_vfio_get_group_num(SYSFS_FSL_MC_DEVICES,
+                                    g_container, groupid);
        if (ret <= 0) {
                DPAA2_BUS_ERR("Unable to find %s IOMMU group", g_container);
                return -1;
@@ -124,7 +125,7 @@ vfio_connect_container(void)
        }
 
        /* Opens main vfio file descriptor which represents the "container" */
-       fd = vfio_get_container_fd();
+       fd = rte_vfio_get_container_fd();
        if (fd < 0) {
                DPAA2_BUS_ERR("Failed to open VFIO container");
                return -errno;
@@ -189,11 +190,70 @@ static int vfio_map_irq_region(struct fslmc_vfio_group *group)
        return -errno;
 }
 
+static int fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
+static int fslmc_unmap_dma(uint64_t vaddr, rte_iova_t iovaddr, size_t len);
+
+/*
+ * Memory event handler installed through rte_mem_event_callback_register().
+ *
+ * Walks the range [addr, addr + len) one memseg at a time and, for each
+ * segment, calls fslmc_map_dma() when the event is RTE_MEM_EVENT_ALLOC and
+ * fslmc_unmap_dma() for any other event type. Segments whose IOVA is
+ * RTE_BAD_IOVA are skipped. The walk stops at the first failure; segments
+ * handled before the failure are left as they are.
+ *
+ * @param type  event type reported by the memory subsystem
+ * @param addr  start virtual address of the affected range
+ * @param len   length of the affected range, in bytes
+ * @param arg   unused callback argument
+ */
+static void
+fslmc_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
+               void *arg __rte_unused)
+{
+       struct rte_memseg_list *msl;
+       struct rte_memseg *ms;
+       size_t cur_len = 0, map_len = 0;
+       uint64_t virt_addr;
+       rte_iova_t iova_addr;
+       int ret;
+
+       msl = rte_mem_virt2memseg_list(addr);
+
+       while (cur_len < len) {
+               const void *va = RTE_PTR_ADD(addr, cur_len);
+
+               ms = rte_mem_virt2memseg(va, msl);
+               /* The lookup returns NULL when va is not backed by a memseg,
+                * and a zero-length segment would never advance cur_len.
+                * Bail out instead of dereferencing NULL or spinning forever.
+                */
+               if (ms == NULL || ms->len == 0) {
+                       DPAA2_BUS_ERR("No valid memseg found for va=%p", va);
+                       return;
+               }
+
+               iova_addr = ms->iova;
+               virt_addr = ms->addr_64;
+               map_len = ms->len;
+
+               DPAA2_BUS_DEBUG("Request for %s, va=%p, "
+                               "virt_addr=0x%" PRIx64 ", "
+                               "iova=0x%" PRIx64 ", map_len=%zu",
+                               type == RTE_MEM_EVENT_ALLOC ?
+                                       "alloc" : "dealloc",
+                               va, virt_addr, iova_addr, map_len);
+
+               /* iova_addr may be set to RTE_BAD_IOVA */
+               if (iova_addr == RTE_BAD_IOVA) {
+                       DPAA2_BUS_DEBUG("Segment has invalid iova, skipping");
+                       cur_len += map_len;
+                       continue;
+               }
+
+               if (type == RTE_MEM_EVENT_ALLOC)
+                       ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
+               else
+                       ret = fslmc_unmap_dma(virt_addr, iova_addr, map_len);
+
+               if (ret != 0) {
+                       DPAA2_BUS_ERR("DMA Mapping/Unmapping failed. "
+                                       "Map=%d, addr=%p, len=%zu, err:(%d)",
+                                       type, va, map_len, ret);
+                       return;
+               }
+
+               cur_len += map_len;
+       }
+
+       if (type == RTE_MEM_EVENT_ALLOC)
+               DPAA2_BUS_DEBUG("Total Mapped: addr=%p, len=%zu",
+                               addr, len);
+       else
+               DPAA2_BUS_DEBUG("Total Unmapped: addr=%p, len=%zu",
+                               addr, len);
+}
+
 static int
-fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
-               const struct rte_memseg *ms, void *arg)
+fslmc_map_dma(uint64_t vaddr, rte_iova_t iovaddr __rte_unused, size_t len)
 {
-       int *n_segs = arg;
        struct fslmc_vfio_group *group;
        struct vfio_iommu_type1_dma_map dma_map = {
                .argsz = sizeof(struct vfio_iommu_type1_dma_map),
@@ -201,10 +261,11 @@ fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
        };
        int ret;
 
-       dma_map.size = ms->len;
-       dma_map.vaddr = ms->addr_64;
+       dma_map.size = len;
+       dma_map.vaddr = vaddr;
+
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-       dma_map.iova = ms->iova;
+       dma_map.iova = iovaddr;
 #else
        dma_map.iova = dma_map.vaddr;
 #endif
@@ -217,35 +278,95 @@ fslmc_vfio_map(const struct rte_memseg_list *msl __rte_unused,
                return -1;
        }
 
-       DPAA2_BUS_DEBUG("-->Initial SHM Virtual ADDR %llX",
-                       dma_map.vaddr);
-       DPAA2_BUS_DEBUG("-----> DMA size 0x%llX", dma_map.size);
-       ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA,
-                       &dma_map);
+       DPAA2_BUS_DEBUG("--> Map address: 0x%"PRIx64", size: %"PRIu64"",
+                       (uint64_t)dma_map.vaddr, (uint64_t)dma_map.size);
+       ret = ioctl(group->container->fd, VFIO_IOMMU_MAP_DMA, &dma_map);
        if (ret) {
                DPAA2_BUS_ERR("VFIO_IOMMU_MAP_DMA API(errno = %d)",
                                errno);
                return -1;
        }
-       (*n_segs)++;
+
        return 0;
 }
 
-int rte_fslmc_vfio_dmamap(void)
+/*
+ * Remove a DMA mapping from the VFIO container of the fslmc group.
+ *
+ * The IOVA handed to VFIO_IOMMU_UNMAP_DMA is chosen the same way
+ * fslmc_map_dma() chooses it: iovaddr when RTE_LIBRTE_DPAA2_USE_PHYS_IOVA is
+ * defined, vaddr otherwise, so the range unmapped is the one that was mapped.
+ *
+ * @param vaddr    virtual address of the region
+ * @param iovaddr  IOVA of the region (only used in the PHYS_IOVA build)
+ * @param len      length of the region, in bytes
+ * @return 0 on success, -1 if the container is not connected or the ioctl
+ *         fails
+ */
+static int
+fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr __rte_unused, size_t len)
 {
-       int i = 0;
+       struct fslmc_vfio_group *group;
+       struct vfio_iommu_type1_dma_unmap dma_unmap = {
+               .argsz = sizeof(struct vfio_iommu_type1_dma_unmap),
+               .flags = 0,
+       };
+       int ret;
 
-       if (is_dma_done)
-               return 0;
+       dma_unmap.size = len;
+
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+       dma_unmap.iova = iovaddr;
+#else
+       dma_unmap.iova = vaddr;
+#endif
+
+       /* Remove the DMA mapping from the IOMMU */
+       group = &vfio_group;
 
-       if (rte_memseg_walk(fslmc_vfio_map, &i) < 0)
+       if (!group->container) {
+               DPAA2_BUS_ERR("Container is not connected ");
+               return -1;
+       }
+
+       DPAA2_BUS_DEBUG("--> Unmap address: 0x%"PRIx64", size: %"PRIu64"",
+                       (uint64_t)dma_unmap.iova, (uint64_t)dma_unmap.size);
+       ret = ioctl(group->container->fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+       if (ret) {
+               DPAA2_BUS_ERR("VFIO_IOMMU_UNMAP_DMA API(errno = %d)",
+                               errno);
                return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Per-segment callback passed to rte_memseg_walk() by
+ * rte_fslmc_vfio_dmamap(): DMA-maps one memseg.
+ *
+ * @param msl  memseg list the segment belongs to (unused)
+ * @param ms   segment to map
+ * @param arg  pointer to an int counter, incremented for every segment
+ *             that was mapped successfully
+ * @return 0 when the segment was mapped or skipped because its IOVA is
+ *         RTE_BAD_IOVA; otherwise the non-zero value returned by
+ *         fslmc_map_dma()
+ */
+static int
+fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
+               const struct rte_memseg *ms, void *arg)
+{
+       int *n_segs = arg;
+       int ret;
+
+       /* if IOVA address is invalid, skip */
+       if (ms->iova == RTE_BAD_IOVA)
+               return 0;
+
+       ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
+       if (ret)
+               DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)",
+                               ms->addr, ms->len);
+       else
+               /* count only segments that were actually mapped */
+               (*n_segs)++;
 
-       /* Verifying that at least single segment is available */
-       if (i <= 0) {
-               DPAA2_BUS_ERR("No Segments found for VFIO Mapping");
+       /* hand the mapping result back to the caller of the walk */
+       return ret;
+}
+
+int rte_fslmc_vfio_dmamap(void)
+{
+       int i = 0, ret;
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock;
+
+       /* Lock before parsing and registering callback to memory subsystem */
+       rte_rwlock_read_lock(mem_lock);
+
+       if (rte_memseg_walk(fslmc_dmamap_seg, &i) < 0) {
+               rte_rwlock_read_unlock(mem_lock);
                return -1;
        }
+
+       ret = rte_mem_event_callback_register("fslmc_memevent_clb",
+                       fslmc_memevent_cb, NULL);
+       if (ret && rte_errno == ENOTSUP)
+               DPAA2_BUS_DEBUG("Memory event callbacks not supported");
+       else if (ret)
+               DPAA2_BUS_DEBUG("Unable to install memory handler");
+       else
+               DPAA2_BUS_DEBUG("Installed memory callback handler");
+
        DPAA2_BUS_DEBUG("Total %d segments found.", i);
 
        /* TODO - This is a W.A. as VFIO currently does not add the mapping of
@@ -254,7 +375,10 @@ int rte_fslmc_vfio_dmamap(void)
         */
        vfio_map_irq_region(&vfio_group);
 
-       is_dma_done = 1;
+       /* Existing segments have been mapped and memory callback for hotplug
+        * has been installed.
+        */
+       rte_rwlock_read_unlock(mem_lock);
 
        return 0;
 }
@@ -289,9 +413,6 @@ static int64_t vfio_map_mcp_obj(struct fslmc_vfio_group *group, char *mcp_obj)
                goto MC_FAILURE;
        }
 
-       DPAA2_BUS_DEBUG("Region offset = %llx  , region size = %llx",
-                       reg_info.offset, reg_info.size);
-
        v_addr = (size_t)mmap(NULL, reg_info.size,
                PROT_WRITE | PROT_READ, MAP_SHARED,
                mc_fd, reg_info.offset);
@@ -451,46 +572,47 @@ fslmc_process_iodevices(struct rte_dpaa2_device *dev)
                break;
        }
 
-       DPAA2_BUS_DEBUG("Device (%s) abstracted from VFIO",
-                       dev->device.name);
+       DPAA2_BUS_LOG(DEBUG, "Device (%s) abstracted from VFIO",
+                     dev->device.name);
        return 0;
 }
 
 static int
 fslmc_process_mcp(struct rte_dpaa2_device *dev)
 {
+       int ret;
        intptr_t v_addr;
-       char *dev_name;
+       char *dev_name = NULL;
        struct fsl_mc_io dpmng  = {0};
        struct mc_version mc_ver_info = {0};
 
        rte_mcp_ptr_list = malloc(sizeof(void *) * 1);
        if (!rte_mcp_ptr_list) {
                DPAA2_BUS_ERR("Unable to allocate MC portal memory");
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto cleanup;
        }
 
        dev_name = strdup(dev->device.name);
        if (!dev_name) {
                DPAA2_BUS_ERR("Unable to allocate MC device name memory");
-               free(rte_mcp_ptr_list);
-               rte_mcp_ptr_list = NULL;
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto cleanup;
        }
 
        v_addr = vfio_map_mcp_obj(&vfio_group, dev_name);
        if (v_addr == (intptr_t)MAP_FAILED) {
                DPAA2_BUS_ERR("Error mapping region (errno = %d)", errno);
-               free(rte_mcp_ptr_list);
-               rte_mcp_ptr_list = NULL;
-               return -1;
+               ret = -1;
+               goto cleanup;
        }
 
        /* check the MC version compatibility */
        dpmng.regs = (void *)v_addr;
        if (mc_get_version(&dpmng, CMD_PRI_LOW, &mc_ver_info)) {
                DPAA2_BUS_ERR("Unable to obtain MC version");
-               return -1;
+               ret = -1;
+               goto cleanup;
        }
 
        if ((mc_ver_info.major != MC_VER_MAJOR) ||
@@ -500,13 +622,24 @@ fslmc_process_mcp(struct rte_dpaa2_device *dev)
                              MC_VER_MAJOR, MC_VER_MINOR,
                              mc_ver_info.major, mc_ver_info.minor,
                              mc_ver_info.revision);
-               free(rte_mcp_ptr_list);
-               rte_mcp_ptr_list = NULL;
-               return -1;
+               ret = -1;
+               goto cleanup;
        }
        rte_mcp_ptr_list[0] = (void *)v_addr;
 
+       free(dev_name);
        return 0;
+
+cleanup:
+       if (dev_name)
+               free(dev_name);
+
+       if (rte_mcp_ptr_list) {
+               free(rte_mcp_ptr_list);
+               rte_mcp_ptr_list = NULL;
+       }
+
+       return ret;
 }
 
 int
@@ -543,12 +676,10 @@ fslmc_vfio_process_group(void)
        }
 
        TAILQ_FOREACH_SAFE(dev, &rte_fslmc_bus.device_list, next, dev_temp) {
-               if (!dev)
-                       break;
-
                switch (dev->dev_type) {
                case DPAA2_ETH:
                case DPAA2_CRYPTO:
+               case DPAA2_QDMA:
                        ret = fslmc_process_iodevices(dev);
                        if (ret) {
                                DPAA2_BUS_DEBUG("Dev (%s) init failed",
@@ -615,7 +746,7 @@ fslmc_vfio_setup_group(void)
        }
 
        /* Get the actual group fd */
-       ret = vfio_get_group_fd(groupid);
+       ret = rte_vfio_get_group_fd(groupid);
        if (ret < 0)
                return ret;
        vfio_group.fd = ret;