vfio: remove deprecated DMA mapping functions
diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c
index 03d5e71..9e2d50c 100644
--- a/lib/librte_eal/linux/eal/eal.c
+++ b/lib/librte_eal/linux/eal/eal.c
@@ -32,7 +32,6 @@
 #include <rte_memory.h>
 #include <rte_launch.h>
 #include <rte_eal.h>
-#include <rte_eal_memconfig.h>
 #include <rte_errno.h>
 #include <rte_per_lcore.h>
 #include <rte_lcore.h>
@@ -66,6 +65,8 @@
 
 #define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
 
+#define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups"
+
 /* Allow the application to print its usage message too if set */
 static rte_usage_hook_t        rte_application_usage_hook = NULL;
 
@@ -305,7 +306,10 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val)
 static int
 rte_eal_config_create(void)
 {
-       void *rte_mem_cfg_addr;
+       size_t page_sz = sysconf(_SC_PAGE_SIZE);
+       size_t cfg_len = sizeof(*rte_config.mem_config);
+       size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz);
+       void *rte_mem_cfg_addr, *mapped_mem_cfg_addr;
        int retval;
 
        const char *pathname = eal_runtime_config_path();
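
The new locals round the shared config length up to a whole number of pages before it is reserved and mapped. A minimal standalone sketch of that rounding in plain C (the patch itself uses the RTE_ALIGN macro, which does the same thing for power-of-two alignments):

#include <stddef.h>
#include <unistd.h>

/* round len up to a multiple of the system page size; page sizes are
 * powers of two, so the usual mask trick applies */
static size_t
page_align(size_t len)
{
	size_t page_sz = (size_t)sysconf(_SC_PAGE_SIZE);

	return (len + page_sz - 1) & ~(page_sz - 1);
}
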
@@ -317,7 +321,7 @@ rte_eal_config_create(void)
        if (internal_config.base_virtaddr != 0)
                rte_mem_cfg_addr = (void *)
                        RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
-                       sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE));
+                       sizeof(struct rte_mem_config), page_sz);
        else
                rte_mem_cfg_addr = NULL;
 
@@ -330,7 +334,7 @@ rte_eal_config_create(void)
                }
        }
 
-       retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
+       retval = ftruncate(mem_cfg_fd, cfg_len);
        if (retval < 0){
                close(mem_cfg_fd);
                mem_cfg_fd = -1;
@@ -348,13 +352,25 @@ rte_eal_config_create(void)
                return -1;
        }
 
-       rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
-                               PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+       /* reserve space for config */
+       rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr,
+                       &cfg_len_aligned, page_sz, 0, 0);
+       if (rte_mem_cfg_addr == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
+               close(mem_cfg_fd);
+               mem_cfg_fd = -1;
+               return -1;
+       }
 
-       if (rte_mem_cfg_addr == MAP_FAILED){
+       /* remap the actual file into the space we've just reserved */
+       mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr,
+                       cfg_len_aligned, PROT_READ | PROT_WRITE,
+                       MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0);
+       if (mapped_mem_cfg_addr == MAP_FAILED) {
+               munmap(rte_mem_cfg_addr, cfg_len);
                close(mem_cfg_fd);
                mem_cfg_fd = -1;
-               RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
+               RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n");
                return -1;
        }
 
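
Instead of mmap()'ing the config file directly, the hunk above first reserves an address range with eal_get_virtual_area() and then maps the file over that range with MAP_FIXED, so the config is guaranteed to land at the reserved (possibly --base-virtaddr derived) address rather than wherever the kernel would place a plain mmap(). A minimal sketch of the same reserve-then-remap pattern, using plain mmap() in place of the EAL helper and assuming the file has already been ftruncate()'d to len (illustrative only, not DPDK code):

#include <stddef.h>
#include <sys/mman.h>

static void *
map_file_at_reserved_addr(int fd, size_t len)
{
	/* step 1: reserve the range with an inaccessible anonymous mapping */
	void *resv = mmap(NULL, len, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (resv == MAP_FAILED)
		return NULL;

	/* step 2: replace the reservation with the real file mapping */
	void *cfg = mmap(resv, len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, fd, 0);
	if (cfg == MAP_FAILED) {
		munmap(resv, len);	/* hand the reservation back */
		return NULL;
	}
	return cfg;	/* same address as resv */
}
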
@@ -434,8 +450,9 @@ rte_eal_config_reattach(void)
                if (mem_config != MAP_FAILED) {
                        /* errno is stale, don't use */
                        RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config at [%p], got [%p]"
-                               " - please use '--base-virtaddr' option\n",
-                               rte_mem_cfg_addr, mem_config);
+                               " - please use '--" OPT_BASE_VIRTADDR
+                               "' option\n", rte_mem_cfg_addr, mem_config);
+                       munmap(mem_config, sizeof(struct rte_mem_config));
                        return -1;
                }
                RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config! error %i (%s)\n",
@@ -473,24 +490,6 @@ eal_proc_type_detect(void)
        return ptype;
 }
 
-/* copies data from internal config to shared config */
-static void
-eal_update_mem_config(void)
-{
-       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       mcfg->legacy_mem = internal_config.legacy_mem;
-       mcfg->single_file_segments = internal_config.single_file_segments;
-}
-
-/* copies data from shared config to internal config */
-static void
-eal_update_internal_config(void)
-{
-       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       internal_config.legacy_mem = mcfg->legacy_mem;
-       internal_config.single_file_segments = mcfg->single_file_segments;
-}
-
 /* Sets up rte_config structure with the pointer to shared memory config.*/
 static int
 rte_config_init(void)
@@ -501,15 +500,19 @@ rte_config_init(void)
        case RTE_PROC_PRIMARY:
                if (rte_eal_config_create() < 0)
                        return -1;
-               eal_update_mem_config();
+               eal_mcfg_update_from_internal();
                break;
        case RTE_PROC_SECONDARY:
                if (rte_eal_config_attach() < 0)
                        return -1;
                eal_mcfg_wait_complete();
+               if (eal_mcfg_check_version() < 0) {
+                       RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n");
+                       return -1;
+               }
                if (rte_eal_config_reattach() < 0)
                        return -1;
-               eal_update_internal_config();
+               eal_mcfg_update_internal();
                break;
        case RTE_PROC_AUTO:
        case RTE_PROC_INVALID:
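
The secondary path now calls eal_mcfg_check_version() once the primary has finished writing the shared config. The helper lives in the common mcfg code; the idea is presumably that the primary records its DPDK version in the shared config and a secondary refuses to attach when its own build differs. A hedged sketch of that idea (struct and field names here are illustrative; only RTE_VERSION is the real macro):

#include <stdint.h>
#include <rte_version.h>

/* illustrative shared-config layout, not the real struct rte_mem_config */
struct shared_cfg {
	uint32_t magic;
	uint32_t version;	/* written by the primary at init time */
};

/* return 0 when primary and secondary were built from the same DPDK */
static int
check_version(const struct shared_cfg *cfg)
{
	return cfg->version == RTE_VERSION ? 0 : -1;
}
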
@@ -551,7 +554,6 @@ eal_usage(const char *prgname)
               "  --"OPT_SOCKET_LIMIT"      Limit memory allocation on sockets (comma separated values)\n"
               "  --"OPT_HUGE_DIR"          Directory where hugetlbfs is mounted\n"
               "  --"OPT_FILE_PREFIX"       Prefix for hugepage filenames\n"
-              "  --"OPT_BASE_VIRTADDR"     Base virtual address\n"
               "  --"OPT_CREATE_UIO_DEV"    Create /dev/uioX (usually done by hotplug)\n"
               "  --"OPT_VFIO_INTR"         Interrupt mode for VFIO (legacy|msi|msix)\n"
               "  --"OPT_LEGACY_MEM"        Legacy memory mode (no dynamic allocation, contiguous segments)\n"
@@ -623,35 +625,6 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
        return 0;
 }
 
-static int
-eal_parse_base_virtaddr(const char *arg)
-{
-       char *end;
-       uint64_t addr;
-
-       errno = 0;
-       addr = strtoull(arg, &end, 16);
-
-       /* check for errors */
-       if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0'))
-               return -1;
-
-       /* make sure we don't exceed 32-bit boundary on 32-bit target */
-#ifndef RTE_ARCH_64
-       if (addr >= UINTPTR_MAX)
-               return -1;
-#endif
-
-       /* align the addr on 16M boundary, 16MB is the minimum huge page
-        * size on IBM Power architecture. If the addr is aligned to 16MB,
-        * it can align to 2MB for x86. So this alignment can also be used
-        * on x86 */
-       internal_config.base_virtaddr =
-               RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M);
-
-       return 0;
-}
-
 static int
 eal_parse_vfio_intr(const char *mode)
 {
@@ -810,16 +783,6 @@ eal_parse_args(int argc, char **argv)
                        internal_config.force_socket_limits = 1;
                        break;
 
-               case OPT_BASE_VIRTADDR_NUM:
-                       if (eal_parse_base_virtaddr(optarg) < 0) {
-                               RTE_LOG(ERR, EAL, "invalid parameter for --"
-                                               OPT_BASE_VIRTADDR "\n");
-                               eal_usage(prgname);
-                               ret = -1;
-                               goto out;
-                       }
-                       break;
-
                case OPT_VFIO_INTR_NUM:
                        if (eal_parse_vfio_intr(optarg) < 0) {
                                RTE_LOG(ERR, EAL, "invalid parameters for --"
@@ -934,16 +897,6 @@ sync_func(__attribute__((unused)) void *arg)
        return 0;
 }
 
-inline static void
-rte_eal_mcfg_complete(void)
-{
-       /* ALL shared mem_config related INIT DONE */
-       if (rte_config.process_type == RTE_PROC_PRIMARY)
-               rte_config.mem_config->magic = RTE_MAGIC;
-
-       internal_config.init_complete = 1;
-}
-
 /*
  * Request iopl privilege for all RPL, returns 0 on success
  * iopl() call is mostly for the i386 architecture. For other architectures,
@@ -975,6 +928,33 @@ static void rte_eal_init_alert(const char *msg)
        RTE_LOG(ERR, EAL, "%s\n", msg);
 }
 
+/*
+ * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the
+ * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel
+ * IOMMU groups. If IOMMU is not enabled, that path would be empty.
+ * Therefore, checking if the path is empty will tell us if IOMMU is enabled.
+ */
+static bool
+is_iommu_enabled(void)
+{
+       DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH);
+       struct dirent *d;
+       int n = 0;
+
+       /* if directory doesn't exist, assume IOMMU is not enabled */
+       if (dir == NULL)
+               return false;
+
+       while ((d = readdir(dir)) != NULL) {
+               /* skip dot and dot-dot */
+               if (++n > 2)
+                       break;
+       }
+       closedir(dir);
+
+       return n > 2;
+}
+
 /* Launch threads, called at application init(). */
 int
 rte_eal_init(int argc, char **argv)
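
The new is_iommu_enabled() helper only counts directory entries: with the IOMMU enabled, /sys/kernel/iommu_groups contains one numbered subdirectory per IOMMU group, and with it disabled the directory is empty. A standalone sketch that lists those entries the same way, handy for checking a host by hand (illustrative, not part of the patch):

#include <dirent.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	DIR *dir = opendir("/sys/kernel/iommu_groups");
	struct dirent *d;
	int n = 0;

	if (dir == NULL) {
		/* path missing: old kernel or sysfs not mounted */
		printf("IOMMU not enabled\n");
		return 0;
	}
	while ((d = readdir(dir)) != NULL) {
		if (strcmp(d->d_name, ".") == 0 ||
				strcmp(d->d_name, "..") == 0)
			continue;
		printf("iommu group %s\n", d->d_name);
		n++;
	}
	closedir(dir);
	printf("IOMMU %s\n", n > 0 ? "enabled" : "not enabled");
	return 0;
}
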
@@ -1085,10 +1065,25 @@ rte_eal_init(int argc, char **argv)
                enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();
 
                if (iova_mode == RTE_IOVA_DC) {
-                       iova_mode = phys_addrs ? RTE_IOVA_PA : RTE_IOVA_VA;
-                       RTE_LOG(DEBUG, EAL,
-                               "Buses did not request a specific IOVA mode, using '%s' based on physical addresses availability.\n",
-                               phys_addrs ? "PA" : "VA");
+                       RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n");
+
+                       if (!phys_addrs) {
+                               /* if we have no access to physical addresses,
+                                * pick IOVA as VA mode.
+                                */
+                               iova_mode = RTE_IOVA_VA;
+                               RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n");
+                       } else if (is_iommu_enabled()) {
+                               /* we have an IOMMU, pick IOVA as VA mode */
+                               iova_mode = RTE_IOVA_VA;
+                               RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n");
+                       } else {
+                               /* physical addresses available, and no IOMMU
+                                * found, so pick IOVA as PA.
+                                */
+                               iova_mode = RTE_IOVA_PA;
+                               RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n");
+                       }
                }
 #ifdef RTE_LIBRTE_KNI
                /* Workaround for KNI which requires physical address to work */
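
Spelled out, the replacement branch reduces to the following default selection when every bus reports RTE_IOVA_DC; this is a condensed restatement of the logic above, not new behavior:

#include <stdbool.h>
#include <rte_eal.h>	/* enum rte_iova_mode */

static enum rte_iova_mode
choose_default_iova(bool phys_addrs, bool iommu_enabled)
{
	if (!phys_addrs)
		return RTE_IOVA_VA;	/* no physical addresses: VA is the only option */
	if (iommu_enabled)
		return RTE_IOVA_VA;	/* IOMMU present: prefer VA */
	return RTE_IOVA_PA;		/* no IOMMU: fall back to physical addresses */
}
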
@@ -1289,7 +1284,7 @@ rte_eal_init(int argc, char **argv)
                return -1;
        }
 
-       rte_eal_mcfg_complete();
+       eal_mcfg_complete();
 
        /* Call each registered callback, if enabled */
        rte_option_init();
@@ -1328,13 +1323,6 @@ rte_eal_cleanup(void)
        return 0;
 }
 
-/* get core role */
-enum rte_lcore_role_t
-rte_eal_lcore_role(unsigned lcore_id)
-{
-       return rte_config.lcore_role[lcore_id];
-}
-
 enum rte_proc_type_t
 rte_eal_process_type(void)
 {