X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinux%2Feal%2Feal.c;h=9e2d50cfbac5b36c3ab477a79d67c3a546117948;hb=b14d192ca1fc7575f415f363990c202cece84e28;hp=03d5e7170559cefa11cf73add7b174efcfde14a6;hpb=a08a5dd20e51100792a5bf55b33088837055520c;p=dpdk.git

diff --git a/lib/librte_eal/linux/eal/eal.c b/lib/librte_eal/linux/eal/eal.c
index 03d5e71705..9e2d50cfba 100644
--- a/lib/librte_eal/linux/eal/eal.c
+++ b/lib/librte_eal/linux/eal/eal.c
@@ -32,7 +32,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -66,6 +65,8 @@
 
 #define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
 
+#define KERNEL_IOMMU_GROUPS_PATH "/sys/kernel/iommu_groups"
+
 /* Allow the application to print its usage message too if set */
 static rte_usage_hook_t rte_application_usage_hook = NULL;
 
@@ -305,7 +306,10 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val)
 static int
 rte_eal_config_create(void)
 {
-	void *rte_mem_cfg_addr;
+	size_t page_sz = sysconf(_SC_PAGE_SIZE);
+	size_t cfg_len = sizeof(*rte_config.mem_config);
+	size_t cfg_len_aligned = RTE_ALIGN(cfg_len, page_sz);
+	void *rte_mem_cfg_addr, *mapped_mem_cfg_addr;
 	int retval;
 
 	const char *pathname = eal_runtime_config_path();
@@ -317,7 +321,7 @@ rte_eal_config_create(void)
 	if (internal_config.base_virtaddr != 0)
 		rte_mem_cfg_addr = (void *)
 			RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
-			sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE));
+			sizeof(struct rte_mem_config), page_sz);
 	else
 		rte_mem_cfg_addr = NULL;
 
@@ -330,7 +334,7 @@ rte_eal_config_create(void)
 		}
 	}
 
-	retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
+	retval = ftruncate(mem_cfg_fd, cfg_len);
 	if (retval < 0){
 		close(mem_cfg_fd);
 		mem_cfg_fd = -1;
@@ -348,13 +352,25 @@ rte_eal_config_create(void)
 		return -1;
 	}
 
-	rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
-			PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+	/* reserve space for config */
+	rte_mem_cfg_addr = eal_get_virtual_area(rte_mem_cfg_addr,
+			&cfg_len_aligned, page_sz, 0, 0);
+	if (rte_mem_cfg_addr == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
+		close(mem_cfg_fd);
+		mem_cfg_fd = -1;
+		return -1;
+	}
 
-	if (rte_mem_cfg_addr == MAP_FAILED){
+	/* remap the actual file into the space we've just reserved */
+	mapped_mem_cfg_addr = mmap(rte_mem_cfg_addr,
+			cfg_len_aligned, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_FIXED, mem_cfg_fd, 0);
+	if (mapped_mem_cfg_addr == MAP_FAILED) {
+		munmap(rte_mem_cfg_addr, cfg_len);
 		close(mem_cfg_fd);
 		mem_cfg_fd = -1;
-		RTE_LOG(ERR, EAL, "Cannot mmap memory for rte_config\n");
+		RTE_LOG(ERR, EAL, "Cannot remap memory for rte_config\n");
 		return -1;
 	}
 
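The hunk above replaces the single mmap() of the runtime config file with a two-step sequence: reserve a page-aligned virtual area first, then map the file over it with MAP_FIXED so the mapping is guaranteed to land at the reserved address. A minimal standalone sketch of the same idiom follows; it uses plain mmap(PROT_NONE) in place of DPDK's internal eal_get_virtual_area(), and "rte_config.sketch" is a stand-in file, not the real runtime config path:

/* Sketch of the reserve-then-remap idiom; assumes POSIX mmap() only. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	size_t page_sz = sysconf(_SC_PAGE_SIZE);
	size_t cfg_len = 1024; /* pretend config size */
	size_t cfg_len_aligned = (cfg_len + page_sz - 1) & ~(page_sz - 1);
	void *resv, *mapped;
	int fd = open("rte_config.sketch", O_RDWR | O_CREAT, 0600);

	if (fd < 0 || ftruncate(fd, cfg_len) < 0) {
		perror("open/ftruncate");
		return EXIT_FAILURE;
	}

	/* step 1: reserve a page-aligned virtual area with no backing store */
	resv = mmap(NULL, cfg_len_aligned, PROT_NONE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (resv == MAP_FAILED) {
		perror("reserve");
		return EXIT_FAILURE;
	}

	/* step 2: map the file over the reservation; MAP_FIXED pins the
	 * mapping to exactly the reserved address
	 */
	mapped = mmap(resv, cfg_len_aligned, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, fd, 0);
	if (mapped == MAP_FAILED) {
		munmap(resv, cfg_len_aligned);
		perror("remap");
		return EXIT_FAILURE;
	}

	printf("config mapped at %p\n", mapped);
	munmap(mapped, cfg_len_aligned);
	close(fd);
	return EXIT_SUCCESS;
}

Note that a failed second mmap() leaves the reservation in place, which is why both the sketch and the hunk above munmap() it explicitly before bailing out.
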
error %i (%s)\n", @@ -473,24 +490,6 @@ eal_proc_type_detect(void) return ptype; } -/* copies data from internal config to shared config */ -static void -eal_update_mem_config(void) -{ - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - mcfg->legacy_mem = internal_config.legacy_mem; - mcfg->single_file_segments = internal_config.single_file_segments; -} - -/* copies data from shared config to internal config */ -static void -eal_update_internal_config(void) -{ - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - internal_config.legacy_mem = mcfg->legacy_mem; - internal_config.single_file_segments = mcfg->single_file_segments; -} - /* Sets up rte_config structure with the pointer to shared memory config.*/ static int rte_config_init(void) @@ -501,15 +500,19 @@ rte_config_init(void) case RTE_PROC_PRIMARY: if (rte_eal_config_create() < 0) return -1; - eal_update_mem_config(); + eal_mcfg_update_from_internal(); break; case RTE_PROC_SECONDARY: if (rte_eal_config_attach() < 0) return -1; eal_mcfg_wait_complete(); + if (eal_mcfg_check_version() < 0) { + RTE_LOG(ERR, EAL, "Primary and secondary process DPDK version mismatch\n"); + return -1; + } if (rte_eal_config_reattach() < 0) return -1; - eal_update_internal_config(); + eal_mcfg_update_internal(); break; case RTE_PROC_AUTO: case RTE_PROC_INVALID: @@ -551,7 +554,6 @@ eal_usage(const char *prgname) " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n" " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" - " --"OPT_BASE_VIRTADDR" Base virtual address\n" " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n" @@ -623,35 +625,6 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg) return 0; } -static int -eal_parse_base_virtaddr(const char *arg) -{ - char *end; - uint64_t addr; - - errno = 0; - addr = strtoull(arg, &end, 16); - - /* check for errors */ - if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0')) - return -1; - - /* make sure we don't exceed 32-bit boundary on 32-bit target */ -#ifndef RTE_ARCH_64 - if (addr >= UINTPTR_MAX) - return -1; -#endif - - /* align the addr on 16M boundary, 16MB is the minimum huge page - * size on IBM Power architecture. If the addr is aligned to 16MB, - * it can align to 2MB for x86. 
@@ -810,16 +783,6 @@ eal_parse_args(int argc, char **argv)
 			internal_config.force_socket_limits = 1;
 			break;
 
-		case OPT_BASE_VIRTADDR_NUM:
-			if (eal_parse_base_virtaddr(optarg) < 0) {
-				RTE_LOG(ERR, EAL, "invalid parameter for --"
-						OPT_BASE_VIRTADDR "\n");
-				eal_usage(prgname);
-				ret = -1;
-				goto out;
-			}
-			break;
-
 		case OPT_VFIO_INTR_NUM:
 			if (eal_parse_vfio_intr(optarg) < 0) {
 				RTE_LOG(ERR, EAL, "invalid parameters for --"
@@ -934,16 +897,6 @@ sync_func(__attribute__((unused)) void *arg)
 	return 0;
 }
 
-inline static void
-rte_eal_mcfg_complete(void)
-{
-	/* ALL shared mem_config related INIT DONE */
-	if (rte_config.process_type == RTE_PROC_PRIMARY)
-		rte_config.mem_config->magic = RTE_MAGIC;
-
-	internal_config.init_complete = 1;
-}
-
 /*
  * Request iopl privilege for all RPL, returns 0 on success
  * iopl() call is mostly for the i386 architecture. For other architectures,
@@ -975,6 +928,33 @@ static void rte_eal_init_alert(const char *msg)
 	RTE_LOG(ERR, EAL, "%s\n", msg);
 }
 
+/*
+ * On Linux 3.6+, even if VFIO is not loaded, whenever IOMMU is enabled in the
+ * BIOS and in the kernel, /sys/kernel/iommu_groups path will contain kernel
+ * IOMMU groups. If IOMMU is not enabled, that path would be empty.
+ * Therefore, checking if the path is empty will tell us if IOMMU is enabled.
+ */
+static bool
+is_iommu_enabled(void)
+{
+	DIR *dir = opendir(KERNEL_IOMMU_GROUPS_PATH);
+	struct dirent *d;
+	int n = 0;
+
+	/* if directory doesn't exist, assume IOMMU is not enabled */
+	if (dir == NULL)
+		return false;
+
+	while ((d = readdir(dir)) != NULL) {
+		/* skip dot and dot-dot */
+		if (++n > 2)
+			break;
+	}
+	closedir(dir);
+
+	return n > 2;
+}
+
 /* Launch threads, called at application init(). */
 int
 rte_eal_init(int argc, char **argv)
@@ -1085,10 +1065,25 @@ rte_eal_init(int argc, char **argv)
 
 	enum rte_iova_mode iova_mode = rte_bus_get_iommu_class();
 	if (iova_mode == RTE_IOVA_DC) {
-		iova_mode = phys_addrs ? RTE_IOVA_PA : RTE_IOVA_VA;
-		RTE_LOG(DEBUG, EAL,
-			"Buses did not request a specific IOVA mode, using '%s' based on physical addresses availability.\n",
-			phys_addrs ? "PA" : "VA");
+		RTE_LOG(DEBUG, EAL, "Buses did not request a specific IOVA mode.\n");
+
+		if (!phys_addrs) {
+			/* if we have no access to physical addresses,
+			 * pick IOVA as VA mode.
+			 */
+			iova_mode = RTE_IOVA_VA;
+			RTE_LOG(DEBUG, EAL, "Physical addresses are unavailable, selecting IOVA as VA mode.\n");
+		} else if (is_iommu_enabled()) {
+			/* we have an IOMMU, pick IOVA as VA mode */
+			iova_mode = RTE_IOVA_VA;
+			RTE_LOG(DEBUG, EAL, "IOMMU is available, selecting IOVA as VA mode.\n");
+		} else {
+			/* physical addresses available, and no IOMMU
+			 * found, so pick IOVA as PA.
+			 */
+			iova_mode = RTE_IOVA_PA;
+			RTE_LOG(DEBUG, EAL, "IOMMU is not available, selecting IOVA as PA mode.\n");
+		}
 	}
 #ifdef RTE_LIBRTE_KNI
 	/* Workaround for KNI which requires physical address to work */
@@ -1289,7 +1284,7 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
-	rte_eal_mcfg_complete();
+	eal_mcfg_complete();
 
 	/* Call each registered callback, if enabled */
 	rte_option_init();
@@ -1328,13 +1323,6 @@ rte_eal_cleanup(void)
 	return 0;
 }
 
-/* get core role */
-enum rte_lcore_role_t
-rte_eal_lcore_role(unsigned lcore_id)
-{
-	return rte_config.lcore_role[lcore_id];
-}
-
 enum rte_proc_type_t
 rte_eal_process_type(void)
 {
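The new IOVA-mode selection above keys off is_iommu_enabled(). A standalone sketch mirroring that check: on Linux 3.6+, /sys/kernel/iommu_groups holds one entry per IOMMU group, so anything beyond "." and ".." means an IOMMU is enabled:

/* Standalone sketch of the /sys/kernel/iommu_groups check added above. */
#include <dirent.h>
#include <stdbool.h>
#include <stdio.h>

static bool
iommu_enabled(void)
{
	DIR *dir = opendir("/sys/kernel/iommu_groups");
	struct dirent *d;
	int n = 0;

	/* if the directory doesn't exist, assume IOMMU is not enabled */
	if (dir == NULL)
		return false;

	while ((d = readdir(dir)) != NULL) {
		/* stop once we've seen more than "." and ".." */
		if (++n > 2)
			break;
	}
	closedir(dir);

	return n > 2;
}

int
main(void)
{
	printf("IOMMU is %s\n", iommu_enabled() ? "enabled" : "not enabled");
	return 0;
}

The loop only counts entries rather than inspecting their names: a third entry is enough to prove the directory is non-empty, so it can bail out early instead of scanning every group.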