From: Bruce Richardson Date: Wed, 12 Feb 2014 13:38:45 +0000 (+0000) Subject: mem: allow virtual memory address hinting X-Git-Tag: spdx-start~11036 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=8ea9ff83;p=dpdk.git mem: allow virtual memory address hinting For multi-process applications, it can sometimes occur that part of the address ranges used for memory mapping in the primary process are not free in the secondary process, which causes the secondary processes to abort on startup. This patch adds in a memory hinting mechanism, where you can hint a starting base address to the primary process for where you would like the hugepage memory to be mapped. It is just a hint, so the memory will not always go exactly where requested, but it should allow the memory addresses used by a primary process to be adjusted up or down a little, thereby fixing issues with secondary process startup. Signed-off-by: Bruce Richardson --- diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index b5e755fc8b..7a32794f36 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -91,6 +91,7 @@ #define OPT_SOCKET_MEM "socket-mem" #define OPT_USE_DEVICE "use-device" #define OPT_SYSLOG "syslog" +#define OPT_BASE_VIRTADDR "base-virtaddr" #define RTE_EAL_BLACKLIST_SIZE 0x100 @@ -309,13 +310,13 @@ eal_hugedirs_unlock(void) for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) { /* skip uninitialized */ - if (internal_config.hugepage_info[i].lock_descriptor == 0) + if (internal_config.hugepage_info[i].lock_descriptor < 0) continue; /* unlock hugepage file */ flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN); close(internal_config.hugepage_info[i].lock_descriptor); /* reset the field */ - internal_config.hugepage_info[i].lock_descriptor = 0; + internal_config.hugepage_info[i].lock_descriptor = -1; } } @@ -345,6 +346,7 @@ eal_usage(const char *prgname) " [NOTE: Cannot be used with -b option]\n" " --"OPT_VMWARE_TSC_MAP": use VMware TSC map instead of " "native RDTSC\n" + " --"OPT_BASE_VIRTADDR": specify base virtual address\n" "\nEAL options for DEBUG use only:\n" " --"OPT_NO_HUGE" : use malloc instead of hugetlbfs\n" " --"OPT_NO_PCI" : disable pci\n" @@ -530,6 +532,31 @@ eal_parse_socket_mem(char *socket_mem) return 0; } +static int +eal_parse_base_virtaddr(const char *arg) +{ + char *end; + uint64_t addr; + + addr = strtoull(arg, &end, 16); + + /* check for errors */ + if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0')) + return -1; + + /* make sure we don't exceed 32-bit boundary on 32-bit target */ +#ifndef RTE_ARCH_X86_64 + if (addr >= UINTPTR_MAX) + return -1; +#endif + + /* align the addr on 2M boundary */ + addr = RTE_PTR_ALIGN_CEIL(addr, RTE_PGSIZE_2M); + + internal_config.base_virtaddr = (uintptr_t) addr; + return 0; +} + static inline size_t eal_get_hugepage_mem_size(void) { @@ -599,6 +626,7 @@ eal_parse_args(int argc, char **argv) {OPT_SOCKET_MEM, 1, 0, 0}, {OPT_USE_DEVICE, 1, 0, 0}, {OPT_SYSLOG, 1, NULL, 0}, + {OPT_BASE_VIRTADDR, 1, 0, 0}, {0, 0, 0, 0} }; @@ -622,9 +650,10 @@ eal_parse_args(int argc, char **argv) /* zero out hugedir descriptors */ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) - internal_config.hugepage_info[i].lock_descriptor = 0; + internal_config.hugepage_info[i].lock_descriptor = -1; internal_config.vmware_tsc_map = 0; + internal_config.base_virtaddr = 0; while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v", lgopts, &option_index)) != EOF) { @@ -725,6 +754,14 @@ eal_parse_args(int argc, char **argv) return -1; } } + else if (!strcmp(lgopts[option_index].name, OPT_BASE_VIRTADDR)) { + if (eal_parse_base_virtaddr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_BASE_VIRTADDR "\n"); + eal_usage(prgname); + return -1; + } + } break; default: diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 4bd8987cab..2eb4617546 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -109,6 +109,7 @@ * zone as well as a physical contiguous zone. */ +static uint64_t baseaddr_offset; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" @@ -156,7 +157,13 @@ get_virtual_area(size_t *size, size_t hugepage_sz) int fd; long aligned_addr; - RTE_LOG(INFO, EAL, "Ask a virtual area of 0x%zu bytes\n", *size); + if (internal_config.base_virtaddr != 0) { + addr = (void*) (uintptr_t) (internal_config.base_virtaddr + + baseaddr_offset); + } + else addr = NULL; + + RTE_LOG(INFO, EAL, "Ask a virtual area of 0x%zx bytes\n", *size); fd = open("/dev/zero", O_RDONLY); if (fd < 0){ @@ -164,7 +171,8 @@ get_virtual_area(size_t *size, size_t hugepage_sz) return NULL; } do { - addr = mmap(NULL, (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0); + addr = mmap(addr, + (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) *size -= hugepage_sz; } while (addr == MAP_FAILED && *size > 0); @@ -187,6 +195,9 @@ get_virtual_area(size_t *size, size_t hugepage_sz) RTE_LOG(INFO, EAL, "Virtual area found at %p (size = 0x%zx)\n", addr, *size); + /* increment offset */ + baseaddr_offset += *size; + return addr; } diff --git a/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h b/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h index 5cacfdc149..0a2eddda5e 100644 --- a/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h +++ b/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h @@ -71,7 +71,8 @@ struct internal_config { volatile enum rte_proc_type_t process_type; /* multi-process proc type */ /* true to try allocating memory on specific sockets */ volatile unsigned force_sockets; - volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket*/ + volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */ + uintptr_t base_virtaddr; /**< base address to try and reserve memory from */ volatile int syslog_facility; /**< facility passed to openlog() */ const char *hugefile_prefix; /**< the base filename of hugetlbfs files */ const char *hugepage_dir; /**< specific hugetlbfs directory to use */