mem: allow virtual memory address hinting
author: Bruce Richardson <bruce.richardson@intel.com>
Wed, 12 Feb 2014 13:38:45 +0000 (13:38 +0000)
committer: David Marchand <david.marchand@6wind.com>
Tue, 25 Feb 2014 20:29:18 +0000 (21:29 +0100)
For multi-process applications, it can sometimes happen that parts of the
address ranges used for memory mapping in the primary process are not
free in the secondary process, which causes the secondary process to
abort on startup.
This patch adds a memory hinting mechanism, which lets you give the
primary process a starting base address (via the new --base-virtaddr
option) at which you would like the hugepage memory to be mapped. It is
only a hint, so the memory will not always go exactly where requested,
but it should allow the memory addresses used by a primary process to be
adjusted up or down a little, thereby fixing issues with secondary
process startup.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
lib/librte_eal/linuxapp/eal/eal.c
lib/librte_eal/linuxapp/eal/eal_memory.c
lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h

index b5e755f..7a32794 100644 (file)
@@ -91,6 +91,7 @@
 #define OPT_SOCKET_MEM  "socket-mem"
 #define OPT_USE_DEVICE  "use-device"
 #define OPT_SYSLOG      "syslog"
+#define OPT_BASE_VIRTADDR   "base-virtaddr"
 
 #define RTE_EAL_BLACKLIST_SIZE 0x100
 
@@ -309,13 +310,13 @@ eal_hugedirs_unlock(void)
        for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
        {
                /* skip uninitialized */
-               if (internal_config.hugepage_info[i].lock_descriptor == 0)
+               if (internal_config.hugepage_info[i].lock_descriptor < 0)
                        continue;
                /* unlock hugepage file */
                flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
                close(internal_config.hugepage_info[i].lock_descriptor);
                /* reset the field */
-               internal_config.hugepage_info[i].lock_descriptor = 0;
+               internal_config.hugepage_info[i].lock_descriptor = -1;
        }
 }
 
@@ -345,6 +346,7 @@ eal_usage(const char *prgname)
               "               [NOTE: Cannot be used with -b option]\n"
               "  --"OPT_VMWARE_TSC_MAP": use VMware TSC map instead of "
                           "native RDTSC\n"
+              "  --"OPT_BASE_VIRTADDR": specify base virtual address\n"
               "\nEAL options for DEBUG use only:\n"
               "  --"OPT_NO_HUGE"  : use malloc instead of hugetlbfs\n"
               "  --"OPT_NO_PCI"   : disable pci\n"
@@ -530,6 +532,31 @@ eal_parse_socket_mem(char *socket_mem)
        return 0;
 }
 
+static int
+eal_parse_base_virtaddr(const char *arg)
+{
+       char *end;
+       uint64_t addr;
+
+       addr = strtoull(arg, &end, 16);
+
+       /* check for errors */
+       if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0'))
+               return -1;
+
+       /* make sure we don't exceed 32-bit boundary on 32-bit target */
+#ifndef RTE_ARCH_X86_64
+       if (addr >= UINTPTR_MAX)
+               return -1;
+#endif
+
+       /* align the addr on 2M boundary */
+       addr = RTE_PTR_ALIGN_CEIL(addr, RTE_PGSIZE_2M);
+
+       internal_config.base_virtaddr = (uintptr_t) addr;
+       return 0;
+}
+
 static inline size_t
 eal_get_hugepage_mem_size(void)
 {
@@ -599,6 +626,7 @@ eal_parse_args(int argc, char **argv)
                {OPT_SOCKET_MEM, 1, 0, 0},
                {OPT_USE_DEVICE, 1, 0, 0},
                {OPT_SYSLOG, 1, NULL, 0},
+               {OPT_BASE_VIRTADDR, 1, 0, 0},
                {0, 0, 0, 0}
        };
 
@@ -622,9 +650,10 @@ eal_parse_args(int argc, char **argv)
 
        /* zero out hugedir descriptors */
        for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
-               internal_config.hugepage_info[i].lock_descriptor = 0;
+               internal_config.hugepage_info[i].lock_descriptor = -1;
 
        internal_config.vmware_tsc_map = 0;
+       internal_config.base_virtaddr = 0;
 
        while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v",
                                  lgopts, &option_index)) != EOF) {
@@ -725,6 +754,14 @@ eal_parse_args(int argc, char **argv)
                                        return -1;
                                }
                        }
+                       else if (!strcmp(lgopts[option_index].name, OPT_BASE_VIRTADDR)) {
+                               if (eal_parse_base_virtaddr(optarg) < 0) {
+                                       RTE_LOG(ERR, EAL, "invalid parameter for --"
+                                                       OPT_BASE_VIRTADDR "\n");
+                                       eal_usage(prgname);
+                                       return -1;
+                               }
+                       }
                        break;
 
                default:
index 4bd8987..2eb4617 100644 (file)
  * zone as well as a physical contiguous zone.
  */
 
+static uint64_t baseaddr_offset;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
 
@@ -156,7 +157,13 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
        int fd;
        long aligned_addr;
 
-       RTE_LOG(INFO, EAL, "Ask a virtual area of 0x%zu bytes\n", *size);
+       if (internal_config.base_virtaddr != 0) {
+               addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
+                               baseaddr_offset);
+       }
+       else addr = NULL;
+
+       RTE_LOG(INFO, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
 
        fd = open("/dev/zero", O_RDONLY);
        if (fd < 0){
@@ -164,7 +171,8 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
                return NULL;
        }
        do {
-               addr = mmap(NULL, (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0);
+               addr = mmap(addr,
+                               (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0);
                if (addr == MAP_FAILED)
                        *size -= hugepage_sz;
        } while (addr == MAP_FAILED && *size > 0);
@@ -187,6 +195,9 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
        RTE_LOG(INFO, EAL, "Virtual area found at %p (size = 0x%zx)\n",
                addr, *size);
 
+       /* increment offset */
+       baseaddr_offset += *size;
+
        return addr;
 }
 
index 5cacfdc..0a2eddd 100644 (file)
@@ -71,7 +71,8 @@ struct internal_config {
        volatile enum rte_proc_type_t process_type; /* multi-process proc type */
        /* true to try allocating memory on specific sockets */
        volatile unsigned force_sockets;
-       volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket*/
+       volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+       uintptr_t base_virtaddr;          /**< base address to try and reserve memory from */
        volatile int syslog_facility;     /**< facility passed to openlog() */
        const char *hugefile_prefix;      /**< the base filename of hugetlbfs files */
        const char *hugepage_dir;         /**< specific hugetlbfs directory to use */