From: Anatoly Burakov Date: Wed, 11 Apr 2018 12:30:33 +0000 (+0100) Subject: mem: share hugepage info primary and secondary X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=cb97d93e9d3b;p=dpdk.git mem: share hugepage info primary and secondary Since we are going to need to map hugepages in both primary and secondary processes, we need to know where we should look for hugetlbfs mountpoints. So, share those with secondary processes, and map them on init. Signed-off-by: Anatoly Burakov Tested-by: Santosh Shukla Tested-by: Hemant Agrawal Tested-by: Gowrishankar Muthukrishnan --- diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 54330e1e24..727adc5d2c 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -289,7 +289,7 @@ eal_get_hugepage_mem_size(void) for (i = 0; i < internal_config.num_hugepage_sizes; i++) { struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) { + if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) { for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { size += hpi->hugepage_sz * hpi->num_pages[j]; } @@ -561,12 +561,17 @@ rte_eal_init(int argc, char **argv) /* autodetect the iova mapping mode (default is iova_pa) */ rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); - if (internal_config.no_hugetlbfs == 0 && - eal_hugepage_info_init() < 0) { - rte_eal_init_alert("Cannot get hugepage information."); - rte_errno = EACCES; - rte_atomic32_clear(&run_once); - return -1; + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? + eal_hugepage_info_init() : + eal_hugepage_info_read(); + if (ret < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c index ba44da0e61..38d143cd9e 100644 --- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c @@ -19,10 +19,10 @@ * Used in this file to store the hugepage file map on disk */ static void * -create_shared_memory(const char *filename, const size_t mem_size) +map_shared_memory(const char *filename, const size_t mem_size, int flags) { void *retval; - int fd = open(filename, O_CREAT | O_RDWR, 0666); + int fd = open(filename, flags, 0666); if (fd < 0) return NULL; if (ftruncate(fd, mem_size) < 0) { @@ -34,6 +34,18 @@ create_shared_memory(const char *filename, const size_t mem_size) return retval; } +static void * +open_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR); +} + +static void * +create_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT); +} + /* * No hugepage support on freebsd, but we dummy it, using contigmem driver */ @@ -46,13 +58,10 @@ eal_hugepage_info_init(void) /* re-use the linux "internal config" structure for our memory data */ struct hugepage_info *hpi = &internal_config.hugepage_info[0]; struct hugepage_info *tmp_hpi; + unsigned int i; internal_config.num_hugepage_sizes = 1; - /* nothing more to be done for secondary */ - if (rte_eal_process_type() == RTE_PROC_SECONDARY) - return 0; - sysctl_size = sizeof(num_buffers); error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers, &sysctl_size, NULL, 0); @@ -87,7 +96,7 @@ eal_hugepage_info_init(void) RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n", num_buffers, (int)(buffer_size>>10)); - hpi->hugedir = CONTIGMEM_DEV; + snprintf(hpi->hugedir, sizeof(hpi->hugedir), "%s", CONTIGMEM_DEV); hpi->hugepage_sz = buffer_size; hpi->num_pages[0] = num_buffers; hpi->lock_descriptor = fd; @@ -101,6 +110,14 @@ eal_hugepage_info_init(void) memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info)); + /* we've copied file descriptors along with everything else, but they + * will be invalid in secondary process, so overwrite them + */ + for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) { + struct hugepage_info *tmp = &tmp_hpi[i]; + tmp->lock_descriptor = -1; + } + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); return -1; @@ -108,3 +125,28 @@ eal_hugepage_info_init(void) return 0; } + +/* copy stuff from shared info into internal config */ +int +eal_hugepage_info_read(void) +{ + struct hugepage_info *hpi = &internal_config.hugepage_info[0]; + struct hugepage_info *tmp_hpi; + + internal_config.num_hugepage_sizes = 1; + + tmp_hpi = open_shared_memory(eal_hugepage_info_path(), + sizeof(internal_config.hugepage_info)); + if (tmp_hpi == NULL) { + RTE_LOG(ERR, EAL, "Failed to open shared memory!\n"); + return -1; + } + + memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info)); + + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { + RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); + return -1; + } + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c index 2f5651d1c9..b27262c7eb 100644 --- a/lib/librte_eal/bsdapp/eal/eal_memory.c +++ b/lib/librte_eal/bsdapp/eal/eal_memory.c @@ -242,23 +242,10 @@ int rte_eal_hugepage_attach(void) { const struct hugepage_info *hpi; - int fd_hugepage_info, fd_hugepage = -1; + int fd_hugepage = -1; unsigned int i; - /* Obtain a file descriptor for hugepage_info */ - fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY); - if (fd_hugepage_info < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); - return -1; - } - - /* Map the shared hugepage_info into the process address spaces */ - hpi = mmap(NULL, sizeof(internal_config.hugepage_info), - PROT_READ, MAP_PRIVATE, fd_hugepage_info, 0); - if (hpi == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); - goto error; - } + hpi = &internal_config.hugepage_info[0]; for (i = 0; i < internal_config.num_hugepage_sizes; i++) { const struct hugepage_info *cur_hpi = &hpi[i]; @@ -288,13 +275,9 @@ rte_eal_hugepage_attach(void) } /* hugepage_info is no longer required */ - munmap((void *)(uintptr_t)hpi, sizeof(internal_config.hugepage_info)); - close(fd_hugepage_info); return 0; error: - if (fd_hugepage_info >= 0) - close(fd_hugepage_info); if (fd_hugepage >= 0) close(fd_hugepage); return -1; diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index e764e43037..40c5b26f3c 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -179,8 +179,11 @@ eal_reset_internal_config(struct internal_config *internal_cfg) for (i = 0; i < RTE_MAX_NUMA_NODES; i++) internal_cfg->socket_mem[i] = 0; /* zero out hugedir descriptors */ - for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) { + memset(&internal_cfg->hugepage_info[i], 0, + sizeof(internal_cfg->hugepage_info[0])); internal_cfg->hugepage_info[i].lock_descriptor = -1; + } internal_cfg->base_virtaddr = 0; internal_cfg->syslog_facility = LOG_DAEMON; diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h index 1c6048b5f9..ad059ef37e 100644 --- a/lib/librte_eal/common/eal_filesystem.h +++ b/lib/librte_eal/common/eal_filesystem.h @@ -85,6 +85,23 @@ eal_hugepage_info_path(void) return buffer; } +/** Path of hugepage info file. */ +#define HUGEPAGE_FILE_FMT "%s/.%s_hugepage_file" + +static inline const char * +eal_hugepage_file_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + const char *directory = default_config_dir; + const char *home_dir = getenv("HOME"); + + if (getuid() != 0 && home_dir != NULL) + directory = home_dir; + snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_FILE_FMT, directory, + internal_config.hugefile_prefix); + return buffer; +} + /** String format for hugepage map files. */ #define HUGEFILE_FMT "%s/%smap_%d" #define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d" diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h index ad1b0b6a20..4582f19cfb 100644 --- a/lib/librte_eal/common/eal_hugepages.h +++ b/lib/librte_eal/common/eal_hugepages.h @@ -26,9 +26,15 @@ struct hugepage_file { }; /** - * Read the information from linux on what hugepages are available - * for the EAL to use + * Read the information on what hugepages are available for the EAL to use, + * clearing out any unused ones. */ int eal_hugepage_info_init(void); +/** + * Read whatever information primary process has shared about hugepages into + * secondary process. + */ +int eal_hugepage_info_read(void); + #endif /* EAL_HUGEPAGES_H */ diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 9d33cf41c5..c4cbf3acda 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -21,7 +21,7 @@ */ struct hugepage_info { uint64_t hugepage_sz; /**< size of a huge page */ - const char *hugedir; /**< dir where hugetlbfs is mounted */ + char hugedir[PATH_MAX]; /**< dir where hugetlbfs is mounted */ uint32_t num_pages[RTE_MAX_NUMA_NODES]; /**< number of hugepages of that size on each socket */ int lock_descriptor; /**< file descriptor for hugepage dir */ diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 2c12811530..e7c6dcf0d7 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -807,13 +807,17 @@ rte_eal_init(int argc, char **argv) "KNI module inserted\n"); } - if (internal_config.no_hugetlbfs == 0 && - internal_config.process_type != RTE_PROC_SECONDARY && - eal_hugepage_info_init() < 0) { - rte_eal_init_alert("Cannot get hugepage information."); - rte_errno = EACCES; - rte_atomic32_clear(&run_once); - return -1; + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? + eal_hugepage_info_init() : + eal_hugepage_info_read(); + if (ret < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 2e0819ffef..fb4b667364 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,39 @@ static const char sys_dir_path[] = "/sys/kernel/mm/hugepages"; static const char sys_pages_numa_dir_path[] = "/sys/devices/system/node"; +/* + * Uses mmap to create a shared memory area for storage of data + * Used in this file to store the hugepage file map on disk + */ +static void * +map_shared_memory(const char *filename, const size_t mem_size, int flags) +{ + void *retval; + int fd = open(filename, flags, 0666); + if (fd < 0) + return NULL; + if (ftruncate(fd, mem_size) < 0) { + close(fd); + return NULL; + } + retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); + return retval; +} + +static void * +open_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR); +} + +static void * +create_shared_memory(const char *filename, const size_t mem_size) +{ + return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT); +} + /* this function is only called from eal_hugepage_info_init which itself * is only called from a primary process */ static uint32_t @@ -299,15 +333,9 @@ compare_hpi(const void *a, const void *b) return hpi_b->hugepage_sz - hpi_a->hugepage_sz; } -/* - * when we initialize the hugepage info, everything goes - * to socket 0 by default. it will later get sorted by memory - * initialization procedure. - */ -int -eal_hugepage_info_init(void) -{ - const char dirent_start_text[] = "hugepages-"; +static int +hugepage_info_init(void) +{ const char dirent_start_text[] = "hugepages-"; const size_t dirent_start_len = sizeof(dirent_start_text) - 1; unsigned int i, total_pages, num_sizes = 0; DIR *dir; @@ -323,6 +351,7 @@ eal_hugepage_info_init(void) for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { struct hugepage_info *hpi; + const char *hugedir; if (strncmp(dirent->d_name, dirent_start_text, dirent_start_len) != 0) @@ -334,10 +363,10 @@ eal_hugepage_info_init(void) hpi = &internal_config.hugepage_info[num_sizes]; hpi->hugepage_sz = rte_str_to_size(&dirent->d_name[dirent_start_len]); - hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz); + hugedir = get_hugepage_dir(hpi->hugepage_sz); /* first, check if we have a mountpoint */ - if (hpi->hugedir == NULL) { + if (hugedir == NULL) { uint32_t num_pages; num_pages = get_num_hugepages(dirent->d_name); @@ -349,6 +378,7 @@ eal_hugepage_info_init(void) num_pages, hpi->hugepage_sz); continue; } + snprintf(hpi->hugedir, sizeof(hpi->hugedir), "%s", hugedir); /* try to obtain a writelock */ hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY); @@ -411,13 +441,11 @@ eal_hugepage_info_init(void) for (i = 0; i < num_sizes; i++) { /* pages may no longer all be on socket 0, so check all */ unsigned int j, num_pages = 0; + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { - struct hugepage_info *hpi = - &internal_config.hugepage_info[i]; + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) num_pages += hpi->num_pages[j]; - } - if (internal_config.hugepage_info[i].hugedir != NULL && + if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0 && num_pages > 0) return 0; } @@ -425,3 +453,64 @@ eal_hugepage_info_init(void) /* no valid hugepage mounts available, return error */ return -1; } + +/* + * when we initialize the hugepage info, everything goes + * to socket 0 by default. it will later get sorted by memory + * initialization procedure. + */ +int +eal_hugepage_info_init(void) +{ + struct hugepage_info *hpi, *tmp_hpi; + unsigned int i; + + if (hugepage_info_init() < 0) + return -1; + + hpi = &internal_config.hugepage_info[0]; + + tmp_hpi = create_shared_memory(eal_hugepage_info_path(), + sizeof(internal_config.hugepage_info)); + if (tmp_hpi == NULL) { + RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); + return -1; + } + + memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info)); + + /* we've copied file descriptors along with everything else, but they + * will be invalid in secondary process, so overwrite them + */ + for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) { + struct hugepage_info *tmp = &tmp_hpi[i]; + tmp->lock_descriptor = -1; + } + + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { + RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); + return -1; + } + return 0; +} + +int eal_hugepage_info_read(void) +{ + struct hugepage_info *hpi = &internal_config.hugepage_info[0]; + struct hugepage_info *tmp_hpi; + + tmp_hpi = open_shared_memory(eal_hugepage_info_path(), + sizeof(internal_config.hugepage_info)); + if (tmp_hpi == NULL) { + RTE_LOG(ERR, EAL, "Failed to open shared memory!\n"); + return -1; + } + + memcpy(hpi, tmp_hpi, sizeof(internal_config.hugepage_info)); + + if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) { + RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); + return -1; + } + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index c68db32f17..d91924718a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -1060,7 +1060,7 @@ get_socket_mem_size(int socket) for (i = 0; i < internal_config.num_hugepage_sizes; i++){ struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) + if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) size += hpi->hugepage_sz * hpi->num_pages[socket]; } @@ -1160,7 +1160,8 @@ calc_num_pages_per_socket(uint64_t * memory, for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) { /* skips if the memory on specific socket wasn't requested */ for (i = 0; i < num_hp_info && memory[socket] != 0; i++){ - hp_used[i].hugedir = hp_info[i].hugedir; + snprintf(hp_used[i].hugedir, sizeof(hp_used[i].hugedir), + "%s", hp_info[i].hugedir); hp_used[i].num_pages[socket] = RTE_MIN( memory[socket] / hp_info[i].hugepage_sz, hp_info[i].num_pages[socket]); @@ -1235,7 +1236,7 @@ eal_get_hugepage_mem_size(void) for (i = 0; i < internal_config.num_hugepage_sizes; i++) { struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) { + if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) { for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { size += hpi->hugepage_sz * hpi->num_pages[j]; } @@ -1509,7 +1510,7 @@ eal_legacy_hugepage_init(void) } /* create shared memory */ - hugepage = create_shared_memory(eal_hugepage_info_path(), + hugepage = create_shared_memory(eal_hugepage_file_path(), nr_hugefiles * sizeof(struct hugepage_file)); if (hugepage == NULL) { @@ -1694,16 +1695,16 @@ eal_legacy_hugepage_attach(void) test_phys_addrs_available(); - fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY); + fd_hugepage = open(eal_hugepage_file_path(), O_RDONLY); if (fd_hugepage < 0) { - RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); + RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_file_path()); goto error; } size = getFileSize(fd_hugepage); hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0); if (hp == MAP_FAILED) { - RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); + RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_file_path()); goto error; }