#include <sys/queue.h>
#include <sys/stat.h>
+#include <linux/mman.h> /* for hugetlb-related flags */
+
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_launch.h>
return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
}
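+/* assign hugepages of a given page size to NUMA nodes if possible, or,
+ * failing that, count all pages against socket 0
+ */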
+static void
+calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent)
+{
+ uint64_t total_pages = 0;
+ unsigned int i;
+
+ /*
+ * first, try to put all hugepages into relevant sockets, but
+ * if the first attempt fails, fall back to collecting all pages
+ * in one socket and sorting them later
+ */
+ /* we also don't want to do this for legacy init */
+ if (!internal_config.legacy_mem)
+ for (i = 0; i < rte_socket_count(); i++) {
+ int socket = rte_socket_id_by_idx(i);
+ unsigned int num_pages =
+ get_num_hugepages_on_node(
+ dirent->d_name, socket);
+ hpi->num_pages[socket] = num_pages;
+ total_pages += num_pages;
+ }
+ /*
+ * we failed to sort memory from the get-go, so fall
+ * back to the old way
+ */
+ if (total_pages == 0) {
+ hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
+
+#ifndef RTE_ARCH_64
+ /* for 32-bit systems, limit the number of hugepages to
+ * 1GB of memory per page size */
+ hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
+ RTE_PGSIZE_1G / hpi->hugepage_sz);
+#endif
+ }
+}
+
static int
hugepage_info_init(void)
{
const char dirent_start_text[] = "hugepages-";
const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
- unsigned int i, total_pages, num_sizes = 0;
+ unsigned int i, num_sizes = 0;
DIR *dir;
struct dirent *dirent;
"%" PRIu64 " reserved, but no mounted "
"hugetlbfs found for that size\n",
num_pages, hpi->hugepage_sz);
+ /* if we have kernel support for reserving hugepages
+ * through mmap, and we're in in-memory mode, treat this
+ * page size as valid. we cannot be in legacy mode at
+ * this point because we've checked this earlier in the
+ * init process.
+ */
+#ifdef MAP_HUGE_SHIFT
+ if (internal_config.in_memory) {
+ RTE_LOG(DEBUG, EAL, "In-memory mode enabled, "
+ "hugepages of size %" PRIu64 " bytes "
+ "will be allocated anonymously\n",
+ hpi->hugepage_sz);
+ calc_num_pages(hpi, dirent);
+ num_sizes++;
+ }
+#endif
continue;
}
if (clear_hugedir(hpi->hugedir) == -1)
break;
- /*
- * first, try to put all hugepages into relevant sockets, but
- * if first attempts fails, fall back to collecting all pages
- * in one socket and sorting them later
- */
- total_pages = 0;
- /* we also don't want to do this for legacy init */
- if (!internal_config.legacy_mem)
- for (i = 0; i < rte_socket_count(); i++) {
- int socket = rte_socket_id_by_idx(i);
- unsigned int num_pages =
- get_num_hugepages_on_node(
- dirent->d_name, socket);
- hpi->num_pages[socket] = num_pages;
- total_pages += num_pages;
- }
- /*
- * we failed to sort memory from the get go, so fall
- * back to old way
- */
- if (total_pages == 0)
- hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
-
-#ifndef RTE_ARCH_64
- /* for 32-bit systems, limit number of hugepages to
- * 1GB per page size */
- hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
- RTE_PGSIZE_1G / hpi->hugepage_sz);
-#endif
+ calc_num_pages(hpi, dirent);
num_sizes++;
}
for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
num_pages += hpi->num_pages[j];
- if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0 &&
- num_pages > 0)
+ if (num_pages > 0)
return 0;
}
#include <numaif.h>
#endif
#include <linux/falloc.h>
+#include <linux/mman.h> /* for hugetlb-related mmap flags */
#include <rte_common.h>
#include <rte_log.h>
#include "eal_memalloc.h"
#include "eal_private.h"
+const int anonymous_hugepages_supported =
+#ifdef MAP_HUGE_SHIFT
+ 1;
+#define RTE_MAP_HUGE_SHIFT MAP_HUGE_SHIFT
+#else
+ 0;
+#define RTE_MAP_HUGE_SHIFT 26
+#endif
+
/*
* not all kernel versions support fallocate on hugetlbfs, so fall back to
* ftruncate and disallow deallocation if fallocate is not supported.
int cur_socket_id = 0;
#endif
uint64_t map_offset;
+ rte_iova_t iova;
+ void *va;
char path[PATH_MAX];
int ret = 0;
int fd;
int flags;
void *new_addr;
- /* takes out a read lock on segment or segment list */
- fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Couldn't get fd on hugepage file\n");
- return -1;
- }
-
alloc_sz = hi->hugepage_sz;
- if (internal_config.single_file_segments) {
- map_offset = seg_idx * alloc_sz;
- ret = resize_hugefile(fd, path, list_idx, seg_idx, map_offset,
- alloc_sz, true);
- if (ret < 0)
- goto resized;
- } else {
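+ /* in-memory mode can back segments with anonymous hugepages instead of
+ * hugetlbfs files, provided the kernel supports mapping them
+ */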
+ if (internal_config.in_memory && anonymous_hugepages_supported) {
+ int log2, mmap_flags;
+
+ log2 = rte_log2_u32(alloc_sz);
+ /* as per mmap() manpage, all page sizes are log2 of page size
+ * shifted by MAP_HUGE_SHIFT
+ */
+ mmap_flags = (log2 << RTE_MAP_HUGE_SHIFT) | MAP_HUGETLB | MAP_FIXED |
+ MAP_PRIVATE | MAP_ANONYMOUS;
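+ /* anonymous mappings have no backing file, so no fd is needed */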
+ fd = -1;
+ va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+
+ /* the single-file segments codepath will never be active here, because
+ * in-memory mode is incompatible with it and the combination is
+ * rejected during EAL initialization. the compiler, however, doesn't
+ * know that, and warns about map_offset being used uninitialized on
+ * the failure codepaths, so assign a value here.
+ */
map_offset = 0;
- if (ftruncate(fd, alloc_sz) < 0) {
- RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
- __func__, strerror(errno));
- goto resized;
+ } else {
+ /* takes out a read lock on segment or segment list */
+ fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't get fd on hugepage file\n");
+ return -1;
}
- if (internal_config.hugepage_unlink) {
- if (unlink(path)) {
- RTE_LOG(DEBUG, EAL, "%s(): unlink() failed: %s\n",
+
+ if (internal_config.single_file_segments) {
+ map_offset = seg_idx * alloc_sz;
+ ret = resize_hugefile(fd, path, list_idx, seg_idx,
+ map_offset, alloc_sz, true);
+ if (ret < 0)
+ goto resized;
+ } else {
+ map_offset = 0;
+ if (ftruncate(fd, alloc_sz) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
__func__, strerror(errno));
goto resized;
}
+ if (internal_config.hugepage_unlink) {
+ if (unlink(path)) {
+ RTE_LOG(DEBUG, EAL, "%s(): unlink() failed: %s\n",
+ __func__, strerror(errno));
+ goto resized;
+ }
+ }
}
- }
- /*
- * map the segment, and populate page tables, the kernel fills this
- * segment with zeros if it's a new page.
- */
- void *va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, map_offset);
+ /*
+ * map the segment and populate page tables; the kernel fills
+ * this segment with zeros if it's a new page.
+ */
+ va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd,
+ map_offset);
+ }
if (va == MAP_FAILED) {
RTE_LOG(DEBUG, EAL, "%s(): mmap() failed: %s\n", __func__,
goto resized;
}
- rte_iova_t iova = rte_mem_virt2iova(addr);
- if (iova == RTE_BAD_PHYS_ADDR) {
- RTE_LOG(DEBUG, EAL, "%s(): can't get IOVA addr\n",
- __func__);
- goto mapped;
- }
-
-#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
- move_pages(getpid(), 1, &addr, NULL, &cur_socket_id, 0);
-
- if (cur_socket_id != socket_id) {
- RTE_LOG(DEBUG, EAL,
- "%s(): allocation happened on wrong socket (wanted %d, got %d)\n",
- __func__, socket_id, cur_socket_id);
- goto mapped;
- }
-#endif
-
/* In linux, hugetlb limitations, like cgroup, are
* enforced at fault time instead of mmap(), even
* with the option of MAP_POPULATE. Kernel will send
(unsigned int)(alloc_sz >> 20));
goto mapped;
}
- /* for non-single file segments, we can close fd here */
- if (!internal_config.single_file_segments)
- close(fd);
/* we need to trigger a write to the page to enforce page fault and
* ensure that page is accessible to us, but we can't overwrite the
* value that is already there, so read the old value and write it back
*/
*(volatile int *)addr = *(volatile int *)addr;
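+ /* anonymous pages have no physical backing until first written to, so
+ * IOVA and NUMA node can only be checked after the page fault above
+ */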
+ iova = rte_mem_virt2iova(addr);
+ if (iova == RTE_BAD_PHYS_ADDR) {
+ RTE_LOG(DEBUG, EAL, "%s(): can't get IOVA addr\n",
+ __func__);
+ goto mapped;
+ }
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ move_pages(getpid(), 1, &addr, NULL, &cur_socket_id, 0);
+
+ if (cur_socket_id != socket_id) {
+ RTE_LOG(DEBUG, EAL,
+ "%s(): allocation happened on wrong socket (wanted %d, got %d)\n",
+ __func__, socket_id, cur_socket_id);
+ goto mapped;
+ }
+#endif
+ /* for non-single file segments that aren't in-memory, we can close fd
+ * here */
+ if (!internal_config.single_file_segments && !internal_config.in_memory)
+ close(fd);
+
ms->addr = addr;
ms->hugepage_sz = alloc_sz;
ms->len = alloc_sz;
RTE_LOG(CRIT, EAL, "Can't mmap holes in our virtual address space\n");
}
resized:
+ /* in-memory mode will never be single-file-segments mode */
if (internal_config.single_file_segments) {
resize_hugefile(fd, path, list_idx, seg_idx, map_offset,
alloc_sz, false);
} else {
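+ /* in-memory mode has no backing file to remove */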
/* only remove file if we can take out a write lock */
if (internal_config.hugepage_unlink == 0 &&
+ internal_config.in_memory == 0 &&
lock(fd, LOCK_EX) == 1)
unlink(path);
close(fd);
* during init, we already hold a write lock, so don't try to take out
* another one.
*/
- if (wa->hi->lock_descriptor == -1) {
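+ /* in-memory mode creates no hugepage files, so there is nothing to lock */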
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
dir_fd = open(wa->hi->hugedir, O_RDONLY);
if (dir_fd < 0) {
RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
* during init, we already hold a write lock, so don't try to take out
* another one.
*/
- if (wa->hi->lock_descriptor == -1) {
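+ /* in-memory mode creates no hugepage files, so there is nothing to lock */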
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
dir_fd = open(wa->hi->hugedir, O_RDONLY);
if (dir_fd < 0) {
RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",