#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
-#include <sys/mman.h>
#include <stdint.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <rte_common.h>
-#include <rte_log.h>
+#include <rte_eal_paging.h>
#include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_memory.h>
#include <rte_spinlock.h>
#include <rte_tailq.h>
return -1;
}
- map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_FIXED, fd, 0);
+ map_addr = rte_mem_map(addr, len, RTE_PROT_READ | RTE_PROT_WRITE,
+ RTE_MAP_SHARED | RTE_MAP_FORCE_ADDRESS, fd, 0);
if (map_addr != addr) {
- RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
- /* pass errno up the chain */
- rte_errno = errno;
return -1;
}
return 0;
return -1;
}
- page_sz = sysconf(_SC_PAGESIZE);
+ page_sz = rte_mem_page_size();
if (page_sz == (size_t)-1) {
free(ma);
return -1;
if (internal_config.no_shconf) {
/* remap virtual area as writable */
- void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
- if (new_data == MAP_FAILED) {
+ static const int flags = RTE_MAP_FORCE_ADDRESS |
+ RTE_MAP_PRIVATE | RTE_MAP_ANONYMOUS;
+ void *new_data = rte_mem_map(data, mmap_len,
+ RTE_PROT_READ | RTE_PROT_WRITE, flags, fd, 0);
+ if (new_data == NULL) {
RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
- __func__, strerror(errno));
+ __func__, rte_strerror(rte_errno));
goto fail;
}
} else {
return 0;
fail:
if (data)
- munmap(data, mmap_len);
+ rte_mem_unmap(data, mmap_len);
if (fd >= 0)
close(fd);
free(ma);
return -1;
}
- page_sz = sysconf(_SC_PAGESIZE);
+ page_sz = rte_mem_page_size();
if (page_sz == (size_t)-1) {
free(ma);
return -1;
return 0;
fail:
if (data)
- munmap(data, mmap_len);
+ rte_mem_unmap(data, mmap_len);
if (fd >= 0)
close(fd);
free(ma);
* really do anything about it, things will blow up either way.
*/
- size_t page_sz = sysconf(_SC_PAGESIZE);
-
+ size_t page_sz = rte_mem_page_size();
if (page_sz == (size_t)-1)
return -1;
goto out;
}
- munmap(arr->data, mmap_len);
+ rte_mem_unmap(arr->data, mmap_len);
/* area is unmapped, close fd and remove the tailq entry */
if (tmp->fd >= 0)
* really do anything about it, things will blow up either way.
*/
- size_t page_sz = sysconf(_SC_PAGESIZE);
-
+ size_t page_sz = rte_mem_page_size();
if (page_sz == (size_t)-1)
return -1;
}
close(fd);
}
- munmap(arr->data, mmap_len);
+ rte_mem_unmap(arr->data, mmap_len);
/* area is unmapped, remove the tailq entry */
TAILQ_REMOVE(&mem_area_tailq, tmp, next);
#include <string.h>
#include <unistd.h>
#include <inttypes.h>
-#include <sys/mman.h>
#include <sys/queue.h>
#include <rte_fbarray.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
+#include <rte_eal_paging.h>
#include <rte_errno.h>
#include <rte_log.h>
static void *next_baseaddr;
static uint64_t system_page_sz;
-#ifdef RTE_EXEC_ENV_LINUX
-#define RTE_DONTDUMP MADV_DONTDUMP
-#elif defined RTE_EXEC_ENV_FREEBSD
-#define RTE_DONTDUMP MADV_NOCORE
-#else
-#error "madvise doesn't support this OS"
-#endif
-
#define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
- size_t page_sz, int flags, int mmap_flags)
+ size_t page_sz, int flags, int reserve_flags)
{
bool addr_is_hint, allow_shrink, unmap, no_align;
uint64_t map_sz;
uint8_t try = 0;
if (system_page_sz == 0)
- system_page_sz = sysconf(_SC_PAGESIZE);
-
- mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+ system_page_sz = rte_mem_page_size();
RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
return NULL;
}
- mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE,
- mmap_flags, -1, 0);
- if (mapped_addr == MAP_FAILED && allow_shrink)
+ mapped_addr = eal_mem_reserve(
+ requested_addr, (size_t)map_sz, reserve_flags);
+ if ((mapped_addr == NULL) && allow_shrink)
*size -= page_sz;
- if (mapped_addr != MAP_FAILED && addr_is_hint &&
- mapped_addr != requested_addr) {
+ if ((mapped_addr != NULL) && addr_is_hint &&
+ (mapped_addr != requested_addr)) {
try++;
next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
/* hint was not used. Try with another offset */
- munmap(mapped_addr, map_sz);
- mapped_addr = MAP_FAILED;
+ eal_mem_free(mapped_addr, map_sz);
+ mapped_addr = NULL;
requested_addr = next_baseaddr;
}
}
} while ((allow_shrink || addr_is_hint) &&
- mapped_addr == MAP_FAILED && *size > 0);
+ (mapped_addr == NULL) && (*size > 0));
/* align resulting address - if map failed, we will ignore the value
* anyway, so no need to add additional checks.
if (*size == 0) {
RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
- strerror(errno));
- rte_errno = errno;
+ rte_strerror(rte_errno));
return NULL;
- } else if (mapped_addr == MAP_FAILED) {
+ } else if (mapped_addr == NULL) {
RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
- strerror(errno));
- /* pass errno up the call chain */
- rte_errno = errno;
+ rte_strerror(rte_errno));
return NULL;
} else if (requested_addr != NULL && !addr_is_hint &&
aligned_addr != requested_addr) {
RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
requested_addr, aligned_addr);
- munmap(mapped_addr, map_sz);
+ eal_mem_free(mapped_addr, map_sz);
rte_errno = EADDRNOTAVAIL;
return NULL;
} else if (requested_addr != NULL && addr_is_hint &&
aligned_addr, *size);
if (unmap) {
- munmap(mapped_addr, map_sz);
+ eal_mem_free(mapped_addr, map_sz);
} else if (!no_align) {
void *map_end, *aligned_end;
size_t before_len, after_len;
/* unmap space before aligned mmap address */
before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
if (before_len > 0)
- munmap(mapped_addr, before_len);
+ eal_mem_free(mapped_addr, before_len);
/* unmap space after aligned end mmap address */
after_len = RTE_PTR_DIFF(map_end, aligned_end);
if (after_len > 0)
- munmap(aligned_end, after_len);
+ eal_mem_free(aligned_end, after_len);
}
if (!unmap) {
/* Exclude these pages from a core dump. */
- if (madvise(aligned_addr, *size, RTE_DONTDUMP) != 0)
- RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
- strerror(errno));
+ eal_mem_set_dump(aligned_addr, *size, false);
}
return aligned_addr;
int
rte_mem_lock_page(const void *virt)
{
- unsigned long virtual = (unsigned long)virt;
- int page_size = getpagesize();
- unsigned long aligned = (virtual & ~(page_size - 1));
- return mlock((void *)aligned, page_size);
+ uintptr_t virtual = (uintptr_t)virt;
+ size_t page_size = rte_mem_page_size();
+ uintptr_t aligned = RTE_PTR_ALIGN_FLOOR(virtual, page_size);
+ return rte_mem_lock((void *)aligned, page_size);
}
int
#include <rte_dev.h>
#include <rte_lcore.h>
+#include <rte_memory.h>
/**
* Structure storing internal configuration (per-lcore)
*/
int rte_eal_check_module(const char *module_name);
+/**
+ * Memory reservation flags.
+ */
+enum eal_mem_reserve_flags {
+ /**
+ * Reserve hugepages. May be unsupported by some platforms.
+ */
+ EAL_RESERVE_HUGEPAGES = 1 << 0,
+ /**
+ * Force reserving memory at the requested address.
+ * This can be a destructive action depending on the implementation.
+ *
+ * @see RTE_MAP_FORCE_ADDRESS for description of possible consequences
+ * (although implementations are not required to use it).
+ */
+ EAL_RESERVE_FORCE_ADDRESS = 1 << 1
+};
+
/**
* Get virtual area of specified size from the OS.
*
* Page size on which to align requested virtual area.
* @param flags
* EAL_VIRTUAL_AREA_* flags.
- * @param mmap_flags
- * Extra flags passed directly to mmap().
+ * @param reserve_flags
+ * Extra flags passed directly to eal_mem_reserve().
*
* @return
* Virtual area address if successful.
/**< immediately unmap reserved virtual area. */
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
- size_t page_sz, int flags, int mmap_flags);
+ size_t page_sz, int flags, int reserve_flags);
/**
* Get cpu core_id.
int
eal_file_truncate(int fd, ssize_t size);
+/**
+ * Reserve a region of virtual memory.
+ *
+ * Use eal_mem_free() to free reserved memory.
+ *
+ * @param requested_addr
+ * A desired reservation address which must be page-aligned.
+ * The system might not respect it.
+ * NULL means the address will be chosen by the system.
+ * @param size
+ * Reservation size. Must be a multiple of system page size.
+ * @param flags
+ * Reservation options, a combination of eal_mem_reserve_flags.
+ * @return
+ *  Starting address of the reserved area on success, NULL on failure.
+ *  Callers must not access this memory until it is remapped.
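+ *
+ *  A minimal usage sketch (length and flags are illustrative):
+ *  @code
+ *  size_t len = 2 * rte_mem_page_size();
+ *  void *base = eal_mem_reserve(NULL, len, 0);
+ *  if (base == NULL)
+ *      return -1; // rte_errno holds the OS error
+ *  // ... remap the region with rte_mem_map() before any access ...
+ *  eal_mem_free(base, len);
+ *  @endcode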
+ */
+void *
+eal_mem_reserve(void *requested_addr, size_t size, int flags);
+
+/**
+ * Free memory obtained by eal_mem_reserve() or eal_mem_alloc().
+ *
+ * If *virt* and *size* describe a part of the reserved region,
+ * only this part of the region is freed, to the granularity of the
+ * system page size. If *virt* points to allocated memory, *size* must
+ * match the size specified on allocation. The behavior is undefined
+ * if the memory pointed to by *virt* was obtained from a source other
+ * than those listed above.
+ *
+ * @param virt
+ * A virtual address in a region previously reserved.
+ * @param size
+ * Number of bytes to unreserve.
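+ *
+ *  Sketch of freeing only part of a reservation (sizes illustrative):
+ *  @code
+ *  size_t sz = 4 * rte_mem_page_size();
+ *  void *base = eal_mem_reserve(NULL, sz, 0);
+ *  // release the upper half, keep the lower half reserved
+ *  eal_mem_free(RTE_PTR_ADD(base, sz / 2), sz / 2);
+ *  @endcode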
+ */
+void
+eal_mem_free(void *virt, size_t size);
+
+/**
+ * Configure whether a memory region is included in core dumps.
+ *
+ * @param virt
+ * Starting address of the region.
+ * @param size
+ * Size of the region.
+ * @param dump
+ *  True to include the region in core dumps, false to exclude it.
+ * @return
+ * 0 on success, (-1) on failure and rte_errno is set.
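+ *
+ *  For example, to exclude a region from core dumps (names illustrative):
+ *  @code
+ *  if (eal_mem_set_dump(base, sz, false) < 0)
+ *      RTE_LOG(DEBUG, EAL, "cannot exclude region from dumps\n");
+ *  @endcode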
+ */
+int
+eal_mem_set_dump(void *virt, size_t size, bool dump);
+
#endif /* _EAL_PRIVATE_H_ */
# from unix dir
SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_file.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_unix_memory.c
# from arch dir
SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_cpuflags.c
--- /dev/null
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Dmitry Kozlyuk
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <rte_compat.h>
+
+/**
+ * @file
+ * @internal
+ *
+ * Wrappers for OS facilities related to memory paging, used across DPDK.
+ */
+
+/** Memory protection flags. */
+enum rte_mem_prot {
+ RTE_PROT_READ = 1 << 0, /**< Read access. */
+ RTE_PROT_WRITE = 1 << 1, /**< Write access. */
+ RTE_PROT_EXECUTE = 1 << 2 /**< Code execution. */
+};
+
+/** Additional flags for memory mapping. */
+enum rte_map_flags {
+ /** Changes to the mapped memory are visible to other processes. */
+ RTE_MAP_SHARED = 1 << 0,
+ /** Mapping is not backed by a regular file. */
+ RTE_MAP_ANONYMOUS = 1 << 1,
+	/** Copy-on-write mapping; changes are invisible to other processes. */
+ RTE_MAP_PRIVATE = 1 << 2,
+ /**
+ * Force mapping to the requested address. This flag should be used
+	 * with caution, because to fulfill the request the implementation
+ * may remove all other mappings in the requested region. However,
+ * it is not required to do so, thus mapping with this flag may fail.
+ */
+ RTE_MAP_FORCE_ADDRESS = 1 << 3
+};
+
+/**
+ * Map a portion of an opened file or the page file into memory.
+ *
+ * This function is similar to POSIX mmap(3) with the common MAP_ANONYMOUS
+ * extension, except for the return value (NULL instead of MAP_FAILED).
+ *
+ * @param requested_addr
+ * Desired virtual address for mapping. Can be NULL to let OS choose.
+ * @param size
+ * Size of the mapping in bytes.
+ * @param prot
+ * Protection flags, a combination of rte_mem_prot values.
+ * @param flags
+ * Additional mapping flags, a combination of rte_map_flags.
+ * @param fd
+ * Mapped file descriptor. Can be negative for anonymous mapping.
+ * @param offset
+ * Offset of the mapped region in fd. Must be 0 for anonymous mappings.
+ * @return
+ * Mapped address or NULL on failure and rte_errno is set to OS error.
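+ *
+ *  A minimal sketch, assuming @c fd is an open descriptor of at least
+ *  @c size bytes:
+ *  @code
+ *  void *addr = rte_mem_map(NULL, size, RTE_PROT_READ | RTE_PROT_WRITE,
+ *          RTE_MAP_SHARED, fd, 0);
+ *  if (addr == NULL)
+ *      return -1; // rte_errno is set by rte_mem_map()
+ *  // ... use the mapping ...
+ *  rte_mem_unmap(addr, size);
+ *  @endcode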
+ */
+__rte_internal
+void *
+rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
+ int fd, size_t offset);
+
+/**
+ * OS-independent implementation of POSIX munmap(3).
+ *
+ * @param virt
+ *  Virtual address of the region to unmap.
+ * @param size
+ *  Size of the region in bytes.
+ * @return
+ *  0 on success, (-1) on failure and rte_errno is set.
+ */
+__rte_internal
+int
+rte_mem_unmap(void *virt, size_t size);
+
+/**
+ * Get system page size. This function never fails.
+ *
+ * @return
+ * Page size in bytes.
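+ *
+ *  For example, to round an arbitrary length up to whole pages:
+ *  @code
+ *  size_t map_len = RTE_ALIGN_CEIL(len, rte_mem_page_size());
+ *  @endcode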
+ */
+__rte_internal
+size_t
+rte_mem_page_size(void);
+
+/**
+ * Lock in physical memory all pages crossed by the address region.
+ *
+ * @param virt
+ * Base virtual address of the region.
+ * @param size
+ * Size of the region.
+ * @return
+ * 0 on success, negative on error.
+ *
+ * @see rte_mem_page_size() to retrieve the page size.
+ * @see rte_mem_lock_page() to lock an entire single page.
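+ *
+ *  Sketch of locking the single page containing @c ptr (illustrative):
+ *  @code
+ *  size_t pg = rte_mem_page_size();
+ *  void *start = RTE_PTR_ALIGN_FLOOR(ptr, pg);
+ *  if (rte_mem_lock(start, pg) < 0)
+ *      return -1; // rte_errno is set on failure
+ *  @endcode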
+ */
+__rte_internal
+int
+rte_mem_lock(const void *virt, size_t size);
# from unix dir
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_file.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_unix_memory.c
# from arch dir
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_cpuflags.c
mapped:
munmap(addr, alloc_sz);
unmapped:
- flags = MAP_FIXED;
+ flags = EAL_RESERVE_FORCE_ADDRESS;
new_addr = eal_get_virtual_area(addr, &alloc_sz, alloc_sz, 0, flags);
if (new_addr != addr) {
if (new_addr != NULL)
return -1;
}
- if (madvise(ms->addr, ms->len, MADV_DONTDUMP) != 0)
- RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
+ eal_mem_set_dump(ms->addr, ms->len, false);
exit_early = false;
rte_trace_regexp;
rte_trace_save;
};
+
+INTERNAL {
+ global:
+
+ rte_mem_lock;
+ rte_mem_map;
+ rte_mem_page_size;
+ rte_mem_unmap;
+};
--- /dev/null
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Dmitry Kozlyuk
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <rte_eal_paging.h>
+#include <rte_errno.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+
+#ifdef RTE_EXEC_ENV_LINUX
+#define EAL_DONTDUMP MADV_DONTDUMP
+#define EAL_DODUMP MADV_DODUMP
+#elif defined RTE_EXEC_ENV_FREEBSD
+#define EAL_DONTDUMP MADV_NOCORE
+#define EAL_DODUMP MADV_CORE
+#else
+#error "madvise doesn't support this OS"
+#endif
+
+static void *
+mem_map(void *requested_addr, size_t size, int prot, int flags,
+ int fd, size_t offset)
+{
+ void *virt = mmap(requested_addr, size, prot, flags, fd, offset);
+ if (virt == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL,
+ "Cannot mmap(%p, 0x%zx, 0x%x, 0x%x, %d, 0x%zx): %s\n",
+ requested_addr, size, prot, flags, fd, offset,
+ strerror(errno));
+ rte_errno = errno;
+ return NULL;
+ }
+ return virt;
+}
+
+static int
+mem_unmap(void *virt, size_t size)
+{
+ int ret = munmap(virt, size);
+ if (ret < 0) {
+ RTE_LOG(DEBUG, EAL, "Cannot munmap(%p, 0x%zx): %s\n",
+ virt, size, strerror(errno));
+ rte_errno = errno;
+ }
+ return ret;
+}
+
+void *
+eal_mem_reserve(void *requested_addr, size_t size, int flags)
+{
+ int sys_flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+ if (flags & EAL_RESERVE_HUGEPAGES) {
+#ifdef MAP_HUGETLB
+ sys_flags |= MAP_HUGETLB;
+#else
+ rte_errno = ENOTSUP;
+ return NULL;
+#endif
+ }
+
+ if (flags & EAL_RESERVE_FORCE_ADDRESS)
+ sys_flags |= MAP_FIXED;
+
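+	/* Reserve with PROT_NONE: the memory stays inaccessible until remapped. */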
+ return mem_map(requested_addr, size, PROT_NONE, sys_flags, -1, 0);
+}
+
+void
+eal_mem_free(void *virt, size_t size)
+{
+ mem_unmap(virt, size);
+}
+
+int
+eal_mem_set_dump(void *virt, size_t size, bool dump)
+{
+ int flags = dump ? EAL_DODUMP : EAL_DONTDUMP;
+ int ret = madvise(virt, size, flags);
+ if (ret) {
+ RTE_LOG(DEBUG, EAL, "madvise(%p, %#zx, %d) failed: %s\n",
+			virt, size, flags, strerror(errno));
+ rte_errno = errno;
+ }
+ return ret;
+}
+
+static int
+mem_rte_to_sys_prot(int prot)
+{
+ int sys_prot = PROT_NONE;
+
+ if (prot & RTE_PROT_READ)
+ sys_prot |= PROT_READ;
+ if (prot & RTE_PROT_WRITE)
+ sys_prot |= PROT_WRITE;
+ if (prot & RTE_PROT_EXECUTE)
+ sys_prot |= PROT_EXEC;
+
+ return sys_prot;
+}
+
+void *
+rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
+ int fd, size_t offset)
+{
+ int sys_flags = 0;
+ int sys_prot;
+
+ sys_prot = mem_rte_to_sys_prot(prot);
+
+ if (flags & RTE_MAP_SHARED)
+ sys_flags |= MAP_SHARED;
+ if (flags & RTE_MAP_ANONYMOUS)
+ sys_flags |= MAP_ANONYMOUS;
+ if (flags & RTE_MAP_PRIVATE)
+ sys_flags |= MAP_PRIVATE;
+ if (flags & RTE_MAP_FORCE_ADDRESS)
+ sys_flags |= MAP_FIXED;
+
+ return mem_map(requested_addr, size, sys_prot, sys_flags, fd, offset);
+}
+
+int
+rte_mem_unmap(void *virt, size_t size)
+{
+ return mem_unmap(virt, size);
+}
+
+size_t
+rte_mem_page_size(void)
+{
+ static size_t page_size;
+
+ if (!page_size)
+ page_size = sysconf(_SC_PAGESIZE);
+
+ return page_size;
+}
+
+int
+rte_mem_lock(const void *virt, size_t size)
+{
+ int ret = mlock(virt, size);
+ if (ret)
+ rte_errno = errno;
+ return ret;
+}
sources += files(
'eal_file.c',
+ 'eal_unix_memory.c',
)