* Copyright(c) 2013 6WIND S.A.
*/
-#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
-#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_common.h>
#include "eal_private.h"
#include "eal_memalloc.h"
+#include "eal_memcfg.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_hugepages.h"
+#include "eal_options.h"
#define PFN_MASK_SIZE 8
* zone as well as a physical contiguous zone.
*/
-static bool phys_addrs_available = true;
+static int phys_addrs_available = -1;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
-static void
-test_phys_addrs_available(void)
+uint64_t eal_get_baseaddr(void)
{
- uint64_t tmp = 0;
- phys_addr_t physaddr;
-
- if (!rte_eal_has_hugepages()) {
- RTE_LOG(ERR, EAL,
- "Started without hugepages support, physical addresses not available\n");
- phys_addrs_available = false;
- return;
- }
-
- physaddr = rte_mem_virt2phy(&tmp);
- if (physaddr == RTE_BAD_PHYS_ADDR) {
- if (rte_eal_iova_mode() == RTE_IOVA_PA)
- RTE_LOG(ERR, EAL,
- "Cannot obtain physical addresses: %s. "
- "Only vfio will function.\n",
- strerror(errno));
- phys_addrs_available = false;
- }
+ /*
+ * Linux kernel uses a really high address as starting address for
+ * serving mmap calls. If there exist addressing limitations and IOVA
+ * mode is VA, this starting address is likely too high for those
+ * devices. However, it is possible to use a lower address in the
+ * process virtual address space as with 64 bits there is a lot of
+ * available space.
+ *
+ * Current known limitations are 39 or 40 bits. Setting the starting
+ * address at 4GB implies there are 508GB or 1020GB for mapping the
+ * available hugepages. This is likely enough for most systems, although
+ * a device with addressing limitations should call
+ * rte_mem_check_dma_mask for ensuring all memory is within supported
+ * range.
+ */
+ return 0x100000000ULL;
}
/*
int page_size;
off_t offset;
- /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
- if (!phys_addrs_available)
+ if (phys_addrs_available == 0)
return RTE_BAD_IOVA;
/* standard page size */
fd = open("/proc/self/pagemap", O_RDONLY);
if (fd < 0) {
- RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
+ RTE_LOG(INFO, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
__func__, strerror(errno));
return RTE_BAD_IOVA;
}
virt_pfn = (unsigned long)virtaddr / page_size;
offset = sizeof(uint64_t) * virt_pfn;
if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
- RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
+ RTE_LOG(INFO, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
__func__, strerror(errno));
close(fd);
return RTE_BAD_IOVA;
retval = read(fd, &page, PFN_MASK_SIZE);
close(fd);
if (retval < 0) {
- RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
+ RTE_LOG(INFO, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
__func__, strerror(errno));
return RTE_BAD_IOVA;
} else if (retval != PFN_MASK_SIZE) {
- RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap "
+ RTE_LOG(INFO, EAL, "%s(): read %d bytes from /proc/self/pagemap "
"but expected %d:\n",
__func__, retval, PFN_MASK_SIZE);
return RTE_BAD_IOVA;
return retval;
}
- fd = open(filename, O_CREAT | O_RDWR, 0666);
+ fd = open(filename, O_CREAT | O_RDWR, 0600);
if (fd < 0)
return NULL;
if (ftruncate(fd, mem_size) < 0) {
return -1;
}
-#ifdef RTE_ARCH_PPC64
+#ifdef RTE_ARCH_PPC_64
/* for PPC64 we go through the list backwards */
for (cur_page = seg_end - 1; cur_page >= seg_start;
cur_page--, ms_idx++) {
msl->page_sz = page_sz;
msl->socket_id = socket_id;
msl->base_va = NULL;
+ msl->heap = 1; /* mark it as a heap segment */
RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
(size_t)page_sz >> 10, socket_id);
addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
if (addr == NULL) {
if (rte_errno == EADDRNOTAVAIL)
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - "
+ "please use '--" OPT_BASE_VIRTADDR "' option\n",
(unsigned long long)mem_sz, msl->base_va);
else
RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
return 0;
}
+__rte_unused /* function is unused on 32-bit builds */
static inline uint64_t
get_socket_mem_size(int socket)
{
int nr_hugefiles, nr_hugepages = 0;
void *addr;
- test_phys_addrs_available();
-
memset(used_hp, 0, sizeof(used_hp));
/* get pointer to global configuration */
msl->page_sz = page_sz;
msl->socket_id = 0;
msl->len = internal_config.memory;
+ msl->heap = 1;
/* we're in single-file segments mode, so only the segment list
* fd needs to be set up.
if (mcfg->dma_maskbits &&
rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
RTE_LOG(ERR, EAL,
- "%s(): couldnt allocate memory due to IOVA exceeding limits of current DMA mask.\n",
+ "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n",
__func__);
if (rte_eal_iova_mode() == RTE_IOVA_VA &&
rte_eal_using_phys_addrs())
continue;
}
- if (phys_addrs_available &&
+ if (rte_eal_using_phys_addrs() &&
rte_eal_iova_mode() != RTE_IOVA_VA) {
/* find physical addresses for each hugepage */
if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
mem_sz = msl->len;
munmap(msl->base_va, mem_sz);
msl->base_va = NULL;
+ msl->heap = 0;
/* destroy backing fbarray */
rte_fbarray_destroy(&msl->memseg_arr);
uint64_t memory[RTE_MAX_NUMA_NODES];
int hp_sz_idx, socket_id;
- test_phys_addrs_available();
-
memset(used_hp, 0, sizeof(used_hp));
for (hp_sz_idx = 0;
struct rte_memseg **pages;
struct hugepage_info *hpi = &used_hp[hp_sz_idx];
unsigned int num_pages = hpi->num_pages[socket_id];
- int num_pages_alloc, i;
+ unsigned int num_pages_alloc;
if (num_pages == 0)
continue;
- pages = malloc(sizeof(*pages) * num_pages);
-
RTE_LOG(DEBUG, EAL, "Allocating %u pages of size %" PRIu64 "M on socket %i\n",
num_pages, hpi->hugepage_sz >> 20, socket_id);
- num_pages_alloc = eal_memalloc_alloc_seg_bulk(pages,
- num_pages, hpi->hugepage_sz,
- socket_id, true);
- if (num_pages_alloc < 0) {
+ /* we may not be able to allocate all pages in one go,
+ * because we break up our memory map into multiple
+ * memseg lists. therefore, try allocating multiple
+ * times and see if we can get the desired number of
+ * pages from multiple allocations.
+ */
+
+ num_pages_alloc = 0;
+ do {
+ int i, cur_pages, needed;
+
+ needed = num_pages - num_pages_alloc;
+
+ pages = malloc(sizeof(*pages) * needed);
+ if (pages == NULL)
+ return -1;
+
+ /* do not request exact number of pages */
+ cur_pages = eal_memalloc_alloc_seg_bulk(pages,
+ needed, hpi->hugepage_sz,
+ socket_id, false);
+ if (cur_pages <= 0) {
+ free(pages);
+ return -1;
+ }
+
+ /* mark preallocated pages as unfreeable */
+ for (i = 0; i < cur_pages; i++) {
+ struct rte_memseg *ms = pages[i];
+ ms->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE;
+ }
free(pages);
- return -1;
- }
- /* mark preallocated pages as unfreeable */
- for (i = 0; i < num_pages_alloc; i++) {
- struct rte_memseg *ms = pages[i];
- ms->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE;
- }
- free(pages);
+ num_pages_alloc += cur_pages;
+ } while (num_pages_alloc != num_pages);
}
}
/* if socket limits were specified, set them */
"into secondary processes\n");
}
- test_phys_addrs_available();
-
fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY);
if (fd_hugepage < 0) {
RTE_LOG(ERR, EAL, "Could not open %s\n",
if (flock(fd, LOCK_SH) < 0) {
RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n",
__func__, strerror(errno));
- goto fd_error;
+ goto mmap_error;
}
/* find segment data */
if (msl == NULL) {
RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg list\n",
__func__);
- goto fd_error;
+ goto mmap_error;
}
ms = rte_mem_virt2memseg(map_addr, msl);
if (ms == NULL) {
RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg\n",
__func__);
- goto fd_error;
+ goto mmap_error;
}
msl_idx = msl - mcfg->memsegs;
if (ms_idx < 0) {
RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg idx\n",
__func__);
- goto fd_error;
+ goto mmap_error;
}
/* store segment fd internally */
close(fd_hugepage);
return 0;
+mmap_error:
+ munmap(hp[i].final_va, hp[i].size);
fd_error:
close(fd);
error:
- /* map all segments into memory to make sure we get the addrs */
- cur_seg = 0;
- for (cur_seg = 0; cur_seg < i; cur_seg++) {
- struct hugepage_file *hf = &hp[i];
- size_t map_sz = hf->size;
- void *map_addr = hf->final_va;
+ /* unwind mmap's done so far */
+ for (cur_seg = 0; cur_seg < i; cur_seg++)
+ munmap(hp[cur_seg].final_va, hp[cur_seg].size);
- munmap(map_addr, map_sz);
- }
if (hp != NULL && hp != MAP_FAILED)
munmap(hp, size);
if (fd_hugepage >= 0)
int
rte_eal_using_phys_addrs(void)
{
+ if (phys_addrs_available == -1) {
+ uint64_t tmp = 0;
+
+ if (rte_eal_has_hugepages() != 0 &&
+ rte_mem_virt2phy(&tmp) != RTE_BAD_PHYS_ADDR)
+ phys_addrs_available = 1;
+ else
+ phys_addrs_available = 0;
+ }
return phys_addrs_available;
}
socket_id = rte_socket_id_by_idx(i);
#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
- if (socket_id > 0)
+ /* we can still sort pages by socket in legacy mode */
+ if (!internal_config.legacy_mem && socket_id > 0)
break;
#endif
int socket_id = rte_socket_id_by_idx(i);
#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
- if (socket_id > 0)
+ /* we can still sort pages by socket in legacy mode */
+ if (!internal_config.legacy_mem && socket_id > 0)
break;
#endif
memtypes[cur_type].page_sz = hugepage_sz;
} else {
RTE_LOG(ERR, EAL, "Cannot get current resource limits\n");
}
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (!internal_config.legacy_mem && rte_socket_count() > 1) {
+ RTE_LOG(WARNING, EAL, "DPDK is running on a NUMA system, but is compiled without NUMA support.\n");
+ RTE_LOG(WARNING, EAL, "This will have adverse consequences for performance and usability.\n");
+ RTE_LOG(WARNING, EAL, "Please use --"OPT_LEGACY_MEM" option, or recompile with NUMA support.\n");
+ }
+#endif
return rte_eal_process_type() == RTE_PROC_PRIMARY ?
#ifndef RTE_ARCH_64