/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>

#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_string_fns.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_memcfg.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
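
/*
 * Note: contigmem buffers are addressed through the mmap() file offset:
 * the driver decodes the buffer index as offset / PAGE_SIZE, so buffer N
 * is mapped by passing an offset of N * EAL_PAGE_SIZE. Both the initial
 * mapping and the secondary-process attach path below rely on this.
 */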

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
	/* XXX not implemented. This function is only used by
	 * rte_mempool_virt2iova() when hugepages are disabled.
	 */
	(void)virtaddr;
	return RTE_BAD_IOVA;
}

rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
	return rte_mem_virt2phy(virtaddr);
}

int
rte_eal_hugepage_init(void)
{
	struct rte_mem_config *mcfg;
	uint64_t total_mem = 0;
	void *addr;
	unsigned int i, j, seg_idx = 0;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* for debug purposes, hugetlbfs can be disabled */
	if (internal_config.no_hugetlbfs) {
		struct rte_memseg_list *msl;
		struct rte_fbarray *arr;
		struct rte_memseg *ms;
		uint64_t page_sz;
		int n_segs, cur_seg;

		/* create a memseg list */
		msl = &mcfg->memsegs[0];

		page_sz = RTE_PGSIZE_4K;
		n_segs = internal_config.memory / page_sz;

		if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
				sizeof(struct rte_memseg))) {
			RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
			return -1;
		}

		addr = mmap(NULL, internal_config.memory,
				PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}
		msl->base_va = addr;
		msl->page_sz = page_sz;
		msl->len = internal_config.memory;

		/* populate memsegs. each memseg is 1 page long */
		for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
			arr = &msl->memseg_arr;

			ms = rte_fbarray_get(arr, cur_seg);
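
			/* rte_mem_virt2phy() is unimplemented here, so the
			 * physical address of this anonymous memory cannot
			 * be discovered; a usable IOVA only exists when
			 * IOVAs are virtual addresses.
			 */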
			if (rte_eal_iova_mode() == RTE_IOVA_VA)
				ms->iova = (uintptr_t)addr;
			else
				ms->iova = RTE_BAD_IOVA;
			ms->addr = addr;
			ms->hugepage_sz = page_sz;
			ms->len = page_sz;

			rte_fbarray_set_used(arr, cur_seg);

			addr = RTE_PTR_ADD(addr, page_sz);
		}
		return 0;
	}

	/* map all hugepages and sort them */
	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		struct hugepage_info *hpi;
		rte_iova_t prev_end = 0;
		int prev_ms_idx = -1;
		uint64_t page_sz, mem_needed;
		unsigned int n_pages, max_pages;

		hpi = &internal_config.hugepage_info[i];
		page_sz = hpi->hugepage_sz;
		max_pages = hpi->num_pages[0];
		mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
				page_sz);

		n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

		for (j = 0; j < n_pages; j++) {
			struct rte_memseg_list *msl;
			struct rte_fbarray *arr;
			struct rte_memseg *seg;
			int msl_idx, ms_idx;
			rte_iova_t physaddr;
			int error;
			size_t sysctl_size = sizeof(physaddr);
			char physaddr_str[64];
			bool is_adjacent;

			/* first, check if this segment is IOVA-adjacent to
			 * the previous one.
			 */
			snprintf(physaddr_str, sizeof(physaddr_str),
					"hw.contigmem.physaddr.%d", j);
			error = sysctlbyname(physaddr_str, &physaddr,
					&sysctl_size, NULL, 0);
			if (error < 0) {
				RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
						"from %s\n", j, hpi->hugedir);
				return -1;
			}
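
			/* e.g. with 2M pages, a buffer at 0x80000000 sets
			 * prev_end to 0x80200000; if the next buffer starts
			 * exactly there, the two are physically adjacent and
			 * no hole is needed between their memsegs.
			 */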
			is_adjacent = prev_end != 0 && physaddr == prev_end;
			prev_end = physaddr + hpi->hugepage_sz;

			for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
					msl_idx++) {
				bool empty, need_hole;
				msl = &mcfg->memsegs[msl_idx];
				arr = &msl->memseg_arr;

				if (msl->page_sz != page_sz)
					continue;

				empty = arr->count == 0;

				/* we need a hole if this isn't an empty memseg
				 * list, and if previous segment was not
				 * adjacent to current one.
				 */
				need_hole = !empty && !is_adjacent;
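
				/* the hole ensures that segments which are
				 * not IOVA-contiguous do not end up
				 * VA-contiguous either, so no allocation can
				 * silently span the physical gap.
				 */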

				/* we need 1, plus hole if not adjacent */
				ms_idx = rte_fbarray_find_next_n_free(arr,
						0, 1 + (need_hole ? 1 : 0));

				/* memseg list is full? */
				if (ms_idx < 0)
					continue;

				if (need_hole && prev_ms_idx == ms_idx - 1)
					ms_idx++;
				prev_ms_idx = ms_idx;

				break;
			}
			if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
					RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
					RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
				return -1;
			}

			arr = &msl->memseg_arr;
			seg = rte_fbarray_get(arr, ms_idx);

			addr = RTE_PTR_ADD(msl->base_va,
					(size_t)msl->page_sz * ms_idx);

			/* address is already mapped in memseg list, so using
			 * MAP_FIXED here is safe.
			 */
			addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
					MAP_SHARED | MAP_FIXED,
					hpi->lock_descriptor,
					j * EAL_PAGE_SIZE);
			if (addr == MAP_FAILED) {
				RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
						j, hpi->hugedir);
				return -1;
			}

			seg->addr = addr;
			seg->iova = physaddr;
			seg->hugepage_sz = page_sz;
			seg->len = page_sz;
			seg->nchannel = mcfg->nchannel;
			seg->nrank = mcfg->nrank;
			seg->socket_id = 0;

			rte_fbarray_set_used(arr, ms_idx);

			RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
					PRIx64", len %zu\n",
					seg_idx++, addr, physaddr, page_sz);

			total_mem += seg->len;
		}
		if (total_mem >= internal_config.memory)
			break;
	}
	if (total_mem < internal_config.memory) {
		RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
				"requested: %" PRIu64 "M "
				"available: %" PRIu64 "M\n",
				internal_config.memory >> 20, total_mem >> 20);
		return -1;
	}

	return 0;
}

struct attach_walk_args {
	int fd_hugepage;
	int seg_idx;
};

static int
attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)
{
	struct attach_walk_args *wa = arg;
	void *addr;

	if (msl->external)
		return 0;
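
	/* the file offset encodes which contigmem buffer backs this segment:
	 * the walk visits segments in mapping order, so the running seg_idx
	 * doubles as the buffer index.
	 */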
	addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
			wa->seg_idx * EAL_PAGE_SIZE);
	if (addr == MAP_FAILED || addr != ms->addr)
		return -1;
	wa->seg_idx++;

	return 0;
}

int
rte_eal_hugepage_attach(void)
{
	const struct hugepage_info *hpi;
	int fd_hugepage = -1;
	unsigned int i;

	hpi = &internal_config.hugepage_info[0];

	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		const struct hugepage_info *cur_hpi = &hpi[i];
		struct attach_walk_args wa;

		memset(&wa, 0, sizeof(wa));

		/* Obtain a file descriptor for contiguous memory */
		fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
		if (fd_hugepage < 0) {
			RTE_LOG(ERR, EAL, "Could not open %s\n",
					cur_hpi->hugedir);
			goto error;
		}
		wa.fd_hugepage = fd_hugepage;
		wa.seg_idx = 0;

		/* Map the contiguous memory into each memory segment */
		if (rte_memseg_walk(attach_segment, &wa) < 0) {
			RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
				wa.seg_idx, cur_hpi->hugedir);
			goto error;
		}

		close(fd_hugepage);
		fd_hugepage = -1;
	}

	/* hugepage_info is no longer required */
	return 0;

error:
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}

int
rte_eal_using_phys_addrs(void)
{
	return 0;
}

static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
	uint64_t area_sz, max_pages;

	/* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
	max_pages = RTE_MAX_MEMSEG_PER_LIST;
	max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

	area_sz = RTE_MIN(page_sz * max_pages, max_mem);
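
	/* worked example: for 2M pages with limits of e.g. 8192 segments and
	 * 32768 MB per list, this picks min(16G, 32G) = 16G, further capped
	 * by whatever budget the caller passed in max_mem.
	 */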

	/* make sure the list isn't smaller than the page size */
	area_sz = RTE_MAX(area_sz, page_sz);

	return RTE_ALIGN(area_sz, page_sz);
}

#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
static int
alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
		int n_segs, int socket_id, int type_msl_idx)
{
	char name[RTE_FBARRAY_NAME_LEN];
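
	/* the resulting fbarray name encodes page size (KB), socket and list
	 * index, e.g. "memseg-2048k-0-0" for the first list of 2M pages on
	 * socket 0.
	 */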
	snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
		 type_msl_idx);
	if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
			sizeof(struct rte_memseg))) {
		RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
			rte_strerror(rte_errno));
		return -1;
	}

	msl->page_sz = page_sz;
	msl->socket_id = socket_id;
	msl->base_va = NULL;

	RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
			(size_t)page_sz >> 10, socket_id);

	return 0;
}

static int
alloc_va_space(struct rte_memseg_list *msl)
{
	uint64_t page_sz;
	size_t mem_sz;
	void *addr;
	int flags = 0;

#ifdef RTE_ARCH_PPC_64
	flags |= MAP_HUGETLB;
#endif

	page_sz = msl->page_sz;
	mem_sz = page_sz * msl->memseg_arr.len;
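
	/* this only reserves VA space; contigmem buffers are mapped over it
	 * later with MAP_FIXED, which is why the region must stay unused.
	 */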
	addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
	if (addr == NULL) {
		if (rte_errno == EADDRNOTAVAIL)
			RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
				(unsigned long long)mem_sz, msl->base_va);
		else
			RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
		return -1;
	}
	msl->base_va = addr;
	msl->len = mem_sz;

	return 0;
}

static int
memseg_primary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int hpi_idx, msl_idx = 0;
	struct rte_memseg_list *msl;
	uint64_t max_mem, total_mem;

	/* no-huge does not need this at all */
	if (internal_config.no_hugetlbfs)
		return 0;

	/* FreeBSD has an issue where core dump will dump the entire memory
	 * contents, including anonymous zero-page memory. Therefore, while we
	 * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
	 * also be further limiting total memory amount to whatever memory is
	 * available to us through contigmem driver (plus spacing blocks).
	 *
	 * so, at each stage, we will be checking how much memory we are
	 * preallocating, and adjust all the values accordingly.
	 */

	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	total_mem = 0;

	/* create memseg lists */
	for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
			hpi_idx++) {
		uint64_t max_type_mem, total_type_mem = 0;
		uint64_t avail_mem;
		int type_msl_idx, max_segs, avail_segs, total_segs = 0;
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_config.hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		/* no NUMA support on FreeBSD */

		/* check if we've already exceeded total memory amount */
		if (total_mem >= max_mem)
			break;

		/* first, calculate theoretical limits according to config */
		max_type_mem = RTE_MIN(max_mem - total_mem,
				(uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
		max_segs = RTE_MAX_MEMSEG_PER_TYPE;

		/* now, limit all of that to whatever will actually be
		 * available to us, because without dynamic allocation support,
		 * all of that extra memory will be sitting there being useless
		 * and slowing down core dumps in case of a crash.
		 *
		 * we need (N*2)-1 segments because we cannot guarantee that
		 * each segment will be IOVA-contiguous with the previous one,
		 * so we will allocate more and put spaces in between segments
		 * that are non-contiguous.
		 */
		avail_segs = (hpi->num_pages[0] * 2) - 1;
		avail_mem = avail_segs * hugepage_sz;

		max_type_mem = RTE_MIN(avail_mem, max_type_mem);
		max_segs = RTE_MIN(avail_segs, max_segs);
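
		/* worst case, no two buffers are physically adjacent: e.g. 4
		 * contigmem buffers then need 4 segment slots plus 3 holes,
		 * i.e. (4 * 2) - 1 = 7 entries.
		 */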

		type_msl_idx = 0;
		while (total_type_mem < max_type_mem &&
				total_segs < max_segs) {
			uint64_t cur_max_mem, cur_mem;
			unsigned int n_segs;

			if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL,
					"No more space in memseg lists, please increase %s\n",
					RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
				return -1;
			}

			msl = &mcfg->memsegs[msl_idx++];

			cur_max_mem = max_type_mem - total_type_mem;

			cur_mem = get_mem_amount(hugepage_sz,
					cur_max_mem);
			n_segs = cur_mem / hugepage_sz;

			if (alloc_memseg_list(msl, hugepage_sz, n_segs,
					0, type_msl_idx))
				return -1;

			total_segs += msl->memseg_arr.len;
			total_type_mem = total_segs * hugepage_sz;
			type_msl_idx++;

			if (alloc_va_space(msl)) {
				RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
				return -1;
			}
		}
		total_mem += total_type_mem;
	}

	return 0;
}

static int
memseg_secondary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int msl_idx;
	struct rte_memseg_list *msl;

	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {

		msl = &mcfg->memsegs[msl_idx];

		/* skip empty memseg lists */
		if (msl->memseg_arr.len == 0)
			continue;
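
		/* attach to the fbarray that the primary process created;
		 * this maps the same segment metadata into this process.
		 */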
		if (rte_fbarray_attach(&msl->memseg_arr)) {
			RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
			return -1;
		}

		/* preallocate VA space */
		if (alloc_va_space(msl)) {
			RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
			return -1;
		}
	}

	return 0;
}

int
rte_eal_memseg_init(void)
{
	return rte_eal_process_type() == RTE_PROC_PRIMARY ?
			memseg_primary_init() :
			memseg_secondary_init();
}