/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <sys/mman.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>

#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_memcfg.h"
#include "eal_options.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
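
/*
 * Note on the offsets passed to mmap() below: the contigmem driver exposes
 * each preallocated buffer at a device-file offset of (buffer index * page
 * size), which is why both the init and attach paths multiply the
 * buffer/segment index by EAL_PAGE_SIZE. This description is inferred from
 * how the offsets are used in this file; see the contigmem kernel module for
 * the authoritative mapping rules.
 */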

uint64_t eal_get_baseaddr(void)
{
	/*
	 * FreeBSD may allocate something in the space we will be mapping
	 * before we get a chance to do it ourselves, so use a base address
	 * that is far away from where malloc() et al. usually map things.
	 */
	return 0x1000000000ULL;
}

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
	/* XXX not implemented. This function is only used by
	 * rte_mempool_virt2iova() when hugepages are disabled. */
	(void)virtaddr;
	return RTE_BAD_IOVA;
}

rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
	return rte_mem_virt2phy(virtaddr);
}

int
rte_eal_hugepage_init(void)
{
	struct rte_mem_config *mcfg;
	uint64_t total_mem = 0;
	void *addr;
	unsigned int i, j, seg_idx = 0;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* for debug purposes, hugetlbfs can be disabled */
	if (internal_config.no_hugetlbfs) {
		struct rte_memseg_list *msl;
		uint64_t mem_sz, page_sz;
		int n_segs;

		/* create a memseg list */
		msl = &mcfg->memsegs[0];

		mem_sz = internal_config.memory;
		page_sz = RTE_PGSIZE_4K;
		n_segs = mem_sz / page_sz;

		if (eal_memseg_list_init_named(
				msl, "nohugemem", page_sz, n_segs, 0, true)) {
			return -1;
		}

		addr = mmap(NULL, mem_sz, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}

		msl->base_va = addr;
		msl->len = mem_sz;

		eal_memseg_list_populate(msl, addr, n_segs);

		return 0;
	}

	/* map all hugepages and sort them */
	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		struct hugepage_info *hpi;
		rte_iova_t prev_end = 0;
		int prev_ms_idx = -1;
		uint64_t page_sz, mem_needed;
		unsigned int n_pages, max_pages;

		hpi = &internal_config.hugepage_info[i];
		page_sz = hpi->hugepage_sz;
		max_pages = hpi->num_pages[0];
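
		/*
		 * Map only as much of this page size as is still needed to
		 * satisfy the configured memory amount, rounded up to a whole
		 * number of hugepages, and never more than the number of
		 * contigmem buffers that actually exist.
		 */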
		mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
				page_sz);

		n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

		for (j = 0; j < n_pages; j++) {
			struct rte_memseg_list *msl;
			struct rte_fbarray *arr;
			struct rte_memseg *seg;
			int msl_idx, ms_idx;
			rte_iova_t physaddr;
			bool is_adjacent;
			int error;
			size_t sysctl_size = sizeof(physaddr);
			char physaddr_str[64];

			/* first, check if this segment is IOVA-adjacent to
			 * the previous one.
			 */
			snprintf(physaddr_str, sizeof(physaddr_str),
					"hw.contigmem.physaddr.%d", j);
			error = sysctlbyname(physaddr_str, &physaddr,
					&sysctl_size, NULL, 0);
			if (error < 0) {
				RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
						"from %s\n", j, hpi->hugedir);
				return -1;
			}
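
			/*
			 * The same information is exported as a sysctl node
			 * and can be inspected from the shell, e.g.
			 * "sysctl hw.contigmem.physaddr.0" (assuming the
			 * contigmem module is loaded with at least one
			 * buffer).
			 */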

			is_adjacent = prev_end != 0 && physaddr == prev_end;
			prev_end = physaddr + hpi->hugepage_sz;
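
			/*
			 * Tracking adjacency lets physically contiguous
			 * contigmem buffers land in consecutive memseg slots,
			 * so they can later be treated as one IOVA-contiguous
			 * region instead of being split by holes.
			 */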
			for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
					msl_idx++) {
				bool empty, need_hole;
				msl = &mcfg->memsegs[msl_idx];
				arr = &msl->memseg_arr;

				if (msl->page_sz != page_sz)
					continue;

				empty = arr->count == 0;

				/* we need a hole if this isn't an empty memseg
				 * list, and if previous segment was not
				 * adjacent to current one.
				 */
				need_hole = !empty && !is_adjacent;

				/* we need 1, plus hole if not adjacent */
				ms_idx = rte_fbarray_find_next_n_free(arr,
						0, 1 + (need_hole ? 1 : 0));

				/* memseg list is full? */
				if (ms_idx < 0)
					continue;

				if (need_hole && prev_ms_idx == ms_idx - 1)
					ms_idx++;
				prev_ms_idx = ms_idx;

				break;
			}

			if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
					RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
					RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
				return -1;
			}

			arr = &msl->memseg_arr;
			seg = rte_fbarray_get(arr, ms_idx);

			addr = RTE_PTR_ADD(msl->base_va,
					(size_t)msl->page_sz * ms_idx);

			/* address is already mapped in memseg list, so using
			 * MAP_FIXED here is safe.
			 */
			addr = mmap(addr, page_sz, PROT_READ | PROT_WRITE,
					MAP_SHARED | MAP_FIXED,
					hpi->lock_descriptor,
					j * EAL_PAGE_SIZE);
			if (addr == MAP_FAILED) {
				RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
						j, hpi->hugedir);
				return -1;
			}

			seg->addr = addr;
			seg->iova = physaddr;
			seg->hugepage_sz = page_sz;
			seg->len = page_sz;
			seg->nchannel = mcfg->nchannel;
			seg->nrank = mcfg->nrank;
			seg->socket_id = 0;

			rte_fbarray_set_used(arr, ms_idx);

			RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
					PRIx64 ", len %" PRIu64 "\n",
					seg_idx++, addr, physaddr, page_sz);

			total_mem += seg->len;
		}
		if (total_mem >= internal_config.memory)
			break;
	}
	if (total_mem < internal_config.memory) {
		RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
				"requested: %" PRIu64 "M "
				"available: %" PRIu64 "M\n",
				internal_config.memory >> 20, total_mem >> 20);
		return -1;
	}

	return 0;
}

struct attach_walk_args {
	int fd_hugepage;
	int seg_idx;
};

static int
attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)
{
	struct attach_walk_args *wa = arg;
	void *addr;

	if (msl->external)
		return 0;
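
	/*
	 * Each segment is backed by one contigmem buffer; the file offset
	 * below selects that buffer. This assumes the walk visits segments in
	 * the same order in which rte_eal_hugepage_init() mapped them, so
	 * seg_idx lines up with the contigmem buffer index.
	 */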
	addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
			wa->seg_idx * EAL_PAGE_SIZE);
	if (addr == MAP_FAILED || addr != ms->addr)
		return -1;
	wa->seg_idx++;

	return 0;
}

int
rte_eal_hugepage_attach(void)
{
	const struct hugepage_info *hpi;
	int fd_hugepage = -1;
	unsigned int i;

	hpi = &internal_config.hugepage_info[0];

	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		const struct hugepage_info *cur_hpi = &hpi[i];
		struct attach_walk_args wa;

		memset(&wa, 0, sizeof(wa));

		/* Obtain a file descriptor for contiguous memory */
		fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
		if (fd_hugepage < 0) {
			RTE_LOG(ERR, EAL, "Could not open %s\n",
					cur_hpi->hugedir);
			goto error;
		}
		wa.fd_hugepage = fd_hugepage;

		/* Map the contiguous memory into each memory segment */
		if (rte_memseg_walk(attach_segment, &wa) < 0) {
			RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
				wa.seg_idx, cur_hpi->hugedir);
			goto error;
		}

		close(fd_hugepage);
		fd_hugepage = -1;
	}

	/* hugepage_info is no longer required */
	return 0;

error:
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}

int
rte_eal_using_phys_addrs(void)
{
	return 0;
}

static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
	uint64_t area_sz, max_pages;

	/* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
	max_pages = RTE_MAX_MEMSEG_PER_LIST;
	max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

	area_sz = RTE_MIN(page_sz * max_pages, max_mem);
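
	/*
	 * Example (assuming the typical defaults of 8192 segments and
	 * 32768 MB per list): with 2 MB pages this gives
	 * min(8192 * 2 MB, 32 GB) = 16 GB of VA per list, while with 1 GB
	 * pages it is capped at 32 GB.
	 */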

	/* make sure the list isn't smaller than the page size */
	area_sz = RTE_MAX(area_sz, page_sz);

	return RTE_ALIGN(area_sz, page_sz);
}

static int
memseg_list_alloc(struct rte_memseg_list *msl)
{
	int flags = 0;

#ifdef RTE_ARCH_PPC_64
	flags |= EAL_RESERVE_HUGEPAGES;
#endif
	return eal_memseg_list_alloc(msl, flags);
}

static int
memseg_primary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int hpi_idx, msl_idx = 0;
	struct rte_memseg_list *msl;
	uint64_t max_mem, total_mem;

	/* no-huge does not need this at all */
	if (internal_config.no_hugetlbfs)
		return 0;

	/* FreeBSD has an issue where core dump will dump the entire memory
	 * contents, including anonymous zero-page memory. Therefore, while we
	 * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
	 * also be further limiting total memory amount to whatever memory is
	 * available to us through contigmem driver (plus spacing blocks).
	 *
	 * so, at each stage, we will be checking how much memory we are
	 * preallocating, and adjusting all the values accordingly.
	 */

	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	total_mem = 0;

	/* create memseg lists */
	for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
			hpi_idx++) {
		uint64_t max_type_mem, total_type_mem = 0;
		uint64_t avail_mem;
		int type_msl_idx, max_segs, avail_segs, total_segs = 0;
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_config.hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		/* no NUMA support on FreeBSD */
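		/*
		 * All hugepage memory is therefore reported as belonging to
		 * socket 0 (see the socket_id arguments used below).
		 */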

		/* check if we've already exceeded total memory amount */
		if (total_mem >= max_mem)
			break;

		/* first, calculate theoretical limits according to config */
		max_type_mem = RTE_MIN(max_mem - total_mem,
			(uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
		max_segs = RTE_MAX_MEMSEG_PER_TYPE;

		/* now, limit all of that to whatever will actually be
		 * available to us, because without dynamic allocation support,
		 * all of that extra memory will be sitting there being useless
		 * and slowing down core dumps in case of a crash.
		 *
		 * we need (N*2)-1 segments because we cannot guarantee that
		 * each segment will be IOVA-contiguous with the previous one,
		 * so we will allocate more and put spaces between segments
		 * that are non-contiguous.
		 */
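		/*
		 * For example, with 4 contigmem buffers the worst case is
		 * that none of them are physically adjacent, which needs
		 * 4 used slots plus 3 hole slots, i.e. (4 * 2) - 1 = 7.
		 */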
		avail_segs = (hpi->num_pages[0] * 2) - 1;
		avail_mem = avail_segs * hugepage_sz;

		max_type_mem = RTE_MIN(avail_mem, max_type_mem);
		max_segs = RTE_MIN(avail_segs, max_segs);

		type_msl_idx = 0;
		while (total_type_mem < max_type_mem &&
				total_segs < max_segs) {
			uint64_t cur_max_mem, cur_mem;
			unsigned int n_segs;

			if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL,
					"No more space in memseg lists, please increase %s\n",
					RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
				return -1;
			}

			msl = &mcfg->memsegs[msl_idx++];

			cur_max_mem = max_type_mem - total_type_mem;

			cur_mem = get_mem_amount(hugepage_sz,
					cur_max_mem);
			n_segs = cur_mem / hugepage_sz;

			if (eal_memseg_list_init(msl, hugepage_sz, n_segs,
					0, type_msl_idx, false))
				return -1;

			total_segs += msl->memseg_arr.len;
			total_type_mem = total_segs * hugepage_sz;
			type_msl_idx++;

			if (memseg_list_alloc(msl)) {
				RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
				return -1;
			}
		}
		total_mem += total_type_mem;
	}

	return 0;
}

static int
memseg_secondary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int msl_idx = 0;
	struct rte_memseg_list *msl;

	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {

		msl = &mcfg->memsegs[msl_idx];

		/* skip empty memseg lists */
		if (msl->memseg_arr.len == 0)
			continue;

		if (rte_fbarray_attach(&msl->memseg_arr)) {
			RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
			return -1;
		}

		/* preallocate VA space */
		if (memseg_list_alloc(msl)) {
			RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
			return -1;
		}
	}

	return 0;
}

int
rte_eal_memseg_init(void)
{
	return rte_eal_process_type() == RTE_PROC_PRIMARY ?
			memseg_primary_init() :
			memseg_secondary_init();
}