1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
12 #include <rte_fbarray.h>
13 #include <rte_memory.h>
15 #include <rte_eal_memconfig.h>
16 #include <rte_eal_paging.h>
17 #include <rte_errno.h>
19 #ifndef RTE_EXEC_ENV_WINDOWS
20 #include <rte_telemetry.h>
23 #include "eal_memalloc.h"
24 #include "eal_private.h"
25 #include "eal_internal_cfg.h"
26 #include "eal_memcfg.h"
27 #include "eal_options.h"
28 #include "malloc_heap.h"
31 * Try to mmap *size bytes in /dev/zero. If it is successful, return the
32 * pointer to the mmap'd area and keep *size unmodified. Else, retry
33 * with a smaller zone: decrease *size by hugepage_sz until it reaches
34 * 0. In this case, return NULL. Note: this function returns an address
35 * which is a multiple of hugepage size.
38 #define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
40 static void *next_baseaddr;
41 static uint64_t system_page_sz;
43 #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
// Reserve a region of virtual address space through eal_mem_reserve().
//   requested_addr - wanted address; when NULL and next_baseaddr is set,
//                    next_baseaddr (aligned to page_sz) is used instead.
//   size           - in/out size of the area in bytes.
//   page_sz        - alignment wanted for the resulting address.
//   flags          - EAL_VIRTUAL_AREA_ADDR_IS_HINT, _ALLOW_SHRINK and _UNMAP
//                    bits, decoded into booleans below.
//   reserve_flags  - forwarded unchanged to eal_mem_reserve().
// NOTE(review): this listing omits short source lines (return type, braces,
// the retry-loop opener, the size decrement and the return statements), so
// part of the control flow is not visible here.
45 eal_get_virtual_area(void *requested_addr, size_t *size,
46 size_t page_sz, int flags, int reserve_flags)
48 bool addr_is_hint, allow_shrink, unmap, no_align;
50 void *mapped_addr, *aligned_addr;
52 struct internal_config *internal_conf =
53 eal_get_internal_configuration();
// Query the system page size once and cache it in the file-scope static.
55 if (system_page_sz == 0)
56 system_page_sz = rte_mem_page_size();
58 RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
// Decode the behaviour flags.
60 addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
61 allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
62 unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
// Primary process only: seed next_baseaddr from the configured base virtual
// address, or from eal_get_baseaddr() when none was configured.
64 if (next_baseaddr == NULL && internal_conf->base_virtaddr != 0 &&
65 rte_eal_process_type() == RTE_PROC_PRIMARY)
66 next_baseaddr = (void *) internal_conf->base_virtaddr;
69 if (next_baseaddr == NULL && internal_conf->base_virtaddr == 0 &&
70 rte_eal_process_type() == RTE_PROC_PRIMARY)
71 next_baseaddr = (void *) eal_get_baseaddr();
// No explicit address was given: start from next_baseaddr, aligned to page_sz.
73 if (requested_addr == NULL && next_baseaddr != NULL) {
74 requested_addr = next_baseaddr;
75 requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
79 /* we don't need alignment of resulting pointer in the following cases:
81 * 1. page size is equal to system size
82 * 2. we have a requested address, and it is page-aligned, and we will
83 * be discarding the address if we get a different one.
85 * for all other cases, alignment is potentially necessary.
87 no_align = (requested_addr != NULL &&
88 requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
90 page_sz == system_page_sz;
// When alignment may be needed, reserve one extra page_sz so that an aligned
// start address fits inside the mapping.
93 map_sz = no_align ? *size : *size + page_sz;
94 if (map_sz > SIZE_MAX) {
95 RTE_LOG(ERR, EAL, "Map size too big\n");
100 mapped_addr = eal_mem_reserve(
101 requested_addr, (size_t)map_sz, reserve_flags);
// NOTE(review): the statement guarded by this condition is not shown;
// presumably it shrinks *size before the next attempt - confirm.
102 if ((mapped_addr == NULL) && allow_shrink)
// The hint was not honoured: move next_baseaddr forward by one page and, while
// the try counter allows, release the mapping and retry at the new address.
105 if ((mapped_addr != NULL) && addr_is_hint &&
106 (mapped_addr != requested_addr)) {
108 next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
109 if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
110 /* hint was not used. Try with another offset */
111 eal_mem_free(mapped_addr, map_sz);
113 requested_addr = next_baseaddr;
// Keep trying only while nothing is mapped, some size is left, and either
// shrinking or hint mode is enabled.
116 } while ((allow_shrink || addr_is_hint) &&
117 (mapped_addr == NULL) && (*size > 0));
119 /* align resulting address - if map failed, we will ignore the value
120 * anyway, so no need to add additional checks.
122 aligned_addr = no_align ? mapped_addr :
123 RTE_PTR_ALIGN(mapped_addr, page_sz);
126 RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
127 rte_strerror(rte_errno));
129 } else if (mapped_addr == NULL) {
130 RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
131 rte_strerror(rte_errno));
// A mandatory (non-hint) address that was not obtained is an error: the
// mapping is released and rte_errno is set to EADDRNOTAVAIL.
133 } else if (requested_addr != NULL && !addr_is_hint &&
134 aligned_addr != requested_addr) {
135 RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
136 requested_addr, aligned_addr);
137 eal_mem_free(mapped_addr, map_sz);
138 rte_errno = EADDRNOTAVAIL;
// A missed hint is only reported; the log level depends on whether a base
// virtual address was explicitly configured.
140 } else if (requested_addr != NULL && addr_is_hint &&
141 aligned_addr != requested_addr) {
143 * demote this warning to debug if we did not explicitly request
144 * a base virtual address.
146 if (internal_conf->base_virtaddr != 0) {
147 RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
148 requested_addr, aligned_addr);
149 RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n");
151 RTE_LOG(DEBUG, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
152 requested_addr, aligned_addr);
153 RTE_LOG(DEBUG, EAL, " This may cause issues with mapping memory into secondary processes\n");
// The next reservation will be attempted right after the end of this area.
155 } else if (next_baseaddr != NULL) {
156 next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
159 RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
160 aligned_addr, *size);
// NOTE(review): the condition guarding this release is not shown; presumably
// it is the `unmap` flag - confirm.
163 eal_mem_free(mapped_addr, map_sz);
164 } else if (!no_align) {
165 void *map_end, *aligned_end;
166 size_t before_len, after_len;
168 /* when we reserve space with alignment, we add alignment to
169 * mapping size. On 32-bit, if 1GB alignment was requested, this
170 * would waste 1GB of address space, which is a luxury we cannot
171 * afford. so, if alignment was performed, check if any unneeded
172 * address space can be unmapped back.
175 map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
176 aligned_end = RTE_PTR_ADD(aligned_addr, *size);
178 /* unmap space before aligned mmap address */
179 before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
181 eal_mem_free(mapped_addr, before_len);
183 /* unmap space after aligned end mmap address */
184 after_len = RTE_PTR_DIFF(map_end, aligned_end);
186 eal_mem_free(aligned_end, after_len);
190 /* Exclude these pages from a core dump. */
191 eal_mem_set_dump(aligned_addr, *size, false);
// Initialise a memseg list under an explicit fbarray name.
// Creates msl->memseg_arr with n_segs elements of sizeof(struct rte_memseg)
// and records page_sz and socket_id in the list.
// NOTE(review): the use of `heap` and the return statements are on lines not
// shown in this listing.
198 eal_memseg_list_init_named(struct rte_memseg_list *msl, const char *name,
199 uint64_t page_sz, int n_segs, int socket_id, bool heap)
// A non-zero result from rte_fbarray_init() is logged as an error.
201 if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
202 sizeof(struct rte_memseg))) {
203 RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
204 rte_strerror(rte_errno));
208 msl->page_sz = page_sz;
209 msl->socket_id = socket_id;
// The page size is logged in kB (page_sz >> 10), printed in hexadecimal.
214 "Memseg list allocated at socket %i, page size 0x%"PRIx64"kB\n",
215 socket_id, page_sz >> 10);
// Initialise a memseg list with a generated name and delegate to
// eal_memseg_list_init_named(), whose result is returned.
// The name follows MEMSEG_LIST_FMT: page size in kB, socket id and one more
// integer whose argument is on a line not shown (presumably type_msl_idx -
// confirm).
221 eal_memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
222 int n_segs, int socket_id, int type_msl_idx, bool heap)
224 char name[RTE_FBARRAY_NAME_LEN];
226 snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
229 return eal_memseg_list_init_named(
230 msl, name, page_sz, n_segs, socket_id, heap);
// Reserve virtual address space for every segment of a memseg list.
// The size is page_sz multiplied by the length of the list fbarray, and the
// area is requested at msl->base_va with flags 0, so the address is not
// treated as a hint.
// NOTE(review): the failure check, the stores into msl and the return
// statements are on lines not shown in this listing.
234 eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags)
236 size_t page_sz, mem_sz;
239 page_sz = msl->page_sz;
240 mem_sz = page_sz * msl->memseg_arr.len;
242 addr = eal_get_virtual_area(
243 msl->base_va, &mem_sz, page_sz, 0, reserve_flags);
245 #ifndef RTE_EXEC_ENV_WINDOWS
246 /* The hint would be misleading on Windows, because address
247 * is by default system-selected (base VA = 0).
248 * However, this function is called from many places,
249 * including common code, so don't duplicate the message.
// EADDRNOTAVAIL is what eal_get_virtual_area() sets when a mandatory address
// could not be obtained.
251 if (rte_errno == EADDRNOTAVAIL)
252 RTE_LOG(ERR, EAL, "Cannot reserve %llu bytes at [%p] - "
253 "please use '--" OPT_BASE_VIRTADDR "' option\n",
254 (unsigned long long)mem_sz, msl->base_va);
261 RTE_LOG(DEBUG, EAL, "VA reserved for memseg list at %p, size %zx\n",
// Fill the first n_segs entries of a memseg list, one page each, starting at
// addr and advancing by msl->page_sz per entry; each entry is marked used.
// NOTE(review): further per-segment field assignments may exist on lines not
// shown in this listing.
268 eal_memseg_list_populate(struct rte_memseg_list *msl, void *addr, int n_segs)
270 size_t page_sz = msl->page_sz;
273 for (i = 0; i < n_segs; i++) {
274 struct rte_fbarray *arr = &msl->memseg_arr;
275 struct rte_memseg *ms = rte_fbarray_get(arr, i);
// In IOVA-as-VA mode the IOVA is the virtual address itself; the second
// assignment stores RTE_BAD_IOVA (its `else` line is not shown).
277 if (rte_eal_iova_mode() == RTE_IOVA_VA)
278 ms->iova = (uintptr_t)addr;
280 ms->iova = RTE_BAD_IOVA;
282 ms->hugepage_sz = page_sz;
286 rte_fbarray_set_used(arr, i);
288 addr = RTE_PTR_ADD(addr, page_sz);
// Return the memseg of `msl` that covers `addr`.
// addr must lie in [msl->base_va, msl->base_va + msl->len); the segment index
// is the byte offset from base_va divided by msl->page_sz.
// NOTE(review): the handling of a NULL msl and the value returned for an
// out-of-range address are on lines not shown in this listing.
292 static struct rte_memseg *
293 virt2memseg(const void *addr, const struct rte_memseg_list *msl)
295 const struct rte_fbarray *arr;
302 /* a memseg list was specified, check if it's the right one */
303 start = msl->base_va;
304 end = RTE_PTR_ADD(start, msl->len);
306 if (addr < start || addr >= end)
309 /* now, calculate index */
310 arr = &msl->memseg_arr;
311 ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
312 return rte_fbarray_get(arr, ms_idx);
// Find the memseg list whose range [base_va, base_va + len) contains addr by
// scanning all RTE_MAX_MEMSEG_LISTS entries of the memory config.
// NOTE(review): the loop exit and both return statements are on lines not
// shown; presumably NULL is returned when no list matches - confirm.
315 static struct rte_memseg_list *
316 virt2memseg_list(const void *addr)
318 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
319 struct rte_memseg_list *msl;
322 for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
324 msl = &mcfg->memsegs[msl_idx];
326 start = msl->base_va;
327 end = RTE_PTR_ADD(start, msl->len);
328 if (addr >= start && addr < end)
331 /* if we didn't find our memseg list */
332 if (msl_idx == RTE_MAX_MEMSEG_LISTS)
// Public entry point: returns whatever virt2memseg_list() returns for addr.
337 struct rte_memseg_list *
338 rte_mem_virt2memseg_list(const void *addr)
340 return virt2memseg_list(addr);
// Memseg walk callback used for IOVA to virtual address lookup.
// arg points to a struct virtiova; when vi->iova falls inside
// [ms->iova, ms->iova + ms->len), vi->virt is set to ms->addr plus the offset
// of the IOVA within the segment.
// NOTE(review): return values are on lines not shown in this listing.
348 find_virt(const struct rte_memseg_list *msl __rte_unused,
349 const struct rte_memseg *ms, void *arg)
351 struct virtiova *vi = arg;
352 if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) {
353 size_t offset = vi->iova - ms->iova;
354 vi->virt = RTE_PTR_ADD(ms->addr, offset);
// Contiguous-walk variant of find_virt(): the range tested is
// [ms->iova, ms->iova + len), where len is the length passed in by the walk
// instead of the length of a single segment.
// NOTE(review): return values are on lines not shown in this listing.
361 find_virt_legacy(const struct rte_memseg_list *msl __rte_unused,
362 const struct rte_memseg *ms, size_t len, void *arg)
364 struct virtiova *vi = arg;
365 if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) {
366 size_t offset = vi->iova - ms->iova;
367 vi->virt = RTE_PTR_ADD(ms->addr, offset);
// Translate an IOVA to a virtual address by walking the memsegs.
// The lookup state `vi` is zeroed first; legacy memory mode uses the
// contiguous walk with find_virt_legacy, otherwise every segment is visited
// with find_virt.
// NOTE(review): storing iova into vi and the return statement are on lines
// not shown; presumably vi.virt is returned - confirm.
375 rte_mem_iova2virt(rte_iova_t iova)
378 const struct internal_config *internal_conf =
379 eal_get_internal_configuration();
381 memset(&vi, 0, sizeof(vi));
384 /* for legacy mem, we can get away with scanning VA-contiguous segments,
385 * as we know they are PA-contiguous as well
387 if (internal_conf->legacy_mem)
388 rte_memseg_contig_walk(find_virt_legacy, &vi);
390 rte_memseg_walk(find_virt, &vi);
// Return the memseg covering addr. When msl is NULL the owning list is looked
// up with rte_mem_virt2memseg_list(); otherwise the supplied list is used.
396 rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl)
398 return virt2memseg(addr, msl != NULL ? msl :
399 rte_mem_virt2memseg_list(addr));
// Memseg-list walk callback: adds the size of the used segments of msl
// (used-entry count times page size) to the uint64_t that arg points to.
// NOTE(review): any filtering of lists and the return value are on lines not
// shown in this listing.
403 physmem_size(const struct rte_memseg_list *msl, void *arg)
405 uint64_t *total_len = arg;
410 *total_len += msl->memseg_arr.count * msl->page_sz;
415 /* get the total size of memory */
// Sum the sizes reported by physmem_size() over all memseg lists, starting
// from zero. The return statement is on a line not shown in this listing.
417 rte_eal_get_physmem_size(void)
419 uint64_t total_len = 0;
421 rte_memseg_list_walk(physmem_size, &total_len);
// Memseg walk callback that prints one line describing a segment.
// The list index is the position of msl inside mcfg->memsegs and the segment
// index comes from rte_fbarray_find_idx(); both are used to look up the
// segment fd.
// NOTE(review): the declaration of `f`, the early exits and the fprintf
// arguments are on lines not shown; rte_dump_physmem_layout() passes a FILE
// pointer as the walk argument.
427 dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
430 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
431 int msl_idx, ms_idx, fd;
434 msl_idx = msl - mcfg->memsegs;
// Guard against a list pointer that is not inside the memsegs array.
435 if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
438 ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
442 fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx);
443 fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
444 "virt:%p, socket_id:%"PRId32", "
445 "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
446 "nrank:%"PRIx32" fd:%i\n",
461 * Defining here because declared in rte_memory.h, but the actual implementation
462 * is in eal_common_memalloc.c, like all other memalloc internals.
// Register a memory event callback by forwarding to
// eal_memalloc_mem_event_callback_register().
// In legacy memory mode only a debug message is visible here; the statements
// that follow it in that branch are on lines not shown.
465 rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
468 const struct internal_config *internal_conf =
469 eal_get_internal_configuration();
471 /* FreeBSD boots with legacy mem enabled by default */
472 if (internal_conf->legacy_mem) {
473 RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
477 return eal_memalloc_mem_event_callback_register(name, clb, arg);
// Unregister a memory event callback by forwarding to
// eal_memalloc_mem_event_callback_unregister().
// In legacy memory mode only a debug message is visible here; the statements
// that follow it in that branch are on lines not shown.
// NOTE(review): the debug text says Registering although this is the
// unregister path - looks like a copy-paste slip, confirm before changing.
481 rte_mem_event_callback_unregister(const char *name, void *arg)
483 const struct internal_config *internal_conf =
484 eal_get_internal_configuration();
486 /* FreeBSD boots with legacy mem enabled by default */
487 if (internal_conf->legacy_mem) {
488 RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
492 return eal_memalloc_mem_event_callback_unregister(name, arg);
// Register an allocation validator by forwarding name, clb and socket_id to
// eal_memalloc_mem_alloc_validator_register(); the rest of that call
// (presumably passing `limit`) is on a line not shown.
// In legacy memory mode only a debug message is visible here; the statements
// that follow it in that branch are on lines not shown.
496 rte_mem_alloc_validator_register(const char *name,
497 rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
499 const struct internal_config *internal_conf =
500 eal_get_internal_configuration();
502 /* FreeBSD boots with legacy mem enabled by default */
503 if (internal_conf->legacy_mem) {
504 RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
508 return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id,
// Unregister an allocation validator by forwarding to
// eal_memalloc_mem_alloc_validator_unregister().
// In legacy memory mode only a debug message is visible here; the statements
// that follow it in that branch are on lines not shown.
// NOTE(review): the debug text says Registering although this is the
// unregister path - looks like a copy-paste slip, confirm before changing.
513 rte_mem_alloc_validator_unregister(const char *name, int socket_id)
515 const struct internal_config *internal_conf =
516 eal_get_internal_configuration();
518 /* FreeBSD boots with legacy mem enabled by default */
519 if (internal_conf->legacy_mem) {
520 RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
524 return eal_memalloc_mem_alloc_validator_unregister(name, socket_id);
527 /* Dump the physical memory layout on console */
// Walk all memsegs with dump_memseg(), passing the output stream f as the
// walk argument.
529 rte_dump_physmem_layout(FILE *f)
531 rte_memseg_walk(dump_memseg, f);
// Memseg walk callback used by check_dma_mask(); arg points to the 64-bit
// DMA mask. It computes the IOVA of the last byte of the segment and emits
// debug messages for a segment that is out of range.
// NOTE(review): the comparison against the mask, the remaining log arguments
// and the return values are on lines not shown in this listing.
535 check_iova(const struct rte_memseg_list *msl __rte_unused,
536 const struct rte_memseg *ms, void *arg)
538 uint64_t *mask = arg;
541 /* higher address within segment */
542 iova = (ms->iova + ms->len) - 1;
546 RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n",
549 RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask);
553 #define MAX_DMA_MASK_BITS 63
555 /* check memseg iovas are within the required range based on dma mask */
// Check the memseg IOVAs against a DMA mask of `maskbits` bits and record the
// most restrictive mask width in the memory config.
// Two walks are visible, rte_memseg_walk_thread_unsafe() and
// rte_memseg_walk(); presumably `thread_unsafe` selects between them (the
// branch lines are not shown - confirm).
// NOTE(review): declarations, result handling and return statements are on
// lines not shown in this listing.
557 check_dma_mask(uint8_t maskbits, bool thread_unsafe)
559 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
563 /* Sanity check. We only check width can be managed with 64 bits
564 * variables. Indeed any higher value is likely wrong. */
565 if (maskbits > MAX_DMA_MASK_BITS) {
566 RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
567 maskbits, MAX_DMA_MASK_BITS);
// The resulting mask has every bit at position maskbits and above set.
571 /* create dma mask */
572 mask = ~((1ULL << maskbits) - 1);
575 ret = rte_memseg_walk_thread_unsafe(check_iova, &mask);
577 ret = rte_memseg_walk(check_iova, &mask);
581 * Dma mask precludes hugepage usage.
582 * This device can not be used and we do not need to keep
588 * we need to keep the more restricted maskbit for checking
589 * potential dynamic memory allocation in the future.
// A stored value of 0 means no mask was recorded yet; otherwise keep the
// smaller of the stored and the new width.
591 mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
592 RTE_MIN(mcfg->dma_maskbits, maskbits);
// Run check_dma_mask() with thread_unsafe set to false.
598 rte_mem_check_dma_mask(uint8_t maskbits)
600 return check_dma_mask(maskbits, false);
// Run check_dma_mask() with thread_unsafe set to true.
604 rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits)
606 return check_dma_mask(maskbits, true);
610 * Set dma mask to use when memory initialization is done.
612 * This function should ONLY be used by code executed before the memory
613 * initialization. PMDs should use rte_mem_check_dma_mask if the device has
614 * addressing limitations.
// Record a DMA mask width in the memory config without checking any memory.
// A stored value of 0 means no mask was recorded yet; otherwise the smaller
// of the stored and the new width is kept.
617 rte_mem_set_dma_mask(uint8_t maskbits)
619 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
621 mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
622 RTE_MIN(mcfg->dma_maskbits, maskbits);
625 /* return the number of memory channels */
// Return the nchannel field of the shared memory config.
626 unsigned rte_memory_get_nchannel(void)
628 return rte_eal_get_configuration()->mem_config->nchannel;
631 /* return the number of memory ranks */
// Return the nrank field of the shared memory config.
632 unsigned rte_memory_get_nrank(void)
634 return rte_eal_get_configuration()->mem_config->nrank;
// Copy the forced channel and rank counts from the internal configuration
// into the shared memory config.
// NOTE(review): the statement taken for a secondary process and the return
// statements are on lines not shown; presumably secondary processes return
// early without writing - confirm.
638 rte_eal_memdevice_init(void)
640 struct rte_config *config;
641 const struct internal_config *internal_conf;
643 if (rte_eal_process_type() == RTE_PROC_SECONDARY)
646 internal_conf = eal_get_internal_configuration();
647 config = rte_eal_get_configuration();
648 config->mem_config->nchannel = internal_conf->force_nchannel;
649 config->mem_config->nrank = internal_conf->force_nrank;
654 /* Lock page in physical memory and prevent from swapping. */
// Lock the single system page that contains virt: the address is rounded
// down to a multiple of rte_mem_page_size() and exactly one page is passed to
// rte_mem_lock(), whose result is returned.
656 rte_mem_lock_page(const void *virt)
658 uintptr_t virtual = (uintptr_t)virt;
659 size_t page_size = rte_mem_page_size();
660 uintptr_t aligned = RTE_PTR_ALIGN_FLOOR(virtual, page_size);
661 return rte_mem_lock((void *)aligned, page_size);
// Call func once per run of contiguous used segments in every memseg list,
// without taking the memory lock.
// For each run, func receives the list, the first segment of the run, the run
// length in bytes (segment count times page size) and arg.
// NOTE(review): the handling of the callback result, the skip statement for
// empty lists and the return statement are on lines not shown.
665 rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
667 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
668 int i, ms_idx, ret = 0;
670 for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
671 struct rte_memseg_list *msl = &mcfg->memsegs[i];
672 const struct rte_memseg *ms;
673 struct rte_fbarray *arr;
// Lists with no used segments are tested here.
675 if (msl->memseg_arr.count == 0)
678 arr = &msl->memseg_arr;
680 ms_idx = rte_fbarray_find_next_used(arr, 0);
681 while (ms_idx >= 0) {
685 ms = rte_fbarray_get(arr, ms_idx);
687 /* find how many more segments there are, starting with
690 n_segs = rte_fbarray_find_contig_used(arr, ms_idx);
691 len = n_segs * msl->page_sz;
693 ret = func(msl, ms, len, arg);
696 ms_idx = rte_fbarray_find_next_used(arr,
// Locked wrapper: runs rte_memseg_contig_walk_thread_unsafe() while holding
// the memory config read lock. The return statement is on a line not shown.
704 rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
708 /* do not allow allocations/frees/init while we iterate */
709 rte_mcfg_mem_read_lock();
710 ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
711 rte_mcfg_mem_read_unlock();
// Call func for every used segment of every memseg list, without taking the
// memory lock. func receives the list, the segment and arg.
// NOTE(review): the handling of the callback result, the skip statement for
// empty lists and the return statement are on lines not shown.
717 rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
719 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
720 int i, ms_idx, ret = 0;
722 for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
723 struct rte_memseg_list *msl = &mcfg->memsegs[i];
724 const struct rte_memseg *ms;
725 struct rte_fbarray *arr;
// Lists with no used segments are tested here.
727 if (msl->memseg_arr.count == 0)
730 arr = &msl->memseg_arr;
// Iterate over used entries only; a negative index ends the inner loop.
732 ms_idx = rte_fbarray_find_next_used(arr, 0);
733 while (ms_idx >= 0) {
734 ms = rte_fbarray_get(arr, ms_idx);
735 ret = func(msl, ms, arg);
738 ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
// Locked wrapper: runs rte_memseg_walk_thread_unsafe() while holding the
// memory config read lock. The return statement is on a line not shown.
745 rte_memseg_walk(rte_memseg_walk_t func, void *arg)
749 /* do not allow allocations/frees/init while we iterate */
750 rte_mcfg_mem_read_lock();
751 ret = rte_memseg_walk_thread_unsafe(func, arg);
752 rte_mcfg_mem_read_unlock();
// Call func for every memseg list in the memory config, without taking the
// memory lock. Lists whose base_va is NULL are tested separately (the
// statement taken for them is on a line not shown, presumably a skip).
// NOTE(review): the handling of the callback result and the return statement
// are on lines not shown in this listing.
758 rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
760 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
763 for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
764 struct rte_memseg_list *msl = &mcfg->memsegs[i];
766 if (msl->base_va == NULL)
769 ret = func(msl, arg);
// Locked wrapper: runs rte_memseg_list_walk_thread_unsafe() while holding the
// memory config read lock. The return statement is on a line not shown.
777 rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
781 /* do not allow allocations/frees/init while we iterate */
782 rte_mcfg_mem_read_lock();
783 ret = rte_memseg_list_walk_thread_unsafe(func, arg);
784 rte_mcfg_mem_read_unlock();
// Look up the file descriptor backing a memseg, without taking the memory
// lock. The owning list is found from ms->addr; the list index is its
// position in mcfg->memsegs and the segment index comes from
// rte_fbarray_find_idx(). Both are passed to eal_memalloc_get_seg_fd().
// NOTE(review): argument validation, the error paths (unused segment,
// external segment, failed lookup) and the return statements are on lines not
// shown in this listing.
790 rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
792 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
793 struct rte_memseg_list *msl;
794 struct rte_fbarray *arr;
795 int msl_idx, seg_idx, ret;
802 msl = rte_mem_virt2memseg_list(ms->addr);
807 arr = &msl->memseg_arr;
809 msl_idx = msl - mcfg->memsegs;
810 seg_idx = rte_fbarray_find_idx(arr, ms);
// The segment must be marked used in the list fbarray.
812 if (!rte_fbarray_is_used(arr, seg_idx)) {
817 /* segment fd API is not supported for external segments */
823 ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
// Locked wrapper: runs rte_memseg_get_fd_thread_unsafe() while holding the
// memory config read lock. The return statement is on a line not shown.
832 rte_memseg_get_fd(const struct rte_memseg *ms)
836 rte_mcfg_mem_read_lock();
837 ret = rte_memseg_get_fd_thread_unsafe(ms);
838 rte_mcfg_mem_read_unlock();
// Look up the offset of a memseg inside its backing file descriptor, without
// taking the memory lock. Both ms and offset are tested for NULL; the owning
// list and the two indices are derived as in rte_memseg_get_fd_thread_unsafe()
// and passed to eal_memalloc_get_seg_fd_offset() together with offset.
// NOTE(review): the bodies of the error paths and the return statements are
// on lines not shown in this listing.
844 rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
847 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
848 struct rte_memseg_list *msl;
849 struct rte_fbarray *arr;
850 int msl_idx, seg_idx, ret;
852 if (ms == NULL || offset == NULL) {
857 msl = rte_mem_virt2memseg_list(ms->addr);
862 arr = &msl->memseg_arr;
864 msl_idx = msl - mcfg->memsegs;
865 seg_idx = rte_fbarray_find_idx(arr, ms);
// The segment must be marked used in the list fbarray.
867 if (!rte_fbarray_is_used(arr, seg_idx)) {
872 /* segment fd API is not supported for external segments */
878 ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
// Locked wrapper: runs rte_memseg_get_fd_offset_thread_unsafe() while holding
// the memory config read lock. The return statement is on a line not shown.
887 rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset)
891 rte_mcfg_mem_read_lock();
892 ret = rte_memseg_get_fd_offset_thread_unsafe(ms, offset);
893 rte_mcfg_mem_read_unlock();
// Register an externally allocated memory area as a new external memseg list.
//   va_addr    - start of the area; must be non-NULL and aligned to page_sz.
//   len        - size in bytes; must be non-zero and a multiple of page_sz.
//   iova_addrs - optional per-page IOVA table; when given, n_pages must equal
//                len / page_sz.
//   page_sz    - page size; must be a non-zero power of two.
// The work is done under the memory config write lock.
// NOTE(review): the bodies of the error paths, the computation of `n` and the
// return statement are on lines not shown in this listing.
899 rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
900 unsigned int n_pages, size_t page_sz)
902 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
903 unsigned int socket_id, n;
// Argument validation; see the parameter notes above.
906 if (va_addr == NULL || page_sz == 0 || len == 0 ||
907 !rte_is_power_of_2(page_sz) ||
908 RTE_ALIGN(len, page_sz) != len ||
909 ((len / page_sz) != n_pages && iova_addrs != NULL) ||
910 !rte_is_aligned(va_addr, page_sz)) {
914 rte_mcfg_mem_write_lock();
916 /* make sure the segment doesn't already exist */
917 if (malloc_heap_find_external_seg(va_addr, len) != NULL) {
923 /* get next available socket ID */
924 socket_id = mcfg->next_socket_id;
925 if (socket_id > INT32_MAX) {
926 RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n");
932 /* we can create a new memseg */
934 if (malloc_heap_create_external_seg(va_addr, iova_addrs, n,
935 page_sz, "extmem", socket_id) == NULL) {
940 /* memseg list successfully created - increment next socket ID */
941 mcfg->next_socket_id++;
943 rte_mcfg_mem_write_unlock();
// Unregister an external memory area: under the memory config write lock,
// find the external memseg list matching (va_addr, len) and destroy it with
// malloc_heap_destroy_external_seg().
// NOTE(review): the bodies of the error paths (bad arguments, segment not
// found) and the return statement are on lines not shown in this listing.
948 rte_extmem_unregister(void *va_addr, size_t len)
950 struct rte_memseg_list *msl;
953 if (va_addr == NULL || len == 0) {
957 rte_mcfg_mem_write_lock();
959 /* find our segment */
960 msl = malloc_heap_find_external_seg(va_addr, len);
967 ret = malloc_heap_destroy_external_seg(msl);
969 rte_mcfg_mem_write_unlock();
// Shared helper for rte_extmem_attach() and rte_extmem_detach(): under the
// memory config write lock, find the external memseg list matching
// (va_addr, len) and attach or detach its fbarray.
// NOTE(review): the branch on `attach`, the bodies of the error paths and the
// return statement are on lines not shown; presumably attach selects
// rte_fbarray_attach() and otherwise rte_fbarray_detach() - confirm.
974 sync_memory(void *va_addr, size_t len, bool attach)
976 struct rte_memseg_list *msl;
979 if (va_addr == NULL || len == 0) {
983 rte_mcfg_mem_write_lock();
985 /* find our segment */
986 msl = malloc_heap_find_external_seg(va_addr, len);
993 ret = rte_fbarray_attach(&msl->memseg_arr);
995 ret = rte_fbarray_detach(&msl->memseg_arr);
998 rte_mcfg_mem_write_unlock();
// Run sync_memory() with attach set to true.
1003 rte_extmem_attach(void *va_addr, size_t len)
1005 return sync_memory(va_addr, len, true);
// Run sync_memory() with attach set to false.
1009 rte_extmem_detach(void *va_addr, size_t len)
1011 return sync_memory(va_addr, len, false);
1014 /* detach all EAL memory */
// Detach this process from all EAL memory.
// Under the hotplug write lock: release memalloc data, then for each
// initialized memseg list unmap its virtual area and detach its fbarray.
// After the lock is dropped, the shared memory config itself is unmapped
// (when shared config is in use and mem_cfg_addr is set) and the pointer to
// it in the EAL configuration is cleared.
// Failures of the individual steps are logged.
// NOTE(review): the statement taken for in-memory mode, the skip statements
// inside the loop and the return statements are on lines not shown.
1016 rte_eal_memory_detach(void)
1018 const struct internal_config *internal_conf =
1019 eal_get_internal_configuration();
1020 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1021 size_t page_sz = rte_mem_page_size();
1024 if (internal_conf->in_memory == 1)
1027 rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
1029 /* detach internal memory subsystem data first */
1030 if (eal_memalloc_cleanup())
1031 RTE_LOG(ERR, EAL, "Could not release memory subsystem data\n");
1033 for (i = 0; i < RTE_DIM(mcfg->memsegs); i++) {
1034 struct rte_memseg_list *msl = &mcfg->memsegs[i];
1036 /* skip uninitialized segments */
1037 if (msl->base_va == NULL)
1040 * external segments are supposed to be detached at this point,
1041 * but if they aren't, we can't really do anything about it,
1042 * because if we skip them here, they'll become invalid after
1043 * we unmap the memconfig anyway. however, if this is externally
1044 * referenced memory, we have no business unmapping it.
1047 if (rte_mem_unmap(msl->base_va, msl->len) != 0)
1048 RTE_LOG(ERR, EAL, "Could not unmap memory: %s\n",
1049 rte_strerror(rte_errno));
1052 * we are detaching the fbarray rather than destroying because
1053 * other processes might still reference this fbarray, and we
1054 * have no way of knowing if they still do.
1056 if (rte_fbarray_detach(&msl->memseg_arr))
1057 RTE_LOG(ERR, EAL, "Could not detach fbarray: %s\n",
1058 rte_strerror(rte_errno));
1060 rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
1063 * we've detached the memseg lists, so we can unmap the shared mem
1064 * config - we can't zero it out because it might still be referenced
1065 * by other processes.
// The unmapped length is sizeof(*mcfg) rounded up to the system page size.
1067 if (internal_conf->no_shconf == 0 && mcfg->mem_cfg_addr != 0) {
1068 if (rte_mem_unmap(mcfg, RTE_ALIGN(sizeof(*mcfg), page_sz)) != 0)
1069 RTE_LOG(ERR, EAL, "Could not unmap shared memory config: %s\n",
1070 rte_strerror(rte_errno));
1072 rte_eal_get_configuration()->mem_config = NULL;
1077 /* init memory subsystem */
// Initialise the EAL memory subsystem while holding the memory config read
// lock. Visible steps, in order: rte_eal_memseg_init(), eal_memalloc_init(),
// then rte_eal_hugepage_init() in the primary process or
// rte_eal_hugepage_attach() otherwise, and finally rte_eal_memdevice_init()
// when shared config is in use.
// NOTE(review): the statements taken when a step fails, the check of retval
// and the return statements are on lines not shown in this listing.
1079 rte_eal_memory_init(void)
1081 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1082 const struct internal_config *internal_conf =
1083 eal_get_internal_configuration();
1086 RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
1091 /* lock mem hotplug here, to prevent races while we init */
1092 rte_mcfg_mem_read_lock();
1094 if (rte_eal_memseg_init() < 0)
1097 if (eal_memalloc_init() < 0)
1100 retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
1101 rte_eal_hugepage_init() :
1102 rte_eal_hugepage_attach();
1106 if (internal_conf->no_shconf == 0 && rte_eal_memdevice_init() < 0)
1111 rte_mcfg_mem_read_unlock();
1115 #ifndef RTE_EXEC_ENV_WINDOWS
1116 #define EAL_MEMZONE_LIST_REQ "/eal/memzone_list"
1117 #define EAL_MEMZONE_INFO_REQ "/eal/memzone_info"
1118 #define EAL_HEAP_LIST_REQ "/eal/heap_list"
1119 #define EAL_HEAP_INFO_REQ "/eal/heap_info"
1122 /* Telemetry callback handler to return heap stats for requested heap id. */
1124 handle_eal_heap_info_request(const char *cmd __rte_unused, const char *params,
1125 struct rte_tel_data *d)
1127 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1128 struct rte_malloc_socket_stats sock_stats;
1129 struct malloc_heap *heap;
1130 unsigned int heap_id;
1132 if (params == NULL || strlen(params) == 0)
1135 heap_id = (unsigned int)strtoul(params, NULL, 10);
1137 /* Get the heap stats of user provided heap id */
1138 heap = &mcfg->malloc_heaps[heap_id];
1139 malloc_heap_get_stats(heap, &sock_stats);
1141 rte_tel_data_start_dict(d);
1142 rte_tel_data_add_dict_int(d, "Head id", heap_id);
1143 rte_tel_data_add_dict_string(d, "Name", heap->name);
1144 rte_tel_data_add_dict_u64(d, "Heap_size",
1145 sock_stats.heap_totalsz_bytes);
1146 rte_tel_data_add_dict_u64(d, "Free_size", sock_stats.heap_freesz_bytes);
1147 rte_tel_data_add_dict_u64(d, "Alloc_size",
1148 sock_stats.heap_allocsz_bytes);
1149 rte_tel_data_add_dict_u64(d, "Greatest_free_size",
1150 sock_stats.greatest_free_size);
1151 rte_tel_data_add_dict_u64(d, "Alloc_count", sock_stats.alloc_count);
1152 rte_tel_data_add_dict_u64(d, "Free_count", sock_stats.free_count);
1157 /* Telemetry callback handler to list the heap ids setup. */
// Telemetry callback that fills d with an integer array of heap ids.
// Every slot in [0, RTE_MAX_HEAPS) is queried with malloc_heap_get_stats()
// and its id is added only when the reported total heap size is non-zero.
// NOTE(review): the return statement is on a line not shown in this listing.
1159 handle_eal_heap_list_request(const char *cmd __rte_unused,
1160 const char *params __rte_unused,
1161 struct rte_tel_data *d)
1163 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1164 struct rte_malloc_socket_stats sock_stats;
1165 unsigned int heap_id;
1167 rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
1168 /* Iterate through all initialised heaps */
1169 for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
1170 struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
1172 malloc_heap_get_stats(heap, &sock_stats);
1173 if (sock_stats.heap_totalsz_bytes != 0)
1174 rte_tel_data_add_array_int(d, heap_id);
1180 /* Telemetry callback handler to return memzone info for requested index. */
1182 handle_eal_memzone_info_request(const char *cmd __rte_unused,
1183 const char *params, struct rte_tel_data *d)
1185 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1186 struct rte_memseg_list *msl = NULL;
1187 int ms_idx, ms_count = 0;
1188 void *cur_addr, *mz_end;
1189 struct rte_memzone *mz;
1190 struct rte_memseg *ms;
1191 char addr[ADDR_STR];
1192 unsigned int mz_idx;
1195 if (params == NULL || strlen(params) == 0)
1198 mz_idx = strtoul(params, NULL, 10);
1200 /* Get the memzone handle using index */
1201 mz = rte_fbarray_get(&mcfg->memzones, mz_idx);
1203 rte_tel_data_start_dict(d);
1204 rte_tel_data_add_dict_int(d, "Zone", mz_idx);
1205 rte_tel_data_add_dict_string(d, "Name", mz->name);
1206 rte_tel_data_add_dict_int(d, "Length", mz->len);
1207 snprintf(addr, ADDR_STR, "%p", mz->addr);
1208 rte_tel_data_add_dict_string(d, "Address", addr);
1209 rte_tel_data_add_dict_int(d, "Socket", mz->socket_id);
1210 rte_tel_data_add_dict_int(d, "Flags", mz->flags);
1212 /* go through each page occupied by this memzone */
1213 msl = rte_mem_virt2memseg_list(mz->addr);
1215 RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n");
1218 page_sz = (size_t)mz->hugepage_sz;
1219 cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, page_sz);
1220 mz_end = RTE_PTR_ADD(cur_addr, mz->len);
1222 ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / page_sz;
1223 ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
1225 rte_tel_data_add_dict_int(d, "Hugepage_size", page_sz);
1226 snprintf(addr, ADDR_STR, "%p", ms->addr);
1227 rte_tel_data_add_dict_string(d, "Hugepage_base", addr);
1230 /* advance VA to next page */
1231 cur_addr = RTE_PTR_ADD(cur_addr, page_sz);
1233 /* memzones occupy contiguous segments */
1236 } while (cur_addr < mz_end);
1238 rte_tel_data_add_dict_int(d, "Hugepage_used", ms_count);
// Memzone walk callback: appends the index of mz within the memzone fbarray
// to the telemetry array passed through arg.
// NOTE(review): both parameters carry __rte_unused although the visible body
// reads them; the annotation looks stale - confirm before removing.
1244 memzone_list_cb(const struct rte_memzone *mz __rte_unused,
1245 void *arg __rte_unused)
1247 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1248 struct rte_tel_data *d = arg;
1251 mz_idx = rte_fbarray_find_idx(&mcfg->memzones, mz);
1252 rte_tel_data_add_array_int(d, mz_idx);
1256 /* Telemetry callback handler to list the memzones reserved. */
// Telemetry callback that fills d with an integer array of memzone indexes by
// walking all memzones with memzone_list_cb().
// NOTE(review): the return statement is on a line not shown in this listing.
1258 handle_eal_memzone_list_request(const char *cmd __rte_unused,
1259 const char *params __rte_unused,
1260 struct rte_tel_data *d)
1262 rte_tel_data_start_array(d, RTE_TEL_INT_VAL);
1263 rte_memzone_walk(memzone_list_cb, d);
// Constructor registered through RTE_INIT: binds the four telemetry endpoints
// defined by the EAL_*_REQ macros to their handlers, each with a help string.
1268 RTE_INIT(memory_telemetry)
1270 rte_telemetry_register_cmd(
1271 EAL_MEMZONE_LIST_REQ, handle_eal_memzone_list_request,
1272 "List of memzone index reserved. Takes no parameters");
1273 rte_telemetry_register_cmd(
1274 EAL_MEMZONE_INFO_REQ, handle_eal_memzone_info_request,
1275 "Returns memzone info. Parameters: int mz_id");
1276 rte_telemetry_register_cmd(
1277 EAL_HEAP_LIST_REQ, handle_eal_heap_list_request,
1278 "List of heap index setup. Takes no parameters");
1279 rte_telemetry_register_cmd(
1280 EAL_HEAP_INFO_REQ, handle_eal_heap_info_request,
1281 "Returns malloc heap stats. Parameters: int heap_id");