/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation.
 * Copyright(c) 2013 6WIND S.A.
 */

#include <inttypes.h>
#include <string.h>

#include <rte_log.h>
#include <rte_string_fns.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"

/** @file Functions common to EALs that support dynamic memory allocation. */

int
eal_dynmem_memseg_lists_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct memtype {
		uint64_t page_sz;
		int socket_id;
	} *memtypes = NULL;
	int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
	struct rte_memseg_list *msl;
	uint64_t max_mem, max_mem_per_type;
	unsigned int max_seglists_per_type;
	unsigned int n_memtypes, cur_type;

	/* no-huge does not need this at all */
	if (internal_config.no_hugetlbfs)
		return 0;

	/*
	 * figuring out amount of memory we're going to have is a long and very
	 * involved process. the basic element we're operating with is a memory
	 * type, defined as a combination of NUMA node ID and page size (so that
	 * e.g. 2 sockets with 2 page sizes yield 4 memory types in total).
	 *
	 * deciding amount of memory going towards each memory type is a
	 * balancing act between maximum segments per type, maximum memory per
	 * type, and number of detected NUMA nodes. the goal is to make sure
	 * each memory type gets at least one memseg list.
	 *
	 * the total amount of memory is limited by RTE_MAX_MEM_MB value.
	 *
	 * the total amount of memory per type is limited by either
	 * RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
	 * of detected NUMA nodes. additionally, maximum number of segments per
	 * type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for
	 * smaller page sizes, it can take hundreds of thousands of segments to
	 * reach the above specified per-type memory limits.
	 *
	 * additionally, each type may have multiple memseg lists associated
	 * with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
	 * page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones.
	 *
	 * the number of memseg lists per type is decided based on the above
	 * limits, and also taking number of detected NUMA nodes, to make sure
	 * that we don't run out of memseg lists before we populate all NUMA
	 * nodes with memtypes.
	 *
	 * we do this in three stages. first, we collect the number of types.
	 * then, we figure out memory constraints and populate the list of
	 * would-be memseg lists. then, we go ahead and allocate the memseg
	 * lists.
	 */
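
	/*
	 * a worked example with hypothetical values, for illustration only:
	 * with 2 sockets and 2 page sizes there are 4 memory types. if
	 * RTE_MAX_MEM_MB were 512G, each type would be capped at
	 * min(RTE_MAX_MEM_MB_PER_TYPE, 512G / 4) of memory and at
	 * RTE_MAX_MEMSEG_LISTS / 4 memseg lists, so that every type is
	 * guaranteed a share of lists before any one type exhausts them.
	 */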

	/* create space for mem types */
	n_memtypes = internal_config.num_hugepage_sizes * rte_socket_count();
	memtypes = calloc(n_memtypes, sizeof(*memtypes));
	if (memtypes == NULL) {
		RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n");
		return -1;
	}

	/* populate mem types */
	cur_type = 0;
	for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
			hpi_idx++) {
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_config.hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
			int socket_id = rte_socket_id_by_idx(i);

#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
			/* we can still sort pages by socket in legacy mode */
			if (!internal_config.legacy_mem && socket_id > 0)
				break;
#endif
			memtypes[cur_type].page_sz = hugepage_sz;
			memtypes[cur_type].socket_id = socket_id;

			RTE_LOG(DEBUG, EAL, "Detected memory type: "
				"socket_id:%u hugepage_sz:%" PRIu64 "\n",
				socket_id, hugepage_sz);
		}
	}
	/* number of memtypes could have been lower due to no NUMA support */
	n_memtypes = cur_type;

	/* set up limits for types */
	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
			max_mem / n_memtypes);
	/*
	 * limit maximum number of segment lists per type to ensure there's
	 * space for memseg lists for all NUMA nodes with all page sizes
	 */
	max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;
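
	/*
	 * e.g. with 4 memory types and an illustrative RTE_MAX_MEMSEG_LISTS
	 * of 128, each type may take up to 32 memseg lists.
	 */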

	if (max_seglists_per_type == 0) {
		RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n",
			RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
		goto out;
	}

	/* go through all mem types and create segment lists */
	msl_idx = 0;
	for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
		unsigned int cur_seglist, n_seglists, n_segs;
		unsigned int max_segs_per_type, max_segs_per_list;
		struct memtype *type = &memtypes[cur_type];
		uint64_t max_mem_per_list, pagesz;
		int socket_id;

		pagesz = type->page_sz;
		socket_id = type->socket_id;

		/*
		 * we need to create segment lists for this type. we must take
		 * into account the following things:
		 *
		 * 1. total amount of memory we can use for this memory type
		 * 2. total amount of memory per memseg list allowed
		 * 3. number of segments needed to fit the amount of memory
		 * 4. number of segments allowed per type
		 * 5. number of segments allowed per memseg list
		 * 6. number of memseg lists we are allowed to take up
		 */

		/* calculate how many segments we will need in total */
		max_segs_per_type = max_mem_per_type / pagesz;
		/* limit number of segments to maximum allowed per type */
		max_segs_per_type = RTE_MIN(max_segs_per_type,
				(unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
		/* limit number of segments to maximum allowed per list */
		max_segs_per_list = RTE_MIN(max_segs_per_type,
				(unsigned int)RTE_MAX_MEMSEG_PER_LIST);

		/* calculate how much memory we can have per segment list */
		max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
				(uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);

		/* calculate how many segments each segment list will have */
		n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);

		/* calculate how many segment lists we can have */
		n_seglists = RTE_MIN(max_segs_per_type / n_segs,
				max_mem_per_type / max_mem_per_list);

		/* limit number of segment lists according to our maximum */
		n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);
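
		/*
		 * a worked example with hypothetical limits (not necessarily
		 * the build defaults): 2M pages, 64G per-type memory cap,
		 * 32768 segments per type, 8192 segments per list, 32G per
		 * list. then max_segs_per_type = 32768, max_segs_per_list =
		 * 8192, max_mem_per_list = min(8192 * 2M, 32G) = 16G,
		 * n_segs = 8192, and n_seglists = min(32768 / 8192,
		 * 64G / 16G) = 4 lists of 16G each.
		 */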

		RTE_LOG(DEBUG, EAL, "Creating %i segment lists: "
				"n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n",
			n_seglists, n_segs, socket_id, pagesz);

		/* create all segment lists */
		for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
			if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL,
					"No more space in memseg lists, please increase %s\n",
					RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
				goto out;
			}
			msl = &mcfg->memsegs[msl_idx++];

			if (eal_memseg_list_init(msl, pagesz, n_segs,
					socket_id, cur_seglist, true))
				goto out;

			if (eal_memseg_list_alloc(msl, 0)) {
				RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
				goto out;
			}
		}
	}

	/* we're successful */
	ret = 0;
out:
	free(memtypes);
	return ret;
}

static int __rte_unused
hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct hugepage_info *hpi = arg;

	if (msl->page_sz != hpi->hugepage_sz)
		return 0;

	hpi->num_pages[msl->socket_id] += msl->memseg_arr.len;
	return 0;
}
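
/*
 * validator registered below under the "socket-limit" name; the memory
 * allocator is expected to invoke it only for allocations that would take a
 * socket past its registered limit, so rejecting unconditionally turns the
 * limit into a hard cap.
 */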
static int
limits_callback(int socket_id, size_t cur_limit, size_t new_len)
{
	RTE_SET_USED(socket_id);
	RTE_SET_USED(cur_limit);
	RTE_SET_USED(new_len);
	return -1;
}

int
eal_dynmem_hugepage_init(void)
{
	struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
	uint64_t memory[RTE_MAX_NUMA_NODES];
	int hp_sz_idx, socket_id;

	memset(used_hp, 0, sizeof(used_hp));

	for (hp_sz_idx = 0;
			hp_sz_idx < (int) internal_config.num_hugepage_sizes;
			hp_sz_idx++) {
#ifndef RTE_ARCH_64
		struct hugepage_info dummy;
		unsigned int i;
#endif
		/* also initialize hugepage sizes in used_hp */
		struct hugepage_info *hpi;
		hpi = &internal_config.hugepage_info[hp_sz_idx];
		used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz;

#ifndef RTE_ARCH_64
		/* for 32-bit, limit number of pages on socket to whatever we've
		 * preallocated, as we cannot allocate more.
		 */
		memset(&dummy, 0, sizeof(dummy));
		dummy.hugepage_sz = hpi->hugepage_sz;
		if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0)
			return -1;

		for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
			hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i],
					dummy.num_pages[i]);
		}
#endif
	}

	/* make a copy of socket_mem, needed for balanced allocation. */
	for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++)
		memory[hp_sz_idx] = internal_config.socket_mem[hp_sz_idx];

	/* calculate final number of pages */
	if (eal_dynmem_calc_num_pages_per_socket(memory,
			internal_config.hugepage_info, used_hp,
			internal_config.num_hugepage_sizes) < 0)
		return -1;

	for (hp_sz_idx = 0;
			hp_sz_idx < (int)internal_config.num_hugepage_sizes;
			hp_sz_idx++) {
		for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES;
				socket_id++) {
			struct rte_memseg **pages;
			struct hugepage_info *hpi = &used_hp[hp_sz_idx];
			unsigned int num_pages = hpi->num_pages[socket_id];
			unsigned int num_pages_alloc;

			if (num_pages == 0)
				continue;

			RTE_LOG(DEBUG, EAL,
				"Allocating %u pages of size %" PRIu64 "M "
				"on socket %i\n",
				num_pages, hpi->hugepage_sz >> 20, socket_id);

			/* we may not be able to allocate all pages in one go,
			 * because we break up our memory map into multiple
			 * memseg lists. therefore, try allocating multiple
			 * times and see if we can get the desired number of
			 * pages from multiple allocations.
			 */
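
			/*
			 * the last argument to eal_memalloc_alloc_seg_bulk()
			 * is false here, which should let the call return
			 * fewer pages than requested rather than fail; each
			 * iteration then asks only for what is still missing.
			 */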

			num_pages_alloc = 0;
			do {
				int i, cur_pages, needed;

				needed = num_pages - num_pages_alloc;

				pages = malloc(sizeof(*pages) * needed);
				if (pages == NULL)
					return -1;

				/* do not request exact number of pages */
				cur_pages = eal_memalloc_alloc_seg_bulk(pages,
						needed, hpi->hugepage_sz,
						socket_id, false);
				if (cur_pages <= 0) {
					free(pages);
					return -1;
				}

				/* mark preallocated pages as unfreeable */
				for (i = 0; i < cur_pages; i++) {
					struct rte_memseg *ms = pages[i];
					ms->flags |=
						RTE_MEMSEG_FLAG_DO_NOT_FREE;
				}
				free(pages);

				num_pages_alloc += cur_pages;
			} while (num_pages_alloc != num_pages);
		}
	}

	/* if socket limits were specified, set them */
	if (internal_config.force_socket_limits) {
		unsigned int i;

		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
			uint64_t limit = internal_config.socket_limit[i];

			if (limit == 0)
				continue;

			if (rte_mem_alloc_validator_register("socket-limit",
					limits_callback, i, limit))
				RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n");
		}
	}

	return 0;
}
__rte_unused /* function is unused on 32-bit builds */
static inline uint64_t
get_socket_mem_size(int socket)
{
	uint64_t size = 0;
	unsigned int i;

	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
		size += hpi->hugepage_sz * hpi->num_pages[socket];
	}

	return size;
}

int
eal_dynmem_calc_num_pages_per_socket(
	uint64_t *memory, struct hugepage_info *hp_info,
	struct hugepage_info *hp_used, unsigned int num_hp_info)
{
	unsigned int socket, j, i = 0;
	unsigned int requested, available;
	int total_num_pages = 0;
	uint64_t remaining_mem, cur_mem;
	uint64_t total_mem = internal_config.memory;

	if (num_hp_info == 0)
		return -1;

	/* if specific memory amounts per socket weren't requested */
	if (internal_config.force_sockets == 0) {
		size_t total_size;
#ifdef RTE_ARCH_64
		int cpu_per_socket[RTE_MAX_NUMA_NODES];
		size_t default_size;
		unsigned int lcore_id;

		/* Compute number of cores per socket */
		memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
		RTE_LCORE_FOREACH(lcore_id) {
			cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
		}

		/*
		 * Automatically spread requested memory amongst detected
		 * sockets according to number of cores from CPU mask present
		 * on each socket.
		 */
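		/*
		 * illustrative only: "-m 2048" with three lcores on socket 0
		 * and one on socket 1 assigns 2048 * 3 / 4 = 1536 MB to
		 * socket 0 and 512 MB to socket 1, each then clamped to the
		 * hugepage memory actually available on that socket.
		 */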
		total_size = internal_config.memory;
		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
				socket++) {

			/* Set memory amount per socket */
			default_size = internal_config.memory *
				cpu_per_socket[socket] / rte_lcore_count();

			/* Limit to maximum available memory on socket */
			default_size = RTE_MIN(
				default_size, get_socket_mem_size(socket));

			/* Update sizes */
			memory[socket] = default_size;
			total_size -= default_size;
		}

		/*
		 * If some memory is remaining, try to allocate it by getting
		 * all available memory from sockets, one after the other.
		 */
		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
				socket++) {
			/* take whatever is available */
			default_size = RTE_MIN(
				get_socket_mem_size(socket) - memory[socket],
				total_size);

			/* Update sizes */
			memory[socket] += default_size;
			total_size -= default_size;
		}
#else
		/* in 32-bit mode, allocate all of the memory only on master
		 * lcore socket
		 */
		total_size = internal_config.memory;
		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
				socket++) {
			struct rte_config *cfg = rte_eal_get_configuration();
			unsigned int master_lcore_socket;

			master_lcore_socket =
				rte_lcore_to_socket_id(cfg->master_lcore);

			if (master_lcore_socket != socket)
				continue;

			/* Update sizes */
			memory[socket] = total_size;
			total_size = 0;
		}
#endif
	}
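
	/*
	 * fit each socket's request using the largest page size first.
	 * hypothetical walk-through: a 3000 MB request on a socket with 1G
	 * and 2M pages takes two 1G pages (2048 MB), then covers the
	 * remaining 952 MB with 476 2M pages; if the smaller sizes could not
	 * cover the remainder, one extra 1G page would be taken instead and
	 * the socket would be finished.
	 */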
	for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0;
			socket++) {
		/* skip if no memory was requested for this socket */
		for (i = 0; i < num_hp_info && memory[socket] != 0; i++) {
			rte_strscpy(hp_used[i].hugedir, hp_info[i].hugedir,
					sizeof(hp_used[i].hugedir));
			hp_used[i].num_pages[socket] = RTE_MIN(
					memory[socket] / hp_info[i].hugepage_sz,
					hp_info[i].num_pages[socket]);

			cur_mem = hp_used[i].num_pages[socket] *
					hp_used[i].hugepage_sz;

			memory[socket] -= cur_mem;
			total_mem -= cur_mem;

			total_num_pages += hp_used[i].num_pages[socket];

			/* check if we have met all memory requests */
			if (memory[socket] == 0)
				break;

			/* If there are no more pages left at this size,
			 * move on to the next size.
			 */
			if (hp_used[i].num_pages[socket] ==
					hp_info[i].num_pages[socket])
				continue;
			/* At this point we know that there are more pages
			 * available that are bigger than the memory we want,
			 * so let's see if we can get enough from other page
			 * sizes.
			 */
			remaining_mem = 0;
			for (j = i + 1; j < num_hp_info; j++)
				remaining_mem += hp_info[j].hugepage_sz *
						hp_info[j].num_pages[socket];

			/* Is there enough other memory?
			 * If not, allocate another page and quit.
			 */
			if (remaining_mem < memory[socket]) {
				cur_mem = RTE_MIN(
					memory[socket], hp_info[i].hugepage_sz);
				memory[socket] -= cur_mem;
				total_mem -= cur_mem;
				hp_used[i].num_pages[socket]++;

				/* we are done with this socket */
				break;
			}
		}

		/* if we didn't satisfy all memory requirements per socket */
		if (memory[socket] > 0 &&
				internal_config.socket_mem[socket] != 0) {
			/* to prevent icc errors */
			requested = (unsigned int)(
				internal_config.socket_mem[socket] / 0x100000);
			available = requested -
				((unsigned int)(memory[socket] / 0x100000));
			RTE_LOG(ERR, EAL, "Not enough memory available on "
				"socket %u! Requested: %uMB, available: %uMB\n",
				socket, requested, available);
			return -1;
		}
	}

	/* if we didn't satisfy total memory requirements */
	if (total_mem > 0) {
		requested = (unsigned int)(internal_config.memory / 0x100000);
		available = requested - (unsigned int)(total_mem / 0x100000);
		RTE_LOG(ERR, EAL, "Not enough memory available! "
			"Requested: %uMB, available: %uMB\n",
			requested, available);
		return -1;
	}
	return total_num_pages;
}