mem: extract common dynamic memory allocation
authorDmitry Kozlyuk <dmitry.kozliuk@gmail.com>
Mon, 15 Jun 2020 00:43:47 +0000 (03:43 +0300)
committerThomas Monjalon <thomas@monjalon.net>
Mon, 15 Jun 2020 17:26:37 +0000 (19:26 +0200)
Code in Linux EAL that supports dynamic memory allocation (as opposed to
static allocation used by FreeBSD) is not OS-dependent and can be reused
by Windows EAL. Move such code to a file compiled only for the OS that
require it. Keep Anatoly Burakov maintainer of extracted code.

Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
MAINTAINERS
lib/librte_eal/common/eal_common_dynmem.c [new file with mode: 0644]
lib/librte_eal/common/eal_private.h
lib/librte_eal/common/meson.build
lib/librte_eal/freebsd/eal_memory.c
lib/librte_eal/linux/Makefile
lib/librte_eal/linux/eal_memory.c

index 8d0ea2a..687d5cd 100644 (file)
@@ -209,6 +209,7 @@ F: lib/librte_eal/include/rte_fbarray.h
 F: lib/librte_eal/include/rte_mem*
 F: lib/librte_eal/include/rte_malloc.h
 F: lib/librte_eal/common/*malloc*
+F: lib/librte_eal/common/eal_common_dynmem.c
 F: lib/librte_eal/common/eal_common_fbarray.c
 F: lib/librte_eal/common/eal_common_mem*
 F: lib/librte_eal/common/eal_hugepages.h
diff --git a/lib/librte_eal/common/eal_common_dynmem.c b/lib/librte_eal/common/eal_common_dynmem.c
new file mode 100644 (file)
index 0000000..6b07672
--- /dev/null
@@ -0,0 +1,521 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2013 6WIND S.A.
+ */
+
+#include <inttypes.h>
+#include <string.h>
+
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+#include "eal_private.h"
+
+/** @file Functions common to EALs that support dynamic memory allocation. */
+
+int
+eal_dynmem_memseg_lists_init(void)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct memtype {
+               uint64_t page_sz;
+               int socket_id;
+       } *memtypes = NULL;
+       int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
+       struct rte_memseg_list *msl;
+       uint64_t max_mem, max_mem_per_type;
+       unsigned int max_seglists_per_type;
+       unsigned int n_memtypes, cur_type;
+
+       /* no-huge does not need this at all */
+       if (internal_config.no_hugetlbfs)
+               return 0;
+
+       /*
+        * figuring out amount of memory we're going to have is a long and very
+        * involved process. the basic element we're operating with is a memory
+        * type, defined as a combination of NUMA node ID and page size (so that
+        * e.g. 2 sockets with 2 page sizes yield 4 memory types in total).
+        *
+        * deciding amount of memory going towards each memory type is a
+        * balancing act between maximum segments per type, maximum memory per
+        * type, and number of detected NUMA nodes. the goal is to make sure
+        * each memory type gets at least one memseg list.
+        *
+        * the total amount of memory is limited by RTE_MAX_MEM_MB value.
+        *
+        * the total amount of memory per type is limited by either
+        * RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
+        * of detected NUMA nodes. additionally, maximum number of segments per
+        * type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for
+        * smaller page sizes, it can take hundreds of thousands of segments to
+        * reach the above specified per-type memory limits.
+        *
+        * additionally, each type may have multiple memseg lists associated
+        * with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
+        * page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones.
+        *
+        * the number of memseg lists per type is decided based on the above
+        * limits, and also taking number of detected NUMA nodes, to make sure
+        * that we don't run out of memseg lists before we populate all NUMA
+        * nodes with memory.
+        *
+        * we do this in three stages. first, we collect the number of types.
+        * then, we figure out memory constraints and populate the list of
+        * would-be memseg lists. then, we go ahead and allocate the memseg
+        * lists.
+        */
+
+       /* create space for mem types */
+       n_memtypes = internal_config.num_hugepage_sizes * rte_socket_count();
+       memtypes = calloc(n_memtypes, sizeof(*memtypes));
+       if (memtypes == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n");
+               return -1;
+       }
+
+       /* populate mem types */
+       cur_type = 0;
+       for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+                       hpi_idx++) {
+               struct hugepage_info *hpi;
+               uint64_t hugepage_sz;
+
+               hpi = &internal_config.hugepage_info[hpi_idx];
+               hugepage_sz = hpi->hugepage_sz;
+
+               for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
+                       int socket_id = rte_socket_id_by_idx(i);
+
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+                       /* we can still sort pages by socket in legacy mode */
+                       if (!internal_config.legacy_mem && socket_id > 0)
+                               break;
+#endif
+                       memtypes[cur_type].page_sz = hugepage_sz;
+                       memtypes[cur_type].socket_id = socket_id;
+
+                       RTE_LOG(DEBUG, EAL, "Detected memory type: "
+                               "socket_id:%u hugepage_sz:%" PRIu64 "\n",
+                               socket_id, hugepage_sz);
+               }
+       }
+       /* number of memtypes could have been lower due to no NUMA support */
+       n_memtypes = cur_type;
+
+       /* set up limits for types */
+       max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+       max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
+                       max_mem / n_memtypes);
+       /*
+        * limit maximum number of segment lists per type to ensure there's
+        * space for memseg lists for all NUMA nodes with all page sizes
+        */
+       max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;
+
+       if (max_seglists_per_type == 0) {
+               RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n",
+                       RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+               goto out;
+       }
+
+       /* go through all mem types and create segment lists */
+       msl_idx = 0;
+       for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
+               unsigned int cur_seglist, n_seglists, n_segs;
+               unsigned int max_segs_per_type, max_segs_per_list;
+               struct memtype *type = &memtypes[cur_type];
+               uint64_t max_mem_per_list, pagesz;
+               int socket_id;
+
+               pagesz = type->page_sz;
+               socket_id = type->socket_id;
+
+               /*
+                * we need to create segment lists for this type. we must take
+                * into account the following things:
+                *
+                * 1. total amount of memory we can use for this memory type
+                * 2. total amount of memory per memseg list allowed
+                * 3. number of segments needed to fit the amount of memory
+                * 4. number of segments allowed per type
+                * 5. number of segments allowed per memseg list
+                * 6. number of memseg lists we are allowed to take up
+                */
+
+               /* calculate how much segments we will need in total */
+               max_segs_per_type = max_mem_per_type / pagesz;
+               /* limit number of segments to maximum allowed per type */
+               max_segs_per_type = RTE_MIN(max_segs_per_type,
+                               (unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
+               /* limit number of segments to maximum allowed per list */
+               max_segs_per_list = RTE_MIN(max_segs_per_type,
+                               (unsigned int)RTE_MAX_MEMSEG_PER_LIST);
+
+               /* calculate how much memory we can have per segment list */
+               max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
+                               (uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);
+
+               /* calculate how many segments each segment list will have */
+               n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);
+
+               /* calculate how many segment lists we can have */
+               n_seglists = RTE_MIN(max_segs_per_type / n_segs,
+                               max_mem_per_type / max_mem_per_list);
+
+               /* limit number of segment lists according to our maximum */
+               n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);
+
+               RTE_LOG(DEBUG, EAL, "Creating %i segment lists: "
+                               "n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n",
+                       n_seglists, n_segs, socket_id, pagesz);
+
+               /* create all segment lists */
+               for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
+                       if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+                               RTE_LOG(ERR, EAL,
+                                       "No more space in memseg lists, please increase %s\n",
+                                       RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+                               goto out;
+                       }
+                       msl = &mcfg->memsegs[msl_idx++];
+
+                       if (eal_memseg_list_init(msl, pagesz, n_segs,
+                                       socket_id, cur_seglist, true))
+                               goto out;
+
+                       if (eal_memseg_list_alloc(msl, 0)) {
+                               RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+                               goto out;
+                       }
+               }
+       }
+       /* we're successful */
+       ret = 0;
+out:
+       free(memtypes);
+       return ret;
+}
+
+static int __rte_unused
+hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
+{
+       struct hugepage_info *hpi = arg;
+
+       if (msl->page_sz != hpi->hugepage_sz)
+               return 0;
+
+       hpi->num_pages[msl->socket_id] += msl->memseg_arr.len;
+       return 0;
+}
+
+static int
+limits_callback(int socket_id, size_t cur_limit, size_t new_len)
+{
+       RTE_SET_USED(socket_id);
+       RTE_SET_USED(cur_limit);
+       RTE_SET_USED(new_len);
+       return -1;
+}
+
+int
+eal_dynmem_hugepage_init(void)
+{
+       struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
+       uint64_t memory[RTE_MAX_NUMA_NODES];
+       int hp_sz_idx, socket_id;
+
+       memset(used_hp, 0, sizeof(used_hp));
+
+       for (hp_sz_idx = 0;
+                       hp_sz_idx < (int) internal_config.num_hugepage_sizes;
+                       hp_sz_idx++) {
+#ifndef RTE_ARCH_64
+               struct hugepage_info dummy;
+               unsigned int i;
+#endif
+               /* also initialize used_hp hugepage sizes in used_hp */
+               struct hugepage_info *hpi;
+               hpi = &internal_config.hugepage_info[hp_sz_idx];
+               used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz;
+
+#ifndef RTE_ARCH_64
+               /* for 32-bit, limit number of pages on socket to whatever we've
+                * preallocated, as we cannot allocate more.
+                */
+               memset(&dummy, 0, sizeof(dummy));
+               dummy.hugepage_sz = hpi->hugepage_sz;
+               if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0)
+                       return -1;
+
+               for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
+                       hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i],
+                                       dummy.num_pages[i]);
+               }
+#endif
+       }
+
+       /* make a copy of socket_mem, needed for balanced allocation. */
+       for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++)
+               memory[hp_sz_idx] = internal_config.socket_mem[hp_sz_idx];
+
+       /* calculate final number of pages */
+       if (eal_dynmem_calc_num_pages_per_socket(memory,
+                       internal_config.hugepage_info, used_hp,
+                       internal_config.num_hugepage_sizes) < 0)
+               return -1;
+
+       for (hp_sz_idx = 0;
+                       hp_sz_idx < (int)internal_config.num_hugepage_sizes;
+                       hp_sz_idx++) {
+               for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES;
+                               socket_id++) {
+                       struct rte_memseg **pages;
+                       struct hugepage_info *hpi = &used_hp[hp_sz_idx];
+                       unsigned int num_pages = hpi->num_pages[socket_id];
+                       unsigned int num_pages_alloc;
+
+                       if (num_pages == 0)
+                               continue;
+
+                       RTE_LOG(DEBUG, EAL,
+                               "Allocating %u pages of size %" PRIu64 "M "
+                               "on socket %i\n",
+                               num_pages, hpi->hugepage_sz >> 20, socket_id);
+
+                       /* we may not be able to allocate all pages in one go,
+                        * because we break up our memory map into multiple
+                        * memseg lists. therefore, try allocating multiple
+                        * times and see if we can get the desired number of
+                        * pages from multiple allocations.
+                        */
+
+                       num_pages_alloc = 0;
+                       do {
+                               int i, cur_pages, needed;
+
+                               needed = num_pages - num_pages_alloc;
+
+                               pages = malloc(sizeof(*pages) * needed);
+
+                               /* do not request exact number of pages */
+                               cur_pages = eal_memalloc_alloc_seg_bulk(pages,
+                                               needed, hpi->hugepage_sz,
+                                               socket_id, false);
+                               if (cur_pages <= 0) {
+                                       free(pages);
+                                       return -1;
+                               }
+
+                               /* mark preallocated pages as unfreeable */
+                               for (i = 0; i < cur_pages; i++) {
+                                       struct rte_memseg *ms = pages[i];
+                                       ms->flags |=
+                                               RTE_MEMSEG_FLAG_DO_NOT_FREE;
+                               }
+                               free(pages);
+
+                               num_pages_alloc += cur_pages;
+                       } while (num_pages_alloc != num_pages);
+               }
+       }
+
+       /* if socket limits were specified, set them */
+       if (internal_config.force_socket_limits) {
+               unsigned int i;
+               for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+                       uint64_t limit = internal_config.socket_limit[i];
+                       if (limit == 0)
+                               continue;
+                       if (rte_mem_alloc_validator_register("socket-limit",
+                                       limits_callback, i, limit))
+                               RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n");
+               }
+       }
+       return 0;
+}
+
+__rte_unused /* function is unused on 32-bit builds */
+static inline uint64_t
+get_socket_mem_size(int socket)
+{
+       uint64_t size = 0;
+       unsigned int i;
+
+       for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+               struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+               size += hpi->hugepage_sz * hpi->num_pages[socket];
+       }
+
+       return size;
+}
+
+int
+eal_dynmem_calc_num_pages_per_socket(
+       uint64_t *memory, struct hugepage_info *hp_info,
+       struct hugepage_info *hp_used, unsigned int num_hp_info)
+{
+       unsigned int socket, j, i = 0;
+       unsigned int requested, available;
+       int total_num_pages = 0;
+       uint64_t remaining_mem, cur_mem;
+       uint64_t total_mem = internal_config.memory;
+
+       if (num_hp_info == 0)
+               return -1;
+
+       /* if specific memory amounts per socket weren't requested */
+       if (internal_config.force_sockets == 0) {
+               size_t total_size;
+#ifdef RTE_ARCH_64
+               int cpu_per_socket[RTE_MAX_NUMA_NODES];
+               size_t default_size;
+               unsigned int lcore_id;
+
+               /* Compute number of cores per socket */
+               memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
+               RTE_LCORE_FOREACH(lcore_id) {
+                       cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
+               }
+
+               /*
+                * Automatically spread requested memory amongst detected
+                * sockets according to number of cores from CPU mask present
+                * on each socket.
+                */
+               total_size = internal_config.memory;
+               for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
+                               socket++) {
+
+                       /* Set memory amount per socket */
+                       default_size = internal_config.memory *
+                               cpu_per_socket[socket] / rte_lcore_count();
+
+                       /* Limit to maximum available memory on socket */
+                       default_size = RTE_MIN(
+                               default_size, get_socket_mem_size(socket));
+
+                       /* Update sizes */
+                       memory[socket] = default_size;
+                       total_size -= default_size;
+               }
+
+               /*
+                * If some memory is remaining, try to allocate it by getting
+                * all available memory from sockets, one after the other.
+                */
+               for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
+                               socket++) {
+                       /* take whatever is available */
+                       default_size = RTE_MIN(
+                               get_socket_mem_size(socket) - memory[socket],
+                               total_size);
+
+                       /* Update sizes */
+                       memory[socket] += default_size;
+                       total_size -= default_size;
+               }
+#else
+               /* in 32-bit mode, allocate all of the memory only on master
+                * lcore socket
+                */
+               total_size = internal_config.memory;
+               for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
+                               socket++) {
+                       struct rte_config *cfg = rte_eal_get_configuration();
+                       unsigned int master_lcore_socket;
+
+                       master_lcore_socket =
+                               rte_lcore_to_socket_id(cfg->master_lcore);
+
+                       if (master_lcore_socket != socket)
+                               continue;
+
+                       /* Update sizes */
+                       memory[socket] = total_size;
+                       break;
+               }
+#endif
+       }
+
+       for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0;
+                       socket++) {
+               /* skips if the memory on specific socket wasn't requested */
+               for (i = 0; i < num_hp_info && memory[socket] != 0; i++) {
+                       rte_strscpy(hp_used[i].hugedir, hp_info[i].hugedir,
+                               sizeof(hp_used[i].hugedir));
+                       hp_used[i].num_pages[socket] = RTE_MIN(
+                                       memory[socket] / hp_info[i].hugepage_sz,
+                                       hp_info[i].num_pages[socket]);
+
+                       cur_mem = hp_used[i].num_pages[socket] *
+                                       hp_used[i].hugepage_sz;
+
+                       memory[socket] -= cur_mem;
+                       total_mem -= cur_mem;
+
+                       total_num_pages += hp_used[i].num_pages[socket];
+
+                       /* check if we have met all memory requests */
+                       if (memory[socket] == 0)
+                               break;
+
+                       /* Check if we have any more pages left at this size,
+                        * if so, move on to next size.
+                        */
+                       if (hp_used[i].num_pages[socket] ==
+                                       hp_info[i].num_pages[socket])
+                               continue;
+                       /* At this point we know that there are more pages
+                        * available that are bigger than the memory we want,
+                        * so lets see if we can get enough from other page
+                        * sizes.
+                        */
+                       remaining_mem = 0;
+                       for (j = i+1; j < num_hp_info; j++)
+                               remaining_mem += hp_info[j].hugepage_sz *
+                               hp_info[j].num_pages[socket];
+
+                       /* Is there enough other memory?
+                        * If not, allocate another page and quit.
+                        */
+                       if (remaining_mem < memory[socket]) {
+                               cur_mem = RTE_MIN(
+                                       memory[socket], hp_info[i].hugepage_sz);
+                               memory[socket] -= cur_mem;
+                               total_mem -= cur_mem;
+                               hp_used[i].num_pages[socket]++;
+                               total_num_pages++;
+                               break; /* we are done with this socket*/
+                       }
+               }
+
+               /* if we didn't satisfy all memory requirements per socket */
+               if (memory[socket] > 0 &&
+                               internal_config.socket_mem[socket] != 0) {
+                       /* to prevent icc errors */
+                       requested = (unsigned int)(
+                               internal_config.socket_mem[socket] / 0x100000);
+                       available = requested -
+                               ((unsigned int)(memory[socket] / 0x100000));
+                       RTE_LOG(ERR, EAL, "Not enough memory available on "
+                               "socket %u! Requested: %uMB, available: %uMB\n",
+                               socket, requested, available);
+                       return -1;
+               }
+       }
+
+       /* if we didn't satisfy total memory requirements */
+       if (total_mem > 0) {
+               requested = (unsigned int)(internal_config.memory / 0x100000);
+               available = requested - (unsigned int)(total_mem / 0x100000);
+               RTE_LOG(ERR, EAL, "Not enough memory available! "
+                       "Requested: %uMB, available: %uMB\n",
+                       requested, available);
+               return -1;
+       }
+       return total_num_pages;
+}
index 75521d0..0592fcd 100644 (file)
@@ -13,6 +13,8 @@
 #include <rte_lcore.h>
 #include <rte_memory.h>
 
+#include "eal_internal_cfg.h"
+
 /**
  * Structure storing internal configuration (per-lcore)
  */
@@ -316,6 +318,45 @@ eal_memseg_list_alloc(struct rte_memseg_list *msl, int reserve_flags);
 void
 eal_memseg_list_populate(struct rte_memseg_list *msl, void *addr, int n_segs);
 
+/**
+ * Distribute available memory between MSLs.
+ *
+ * @return
+ *  0 on success, (-1) on failure.
+ */
+int
+eal_dynmem_memseg_lists_init(void);
+
+/**
+ * Preallocate hugepages for dynamic allocation.
+ *
+ * @return
+ *  0 on success, (-1) on failure.
+ */
+int
+eal_dynmem_hugepage_init(void);
+
+/**
+ * Given the list of hugepage sizes and the number of pages thereof,
+ * calculate the best number of pages of each size to fulfill the request
+ * for RAM on each NUMA node.
+ *
+ * @param memory
+ *  Amounts of memory requested for each NUMA node of RTE_MAX_NUMA_NODES.
+ * @param hp_info
+ *  Information about hugepages of different size.
+ * @param hp_used
+ *  Receives information about used hugepages of each size.
+ * @param num_hp_info
+ *  Number of elements in hp_info and hp_used.
+ * @return
+ *  0 on success, (-1) on failure.
+ */
+int
+eal_dynmem_calc_num_pages_per_socket(
+               uint64_t *memory, struct hugepage_info *hp_info,
+               struct hugepage_info *hp_used, unsigned int num_hp_info);
+
 /**
  * Get cpu core_id.
  *
@@ -595,7 +636,7 @@ void *
 eal_mem_reserve(void *requested_addr, size_t size, int flags);
 
 /**
- * Free memory obtained by eal_mem_reserve() or eal_mem_alloc().
+ * Free memory obtained by eal_mem_reserve() and possibly allocated.
  *
  * If *virt* and *size* describe a part of the reserved region,
  * only this part of the region is freed (accurately up to the system
index 55aaeb1..d91c222 100644 (file)
@@ -56,3 +56,7 @@ sources += files(
        'rte_reciprocal.c',
        'rte_service.c',
 )
+
+if is_linux
+       sources += files('eal_common_dynmem.c')
+endif
index 2eb70c2..72a30f2 100644 (file)
@@ -315,14 +315,6 @@ get_mem_amount(uint64_t page_sz, uint64_t max_mem)
        return RTE_ALIGN(area_sz, page_sz);
 }
 
-static int
-memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
-               int n_segs, int socket_id, int type_msl_idx)
-{
-       return eal_memseg_list_init(
-               msl, page_sz, n_segs, socket_id, type_msl_idx, false);
-}
-
 static int
 memseg_list_alloc(struct rte_memseg_list *msl)
 {
@@ -419,8 +411,8 @@ memseg_primary_init(void)
                                        cur_max_mem);
                        n_segs = cur_mem / hugepage_sz;
 
-                       if (memseg_list_init(msl, hugepage_sz, n_segs,
-                                       0, type_msl_idx))
+                       if (eal_memseg_list_init(msl, hugepage_sz, n_segs,
+                                       0, type_msl_idx, false))
                                return -1;
 
                        total_segs += msl->memseg_arr.len;
index 8febf22..07ce643 100644 (file)
@@ -50,6 +50,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_timer.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_memzone.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_log.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_dynmem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_mcfg.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_memalloc.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_common_memory.c
index 9cc39e6..5986dab 100644 (file)
@@ -812,20 +812,6 @@ memseg_list_free(struct rte_memseg_list *msl)
        return 0;
 }
 
-static int
-memseg_list_init(struct rte_memseg_list *msl, uint64_t page_sz,
-               int n_segs, int socket_id, int type_msl_idx)
-{
-       return eal_memseg_list_init(
-               msl, page_sz, n_segs, socket_id, type_msl_idx, true);
-}
-
-static int
-memseg_list_alloc(struct rte_memseg_list *msl)
-{
-       return eal_memseg_list_alloc(msl, 0);
-}
-
 /*
  * Our VA space is not preallocated yet, so preallocate it here. We need to know
  * how many segments there are in order to map all pages into one address space,
@@ -969,12 +955,12 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages)
                        }
 
                        /* now, allocate fbarray itself */
-                       if (memseg_list_init(msl, page_sz, n_segs, socket,
-                                               msl_idx) < 0)
+                       if (eal_memseg_list_init(msl, page_sz, n_segs,
+                                       socket, msl_idx, true) < 0)
                                return -1;
 
                        /* finally, allocate VA space */
-                       if (memseg_list_alloc(msl) < 0) {
+                       if (eal_memseg_list_alloc(msl, 0) < 0) {
                                RTE_LOG(ERR, EAL, "Cannot preallocate 0x%"PRIx64"kB hugepages\n",
                                        page_sz >> 10);
                                return -1;
@@ -1048,182 +1034,6 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
        return 0;
 }
 
-__rte_unused /* function is unused on 32-bit builds */
-static inline uint64_t
-get_socket_mem_size(int socket)
-{
-       uint64_t size = 0;
-       unsigned i;
-
-       for (i = 0; i < internal_config.num_hugepage_sizes; i++){
-               struct hugepage_info *hpi = &internal_config.hugepage_info[i];
-               size += hpi->hugepage_sz * hpi->num_pages[socket];
-       }
-
-       return size;
-}
-
-/*
- * This function is a NUMA-aware equivalent of calc_num_pages.
- * It takes in the list of hugepage sizes and the
- * number of pages thereof, and calculates the best number of
- * pages of each size to fulfill the request for <memory> ram
- */
-static int
-calc_num_pages_per_socket(uint64_t * memory,
-               struct hugepage_info *hp_info,
-               struct hugepage_info *hp_used,
-               unsigned num_hp_info)
-{
-       unsigned socket, j, i = 0;
-       unsigned requested, available;
-       int total_num_pages = 0;
-       uint64_t remaining_mem, cur_mem;
-       uint64_t total_mem = internal_config.memory;
-
-       if (num_hp_info == 0)
-               return -1;
-
-       /* if specific memory amounts per socket weren't requested */
-       if (internal_config.force_sockets == 0) {
-               size_t total_size;
-#ifdef RTE_ARCH_64
-               int cpu_per_socket[RTE_MAX_NUMA_NODES];
-               size_t default_size;
-               unsigned lcore_id;
-
-               /* Compute number of cores per socket */
-               memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
-               RTE_LCORE_FOREACH(lcore_id) {
-                       cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
-               }
-
-               /*
-                * Automatically spread requested memory amongst detected sockets according
-                * to number of cores from cpu mask present on each socket
-                */
-               total_size = internal_config.memory;
-               for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
-
-                       /* Set memory amount per socket */
-                       default_size = (internal_config.memory * cpu_per_socket[socket])
-                                       / rte_lcore_count();
-
-                       /* Limit to maximum available memory on socket */
-                       default_size = RTE_MIN(default_size, get_socket_mem_size(socket));
-
-                       /* Update sizes */
-                       memory[socket] = default_size;
-                       total_size -= default_size;
-               }
-
-               /*
-                * If some memory is remaining, try to allocate it by getting all
-                * available memory from sockets, one after the other
-                */
-               for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) {
-                       /* take whatever is available */
-                       default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket],
-                                              total_size);
-
-                       /* Update sizes */
-                       memory[socket] += default_size;
-                       total_size -= default_size;
-               }
-#else
-               /* in 32-bit mode, allocate all of the memory only on master
-                * lcore socket
-                */
-               total_size = internal_config.memory;
-               for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
-                               socket++) {
-                       struct rte_config *cfg = rte_eal_get_configuration();
-                       unsigned int master_lcore_socket;
-
-                       master_lcore_socket =
-                               rte_lcore_to_socket_id(cfg->master_lcore);
-
-                       if (master_lcore_socket != socket)
-                               continue;
-
-                       /* Update sizes */
-                       memory[socket] = total_size;
-                       break;
-               }
-#endif
-       }
-
-       for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) {
-               /* skips if the memory on specific socket wasn't requested */
-               for (i = 0; i < num_hp_info && memory[socket] != 0; i++){
-                       strlcpy(hp_used[i].hugedir, hp_info[i].hugedir,
-                               sizeof(hp_used[i].hugedir));
-                       hp_used[i].num_pages[socket] = RTE_MIN(
-                                       memory[socket] / hp_info[i].hugepage_sz,
-                                       hp_info[i].num_pages[socket]);
-
-                       cur_mem = hp_used[i].num_pages[socket] *
-                                       hp_used[i].hugepage_sz;
-
-                       memory[socket] -= cur_mem;
-                       total_mem -= cur_mem;
-
-                       total_num_pages += hp_used[i].num_pages[socket];
-
-                       /* check if we have met all memory requests */
-                       if (memory[socket] == 0)
-                               break;
-
-                       /* check if we have any more pages left at this size, if so
-                        * move on to next size */
-                       if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket])
-                               continue;
-                       /* At this point we know that there are more pages available that are
-                        * bigger than the memory we want, so lets see if we can get enough
-                        * from other page sizes.
-                        */
-                       remaining_mem = 0;
-                       for (j = i+1; j < num_hp_info; j++)
-                               remaining_mem += hp_info[j].hugepage_sz *
-                               hp_info[j].num_pages[socket];
-
-                       /* is there enough other memory, if not allocate another page and quit */
-                       if (remaining_mem < memory[socket]){
-                               cur_mem = RTE_MIN(memory[socket],
-                                               hp_info[i].hugepage_sz);
-                               memory[socket] -= cur_mem;
-                               total_mem -= cur_mem;
-                               hp_used[i].num_pages[socket]++;
-                               total_num_pages++;
-                               break; /* we are done with this socket*/
-                       }
-               }
-               /* if we didn't satisfy all memory requirements per socket */
-               if (memory[socket] > 0 &&
-                               internal_config.socket_mem[socket] != 0) {
-                       /* to prevent icc errors */
-                       requested = (unsigned) (internal_config.socket_mem[socket] /
-                                       0x100000);
-                       available = requested -
-                                       ((unsigned) (memory[socket] / 0x100000));
-                       RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! "
-                                       "Requested: %uMB, available: %uMB\n", socket,
-                                       requested, available);
-                       return -1;
-               }
-       }
-
-       /* if we didn't satisfy total memory requirements */
-       if (total_mem > 0) {
-               requested = (unsigned) (internal_config.memory / 0x100000);
-               available = requested - (unsigned) (total_mem / 0x100000);
-               RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB,"
-                               " available: %uMB\n", requested, available);
-               return -1;
-       }
-       return total_num_pages;
-}
-
 static inline size_t
 eal_get_hugepage_mem_size(void)
 {
@@ -1529,7 +1339,7 @@ eal_legacy_hugepage_init(void)
                memory[i] = internal_config.socket_mem[i];
 
        /* calculate final number of pages */
-       nr_hugepages = calc_num_pages_per_socket(memory,
+       nr_hugepages = eal_dynmem_calc_num_pages_per_socket(memory,
                        internal_config.hugepage_info, used_hp,
                        internal_config.num_hugepage_sizes);
 
@@ -1656,140 +1466,6 @@ fail:
        return -1;
 }
 
-static int __rte_unused
-hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
-{
-       struct hugepage_info *hpi = arg;
-
-       if (msl->page_sz != hpi->hugepage_sz)
-               return 0;
-
-       hpi->num_pages[msl->socket_id] += msl->memseg_arr.len;
-       return 0;
-}
-
-static int
-limits_callback(int socket_id, size_t cur_limit, size_t new_len)
-{
-       RTE_SET_USED(socket_id);
-       RTE_SET_USED(cur_limit);
-       RTE_SET_USED(new_len);
-       return -1;
-}
-
-static int
-eal_hugepage_init(void)
-{
-       struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
-       uint64_t memory[RTE_MAX_NUMA_NODES];
-       int hp_sz_idx, socket_id;
-
-       memset(used_hp, 0, sizeof(used_hp));
-
-       for (hp_sz_idx = 0;
-                       hp_sz_idx < (int) internal_config.num_hugepage_sizes;
-                       hp_sz_idx++) {
-#ifndef RTE_ARCH_64
-               struct hugepage_info dummy;
-               unsigned int i;
-#endif
-               /* also initialize used_hp hugepage sizes in used_hp */
-               struct hugepage_info *hpi;
-               hpi = &internal_config.hugepage_info[hp_sz_idx];
-               used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz;
-
-#ifndef RTE_ARCH_64
-               /* for 32-bit, limit number of pages on socket to whatever we've
-                * preallocated, as we cannot allocate more.
-                */
-               memset(&dummy, 0, sizeof(dummy));
-               dummy.hugepage_sz = hpi->hugepage_sz;
-               if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0)
-                       return -1;
-
-               for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
-                       hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i],
-                                       dummy.num_pages[i]);
-               }
-#endif
-       }
-
-       /* make a copy of socket_mem, needed for balanced allocation. */
-       for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++)
-               memory[hp_sz_idx] = internal_config.socket_mem[hp_sz_idx];
-
-       /* calculate final number of pages */
-       if (calc_num_pages_per_socket(memory,
-                       internal_config.hugepage_info, used_hp,
-                       internal_config.num_hugepage_sizes) < 0)
-               return -1;
-
-       for (hp_sz_idx = 0;
-                       hp_sz_idx < (int)internal_config.num_hugepage_sizes;
-                       hp_sz_idx++) {
-               for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES;
-                               socket_id++) {
-                       struct rte_memseg **pages;
-                       struct hugepage_info *hpi = &used_hp[hp_sz_idx];
-                       unsigned int num_pages = hpi->num_pages[socket_id];
-                       unsigned int num_pages_alloc;
-
-                       if (num_pages == 0)
-                               continue;
-
-                       RTE_LOG(DEBUG, EAL, "Allocating %u pages of size %" PRIu64 "M on socket %i\n",
-                               num_pages, hpi->hugepage_sz >> 20, socket_id);
-
-                       /* we may not be able to allocate all pages in one go,
-                        * because we break up our memory map into multiple
-                        * memseg lists. therefore, try allocating multiple
-                        * times and see if we can get the desired number of
-                        * pages from multiple allocations.
-                        */
-
-                       num_pages_alloc = 0;
-                       do {
-                               int i, cur_pages, needed;
-
-                               needed = num_pages - num_pages_alloc;
-
-                               pages = malloc(sizeof(*pages) * needed);
-
-                               /* do not request exact number of pages */
-                               cur_pages = eal_memalloc_alloc_seg_bulk(pages,
-                                               needed, hpi->hugepage_sz,
-                                               socket_id, false);
-                               if (cur_pages <= 0) {
-                                       free(pages);
-                                       return -1;
-                               }
-
-                               /* mark preallocated pages as unfreeable */
-                               for (i = 0; i < cur_pages; i++) {
-                                       struct rte_memseg *ms = pages[i];
-                                       ms->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE;
-                               }
-                               free(pages);
-
-                               num_pages_alloc += cur_pages;
-                       } while (num_pages_alloc != num_pages);
-               }
-       }
-       /* if socket limits were specified, set them */
-       if (internal_config.force_socket_limits) {
-               unsigned int i;
-               for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
-                       uint64_t limit = internal_config.socket_limit[i];
-                       if (limit == 0)
-                               continue;
-                       if (rte_mem_alloc_validator_register("socket-limit",
-                                       limits_callback, i, limit))
-                               RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n");
-               }
-       }
-       return 0;
-}
-
 /*
  * uses fstat to report the size of a file on disk
  */
@@ -1948,7 +1624,7 @@ rte_eal_hugepage_init(void)
 {
        return internal_config.legacy_mem ?
                        eal_legacy_hugepage_init() :
-                       eal_hugepage_init();
+                       eal_dynmem_hugepage_init();
 }
 
 int
@@ -2127,8 +1803,9 @@ memseg_primary_init_32(void)
                                                max_pagesz_mem);
                                n_segs = cur_mem / hugepage_sz;
 
-                               if (memseg_list_init(msl, hugepage_sz, n_segs,
-                                               socket_id, type_msl_idx)) {
+                               if (eal_memseg_list_init(msl, hugepage_sz,
+                                               n_segs, socket_id, type_msl_idx,
+                                               true)) {
                                        /* failing to allocate a memseg list is
                                         * a serious error.
                                         */
@@ -2136,7 +1813,7 @@ memseg_primary_init_32(void)
                                        return -1;
                                }
 
-                               if (memseg_list_alloc(msl)) {
+                               if (eal_memseg_list_alloc(msl, 0)) {
                                        /* if we couldn't allocate VA space, we
                                         * can try with smaller page sizes.
                                         */
@@ -2167,185 +1844,7 @@ memseg_primary_init_32(void)
 static int __rte_unused
 memseg_primary_init(void)
 {
-       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       struct memtype {
-               uint64_t page_sz;
-               int socket_id;
-       } *memtypes = NULL;
-       int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
-       struct rte_memseg_list *msl;
-       uint64_t max_mem, max_mem_per_type;
-       unsigned int max_seglists_per_type;
-       unsigned int n_memtypes, cur_type;
-
-       /* no-huge does not need this at all */
-       if (internal_config.no_hugetlbfs)
-               return 0;
-
-       /*
-        * figuring out amount of memory we're going to have is a long and very
-        * involved process. the basic element we're operating with is a memory
-        * type, defined as a combination of NUMA node ID and page size (so that
-        * e.g. 2 sockets with 2 page sizes yield 4 memory types in total).
-        *
-        * deciding amount of memory going towards each memory type is a
-        * balancing act between maximum segments per type, maximum memory per
-        * type, and number of detected NUMA nodes. the goal is to make sure
-        * each memory type gets at least one memseg list.
-        *
-        * the total amount of memory is limited by RTE_MAX_MEM_MB value.
-        *
-        * the total amount of memory per type is limited by either
-        * RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
-        * of detected NUMA nodes. additionally, maximum number of segments per
-        * type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for
-        * smaller page sizes, it can take hundreds of thousands of segments to
-        * reach the above specified per-type memory limits.
-        *
-        * additionally, each type may have multiple memseg lists associated
-        * with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
-        * page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones.
-        *
-        * the number of memseg lists per type is decided based on the above
-        * limits, and also taking number of detected NUMA nodes, to make sure
-        * that we don't run out of memseg lists before we populate all NUMA
-        * nodes with memory.
-        *
-        * we do this in three stages. first, we collect the number of types.
-        * then, we figure out memory constraints and populate the list of
-        * would-be memseg lists. then, we go ahead and allocate the memseg
-        * lists.
-        */
-
-       /* create space for mem types */
-       n_memtypes = internal_config.num_hugepage_sizes * rte_socket_count();
-       memtypes = calloc(n_memtypes, sizeof(*memtypes));
-       if (memtypes == NULL) {
-               RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n");
-               return -1;
-       }
-
-       /* populate mem types */
-       cur_type = 0;
-       for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
-                       hpi_idx++) {
-               struct hugepage_info *hpi;
-               uint64_t hugepage_sz;
-
-               hpi = &internal_config.hugepage_info[hpi_idx];
-               hugepage_sz = hpi->hugepage_sz;
-
-               for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
-                       int socket_id = rte_socket_id_by_idx(i);
-
-#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
-                       /* we can still sort pages by socket in legacy mode */
-                       if (!internal_config.legacy_mem && socket_id > 0)
-                               break;
-#endif
-                       memtypes[cur_type].page_sz = hugepage_sz;
-                       memtypes[cur_type].socket_id = socket_id;
-
-                       RTE_LOG(DEBUG, EAL, "Detected memory type: "
-                               "socket_id:%u hugepage_sz:%" PRIu64 "\n",
-                               socket_id, hugepage_sz);
-               }
-       }
-       /* number of memtypes could have been lower due to no NUMA support */
-       n_memtypes = cur_type;
-
-       /* set up limits for types */
-       max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
-       max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
-                       max_mem / n_memtypes);
-       /*
-        * limit maximum number of segment lists per type to ensure there's
-        * space for memseg lists for all NUMA nodes with all page sizes
-        */
-       max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;
-
-       if (max_seglists_per_type == 0) {
-               RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n",
-                       RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
-               goto out;
-       }
-
-       /* go through all mem types and create segment lists */
-       msl_idx = 0;
-       for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
-               unsigned int cur_seglist, n_seglists, n_segs;
-               unsigned int max_segs_per_type, max_segs_per_list;
-               struct memtype *type = &memtypes[cur_type];
-               uint64_t max_mem_per_list, pagesz;
-               int socket_id;
-
-               pagesz = type->page_sz;
-               socket_id = type->socket_id;
-
-               /*
-                * we need to create segment lists for this type. we must take
-                * into account the following things:
-                *
-                * 1. total amount of memory we can use for this memory type
-                * 2. total amount of memory per memseg list allowed
-                * 3. number of segments needed to fit the amount of memory
-                * 4. number of segments allowed per type
-                * 5. number of segments allowed per memseg list
-                * 6. number of memseg lists we are allowed to take up
-                */
-
-               /* calculate how much segments we will need in total */
-               max_segs_per_type = max_mem_per_type / pagesz;
-               /* limit number of segments to maximum allowed per type */
-               max_segs_per_type = RTE_MIN(max_segs_per_type,
-                               (unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
-               /* limit number of segments to maximum allowed per list */
-               max_segs_per_list = RTE_MIN(max_segs_per_type,
-                               (unsigned int)RTE_MAX_MEMSEG_PER_LIST);
-
-               /* calculate how much memory we can have per segment list */
-               max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
-                               (uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);
-
-               /* calculate how many segments each segment list will have */
-               n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);
-
-               /* calculate how many segment lists we can have */
-               n_seglists = RTE_MIN(max_segs_per_type / n_segs,
-                               max_mem_per_type / max_mem_per_list);
-
-               /* limit number of segment lists according to our maximum */
-               n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);
-
-               RTE_LOG(DEBUG, EAL, "Creating %i segment lists: "
-                               "n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n",
-                       n_seglists, n_segs, socket_id, pagesz);
-
-               /* create all segment lists */
-               for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
-                       if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
-                               RTE_LOG(ERR, EAL,
-                                       "No more space in memseg lists, please increase %s\n",
-                                       RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
-                               goto out;
-                       }
-                       msl = &mcfg->memsegs[msl_idx++];
-
-                       if (memseg_list_init(msl, pagesz, n_segs,
-                                       socket_id, cur_seglist))
-                               goto out;
-
-                       if (memseg_list_alloc(msl)) {
-                               RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
-                               goto out;
-                       }
-               }
-       }
-       /* we're successful */
-       ret = 0;
-out:
-       free(memtypes);
-       return ret;
+       return eal_dynmem_memseg_lists_init();
 }
 
 static int
@@ -2369,7 +1868,7 @@ memseg_secondary_init(void)
                }
 
                /* preallocate VA space */
-               if (memseg_list_alloc(msl)) {
+               if (eal_memseg_list_alloc(msl, 0)) {
                        RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
                        return -1;
                }