X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinux%2Feal_memory.c;h=03a4f2dd2d5c3be955a1925be648ee817fff89c1;hb=378cd4887d6c02bf771eee3a8b56a3af41f37b7c;hp=7a9c97ff8854e799a1f30cb34937a554d2d213ac;hpb=a083f8cc77460c15ac99a427ab6833dc8c8ae5bc;p=dpdk.git diff --git a/lib/librte_eal/linux/eal_memory.c b/lib/librte_eal/linux/eal_memory.c index 7a9c97ff88..03a4f2dd2d 100644 --- a/lib/librte_eal/linux/eal_memory.c +++ b/lib/librte_eal/linux/eal_memory.c @@ -267,6 +267,8 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, struct bitmask *oldmask = NULL; bool have_numa = true; unsigned long maxnode = 0; + const struct internal_config *internal_conf = + eal_get_internal_configuration(); /* Check if kernel supports NUMA. */ if (numa_available() != 0) { @@ -285,7 +287,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, oldpolicy = MPOL_DEFAULT; } for (i = 0; i < RTE_MAX_NUMA_NODES; i++) - if (internal_config.socket_mem[i]) + if (internal_conf->socket_mem[i]) maxnode = i + 1; } #endif @@ -304,7 +306,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, if (j == maxnode) { node_id = (node_id + 1) % maxnode; - while (!internal_config.socket_mem[node_id]) { + while (!internal_conf->socket_mem[node_id]) { node_id++; node_id %= maxnode; } @@ -525,9 +527,11 @@ create_shared_memory(const char *filename, const size_t mem_size) { void *retval; int fd; + const struct internal_config *internal_conf = + eal_get_internal_configuration(); /* if no shared files mode is used, create anonymous memory instead */ - if (internal_config.no_shconf) { + if (internal_conf->no_shconf) { retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (retval == MAP_FAILED) @@ -577,12 +581,14 @@ unlink_hugepage_files(struct hugepage_file *hugepg_tbl, { unsigned socket, size; int page, nrpages = 0; + const struct internal_config *internal_conf = + eal_get_internal_configuration(); /* get total number of hugepages */ for (size = 0; size < num_hp_info; size++) for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) nrpages += - internal_config.hugepage_info[size].num_pages[socket]; + internal_conf->hugepage_info[size].num_pages[socket]; for (page = 0; page < nrpages; page++) { struct hugepage_file *hp = &hugepg_tbl[page]; @@ -606,11 +612,13 @@ unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, { unsigned socket, size; int page, nrpages = 0; + const struct internal_config *internal_conf = + eal_get_internal_configuration(); /* get total number of hugepages */ for (size = 0; size < num_hp_info; size++) for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) - nrpages += internal_config.hugepage_info[size].num_pages[socket]; + nrpages += internal_conf->hugepage_info[size].num_pages[socket]; for (size = 0; size < num_hp_info; size++) { for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { @@ -665,7 +673,10 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end) uint64_t page_sz; size_t memseg_len; int socket_id; - +#ifndef RTE_ARCH_64 + const struct internal_config *internal_conf = + eal_get_internal_configuration(); +#endif page_sz = hugepages[seg_start].size; socket_id = hugepages[seg_start].socket_id; seg_len = seg_end - seg_start; @@ -702,8 +713,8 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end) } if (msl_idx == RTE_MAX_MEMSEG_LISTS) { RTE_LOG(ERR, EAL, "Could not find space for memseg. 
Please increase %s and/or %s in configuration.\n", - RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE), - RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE)); + RTE_STR(RTE_MAX_MEMSEG_PER_TYPE), + RTE_STR(RTE_MAX_MEM_MB_PER_TYPE)); return -1; } @@ -750,7 +761,7 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end) /* we have a new address, so unmap previous one */ #ifndef RTE_ARCH_64 /* in 32-bit legacy mode, we have already unmapped the page */ - if (!internal_config.legacy_mem) + if (!internal_conf->legacy_mem) munmap(hfile->orig_va, page_sz); #else munmap(hfile->orig_va, page_sz); @@ -802,7 +813,7 @@ get_mem_amount(uint64_t page_sz, uint64_t max_mem) } static int -free_memseg_list(struct rte_memseg_list *msl) +memseg_list_free(struct rte_memseg_list *msl) { if (rte_fbarray_destroy(&msl->memseg_arr)) { RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n"); @@ -812,60 +823,6 @@ free_memseg_list(struct rte_memseg_list *msl) return 0; } -#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i" -static int -alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz, - int n_segs, int socket_id, int type_msl_idx) -{ - char name[RTE_FBARRAY_NAME_LEN]; - - snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id, - type_msl_idx); - if (rte_fbarray_init(&msl->memseg_arr, name, n_segs, - sizeof(struct rte_memseg))) { - RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n", - rte_strerror(rte_errno)); - return -1; - } - - msl->page_sz = page_sz; - msl->socket_id = socket_id; - msl->base_va = NULL; - msl->heap = 1; /* mark it as a heap segment */ - - RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n", - (size_t)page_sz >> 10, socket_id); - - return 0; -} - -static int -alloc_va_space(struct rte_memseg_list *msl) -{ - uint64_t page_sz; - size_t mem_sz; - void *addr; - int flags = 0; - - page_sz = msl->page_sz; - mem_sz = page_sz * msl->memseg_arr.len; - - addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags); - if (addr == NULL) { - if (rte_errno == EADDRNOTAVAIL) - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - " - "please use '--" OPT_BASE_VIRTADDR "' option\n", - (unsigned long long)mem_sz, msl->base_va); - else - RTE_LOG(ERR, EAL, "Cannot reserve memory\n"); - return -1; - } - msl->base_va = addr; - msl->len = mem_sz; - - return 0; -} - /* * Our VA space is not preallocated yet, so preallocate it here. We need to know * how many segments there are in order to map all pages into one address space, @@ -882,6 +839,8 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages) unsigned int hpi_idx, socket, i; int n_contig_segs, n_segs; int msl_idx; + const struct internal_config *internal_conf = + eal_get_internal_configuration(); /* before we preallocate segments, we need to free up our VA space. 
* we're not removing files, and we already have information about @@ -896,10 +855,10 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages) /* we cannot know how many page sizes and sockets we have discovered, so * loop over all of them */ - for (hpi_idx = 0; hpi_idx < internal_config.num_hugepage_sizes; + for (hpi_idx = 0; hpi_idx < internal_conf->num_hugepage_sizes; hpi_idx++) { uint64_t page_sz = - internal_config.hugepage_info[hpi_idx].hugepage_sz; + internal_conf->hugepage_info[hpi_idx].hugepage_sz; for (i = 0; i < rte_socket_count(); i++) { struct rte_memseg_list *msl; @@ -1004,18 +963,21 @@ prealloc_segments(struct hugepage_file *hugepages, int n_pages) } if (msl_idx == RTE_MAX_MEMSEG_LISTS) { RTE_LOG(ERR, EAL, "Not enough space in memseg lists, please increase %s\n", - RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); + RTE_STR(RTE_MAX_MEMSEG_LISTS)); return -1; } /* now, allocate fbarray itself */ - if (alloc_memseg_list(msl, page_sz, n_segs, socket, - msl_idx) < 0) + if (eal_memseg_list_init(msl, page_sz, n_segs, + socket, msl_idx, true) < 0) return -1; /* finally, allocate VA space */ - if (alloc_va_space(msl) < 0) + if (eal_memseg_list_alloc(msl, 0) < 0) { + RTE_LOG(ERR, EAL, "Cannot preallocate 0x%"PRIx64"kB hugepages\n", + page_sz >> 10); return -1; + } } } return 0; @@ -1085,190 +1047,16 @@ remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages) return 0; } -__rte_unused /* function is unused on 32-bit builds */ -static inline uint64_t -get_socket_mem_size(int socket) -{ - uint64_t size = 0; - unsigned i; - - for (i = 0; i < internal_config.num_hugepage_sizes; i++){ - struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - size += hpi->hugepage_sz * hpi->num_pages[socket]; - } - - return size; -} - -/* - * This function is a NUMA-aware equivalent of calc_num_pages. 
- * It takes in the list of hugepage sizes and the - * number of pages thereof, and calculates the best number of - * pages of each size to fulfill the request for ram - */ -static int -calc_num_pages_per_socket(uint64_t * memory, - struct hugepage_info *hp_info, - struct hugepage_info *hp_used, - unsigned num_hp_info) -{ - unsigned socket, j, i = 0; - unsigned requested, available; - int total_num_pages = 0; - uint64_t remaining_mem, cur_mem; - uint64_t total_mem = internal_config.memory; - - if (num_hp_info == 0) - return -1; - - /* if specific memory amounts per socket weren't requested */ - if (internal_config.force_sockets == 0) { - size_t total_size; -#ifdef RTE_ARCH_64 - int cpu_per_socket[RTE_MAX_NUMA_NODES]; - size_t default_size; - unsigned lcore_id; - - /* Compute number of cores per socket */ - memset(cpu_per_socket, 0, sizeof(cpu_per_socket)); - RTE_LCORE_FOREACH(lcore_id) { - cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++; - } - - /* - * Automatically spread requested memory amongst detected sockets according - * to number of cores from cpu mask present on each socket - */ - total_size = internal_config.memory; - for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { - - /* Set memory amount per socket */ - default_size = (internal_config.memory * cpu_per_socket[socket]) - / rte_lcore_count(); - - /* Limit to maximum available memory on socket */ - default_size = RTE_MIN(default_size, get_socket_mem_size(socket)); - - /* Update sizes */ - memory[socket] = default_size; - total_size -= default_size; - } - - /* - * If some memory is remaining, try to allocate it by getting all - * available memory from sockets, one after the other - */ - for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { - /* take whatever is available */ - default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket], - total_size); - - /* Update sizes */ - memory[socket] += default_size; - total_size -= default_size; - } -#else - /* in 32-bit mode, allocate all of the memory only on master - * lcore socket - */ - total_size = internal_config.memory; - for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; - socket++) { - struct rte_config *cfg = rte_eal_get_configuration(); - unsigned int master_lcore_socket; - - master_lcore_socket = - rte_lcore_to_socket_id(cfg->master_lcore); - - if (master_lcore_socket != socket) - continue; - - /* Update sizes */ - memory[socket] = total_size; - break; - } -#endif - } - - for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) { - /* skips if the memory on specific socket wasn't requested */ - for (i = 0; i < num_hp_info && memory[socket] != 0; i++){ - strlcpy(hp_used[i].hugedir, hp_info[i].hugedir, - sizeof(hp_used[i].hugedir)); - hp_used[i].num_pages[socket] = RTE_MIN( - memory[socket] / hp_info[i].hugepage_sz, - hp_info[i].num_pages[socket]); - - cur_mem = hp_used[i].num_pages[socket] * - hp_used[i].hugepage_sz; - - memory[socket] -= cur_mem; - total_mem -= cur_mem; - - total_num_pages += hp_used[i].num_pages[socket]; - - /* check if we have met all memory requests */ - if (memory[socket] == 0) - break; - - /* check if we have any more pages left at this size, if so - * move on to next size */ - if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket]) - continue; - /* At this point we know that there are more pages available that are - * bigger than the memory we want, so lets see if we can get enough - * from other page sizes. 
- */ - remaining_mem = 0; - for (j = i+1; j < num_hp_info; j++) - remaining_mem += hp_info[j].hugepage_sz * - hp_info[j].num_pages[socket]; - - /* is there enough other memory, if not allocate another page and quit */ - if (remaining_mem < memory[socket]){ - cur_mem = RTE_MIN(memory[socket], - hp_info[i].hugepage_sz); - memory[socket] -= cur_mem; - total_mem -= cur_mem; - hp_used[i].num_pages[socket]++; - total_num_pages++; - break; /* we are done with this socket*/ - } - } - /* if we didn't satisfy all memory requirements per socket */ - if (memory[socket] > 0 && - internal_config.socket_mem[socket] != 0) { - /* to prevent icc errors */ - requested = (unsigned) (internal_config.socket_mem[socket] / - 0x100000); - available = requested - - ((unsigned) (memory[socket] / 0x100000)); - RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! " - "Requested: %uMB, available: %uMB\n", socket, - requested, available); - return -1; - } - } - - /* if we didn't satisfy total memory requirements */ - if (total_mem > 0) { - requested = (unsigned) (internal_config.memory / 0x100000); - available = requested - (unsigned) (total_mem / 0x100000); - RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB," - " available: %uMB\n", requested, available); - return -1; - } - return total_num_pages; -} - static inline size_t eal_get_hugepage_mem_size(void) { uint64_t size = 0; unsigned i, j; + struct internal_config *internal_conf = + eal_get_internal_configuration(); - for (i = 0; i < internal_config.num_hugepage_sizes; i++) { - struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + for (i = 0; i < internal_conf->num_hugepage_sizes; i++) { + struct hugepage_info *hpi = &internal_conf->hugepage_info[i]; if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) { for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { size += hpi->hugepage_sz * hpi->num_pages[j]; @@ -1323,8 +1111,8 @@ eal_legacy_hugepage_init(void) struct rte_mem_config *mcfg; struct hugepage_file *hugepage = NULL, *tmp_hp = NULL; struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; - struct rte_fbarray *arr; - struct rte_memseg *ms; + struct internal_config *internal_conf = + eal_get_internal_configuration(); uint64_t memory[RTE_MAX_NUMA_NODES]; @@ -1339,31 +1127,31 @@ eal_legacy_hugepage_init(void) mcfg = rte_eal_get_configuration()->mem_config; /* hugetlbfs can be disabled */ - if (internal_config.no_hugetlbfs) { + if (internal_conf->no_hugetlbfs) { void *prealloc_addr; size_t mem_sz; struct rte_memseg_list *msl; - int n_segs, cur_seg, fd, flags; + int n_segs, fd, flags; #ifdef MEMFD_SUPPORTED int memfd; #endif uint64_t page_sz; /* nohuge mode is legacy mode */ - internal_config.legacy_mem = 1; + internal_conf->legacy_mem = 1; /* nohuge mode is single-file segments mode */ - internal_config.single_file_segments = 1; + internal_conf->single_file_segments = 1; /* create a memseg list */ msl = &mcfg->memsegs[0]; + mem_sz = internal_conf->memory; page_sz = RTE_PGSIZE_4K; - n_segs = internal_config.memory / page_sz; + n_segs = mem_sz / page_sz; - if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs, - sizeof(struct rte_memseg))) { - RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n"); + if (eal_memseg_list_init_named( + msl, "nohugemem", page_sz, n_segs, 0, true)) { return -1; } @@ -1380,7 +1168,7 @@ eal_legacy_hugepage_init(void) RTE_LOG(DEBUG, EAL, "Falling back to anonymous map\n"); } else { /* we got an fd - now resize it */ - if (ftruncate(memfd, internal_config.memory) < 0) { + if (ftruncate(memfd, internal_conf->memory) < 0) { 
RTE_LOG(ERR, EAL, "Cannot resize memfd: %s\n", strerror(errno)); RTE_LOG(ERR, EAL, "Falling back to anonymous map\n"); @@ -1400,16 +1188,12 @@ eal_legacy_hugepage_init(void) /* preallocate address space for the memory, so that it can be * fit into the DMA mask. */ - mem_sz = internal_config.memory; - prealloc_addr = eal_get_virtual_area( - NULL, &mem_sz, page_sz, 0, 0); - if (prealloc_addr == NULL) { - RTE_LOG(ERR, EAL, - "%s: reserving memory area failed: " - "%s\n", - __func__, strerror(errno)); + if (eal_memseg_list_alloc(msl, 0)) { + RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n"); return -1; } + + prealloc_addr = msl->base_va; addr = mmap(prealloc_addr, mem_sz, PROT_READ | PROT_WRITE, flags | MAP_FIXED, fd, 0); if (addr == MAP_FAILED || addr != prealloc_addr) { @@ -1418,11 +1202,6 @@ eal_legacy_hugepage_init(void) munmap(prealloc_addr, mem_sz); return -1; } - msl->base_va = addr; - msl->page_sz = page_sz; - msl->socket_id = 0; - msl->len = mem_sz; - msl->heap = 1; /* we're in single-file segments mode, so only the segment list * fd needs to be set up. @@ -1434,24 +1213,8 @@ eal_legacy_hugepage_init(void) } } - /* populate memsegs. each memseg is one page long */ - for (cur_seg = 0; cur_seg < n_segs; cur_seg++) { - arr = &msl->memseg_arr; - - ms = rte_fbarray_get(arr, cur_seg); - if (rte_eal_iova_mode() == RTE_IOVA_VA) - ms->iova = (uintptr_t)addr; - else - ms->iova = RTE_BAD_IOVA; - ms->addr = addr; - ms->hugepage_sz = page_sz; - ms->socket_id = 0; - ms->len = page_sz; + eal_memseg_list_populate(msl, addr, n_segs); - rte_fbarray_set_used(arr, cur_seg); - - addr = RTE_PTR_ADD(addr, (size_t)page_sz); - } if (mcfg->dma_maskbits && rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { RTE_LOG(ERR, EAL, @@ -1469,11 +1232,11 @@ eal_legacy_hugepage_init(void) /* calculate total number of hugepages available. at this point we haven't * yet started sorting them so they all are on socket 0 */ - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++) { /* meanwhile, also initialize used_hp hugepage sizes in used_hp */ - used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz; + used_hp[i].hugepage_sz = internal_conf->hugepage_info[i].hugepage_sz; - nr_hugepages += internal_config.hugepage_info[i].num_pages[0]; + nr_hugepages += internal_conf->hugepage_info[i].num_pages[0]; } /* @@ -1494,10 +1257,10 @@ eal_legacy_hugepage_init(void) /* make a copy of socket_mem, needed for balanced allocation. 
*/ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) - memory[i] = internal_config.socket_mem[i]; + memory[i] = internal_conf->socket_mem[i]; /* map all hugepages and sort them */ - for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ + for (i = 0; i < (int)internal_conf->num_hugepage_sizes; i++) { unsigned pages_old, pages_new; struct hugepage_info *hpi; @@ -1506,7 +1269,7 @@ eal_legacy_hugepage_init(void) * we just map all hugepages available to the system * all hugepages are still located on socket 0 */ - hpi = &internal_config.hugepage_info[i]; + hpi = &internal_conf->hugepage_info[i]; if (hpi->num_pages[0] == 0) continue; @@ -1562,16 +1325,16 @@ eal_legacy_hugepage_init(void) huge_recover_sigbus(); - if (internal_config.memory == 0 && internal_config.force_sockets == 0) - internal_config.memory = eal_get_hugepage_mem_size(); + if (internal_conf->memory == 0 && internal_conf->force_sockets == 0) + internal_conf->memory = eal_get_hugepage_mem_size(); nr_hugefiles = nr_hugepages; /* clean out the numbers of pages */ - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) + for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++) for (j = 0; j < RTE_MAX_NUMA_NODES; j++) - internal_config.hugepage_info[i].num_pages[j] = 0; + internal_conf->hugepage_info[i].num_pages[j] = 0; /* get hugepages for each socket */ for (i = 0; i < nr_hugefiles; i++) { @@ -1579,30 +1342,30 @@ eal_legacy_hugepage_init(void) /* find a hugepage info with right size and increment num_pages */ const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES, - (int)internal_config.num_hugepage_sizes); + (int)internal_conf->num_hugepage_sizes); for (j = 0; j < nb_hpsizes; j++) { if (tmp_hp[i].size == - internal_config.hugepage_info[j].hugepage_sz) { - internal_config.hugepage_info[j].num_pages[socket]++; + internal_conf->hugepage_info[j].hugepage_sz) { + internal_conf->hugepage_info[j].num_pages[socket]++; } } } /* make a copy of socket_mem, needed for number of pages calculation */ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) - memory[i] = internal_config.socket_mem[i]; + memory[i] = internal_conf->socket_mem[i]; /* calculate final number of pages */ - nr_hugepages = calc_num_pages_per_socket(memory, - internal_config.hugepage_info, used_hp, - internal_config.num_hugepage_sizes); + nr_hugepages = eal_dynmem_calc_num_pages_per_socket(memory, + internal_conf->hugepage_info, used_hp, + internal_conf->num_hugepage_sizes); /* error if not enough memory available */ if (nr_hugepages < 0) goto fail; /* reporting in! */ - for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++) { for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { if (used_hp[i].num_pages[j] > 0) { RTE_LOG(DEBUG, EAL, @@ -1631,7 +1394,7 @@ eal_legacy_hugepage_init(void) * also, sets final_va to NULL on pages that were unmapped. 
*/ if (unmap_unneeded_hugepages(tmp_hp, used_hp, - internal_config.num_hugepage_sizes) < 0) { + internal_conf->num_hugepage_sizes) < 0) { RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n"); goto fail; } @@ -1649,7 +1412,7 @@ eal_legacy_hugepage_init(void) #ifndef RTE_ARCH_64 /* for legacy 32-bit mode, we did not preallocate VA space, so do it */ - if (internal_config.legacy_mem && + if (internal_conf->legacy_mem && prealloc_segments(hugepage, nr_hugefiles)) { RTE_LOG(ERR, EAL, "Could not preallocate VA space for hugepages\n"); goto fail; @@ -1665,8 +1428,8 @@ eal_legacy_hugepage_init(void) } /* free the hugepage backing files */ - if (internal_config.hugepage_unlink && - unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) { + if (internal_conf->hugepage_unlink && + unlink_hugepage_files(tmp_hp, internal_conf->num_hugepage_sizes) < 0) { RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n"); goto fail; } @@ -1720,140 +1483,6 @@ fail: return -1; } -static int __rte_unused -hugepage_count_walk(const struct rte_memseg_list *msl, void *arg) -{ - struct hugepage_info *hpi = arg; - - if (msl->page_sz != hpi->hugepage_sz) - return 0; - - hpi->num_pages[msl->socket_id] += msl->memseg_arr.len; - return 0; -} - -static int -limits_callback(int socket_id, size_t cur_limit, size_t new_len) -{ - RTE_SET_USED(socket_id); - RTE_SET_USED(cur_limit); - RTE_SET_USED(new_len); - return -1; -} - -static int -eal_hugepage_init(void) -{ - struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; - uint64_t memory[RTE_MAX_NUMA_NODES]; - int hp_sz_idx, socket_id; - - memset(used_hp, 0, sizeof(used_hp)); - - for (hp_sz_idx = 0; - hp_sz_idx < (int) internal_config.num_hugepage_sizes; - hp_sz_idx++) { -#ifndef RTE_ARCH_64 - struct hugepage_info dummy; - unsigned int i; -#endif - /* also initialize used_hp hugepage sizes in used_hp */ - struct hugepage_info *hpi; - hpi = &internal_config.hugepage_info[hp_sz_idx]; - used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz; - -#ifndef RTE_ARCH_64 - /* for 32-bit, limit number of pages on socket to whatever we've - * preallocated, as we cannot allocate more. - */ - memset(&dummy, 0, sizeof(dummy)); - dummy.hugepage_sz = hpi->hugepage_sz; - if (rte_memseg_list_walk(hugepage_count_walk, &dummy) < 0) - return -1; - - for (i = 0; i < RTE_DIM(dummy.num_pages); i++) { - hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i], - dummy.num_pages[i]); - } -#endif - } - - /* make a copy of socket_mem, needed for balanced allocation. */ - for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++) - memory[hp_sz_idx] = internal_config.socket_mem[hp_sz_idx]; - - /* calculate final number of pages */ - if (calc_num_pages_per_socket(memory, - internal_config.hugepage_info, used_hp, - internal_config.num_hugepage_sizes) < 0) - return -1; - - for (hp_sz_idx = 0; - hp_sz_idx < (int)internal_config.num_hugepage_sizes; - hp_sz_idx++) { - for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES; - socket_id++) { - struct rte_memseg **pages; - struct hugepage_info *hpi = &used_hp[hp_sz_idx]; - unsigned int num_pages = hpi->num_pages[socket_id]; - unsigned int num_pages_alloc; - - if (num_pages == 0) - continue; - - RTE_LOG(DEBUG, EAL, "Allocating %u pages of size %" PRIu64 "M on socket %i\n", - num_pages, hpi->hugepage_sz >> 20, socket_id); - - /* we may not be able to allocate all pages in one go, - * because we break up our memory map into multiple - * memseg lists. 
therefore, try allocating multiple - * times and see if we can get the desired number of - * pages from multiple allocations. - */ - - num_pages_alloc = 0; - do { - int i, cur_pages, needed; - - needed = num_pages - num_pages_alloc; - - pages = malloc(sizeof(*pages) * needed); - - /* do not request exact number of pages */ - cur_pages = eal_memalloc_alloc_seg_bulk(pages, - needed, hpi->hugepage_sz, - socket_id, false); - if (cur_pages <= 0) { - free(pages); - return -1; - } - - /* mark preallocated pages as unfreeable */ - for (i = 0; i < cur_pages; i++) { - struct rte_memseg *ms = pages[i]; - ms->flags |= RTE_MEMSEG_FLAG_DO_NOT_FREE; - } - free(pages); - - num_pages_alloc += cur_pages; - } while (num_pages_alloc != num_pages); - } - } - /* if socket limits were specified, set them */ - if (internal_config.force_socket_limits) { - unsigned int i; - for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { - uint64_t limit = internal_config.socket_limit[i]; - if (limit == 0) - continue; - if (rte_mem_alloc_validator_register("socket-limit", - limits_callback, i, limit)) - RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n"); - } - } - return 0; -} - /* * uses fstat to report the size of a file on disk */ @@ -2010,15 +1639,21 @@ eal_hugepage_attach(void) int rte_eal_hugepage_init(void) { - return internal_config.legacy_mem ? + const struct internal_config *internal_conf = + eal_get_internal_configuration(); + + return internal_conf->legacy_mem ? eal_legacy_hugepage_init() : - eal_hugepage_init(); + eal_dynmem_hugepage_init(); } int rte_eal_hugepage_attach(void) { - return internal_config.legacy_mem ? + const struct internal_config *internal_conf = + eal_get_internal_configuration(); + + return internal_conf->legacy_mem ? eal_legacy_hugepage_attach() : eal_hugepage_attach(); } @@ -2047,9 +1682,11 @@ memseg_primary_init_32(void) struct rte_memseg_list *msl; uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem; uint64_t max_mem; + struct internal_config *internal_conf = + eal_get_internal_configuration(); /* no-huge does not need this at all */ - if (internal_config.no_hugetlbfs) + if (internal_conf->no_hugetlbfs) return 0; /* this is a giant hack, but desperate times call for desperate @@ -2062,7 +1699,7 @@ memseg_primary_init_32(void) * unneeded pages. this will not affect secondary processes, as those * should be able to mmap the space without (too many) problems. */ - if (internal_config.legacy_mem) + if (internal_conf->legacy_mem) return 0; /* 32-bit mode is a very special case. we cannot know in advance where @@ -2071,12 +1708,12 @@ memseg_primary_init_32(void) */ active_sockets = 0; total_requested_mem = 0; - if (internal_config.force_sockets) + if (internal_conf->force_sockets) for (i = 0; i < rte_socket_count(); i++) { uint64_t mem; socket_id = rte_socket_id_by_idx(i); - mem = internal_config.socket_mem[socket_id]; + mem = internal_conf->socket_mem[socket_id]; if (mem == 0) continue; @@ -2085,7 +1722,7 @@ memseg_primary_init_32(void) total_requested_mem += mem; } else - total_requested_mem = internal_config.memory; + total_requested_mem = internal_conf->memory; max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; if (total_requested_mem > max_mem) { @@ -2100,7 +1737,7 @@ memseg_primary_init_32(void) /* the allocation logic is a little bit convoluted, but here's how it * works, in a nutshell: * - if user hasn't specified on which sockets to allocate memory via - * --socket-mem, we allocate all of our memory on master core socket. 
+ * --socket-mem, we allocate all of our memory on main core socket. * - if user has specified sockets to allocate memory on, there may be * some "unused" memory left (e.g. if user has specified --socket-mem * such that not all memory adds up to 2 gigabytes), so add it to all @@ -2112,9 +1749,9 @@ memseg_primary_init_32(void) /* create memseg lists */ for (i = 0; i < rte_socket_count(); i++) { - int hp_sizes = (int) internal_config.num_hugepage_sizes; + int hp_sizes = (int) internal_conf->num_hugepage_sizes; uint64_t max_socket_mem, cur_socket_mem; - unsigned int master_lcore_socket; + unsigned int main_lcore_socket; struct rte_config *cfg = rte_eal_get_configuration(); bool skip; @@ -2122,18 +1759,18 @@ memseg_primary_init_32(void) #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES /* we can still sort pages by socket in legacy mode */ - if (!internal_config.legacy_mem && socket_id > 0) + if (!internal_conf->legacy_mem && socket_id > 0) break; #endif /* if we didn't specifically request memory on this socket */ skip = active_sockets != 0 && - internal_config.socket_mem[socket_id] == 0; + internal_conf->socket_mem[socket_id] == 0; /* ...or if we didn't specifically request memory on *any* - * socket, and this is not master lcore + * socket, and this is not main lcore */ - master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore); - skip |= active_sockets == 0 && socket_id != master_lcore_socket; + main_lcore_socket = rte_lcore_to_socket_id(cfg->main_lcore); + skip |= active_sockets == 0 && socket_id != main_lcore_socket; if (skip) { RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n", @@ -2143,8 +1780,8 @@ memseg_primary_init_32(void) /* max amount of memory on this socket */ max_socket_mem = (active_sockets != 0 ? - internal_config.socket_mem[socket_id] : - internal_config.memory) + + internal_conf->socket_mem[socket_id] : + internal_conf->memory) + extra_mem_per_socket; cur_socket_mem = 0; @@ -2154,7 +1791,7 @@ memseg_primary_init_32(void) struct hugepage_info *hpi; int type_msl_idx, max_segs, total_segs = 0; - hpi = &internal_config.hugepage_info[hpi_idx]; + hpi = &internal_conf->hugepage_info[hpi_idx]; hugepage_sz = hpi->hugepage_sz; /* check if pages are actually available */ @@ -2181,7 +1818,7 @@ memseg_primary_init_32(void) if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { RTE_LOG(ERR, EAL, "No more space in memseg lists, please increase %s\n", - RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); + RTE_STR(RTE_MAX_MEMSEG_LISTS)); return -1; } @@ -2191,8 +1828,9 @@ memseg_primary_init_32(void) max_pagesz_mem); n_segs = cur_mem / hugepage_sz; - if (alloc_memseg_list(msl, hugepage_sz, n_segs, - socket_id, type_msl_idx)) { + if (eal_memseg_list_init(msl, hugepage_sz, + n_segs, socket_id, type_msl_idx, + true)) { /* failing to allocate a memseg list is * a serious error. */ @@ -2200,13 +1838,13 @@ memseg_primary_init_32(void) return -1; } - if (alloc_va_space(msl)) { + if (eal_memseg_list_alloc(msl, 0)) { /* if we couldn't allocate VA space, we * can try with smaller page sizes. 
*/ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n"); /* deallocate memseg list */ - if (free_memseg_list(msl)) + if (memseg_list_free(msl)) return -1; break; } @@ -2231,185 +1869,7 @@ memseg_primary_init_32(void) static int __rte_unused memseg_primary_init(void) { - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - struct memtype { - uint64_t page_sz; - int socket_id; - } *memtypes = NULL; - int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */ - struct rte_memseg_list *msl; - uint64_t max_mem, max_mem_per_type; - unsigned int max_seglists_per_type; - unsigned int n_memtypes, cur_type; - - /* no-huge does not need this at all */ - if (internal_config.no_hugetlbfs) - return 0; - - /* - * figuring out amount of memory we're going to have is a long and very - * involved process. the basic element we're operating with is a memory - * type, defined as a combination of NUMA node ID and page size (so that - * e.g. 2 sockets with 2 page sizes yield 4 memory types in total). - * - * deciding amount of memory going towards each memory type is a - * balancing act between maximum segments per type, maximum memory per - * type, and number of detected NUMA nodes. the goal is to make sure - * each memory type gets at least one memseg list. - * - * the total amount of memory is limited by RTE_MAX_MEM_MB value. - * - * the total amount of memory per type is limited by either - * RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number - * of detected NUMA nodes. additionally, maximum number of segments per - * type is also limited by RTE_MAX_MEMSEG_PER_TYPE. this is because for - * smaller page sizes, it can take hundreds of thousands of segments to - * reach the above specified per-type memory limits. - * - * additionally, each type may have multiple memseg lists associated - * with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger - * page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones. - * - * the number of memseg lists per type is decided based on the above - * limits, and also taking number of detected NUMA nodes, to make sure - * that we don't run out of memseg lists before we populate all NUMA - * nodes with memory. - * - * we do this in three stages. first, we collect the number of types. - * then, we figure out memory constraints and populate the list of - * would-be memseg lists. then, we go ahead and allocate the memseg - * lists. 
- */ - - /* create space for mem types */ - n_memtypes = internal_config.num_hugepage_sizes * rte_socket_count(); - memtypes = calloc(n_memtypes, sizeof(*memtypes)); - if (memtypes == NULL) { - RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n"); - return -1; - } - - /* populate mem types */ - cur_type = 0; - for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes; - hpi_idx++) { - struct hugepage_info *hpi; - uint64_t hugepage_sz; - - hpi = &internal_config.hugepage_info[hpi_idx]; - hugepage_sz = hpi->hugepage_sz; - - for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) { - int socket_id = rte_socket_id_by_idx(i); - -#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES - /* we can still sort pages by socket in legacy mode */ - if (!internal_config.legacy_mem && socket_id > 0) - break; -#endif - memtypes[cur_type].page_sz = hugepage_sz; - memtypes[cur_type].socket_id = socket_id; - - RTE_LOG(DEBUG, EAL, "Detected memory type: " - "socket_id:%u hugepage_sz:%" PRIu64 "\n", - socket_id, hugepage_sz); - } - } - /* number of memtypes could have been lower due to no NUMA support */ - n_memtypes = cur_type; - - /* set up limits for types */ - max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; - max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20, - max_mem / n_memtypes); - /* - * limit maximum number of segment lists per type to ensure there's - * space for memseg lists for all NUMA nodes with all page sizes - */ - max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes; - - if (max_seglists_per_type == 0) { - RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n", - RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); - goto out; - } - - /* go through all mem types and create segment lists */ - msl_idx = 0; - for (cur_type = 0; cur_type < n_memtypes; cur_type++) { - unsigned int cur_seglist, n_seglists, n_segs; - unsigned int max_segs_per_type, max_segs_per_list; - struct memtype *type = &memtypes[cur_type]; - uint64_t max_mem_per_list, pagesz; - int socket_id; - - pagesz = type->page_sz; - socket_id = type->socket_id; - - /* - * we need to create segment lists for this type. we must take - * into account the following things: - * - * 1. total amount of memory we can use for this memory type - * 2. total amount of memory per memseg list allowed - * 3. number of segments needed to fit the amount of memory - * 4. number of segments allowed per type - * 5. number of segments allowed per memseg list - * 6. 
number of memseg lists we are allowed to take up - */ - - /* calculate how much segments we will need in total */ - max_segs_per_type = max_mem_per_type / pagesz; - /* limit number of segments to maximum allowed per type */ - max_segs_per_type = RTE_MIN(max_segs_per_type, - (unsigned int)RTE_MAX_MEMSEG_PER_TYPE); - /* limit number of segments to maximum allowed per list */ - max_segs_per_list = RTE_MIN(max_segs_per_type, - (unsigned int)RTE_MAX_MEMSEG_PER_LIST); - - /* calculate how much memory we can have per segment list */ - max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz, - (uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20); - - /* calculate how many segments each segment list will have */ - n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz); - - /* calculate how many segment lists we can have */ - n_seglists = RTE_MIN(max_segs_per_type / n_segs, - max_mem_per_type / max_mem_per_list); - - /* limit number of segment lists according to our maximum */ - n_seglists = RTE_MIN(n_seglists, max_seglists_per_type); - - RTE_LOG(DEBUG, EAL, "Creating %i segment lists: " - "n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n", - n_seglists, n_segs, socket_id, pagesz); - - /* create all segment lists */ - for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) { - if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { - RTE_LOG(ERR, EAL, - "No more space in memseg lists, please increase %s\n", - RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); - goto out; - } - msl = &mcfg->memsegs[msl_idx++]; - - if (alloc_memseg_list(msl, pagesz, n_segs, - socket_id, cur_seglist)) - goto out; - - if (alloc_va_space(msl)) { - RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); - goto out; - } - } - } - /* we're successful */ - ret = 0; -out: - free(memtypes); - return ret; + return eal_dynmem_memseg_lists_init(); } static int @@ -2433,7 +1893,7 @@ memseg_secondary_init(void) } /* preallocate VA space */ - if (alloc_va_space(msl)) { + if (eal_memseg_list_alloc(msl, 0)) { RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n"); return -1; } @@ -2448,6 +1908,10 @@ rte_eal_memseg_init(void) /* increase rlimit to maximum */ struct rlimit lim; +#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES + const struct internal_config *internal_conf = + eal_get_internal_configuration(); +#endif if (getrlimit(RLIMIT_NOFILE, &lim) == 0) { /* set limit to maximum */ lim.rlim_cur = lim.rlim_max; @@ -2464,7 +1928,7 @@ rte_eal_memseg_init(void) RTE_LOG(ERR, EAL, "Cannot get current resource limits\n"); } #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES - if (!internal_config.legacy_mem && rte_socket_count() > 1) { + if (!internal_conf->legacy_mem && rte_socket_count() > 1) { RTE_LOG(WARNING, EAL, "DPDK is running on a NUMA system, but is compiled without NUMA support.\n"); RTE_LOG(WARNING, EAL, "This will have adverse consequences for performance and usability.\n"); RTE_LOG(WARNING, EAL, "Please use --"OPT_LEGACY_MEM" option, or recompile with NUMA support.\n");
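A note on the recurring pattern in this patch: every function that previously read the global `internal_config` now fetches the per-process configuration through `eal_get_internal_configuration()` and reads the same fields through the returned pointer. Below is a minimal before/after sketch modelled on the `no_shconf` check in create_shared_memory() above; the function bodies are elided, so this only illustrates the shape of the change, not compilable code:

	/* before this patch: direct read of the global configuration */
	if (internal_config.no_shconf) {
		/* ... anonymous-memory fallback ... */
	}

	/* after this patch: obtain the per-process configuration once per
	 * function, then read the same field through the returned pointer
	 */
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();
	if (internal_conf->no_shconf) {
		/* ... anonymous-memory fallback ... */
	}

Where the configuration is needed only under conditional compilation, as in remap_segment() and rte_eal_memseg_init() above, the internal_conf declaration is placed under the same #ifndef so the other build configuration does not end up with an unused variable.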
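The removed local helpers alloc_memseg_list() and alloc_va_space(), together with the open-coded per-page population loop in the no-hugetlbfs path, are superseded by the common eal_memseg_list_init()/eal_memseg_list_init_named(), eal_memseg_list_alloc() and eal_memseg_list_populate() helpers. The sketch below reconstructs the resulting no-hugetlbfs flow purely from the call sites visible in this diff; error logging, cleanup on failure and the memfd/anonymous-mapping selection are omitted, and `mcfg`, `internal_conf`, `fd`, `flags` and `addr` come from the surrounding function, so this is not a drop-in body for eal_legacy_hugepage_init():

	struct rte_memseg_list *msl = &mcfg->memsegs[0];
	uint64_t page_sz = RTE_PGSIZE_4K;
	size_t mem_sz = internal_conf->memory;
	int n_segs = mem_sz / page_sz;

	/* create the fbarray-backed list named "nohugemem" on socket 0,
	 * marked as a heap segment list
	 */
	if (eal_memseg_list_init_named(msl, "nohugemem", page_sz, n_segs, 0, true))
		return -1;

	/* reserve VA space for the whole list; 0 means no extra reservation flags */
	if (eal_memseg_list_alloc(msl, 0))
		return -1;

	/* map the actual memory at the reserved address */
	addr = mmap(msl->base_va, mem_sz, PROT_READ | PROT_WRITE,
			flags | MAP_FIXED, fd, 0);
	if (addr == MAP_FAILED || addr != msl->base_va)
		return -1;

	/* mark each page-sized segment as used; this call replaces the removed
	 * loop that filled in ms->addr, ms->iova and ms->len per segment
	 */
	eal_memseg_list_populate(msl, addr, n_segs);

The 32-bit legacy paths follow the same pairing: prealloc_segments() and memseg_primary_init_32() call eal_memseg_list_init() followed by eal_memseg_list_alloc(msl, 0), with the renamed memseg_list_free() used to tear the list back down when VA reservation fails and a smaller page size is retried.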