From: Sergio Gonzalez Monroy
Date: Wed, 15 Jul 2015 16:32:21 +0000 (+0100)
Subject: mem: rework memzone to be allocated by malloc
X-Git-Tag: spdx-start~8735
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=fafcc11985a2d79c88334b10a6284068670954da;p=dpdk.git

mem: rework memzone to be allocated by malloc

In the current memory hierarchy, memsegs are groups of physically
contiguous hugepages, memzones are slices of memsegs, and malloc further
slices memzones into smaller memory chunks.

This patch modifies malloc so that it partitions memsegs instead of
memzones. Memzones now call malloc internally for memory allocation
while keeping their ABI unchanged.

During initialization, malloc adds all available memory to the heaps.

CONFIG_RTE_MALLOC_MEMZONE_SIZE specified the default memory block size
used to expand the heap. The option is no longer used or relevant, so
remove it.

Remove the free_memseg field from the internal mem config structure as
it is no longer used. Also remove the code in ivshmem that was setting
up free_memseg on init.

It is now possible to free memzones and, therefore, any other structure
based on memzones, e.g. mempools.

Signed-off-by: Sergio Gonzalez Monroy
---

diff --git a/config/common_bsdapp b/config/common_bsdapp index 5bb7f557ea..4e505bf411 100644 --- a/config/common_bsdapp +++ b/config/common_bsdapp @@ -107,7 +107,6 @@ CONFIG_RTE_LOG_HISTORY=256 CONFIG_RTE_EAL_ALLOW_INV_SOCKET_ID=n CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n CONFIG_RTE_MALLOC_DEBUG=n -CONFIG_RTE_MALLOC_MEMZONE_SIZE=11M # # FreeBSD contiguous memory driver settings diff --git a/config/common_linuxapp b/config/common_linuxapp index 7b57044298..579a5d794a 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -110,7 +110,6 @@ CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n CONFIG_RTE_EAL_IGB_UIO=y CONFIG_RTE_EAL_VFIO=y CONFIG_RTE_MALLOC_DEBUG=n -CONFIG_RTE_MALLOC_MEMZONE_SIZE=11M # # Special configurations in PCI Config Space for high performance diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 9c1da71548..31bf6d8d66 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -50,15 +50,15 @@ #include #include +#include "malloc_heap.h" +#include "malloc_elem.h" #include "eal_private.h" -/* internal copy of free memory segments */ -static struct rte_memseg *free_memseg = NULL; - static inline const struct rte_memzone * memzone_lookup_thread_unsafe(const char *name) { const struct rte_mem_config *mcfg; + const struct rte_memzone *mz; unsigned i = 0; /* get pointer to global configuration */ @@ -68,62 +68,50 @@ memzone_lookup_thread_unsafe(const char *name) * the algorithm is not optimal (linear), but there are few * zones and this function should be called at init only */ - for (i = 0; i < RTE_MAX_MEMZONE && mcfg->memzone[i].addr != NULL; i++) { - if (!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE)) + for (i = 0; i < RTE_MAX_MEMZONE; i++) { + mz = &mcfg->memzone[i]; + if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) return &mcfg->memzone[i]; } return NULL; } -/* - * Helper function for memzone_reserve_aligned_thread_unsafe(). - * Calculate address offset from the start of the segment. - * Align offset in that way that it satisfy istart alignmnet and - * buffer of the requested length would not cross specified boundary.
- */ -static inline phys_addr_t -align_phys_boundary(const struct rte_memseg *ms, size_t len, size_t align, - size_t bound) +/* This function will return the greatest free block if a heap has been + * specified. If no heap has been specified, it will return the heap and + * length of the greatest free block available in all heaps */ +static size_t +find_heap_max_free_elem(int *s, unsigned align) { - phys_addr_t addr_offset, bmask, end, start; - size_t step; - - step = RTE_MAX(align, bound); - bmask = ~((phys_addr_t)bound - 1); - - /* calculate offset to closest alignment */ - start = RTE_ALIGN_CEIL(ms->phys_addr, align); - addr_offset = start - ms->phys_addr; + struct rte_mem_config *mcfg; + struct rte_malloc_socket_stats stats; + int i, socket = *s; + size_t len = 0; - while (addr_offset + len < ms->len) { + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; - /* check, do we meet boundary condition */ - end = start + len - (len != 0); - if ((start & bmask) == (end & bmask)) - break; + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + if ((socket != SOCKET_ID_ANY) && (socket != i)) + continue; - /* calculate next offset */ - start = RTE_ALIGN_CEIL(start + 1, step); - addr_offset = start - ms->phys_addr; + malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats); + if (stats.greatest_free_size > len) { + len = stats.greatest_free_size; + *s = i; + } } - return addr_offset; + return (len - MALLOC_ELEM_OVERHEAD - align); } static const struct rte_memzone * memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, - int socket_id, uint64_t size_mask, unsigned align, - unsigned bound) + int socket_id, unsigned flags, unsigned align, unsigned bound) { struct rte_mem_config *mcfg; - unsigned i = 0; - int memseg_idx = -1; - uint64_t addr_offset, seg_offset = 0; size_t requested_len; - size_t memseg_len = 0; - phys_addr_t memseg_physaddr; - void *memseg_addr; + int socket, i; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; @@ -155,7 +143,6 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, if (align < RTE_CACHE_LINE_SIZE) align = RTE_CACHE_LINE_SIZE; - /* align length on cache boundary. Check for overflow before doing so */ if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) { rte_errno = EINVAL; /* requested size too big */ @@ -169,108 +156,65 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len); /* check that boundary condition is valid */ - if (bound != 0 && - (requested_len > bound || !rte_is_power_of_2(bound))) { + if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) { rte_errno = EINVAL; return NULL; } - /* find the smallest segment matching requirements */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - /* last segment */ - if (free_memseg[i].addr == NULL) - break; - - /* empty segment, skip it */ - if (free_memseg[i].len == 0) - continue; - - /* bad socket ID */ - if (socket_id != SOCKET_ID_ANY && - free_memseg[i].socket_id != SOCKET_ID_ANY && - socket_id != free_memseg[i].socket_id) - continue; - - /* - * calculate offset to closest alignment that - * meets boundary conditions. 
- */ - addr_offset = align_phys_boundary(free_memseg + i, - requested_len, align, bound); + if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) { + rte_errno = EINVAL; + return NULL; + } - /* check len */ - if ((requested_len + addr_offset) > free_memseg[i].len) - continue; + if (!rte_eal_has_hugepages()) + socket_id = SOCKET_ID_ANY; - if ((size_mask & free_memseg[i].hugepage_sz) == 0) - continue; + if (len == 0) { + if (bound != 0) + requested_len = bound; + else + requested_len = find_heap_max_free_elem(&socket_id, align); + } - /* this segment is the best until now */ - if (memseg_idx == -1) { - memseg_idx = i; - memseg_len = free_memseg[i].len; - seg_offset = addr_offset; - } - /* find the biggest contiguous zone */ - else if (len == 0) { - if (free_memseg[i].len > memseg_len) { - memseg_idx = i; - memseg_len = free_memseg[i].len; - seg_offset = addr_offset; - } - } - /* - * find the smallest (we already checked that current - * zone length is > len - */ - else if (free_memseg[i].len + align < memseg_len || - (free_memseg[i].len <= memseg_len + align && - addr_offset < seg_offset)) { - memseg_idx = i; - memseg_len = free_memseg[i].len; - seg_offset = addr_offset; + if (socket_id == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_id; + + /* allocate memory on heap */ + void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL, + requested_len, flags, align, bound); + + if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) { + /* try other heaps */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + if (socket == i) + continue; + + mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i], + NULL, requested_len, flags, align, bound); + if (mz_addr != NULL) + break; } } - /* no segment found */ - if (memseg_idx == -1) { + if (mz_addr == NULL) { rte_errno = ENOMEM; return NULL; } - /* save aligned physical and virtual addresses */ - memseg_physaddr = free_memseg[memseg_idx].phys_addr + seg_offset; - memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr, - (uintptr_t) seg_offset); - - /* if we are looking for a biggest memzone */ - if (len == 0) { - if (bound == 0) - requested_len = memseg_len - seg_offset; - else - requested_len = RTE_ALIGN_CEIL(memseg_physaddr + 1, - bound) - memseg_physaddr; - } - - /* set length to correct value */ - len = (size_t)seg_offset + requested_len; - - /* update our internal state */ - free_memseg[memseg_idx].len -= len; - free_memseg[memseg_idx].phys_addr += len; - free_memseg[memseg_idx].addr = - (char *)free_memseg[memseg_idx].addr + len; + const struct malloc_elem *elem = malloc_elem_from_data(mz_addr); /* fill the zone in config */ struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++]; snprintf(mz->name, sizeof(mz->name), "%s", name); - mz->phys_addr = memseg_physaddr; - mz->addr = memseg_addr; - mz->len = requested_len; - mz->hugepage_sz = free_memseg[memseg_idx].hugepage_sz; - mz->socket_id = free_memseg[memseg_idx].socket_id; + mz->phys_addr = rte_malloc_virt2phy(mz_addr); + mz->addr = mz_addr; + mz->len = (requested_len == 0 ? 
elem->size : requested_len); + mz->hugepage_sz = elem->ms->hugepage_sz; + mz->socket_id = elem->ms->socket_id; mz->flags = 0; - mz->memseg_id = memseg_idx; + mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg; return mz; } @@ -282,26 +226,6 @@ rte_memzone_reserve_thread_safe(const char *name, size_t len, { struct rte_mem_config *mcfg; const struct rte_memzone *mz = NULL; - uint64_t size_mask = 0; - - if (flags & RTE_MEMZONE_256KB) - size_mask |= RTE_PGSIZE_256K; - if (flags & RTE_MEMZONE_2MB) - size_mask |= RTE_PGSIZE_2M; - if (flags & RTE_MEMZONE_16MB) - size_mask |= RTE_PGSIZE_16M; - if (flags & RTE_MEMZONE_256MB) - size_mask |= RTE_PGSIZE_256M; - if (flags & RTE_MEMZONE_512MB) - size_mask |= RTE_PGSIZE_512M; - if (flags & RTE_MEMZONE_1GB) - size_mask |= RTE_PGSIZE_1G; - if (flags & RTE_MEMZONE_4GB) - size_mask |= RTE_PGSIZE_4G; - if (flags & RTE_MEMZONE_16GB) - size_mask |= RTE_PGSIZE_16G; - if (!size_mask) - size_mask = UINT64_MAX; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; @@ -309,18 +233,7 @@ rte_memzone_reserve_thread_safe(const char *name, size_t len, rte_rwlock_write_lock(&mcfg->mlock); mz = memzone_reserve_aligned_thread_unsafe( - name, len, socket_id, size_mask, align, bound); - - /* - * If we failed to allocate the requested page size, and the - * RTE_MEMZONE_SIZE_HINT_ONLY flag is specified, try allocating - * again. - */ - if (!mz && rte_errno == ENOMEM && size_mask != UINT64_MAX && - flags & RTE_MEMZONE_SIZE_HINT_ONLY) { - mz = memzone_reserve_aligned_thread_unsafe( - name, len, socket_id, UINT64_MAX, align, bound); - } + name, len, socket_id, flags, align, bound); rte_rwlock_write_unlock(&mcfg->mlock); @@ -411,45 +324,6 @@ rte_memzone_dump(FILE *f) rte_rwlock_read_unlock(&mcfg->mlock); } -/* - * called by init: modify the free memseg list to have cache-aligned - * addresses and cache-aligned lengths - */ -static int -memseg_sanitize(struct rte_memseg *memseg) -{ - unsigned phys_align; - unsigned virt_align; - unsigned off; - - phys_align = memseg->phys_addr & RTE_CACHE_LINE_MASK; - virt_align = (unsigned long)memseg->addr & RTE_CACHE_LINE_MASK; - - /* - * sanity check: phys_addr and addr must have the same - * alignment - */ - if (phys_align != virt_align) - return -1; - - /* memseg is really too small, don't bother with it */ - if (memseg->len < (2 * RTE_CACHE_LINE_SIZE)) { - memseg->len = 0; - return 0; - } - - /* align start address */ - off = (RTE_CACHE_LINE_SIZE - phys_align) & RTE_CACHE_LINE_MASK; - memseg->phys_addr += off; - memseg->addr = (char *)memseg->addr + off; - memseg->len -= off; - - /* align end address */ - memseg->len &= ~((uint64_t)RTE_CACHE_LINE_MASK); - - return 0; -} - /* * Init the memzone subsystem */ @@ -458,14 +332,10 @@ rte_eal_memzone_init(void) { struct rte_mem_config *mcfg; const struct rte_memseg *memseg; - unsigned i = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; - /* mirror the runtime memsegs from config */ - free_memseg = mcfg->free_memseg; - /* secondary processes don't need to initialise anything */ if (rte_eal_process_type() == RTE_PROC_SECONDARY) return 0; @@ -478,33 +348,13 @@ rte_eal_memzone_init(void) rte_rwlock_write_lock(&mcfg->mlock); - /* fill in uninitialized free_memsegs */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - if (memseg[i].addr == NULL) - break; - if (free_memseg[i].addr != NULL) - continue; - memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg)); - } - - /* make all zones cache-aligned */ - 
for (i = 0; i < RTE_MAX_MEMSEG; i++) { - if (free_memseg[i].addr == NULL) - break; - if (memseg_sanitize(&free_memseg[i]) < 0) { - RTE_LOG(ERR, EAL, "%s(): Sanity check failed\n", __func__); - rte_rwlock_write_unlock(&mcfg->mlock); - return -1; - } - } - /* delete all zones */ mcfg->memzone_idx = 0; memset(mcfg->memzone, 0, sizeof(mcfg->memzone)); rte_rwlock_write_unlock(&mcfg->mlock); - return 0; + return rte_eal_malloc_heap_init(); } /* Walk all reserved memory zones */ diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h index 34f5abcaaa..7de906b602 100644 --- a/lib/librte_eal/common/include/rte_eal_memconfig.h +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -73,9 +73,6 @@ struct rte_mem_config { struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */ struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */ - /* Runtime Physmem descriptors. */ - struct rte_memseg free_memseg[RTE_MAX_MEMSEG]; - struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */ /* Heaps of Malloc per socket */ diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h index 716216f253..b27035628f 100644 --- a/lib/librte_eal/common/include/rte_malloc_heap.h +++ b/lib/librte_eal/common/include/rte_malloc_heap.h @@ -40,7 +40,7 @@ #include /* Number of free lists per heap, grouped by size. */ -#define RTE_HEAP_NUM_FREELISTS 5 +#define RTE_HEAP_NUM_FREELISTS 13 /** * Structure to hold malloc heap @@ -48,7 +48,6 @@ struct malloc_heap { rte_spinlock_t lock; LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS]; - unsigned mz_count; unsigned alloc_count; size_t total_size; } __rte_cache_aligned; diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index a5e1248ad0..b54ee330df 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -37,7 +37,6 @@ #include #include -#include #include #include #include @@ -56,10 +55,10 @@ */ void malloc_elem_init(struct malloc_elem *elem, - struct malloc_heap *heap, const struct rte_memzone *mz, size_t size) + struct malloc_heap *heap, const struct rte_memseg *ms, size_t size) { elem->heap = heap; - elem->mz = mz; + elem->ms = ms; elem->prev = NULL; memset(&elem->free_list, 0, sizeof(elem->free_list)); elem->state = ELEM_FREE; @@ -70,12 +69,12 @@ malloc_elem_init(struct malloc_elem *elem, } /* - * initialise a dummy malloc_elem header for the end-of-memzone marker + * initialise a dummy malloc_elem header for the end-of-memseg marker */ void malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev) { - malloc_elem_init(elem, prev->heap, prev->mz, 0); + malloc_elem_init(elem, prev->heap, prev->ms, 0); elem->prev = prev; elem->state = ELEM_BUSY; /* mark busy so its never merged */ } @@ -86,12 +85,24 @@ malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev) * fit, return NULL. 
*/ static void * -elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align) +elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound) { - const uintptr_t end_pt = (uintptr_t)elem + + const size_t bmask = ~(bound - 1); + uintptr_t end_pt = (uintptr_t)elem + elem->size - MALLOC_ELEM_TRAILER_LEN; - const uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); - const uintptr_t new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; + uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); + uintptr_t new_elem_start; + + /* check boundary */ + if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { + end_pt = RTE_ALIGN_FLOOR(end_pt, bound); + new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); + if (((end_pt - 1) & bmask) != (new_data_start & bmask)) + return NULL; + } + + new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; /* if the new start point is before the exist start, it won't fit */ return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start; @@ -102,9 +113,10 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align) * alignment request from the current element */ int -malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align) +malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound) { - return elem_start_pt(elem, size, align) != NULL; + return elem_start_pt(elem, size, align, bound) != NULL; } /* @@ -115,10 +127,10 @@ static void split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt) { struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size); - const unsigned old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem; - const unsigned new_elem_size = elem->size - old_elem_size; + const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem; + const size_t new_elem_size = elem->size - old_elem_size; - malloc_elem_init(split_pt, elem->heap, elem->mz, new_elem_size); + malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size); split_pt->prev = elem; next_elem->prev = split_pt; elem->size = old_elem_size; @@ -168,8 +180,9 @@ malloc_elem_free_list_index(size_t size) void malloc_elem_free_list_insert(struct malloc_elem *elem) { - size_t idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN); + size_t idx; + idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN); elem->state = ELEM_FREE; LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list); } @@ -190,12 +203,26 @@ elem_free_list_remove(struct malloc_elem *elem) * is not done here, as it's done there previously. 
*/ struct malloc_elem * -malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align) +malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound) { - struct malloc_elem *new_elem = elem_start_pt(elem, size, align); - const unsigned old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem; + struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound); + const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem; + const size_t trailer_size = elem->size - old_elem_size - size - + MALLOC_ELEM_OVERHEAD; + + elem_free_list_remove(elem); - if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE){ + if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* split it, too much free space after elem */ + struct malloc_elem *new_free_elem = + RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD); + + split_elem(elem, new_free_elem); + malloc_elem_free_list_insert(new_free_elem); + } + + if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { /* don't split it, pad the element instead */ elem->state = ELEM_BUSY; elem->pad = old_elem_size; @@ -208,8 +235,6 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align) new_elem->size = elem->size - elem->pad; set_header(new_elem); } - /* remove element from free list */ - elem_free_list_remove(elem); return new_elem; } @@ -219,7 +244,6 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align) * Re-insert original element, in case its new size makes it * belong on a different list. */ - elem_free_list_remove(elem); split_elem(elem, new_elem); new_elem->state = ELEM_BUSY; malloc_elem_free_list_insert(elem); diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h index 9790b1a09e..e05d2ea056 100644 --- a/lib/librte_eal/common/malloc_elem.h +++ b/lib/librte_eal/common/malloc_elem.h @@ -47,9 +47,9 @@ enum elem_state { struct malloc_elem { struct malloc_heap *heap; - struct malloc_elem *volatile prev; /* points to prev elem in memzone */ + struct malloc_elem *volatile prev; /* points to prev elem in memseg */ LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */ - const struct rte_memzone *mz; + const struct rte_memseg *ms; volatile enum elem_state state; uint32_t pad; size_t size; @@ -136,11 +136,11 @@ malloc_elem_from_data(const void *data) void malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap, - const struct rte_memzone *mz, + const struct rte_memseg *ms, size_t size); /* - * initialise a dummy malloc_elem header for the end-of-memzone marker + * initialise a dummy malloc_elem header for the end-of-memseg marker */ void malloc_elem_mkend(struct malloc_elem *elem, @@ -151,14 +151,16 @@ malloc_elem_mkend(struct malloc_elem *elem, * of the requested size and with the requested alignment */ int -malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align); +malloc_elem_can_hold(struct malloc_elem *elem, size_t size, + unsigned align, size_t bound); /* * reserve a block of data in an existing malloc_elem. If the malloc_elem * is much larger than the data block requested, we split the element in two. */ struct malloc_elem * -malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align); +malloc_elem_alloc(struct malloc_elem *elem, size_t size, + unsigned align, size_t bound); /* * free a malloc_elem block by adding it to the free list. 
If the diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 8861d27bfb..21d8914405 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -39,7 +39,6 @@ #include #include -#include #include #include #include @@ -54,123 +53,125 @@ #include "malloc_elem.h" #include "malloc_heap.h" -/* since the memzone size starts with a digit, it will appear unquoted in - * rte_config.h, so quote it so it can be passed to rte_str_to_size */ -#define MALLOC_MEMZONE_SIZE RTE_STR(RTE_MALLOC_MEMZONE_SIZE) - -/* - * returns the configuration setting for the memzone size as a size_t value - */ -static inline size_t -get_malloc_memzone_size(void) +static unsigned +check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) { - return rte_str_to_size(MALLOC_MEMZONE_SIZE); + unsigned check_flag = 0; + + if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY)) + return 1; + + switch (hugepage_sz) { + case RTE_PGSIZE_256K: + check_flag = RTE_MEMZONE_256KB; + break; + case RTE_PGSIZE_2M: + check_flag = RTE_MEMZONE_2MB; + break; + case RTE_PGSIZE_16M: + check_flag = RTE_MEMZONE_16MB; + break; + case RTE_PGSIZE_256M: + check_flag = RTE_MEMZONE_256MB; + break; + case RTE_PGSIZE_512M: + check_flag = RTE_MEMZONE_512MB; + break; + case RTE_PGSIZE_1G: + check_flag = RTE_MEMZONE_1GB; + break; + case RTE_PGSIZE_4G: + check_flag = RTE_MEMZONE_4GB; + break; + case RTE_PGSIZE_16G: + check_flag = RTE_MEMZONE_16GB; + } + + return (check_flag & flags); } /* - * reserve an extra memory zone and make it available for use by a particular - * heap. This reserves the zone and sets a dummy malloc_elem header at the end + * Expand the heap with a memseg. + * This reserves the zone and sets a dummy malloc_elem header at the end * to prevent overflow. The rest of the zone is added to free list as a single * large free block */ -static int -malloc_heap_add_memzone(struct malloc_heap *heap, size_t size, unsigned align) +static void +malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms) { - const unsigned mz_flags = 0; - const size_t block_size = get_malloc_memzone_size(); - /* ensure the data we want to allocate will fit in the memzone */ - const size_t min_size = size + align + MALLOC_ELEM_OVERHEAD * 2; - const struct rte_memzone *mz = NULL; - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - unsigned numa_socket = heap - mcfg->malloc_heaps; - - size_t mz_size = min_size; - if (mz_size < block_size) - mz_size = block_size; - - char mz_name[RTE_MEMZONE_NAMESIZE]; - snprintf(mz_name, sizeof(mz_name), "MALLOC_S%u_HEAP_%u", - numa_socket, heap->mz_count++); - - /* try getting a block. 
if we fail and we don't need as big a block - * as given in the config, we can shrink our request and try again - */ - do { - mz = rte_memzone_reserve(mz_name, mz_size, numa_socket, - mz_flags); - if (mz == NULL) - mz_size /= 2; - } while (mz == NULL && mz_size > min_size); - if (mz == NULL) - return -1; - /* allocate the memory block headers, one at end, one at start */ - struct malloc_elem *start_elem = (struct malloc_elem *)mz->addr; - struct malloc_elem *end_elem = RTE_PTR_ADD(mz->addr, - mz_size - MALLOC_ELEM_OVERHEAD); + struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr; + struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr, + ms->len - MALLOC_ELEM_OVERHEAD); end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE); + const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem; - const unsigned elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem; - malloc_elem_init(start_elem, heap, mz, elem_size); + malloc_elem_init(start_elem, heap, ms, elem_size); malloc_elem_mkend(end_elem, start_elem); malloc_elem_free_list_insert(start_elem); - /* increase heap total size by size of new memzone */ - heap->total_size+=mz_size - MALLOC_ELEM_OVERHEAD; - return 0; + heap->total_size += elem_size; } /* * Iterates through the freelist for a heap to find a free element * which can store data of the required size and with the requested alignment. + * If size is 0, find the biggest available elem. * Returns null on failure, or pointer to element on success. */ static struct malloc_elem * -find_suitable_element(struct malloc_heap *heap, size_t size, unsigned align) +find_suitable_element(struct malloc_heap *heap, size_t size, + unsigned flags, size_t align, size_t bound) { size_t idx; - struct malloc_elem *elem; + struct malloc_elem *elem, *alt_elem = NULL; for (idx = malloc_elem_free_list_index(size); - idx < RTE_HEAP_NUM_FREELISTS; idx++) - { + idx < RTE_HEAP_NUM_FREELISTS; idx++) { for (elem = LIST_FIRST(&heap->free_head[idx]); - !!elem; elem = LIST_NEXT(elem, free_list)) - { - if (malloc_elem_can_hold(elem, size, align)) - return elem; + !!elem; elem = LIST_NEXT(elem, free_list)) { + if (malloc_elem_can_hold(elem, size, align, bound)) { + if (check_hugepage_sz(flags, elem->ms->hugepage_sz)) + return elem; + if (alt_elem == NULL) + alt_elem = elem; + } } } + + if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY)) + return alt_elem; + return NULL; } /* - * Main function called by malloc to allocate a block of memory from the - * heap. It locks the free list, scans it, and adds a new memzone if the - * scan fails. Once the new memzone is added, it re-scans and should return + * Main function to allocate a block of memory from the heap. + * It locks the free list, scans it, and adds a new memseg if the + * scan fails. Once the new memseg is added, it re-scans and should return * the new element after releasing the lock. 
*/ void * malloc_heap_alloc(struct malloc_heap *heap, - const char *type __attribute__((unused)), size_t size, unsigned align) + const char *type __attribute__((unused)), size_t size, unsigned flags, + size_t align, size_t bound) { + struct malloc_elem *elem; + size = RTE_CACHE_LINE_ROUNDUP(size); align = RTE_CACHE_LINE_ROUNDUP(align); + rte_spinlock_lock(&heap->lock); - struct malloc_elem *elem = find_suitable_element(heap, size, align); - if (elem == NULL){ - if ((malloc_heap_add_memzone(heap, size, align)) == 0) - elem = find_suitable_element(heap, size, align); - } - if (elem != NULL){ - elem = malloc_elem_alloc(elem, size, align); + elem = find_suitable_element(heap, size, flags, align, bound); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, bound); /* increase heap's count of allocated elements */ heap->alloc_count++; } rte_spinlock_unlock(&heap->lock); - return elem == NULL ? NULL : (void *)(&elem[1]); + return elem == NULL ? NULL : (void *)(&elem[1]); } /* @@ -206,3 +207,21 @@ malloc_heap_get_stats(const struct malloc_heap *heap, socket_stats->alloc_count = heap->alloc_count; return 0; } + +int +rte_eal_malloc_heap_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned ms_cnt; + struct rte_memseg *ms; + + if (mcfg == NULL) + return -1; + + for (ms = &mcfg->memseg[0], ms_cnt = 0; + (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0); + ms_cnt++, ms++) + malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms); + + return 0; +} diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index a47136d851..3ccbef0fdb 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -53,15 +53,15 @@ malloc_get_numa_socket(void) } void * -malloc_heap_alloc(struct malloc_heap *heap, const char *type, - size_t size, unsigned align); +malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size, + unsigned flags, size_t align, size_t bound); int malloc_heap_get_stats(const struct malloc_heap *heap, struct rte_malloc_socket_stats *socket_stats); int -rte_eal_heap_memzone_init(void); +rte_eal_malloc_heap_init(void); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index c313a57a67..47deb007d2 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -39,7 +39,6 @@ #include #include -#include #include #include #include @@ -77,6 +76,9 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) if (size == 0 || (align && !rte_is_power_of_2(align))) return NULL; + if (!rte_eal_has_hugepages()) + socket_arg = SOCKET_ID_ANY; + if (socket_arg == SOCKET_ID_ANY) socket = malloc_get_numa_socket(); else @@ -87,7 +89,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) return NULL; ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type, - size, align == 0 ? 1 : align); + size, 0, align == 0 ? 1 : align, 0); if (ret != NULL || socket_arg != SOCKET_ID_ANY) return ret; @@ -98,7 +100,7 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) continue; ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type, - size, align == 0 ? 1 : align); + size, 0, align == 0 ? 
1 : align, 0); if (ret != NULL) return ret; } @@ -256,5 +258,5 @@ rte_malloc_virt2phy(const void *addr) const struct malloc_elem *elem = malloc_elem_from_data(addr); if (elem == NULL) return 0; - return elem->mz->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->mz->addr); + return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr); } diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c index 2deaeb7305..facfb800e6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c +++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c @@ -725,15 +725,6 @@ map_all_segments(void) * expect memsegs to be empty */ memcpy(&mcfg->memseg[i], &ms, sizeof(struct rte_memseg)); - memcpy(&mcfg->free_memseg[i], &ms, - sizeof(struct rte_memseg)); - - - /* adjust the free_memseg so that there's no free space left */ - mcfg->free_memseg[i].ioremap_addr += mcfg->free_memseg[i].len; - mcfg->free_memseg[i].phys_addr += mcfg->free_memseg[i].len; - mcfg->free_memseg[i].addr_64 += mcfg->free_memseg[i].len; - mcfg->free_memseg[i].len = 0; close(fd); diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 4fd63bbb03..80ee78f0ba 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -1071,7 +1071,7 @@ rte_eal_hugepage_init(void) mcfg->memseg[0].addr = addr; mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; mcfg->memseg[0].len = internal_config.memory; - mcfg->memseg[0].socket_id = SOCKET_ID_ANY; + mcfg->memseg[0].socket_id = 0; return 0; }
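
For readers tracing the reworked allocation path, the following stand-alone sketch (not part of the patch) illustrates the behaviour from an application's point of view: after rte_eal_init(), both rte_memzone_reserve() and rte_malloc_socket() are served from the same per-socket malloc heaps that are now built directly on memsegs, so physical-address lookup works uniformly for both. The program, memzone name, buffer sizes, and error handling are illustrative only; it assumes a DPDK 2.x environment with hugepages configured.

#include <stdio.h>
#include <inttypes.h>

#include <rte_eal.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_lcore.h>

int
main(int argc, char **argv)
{
	const struct rte_memzone *mz;
	void *buf;

	if (rte_eal_init(argc, argv) < 0) {
		fprintf(stderr, "EAL init failed\n");
		return -1;
	}

	/* memzone reservation now goes through malloc_heap_alloc() internally */
	mz = rte_memzone_reserve("example_mz", 1 << 20, SOCKET_ID_ANY, 0);
	if (mz == NULL) {
		fprintf(stderr, "memzone reservation failed\n");
		return -1;
	}
	printf("memzone %s: addr=%p phys=0x%" PRIx64 " len=%zu socket=%d\n",
	       mz->name, mz->addr, mz->phys_addr, mz->len, (int)mz->socket_id);

	/* rte_malloc draws from the same heaps, so virt2phy works for it too */
	buf = rte_malloc_socket("example_buf", 4096, RTE_CACHE_LINE_SIZE,
			(int)rte_socket_id());
	if (buf != NULL) {
		printf("malloc buffer: addr=%p phys=0x%" PRIx64 "\n",
		       buf, rte_malloc_virt2phy(buf));
		rte_free(buf);
	}

	return 0;
}

The memzone itself is deliberately not freed above: this patch only lays the groundwork that makes freeing memzones (and structures built on them, such as mempools) possible, as noted in the commit message.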