/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <inttypes.h>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"

/*
 * If debugging is enabled, freed memory is set to a poison value
 * to catch buggy programs. Otherwise, freed memory is set to zero
 * to avoid having to zero it again in zmalloc.
 */
#ifdef RTE_MALLOC_DEBUG
#define MALLOC_POISON	0x6b
#else
#define MALLOC_POISON	0
#endif

size_t
malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
{
	void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
	void *data_start, *data_end;
	rte_iova_t expected_iova;
	struct rte_memseg *ms;
	size_t page_sz, cur, max;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	page_sz = (size_t)elem->msl->page_sz;
	data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
	/* segment must start after the header and with the specified alignment */
	contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);

	/* return if the aligned address is already outside the malloc element */
	if (contig_seg_start > data_end)
		return 0;

	/* if we're in IOVA-as-VA mode, or in legacy mode with hugepages,
	 * all elements are IOVA-contiguous. However, we can only make these
	 * assumptions about internal memory - externally allocated segments
	 * have to be checked.
	 */
	if (!elem->msl->external &&
			(rte_eal_iova_mode() == RTE_IOVA_VA ||
				(internal_conf->legacy_mem &&
					rte_eal_has_hugepages())))
		return RTE_PTR_DIFF(data_end, contig_seg_start);

	cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
	ms = rte_mem_virt2memseg(cur_page, elem->msl);

	/* do first iteration outside the loop */
	page_end = RTE_PTR_ADD(cur_page, page_sz);
	cur_seg_end = RTE_MIN(page_end, data_end);
	cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
			MALLOC_ELEM_TRAILER_LEN;
	max = cur;
	expected_iova = ms->iova + page_sz;
	/* memsegs are contiguous in memory */
	ms++;

	cur_page = RTE_PTR_ADD(cur_page, page_sz);

	while (cur_page < data_end) {
		page_end = RTE_PTR_ADD(cur_page, page_sz);
		cur_seg_end = RTE_MIN(page_end, data_end);

		/* reset start of contiguous segment if unexpected iova */
		if (ms->iova != expected_iova) {
			/* next contiguous segment must start at the specified
			 * alignment.
			 */
			contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
			/* new segment start may be on a different page, so find
			 * the page and skip to next iteration to make sure
			 * we're not blowing past data end.
			 */
			ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
			cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
			/* don't trigger another recalculation */
			expected_iova = ms->iova;
			continue;
		}
		/* cur_seg_end ends on a page boundary or on data end. if we're
		 * looking at data end, then the malloc trailer is already
		 * included in the calculations. if we're looking at page end,
		 * then we know there's more data past this page, and thus
		 * there's space for a malloc element trailer, so don't count
		 * it here.
		 */
		cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
		/* update max if cur value is bigger */
		max = RTE_MAX(cur, max);

		/* move to next page */
		cur_page = page_end;
		expected_iova = ms->iova + page_sz;
		/* memsegs are contiguous in memory */
		ms++;
	}

	return max;
}
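
/*
 * Illustrative sketch of the scan above (assumed values, not taken from any
 * particular system): with 2 MB pages and an element spanning three pages
 * whose IOVAs are 0x200000, 0x400000 and 0xa00000, pages one and two form
 * one contiguous run. Page three's IOVA differs from the expected 0x600000,
 * so the candidate segment is restarted there, and the reported maximum is
 * the length of the first two-page run.
 */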

/*
 * Initialize a general malloc_elem header structure
 */
void
malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
		struct rte_memseg_list *msl, size_t size,
		struct malloc_elem *orig_elem, size_t orig_size, bool dirty)
{
	elem->heap = heap;
	elem->msl = msl;
	elem->prev = NULL;
	elem->next = NULL;
	memset(&elem->free_list, 0, sizeof(elem->free_list));
	elem->state = ELEM_FREE;
	elem->dirty = dirty;
	elem->size = size;
	elem->pad = 0;
	elem->orig_elem = orig_elem;
	elem->orig_size = orig_size;
	set_header(elem);
	set_trailer(elem);
}

void
malloc_elem_insert(struct malloc_elem *elem)
{
	struct malloc_elem *prev_elem, *next_elem;
	struct malloc_heap *heap = elem->heap;

	/* first and last elements must be both NULL or both non-NULL */
	if ((heap->first == NULL) != (heap->last == NULL)) {
		RTE_LOG(ERR, EAL, "Heap is probably corrupt\n");
		return;
	}

	if (heap->first == NULL && heap->last == NULL) {
		/* if empty heap */
		heap->first = elem;
		heap->last = elem;
		prev_elem = NULL;
		next_elem = NULL;
	} else if (elem < heap->first) {
		/* if lower than start */
		prev_elem = NULL;
		next_elem = heap->first;
		heap->first = elem;
	} else if (elem > heap->last) {
		/* if higher than end */
		prev_elem = heap->last;
		next_elem = NULL;
		heap->last = elem;
	} else {
		/* the new memory is somewhere between start and end */
		uint64_t dist_from_start, dist_from_end;

		dist_from_end = RTE_PTR_DIFF(heap->last, elem);
		dist_from_start = RTE_PTR_DIFF(elem, heap->first);

		/* check which is closer, and find closest list entries */
		if (dist_from_start < dist_from_end) {
			prev_elem = heap->first;
			while (prev_elem->next < elem)
				prev_elem = prev_elem->next;
			next_elem = prev_elem->next;
		} else {
			next_elem = heap->last;
			while (next_elem->prev > elem)
				next_elem = next_elem->prev;
			prev_elem = next_elem->prev;
		}
	}

	/* insert new element */
	elem->prev = prev_elem;
	elem->next = next_elem;
	if (prev_elem)
		prev_elem->next = elem;
	if (next_elem)
		next_elem->prev = elem;
}
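
/*
 * For illustration (hypothetical addresses): inserting an element at 0x3000
 * into a heap whose list spans 0x1000 (first) to 0xf000 (last) takes the
 * "somewhere between" branch above. Since 0x3000 is closer to heap->first,
 * the walk starts there and follows ->next until it finds the neighbours
 * (say, 0x2000 and 0x4000) to link between.
 */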

/*
 * Attempt to find enough physically contiguous memory in this block to store
 * our data. Assume that the element has at least enough space to fit the
 * data, so we just check the page addresses.
 */
static bool
elem_check_phys_contig(const struct rte_memseg_list *msl,
		void *start, size_t size)
{
	return eal_memalloc_is_contig(msl, start, size);
}

/*
 * Calculate the starting point of where data of the requested size
 * and alignment would fit in the current element. If the data doesn't
 * fit, return NULL.
 */
static void *
elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	size_t elem_size = elem->size;

	/*
	 * we're allocating from the end, so adjust the size of element by
	 * alignment size.
	 */
	while (elem_size >= size) {
		const size_t bmask = ~(bound - 1);
		uintptr_t end_pt = (uintptr_t)elem +
				elem_size - MALLOC_ELEM_TRAILER_LEN;
		uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
				align);
		uintptr_t new_elem_start;

		/* check boundary */
		if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
			end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
			new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
					align);
			end_pt = new_data_start + size;

			if (((end_pt - 1) & bmask) != (new_data_start & bmask))
				return NULL;
		}

		new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;

		/* if the new start point is before the existing start,
		 * it won't fit
		 */
		if (new_elem_start < (uintptr_t)elem)
			return NULL;

		if (contig) {
			size_t new_data_size = end_pt - new_data_start;

			/*
			 * if physical contiguousness was requested and we
			 * couldn't fit all data into one physically contiguous
			 * block, try again with lower addresses.
			 */
			if (!elem_check_phys_contig(elem->msl,
					(void *)new_data_start,
					new_data_size)) {
				elem_size -= align;
				continue;
			}
		}
		return (void *)new_elem_start;
	}
	return NULL;
}
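
/*
 * Boundary-check sketch (hypothetical numbers): with bound = 0x1000,
 * bmask = ~0xfff selects the 4 KB window of an address. If a candidate
 * placement has new_data_start = 0x2f80 and end_pt - 1 = 0x3040, the masked
 * values differ (0x2000 vs 0x3000), i.e. the data would straddle a 4 KB
 * boundary, so end_pt is pulled down to 0x3000 and the start is realigned
 * below it before the check is retried.
 */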

/*
 * Use elem_start_pt to determine whether we can meet the size and
 * alignment request from the current element.
 */
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	return elem_start_pt(elem, size, align, bound, contig) != NULL;
}

/*
 * Split an existing element into two smaller elements at the given
 * split_pt parameter.
 */
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
	struct malloc_elem *next_elem = elem->next;
	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
	const size_t new_elem_size = elem->size - old_elem_size;

	malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
			elem->orig_elem, elem->orig_size, elem->dirty);
	split_pt->prev = elem;
	split_pt->next = next_elem;
	if (next_elem)
		next_elem->prev = split_pt;
	else
		elem->heap->last = split_pt;
	elem->next = split_pt;
	elem->size = old_elem_size;
	set_trailer(elem);
	if (elem->pad) {
		/* update the size of the inner element inside the padding */
		elem = RTE_PTR_ADD(elem, elem->pad);
		elem->size = old_elem_size - elem->pad;
	}
}
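
/*
 * A sketch of the result (sizes are hypothetical): splitting a 64 KB element
 * at split_pt = elem + 16 KB leaves elem covering the first 16 KB (with a
 * fresh trailer) and initializes a new free-element header at split_pt
 * covering the remaining 48 KB, linked between elem and elem's former
 * neighbour in the heap's doubly linked list.
 */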

/*
 * Our malloc heap is a doubly linked list, so doubly remove our element.
 */
static void __rte_unused
remove_elem(struct malloc_elem *elem)
{
	struct malloc_elem *next, *prev;
	next = elem->next;
	prev = elem->prev;

	if (next)
		next->prev = prev;
	else
		elem->heap->last = prev;
	if (prev)
		prev->next = next;
	else
		elem->heap->first = next;

	elem->prev = NULL;
	elem->next = NULL;
}

static int
next_elem_is_adjacent(struct malloc_elem *elem)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return elem->next == RTE_PTR_ADD(elem, elem->size) &&
			elem->next->msl == elem->msl &&
			(!internal_conf->match_allocations ||
			elem->orig_elem == elem->next->orig_elem);
}

static int
prev_elem_is_adjacent(struct malloc_elem *elem)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return elem == RTE_PTR_ADD(elem->prev, elem->prev->size) &&
			elem->prev->msl == elem->msl &&
			(!internal_conf->match_allocations ||
			elem->orig_elem == elem->prev->orig_elem);
}

/*
 * Given an element size, compute its freelist index.
 * We free an element into the freelist containing similarly-sized elements.
 * We try to allocate elements starting with the freelist containing
 * similarly-sized elements, and if necessary, we search freelists
 * containing larger elements.
 *
 * Example element size ranges for a heap with five free lists:
 *   heap->free_head[0] - (0   , 2^8]
 *   heap->free_head[1] - (2^8 , 2^10]
 *   heap->free_head[2] - (2^10, 2^12]
 *   heap->free_head[3] - (2^12, 2^14]
 *   heap->free_head[4] - (2^14, MAX_SIZE]
 */
size_t
malloc_elem_free_list_index(size_t size)
{
#define MALLOC_MINSIZE_LOG2   8
#define MALLOC_LOG2_INCREMENT 2

	size_t log2;
	size_t index;

	if (size <= (1UL << MALLOC_MINSIZE_LOG2))
		return 0;

	/* Find next power of 2 >= size. */
	log2 = sizeof(size) * 8 - __builtin_clzl(size - 1);

	/* Compute freelist index, based on log2(size). */
	index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
		MALLOC_LOG2_INCREMENT;

	return index <= RTE_HEAP_NUM_FREELISTS - 1 ?
		index : RTE_HEAP_NUM_FREELISTS - 1;
}
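
/*
 * Worked example (size chosen only for illustration): for size = 5000, the
 * next power of two is 2^13, so log2 = 13 and
 * index = (13 - 8 + 2 - 1) / 2 = 3, i.e. heap->free_head[3], which matches
 * the (2^12, 2^14] range in the table above.
 */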

/*
 * Add the specified element to its heap's free list.
 */
void
malloc_elem_free_list_insert(struct malloc_elem *elem)
{
	size_t idx;

	idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
	elem->state = ELEM_FREE;
	LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
}

/*
 * Remove the specified element from its heap's free list.
 */
void
malloc_elem_free_list_remove(struct malloc_elem *elem)
{
	LIST_REMOVE(elem, free_list);
}

/*
 * Reserve a block of data in an existing malloc_elem. If the malloc_elem
 * is much larger than the data block requested, we split the element in two.
 * This function is only called from malloc_heap_alloc, so parameter checking
 * is not done here, as it has already been done there.
 */
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
			contig);
	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
	const size_t trailer_size = elem->size - old_elem_size - size -
		MALLOC_ELEM_OVERHEAD;

	malloc_elem_free_list_remove(elem);

	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* split it, too much free space after elem */
		struct malloc_elem *new_free_elem =
				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);

		asan_clear_split_alloczone(new_free_elem);

		split_elem(elem, new_free_elem);
		malloc_elem_free_list_insert(new_free_elem);

		if (elem == elem->heap->last)
			elem->heap->last = new_free_elem;
	}

	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* don't split it, pad the element instead */
		elem->state = ELEM_BUSY;
		elem->pad = old_elem_size;

		asan_clear_alloczone(elem);

		/* put a dummy header in the padding, to point to the real
		 * element header */
		if (elem->pad > 0) { /* pad will be at least 64 bytes, as
				      * everything is cache-line aligned */
			new_elem->pad = elem->pad;
			new_elem->state = ELEM_PAD;
			new_elem->size = elem->size - elem->pad;
			set_header(new_elem);
		}

		return new_elem;
	}

	asan_clear_split_alloczone(new_elem);

	/* we are going to split the element in two. The original element
	 * remains free, and the new element is the one allocated.
	 * Re-insert the original element, in case its new size makes it
	 * belong on a different list.
	 */
	split_elem(elem, new_elem);

	asan_clear_alloczone(new_elem);

	new_elem->state = ELEM_BUSY;
	malloc_elem_free_list_insert(elem);

	return new_elem;
}
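
/*
 * An illustrative walk-through (sizes hypothetical): allocating 4 KB from a
 * 1 MB free element places the data near the end of the element, so
 * old_elem_size is large. The element is split at the allocation point, the
 * roughly 1 MB head remains free and is re-inserted into the appropriate
 * free list, and the 4 KB tail is returned as the busy element. Only when
 * the leftover head would be smaller than MALLOC_ELEM_OVERHEAD +
 * MIN_DATA_SIZE is it kept as padding instead of a separate element.
 */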

/*
 * Join two struct malloc_elem together. elem1 and elem2 must
 * be contiguous in memory.
 */
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
	struct malloc_elem *next = elem2->next;
	elem1->size += elem2->size;
	if (next)
		next->prev = elem1;
	else
		elem1->heap->last = elem1;
	elem1->next = next;
	elem1->dirty |= elem2->dirty;
	if (elem1->pad) {
		struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
		inner->size = elem1->size - elem1->pad;
	}
}

struct malloc_elem *
malloc_elem_join_adjacent_free(struct malloc_elem *elem)
{
	/*
	 * check if next element exists, is adjacent and is free; if so, join
	 * with it and remove it from the free list.
	 */
	if (elem->next != NULL && elem->next->state == ELEM_FREE &&
			next_elem_is_adjacent(elem)) {
		void *erase;
		size_t erase_len;

		/* we will want to erase the trailer and header */
		erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->next);
		join_elem(elem, elem->next);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);
	}

	/*
	 * check if prev element exists, is adjacent and is free; if so, join
	 * with it and remove it from the free list.
	 */
	if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
			prev_elem_is_adjacent(elem)) {
		struct malloc_elem *new_elem;
		void *erase;
		size_t erase_len;

		/* we will want to erase the trailer and header */
		erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->prev);

		new_elem = elem->prev;
		join_elem(new_elem, elem);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);

		elem = new_elem;
	}

	return elem;
}

/*
 * Free a malloc_elem block by adding it to the free list. If the blocks
 * either immediately before or immediately after the newly freed block
 * are also free, the blocks are merged together.
 */
struct malloc_elem *
malloc_elem_free(struct malloc_elem *elem)
{
	void *ptr;
	size_t data_len;

	ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_len = elem->size - MALLOC_ELEM_OVERHEAD;

	/*
	 * Consider the element clean for the purposes of joining.
	 * If both neighbors are clean or non-existent,
	 * the joint element will be clean,
	 * which means the memory should be cleared.
	 * There is no need to clear the memory if the joint element is dirty.
	 */
	elem->dirty = false;
	elem = malloc_elem_join_adjacent_free(elem);

	malloc_elem_free_list_insert(elem);

	elem->pad = 0;

	/* decrease heap's count of allocated elements */
	elem->heap->alloc_count--;

#ifndef RTE_MALLOC_DEBUG
	/* Normally clear the memory when needed. */
	if (!elem->dirty)
		memset(ptr, 0, data_len);
#else
	/* Always poison the memory in debug mode. */
	memset(ptr, MALLOC_POISON, data_len);
#endif

	return elem;
}

/* assume all checks were already done */
void
malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
{
	struct malloc_elem *hide_start, *hide_end, *prev, *next;
	size_t len_before, len_after;

	hide_start = start;
	hide_end = RTE_PTR_ADD(start, len);

	prev = elem->prev;
	next = elem->next;

	/* we cannot do anything with non-adjacent elements */
	if (next && next_elem_is_adjacent(elem)) {
		len_after = RTE_PTR_DIFF(next, hide_end);
		if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			asan_clear_split_alloczone(hide_end);

			/* split after */
			split_elem(elem, hide_end);

			malloc_elem_free_list_insert(hide_end);
		} else if (len_after > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	/* we cannot do anything with non-adjacent elements */
	if (prev && prev_elem_is_adjacent(elem)) {
		len_before = RTE_PTR_DIFF(hide_start, elem);
		if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			asan_clear_split_alloczone(hide_start);

			/* split before */
			split_elem(elem, hide_start);

			prev = elem;
			elem = hide_start;

			malloc_elem_free_list_insert(prev);
		} else if (len_before > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	asan_clear_alloczone(elem);

	remove_elem(elem);
}

/*
 * Attempt to resize a malloc_elem by expanding into any free space
 * immediately after it in memory.
 */
int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
	const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;

	/* if we request a smaller size, then always return ok */
	if (elem->size >= new_size) {
		asan_clear_alloczone(elem);
		return 0;
	}

	/* check if there is a next element, and whether it's free and adjacent */
	if (!elem->next || elem->next->state != ELEM_FREE ||
			!next_elem_is_adjacent(elem))
		return -1;
	if (elem->size + elem->next->size < new_size)
		return -1;

	/* we now know the element fits, so remove it from the free list
	 * and join the two
	 */
	malloc_elem_free_list_remove(elem->next);
	join_elem(elem, elem->next);

	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
		/* now we have a big block together. Let's cut it down a bit, by splitting */
		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);

		asan_clear_split_alloczone(split_pt);

		split_elem(elem, split_pt);
		malloc_elem_free_list_insert(split_pt);
	}

	asan_clear_alloczone(elem);

	return 0;
}
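
/*
 * For illustration (hypothetical sizes): growing a 4 KB allocation to 8 KB
 * succeeds in place only if the next element is free, adjacent and large
 * enough to cover the extra 4 KB plus element overhead; any surplus beyond
 * the new size is split back off and returned to a free list.
 */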

static inline const char *
elem_state_to_str(enum elem_state state)
{
	switch (state) {
	case ELEM_PAD:
		return "PAD";
	case ELEM_BUSY:
		return "BUSY";
	case ELEM_FREE:
		return "FREE";
	}
	return "ERROR";
}

void
malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
{
	fprintf(f, "Malloc element at %p (%s)\n", elem,
			elem_state_to_str(elem->state));
	fprintf(f, "  len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
	fprintf(f, "  prev: %p next: %p\n", elem->prev, elem->next);
}