/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/queue.h>

#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_common.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"

/*
 * If debugging is enabled, freed memory is set to a poison value
 * to catch buggy programs. Otherwise, freed memory is set to zero
 * to avoid having to zero it again in zmalloc.
 */
#ifdef RTE_MALLOC_DEBUG
#define MALLOC_POISON	0x6b
#else
#define MALLOC_POISON	0
#endif
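
/*
 * Layout sketch (inferred from the header/trailer constants used below,
 * not copied from the original file): every block managed here starts with
 * a struct malloc_elem header of MALLOC_ELEM_HEADER_LEN bytes and ends
 * with a trailer of MALLOC_ELEM_TRAILER_LEN bytes (a debug cookie when
 * RTE_MALLOC_DEBUG is set); MALLOC_ELEM_OVERHEAD is the sum of the two.
 * Padded allocations keep a dummy ELEM_PAD header at the start of the
 * padding which points at the real header elem->pad bytes further on.
 */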
size_t
malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
{
	void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
	void *data_start, *data_end;
	rte_iova_t expected_iova;
	struct rte_memseg *ms;
	size_t page_sz, cur, max;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	page_sz = (size_t)elem->msl->page_sz;
	data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
	/* segment must start after header and with specified alignment */
	contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);

	/* return if aligned address is already out of malloc element */
	if (contig_seg_start > data_end)
		return 0;

	/* if we're in IOVA as VA mode, or if we're in legacy mode with
	 * hugepages, all elements are IOVA-contiguous. however, we can only
	 * make these assumptions about internal memory - externally allocated
	 * segments have to be checked.
	 */
	if (!elem->msl->external &&
			(rte_eal_iova_mode() == RTE_IOVA_VA ||
				(internal_conf->legacy_mem &&
					rte_eal_has_hugepages())))
		return RTE_PTR_DIFF(data_end, contig_seg_start);

	cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
	ms = rte_mem_virt2memseg(cur_page, elem->msl);

	/* do first iteration outside the loop */
	page_end = RTE_PTR_ADD(cur_page, page_sz);
	cur_seg_end = RTE_MIN(page_end, data_end);
	cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
			MALLOC_ELEM_TRAILER_LEN;
	max = cur;
	expected_iova = ms->iova + page_sz;
	/* memsegs are contiguous in memory */
	ms++;

	cur_page = RTE_PTR_ADD(cur_page, page_sz);

	while (cur_page < data_end) {
		page_end = RTE_PTR_ADD(cur_page, page_sz);
		cur_seg_end = RTE_MIN(page_end, data_end);

		/* reset start of contiguous segment if unexpected iova */
		if (ms->iova != expected_iova) {
			/* next contiguous segment must start at specified
			 * alignment.
			 */
			contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
			/* new segment start may be on a different page, so
			 * find the page and skip to next iteration to make
			 * sure we're not blowing past data end.
			 */
			ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
			cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start,
					page_sz);
			/* don't trigger another recalculation */
			expected_iova = ms->iova;
			continue;
		}
		/* cur_seg_end ends on a page boundary or on data end. if we're
		 * looking at data end, then malloc trailer is already included
		 * in the calculations. if we're looking at page end, then we
		 * know there's more data past this page and thus there's space
		 * for malloc element trailer, so don't count it here.
		 */
		cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
		/* update max if cur value is bigger */
		max = RTE_MAX(cur, max);

		/* move to next page */
		cur_page = page_end;
		expected_iova = ms->iova + page_sz;
		/* memsegs are contiguous in memory */
		ms++;
	}

	return max;
}
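
/*
 * Illustrative example (not part of the original source): suppose an
 * element's data spans three 2 MiB pages whose IOVAs are 0x200000,
 * 0x400000 and 0xa00000. The first two pages are IOVA-contiguous, so the
 * scan above extends the current run across them; the third page breaks
 * the expected_iova chain, so the run restarts at that page, and the
 * largest value of max is the portion of the first two pages lying
 * between data_start and data_end (minus the trailer).
 */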
/*
 * Initialize a general malloc_elem header structure
 */
void
malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
		struct rte_memseg_list *msl, size_t size,
		struct malloc_elem *orig_elem, size_t orig_size, bool dirty)
{
	elem->heap = heap;
	elem->msl = msl;
	elem->prev = NULL;
	elem->next = NULL;
	memset(&elem->free_list, 0, sizeof(elem->free_list));
	elem->state = ELEM_FREE;
	elem->dirty = dirty;
	elem->size = size;
	elem->pad = 0;
	elem->orig_elem = orig_elem;
	elem->orig_size = orig_size;
	set_header(elem);
	set_trailer(elem);
}
void
malloc_elem_insert(struct malloc_elem *elem)
{
	struct malloc_elem *prev_elem, *next_elem;
	struct malloc_heap *heap = elem->heap;

	/* first and last elements must be both NULL or both non-NULL */
	if ((heap->first == NULL) != (heap->last == NULL)) {
		RTE_LOG(ERR, EAL, "Heap is probably corrupt\n");
		return;
	}

	if (heap->first == NULL && heap->last == NULL) {
		/* if empty heap */
		heap->first = elem;
		heap->last = elem;
		prev_elem = NULL;
		next_elem = NULL;
	} else if (elem < heap->first) {
		/* if lower than start */
		prev_elem = NULL;
		next_elem = heap->first;
		heap->first = elem;
	} else if (elem > heap->last) {
		/* if higher than end */
		prev_elem = heap->last;
		next_elem = NULL;
		heap->last = elem;
	} else {
		/* the new memory is somewhere between start and end */
		uint64_t dist_from_start, dist_from_end;

		dist_from_end = RTE_PTR_DIFF(heap->last, elem);
		dist_from_start = RTE_PTR_DIFF(elem, heap->first);

		/* check which is closer, and find closest list entries */
		if (dist_from_start < dist_from_end) {
			prev_elem = heap->first;
			while (prev_elem->next < elem)
				prev_elem = prev_elem->next;
			next_elem = prev_elem->next;
		} else {
			next_elem = heap->last;
			while (next_elem->prev > elem)
				next_elem = next_elem->prev;
			prev_elem = next_elem->prev;
		}
	}

	/* insert new element */
	elem->prev = prev_elem;
	elem->next = next_elem;
	if (prev_elem)
		prev_elem->next = elem;
	if (next_elem)
		next_elem->prev = elem;
}
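
/*
 * Illustrative example (not part of the original source): with heap->first
 * at 0x100000 and heap->last at 0x900000, inserting an element at 0x800000
 * gives dist_from_start == 0x700000 and dist_from_end == 0x100000, so the
 * list is walked backwards from heap->last via the prev pointers until an
 * element below 0x800000 is found, and the new element is linked in
 * between that element and its successor.
 */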
/*
 * Attempt to find enough physically contiguous memory in this block to store
 * our data. Assume that element has at least enough space to fit in the data,
 * so we just check the page addresses.
 */
static bool
elem_check_phys_contig(const struct rte_memseg_list *msl,
		void *start, size_t size)
{
	return eal_memalloc_is_contig(msl, start, size);
}
/*
 * calculate the starting point of where data of the requested size
 * and alignment would fit in the current element. If the data doesn't
 * fit, return NULL.
 */
static void *
elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	size_t elem_size = elem->size;

	/*
	 * we're allocating from the end, so adjust the size of element by
	 * alignment request.
	 */
	while (elem_size >= size) {
		const size_t bmask = ~(bound - 1);
		uintptr_t end_pt = (uintptr_t)elem +
				elem_size - MALLOC_ELEM_TRAILER_LEN;
		uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
				align);
		uintptr_t new_elem_start;

		/* check boundary */
		if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
			end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
			new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
					align);
			end_pt = new_data_start + size;

			if (((end_pt - 1) & bmask) != (new_data_start & bmask))
				return NULL;
		}

		new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;

		/* if the new start point is before the existing start,
		 * it won't fit
		 */
		if (new_elem_start < (uintptr_t)elem)
			return NULL;

		if (contig) {
			size_t new_data_size = end_pt - new_data_start;

			/*
			 * if physical contiguousness was requested and we
			 * couldn't fit all data into one physically contiguous
			 * block, try again with lower addresses.
			 */
			if (!elem_check_phys_contig(elem->msl,
					(void *)new_data_start,
					new_data_size)) {
				elem_size -= align;
				continue;
			}
		}
		return (void *)new_elem_start;
	}
	return NULL;
}
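
/*
 * Worked example (illustrative only; it assumes, for the sake of the
 * arithmetic, MALLOC_ELEM_HEADER_LEN == 0x40 and MALLOC_ELEM_TRAILER_LEN
 * == 0, i.e. a non-debug build): for an element at 0x100000 with
 * elem->size == 0x10000, a request of size == 0x1000, align == 0x400 and
 * bound == 0 gives bmask == 0, so the boundary checks are no-ops;
 * end_pt == 0x110000, new_data_start == RTE_ALIGN_FLOOR(0x10f000, 0x400)
 * == 0x10f000, and the returned element start is 0x10f000 - 0x40 ==
 * 0x10efc0, i.e. the allocation is carved from the end of the element.
 */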
/*
 * use elem_start_pt to determine if we can meet the size and
 * alignment request from the current element
 */
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	return elem_start_pt(elem, size, align, bound, contig) != NULL;
}
/*
 * split an existing element into two smaller elements at the given
 * split_pt parameter.
 */
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
	struct malloc_elem *next_elem = elem->next;
	const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
	const size_t new_elem_size = elem->size - old_elem_size;

	malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size,
			elem->orig_elem, elem->orig_size, elem->dirty);
	split_pt->prev = elem;
	split_pt->next = next_elem;
	if (next_elem)
		next_elem->prev = split_pt;
	else
		elem->heap->last = split_pt;
	elem->next = split_pt;
	elem->size = old_elem_size;
	set_trailer(elem);
	if (elem->pad) {
		/* Update the inner element size if the element is padded. */
		elem = RTE_PTR_ADD(elem, elem->pad);
		elem->size = old_elem_size - elem->pad;
	}
}
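
/*
 * Illustrative example (not part of the original source): splitting an
 * element at 0x100000 with elem->size == 0x100000 at split_pt == 0x140000
 * leaves the original element with old_elem_size == 0x40000 and creates a
 * new element at 0x140000 with new_elem_size == 0xc0000, linked between
 * the original element and its former next neighbour.
 */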
/*
 * our malloc heap is a doubly linked list, so doubly remove our element.
 */
static void __rte_unused
remove_elem(struct malloc_elem *elem)
{
	struct malloc_elem *next, *prev;
	next = elem->next;
	prev = elem->prev;

	if (next)
		next->prev = prev;
	else
		elem->heap->last = prev;
	if (prev)
		prev->next = next;
	else
		elem->heap->first = next;

	elem->prev = NULL;
	elem->next = NULL;
}

static int
next_elem_is_adjacent(struct malloc_elem *elem)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return elem->next == RTE_PTR_ADD(elem, elem->size) &&
			elem->next->msl == elem->msl &&
			(!internal_conf->match_allocations ||
			 elem->orig_elem == elem->next->orig_elem);
}

static int
prev_elem_is_adjacent(struct malloc_elem *elem)
{
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	return elem == RTE_PTR_ADD(elem->prev, elem->prev->size) &&
			elem->prev->msl == elem->msl &&
			(!internal_conf->match_allocations ||
			 elem->orig_elem == elem->prev->orig_elem);
}
/*
 * Given an element size, compute its freelist index.
 * We free an element into the freelist containing similarly-sized elements.
 * We try to allocate elements starting with the freelist containing
 * similarly-sized elements, and if necessary, we search freelists
 * containing larger elements.
 *
 * Example element size ranges for a heap with five free lists:
 *   heap->free_head[0] - (0   , 2^8]
 *   heap->free_head[1] - (2^8 , 2^10]
 *   heap->free_head[2] - (2^10, 2^12]
 *   heap->free_head[3] - (2^12, 2^14]
 *   heap->free_head[4] - (2^14, MAX_SIZE]
 */
size_t
malloc_elem_free_list_index(size_t size)
{
#define MALLOC_MINSIZE_LOG2   8
#define MALLOC_LOG2_INCREMENT 2

	size_t log2;
	size_t index;

	if (size <= (1UL << MALLOC_MINSIZE_LOG2))
		return 0;

	/* Find next power of 2 >= size. */
	log2 = sizeof(size) * 8 - __builtin_clzl(size - 1);

	/* Compute freelist index, based on log2(size). */
	index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
			MALLOC_LOG2_INCREMENT;

	return index <= RTE_HEAP_NUM_FREELISTS - 1 ?
			index : RTE_HEAP_NUM_FREELISTS - 1;
}
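
/*
 * Worked example (illustrative only): for size == 5000 on a 64-bit build,
 * log2 == 64 - __builtin_clzl(4999) == 13 and
 * index == (13 - 8 + 2 - 1) / 2 == 3, i.e. the (2^12, 2^14] free list;
 * any size <= 256 goes straight to free list 0.
 */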
/*
 * Add the specified element to its heap's free list.
 */
void
malloc_elem_free_list_insert(struct malloc_elem *elem)
{
	size_t idx;

	idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN);
	elem->state = ELEM_FREE;
	LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
}

/*
 * Remove the specified element from its heap's free list.
 */
void
malloc_elem_free_list_remove(struct malloc_elem *elem)
{
	LIST_REMOVE(elem, free_list);
}
/*
 * reserve a block of data in an existing malloc_elem. If the malloc_elem
 * is much larger than the data block requested, we split the element in two.
 * This function is only called from malloc_heap_alloc so parameter checking
 * is not done here, as it's done there previously.
 */
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
		size_t bound, bool contig)
{
	struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
			contig);
	const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
	const size_t trailer_size = elem->size - old_elem_size - size -
		MALLOC_ELEM_OVERHEAD;

	malloc_elem_free_list_remove(elem);

	if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* split it, too much free space after elem */
		struct malloc_elem *new_free_elem =
				RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD);

		asan_clear_split_alloczone(new_free_elem);

		split_elem(elem, new_free_elem);
		malloc_elem_free_list_insert(new_free_elem);

		if (elem == elem->heap->last)
			elem->heap->last = new_free_elem;
	}

	if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
		/* don't split it, pad the element instead */
		elem->state = ELEM_BUSY;
		elem->pad = old_elem_size;

		asan_clear_alloczone(elem);

		/* put a dummy header in padding, to point to real element header */
		if (elem->pad > 0) { /* pad will be at least 64 bytes, as
				      * everything is cache-line aligned */
			new_elem->pad = elem->pad;
			new_elem->state = ELEM_PAD;
			new_elem->size = elem->size - elem->pad;
			set_header(new_elem);
		}

		return new_elem;
	}

	asan_clear_split_alloczone(new_elem);

	/* we are going to split the element in two. The original element
	 * remains free, and the new element is the one allocated.
	 * Re-insert original element, in case its new size makes it
	 * belong on a different list.
	 */
	split_elem(elem, new_elem);

	asan_clear_alloczone(new_elem);

	new_elem->state = ELEM_BUSY;
	malloc_elem_free_list_insert(elem);

	return new_elem;
}
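
/*
 * Illustrative summary (not part of the original source): allocating
 * 0x1000 bytes from a large free element carves the allocation from the
 * end of the element (see elem_start_pt()); the remaining front part is
 * split off, stays free and is re-inserted into the free list matching
 * its new size. If that front part would be smaller than
 * MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE, no split is done and the slack is
 * instead recorded in elem->pad, with a dummy ELEM_PAD header placed at
 * the start of the padding.
 */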
/*
 * join two struct malloc_elem together. elem1 and elem2 must
 * be contiguous in memory.
 */
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
	struct malloc_elem *next = elem2->next;
	elem1->size += elem2->size;
	elem1->next = next;
	if (next)
		next->prev = elem1;
	else
		elem1->heap->last = elem1;

	elem1->dirty |= elem2->dirty;

	if (elem1->pad) {
		struct malloc_elem *inner = RTE_PTR_ADD(elem1, elem1->pad);
		inner->size = elem1->size - elem1->pad;
	}
}
struct malloc_elem *
malloc_elem_join_adjacent_free(struct malloc_elem *elem)
{
	/*
	 * check if next element exists, is adjacent and is free, if so join
	 * with it, need to remove from free list.
	 */
	if (elem->next != NULL && elem->next->state == ELEM_FREE &&
			next_elem_is_adjacent(elem)) {
		void *erase;
		size_t erase_len;

		/* we will want to erase the trailer and header */
		erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->next);
		join_elem(elem, elem->next);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);
	}

	/*
	 * check if prev element exists, is adjacent and is free, if so join
	 * with it, need to remove from free list.
	 */
	if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
			prev_elem_is_adjacent(elem)) {
		struct malloc_elem *new_elem;
		void *erase;
		size_t erase_len;

		/* we will want to erase trailer and header */
		erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
		erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;

		/* remove from free list, join to this one */
		malloc_elem_free_list_remove(elem->prev);

		new_elem = elem->prev;
		join_elem(new_elem, elem);

		/* erase header, trailer and pad */
		memset(erase, MALLOC_POISON, erase_len);

		elem = new_elem;
	}

	return elem;
}
/*
 * free a malloc_elem block by adding it to the free list. If the
 * blocks either immediately before or immediately after the newly freed
 * block are also free, the blocks are merged together.
 */
struct malloc_elem *
malloc_elem_free(struct malloc_elem *elem)
{
	void *ptr;
	size_t data_len;

	ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
	data_len = elem->size - MALLOC_ELEM_OVERHEAD;

	/*
	 * Consider the element clean for the purposes of joining.
	 * If both neighbors are clean or non-existent,
	 * the joint element will be clean,
	 * which means the memory should be cleared.
	 * There is no need to clear the memory if the joint element is dirty.
	 */
	elem->dirty = false;
	elem = malloc_elem_join_adjacent_free(elem);

	malloc_elem_free_list_insert(elem);

	elem->pad = 0;

	/* decrease heap's count of allocated elements */
	elem->heap->alloc_count--;

#ifndef RTE_MALLOC_DEBUG
	/* Normally clear the memory when needed. */
	if (!elem->dirty)
		memset(ptr, 0, data_len);
#else
	/* Always poison the memory in debug mode. */
	memset(ptr, MALLOC_POISON, data_len);
#endif

	return elem;
}
/* assume all checks were already done */
void
malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
{
	struct malloc_elem *hide_start, *hide_end, *prev, *next;
	size_t len_before, len_after;

	hide_start = start;
	hide_end = RTE_PTR_ADD(start, len);

	prev = elem->prev;
	next = elem->next;

	/* we cannot do anything with non-adjacent elements */
	if (next && next_elem_is_adjacent(elem)) {
		len_after = RTE_PTR_DIFF(next, hide_end);
		if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			asan_clear_split_alloczone(hide_end);

			/* split after */
			split_elem(elem, hide_end);

			malloc_elem_free_list_insert(hide_end);
		} else if (len_after > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	/* we cannot do anything with non-adjacent elements */
	if (prev && prev_elem_is_adjacent(elem)) {
		len_before = RTE_PTR_DIFF(hide_start, elem);
		if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
			asan_clear_split_alloczone(hide_start);

			/* split before */
			split_elem(elem, hide_start);

			prev = elem;
			elem = hide_start;

			malloc_elem_free_list_insert(prev);
		} else if (len_before > 0) {
			RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
			return;
		}
	}

	asan_clear_alloczone(elem);
}
/*
 * attempt to resize a malloc_elem by expanding into any free space
 * immediately after it in memory.
 */
int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
	const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;

	/* if we request a smaller size, then always return ok */
	if (elem->size >= new_size) {
		asan_clear_alloczone(elem);
		return 0;
	}

	/* check if there is a next element, it's free and adjacent */
	if (!elem->next || elem->next->state != ELEM_FREE ||
			!next_elem_is_adjacent(elem))
		return -1;
	if (elem->size + elem->next->size < new_size)
		return -1;

	/* we now know the element fits, so remove from free list,
	 * join the two
	 */
	malloc_elem_free_list_remove(elem->next);
	join_elem(elem, elem->next);

	if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
		/* now we have a big block together. Let's cut it down a bit,
		 * by splitting */
		struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
		split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE);

		asan_clear_split_alloczone(split_pt);

		split_elem(elem, split_pt);
		malloc_elem_free_list_insert(split_pt);
	}

	asan_clear_alloczone(elem);

	return 0;
}
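
/*
 * Illustrative example (not part of the original source): growing an
 * element whose data area is 0x1000 bytes to size == 0x3000 computes
 * new_size == 0x3000 + elem->pad + MALLOC_ELEM_OVERHEAD. If the next
 * element is free, adjacent and large enough, it is joined onto this one;
 * any excess of at least MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD is then
 * split back off at a cache-line-aligned point and returned to a free
 * list, so the resize happens in place without moving the data.
 */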
static inline const char *
elem_state_to_str(enum elem_state state)
{
	switch (state) {
	case ELEM_PAD:
		return "PAD";
	case ELEM_BUSY:
		return "BUSY";
	case ELEM_FREE:
		return "FREE";
	}
	return "ERROR";
}

void
malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
{
	fprintf(f, "Malloc element at %p (%s)\n", elem,
			elem_state_to_str(elem->state));
	fprintf(f, " len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
	fprintf(f, " prev: %p next: %p\n", elem->prev, elem->next);
}
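
/*
 * Example output (illustrative only; addresses are made up):
 *
 *   Malloc element at 0x7f2b40000000 (FREE)
 *    len: 0x200000 pad: 0x0
 *    prev: 0x7f2b3fe00000 next: 0x7f2b40200000
 */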