4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include <sys/queue.h>
44 #include <rte_memory.h>
45 #include <rte_memzone.h>
46 #include <rte_tailq.h>
48 #include <rte_eal_memconfig.h>
49 #include <rte_per_lcore.h>
50 #include <rte_errno.h>
51 #include <rte_string_fns.h>
52 #include <rte_common.h>
54 #include "eal_private.h"
/*
 * Free memory segment table used by the allocator in this file.
 * It is not a private copy: rte_eal_memzone_init() points it at
 * mcfg->free_memseg, and each reservation shrinks the chosen entry
 * in place.
 */
static struct rte_memseg *free_memseg = NULL;
59 static inline const struct rte_memzone *
60 memzone_lookup_thread_unsafe(const char *name)
62 const struct rte_mem_config *mcfg;
65 /* get pointer to global configuration */
66 mcfg = rte_eal_get_configuration()->mem_config;
69 * the algorithm is not optimal (linear), but there are few
70 * zones and this function should be called at init only
72 for (i = 0; i < RTE_MAX_MEMZONE && mcfg->memzone[i].addr != NULL; i++) {
73 if (!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE))
74 return &mcfg->memzone[i];
81 * Return a pointer to a correctly filled memzone descriptor. If the
82 * allocation cannot be done, return NULL.
84 const struct rte_memzone *
85 rte_memzone_reserve(const char *name, size_t len, int socket_id,
88 return rte_memzone_reserve_aligned(name,
89 len, socket_id, flags, CACHE_LINE_SIZE);
93 * Helper function for memzone_reserve_aligned_thread_unsafe().
94 * Calculate address offset from the start of the segment.
95 * Align offset in that way that it satisfy istart alignmnet and
96 * buffer of the requested length would not cross specified boundary.
98 static inline phys_addr_t
99 align_phys_boundary(const struct rte_memseg *ms, size_t len, size_t align,
102 phys_addr_t addr_offset, bmask, end, start;
105 step = RTE_MAX(align, bound);
106 bmask = ~((phys_addr_t)bound - 1);
108 /* calculate offset to closest alignment */
109 start = RTE_ALIGN_CEIL(ms->phys_addr, align);
110 addr_offset = start - ms->phys_addr;
112 while (addr_offset + len < ms->len) {
114 /* check, do we meet boundary condition */
115 end = start + len - (len != 0);
116 if ((start & bmask) == (end & bmask))
119 /* calculate next offset */
120 start = RTE_ALIGN_CEIL(start + 1, step);
121 addr_offset = start - ms->phys_addr;
124 return (addr_offset);
/*
 * Reserve a memzone out of the free memory segments. No lock is taken
 * here; the public wrappers in this file call it with mcfg->mlock
 * write-locked.
 *
 * Visible steps:
 *  - refuse when mcfg->memzone_idx has reached RTE_MAX_MEMZONE or when a
 *    zone called `name` already exists;
 *  - refuse an alignment that is not a power of two, and raise an
 *    alignment below CACHE_LINE_SIZE to CACHE_LINE_SIZE;
 *  - round `len` up to a multiple of the cache line size, after checking
 *    that the rounding cannot overflow size_t;
 *  - scan free_memseg[] for a candidate, honouring socket_id and the
 *    RTE_MEMZONE_2MB / RTE_MEMZONE_1GB flags;
 *  - carve the zone from the front of the chosen segment and record it
 *    in the next mcfg->memzone[] slot.
 *
 * NOTE(review): this listing has gaps (the embedded source line numbers
 * skip). The opening and closing braces, comment delimiters, the
 * declarations of `i`, `memseg_idx` and `memseg_addr`, the bodies of the
 * error branches and the statements guarded by several `if` lines below
 * are not visible. Restore them from version control before relying on
 * this block; the notes below mark the places that matter most.
 */
127 static const struct rte_memzone *
128 memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
129 int socket_id, unsigned flags, unsigned align, unsigned bound)
131 struct rte_mem_config *mcfg;
134 uint64_t addr_offset, seg_offset = 0;
135 size_t requested_len;
136 size_t memseg_len = 0;
137 phys_addr_t memseg_physaddr;
140 /* get pointer to global configuration */
141 mcfg = rte_eal_get_configuration()->mem_config;
143 /* no more room in config */
/* NOTE(review): only the log call of this branch is visible; what it sets and returns is missing from the listing */
144 if (mcfg->memzone_idx >= RTE_MAX_MEMZONE) {
145 RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
150 /* zone already exists */
151 if ((memzone_lookup_thread_unsafe(name)) != NULL) {
/* NOTE(review): the argument line of this log call and the rest of the branch are missing */
152 RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
158 /* if alignment is not a power of two */
159 if (!rte_is_power_of_2(align)) {
/* NOTE(review): the trailing argument of this log call and the rest of the branch are missing */
160 RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
166 /* alignment less than cache size is not allowed */
167 if (align < CACHE_LINE_SIZE)
168 align = CACHE_LINE_SIZE;
171 /* align length on cache boundary. Check for overflow before doing so */
172 if (len > SIZE_MAX - CACHE_LINE_MASK) {
173 rte_errno = EINVAL; /* requested size too big */
/* round len up to the next multiple of the cache line size */
177 len += CACHE_LINE_MASK;
178 len &= ~((size_t) CACHE_LINE_MASK);
/* the search below never asks for less than one cache line, even when len is 0 */
180 /* save minimal requested length */
181 requested_len = RTE_MAX((size_t)CACHE_LINE_SIZE, len);
183 /* check that boundary condition is valid */
/*
 * NOTE(review): the first line of this condition is missing. Since
 * rte_memzone_reserve_aligned() passes bound == 0, the hidden part
 * presumably exempts that value -- confirm.
 */
185 (requested_len > bound || !rte_is_power_of_2(bound))) {
190 /* find the smallest segment matching requirements */
191 for (i = 0; i < RTE_MAX_MEMSEG; i++) {
/* NOTE(review): the statements guarded by the next three tests are missing from the listing */
193 if (free_memseg[i].addr == NULL)
196 /* empty segment, skip it */
197 if (free_memseg[i].len == 0)
/* a specific socket was requested and this segment sits on another one */
201 if (socket_id != SOCKET_ID_ANY &&
202 socket_id != free_memseg[i].socket_id)
206 * calculate offset to closest alignment that
207 * meets boundary conditions.
209 addr_offset = align_phys_boundary(free_memseg + i,
210 requested_len, align, bound);
/* the aligned zone would run past the end of this segment */
213 if ((requested_len + addr_offset) > free_memseg[i].len)
216 /* check flags for hugepage sizes */
/* a 2MB request is matched against 1G segments and a 1GB request against 2M segments; the guarded statements are missing */
217 if ((flags & RTE_MEMZONE_2MB) &&
218 free_memseg[i].hugepage_sz == RTE_PGSIZE_1G )
220 if ((flags & RTE_MEMZONE_1GB) &&
221 free_memseg[i].hugepage_sz == RTE_PGSIZE_2M )
224 /* this segment is the best until now */
/* NOTE(review): no visible line assigns memseg_idx in any of the three branches below, although it is used as the chosen index later */
225 if (memseg_idx == -1) {
227 memseg_len = free_memseg[i].len;
228 seg_offset = addr_offset;
230 /* find the biggest contiguous zone */
/* NOTE(review): the condition selecting this branch is missing from the listing */
232 if (free_memseg[i].len > memseg_len) {
234 memseg_len = free_memseg[i].len;
235 seg_offset = addr_offset;
239 * find the smallest (we already checked that current
240 * zone length is > len
/*
 * The candidate replaces the current best when it is more than `align`
 * bytes shorter, or when it is at most `align` bytes longer and needs
 * less padding in front of the zone.
 */
242 else if (free_memseg[i].len + align < memseg_len ||
243 (free_memseg[i].len <= memseg_len + align &&
244 addr_offset < seg_offset)) {
246 memseg_len = free_memseg[i].len;
247 seg_offset = addr_offset;
251 /* no segment found */
252 if (memseg_idx == -1) {
254 * If RTE_MEMZONE_SIZE_HINT_ONLY flag is specified,
255 * try allocating again without the size parameter otherwise -fail.
/* the retry passes flags == 0, so the hint flag is dropped together with the page-size flags */
257 if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) &&
258 ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB)))
259 return memzone_reserve_aligned_thread_unsafe(name,
260 len, socket_id, 0, align, bound);
262 RTE_LOG(ERR, EAL, "%s(%s, %zu, %d): "
263 "No appropriate segment found\n",
264 __func__, name, requested_len, socket_id);
269 /* save aligned physical and virtual addresses */
270 memseg_physaddr = free_memseg[memseg_idx].phys_addr + seg_offset;
271 memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr,
272 (uintptr_t) seg_offset);
274 /* if we are looking for a biggest memzone */
/*
 * NOTE(review): the conditions choosing between the next two assignments
 * are missing. The first takes everything left in the segment after the
 * padding; the second presumably stops at the next multiple of `bound`
 * above the start address -- confirm.
 */
277 requested_len = memseg_len - seg_offset;
279 requested_len = RTE_ALIGN_CEIL(memseg_physaddr + 1,
280 bound) - memseg_physaddr;
283 /* set length to correct value */
/* from here on len counts the padding in front of the zone as well as the zone */
284 len = (size_t)seg_offset + requested_len;
286 /* update our internal state */
/* the segment shrinks from its front: padding and zone are both removed from the free space */
287 free_memseg[memseg_idx].len -= len;
288 free_memseg[memseg_idx].phys_addr += len;
289 free_memseg[memseg_idx].addr =
290 (char *)free_memseg[memseg_idx].addr + len;
292 /* fill the zone in config */
/* takes the next free descriptor slot and advances memzone_idx */
293 struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++];
294 rte_snprintf(mz->name, sizeof(mz->name), "%s", name);
295 mz->phys_addr = memseg_physaddr;
296 mz->addr = memseg_addr;
297 mz->len = requested_len;
298 mz->hugepage_sz = free_memseg[memseg_idx].hugepage_sz;
299 mz->socket_id = free_memseg[memseg_idx].socket_id;
/* NOTE(review): one line is missing between the two assignments around this note, and the function's final return is not visible */
301 mz->memseg_id = memseg_idx;
307 * Return a pointer to a correctly filled memzone descriptor (with a
308 * specified alignment). If the allocation cannot be done, return NULL.
310 const struct rte_memzone *
311 rte_memzone_reserve_aligned(const char *name, size_t len,
312 int socket_id, unsigned flags, unsigned align)
314 struct rte_mem_config *mcfg;
315 const struct rte_memzone *mz = NULL;
317 /* both sizes cannot be explicitly called for */
318 if ((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) {
323 /* get pointer to global configuration */
324 mcfg = rte_eal_get_configuration()->mem_config;
326 rte_rwlock_write_lock(&mcfg->mlock);
328 mz = memzone_reserve_aligned_thread_unsafe(
329 name, len, socket_id, flags, align, 0);
331 rte_rwlock_write_unlock(&mcfg->mlock);
337 * Return a pointer to a correctly filled memzone descriptor (with a
338 * specified alignment and boundary).
339 * If the allocation cannot be done, return NULL.
341 const struct rte_memzone *
342 rte_memzone_reserve_bounded(const char *name, size_t len,
343 int socket_id, unsigned flags, unsigned align, unsigned bound)
345 struct rte_mem_config *mcfg;
346 const struct rte_memzone *mz = NULL;
348 /* both sizes cannot be explicitly called for */
349 if ((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) {
354 /* get pointer to global configuration */
355 mcfg = rte_eal_get_configuration()->mem_config;
357 rte_rwlock_write_lock(&mcfg->mlock);
359 mz = memzone_reserve_aligned_thread_unsafe(
360 name, len, socket_id, flags, align, bound);
362 rte_rwlock_write_unlock(&mcfg->mlock);
369 * Lookup for the memzone identified by the given name
371 const struct rte_memzone *
372 rte_memzone_lookup(const char *name)
374 struct rte_mem_config *mcfg;
375 const struct rte_memzone *memzone = NULL;
377 mcfg = rte_eal_get_configuration()->mem_config;
379 rte_rwlock_read_lock(&mcfg->mlock);
381 memzone = memzone_lookup_thread_unsafe(name);
383 rte_rwlock_read_unlock(&mcfg->mlock);
388 /* Dump all reserved memory zones on console */
390 rte_memzone_dump(void)
392 struct rte_mem_config *mcfg;
395 /* get pointer to global configuration */
396 mcfg = rte_eal_get_configuration()->mem_config;
398 rte_rwlock_read_lock(&mcfg->mlock);
400 for (i=0; i<RTE_MAX_MEMZONE; i++) {
401 if (mcfg->memzone[i].addr == NULL)
403 printf("Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx"
404 ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
405 mcfg->memzone[i].name,
406 mcfg->memzone[i].phys_addr,
407 mcfg->memzone[i].len,
408 mcfg->memzone[i].addr,
409 mcfg->memzone[i].socket_id,
410 mcfg->memzone[i].flags);
412 rte_rwlock_read_unlock(&mcfg->mlock);
416 * called by init: modify the free memseg list to have cache-aligned
417 * addresses and cache-aligned lengths
420 memseg_sanitize(struct rte_memseg *memseg)
426 phys_align = memseg->phys_addr & CACHE_LINE_MASK;
427 virt_align = (unsigned long)memseg->addr & CACHE_LINE_MASK;
430 * sanity check: phys_addr and addr must have the same
433 if (phys_align != virt_align)
436 /* memseg is really too small, don't bother with it */
437 if (memseg->len < (2 * CACHE_LINE_SIZE)) {
442 /* align start address */
443 off = (CACHE_LINE_SIZE - phys_align) & CACHE_LINE_MASK;
444 memseg->phys_addr += off;
445 memseg->addr = (char *)memseg->addr + off;
448 /* align end address */
449 memseg->len &= ~((uint64_t)CACHE_LINE_MASK);
455 * Init the memzone subsystem
458 rte_eal_memzone_init(void)
460 struct rte_mem_config *mcfg;
461 const struct rte_memseg *memseg;
464 /* get pointer to global configuration */
465 mcfg = rte_eal_get_configuration()->mem_config;
467 /* mirror the runtime memsegs from config */
468 free_memseg = mcfg->free_memseg;
470 /* secondary processes don't need to initialise anything */
471 if (rte_eal_process_type() == RTE_PROC_SECONDARY)
474 memseg = rte_eal_get_physmem_layout();
475 if (memseg == NULL) {
476 RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
480 rte_rwlock_write_lock(&mcfg->mlock);
482 /* duplicate the memsegs from config */
483 memcpy(free_memseg, memseg, sizeof(struct rte_memseg) * RTE_MAX_MEMSEG);
485 /* make all zones cache-aligned */
486 for (i=0; i<RTE_MAX_MEMSEG; i++) {
487 if (free_memseg[i].addr == NULL)
489 if (memseg_sanitize(&free_memseg[i]) < 0) {
490 RTE_LOG(ERR, EAL, "%s(): Sanity check failed\n", __func__);
491 rte_rwlock_write_unlock(&mcfg->mlock);
496 /* delete all zones */
497 mcfg->memzone_idx = 0;
498 memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
500 rte_rwlock_write_unlock(&mcfg->mlock);