/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_common.h>

#include "eal_private.h"
/* internal copy of free memory segments */
static struct rte_memseg *free_memseg = NULL;
static inline const struct rte_memzone *
memzone_lookup_thread_unsafe(const char *name)
{
        const struct rte_mem_config *mcfg;
        unsigned i = 0;

        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;

        /*
         * the algorithm is not optimal (linear), but there are few
         * zones and this function should be called at init only
         */
        for (i = 0; i < RTE_MAX_MEMZONE && mcfg->memzone[i].addr != NULL; i++) {
                if (!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE))
                        return &mcfg->memzone[i];
        }

        return NULL;
}
/*
 * Helper function for memzone_reserve_aligned_thread_unsafe().
 * Calculate the address offset from the start of the segment.
 * Align the offset so that it satisfies the start alignment and a
 * buffer of the requested length does not cross the specified boundary.
 */
static inline phys_addr_t
align_phys_boundary(const struct rte_memseg *ms, size_t len, size_t align,
                size_t bound)
{
        phys_addr_t addr_offset, bmask, end, start;
        size_t step;

        step = RTE_MAX(align, bound);
        bmask = ~((phys_addr_t)bound - 1);

        /* calculate offset to closest alignment */
        start = RTE_ALIGN_CEIL(ms->phys_addr, align);
        addr_offset = start - ms->phys_addr;

        while (addr_offset + len < ms->len) {

                /* check whether we meet the boundary condition */
                end = start + len - (len != 0);
                if ((start & bmask) == (end & bmask))
                        break;

                /* calculate next offset */
                start = RTE_ALIGN_CEIL(start + 1, step);
                addr_offset = start - ms->phys_addr;
        }

        return addr_offset;
}
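/*
 * Worked example for align_phys_boundary() (illustrative; the numbers are
 * assumptions, not taken from the original source). Suppose
 * ms->phys_addr = 0x12C0, ms->len = 0x10000, len = 0x300, align = 0x100
 * and bound = 0x400:
 *
 *      step  = RTE_MAX(0x100, 0x400) = 0x400, bmask = ~0x3FF
 *      start = RTE_ALIGN_CEIL(0x12C0, 0x100) = 0x1300, addr_offset = 0x40
 *      end   = 0x1300 + 0x300 - 1 = 0x15FF
 *              0x1300 & bmask = 0x1000, but 0x15FF & bmask = 0x1400:
 *              the buffer would cross the 0x1400 boundary, so step forward
 *      start = RTE_ALIGN_CEIL(0x1301, 0x400) = 0x1400, addr_offset = 0x140
 *      end   = 0x16FF: both ends now fall in [0x1400, 0x1800), so we stop
 *
 * The function returns 0x140, placing the buffer at physical 0x1400.
 */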
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
                int socket_id, uint64_t size_mask, unsigned align,
                unsigned bound)
{
        struct rte_mem_config *mcfg;
        unsigned i = 0;
        int memseg_idx = -1;
        uint64_t addr_offset, seg_offset = 0;
        size_t requested_len;
        size_t memseg_len = 0;
        phys_addr_t memseg_physaddr;
        void *memseg_addr;

        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;

        /* no more room in config */
        if (mcfg->memzone_idx >= RTE_MAX_MEMZONE) {
                RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
                rte_errno = ENOSPC;
                return NULL;
        }

        /* zone already exists */
        if ((memzone_lookup_thread_unsafe(name)) != NULL) {
                RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
                        __func__, name);
                rte_errno = EEXIST;
                return NULL;
        }

        /* if alignment is not a power of two */
        if (align && !rte_is_power_of_2(align)) {
                RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
                                align);
                rte_errno = EINVAL;
                return NULL;
        }

        /* alignment less than cache size is not allowed */
        if (align < RTE_CACHE_LINE_SIZE)
                align = RTE_CACHE_LINE_SIZE;

        /* align length on cache boundary. Check for overflow before doing so */
        if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
                rte_errno = EINVAL; /* requested size too big */
                return NULL;
        }

        len += RTE_CACHE_LINE_MASK;
        len &= ~((size_t) RTE_CACHE_LINE_MASK);

        /* save minimal requested length */
        requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);

        /* check that boundary condition is valid */
        if (bound != 0 &&
                        (requested_len > bound || !rte_is_power_of_2(bound))) {
                rte_errno = EINVAL;
                return NULL;
        }

        /* find the smallest segment matching requirements */
        for (i = 0; i < RTE_MAX_MEMSEG; i++) {
                /* last segment */
                if (free_memseg[i].addr == NULL)
                        break;

                /* empty segment, skip it */
                if (free_memseg[i].len == 0)
                        continue;

                /* bad socket ID */
                if (socket_id != SOCKET_ID_ANY &&
                    free_memseg[i].socket_id != SOCKET_ID_ANY &&
                    socket_id != free_memseg[i].socket_id)
                        continue;

                /*
                 * calculate offset to closest alignment that
                 * meets boundary conditions.
                 */
                addr_offset = align_phys_boundary(free_memseg + i,
                        requested_len, align, bound);

                /* check that the segment is big enough */
                if ((requested_len + addr_offset) > free_memseg[i].len)
                        continue;

                /* check hugepage size against the requested size mask */
                if ((size_mask & free_memseg[i].hugepage_sz) == 0)
                        continue;

                /* this segment is the best until now */
                if (memseg_idx == -1) {
                        memseg_idx = i;
                        memseg_len = free_memseg[i].len;
                        seg_offset = addr_offset;
                }
                /* find the biggest contiguous zone */
                else if (len == 0) {
                        if (free_memseg[i].len > memseg_len) {
                                memseg_idx = i;
                                memseg_len = free_memseg[i].len;
                                seg_offset = addr_offset;
                        }
                }
                /*
                 * find the smallest (we already checked that the current
                 * zone length is > len)
                 */
                else if (free_memseg[i].len + align < memseg_len ||
                                (free_memseg[i].len <= memseg_len + align &&
                                addr_offset < seg_offset)) {
                        memseg_idx = i;
                        memseg_len = free_memseg[i].len;
                        seg_offset = addr_offset;
                }
        }

        /* no segment found */
        if (memseg_idx == -1) {
                rte_errno = ENOMEM;
                return NULL;
        }

        /* save aligned physical and virtual addresses */
        memseg_physaddr = free_memseg[memseg_idx].phys_addr + seg_offset;
        memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr,
                        (uintptr_t) seg_offset);

        /* if we are looking for the biggest memzone */
        if (len == 0) {
                if (bound == 0)
                        requested_len = memseg_len - seg_offset;
                else
                        requested_len = RTE_ALIGN_CEIL(memseg_physaddr + 1,
                                bound) - memseg_physaddr;
        }

        /* set length to correct value */
        len = (size_t)seg_offset + requested_len;

        /* update our internal state */
        free_memseg[memseg_idx].len -= len;
        free_memseg[memseg_idx].phys_addr += len;
        free_memseg[memseg_idx].addr =
                (char *)free_memseg[memseg_idx].addr + len;

        /* fill the zone in config */
        struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++];
        snprintf(mz->name, sizeof(mz->name), "%s", name);
        mz->phys_addr = memseg_physaddr;
        mz->addr = memseg_addr;
        mz->len = requested_len;
        mz->hugepage_sz = free_memseg[memseg_idx].hugepage_sz;
        mz->socket_id = free_memseg[memseg_idx].socket_id;
        mz->flags = 0;
        mz->memseg_id = memseg_idx;

        return mz;
}
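/*
 * Illustrative note (a sketch, not part of the original file): passing
 * len == 0 requests the biggest zone still available, e.g.:
 *
 *      const struct rte_memzone *big;
 *      big = rte_memzone_reserve("big_pool", 0, SOCKET_ID_ANY, 0);
 *
 * The "big_pool" name is a made-up example. rte_memzone_reserve() reaches
 * this function with align = RTE_CACHE_LINE_SIZE and bound = 0, so
 * requested_len becomes memseg_len - seg_offset above.
 */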
static const struct rte_memzone *
rte_memzone_reserve_thread_safe(const char *name, size_t len,
                int socket_id, unsigned flags, unsigned align,
                unsigned bound)
{
        struct rte_mem_config *mcfg;
        const struct rte_memzone *mz = NULL;
        uint64_t size_mask = 0;

        if (flags & RTE_MEMZONE_256KB)
                size_mask |= RTE_PGSIZE_256K;
        if (flags & RTE_MEMZONE_2MB)
                size_mask |= RTE_PGSIZE_2M;
        if (flags & RTE_MEMZONE_16MB)
                size_mask |= RTE_PGSIZE_16M;
        if (flags & RTE_MEMZONE_256MB)
                size_mask |= RTE_PGSIZE_256M;
        if (flags & RTE_MEMZONE_512MB)
                size_mask |= RTE_PGSIZE_512M;
        if (flags & RTE_MEMZONE_1GB)
                size_mask |= RTE_PGSIZE_1G;
        if (flags & RTE_MEMZONE_4GB)
                size_mask |= RTE_PGSIZE_4G;
        if (flags & RTE_MEMZONE_16GB)
                size_mask |= RTE_PGSIZE_16G;

        /* no page-size flag set: accept any page size */
        if (!size_mask)
                size_mask = UINT64_MAX;

        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;

        rte_rwlock_write_lock(&mcfg->mlock);

        mz = memzone_reserve_aligned_thread_unsafe(
                name, len, socket_id, size_mask, align, bound);

        /*
         * If we failed to allocate the requested page size, and the
         * RTE_MEMZONE_SIZE_HINT_ONLY flag is specified, try allocating
         * with any available page size.
         */
        if (!mz && rte_errno == ENOMEM && size_mask != UINT64_MAX &&
            flags & RTE_MEMZONE_SIZE_HINT_ONLY) {
                mz = memzone_reserve_aligned_thread_unsafe(
                        name, len, socket_id, UINT64_MAX, align, bound);
        }

        rte_rwlock_write_unlock(&mcfg->mlock);

        return mz;
}
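/*
 * Illustrative flag usage (a sketch with assumed names, not part of the
 * original file): request 2MB pages, but fall back to any page size if no
 * 2MB-backed segment can satisfy the request:
 *
 *      mz = rte_memzone_reserve("rx_bufs", 1 << 20, SOCKET_ID_ANY,
 *              RTE_MEMZONE_2MB | RTE_MEMZONE_SIZE_HINT_ONLY);
 *
 * Without RTE_MEMZONE_SIZE_HINT_ONLY, the same call fails with
 * rte_errno == ENOMEM when no free segment is backed by 2MB pages.
 */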
/*
 * Return a pointer to a correctly filled memzone descriptor (with a
 * specified alignment and boundary). If the allocation cannot be done,
 * return NULL.
 */
const struct rte_memzone *
rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
                unsigned flags, unsigned align, unsigned bound)
{
        return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
                        align, bound);
}
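/*
 * Illustrative usage (assumed values, not from the original source):
 * reserve 8KB that must not cross a 16KB physical boundary, e.g. for a
 * device that cannot DMA across one:
 *
 *      mz = rte_memzone_reserve_bounded("dma_desc", 8192, SOCKET_ID_ANY,
 *              0, 64, 16384);
 */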
/*
 * Return a pointer to a correctly filled memzone descriptor (with a
 * specified alignment). If the allocation cannot be done, return NULL.
 */
const struct rte_memzone *
rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
                unsigned flags, unsigned align)
{
        return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
                        align, 0);
}
/*
 * Return a pointer to a correctly filled memzone descriptor. If the
 * allocation cannot be done, return NULL.
 */
const struct rte_memzone *
rte_memzone_reserve(const char *name, size_t len, int socket_id,
                unsigned flags)
{
        return rte_memzone_reserve_thread_safe(name, len, socket_id,
                        flags, RTE_CACHE_LINE_SIZE, 0);
}
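/*
 * Illustrative usage (a sketch; "app_state" and struct app_state are
 * hypothetical names, not from the original source):
 *
 *      const struct rte_memzone *mz;
 *      mz = rte_memzone_reserve("app_state", sizeof(struct app_state),
 *              rte_socket_id(), 0);
 *      if (mz == NULL)
 *              rte_panic("memzone failed: %s\n", rte_strerror(rte_errno));
 *
 * Zone names are unique process-wide: a second call with the same name
 * fails with rte_errno == EEXIST.
 */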
/*
 * Lookup for the memzone identified by the given name
 */
const struct rte_memzone *
rte_memzone_lookup(const char *name)
{
        struct rte_mem_config *mcfg;
        const struct rte_memzone *memzone = NULL;

        mcfg = rte_eal_get_configuration()->mem_config;

        rte_rwlock_read_lock(&mcfg->mlock);

        memzone = memzone_lookup_thread_unsafe(name);

        rte_rwlock_read_unlock(&mcfg->mlock);

        return memzone;
}
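/*
 * Illustrative usage (a sketch, not part of the original file): because
 * memzone descriptors live in the shared mem_config, a secondary process
 * can attach to a zone reserved by the primary:
 *
 *      const struct rte_memzone *mz = rte_memzone_lookup("app_state");
 *      if (mz != NULL)
 *              state = mz->addr;
 *
 * ("app_state" and state reuse the assumed names from the example above.)
 */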
/* Dump all reserved memory zones on console */
void
rte_memzone_dump(FILE *f)
{
        struct rte_mem_config *mcfg;
        unsigned i = 0;

        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;

        rte_rwlock_read_lock(&mcfg->mlock);
        /* dump all zones */
        for (i = 0; i < RTE_MAX_MEMZONE; i++) {
                if (mcfg->memzone[i].addr == NULL)
                        break;
                fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx"
                       ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
                       mcfg->memzone[i].name,
                       mcfg->memzone[i].phys_addr,
                       mcfg->memzone[i].len,
                       mcfg->memzone[i].addr,
                       mcfg->memzone[i].socket_id,
                       mcfg->memzone[i].flags);
        }
        rte_rwlock_read_unlock(&mcfg->mlock);
}
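/*
 * Illustrative call (not part of the original file):
 * rte_memzone_dump(stdout) prints one line per reserved zone in the
 * format shown above.
 */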
/*
 * called by init: modify the free memseg list to have cache-aligned
 * addresses and cache-aligned lengths
 */
static int
memseg_sanitize(struct rte_memseg *memseg)
{
        unsigned phys_align;
        unsigned virt_align;
        unsigned off;

        phys_align = memseg->phys_addr & RTE_CACHE_LINE_MASK;
        virt_align = (unsigned long)memseg->addr & RTE_CACHE_LINE_MASK;

        /*
         * sanity check: phys_addr and addr must have the same
         * alignment within a cache line
         */
        if (phys_align != virt_align)
                return -1;

        /* memseg is really too small, don't bother with it */
        if (memseg->len < (2 * RTE_CACHE_LINE_SIZE)) {
                memseg->len = 0;
                return 0;
        }

        /* align start address */
        off = (RTE_CACHE_LINE_SIZE - phys_align) & RTE_CACHE_LINE_MASK;
        memseg->phys_addr += off;
        memseg->addr = (char *)memseg->addr + off;
        memseg->len -= off;

        /* align end address */
        memseg->len &= ~((uint64_t)RTE_CACHE_LINE_MASK);

        return 0;
}
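/*
 * Worked example (illustrative, assumed numbers): with
 * RTE_CACHE_LINE_SIZE = 64 (mask 0x3F), a segment at phys_addr = 0x1010
 * with len = 0x1000 has phys_align = 0x10, so
 * off = (64 - 0x10) & 0x3F = 0x30. The segment becomes phys_addr = 0x1040,
 * len = 0x1000 - 0x30 = 0xFD0, and len &= ~0x3F then yields 0xFC0:
 * both the start and the end are now cache-line-aligned.
 */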
/*
 * Init the memzone subsystem
 */
int
rte_eal_memzone_init(void)
{
        struct rte_mem_config *mcfg;
        const struct rte_memseg *memseg;
        unsigned i = 0;

        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;

        /* mirror the runtime memsegs from config */
        free_memseg = mcfg->free_memseg;

        /* secondary processes don't need to initialise anything */
        if (rte_eal_process_type() == RTE_PROC_SECONDARY)
                return 0;

        memseg = rte_eal_get_physmem_layout();
        if (memseg == NULL) {
                RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
                return -1;
        }

        rte_rwlock_write_lock(&mcfg->mlock);

        /* fill in uninitialized free_memsegs */
        for (i = 0; i < RTE_MAX_MEMSEG; i++) {
                if (memseg[i].addr == NULL)
                        break;
                if (free_memseg[i].addr != NULL)
                        continue;
                memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg));
        }

        /* make all zones cache-aligned */
        for (i = 0; i < RTE_MAX_MEMSEG; i++) {
                if (free_memseg[i].addr == NULL)
                        break;
                if (memseg_sanitize(&free_memseg[i]) < 0) {
                        RTE_LOG(ERR, EAL, "%s(): Sanity check failed\n", __func__);
                        rte_rwlock_write_unlock(&mcfg->mlock);
                        return -1;
                }
        }

        /* delete all zones */
        mcfg->memzone_idx = 0;
        memset(mcfg->memzone, 0, sizeof(mcfg->memzone));

        rte_rwlock_write_unlock(&mcfg->mlock);

        return 0;
}
/* Walk all reserved memory zones */
void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
                      void *arg)
{
        struct rte_mem_config *mcfg;
        unsigned i;

        mcfg = rte_eal_get_configuration()->mem_config;

        rte_rwlock_read_lock(&mcfg->mlock);
        for (i = 0; i < RTE_MAX_MEMZONE; i++) {
                if (mcfg->memzone[i].addr != NULL)
                        (*func)(&mcfg->memzone[i], arg);
        }
        rte_rwlock_read_unlock(&mcfg->mlock);
}
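/*
 * Illustrative callback (a sketch, not part of the original file): count
 * the reserved zones with rte_memzone_walk():
 *
 *      static void
 *      count_zone(const struct rte_memzone *mz __rte_unused, void *arg)
 *      {
 *              (*(unsigned *)arg)++;
 *      }
 *
 *      unsigned nb_zones = 0;
 *      rte_memzone_walk(count_zone, &nb_zones);
 */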