X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Fcommon%2Feal_common_memory.c;h=d47ea4938f90b5908c5b94f158511dd07f7c8f25;hb=32fc0fa00eed43d6985c2e8d3691f0e29aeea645;hp=30671ef82393c451b0d1767b2ed26a5f837d9a48;hpb=1d406458db476adb12c5be004eca893637143556;p=dpdk.git

diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 30671ef823..d47ea4938f 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -2,6 +2,7 @@
  * Copyright(c) 2010-2014 Intel Corporation
  */
 
+#include <fcntl.h>
 #include <errno.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -37,6 +38,23 @@ static void *next_baseaddr;
 static uint64_t system_page_sz;
 
+#ifdef RTE_ARCH_64
+/*
+ * The Linux kernel uses a really high address as the starting address for
+ * serving mmap calls. If there are addressing limitations and IOVA mode is
+ * VA, this starting address is likely too high for those devices. However,
+ * it is possible to use a lower address in the process virtual address
+ * space, as with 64 bits there is a lot of available space.
+ *
+ * Currently known limitations are 39 or 40 bits. Setting the starting
+ * address at 4GB implies there are 508GB or 1020GB for mapping the
+ * available hugepages. This is likely enough for most systems, although
+ * a device with addressing limitations should call rte_mem_check_dma_mask
+ * to ensure all memory is within the supported range.
+ */
+static uint64_t baseaddr = 0x100000000;
+#endif
+
 void *
 eal_get_virtual_area(void *requested_addr, size_t *size,
 		size_t page_sz, int flags, int mmap_flags)
 {
@@ -60,20 +78,28 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
 			rte_eal_process_type() == RTE_PROC_PRIMARY)
 		next_baseaddr = (void *) internal_config.base_virtaddr;
 
+#ifdef RTE_ARCH_64
+	if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 &&
+			rte_eal_process_type() == RTE_PROC_PRIMARY)
+		next_baseaddr = (void *) baseaddr;
+#endif
 	if (requested_addr == NULL && next_baseaddr != NULL) {
 		requested_addr = next_baseaddr;
 		requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
 		addr_is_hint = true;
 	}
 
-	/* if requested address is not aligned by page size, or if requested
-	 * address is NULL, add page size to requested length as we may get an
-	 * address that's aligned by system page size, which can be smaller than
-	 * our requested page size. additionally, we shouldn't try to align if
-	 * system page size is the same as requested page size.
+	/* we don't need alignment of the resulting pointer in these cases:
+	 *
+	 * 1. page size is equal to system page size
+	 * 2. we have a requested address, and it is page-aligned, and we will
+	 *    be discarding the address if we get a different one.
+	 *
+	 * for all other cases, alignment is potentially necessary.
 	 */
 	no_align = (requested_addr != NULL &&
-			((uintptr_t)requested_addr & (page_sz - 1))) ||
+			requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
+			!addr_is_hint) ||
 			page_sz == system_page_sz;
 
 	do {
@@ -88,7 +114,17 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
 				mmap_flags, -1, 0);
 		if (mapped_addr == MAP_FAILED && allow_shrink)
 			*size -= page_sz;
-	} while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0);
+
+		if (mapped_addr != MAP_FAILED && addr_is_hint &&
+				mapped_addr != requested_addr) {
+			/* hint was not used. Try with another offset */
+			munmap(mapped_addr, map_sz);
+			mapped_addr = MAP_FAILED;
+			next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
+			requested_addr = next_baseaddr;
+		}
+	} while ((allow_shrink || addr_is_hint) &&
+		mapped_addr == MAP_FAILED && *size > 0);
 
 	/* align resulting address - if map failed, we will ignore the value
 	 * anyway, so no need to add additional checks.
@@ -168,7 +204,7 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl)
 
 	/* a memseg list was specified, check if it's the right one */
 	start = msl->base_va;
-	end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);
+	end = RTE_PTR_ADD(start, msl->len);
 
 	if (addr < start || addr >= end)
 		return NULL;
@@ -191,8 +227,7 @@ virt2memseg_list(const void *addr)
 		msl = &mcfg->memsegs[msl_idx];
 
 		start = msl->base_va;
-		end = RTE_PTR_ADD(start,
-				(size_t)msl->page_sz * msl->memseg_arr.len);
+		end = RTE_PTR_ADD(start, msl->len);
 		if (addr >= start && addr < end)
 			break;
 	}
@@ -270,6 +305,9 @@ physmem_size(const struct rte_memseg_list *msl, void *arg)
 {
 	uint64_t *total_len = arg;
 
+	if (msl->external)
+		return 0;
+
 	*total_len += msl->memseg_arr.count * msl->page_sz;
 
 	return 0;
@@ -291,7 +329,7 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
 		void *arg)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	int msl_idx, ms_idx;
+	int msl_idx, ms_idx, fd;
 	FILE *f = arg;
 
 	msl_idx = msl - mcfg->memsegs;
@@ -302,10 +340,11 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
 	if (ms_idx < 0)
 		return -1;
 
+	fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx);
 	fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
 			"virt:%p, socket_id:%"PRId32", "
 			"hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
-			"nrank:%"PRIx32"\n",
+			"nrank:%"PRIx32", fd:%i\n",
 			msl_idx, ms_idx,
 			ms->iova,
 			ms->len,
@@ -313,7 +352,8 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
 			ms->socket_id,
 			ms->hugepage_sz,
 			ms->nchannel,
-			ms->nrank);
+			ms->nrank,
+			fd);
 
 	return 0;
 }
@@ -380,6 +420,97 @@ rte_dump_physmem_layout(FILE *f)
 	rte_memseg_walk(dump_memseg, f);
 }
 
+static int
+check_iova(const struct rte_memseg_list *msl __rte_unused,
+		const struct rte_memseg *ms, void *arg)
+{
+	uint64_t *mask = arg;
+	rte_iova_t iova;
+
+	/* highest address within segment */
+	iova = (ms->iova + ms->len) - 1;
+	if (!(iova & *mask))
+		return 0;
+
+	RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n",
+			ms->iova, ms->len);
+
+	RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask);
+	return 1;
+}
+
+#define MAX_DMA_MASK_BITS 63
+
+/* check that memseg iovas are within the required range based on dma mask */
+static int __rte_experimental
+check_dma_mask(uint8_t maskbits, bool thread_unsafe)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	uint64_t mask;
+	int ret;
+
+	/* Sanity check. We only check that the width can be managed with
+	 * 64-bit variables. Any higher value is likely wrong. */
+	if (maskbits > MAX_DMA_MASK_BITS) {
+		RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
+				maskbits, MAX_DMA_MASK_BITS);
+		return -1;
+	}
+
+	/* create dma mask */
+	mask = ~((1ULL << maskbits) - 1);
+
+	if (thread_unsafe)
+		ret = rte_memseg_walk_thread_unsafe(check_iova, &mask);
+	else
+		ret = rte_memseg_walk(check_iova, &mask);
+
+	if (ret)
+		/*
+		 * The dma mask precludes hugepage usage.
+		 * This device cannot be used and we do not need to keep
+		 * the dma mask.
+		 */
+		return 1;
+
+	/*
+	 * we need to keep the more restrictive maskbits for checking
+	 * potential dynamic memory allocation in the future.
+	 */
+	mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
+			     RTE_MIN(mcfg->dma_maskbits, maskbits);
+
+	return 0;
+}
+
+int __rte_experimental
+rte_mem_check_dma_mask(uint8_t maskbits)
+{
+	return check_dma_mask(maskbits, false);
+}
+
+int __rte_experimental
+rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits)
+{
+	return check_dma_mask(maskbits, true);
+}
+
+/*
+ * Set the dma mask to use once memory initialization is done.
+ *
+ * This function should ONLY be used by code executed before the memory
+ * initialization. PMDs should use rte_mem_check_dma_mask if the device
+ * has addressing limitations.
+ */
+void __rte_experimental
+rte_mem_set_dma_mask(uint8_t maskbits)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+	mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
+			     RTE_MIN(mcfg->dma_maskbits, maskbits);
+}
+
 /* return the number of memory channels */
 unsigned rte_memory_get_nchannel(void)
 {
@@ -418,14 +549,11 @@ rte_mem_lock_page(const void *virt)
 }
 
 int __rte_experimental
-rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	int i, ms_idx, ret = 0;
 
-	/* do not allow allocations/frees/init while we iterate */
-	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
-
 	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
 		const struct rte_memseg *ms;
@@ -450,30 +578,34 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
 			len = n_segs * msl->page_sz;
 
 			ret = func(msl, ms, len, arg);
-			if (ret < 0) {
-				ret = -1;
-				goto out;
-			} else if (ret > 0) {
-				ret = 1;
-				goto out;
-			}
+			if (ret)
+				return ret;
 			ms_idx = rte_fbarray_find_next_used(arr,
 					ms_idx + n_segs);
 		}
 	}
-out:
-	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
-	return ret;
+	return 0;
 }
 
 int __rte_experimental
-rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	int i, ms_idx, ret = 0;
+	int ret = 0;
 
 	/* do not allow allocations/frees/init while we iterate */
 	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+	return ret;
+}
+
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int i, ms_idx, ret = 0;
 
 	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
@@ -489,29 +621,33 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg)
 		while (ms_idx >= 0) {
 			ms = rte_fbarray_get(arr, ms_idx);
 			ret = func(msl, ms, arg);
-			if (ret < 0) {
-				ret = -1;
-				goto out;
-			} else if (ret > 0) {
-				ret = 1;
-				goto out;
-			}
+			if (ret)
+				return ret;
 			ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
 		}
 	}
-out:
-	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
-	return ret;
+	return 0;
 }
 
 int __rte_experimental
-rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	int i, ret = 0;
+	int ret = 0;
 
 	/* do not allow allocations/frees/init while we iterate */
 	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	ret = rte_memseg_walk_thread_unsafe(func, arg);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+	return ret;
+}
+
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int i, ret = 0;
 
 	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
 		struct rte_memseg_list *msl = &mcfg->memsegs[i];
 
@@ -520,17 +656,122 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
 			continue;
 
 		ret = func(msl, arg);
-		if (ret < 0) {
-			ret = -1;
-			goto out;
-		}
-		if (ret > 0) {
-			ret = 1;
-			goto out;
-		}
+		if (ret)
+			return ret;
 	}
-out:
+	return 0;
+}
+
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int ret = 0;
+
+	/* do not allow allocations/frees/init while we iterate */
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	ret = rte_memseg_list_walk_thread_unsafe(func, arg);
 	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+	return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl;
+	struct rte_fbarray *arr;
+	int msl_idx, seg_idx, ret;
+
+	if (ms == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	msl = rte_mem_virt2memseg_list(ms->addr);
+	if (msl == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+	arr = &msl->memseg_arr;
+
+	msl_idx = msl - mcfg->memsegs;
+	seg_idx = rte_fbarray_find_idx(arr, ms);
+
+	if (!rte_fbarray_is_used(arr, seg_idx)) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
+	if (ret < 0) {
+		rte_errno = -ret;
+		ret = -1;
+	}
+	return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd(const struct rte_memseg *ms)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int ret;
+
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	ret = rte_memseg_get_fd_thread_unsafe(ms);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+	return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
+		size_t *offset)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl;
+	struct rte_fbarray *arr;
+	int msl_idx, seg_idx, ret;
+
+	if (ms == NULL || offset == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	msl = rte_mem_virt2memseg_list(ms->addr);
+	if (msl == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+	arr = &msl->memseg_arr;
+
+	msl_idx = msl - mcfg->memsegs;
+	seg_idx = rte_fbarray_find_idx(arr, ms);
+
+	if (!rte_fbarray_is_used(arr, seg_idx)) {
+		rte_errno = ENOENT;
+		return -1;
+	}
+
+	ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
+	if (ret < 0) {
+		rte_errno = -ret;
+		ret = -1;
+	}
+	return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int ret;
+
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+	ret = rte_memseg_get_fd_offset_thread_unsafe(ms, offset);
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 
 	return ret;
 }
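
Usage sketch (illustrative, not part of the patch): per the implementation above, rte_mem_check_dma_mask returns 0 when every memseg IOVA fits within the given mask width, 1 when some segment is out of range, and -1 when the width itself is invalid. A driver limited to 40-bit IOVAs might run a probe-time check like the following; the helper name sample_probe_check_addressing is hypothetical, and since the API is experimental at this point, callers must be built with ALLOW_EXPERIMENTAL_API.

#include <errno.h>

#include <rte_log.h>
#include <rte_memory.h>

/* Hypothetical probe-time helper for a NIC that can only address 40 bits. */
static int
sample_probe_check_addressing(void)
{
	int ret;

	/* 0: all memseg IOVAs fit in 40 bits, 1: out of range, -1: bad width */
	ret = rte_mem_check_dma_mask(40);
	if (ret < 0)
		return -EINVAL; /* mask width rejected (> MAX_DMA_MASK_BITS) */
	if (ret > 0) {
		RTE_LOG(ERR, PMD,
			"hugepage memory not addressable with 40 bits\n");
		return -ENODEV; /* device cannot safely do DMA to this memory */
	}
	return 0;
}

Note also that the locked walk entry points now simply wrap the new *_thread_unsafe variants, presumably so that callers already holding memory_hotplug_lock (for example, callbacks invoked during allocation) can iterate without taking the read lock recursively.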