-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 6WIND.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright(c) 2013 6WIND S.A.
*/
#define _FILE_OFFSET_BITS 64
#include <numaif.h>
#endif
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
- * zone as well as a physical contiguous zone.
+ * zone as well as a physically contiguous zone.
*/
-static uint64_t baseaddr_offset;
-
static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
physaddr = rte_mem_virt2phy(&tmp);
if (physaddr == RTE_BAD_PHYS_ADDR) {
- RTE_LOG(ERR, EAL,
- "Cannot obtain physical addresses: %s. "
- "Only vfio will function.\n",
- strerror(errno));
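+ /* in IOVA as VA mode, DMA uses virtual addresses directly, so a
+ * missing physical address is only an error in IOVA as PA mode.
+ */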
+ if (rte_eal_iova_mode() == RTE_IOVA_PA)
+ RTE_LOG(ERR, EAL,
+ "Cannot obtain physical addresses: %s. "
+ "Only vfio will function.\n",
+ strerror(errno));
phys_addrs_available = false;
}
}
int page_size;
off_t offset;
- if (rte_eal_iova_mode() == RTE_IOVA_VA)
- return (uintptr_t)virtaddr;
-
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
if (!phys_addrs_available)
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
/* standard page size */
page_size = getpagesize();
if (fd < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
__func__, strerror(errno));
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
}
virt_pfn = (unsigned long)virtaddr / page_size;
RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
__func__, strerror(errno));
close(fd);
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
}
retval = read(fd, &page, PFN_MASK_SIZE);
if (retval < 0) {
RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
__func__, strerror(errno));
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
} else if (retval != PFN_MASK_SIZE) {
RTE_LOG(ERR, EAL, "%s(): read %d bytes from /proc/self/pagemap "
"but expected %d:\n",
__func__, retval, PFN_MASK_SIZE);
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
}
/*
* pagemap.txt in linux Documentation)
*/
if ((page & 0x7fffffffffffffULL) == 0)
- return RTE_BAD_PHYS_ADDR;
+ return RTE_BAD_IOVA;
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
return physaddr;
}
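+/*
+ * Resolve the IOVA of a virtual address: in IOVA as VA mode this is the
+ * virtual address itself, otherwise fall back to the physical address
+ * read from /proc/self/pagemap. Illustrative use, with "buf" standing in
+ * for any process-local buffer:
+ *
+ *   rte_iova_t iova = rte_mem_virt2iova(buf);
+ *   if (iova == RTE_BAD_IOVA)
+ *       (handle memory that cannot be mapped for DMA)
+ */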
+rte_iova_t
+rte_mem_virt2iova(const void *virtaddr)
+{
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ return (uintptr_t)virtaddr;
+ return rte_mem_virt2phy(virtaddr);
+}
+
/*
* For each hugepage in hugepg_tbl, fill the physaddr value. We find
* it by browsing the /proc/self/pagemap special file.
}
}
-/*
- * Try to mmap *size bytes in /dev/zero. If it is successful, return the
- * pointer to the mmap'd area and keep *size unmodified. Else, retry
- * with a smaller zone: decrease *size by hugepage_sz until it reaches
- * 0. In this case, return NULL. Note: this function returns an address
- * which is a multiple of hugepage size.
- */
-static void *
-get_virtual_area(size_t *size, size_t hugepage_sz)
-{
- void *addr;
- int fd;
- long aligned_addr;
-
- if (internal_config.base_virtaddr != 0) {
- addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
- baseaddr_offset);
- }
- else addr = NULL;
-
- RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
-
- fd = open("/dev/zero", O_RDONLY);
- if (fd < 0){
- RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
- return NULL;
- }
- do {
- addr = mmap(addr,
- (*size) + hugepage_sz, PROT_READ,
-#ifdef RTE_ARCH_PPC_64
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-#else
- MAP_PRIVATE,
-#endif
- fd, 0);
- if (addr == MAP_FAILED)
- *size -= hugepage_sz;
- } while (addr == MAP_FAILED && *size > 0);
-
- if (addr == MAP_FAILED) {
- close(fd);
- RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
- strerror(errno));
- return NULL;
- }
-
- munmap(addr, (*size) + hugepage_sz);
- close(fd);
-
- /* align addr to a huge page size boundary */
- aligned_addr = (long)addr;
- aligned_addr += (hugepage_sz - 1);
- aligned_addr &= (~(hugepage_sz - 1));
- addr = (void *)(aligned_addr);
-
- RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
- addr, *size);
-
- /* increment offset */
- baseaddr_offset += *size;
-
- return addr;
-}
-
static sigjmp_buf huge_jmpenv;
static void huge_sigbus_handler(int signo __rte_unused)
* hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the
* virtual address is stored in hugepg_tbl[i].orig_va, else it is stored
* in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to
- * map continguous physical blocks in contiguous virtual blocks.
+ * map contiguous physical blocks in contiguous virtual blocks.
*/
static unsigned
map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
/* get the biggest virtual memory area up to
* vma_len. If it fails, vma_addr is NULL, so
* let the kernel provide the address. */
- vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
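+ /* EAL_VIRTUAL_AREA_ALLOW_SHRINK retries with a smaller
+ * size until the reservation succeeds, and
+ * EAL_VIRTUAL_AREA_UNMAP releases the probe mapping again,
+ * mirroring what the removed get_virtual_area() did with
+ * /dev/zero.
+ */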
+ vma_addr = eal_get_virtual_area(NULL, &vma_len,
+ hpi->hugepage_sz,
+ EAL_VIRTUAL_AREA_ALLOW_SHRINK |
+ EAL_VIRTUAL_AREA_UNMAP,
+#ifdef RTE_ARCH_PPC_64
+ MAP_HUGETLB
+#else
+ 0
+#endif
+ );
if (vma_addr == NULL)
vma_len = hugepage_sz;
}
hugepg_tbl[i].orig_va = virtaddr;
}
else {
+ /* rewrite physical addresses in IOVA as VA mode */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ hugepg_tbl[i].physaddr = (uintptr_t)virtaddr;
hugepg_tbl[i].final_va = virtaddr;
}
}
retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
close(fd);
+ if (retval == MAP_FAILED)
+ return NULL;
return retval;
}
strerror(errno));
return -1;
}
- mcfg->memseg[0].phys_addr = RTE_BAD_PHYS_ADDR;
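+ /* in IOVA as VA mode the VA of this anonymous mapping doubles
+ * as its IOVA; in IOVA as PA mode there is no physical address
+ * to report, hence RTE_BAD_IOVA.
+ */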
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ mcfg->memseg[0].iova = (uintptr_t)addr;
+ else
+ mcfg->memseg[0].iova = RTE_BAD_IOVA;
mcfg->memseg[0].addr = addr;
mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
mcfg->memseg[0].len = internal_config.memory;
continue;
}
- if (phys_addrs_available) {
+ if (phys_addrs_available &&
+ rte_eal_iova_mode() != RTE_IOVA_VA) {
/* find physical addresses for each hugepage */
if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
if (j == RTE_MAX_MEMSEG)
break;
- mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+ mcfg->memseg[j].iova = hugepage[i].physaddr;
mcfg->memseg[j].addr = hugepage[i].final_va;
mcfg->memseg[j].len = hugepage[i].size;
mcfg->memseg[j].socket_id = hugepage[i].socket_id;
#ifdef RTE_ARCH_PPC_64
- /* Use the phy and virt address of the last page as segment
- * address for IBM Power architecture */
+ /* Use the IOVA and virtual address of the last page as the
+ * segment address for IBM Power architecture */
- mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
+ mcfg->memseg[j].iova = hugepage[i].physaddr;
mcfg->memseg[j].addr = hugepage[i].final_va;
#endif
mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz;
unsigned i, s = 0; /* s used to track the segment number */
unsigned max_seg = RTE_MAX_MEMSEG;
off_t size = 0;
- int fd, fd_zero = -1, fd_hugepage = -1;
+ int fd, fd_hugepage = -1;
if (aslr_enabled() > 0) {
RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
test_phys_addrs_available();
- fd_zero = open("/dev/zero", O_RDONLY);
- if (fd_zero < 0) {
- RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
- goto error;
- }
fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY);
if (fd_hugepage < 0) {
RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
/* map all segments into memory to make sure we get the addrs */
for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
void *base_addr;
+ size_t mmap_sz;
+ int mmap_flags = 0;
/*
* the first memory segment with len==0 is the one that
if (mcfg->memseg[s].len == 0)
break;
- /*
- * fdzero is mmapped to get a contiguous block of virtual
- * addresses of the appropriate memseg size.
- * use mmap to get identical addresses as the primary process.
+ /* map this segment at the same addresses as in the primary process.
*/
- base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
- PROT_READ,
#ifdef RTE_ARCH_PPC_64
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-#else
- MAP_PRIVATE,
+ mmap_flags |= MAP_HUGETLB;
#endif
- fd_zero, 0);
- if (base_addr == MAP_FAILED ||
- base_addr != mcfg->memseg[s].addr) {
+ mmap_sz = mcfg->memseg[s].len;
+ base_addr = eal_get_virtual_area(mcfg->memseg[s].addr,
+ &mmap_sz, mcfg->memseg[s].hugepage_sz, 0,
+ mmap_flags);
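+ /* with no flags set the requested address is mandatory;
+ * eal_get_virtual_area() reports failure through rte_errno,
+ * using EADDRNOTAVAIL when the address itself was unavailable,
+ * which the error path below turns into a '--base-virtaddr' hint.
+ */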
+ if (base_addr == NULL) {
max_seg = s;
- if (base_addr != MAP_FAILED) {
- /* errno is stale, don't use */
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
- "in /dev/zero at [%p], got [%p] - "
- "please use '--base-virtaddr' option\n",
- (unsigned long long)mcfg->memseg[s].len,
- mcfg->memseg[s].addr, base_addr);
- munmap(base_addr, mcfg->memseg[s].len);
+ if (rte_errno == EADDRNOTAVAIL) {
+ RTE_LOG(ERR, EAL, "Could not mmap %zu bytes at [%p] - please use '--base-virtaddr' option\n",
+ mcfg->memseg[s].len,
+ mcfg->memseg[s].addr);
} else {
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
- "in /dev/zero at [%p]: '%s'\n",
- (unsigned long long)mcfg->memseg[s].len,
- mcfg->memseg[s].addr, strerror(errno));
+ RTE_LOG(ERR, EAL, "Could not mmap %zu bytes at [%p]: '%s'\n",
+ mcfg->memseg[s].len,
+ mcfg->memseg[s].addr,
+ rte_strerror(rte_errno));
}
if (aslr_enabled() > 0) {
RTE_LOG(ERR, EAL, "It is recommended to "
}
/* unmap the hugepage config file, since we are done using it */
munmap(hp, size);
- close(fd_zero);
close(fd_hugepage);
return 0;
munmap(mcfg->memseg[i].addr, mcfg->memseg[i].len);
if (hp != NULL && hp != MAP_FAILED)
munmap(hp, size);
- if (fd_zero >= 0)
- close(fd_zero);
if (fd_hugepage >= 0)
close(fd_hugepage);
return -1;
}
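+/* Return 1 (true) when physical addresses are available, 0 otherwise. */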
-bool
+int
rte_eal_using_phys_addrs(void)
{
return phys_addrs_available;