X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal_memory.c;h=79d1d2dd8033b0892f7415736ec4cfa1f6d193c2;hb=bd6aa172cf35046e197b3a42a79069d4da15813a;hp=d52901601a28fc7c7ed0bc16ff4df4bcbb331f29;hpb=a20d5f06e2e19d3622620d838006ea4eaaa594fb;p=dpdk.git diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index d52901601a..79d1d2dd80 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -97,6 +97,13 @@ #include "eal_filesystem.h" #include "eal_hugepages.h" +#ifdef RTE_LIBRTE_XEN_DOM0 +int rte_xen_dom0_supported(void) +{ + return internal_config.xen_dom0_support; +} +#endif + /** * @file * Huge page mapping under linux @@ -115,6 +122,24 @@ static unsigned proc_pagemap_readable; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" +static void +test_proc_pagemap_readable(void) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + + if (fd < 0) { + RTE_LOG(ERR, EAL, + "Cannot open /proc/self/pagemap: %s. " + "virt2phys address translation will not work\n", + strerror(errno)); + return; + } + + /* Is readable */ + close(fd); + proc_pagemap_readable = 1; +} + /* Lock page in physical memory and prevent from swapping. */ int rte_mem_lock_page(const void *virt) @@ -374,8 +399,10 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, return -1; } + /* map the segment, and populate page tables, + * the kernel fills this segment with zeros */ virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); + MAP_SHARED | MAP_POPULATE, fd, 0); if (virtaddr == MAP_FAILED) { RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); @@ -385,7 +412,6 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, if (orig) { hugepg_tbl[i].orig_va = virtaddr; - memset(virtaddr, 0, hugepage_sz); } else { hugepg_tbl[i].final_va = virtaddr; @@ -504,22 +530,16 @@ remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) old_addr = vma_addr; - /* map new, bigger segment */ + /* map new, bigger segment, and populate page tables, + * the kernel fills this segment with zeros */ vma_addr = mmap(vma_addr, total_size, - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); if (vma_addr == MAP_FAILED || vma_addr != old_addr) { RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); close(fd); return -1; } - - /* touch the page. this is needed because kernel postpones mapping - * creation until the first page fault. with this, we pin down - * the page and it is marked as used and gets into process' pagemap. - */ - for (offset = 0; offset < total_size; offset += hugepage_sz) - *((volatile uint8_t*) RTE_PTR_ADD(vma_addr, offset)); } /* set shared flock on the file. */ @@ -567,9 +587,6 @@ remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) } } - /* zero out the whole segment */ - memset(hugepg_tbl[page_idx].final_va, 0, total_size); - page_idx++; } @@ -676,54 +693,23 @@ error: return -1; } -/* - * Sort the hugepg_tbl by physical address (lower addresses first on x86, - * higher address first on powerpc). We use a slow algorithm, but we won't - * have millions of pages, and this is only done at init time. - */ static int -sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +cmp_physaddr(const void *a, const void *b) { - unsigned i, j; - int compare_idx; - uint64_t compare_addr; - struct hugepage_file tmp; - - for (i = 0; i < hpi->num_pages[0]; i++) { - compare_addr = 0; - compare_idx = -1; - - /* - * browse all entries starting at 'i', and find the - * entry with the smallest addr - */ - for (j=i; j< hpi->num_pages[0]; j++) { - - if (compare_addr == 0 || -#ifdef RTE_ARCH_PPC_64 - hugepg_tbl[j].physaddr > compare_addr) { +#ifndef RTE_ARCH_PPC_64 + const struct hugepage_file *p1 = (const struct hugepage_file *)a; + const struct hugepage_file *p2 = (const struct hugepage_file *)b; #else - hugepg_tbl[j].physaddr < compare_addr) { + /* PowerPC needs memory sorted in reverse order from x86 */ + const struct hugepage_file *p1 = (const struct hugepage_file *)b; + const struct hugepage_file *p2 = (const struct hugepage_file *)a; #endif - compare_addr = hugepg_tbl[j].physaddr; - compare_idx = j; - } - } - - /* should not happen */ - if (compare_idx == -1) { - RTE_LOG(ERR, EAL, "%s(): error in physaddr sorting\n", __func__); - return -1; - } - - /* swap the 2 entries in the table */ - memcpy(&tmp, &hugepg_tbl[compare_idx], - sizeof(struct hugepage_file)); - memcpy(&hugepg_tbl[compare_idx], &hugepg_tbl[i], - sizeof(struct hugepage_file)); - memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage_file)); - } - return 0; + if (p1->physaddr < p2->physaddr) + return -1; + else if (p1->physaddr > p2->physaddr) + return 1; + else + return 0; } /* @@ -768,6 +754,30 @@ copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size, return 0; } +static int +unlink_hugepage_files(struct hugepage_file *hugepg_tbl, + unsigned num_hp_info) +{ + unsigned socket, size; + int page, nrpages = 0; + + /* get total number of hugepages */ + for (size = 0; size < num_hp_info; size++) + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) + nrpages += + internal_config.hugepage_info[size].num_pages[socket]; + + for (page = 0; page < nrpages; page++) { + struct hugepage_file *hp = &hugepg_tbl[page]; + + if (hp->final_va != NULL && unlink(hp->filepath)) { + RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n", + __func__, hp->filepath, strerror(errno)); + } + } + return 0; +} + /* * unmaps hugepages that are not going to be used. since we originally allocate * ALL hugepages (not just those we need), additional unmapping needs to be done. @@ -1037,7 +1047,7 @@ calc_num_pages_per_socket(uint64_t * memory, * 6. unmap the first mapping * 7. fill memsegs in configuration with contiguous zones */ -static int +int rte_eal_hugepage_init(void) { struct rte_mem_config *mcfg; @@ -1054,6 +1064,8 @@ rte_eal_hugepage_init(void) int new_pages_count[MAX_HUGEPAGE_SIZES]; #endif + test_proc_pagemap_readable(); + memset(used_hp, 0, sizeof(used_hp)); /* get pointer to global configuration */ @@ -1062,7 +1074,7 @@ rte_eal_hugepage_init(void) /* hugetlbfs can be disabled */ if (internal_config.no_hugetlbfs) { addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + MAP_LOCKED | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); if (addr == MAP_FAILED) { RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, strerror(errno)); @@ -1087,7 +1099,6 @@ rte_eal_hugepage_init(void) #endif } - /* calculate total number of hugepages available. at this point we haven't * yet started sorting them so they all are on socket 0 */ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { @@ -1145,8 +1156,8 @@ rte_eal_hugepage_init(void) goto fail; } - if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0) - goto fail; + qsort(&tmp_hp[hp_offset], hpi->num_pages[0], + sizeof(struct hugepage_file), cmp_physaddr); #ifdef RTE_EAL_SINGLE_FILE_SEGMENTS /* remap all hugepages into single file segments */ @@ -1270,6 +1281,13 @@ rte_eal_hugepage_init(void) goto fail; } + /* free the hugepage backing files */ + if (internal_config.hugepage_unlink && + unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) { + RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n"); + goto fail; + } + /* free the temporary hugepage table */ free(tmp_hp); tmp_hp = NULL; @@ -1355,8 +1373,7 @@ rte_eal_hugepage_init(void) return 0; fail: - if (tmp_hp) - free(tmp_hp); + free(tmp_hp); return -1; } @@ -1378,7 +1395,7 @@ getFileSize(int fd) * configuration and finds the hugepages which form that segment, mapping them * in order to form a contiguous block in the virtual memory space */ -static int +int rte_eal_hugepage_attach(void) { const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; @@ -1395,6 +1412,8 @@ rte_eal_hugepage_attach(void) "into secondary processes\n"); } + test_proc_pagemap_readable(); + if (internal_config.xen_dom0_support) { #ifdef RTE_LIBRTE_XEN_DOM0 if (rte_xen_dom0_memory_attach() < 0) { @@ -1538,56 +1557,3 @@ error: close(fd_hugepage); return -1; } - -static int -rte_eal_memdevice_init(void) -{ - struct rte_config *config; - - if (rte_eal_process_type() == RTE_PROC_SECONDARY) - return 0; - - config = rte_eal_get_configuration(); - config->mem_config->nchannel = internal_config.force_nchannel; - config->mem_config->nrank = internal_config.force_nrank; - - return 0; -} - -static int -test_proc_pagemap_readable(void) -{ - int fd = open("/proc/self/pagemap", O_RDONLY); - - if (fd < 0) - return 0; - /* Is readable */ - close(fd); - - return 1; -} - -/* init memory subsystem */ -int -rte_eal_memory_init(void) -{ - RTE_LOG(INFO, EAL, "Setting up memory...\n"); - - proc_pagemap_readable = test_proc_pagemap_readable(); - if (!proc_pagemap_readable) - RTE_LOG(ERR, EAL, - "Cannot open /proc/self/pagemap: %s. " - "virt2phys address translation will not work\n", - strerror(errno)); - - const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ? - rte_eal_hugepage_init() : - rte_eal_hugepage_attach(); - if (retval < 0) - return -1; - - if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0) - return -1; - - return 0; -}