X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal_memory.c;h=5f9f92e1636e3ebba80f29d5d71117986a236c53;hb=ff708facfcbf42f3dcb3c62d82ecd93e7b8c2506;hp=296f17272c522221ac3ed628f7125aad6d0b7201;hpb=1896b4ec5e7ad5089fa17120bebf17d5dea8f476;p=dpdk.git diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 296f17272c..5f9f92e163 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -1,13 +1,13 @@ /*- * BSD LICENSE - * + * * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * + * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright @@ -17,7 +17,7 @@ * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -85,7 +85,6 @@ #include #include #include -#include #include #include #include @@ -131,42 +130,42 @@ phys_addr_t rte_mem_virt2phy(const void *virtaddr) { int fd; - uint64_t page, physaddr, virtual; + uint64_t page, physaddr; unsigned long virt_pfn; int page_size; + off_t offset; /* standard page size */ page_size = getpagesize(); - virtual = (uint64_t) virtaddr; fd = open("/proc/self/pagemap", O_RDONLY); if (fd < 0) { RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", __func__, strerror(errno)); - return (uint64_t) -1; + return RTE_BAD_PHYS_ADDR; } - off_t offset; virt_pfn = (unsigned long)virtaddr / page_size; offset = sizeof(uint64_t) * virt_pfn; if (lseek(fd, offset, SEEK_SET) == (off_t) -1) { RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", __func__, strerror(errno)); close(fd); - return (uint64_t) -1; + return RTE_BAD_PHYS_ADDR; } if (read(fd, &page, sizeof(uint64_t)) < 0) { RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", __func__, strerror(errno)); close(fd); - return (uint64_t) -1; + return RTE_BAD_PHYS_ADDR; } /* * the pfn (page frame number) are bits 0-54 (see * pagemap.txt in linux Documentation) */ - physaddr = ((page & 0x7fffffffffffffULL) * page_size) + (virtual % page_size); + physaddr = ((page & 0x7fffffffffffffULL) * page_size) + + ((unsigned long)virtaddr % page_size); close(fd); return physaddr; } @@ -221,7 +220,7 @@ aslr_enabled(void) } /* - * Try to mmap *size bytes in /dev/zero. If it is succesful, return the + * Try to mmap *size bytes in /dev/zero. If it is successful, return the * pointer to the mmap'd area and keep *size unmodified. Else, retry * with a smaller zone: decrease *size by hugepage_sz until it reaches * 0. In this case, return NULL. Note: this function returns an address @@ -300,7 +299,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, #endif for (i = 0; i < hpi->num_pages[0]; i++) { - size_t hugepage_sz = hpi->hugepage_sz; + uint64_t hugepage_sz = hpi->hugepage_sz; if (orig) { hugepg_tbl[i].file_id = i; @@ -316,11 +315,12 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, #endif hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0'; } -#ifndef RTE_ARCH_X86_64 - /* for 32-bit systems, don't remap 1G pages, just reuse original - * map address as final map address. +#ifndef RTE_ARCH_64 + /* for 32-bit systems, don't remap 1G and 16G pages, just reuse + * original map address as final map address. */ - else if (hugepage_sz == RTE_PGSIZE_1G){ + else if ((hugepage_sz == RTE_PGSIZE_1G) + || (hugepage_sz == RTE_PGSIZE_16G)) { hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; hugepg_tbl[i].orig_va = NULL; continue; @@ -335,9 +335,17 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, * physical block: count the number of * contiguous physical pages. */ for (j = i+1; j < hpi->num_pages[0] ; j++) { +#ifdef RTE_ARCH_PPC_64 + /* The physical addresses are sorted in + * descending order on PPC64 */ + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr - hugepage_sz) + break; +#else if (hugepg_tbl[j].physaddr != hugepg_tbl[j-1].physaddr + hugepage_sz) break; +#endif } num_pages = j - i; vma_len = num_pages * hugepage_sz; @@ -412,11 +420,12 @@ remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) while (i < hpi->num_pages[0]) { -#ifndef RTE_ARCH_X86_64 - /* for 32-bit systems, don't remap 1G pages, just reuse original - * map address as final map address. +#ifndef RTE_ARCH_64 + /* for 32-bit systems, don't remap 1G pages and 16G pages, + * just reuse original map address as final map address. */ - if (hugepage_sz == RTE_PGSIZE_1G){ + if ((hugepage_sz == RTE_PGSIZE_1G) + || (hugepage_sz == RTE_PGSIZE_16G)) { hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; hugepg_tbl[i].orig_va = NULL; i++; @@ -428,8 +437,17 @@ remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) * physical block: count the number of * contiguous physical pages. */ for (j = i+1; j < hpi->num_pages[0] ; j++) { - if (hugepg_tbl[j].physaddr != hugepg_tbl[j-1].physaddr + hugepage_sz) +#ifdef RTE_ARCH_PPC_64 + /* The physical addresses are sorted in descending + * order on PPC64 */ + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr - hugepage_sz) + break; +#else + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr + hugepage_sz) break; +#endif } num_pages = j - i; vma_len = num_pages * hugepage_sz; @@ -505,7 +523,7 @@ remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) return -1; } - rte_snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s", + snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s", filepath); physaddr = rte_mem_virt2phy(vma_addr); @@ -591,8 +609,8 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) return 0; } - rte_snprintf(hugedir_str, sizeof(hugedir_str), - "%s/", hpi->hugedir); + snprintf(hugedir_str, sizeof(hugedir_str), + "%s/%s", hpi->hugedir, internal_config.hugefile_prefix); /* parse numa map */ while (fgets(buf, sizeof(buf), f) != NULL) { @@ -652,21 +670,21 @@ error: } /* - * Sort the hugepg_tbl by physical address (lower addresses first). We - * use a slow algorithm, but we won't have millions of pages, and this - * is only done at init time. + * Sort the hugepg_tbl by physical address (lower addresses first on x86, + * higher address first on powerpc). We use a slow algorithm, but we won't + * have millions of pages, and this is only done at init time. */ static int sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) { unsigned i, j; - int smallest_idx; - uint64_t smallest_addr; + int compare_idx; + uint64_t compare_addr; struct hugepage_file tmp; for (i = 0; i < hpi->num_pages[0]; i++) { - smallest_addr = 0; - smallest_idx = -1; + compare_addr = 0; + compare_idx = -1; /* * browse all entries starting at 'i', and find the @@ -674,23 +692,28 @@ sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) */ for (j=i; j< hpi->num_pages[0]; j++) { - if (smallest_addr == 0 || - hugepg_tbl[j].physaddr < smallest_addr) { - smallest_addr = hugepg_tbl[j].physaddr; - smallest_idx = j; + if (compare_addr == 0 || +#ifdef RTE_ARCH_PPC_64 + hugepg_tbl[j].physaddr > compare_addr) { +#else + hugepg_tbl[j].physaddr < compare_addr) { +#endif + compare_addr = hugepg_tbl[j].physaddr; + compare_idx = j; } } /* should not happen */ - if (smallest_idx == -1) { + if (compare_idx == -1) { RTE_LOG(ERR, EAL, "%s(): error in physaddr sorting\n", __func__); return -1; } /* swap the 2 entries in the table */ - memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage_file)); - memcpy(&hugepg_tbl[smallest_idx], &hugepg_tbl[i], - sizeof(struct hugepage_file)); + memcpy(&tmp, &hugepg_tbl[compare_idx], + sizeof(struct hugepage_file)); + memcpy(&hugepg_tbl[compare_idx], &hugepg_tbl[i], + sizeof(struct hugepage_file)); memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage_file)); } return 0; @@ -881,13 +904,53 @@ calc_num_pages_per_socket(uint64_t * memory, if (num_hp_info == 0) return -1; - for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) { - /* if specific memory amounts per socket weren't requested */ - if (internal_config.force_sockets == 0) { + /* if specific memory amounts per socket weren't requested */ + if (internal_config.force_sockets == 0) { + int cpu_per_socket[RTE_MAX_NUMA_NODES]; + size_t default_size, total_size; + unsigned lcore_id; + + /* Compute number of cores per socket */ + memset(cpu_per_socket, 0, sizeof(cpu_per_socket)); + RTE_LCORE_FOREACH(lcore_id) { + cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++; + } + + /* + * Automatically spread requested memory amongst detected sockets according + * to number of cores from cpu mask present on each socket + */ + total_size = internal_config.memory; + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { + + /* Set memory amount per socket */ + default_size = (internal_config.memory * cpu_per_socket[socket]) + / rte_lcore_count(); + + /* Limit to maximum available memory on socket */ + default_size = RTE_MIN(default_size, get_socket_mem_size(socket)); + + /* Update sizes */ + memory[socket] = default_size; + total_size -= default_size; + } + + /* + * If some memory is remaining, try to allocate it by getting all + * available memory from sockets, one after the other + */ + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { /* take whatever is available */ - memory[socket] = RTE_MIN(get_socket_mem_size(socket), - total_mem); + default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket], + total_size); + + /* Update sizes */ + memory[socket] += default_size; + total_size -= default_size; } + } + + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) { /* skips if the memory on specific socket wasn't requested */ for (i = 0; i < num_hp_info && memory[socket] != 0; i++){ hp_used[i].hugedir = hp_info[i].hugedir; @@ -991,7 +1054,13 @@ rte_eal_hugepage_init(void) /* hugetlbfs can be disabled */ if (internal_config.no_hugetlbfs) { - addr = malloc(internal_config.memory); + addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, + strerror(errno)); + return -1; + } mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; mcfg->memseg[0].addr = addr; mcfg->memseg[0].len = internal_config.memory; @@ -1214,12 +1283,25 @@ rte_eal_hugepage_init(void) new_memseg = 1; else if (hugepage[i].size != hugepage[i-1].size) new_memseg = 1; + +#ifdef RTE_ARCH_PPC_64 + /* On PPC64 architecture, the mmap always start from higher + * virtual address to lower address. Here, both the physical + * address and virtual address are in descending order */ + else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) != + hugepage[i].size) + new_memseg = 1; + else if (((unsigned long)hugepage[i-1].final_va - + (unsigned long)hugepage[i].final_va) != hugepage[i].size) + new_memseg = 1; +#else else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) != hugepage[i].size) new_memseg = 1; else if (((unsigned long)hugepage[i].final_va - (unsigned long)hugepage[i-1].final_va) != hugepage[i].size) new_memseg = 1; +#endif if (new_memseg) { j += 1; @@ -1238,6 +1320,12 @@ rte_eal_hugepage_init(void) } /* continuation of previous memseg */ else { +#ifdef RTE_ARCH_PPC_64 + /* Use the phy and virt address of the last page as segment + * address for IBM Power architecture */ + mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].addr = hugepage[i].final_va; +#endif mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz; } hugepage[i].memseg_id = j;