eal: remove duplicated license

diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 8add643..9c9baf6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -2,6 +2,7 @@
  *   BSD LICENSE
  *
  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013 6WIND.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-/*   BSD LICENSE
- *
- *   Copyright(c) 2013 6WIND.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of 6WIND S.A. nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
 
 #define _FILE_OFFSET_BITS 64
 #include <errno.h>
 #include <stdarg.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -122,26 +94,28 @@ int rte_xen_dom0_supported(void)
 
 static uint64_t baseaddr_offset;
 
-static unsigned proc_pagemap_readable;
+static bool phys_addrs_available = true;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
 
 static void
-test_proc_pagemap_readable(void)
+test_phys_addrs_available(void)
 {
-       int fd = open("/proc/self/pagemap", O_RDONLY);
+       uint64_t tmp;
+       phys_addr_t physaddr;
 
-       if (fd < 0) {
+       /* For dom0, phys addresses can always be available */
+       if (rte_xen_dom0_supported())
+               return;
+
+       physaddr = rte_mem_virt2phy(&tmp);
+       if (physaddr == RTE_BAD_PHYS_ADDR) {
                RTE_LOG(ERR, EAL,
-                       "Cannot open /proc/self/pagemap: %s. "
-                       "virt2phys address translation will not work\n",
+                       "Cannot obtain physical addresses: %s. "
+                       "Only vfio will function.\n",
                        strerror(errno));
-               return;
+               phys_addrs_available = false;
        }
-
-       /* Is readable */
-       close(fd);
-       proc_pagemap_readable = 1;
 }
 
 /* Lock page in physical memory and prevent from swapping. */
@@ -190,7 +164,7 @@ rte_mem_virt2phy(const void *virtaddr)
        }
 
        /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
-       if (!proc_pagemap_readable)
+       if (!phys_addrs_available)
                return RTE_BAD_PHYS_ADDR;
 
        /* standard page size */
@@ -229,6 +203,9 @@ rte_mem_virt2phy(const void *virtaddr)
         * the pfn (page frame number) are bits 0-54 (see
         * pagemap.txt in linux Documentation)
         */
+       if ((page & 0x7fffffffffffffULL) == 0)
+               return RTE_BAD_PHYS_ADDR;
+
        physaddr = ((page & 0x7fffffffffffffULL) * page_size)
                + ((unsigned long)virtaddr % page_size);
 
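
As background for the two hunks above: rte_mem_virt2phy() reads /proc/self/pagemap, where each virtual page of the process has one 64-bit entry whose bits 0-54 hold the page frame number. Since Linux 4.0 the kernel reports a PFN of 0 to readers without CAP_SYS_ADMIN, which is exactly the case the new check turns into RTE_BAD_PHYS_ADDR. A minimal standalone sketch of that lookup, illustrative only and not DPDK code:

#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define PFN_MASK 0x7fffffffffffffULL	/* bits 0-54 of a pagemap entry */

static uint64_t
virt2phys_sketch(const void *virtaddr)
{
	long page_size = sysconf(_SC_PAGESIZE);
	uint64_t entry;
	off_t offset;
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0)
		return UINT64_MAX;
	/* one 64-bit entry per virtual page of the calling process */
	offset = ((uintptr_t)virtaddr / page_size) * sizeof(entry);
	if (pread(fd, &entry, sizeof(entry), offset) != (ssize_t)sizeof(entry)) {
		close(fd);
		return UINT64_MAX;
	}
	close(fd);
	if ((entry & PFN_MASK) == 0)	/* page not present, or PFN hidden */
		return UINT64_MAX;
	return (entry & PFN_MASK) * page_size
		+ (uintptr_t)virtaddr % page_size;
}

int
main(void)
{
	int x = 0;

	printf("phys addr of &x: 0x%" PRIx64 "\n", virt2phys_sketch(&x));
	return 0;
}

The address queried must belong to a resident page; in test_phys_addrs_available() the probed stack variable is already touched, so a zero PFN there really does mean the translation is unavailable.
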
@@ -242,7 +219,7 @@ rte_mem_virt2phy(const void *virtaddr)
 static int
 find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 {
-       unsigned i;
+       unsigned int i;
        phys_addr_t addr;
 
        for (i = 0; i < hpi->num_pages[0]; i++) {
@@ -254,6 +231,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
        return 0;
 }
 
+/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+       unsigned int i;
+       static phys_addr_t addr;
+
+       for (i = 0; i < hpi->num_pages[0]; i++) {
+               hugepg_tbl[i].physaddr = addr;
+               addr += hugepg_tbl[i].size;
+       }
+       return 0;
+}
+
 /*
  * Check whether address-space layout randomization is enabled in
  * the kernel. This is important for multi-process as it can prevent
@@ -313,7 +306,13 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
        }
        do {
                addr = mmap(addr,
-                               (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0);
+                               (*size) + hugepage_sz, PROT_READ,
+#ifdef RTE_ARCH_PPC_64
+                               MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+#else
+                               MAP_PRIVATE,
+#endif
+                               fd, 0);
                if (addr == MAP_FAILED)
                        *size -= hugepage_sz;
        } while (addr == MAP_FAILED && *size > 0);
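
The loop above implements the usual reserve-then-release trick: map a throwaway read-only region just to learn a free contiguous virtual range, shrink on failure, and give the range back before the real hugepage mapping is placed there. The new powerpc arm exists because, on the PPC64 hash MMU, an address range reserved with normal pages is not guaranteed to be usable for hugepages later, so the probe mapping itself is made MAP_HUGETLB | MAP_ANONYMOUS there. A stripped-down sketch of the probe, with a hypothetical helper name and anonymous memory instead of the EAL's /dev/zero descriptor:

#include <stddef.h>
#include <sys/mman.h>

/*
 * Hypothetical helper, not the EAL's get_virtual_area(): find a free,
 * contiguous virtual range of 'size' bytes and release it again so the
 * caller can map hugepages at the returned address.
 */
static void *
probe_virtual_area(size_t size)
{
	void *addr = mmap(NULL, size, PROT_READ,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (addr == MAP_FAILED)
		return NULL;
	/* Nothing else maps this range yet; hand it back unmapped. */
	munmap(addr, size);
	return addr;
}

There is an unavoidable window between the munmap() and the later hugepage mapping, which is one reason the attach path further down verifies that the address it actually received matches the one it asked for.
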
@@ -592,12 +591,12 @@ static int
 cmp_physaddr(const void *a, const void *b)
 {
 #ifndef RTE_ARCH_PPC_64
-       const struct hugepage_file *p1 = (const struct hugepage_file *)a;
-       const struct hugepage_file *p2 = (const struct hugepage_file *)b;
+       const struct hugepage_file *p1 = a;
+       const struct hugepage_file *p2 = b;
 #else
        /* PowerPC needs memory sorted in reverse order from x86 */
-       const struct hugepage_file *p1 = (const struct hugepage_file *)b;
-       const struct hugepage_file *p2 = (const struct hugepage_file *)a;
+       const struct hugepage_file *p1 = b;
+       const struct hugepage_file *p2 = a;
 #endif
        if (p1->physaddr < p2->physaddr)
                return -1;
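
Unrelated to the cast cleanup, the -1/0/1 pattern kept here is worth its extra lines: the tempting `return p1->physaddr - p2->physaddr;` would truncate a 64-bit difference into the int that qsort() expects and can report the wrong order. A tiny standalone demonstration, not DPDK code:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t lo = 0x100000000ULL;	/* 4 GB */
	uint64_t hi = 0x200000000ULL;	/* 8 GB */

	/* Truncating the 64-bit difference loses the ordering. */
	printf("naive subtraction: %d\n", (int)(lo - hi));	/* 0 on common ABIs */
	printf("explicit compare:  %d\n", lo < hi ? -1 : (lo > hi ? 1 : 0));
	return 0;
}
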
@@ -951,7 +950,7 @@ rte_eal_hugepage_init(void)
        int nr_hugefiles, nr_hugepages = 0;
        void *addr;
 
-       test_proc_pagemap_readable();
+       test_phys_addrs_available();
 
        memset(used_hp, 0, sizeof(used_hp));
 
@@ -1043,11 +1042,22 @@ rte_eal_hugepage_init(void)
                                continue;
                }
 
-               /* find physical addresses and sockets for each hugepage */
-               if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
-                       RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
-                                       (unsigned)(hpi->hugepage_sz / 0x100000));
-                       goto fail;
+               if (phys_addrs_available) {
+                       /* find physical addresses for each hugepage */
+                       if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+                               RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
+                                       "for %u MB pages\n",
+                                       (unsigned int)(hpi->hugepage_sz / 0x100000));
+                               goto fail;
+                       }
+               } else {
+                       /* set physical addresses for each hugepage */
+                       if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+                               RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
+                                       "for %u MB pages\n",
+                                       (unsigned int)(hpi->hugepage_sz / 0x100000));
+                               goto fail;
+                       }
                }
 
                if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@@ -1278,7 +1288,8 @@ rte_eal_hugepage_attach(void)
        struct hugepage_file *hp = NULL;
        unsigned num_hp = 0;
        unsigned i, s = 0; /* s used to track the segment number */
-       off_t size;
+       unsigned max_seg = RTE_MAX_MEMSEG;
+       off_t size = 0;
        int fd, fd_zero = -1, fd_hugepage = -1;
 
        if (aslr_enabled() > 0) {
@@ -1288,7 +1299,7 @@ rte_eal_hugepage_attach(void)
                                "into secondary processes\n");
        }
 
-       test_proc_pagemap_readable();
+       test_phys_addrs_available();
 
        if (internal_config.xen_dom0_support) {
 #ifdef RTE_LIBRTE_XEN_DOM0
@@ -1329,13 +1340,30 @@ rte_eal_hugepage_attach(void)
                 * use mmap to get identical addresses as the primary process.
                 */
                base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
-                                PROT_READ, MAP_PRIVATE, fd_zero, 0);
+                                PROT_READ,
+#ifdef RTE_ARCH_PPC_64
+                                MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+#else
+                                MAP_PRIVATE,
+#endif
+                                fd_zero, 0);
                if (base_addr == MAP_FAILED ||
                    base_addr != mcfg->memseg[s].addr) {
-                       RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
-                               "in /dev/zero to requested address [%p]: '%s'\n",
-                               (unsigned long long)mcfg->memseg[s].len,
-                               mcfg->memseg[s].addr, strerror(errno));
+                       max_seg = s;
+                       if (base_addr != MAP_FAILED) {
+                               /* errno is stale, don't use */
+                               RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+                                       "in /dev/zero at [%p], got [%p] - "
+                                       "please use '--base-virtaddr' option\n",
+                                       (unsigned long long)mcfg->memseg[s].len,
+                                       mcfg->memseg[s].addr, base_addr);
+                               munmap(base_addr, mcfg->memseg[s].len);
+                       } else {
+                               RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+                                       "in /dev/zero at [%p]: '%s'\n",
+                                       (unsigned long long)mcfg->memseg[s].len,
+                                       mcfg->memseg[s].addr, strerror(errno));
+                       }
                        if (aslr_enabled() > 0) {
                                RTE_LOG(ERR, EAL, "It is recommended to "
                                        "disable ASLR in the kernel "
@@ -1404,11 +1432,8 @@ rte_eal_hugepage_attach(void)
        return 0;
 
 error:
-       s = 0;
-       while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0) {
-               munmap(mcfg->memseg[s].addr, mcfg->memseg[s].len);
-               s++;
-       }
+       for (i = 0; i < max_seg && mcfg->memseg[i].len > 0; i++)
+               munmap(mcfg->memseg[i].addr, mcfg->memseg[i].len);
        if (hp != NULL && hp != MAP_FAILED)
                munmap(hp, size);
        if (fd_zero >= 0)
@@ -1417,3 +1442,9 @@ error:
                close(fd_hugepage);
        return -1;
 }
+
+bool
+rte_eal_using_phys_addrs(void)
+{
+       return phys_addrs_available;
+}
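
A hedged sketch of how a caller might consume the new accessor; dma_capable_init() and its parameter are hypothetical, the two prototypes are restated so the fragment compiles on its own, and it links only against an EAL that provides them:

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t phys_addr_t;		/* matches the EAL typedef */

/* EAL prototypes restated for the sketch */
bool rte_eal_using_phys_addrs(void);
phys_addr_t rte_mem_virt2phy(const void *virtaddr);

/* Hypothetical driver-side check, not DPDK code. */
static int
dma_capable_init(bool device_uses_vfio)
{
	/*
	 * When physical addresses are fake (set_physaddrs() filled them in
	 * sequentially), only vfio-backed devices can still do DMA, which is
	 * the same "Only vfio will function" condition logged at startup.
	 */
	if (!rte_eal_using_phys_addrs() && !device_uses_vfio)
		return -1;
	return 0;
}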