eal/linux: support running as unprivileged user
author Ben Walker <benjamin.walker@intel.com>
Tue, 31 Jan 2017 17:44:53 +0000 (10:44 -0700)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Thu, 9 Mar 2017 16:08:46 +0000 (17:08 +0100)
Since Linux kernel 4.0, unprivileged users can no longer
obtain physical page frame numbers from /proc/self/pagemap.
When an IOMMU is present, simply choose our own DMA
addresses instead.

Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Acked-by: Sergio Gonzalez Monroy <sergio.gonzalez.monroy@intel.com>
lib/librte_eal/common/eal_private.h
lib/librte_eal/linuxapp/eal/eal_memory.c
lib/librte_eal/linuxapp/eal/eal_pci.c

index 9e7d8f6..8b2d323 100644 (file)
@@ -34,6 +34,7 @@
 #ifndef _EAL_PRIVATE_H_
 #define _EAL_PRIVATE_H_
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <rte_pci.h>
 
@@ -301,4 +302,15 @@ int rte_eal_hugepage_init(void);
  */
 int rte_eal_hugepage_attach(void);
 
+/**
+ * Returns true if the system is able to obtain
+ * physical addresses. Returns false if using DMA
+ * addresses through an IOMMU.
+ *
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
+ */
+bool rte_eal_using_phys_addrs(void);
+
 #endif /* _EAL_PRIVATE_H_ */
index a956bb2..657c6f4 100644 (file)
@@ -64,6 +64,7 @@
 #define _FILE_OFFSET_BITS 64
 #include <errno.h>
 #include <stdarg.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdint.h>
@@ -122,26 +123,24 @@ int rte_xen_dom0_supported(void)
 
 static uint64_t baseaddr_offset;
 
-static unsigned proc_pagemap_readable;
+static bool phys_addrs_available = true;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
 
 static void
-test_proc_pagemap_readable(void)
+test_phys_addrs_available(void)
 {
-       int fd = open("/proc/self/pagemap", O_RDONLY);
+       uint64_t tmp;
+       phys_addr_t physaddr;
 
-       if (fd < 0) {
+       physaddr = rte_mem_virt2phy(&tmp);
+       if (physaddr == RTE_BAD_PHYS_ADDR) {
                RTE_LOG(ERR, EAL,
-                       "Cannot open /proc/self/pagemap: %s. "
-                       "virt2phys address translation will not work\n",
+                       "Cannot obtain physical addresses: %s. "
+                       "Only vfio will function.\n",
                        strerror(errno));
-               return;
+               phys_addrs_available = false;
        }
-
-       /* Is readable */
-       close(fd);
-       proc_pagemap_readable = 1;
 }
 
 /* Lock page in physical memory and prevent from swapping. */
@@ -190,7 +189,7 @@ rte_mem_virt2phy(const void *virtaddr)
        }
 
        /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
-       if (!proc_pagemap_readable)
+       if (!phys_addrs_available)
                return RTE_BAD_PHYS_ADDR;
 
        /* standard page size */
@@ -229,6 +228,9 @@ rte_mem_virt2phy(const void *virtaddr)
         * the pfn (page frame number) are bits 0-54 (see
         * pagemap.txt in linux Documentation)
         */
+       if ((page & 0x7fffffffffffffULL) == 0)
+               return RTE_BAD_PHYS_ADDR;
+
        physaddr = ((page & 0x7fffffffffffffULL) * page_size)
                + ((unsigned long)virtaddr % page_size);
 
@@ -242,7 +244,7 @@ rte_mem_virt2phy(const void *virtaddr)
 static int
 find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 {
-       unsigned i;
+       unsigned int i;
        phys_addr_t addr;
 
        for (i = 0; i < hpi->num_pages[0]; i++) {
@@ -254,6 +256,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
        return 0;
 }
 
+/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+       unsigned int i;
+       static phys_addr_t addr;
+
+       for (i = 0; i < hpi->num_pages[0]; i++) {
+               hugepg_tbl[i].physaddr = addr;
+               addr += hugepg_tbl[i].size;
+       }
+       return 0;
+}
+
 /*
  * Check whether address-space layout randomization is enabled in
  * the kernel. This is important for multi-process as it can prevent
@@ -951,7 +969,7 @@ rte_eal_hugepage_init(void)
        int nr_hugefiles, nr_hugepages = 0;
        void *addr;
 
-       test_proc_pagemap_readable();
+       test_phys_addrs_available();
 
        memset(used_hp, 0, sizeof(used_hp));
 
@@ -1043,11 +1061,22 @@ rte_eal_hugepage_init(void)
                                continue;
                }
 
-               /* find physical addresses and sockets for each hugepage */
-               if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
-                       RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
-                                       (unsigned)(hpi->hugepage_sz / 0x100000));
-                       goto fail;
+               if (phys_addrs_available) {
+                       /* find physical addresses for each hugepage */
+                       if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+                               RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
+                                       "for %u MB pages\n",
+                                       (unsigned int)(hpi->hugepage_sz / 0x100000));
+                               goto fail;
+                       }
+               } else {
+                       /* set physical addresses for each hugepage */
+                       if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+                               RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
+                                       "for %u MB pages\n",
+                                       (unsigned int)(hpi->hugepage_sz / 0x100000));
+                               goto fail;
+                       }
                }
 
                if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@@ -1289,7 +1318,7 @@ rte_eal_hugepage_attach(void)
                                "into secondary processes\n");
        }
 
-       test_proc_pagemap_readable();
+       test_phys_addrs_available();
 
        if (internal_config.xen_dom0_support) {
 #ifdef RTE_LIBRTE_XEN_DOM0
@@ -1426,3 +1455,9 @@ error:
                close(fd_hugepage);
        return -1;
 }
+
+bool
+rte_eal_using_phys_addrs(void)
+{
+       return phys_addrs_available;
+}
index e2fc219..61d55b9 100644 (file)
@@ -99,8 +99,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
                break;
        case RTE_KDRV_IGB_UIO:
        case RTE_KDRV_UIO_GENERIC:
-               /* map resources for devices that use uio */
-               ret = pci_uio_map_resource(dev);
+               if (rte_eal_using_phys_addrs()) {
+                       /* map resources for devices that use uio */
+                       ret = pci_uio_map_resource(dev);
+               }
                break;
        default:
                RTE_LOG(DEBUG, EAL,