eal/linux: map pci memory resources after hugepages
author Anatoly Burakov <anatoly.burakov@intel.com>
Tue, 11 Nov 2014 10:09:25 +0000 (10:09 +0000)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Tue, 25 Nov 2014 17:16:41 +0000 (18:16 +0100)
A multi-process DPDK application must mmap hugepages and PCI resources
at the same virtual addresses in every process. By default the virtual
addresses are chosen automatically by mmap() in the primary process.
But sometimes the chosen virtual addresses aren't usable in a secondary
process - for example, when the secondary process is linked with more
libraries than the primary process, and one of those libraries occupies
the address range that the primary process obtained for PCI mappings.

This patch makes EAL try to map PCI BARs right after the hugepages
(instead of at a location chosen by mmap) in virtual memory, so that PCI
BARs have less chance of ending up in random places in virtual memory.

Signed-off-by: Liang Xu <liang.xu@cinfotech.cn>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
lib/librte_eal/linuxapp/eal/eal_pci.c
lib/librte_eal/linuxapp/eal/eal_pci_init.h
lib/librte_eal/linuxapp/eal/eal_pci_uio.c
lib/librte_eal/linuxapp/eal/eal_pci_vfio.c

index ddb05352ec0e775541bb3c04944a3f9f9b6ab309..b5f54101e8aafab63b463ee6043b5a89557c47a1 100644 (file)
@@ -97,6 +97,25 @@ error:
        return -1;
 }
 
+void *
+pci_find_max_end_va(void)
+{
+       const struct rte_memseg *seg = rte_eal_get_physmem_layout();
+       const struct rte_memseg *last = seg; /* highest start address seen so far */
+       unsigned i = 0;
+       /* Find the memseg with the highest start address and return its end. */
+       for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) {
+               if (seg->addr == NULL) /* first NULL addr ends the scan */
+                       break;
+
+               if (seg->addr > last->addr)
+                       last = seg;
+
+       }
+       return RTE_PTR_ADD(last->addr, last->len); /* last->addr advanced by last->len */
+}
+
+
 /* map a particular resource from a file */
 void *
 pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
@@ -106,21 +125,16 @@ pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
        /* Map the PCI memory resource of device */
        mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
                        MAP_SHARED, fd, offset);
-       if (mapaddr == MAP_FAILED ||
-                       (requested_addr != NULL && mapaddr != requested_addr)) {
+       if (mapaddr == MAP_FAILED) {
                RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
                        __func__, fd, requested_addr,
                        (unsigned long)size, (unsigned long)offset,
                        strerror(errno), mapaddr);
-               goto fail;
+       } else {
+               RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
        }
 
-       RTE_LOG(DEBUG, EAL, "  PCI memory mapped at %p\n", mapaddr);
-
        return mapaddr;
-
-fail:
-       return NULL;
 }
 
 /* parse the "resource" sysfs file */
index d758bee311cc4776c56124ee0a94d728c9b70f33..1070eb88fe0aa538eedd487122ad34de6fa4e2e3 100644 (file)
@@ -59,6 +59,12 @@ struct mapped_pci_resource {
 TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
 extern struct mapped_pci_res_list *pci_res_list;
 
+/*
+ * Helpers to map PCI resources right after hugepages in virtual memory
+ */
+extern void *pci_map_addr; /* address hint passed to the next PCI mmap */
+void *pci_find_max_end_va(void); /* end of the highest-addressed memseg */
+
 void *pci_map_resource(void *requested_addr, int fd, off_t offset,
                size_t size);
 
index 7e6226642981704f93fb115415d47d5f38c0d0e3..e53f06b824305e6665b630157be09e5e372d958a 100644 (file)
@@ -35,6 +35,7 @@
 #include <fcntl.h>
 #include <dirent.h>
 #include <sys/stat.h>
+#include <sys/mman.h>
 
 #include <rte_log.h>
 #include <rte_pci.h>
@@ -48,6 +49,8 @@
 
 static int pci_parse_sysfs_value(const char *filename, uint64_t *val);
 
+void *pci_map_addr = NULL; /* hint for the next PCI mapping; set from pci_find_max_end_va() on first use */
+
 
 #define OFF_MAX              ((uint64_t)(off_t)-1)
 static int
@@ -371,10 +374,16 @@ pci_uio_map_resource(struct rte_pci_device *dev)
                        if (maps[j].addr != NULL)
                                fail = 1;
                        else {
-                               mapaddr = pci_map_resource(NULL, fd, (off_t)offset,
+                               /* try mapping somewhere close to the end of hugepages */
+                               if (pci_map_addr == NULL)
+                                       pci_map_addr = pci_find_max_end_va();
+
+                               mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset,
                                                (size_t)maps[j].size);
-                               if (mapaddr == NULL)
+                               if (mapaddr == MAP_FAILED)
                                        fail = 1;
+
+                               pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t) maps[j].size);
                        }
 
                        if (fail) {
index c776ddc442be9eb00ca61cb33db36a5077737eae..c1246e8dabba4c3c112c8d14ab575f6e50cdf856 100644 (file)
@@ -37,6 +37,7 @@
 #include <sys/eventfd.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
+#include <sys/mman.h>
 
 #include <rte_log.h>
 #include <rte_pci.h>
@@ -720,10 +721,22 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
                if (i == msix_bar)
                        continue;
 
-               bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
-                               reg.size);
+               if (internal_config.process_type == RTE_PROC_PRIMARY) {
+                       /* try mapping somewhere close to the end of hugepages */
+                       if (pci_map_addr == NULL)
+                               pci_map_addr = pci_find_max_end_va();
+
+                       bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset,
+                                       reg.size);
+                       pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+               } else {
+                       bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
+                                       reg.size);
+               }
 
-               if (bar_addr == NULL) {
+               if (bar_addr == MAP_FAILED ||
+                               (internal_config.process_type == RTE_PROC_SECONDARY &&
+                                               bar_addr != maps[i].addr)) {
                        RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n", pci_addr, i,
                                        strerror(errno));
                        close(vfio_dev_fd);