ivshmem: library changes for mmapping using ivshmem
author    Bruce Richardson <bruce.richardson@intel.com>
Tue, 11 Feb 2014 10:28:51 +0000 (10:28 +0000)
committer David Marchand <david.marchand@6wind.com>
Tue, 25 Feb 2014 20:29:19 +0000 (21:29 +0100)
These library changes provide a new Intel(R) DPDK feature for communicating
with virtual machines using QEMU's IVSHMEM mechanism.

The feature works by generating a command line for QEMU that maps several hugepages
into a single IVSHMEM device. For the guest to know what is inside any given IVSHMEM
device (and to distinguish between Intel(R) DPDK and non-Intel(R) DPDK IVSHMEM
devices), a metadata file is also mapped into the IVSHMEM segment. No work needs to
be done by the guest application to map IVSHMEM devices into memory; they are
automatically recognized by the Intel(R) DPDK Environment Abstraction Layer (EAL).
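
A minimal host-side sketch of the new API (object names are illustrative,
error handling omitted):

    char cmdline[1024];
    struct rte_ring *r = rte_ring_create("r0", 256, SOCKET_ID_ANY, 0);

    rte_ivshmem_metadata_create("md");        /* create a metadata file */
    rte_ivshmem_metadata_add_ring(r, "md");   /* export the ring through it */

    /* generate the QEMU command line describing the resulting device */
    rte_ivshmem_metadata_cmdline_generate(cmdline, sizeof(cmdline), "md");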

Changes in this patch:
* Changes to EAL to allow mapping of all hugepages in a memseg into a single file.
* Changes to EAL to allow IVSHMEM devices to be transparently mapped into
  processes running on the guest.
* New ivshmem library to create and manage metadata exported to guest VMs.
* New ivshmem compilation targets.
* Mempool and ring changes to allow export of structures to a VM and to allow
  a VM to attach to those structures.
* New autotests to unit test this functionality.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
23 files changed:
app/test/Makefile
app/test/autotest_data.py
app/test/commands.c
app/test/test.c
app/test/test.h
app/test/test_ivshmem.c [new file with mode: 0644]
config/defconfig_x86_64-ivshmem-linuxapp-gcc [new file with mode: 0644]
config/defconfig_x86_64-ivshmem-linuxapp-icc [new file with mode: 0644]
lib/Makefile
lib/librte_eal/common/eal_common_memzone.c
lib/librte_eal/common/include/eal_private.h
lib/librte_eal/common/include/rte_memory.h
lib/librte_eal/common/include/rte_memzone.h
lib/librte_eal/linuxapp/eal/Makefile
lib/librte_eal/linuxapp/eal/eal.c
lib/librte_eal/linuxapp/eal/eal_ivshmem.c [new file with mode: 0644]
lib/librte_eal/linuxapp/eal/eal_memory.c
lib/librte_eal/linuxapp/eal/include/eal_filesystem.h
lib/librte_eal/linuxapp/eal/include/eal_hugepages.h
lib/librte_ivshmem/Makefile [new file with mode: 0644]
lib/librte_ivshmem/rte_ivshmem.c [new file with mode: 0644]
lib/librte_ivshmem/rte_ivshmem.h [new file with mode: 0644]
mk/rte.app.mk

diff --git a/app/test/Makefile b/app/test/Makefile
index 39fa163..c065a4c 100644
@@ -92,6 +92,7 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_kni.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_power.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_common.c
 SRCS-$(CONFIG_RTE_APP_TEST) += test_timer_perf.c
+SRCS-$(CONFIG_RTE_APP_TEST) += test_ivshmem.c
 
 ifeq ($(CONFIG_RTE_APP_TEST),y)
 SRCS-$(CONFIG_RTE_LIBRTE_ACL) += test_acl.c
@@ -107,6 +108,7 @@ CFLAGS_test_kni.o += -wd1478
 else
 CFLAGS_test_kni.o += -Wno-deprecated-declarations
 endif
+CFLAGS += -D_GNU_SOURCE
 
 # this application needs libraries first
 DEPDIRS-$(CONFIG_RTE_APP_TEST) += lib
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index bdb7e94..1161a92 100644
@@ -174,6 +174,12 @@ parallel_test_group_list = [
                 "Func" :       default_autotest,
                 "Report" :     None,
                },
+               {
+                "Name" :       "IVSHMEM autotest",
+                "Command" :    "ivshmem_autotest",
+                "Func" :       default_autotest,
+                "Report" :     None,
+               },
                {
                 "Name" :       "Memcpy autotest",
                 "Command" :    "memcpy_autotest",
diff --git a/app/test/commands.c b/app/test/commands.c
index 118f70d..f09bc90 100644
@@ -184,6 +184,8 @@ static void cmd_autotest_parsed(void *parsed_result,
                ret |= test_power();
        if (all || !strcmp(res->autotest, "common_autotest"))
                ret |= test_common();
+       if (all || !strcmp(res->autotest, "ivshmem_autotest"))
+               ret |= test_ivshmem();
 #ifdef RTE_LIBRTE_PMD_RING
        if (all || !strcmp(res->autotest, "ring_pmd_autotest"))
                ret |= test_pmd_ring();
@@ -224,7 +226,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest =
                        "memcpy_perf_autotest#ring_perf_autotest#"
                        "red_autotest#meter_autotest#sched_autotest#"
                        "memcpy_perf_autotest#kni_autotest#"
-                       "pm_autotest#"
+                       "pm_autotest#ivshmem_autotest#"
 #ifdef RTE_LIBRTE_ACL
                        "acl_autotest#"
 #endif
diff --git a/app/test/test.c b/app/test/test.c
index c87e0df..3a7999b 100644
@@ -86,6 +86,7 @@ do_recursive_call(void)
                        { "test_memory_flags", no_action },
                        { "test_file_prefix", no_action },
                        { "test_no_huge_flag", no_action },
+                       { "test_ivshmem", test_ivshmem },
        };
 
        if (recursive_call == NULL)
diff --git a/app/test/test.h b/app/test/test.h
index 71d87d1..adc6212 100644
@@ -95,6 +95,7 @@ int test_kni(void);
 int test_power(void);
 int test_common(void);
 int test_pmd_ring(void);
+int test_ivshmem(void);
 
 int test_pci_run;
 
diff --git a/app/test/test_ivshmem.c b/app/test/test_ivshmem.c
new file mode 100644
index 0000000..52f8277
--- /dev/null
+++ b/app/test/test_ivshmem.c
@@ -0,0 +1,441 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <stdio.h>
+
+#include <cmdline_parse.h>
+
+#include "test.h"
+
+#ifdef RTE_LIBRTE_IVSHMEM
+
+#include <rte_common.h>
+#include <rte_ivshmem.h>
+#include <rte_string_fns.h>
+#include "process.h"
+
+#define DUPLICATE_METADATA "duplicate"
+#define METADATA_NAME "metadata"
+#define NONEXISTENT_METADATA "nonexistent"
+#define FIRST_TEST 'a'
+
+#define launch_proc(ARGV) process_dup(ARGV, \
+               sizeof(ARGV)/(sizeof(ARGV[0])), "test_ivshmem")
+
+#define ASSERT(cond,msg) do {                                          \
+               if (!(cond)) {                                                          \
+                       printf("**** TEST %s() failed: %s\n",   \
+                               __func__, msg);                                         \
+                       return -1;                                                              \
+               }                                                                                       \
+} while(0)
+
+static char*
+get_current_prefix(char * prefix, int size)
+{
+       char path[PATH_MAX] = {0};
+       char buf[PATH_MAX] = {0};
+
+       /* get file for config (fd is always 3) */
+       rte_snprintf(path, sizeof(path), "/proc/self/fd/%d", 3);
+
+       /* return NULL on error */
+       if (readlink(path, buf, sizeof(buf)) == -1)
+               return NULL;
+
+       /* get the basename */
+       rte_snprintf(buf, sizeof(buf), "%s", basename(buf));
+
+       /* copy string all the way from second char up to start of _config */
+       rte_snprintf(prefix, size, "%.*s",
+                       strnlen(buf, sizeof(buf)) - sizeof("_config"), &buf[1]);
+
+       return prefix;
+}
+
+static struct rte_ivshmem_metadata*
+mmap_metadata(const char *name)
+{
+       int fd;
+       char pathname[PATH_MAX];
+       struct rte_ivshmem_metadata *metadata;
+
+       rte_snprintf(pathname, sizeof(pathname),
+                       "/var/run/.dpdk_ivshmem_metadata_%s", name);
+
+       fd = open(pathname, O_RDWR, 0660);
+       if (fd < 0)
+               return NULL;
+
+       metadata = (struct rte_ivshmem_metadata*) mmap(NULL,
+                       sizeof(struct rte_ivshmem_metadata), PROT_READ | PROT_WRITE,
+                       MAP_SHARED, fd, 0);
+
+       if (metadata == MAP_FAILED)
+               return NULL;
+
+       close(fd);
+
+       return metadata;
+}
+
+static int
+create_duplicate(void)
+{
+       /* create a metadata that another process will then try to overwrite */
+       ASSERT (rte_ivshmem_metadata_create(DUPLICATE_METADATA) == 0,
+                       "Creating metadata failed");
+       return 0;
+}
+
+static int
+test_ivshmem_create_lots_of_memzones(void)
+{
+       int i;
+       char name[IVSHMEM_NAME_LEN];
+       const struct rte_memzone *mz;
+
+       ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0,
+                       "Failed to create metadata");
+
+       for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES; i++) {
+               rte_snprintf(name, sizeof(name), "mz_%i", i);
+
+               mz = rte_memzone_reserve(name, CACHE_LINE_SIZE, SOCKET_ID_ANY, 0);
+               ASSERT(mz != NULL, "Failed to reserve memzone");
+
+               ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) == 0,
+                               "Failed to add memzone");
+       }
+       mz = rte_memzone_reserve("one too many", CACHE_LINE_SIZE, SOCKET_ID_ANY, 0);
+       ASSERT(mz != NULL, "Failed to reserve memzone");
+
+       ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) < 0,
+               "Metadata should have been full");
+
+       return 0;
+}
+
+static int
+test_ivshmem_create_duplicate_memzone(void)
+{
+       const struct rte_memzone *mz;
+
+       ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0,
+                       "Failed to create metadata");
+
+       mz = rte_memzone_reserve("mz", CACHE_LINE_SIZE, SOCKET_ID_ANY, 0);
+       ASSERT(mz != NULL, "Failed to reserve memzone");
+
+       ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) == 0,
+                       "Failed to add memzone");
+
+       ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) < 0,
+                       "Added the same memzone twice");
+
+       return 0;
+}
+
+static int
+test_ivshmem_api_test(void)
+{
+       const struct rte_memzone * mz;
+       struct rte_mempool * mp;
+       struct rte_ring * r;
+       char buf[BUFSIZ];
+
+       memset(buf, 0, sizeof(buf));
+
+       r = rte_ring_create("ring", 1, SOCKET_ID_ANY, 0);
+       mp = rte_mempool_create("mempool", 1, 1, 1, 1, NULL, NULL, NULL, NULL,
+                       SOCKET_ID_ANY, 0);
+       mz = rte_memzone_reserve("memzone", 64, SOCKET_ID_ANY, 0);
+
+       ASSERT(r != NULL, "Failed to create ring");
+       ASSERT(mp != NULL, "Failed to create mempool");
+       ASSERT(mz != NULL, "Failed to reserve memzone");
+
+       /* try to create NULL metadata */
+       ASSERT(rte_ivshmem_metadata_create(NULL) < 0,
+                       "Created metadata with NULL name");
+
+       /* create valid metadata to do tests on */
+       ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0,
+                       "Failed to create metadata");
+
+       /* test adding memzone */
+       ASSERT(rte_ivshmem_metadata_add_memzone(NULL, NULL) < 0,
+                       "Added NULL memzone to NULL metadata");
+       ASSERT(rte_ivshmem_metadata_add_memzone(NULL, METADATA_NAME) < 0,
+                       "Added NULL memzone");
+       ASSERT(rte_ivshmem_metadata_add_memzone(mz, NULL) < 0,
+                       "Added memzone to NULL metadata");
+       ASSERT(rte_ivshmem_metadata_add_memzone(mz, NONEXISTENT_METADATA) < 0,
+                       "Added memzone to nonexistent metadata");
+
+       /* test adding ring */
+       ASSERT(rte_ivshmem_metadata_add_ring(NULL, NULL) < 0,
+                       "Added NULL ring to NULL metadata");
+       ASSERT(rte_ivshmem_metadata_add_ring(NULL, METADATA_NAME) < 0,
+                       "Added NULL ring");
+       ASSERT(rte_ivshmem_metadata_add_ring(r, NULL) < 0,
+                       "Added ring to NULL metadata");
+       ASSERT(rte_ivshmem_metadata_add_ring(r, NONEXISTENT_METADATA) < 0,
+                       "Added ring to nonexistent metadata");
+
+       /* test adding mempool */
+       ASSERT(rte_ivshmem_metadata_add_mempool(NULL, NULL) < 0,
+                       "Added NULL mempool to NULL metadata");
+       ASSERT(rte_ivshmem_metadata_add_mempool(NULL, METADATA_NAME) < 0,
+                       "Added NULL mempool");
+       ASSERT(rte_ivshmem_metadata_add_mempool(mp, NULL) < 0,
+                       "Added mempool to NULL metadata");
+       ASSERT(rte_ivshmem_metadata_add_mempool(mp, NONEXISTENT_METADATA) < 0,
+                       "Added mempool to nonexistent metadata");
+
+       /* test creating command line */
+       ASSERT(rte_ivshmem_metadata_cmdline_generate(NULL, sizeof(buf), METADATA_NAME) < 0,
+                       "Written command line into NULL buffer");
+       ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+       ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, 0, METADATA_NAME) < 0,
+                       "Written command line into small buffer");
+       ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+       ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf), NULL) < 0,
+                       "Written command line for NULL metadata");
+       ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+       ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf),
+                       NONEXISTENT_METADATA) < 0,
+                       "Written command line for nonexistent metadata");
+       ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+       /* add stuff to config */
+       ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) == 0,
+                       "Failed to add memzone to valid config");
+       ASSERT(rte_ivshmem_metadata_add_ring(r, METADATA_NAME) == 0,
+                       "Failed to add ring to valid config");
+       ASSERT(rte_ivshmem_metadata_add_mempool(mp, METADATA_NAME) == 0,
+                       "Failed to add mempool to valid config");
+
+       /* create config */
+       ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf),
+                       METADATA_NAME) == 0, "Failed to write command-line");
+
+       /* check if something was written */
+       ASSERT(strnlen(buf, sizeof(buf)) != 0, "Buffer is empty");
+
+       /* make sure we don't segfault */
+       rte_ivshmem_metadata_dump(NULL);
+
+       /* dump our metadata */
+       rte_ivshmem_metadata_dump(METADATA_NAME);
+
+       return 0;
+}
+
+static int
+test_ivshmem_create_duplicate_metadata(void)
+{
+       ASSERT(rte_ivshmem_metadata_create(DUPLICATE_METADATA) < 0,
+                       "Creating duplicate metadata should have failed");
+
+       return 0;
+}
+
+static int
+test_ivshmem_create_metadata_config(void)
+{
+       struct rte_ivshmem_metadata *metadata;
+
+       rte_ivshmem_metadata_create(METADATA_NAME);
+
+       metadata = mmap_metadata(METADATA_NAME);
+
+       ASSERT(metadata != MAP_FAILED, "Metadata mmaping failed");
+
+       ASSERT(metadata->magic_number == IVSHMEM_MAGIC,
+                       "Magic number is not that magic");
+
+       ASSERT(strncmp(metadata->name, METADATA_NAME, sizeof(metadata->name)) == 0,
+                       "Name has not been set up");
+
+       ASSERT(metadata->entry[0].offset == 0, "Offset is not initialized");
+       ASSERT(metadata->entry[0].mz.addr == 0, "mz.addr is not initialized");
+       ASSERT(metadata->entry[0].mz.len == 0, "mz.len is not initialized");
+
+       return 0;
+}
+
+static int
+test_ivshmem_create_multiple_metadata_configs(void)
+{
+       int i;
+       char name[IVSHMEM_NAME_LEN];
+       struct rte_ivshmem_metadata *metadata;
+
+       for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES / 2; i++) {
+               rte_snprintf(name, sizeof(name), "test_%d", i);
+               rte_ivshmem_metadata_create(name);
+               metadata = mmap_metadata(name);
+
+               ASSERT(metadata->magic_number == IVSHMEM_MAGIC,
+                               "Magic number is not that magic");
+
+               ASSERT(strncmp(metadata->name, name, sizeof(metadata->name)) == 0,
+                               "Name has not been set up");
+       }
+
+       return 0;
+}
+
+static int
+test_ivshmem_create_too_many_metadata_configs(void)
+{
+       int i;
+       char name[IVSHMEM_NAME_LEN];
+
+       for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES; i++) {
+               rte_snprintf(name, sizeof(name), "test_%d", i);
+               ASSERT(rte_ivshmem_metadata_create(name) == 0,
+                               "Create config file failed");
+       }
+
+       ASSERT(rte_ivshmem_metadata_create(name) < 0,
+                       "Create config file didn't fail");
+
+       return 0;
+}
+
+enum rte_ivshmem_tests {
+       _test_ivshmem_api_test = 0,
+       _test_ivshmem_create_metadata_config,
+       _test_ivshmem_create_multiple_metadata_configs,
+       _test_ivshmem_create_too_many_metadata_configs,
+       _test_ivshmem_create_duplicate_metadata,
+       _test_ivshmem_create_lots_of_memzones,
+       _test_ivshmem_create_duplicate_memzone,
+       _last_test,
+};
+
+#define RTE_IVSHMEM_TEST_ID "RTE_IVSHMEM_TEST_ID"
+
+static int
+launch_all_tests_on_secondary_processes(void)
+{
+       int ret = 0;
+       char id;
+       char testid[2];
+       char tmp[PATH_MAX] = {0};
+       char prefix[PATH_MAX] = {0};
+
+       get_current_prefix(tmp, sizeof(tmp));
+
+       rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp);
+
+       const char *argv[] = { prgname, "-c", "1", "-n", "3",
+                       "--proc-type=secondary", prefix };
+
+       for (id = 0; id < _last_test; id++) {
+               testid[0] = (char)(FIRST_TEST + id);
+               testid[1] = '\0';
+               setenv(RTE_IVSHMEM_TEST_ID, testid, 1);
+               if (launch_proc(argv) != 0)
+                       return -1;
+       }
+       return ret;
+}
+
+int
+test_ivshmem(void)
+{
+       int testid;
+
+       /* We want to have a clean execution for every test without exposing
+        * private global data structures in rte_ivshmem so we launch each test
+        * on a different secondary process. */
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+
+               /* first, create metadata */
+               ASSERT(create_duplicate() == 0, "Creating metadata failed");
+
+               return launch_all_tests_on_secondary_processes();
+       }
+
+       testid = *(getenv(RTE_IVSHMEM_TEST_ID)) - FIRST_TEST;
+
+       printf("Secondary process running test %d\n", testid);
+
+       switch (testid) {
+       case _test_ivshmem_api_test:
+               return test_ivshmem_api_test();
+
+       case _test_ivshmem_create_metadata_config:
+               return test_ivshmem_create_metadata_config();
+
+       case _test_ivshmem_create_multiple_metadata_configs:
+               return test_ivshmem_create_multiple_metadata_configs();
+
+       case _test_ivshmem_create_too_many_metadata_configs:
+               return test_ivshmem_create_too_many_metadata_configs();
+
+       case _test_ivshmem_create_duplicate_metadata:
+               return test_ivshmem_create_duplicate_metadata();
+
+       case _test_ivshmem_create_lots_of_memzones:
+               return test_ivshmem_create_lots_of_memzones();
+
+       case _test_ivshmem_create_duplicate_memzone:
+               return test_ivshmem_create_duplicate_memzone();
+
+       default:
+               break;
+       }
+
+       return -1;
+}
+#else /* RTE_LIBRTE_IVSHMEM */
+
+int
+test_ivshmem(void)
+{
+       printf("This binary was not compiled with IVSHMEM support!\n");
+       return 0;
+}
+#endif /* RTE_LIBRTE_IVSHMEM */
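
For context: the guest side needs no IVSHMEM-specific calls. A minimal guest
sketch, assuming a ring named "r0" was exported as in the sketch above:

    rte_eal_init(argc, argv);                   /* EAL maps IVSHMEM devices */
    struct rte_ring *r = rte_ring_lookup("r0");

The EAL changes below discover the devices and register the exported objects
automatically.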
diff --git a/config/defconfig_x86_64-ivshmem-linuxapp-gcc b/config/defconfig_x86_64-ivshmem-linuxapp-gcc
new file mode 100644
index 0000000..2f55a69
--- /dev/null
+++ b/config/defconfig_x86_64-ivshmem-linuxapp-gcc
@@ -0,0 +1,49 @@
+#   BSD LICENSE
+# 
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+# 
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+# 
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+# 
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+#
+# use default config
+#
+
+#include "defconfig_x86_64-default-linuxapp-gcc"
+
+#
+# Compile IVSHMEM library
+#
+CONFIG_RTE_LIBRTE_IVSHMEM=y
+CONFIG_RTE_LIBRTE_IVSHMEM_DEBUG=n
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS=4
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_ENTRIES=128
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES=32
+
+# Set EAL to single file segments
+CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS=y
\ No newline at end of file
diff --git a/config/defconfig_x86_64-ivshmem-linuxapp-icc b/config/defconfig_x86_64-ivshmem-linuxapp-icc
new file mode 100644
index 0000000..14f0926
--- /dev/null
+++ b/config/defconfig_x86_64-ivshmem-linuxapp-icc
@@ -0,0 +1,49 @@
+#   BSD LICENSE
+# 
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+# 
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+# 
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+# 
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+#
+# use default config
+#
+
+#include "defconfig_x86_64-default-linuxapp-icc"
+
+#
+# Compile IVSHMEM library
+#
+CONFIG_RTE_LIBRTE_IVSHMEM=y
+CONFIG_RTE_LIBRTE_IVSHMEM_DEBUG=n
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS=4
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_ENTRIES=128
+CONFIG_RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES=32
+
+# Set EAL to single file segments
+CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS=y
diff --git a/lib/Makefile b/lib/Makefile
index fda306e..c9f0111 100644
@@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
+DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem
 endif
 
 include $(RTE_SDK)/mk/rte.sharelib.mk
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 4d60f8c..a1fcdfd 100644
@@ -479,11 +479,17 @@ rte_eal_memzone_init(void)
 
        rte_rwlock_write_lock(&mcfg->mlock);
 
-       /* duplicate the memsegs from config */
-       memcpy(free_memseg, memseg, sizeof(struct rte_memseg) * RTE_MAX_MEMSEG);
+       /* fill in uninitialized free_memsegs */
+       for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+               if (memseg[i].addr == NULL)
+                       break;
+               if (free_memseg[i].addr != NULL)
+                       continue;
+               memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg));
+       }
 
        /* make all zones cache-aligned */
-       for (i=0; i<RTE_MAX_MEMSEG; i++) {
+       for (i = 0; i < RTE_MAX_MEMSEG; i++) {
                if (free_memseg[i].addr == NULL)
                        break;
                if (memseg_sanitize(&free_memseg[i]) < 0) {
diff --git a/lib/librte_eal/common/include/eal_private.h b/lib/librte_eal/common/include/eal_private.h
index 7e2a269..251f15e 100644
@@ -128,6 +128,28 @@ int rte_eal_log_init(const char *id, int facility);
  */
 int rte_eal_pci_init(void);
 
+#ifdef RTE_LIBRTE_IVSHMEM
+/**
+ * Init the memory from IVSHMEM devices
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_ivshmem_init(void);
+
+/**
+ * Init objects in IVSHMEM devices
+ *
+ * This function is private to EAL.
+ *
+ * @return
+ *  0 on success, negative on error
+ */
+int rte_eal_ivshmem_obj_init(void);
+#endif
+
 struct rte_pci_driver;
 struct rte_pci_device;
 
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index d2c6265..4611dcd 100644
@@ -79,6 +79,9 @@ struct rte_memseg {
                void *addr;         /**< Start virtual address. */
                uint64_t addr_64;   /**< Makes sure addr is always 64 bits */
        };
+#ifdef RTE_LIBRTE_IVSHMEM
+       phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
+#endif
        size_t len;               /**< Length of the segment. */
        size_t hugepage_sz;       /**< The pagesize of underlying memory */
        int32_t socket_id;          /**< NUMA socket ID. */
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index a4243e4..5e29ff1 100644
@@ -75,6 +75,9 @@ struct rte_memzone {
                void *addr;                   /**< Start virtual address. */
                uint64_t addr_64;             /**< Makes sure addr is always 64-bits */
        };
+#ifdef RTE_LIBRTE_IVSHMEM
+       phys_addr_t ioremap_addr;         /**< Real physical address inside the VM */
+#endif
        size_t len;                       /**< Length of the memzone. */
 
        size_t hugepage_sz;               /**< The page size of underlying memory */
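
A note on the new ioremap_addr field: it holds the physical address that the
zone's memory has inside the VM, i.e. the IVSHMEM PCI BAR address plus the
zone's offset within the device. A minimal sketch of the translation for an
object inside a memzone, mirroring the offset arithmetic in eal_ivshmem.c
below (the helper name is illustrative):

    static phys_addr_t
    memzone_ioremap_addr(const struct rte_memzone *mz, const void *obj)
    {
            /* offset of the object within the zone's virtual mapping */
            uint64_t off = (uintptr_t) obj - mz->addr_64;
            return mz->ioremap_addr + off;
    }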
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 91f96bc..2667145 100644
@@ -41,6 +41,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_ring
 CFLAGS += -I$(RTE_SDK)/lib/librte_mempool
 CFLAGS += -I$(RTE_SDK)/lib/librte_malloc
 CFLAGS += -I$(RTE_SDK)/lib/librte_ether
+CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem
 CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_ring
 CFLAGS += -I$(RTE_SDK)/lib/librte_pmd_pcap
 CFLAGS += $(WERROR_FLAGS) -O3
@@ -57,6 +58,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_lcore.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_timer.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_interrupts.c
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
+ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
+endif
 
 # from common dir
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
@@ -75,6 +79,9 @@ CFLAGS_eal.o := -D_GNU_SOURCE
 CFLAGS_eal_thread.o := -D_GNU_SOURCE
 CFLAGS_eal_log.o := -D_GNU_SOURCE
 CFLAGS_eal_common_log.o := -D_GNU_SOURCE
+CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
+CFLAGS_eal_pci.o := -D_GNU_SOURCE
+CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
 
 # workaround for a gcc bug with noreturn attribute
 # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 7a32794..1ddfb65 100644
@@ -935,6 +935,14 @@ rte_eal_init(int argc, char **argv)
        if (rte_eal_cpu_init() < 0)
                rte_panic("Cannot detect lcores\n");
 
+       if (rte_eal_pci_init() < 0)
+               rte_panic("Cannot init PCI\n");
+
+#ifdef RTE_LIBRTE_IVSHMEM
+       if (rte_eal_ivshmem_init() < 0)
+               rte_panic("Cannot init IVSHMEM\n");
+#endif
+
        if (rte_eal_memory_init() < 0)
                rte_panic("Cannot init memory\n");
 
@@ -947,6 +955,11 @@ rte_eal_init(int argc, char **argv)
        if (rte_eal_tailqs_init() < 0)
                rte_panic("Cannot init tail queues for objects\n");
 
+#ifdef RTE_LIBRTE_IVSHMEM
+       if (rte_eal_ivshmem_obj_init() < 0)
+               rte_panic("Cannot init IVSHMEM objects\n");
+#endif
+
        if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
                rte_panic("Cannot init logs\n");
 
@@ -959,9 +972,6 @@ rte_eal_init(int argc, char **argv)
        if (rte_eal_timer_init() < 0)
                rte_panic("Cannot init HPET or TSC timers\n");
 
-       if (rte_eal_pci_init() < 0)
-               rte_panic("Cannot init PCI\n");
-
        RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",
                rte_config.master_lcore, (int)thread_id);
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
new file mode 100644
index 0000000..6191fef
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c
@@ -0,0 +1,953 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/file.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_string_fns.h>
+#include <rte_errno.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_common.h>
+#include <rte_ivshmem.h>
+#include <rte_tailq_elem.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_private.h"
+
+#define PCI_VENDOR_ID_IVSHMEM 0x1Af4
+#define PCI_DEVICE_ID_IVSHMEM 0x1110
+
+#define IVSHMEM_MAGIC 0x0BADC0DE
+#define IVSHMEM_METADATA_SIZE 0x1000
+
+#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2"
+#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config"
+
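+/* bit flags recording in which address space(s) two zones overlap or adjoin */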
+#define PHYS 0x1
+#define VIRT 0x2
+#define IOREMAP 0x4
+#define FULL (PHYS|VIRT|IOREMAP)
+
+#define METADATA_SIZE_ALIGNED \
+       (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
+
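+/* true if object y's start address falls within region x */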
+#define CONTAINS(x,y)\
+       (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len))
+
+#define DIM(x) (sizeof(x)/sizeof(x[0]))
+
+struct ivshmem_pci_device {
+       char path[PATH_MAX];
+       phys_addr_t ioremap_addr;
+};
+
+/* data type to store in config */
+struct ivshmem_segment {
+       struct rte_ivshmem_metadata_entry entry;
+       uint64_t align;
+       char path[PATH_MAX];
+};
+struct ivshmem_shared_config {
+       struct ivshmem_segment segment[RTE_MAX_MEMSEG];
+       uint32_t segment_idx;
+       struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS];
+       uint32_t pci_devs_idx;
+};
+static struct ivshmem_shared_config * ivshmem_config;
+static int memseg_idx;
+static int pagesz;
+
+/* Tailq heads to add rings to */
+TAILQ_HEAD(rte_ring_list, rte_ring);
+
+/*
+ * Utility functions
+ */
+
+static int
+is_ivshmem_device(struct rte_pci_device * dev)
+{
+       return (dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM
+                       && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM);
+}
+
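+/* the DPDK metadata lives in the last page-aligned chunk at the end of
+ * the IVSHMEM device, hence the mapping offset of len - aligned_len */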
+static void *
+map_metadata(int fd, uint64_t len)
+{
+       size_t metadata_len = sizeof(struct rte_ivshmem_metadata);
+       size_t aligned_len = METADATA_SIZE_ALIGNED;
+
+       return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE,
+                       MAP_SHARED, fd, len - aligned_len);
+}
+
+static void
+unmap_metadata(void * ptr)
+{
+       munmap(ptr, sizeof(struct rte_ivshmem_metadata));
+}
+
+static int
+has_ivshmem_metadata(int fd, uint64_t len)
+{
+       struct rte_ivshmem_metadata metadata;
+       void * ptr;
+
+       ptr = map_metadata(fd, len);
+
+       if (ptr == MAP_FAILED)
+               return -1;
+
+       metadata = *(struct rte_ivshmem_metadata*) (ptr);
+
+       unmap_metadata(ptr);
+
+       return metadata.magic_number == IVSHMEM_MAGIC;
+}
+
+static void
+remove_segment(struct ivshmem_segment * ms, int len, int idx)
+{
+       int i;
+
+       for (i = idx; i < len - 1; i++)
+               memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment));
+       memset(&ms[len-1], 0, sizeof(struct ivshmem_segment));
+}
+
+static int
+overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
+{
+       uint64_t start1, end1, start2, end2;
+       uint64_t p_start1, p_end1, p_start2, p_end2;
+       uint64_t i_start1, i_end1, i_start2, i_end2;
+       int result = 0;
+
+       /* gather virtual addresses */
+       start1 = mz1->addr_64;
+       end1 = mz1->addr_64 + mz1->len;
+       start2 = mz2->addr_64;
+       end2 = mz2->addr_64 + mz2->len;
+
+       /* gather physical addresses */
+       p_start1 = mz1->phys_addr;
+       p_end1 = mz1->phys_addr + mz1->len;
+       p_start2 = mz2->phys_addr;
+       p_end2 = mz2->phys_addr + mz2->len;
+
+       /* gather ioremap addresses */
+       i_start1 = mz1->ioremap_addr;
+       i_end1 = mz1->ioremap_addr + mz1->len;
+       i_start2 = mz2->ioremap_addr;
+       i_end2 = mz2->ioremap_addr + mz2->len;
+
+       /* check for overlap in virtual addresses */
+       if (start1 >= start2 && start1 < end2)
+               result |= VIRT;
+       if (start2 >= start1 && start2 < end1)
+               result |= VIRT;
+
+       /* check for overlap in physical addresses */
+       if (p_start1 >= p_start2 && p_start1 < p_end2)
+               result |= PHYS;
+       if (p_start2 >= p_start1 && p_start2 < p_end1)
+               result |= PHYS;
+
+       /* check for overlap in ioremap addresses */
+       if (i_start1 >= i_start2 && i_start1 < i_end2)
+               result |= IOREMAP;
+       if (i_start2 >= i_start1 && i_start2 < i_end1)
+               result |= IOREMAP;
+
+       return result;
+}
+
+static int
+adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2)
+{
+       uint64_t start1, end1, start2, end2;
+       uint64_t p_start1, p_end1, p_start2, p_end2;
+       uint64_t i_start1, i_end1, i_start2, i_end2;
+       int result = 0;
+
+       /* gather virtual addresses */
+       start1 = mz1->addr_64;
+       end1 = mz1->addr_64 + mz1->len;
+       start2 = mz2->addr_64;
+       end2 = mz2->addr_64 + mz2->len;
+
+       /* gather physical addresses */
+       p_start1 = mz1->phys_addr;
+       p_end1 = mz1->phys_addr + mz1->len;
+       p_start2 = mz2->phys_addr;
+       p_end2 = mz2->phys_addr + mz2->len;
+
+       /* gather ioremap addresses */
+       i_start1 = mz1->ioremap_addr;
+       i_end1 = mz1->ioremap_addr + mz1->len;
+       i_start2 = mz2->ioremap_addr;
+       i_end2 = mz2->ioremap_addr + mz2->len;
+
+       /* check if segments are virtually adjacent */
+       if (start1 == end2)
+               result |= VIRT;
+       if (start2 == end1)
+               result |= VIRT;
+
+       /* check if segments are physically adjacent */
+       if (p_start1 == p_end2)
+               result |= PHYS;
+       if (p_start2 == p_end1)
+               result |= PHYS;
+
+       /* check if segments are ioremap-adjacent */
+       if (i_start1 == i_end2)
+               result |= IOREMAP;
+       if (i_start2 == i_end1)
+               result |= IOREMAP;
+
+       return result;
+}
+
+static int
+has_adjacent_segments(struct ivshmem_segment * ms, int len)
+{
+       int i, j, a;
+
+       for (i = 0; i < len; i++)
+               for (j = i + 1; j < len; j++) {
+                       a = adjacent(&ms[i].entry.mz, &ms[j].entry.mz);
+
+                       /* check if segments are adjacent virtually and/or physically but
+                        * not ioremap (since that would indicate that they are from
+                        * different PCI devices and thus don't need to be concatenated).
+                        */
+                       if ((a & (VIRT|PHYS)) > 0 && (a & IOREMAP) == 0)
+                               return 1;
+               }
+       return 0;
+}
+
+static int
+has_overlapping_segments(struct ivshmem_segment * ms, int len)
+{
+       int i, j;
+
+       for (i = 0; i < len; i++)
+               for (j = i + 1; j < len; j++)
+                       if (overlap(&ms[i].entry.mz, &ms[j].entry.mz))
+                               return 1;
+       return 0;
+}
+
+static int
+seg_compare(const void * a, const void * b)
+{
+       const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a;
+       const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b;
+
+       /* move unallocated zones to the end */
+       if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL)
+               return 0;
+       if (s1->entry.mz.addr == 0)
+               return 1;
+       if (s2->entry.mz.addr == 0)
+               return -1;
+
+       return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr;
+}
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+static void
+entry_dump(struct rte_ivshmem_metadata_entry *e)
+{
+       RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr,
+                       RTE_PTR_ADD(e->mz.addr, e->mz.len));
+       RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n",
+                       e->mz.phys_addr,
+                       e->mz.phys_addr + e->mz.len);
+       RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n",
+                       e->mz.ioremap_addr,
+                       e->mz.ioremap_addr + e->mz.len);
+       RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len);
+       RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset);
+}
+#endif
+
+
+
+/*
+ * Actual useful code
+ */
+
+/* read through metadata mapped from the IVSHMEM device */
+static int
+read_metadata(char * path, int path_len, int fd, uint64_t flen)
+{
+       struct rte_ivshmem_metadata metadata;
+       struct rte_ivshmem_metadata_entry * entry;
+       int idx, i;
+       void * ptr;
+
+       ptr = map_metadata(fd, flen);
+
+       if (ptr == MAP_FAILED)
+               return -1;
+
+       metadata = *(struct rte_ivshmem_metadata*) (ptr);
+
+       unmap_metadata(ptr);
+
+       RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name);
+
+       idx = ivshmem_config->segment_idx;
+
+       for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES &&
+               idx <= RTE_MAX_MEMSEG; i++) {
+
+               if (idx == RTE_MAX_MEMSEG) {
+                       RTE_LOG(ERR, EAL, "Not enough memory segments!\n");
+                       return -1;
+               }
+
+               entry = &metadata.entry[i];
+
+               /* stop on uninitialized memzone */
+               if (entry->mz.len == 0)
+                       break;
+
+               /* copy metadata entry */
+               memcpy(&ivshmem_config->segment[idx].entry, entry,
+                               sizeof(struct rte_ivshmem_metadata_entry));
+
+               /* copy path */
+               rte_snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path);
+
+               idx++;
+       }
+       ivshmem_config->segment_idx = idx;
+
+       return 0;
+}
+
+/* check through each segment and look for adjacent or overlapping ones. */
+static int
+cleanup_segments(struct ivshmem_segment * ms, int tbl_len)
+{
+       struct ivshmem_segment * s, * tmp;
+       int i, j, concat, seg_adjacent, seg_overlapping;
+       uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2;
+
+       qsort(ms, tbl_len, sizeof(struct ivshmem_segment),
+                               seg_compare);
+
+       while (has_overlapping_segments(ms, tbl_len) ||
+                       has_adjacent_segments(ms, tbl_len)) {
+
+               for (i = 0; i < tbl_len; i++) {
+                       s = &ms[i];
+
+                       concat = 0;
+
+                       for (j = i + 1; j < tbl_len; j++) {
+                               tmp = &ms[j];
+
+                               /* check if this segment is overlapping with existing segment,
+                                * or is adjacent to existing segment */
+                               seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz);
+                               seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz);
+
+                               /* check if segments fully overlap or are fully adjacent */
+                               if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) {
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+                                       RTE_LOG(DEBUG, EAL, "Concatenating segments\n");
+                                       RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
+                                       entry_dump(&s->entry);
+                                       RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
+                                       entry_dump(&tmp->entry);
+#endif
+
+                                       start1 = s->entry.mz.addr_64;
+                                       start2 = tmp->entry.mz.addr_64;
+                                       p_start1 = s->entry.mz.phys_addr;
+                                       p_start2 = tmp->entry.mz.phys_addr;
+                                       i_start1 = s->entry.mz.ioremap_addr;
+                                       i_start2 = tmp->entry.mz.ioremap_addr;
+                                       end1 = s->entry.mz.addr_64 + s->entry.mz.len;
+                                       end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len;
+
+                                       /* settle for minimum start address and maximum length */
+                                       s->entry.mz.addr_64 = RTE_MIN(start1, start2);
+                                       s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2);
+                                       s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2);
+                                       s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset);
+                                       s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64;
+                                       concat = 1;
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+                                       RTE_LOG(DEBUG, EAL, "Resulting segment:\n");
+                                       entry_dump(&s->entry);
+
+#endif
+                               }
+                               /* if segments overlap, but not fully, we have an error
+                                * condition; adjacent segments can coexist.
+                                */
+                               else if (seg_overlapping > 0) {
+                                       RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j);
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+                                       RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
+                                       entry_dump(&s->entry);
+                                       RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
+                                       entry_dump(&tmp->entry);
+#endif
+                                       return -1;
+                               }
+                               if (concat)
+                                       break;
+                       }
+                       /* if we concatenated, remove segment at j */
+                       if (concat) {
+                               remove_segment(ms, tbl_len, j);
+                               tbl_len--;
+                               break;
+                       }
+               }
+       }
+
+       return tbl_len;
+}
+
+static int
+create_shared_config(void)
+{
+       char path[PATH_MAX];
+       int fd;
+
+       /* build ivshmem config file path */
+       rte_snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
+                       internal_config.hugefile_prefix);
+
+       fd = open(path, O_CREAT | O_RDWR);
+
+       if (fd < 0) {
+               RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno));
+               return -1;
+       }
+
+       /* try ex-locking first - if the file is locked, we have a problem */
+       if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
+               RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno));
+               close(fd);
+               return -1;
+       }
+
+       ftruncate(fd, sizeof(struct ivshmem_shared_config));
+
+       ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
+                       PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+       if (ivshmem_config == MAP_FAILED)
+               return -1;
+
+       memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config));
+
+       /* change the exclusive lock we got earlier to a shared lock */
+       if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+               RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
+               return -1;
+       }
+
+       close(fd);
+
+       return 0;
+}
+
+/* open shared config file and, if present, map the config.
+ * having no config file is not an error condition, as we later check if
+ * ivshmem_config is NULL (if it is, that means nothing was mapped). */
+static int
+open_shared_config(void)
+{
+       char path[PATH_MAX];
+       int fd;
+
+       /* build ivshmem config file path */
+       rte_snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH,
+                       internal_config.hugefile_prefix);
+
+       fd = open(path, O_RDONLY);
+
+       /* if the file doesn't exist, just return success */
+       if (fd < 0 && errno == ENOENT)
+               return 0;
+       /* else we have an error condition */
+       else if (fd < 0) {
+               RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
+                               path, strerror(errno));
+               return -1;
+       }
+
+       /* try ex-locking first - if the lock *does* succeed, this means it's a
+        * stray config file, so it should be deleted.
+        */
+       if (flock(fd, LOCK_EX | LOCK_NB) != -1) {
+
+               /* if we can't remove the file, something is wrong */
+               if (unlink(path) < 0) {
+                       RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path,
+                                       strerror(errno));
+                       return -1;
+               }
+
+               /* release the lock */
+               flock(fd, LOCK_UN);
+               close(fd);
+
+               /* return success as having a stray config file is equivalent to not
+                * having config file at all.
+                */
+               return 0;
+       }
+
+       ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config),
+                       PROT_READ, MAP_SHARED, fd, 0);
+
+       if (ivshmem_config == MAP_FAILED)
+               return -1;
+
+       /* place a shared lock on config file */
+       if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+               RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno));
+               return -1;
+       }
+
+       close(fd);
+
+       return 0;
+}
+
+/*
+ * This function does the following:
+ *
+ * 1) Builds a table of ivshmem_segments with proper offset alignment
+ * 2) Cleans up that table so that we don't have any overlapping or adjacent
+ *    memory segments
+ * 3) Creates memsegs from this table and maps them into memory.
+ */
+static inline int
+map_all_segments(void)
+{
+       struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG];
+       struct ivshmem_pci_device * pci_dev; 
+       struct rte_mem_config * mcfg;
+       struct ivshmem_segment * seg;
+       int fd, fd_zero;
+       unsigned i, j;
+       struct rte_memzone mz;
+       struct rte_memseg ms;
+       void * base_addr;
+       uint64_t align, len;
+       phys_addr_t ioremap_addr;
+
+       ioremap_addr = 0;
+
+       memset(ms_tbl, 0, sizeof(ms_tbl));
+       memset(&mz, 0, sizeof(struct rte_memzone));
+       memset(&ms, 0, sizeof(struct rte_memseg));
+
+       /* first, build a table of memsegs to map, to avoid failed mmaps due to
+        * overlaps
+        */
+       for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) {
+               if (i == RTE_MAX_MEMSEG) {
+                       RTE_LOG(ERR, EAL, "Too many segments requested!\n");
+                       return -1;
+               }
+
+               seg = &ivshmem_config->segment[i];
+
+               /* copy segment to table */
+               memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment));
+
+               /* find ioremap addr */
+               for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) {
+                       pci_dev = &ivshmem_config->pci_devs[j];
+                       if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) {
+                               ioremap_addr = pci_dev->ioremap_addr;
+                               break;
+                       }
+               }
+               if (ioremap_addr == 0) {
+                       RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n");
+                       return -1;
+               }
+
+               /* work out alignments */
+               align = seg->entry.mz.addr_64 -
+                               RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000);
+               len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000);
+
+               /* save original alignments */
+               ms_tbl[i].align = align;
+
+               /* create a memory zone */
+               mz.addr_64 = seg->entry.mz.addr_64 - align;
+               mz.len = len;
+               mz.hugepage_sz = seg->entry.mz.hugepage_sz;
+               mz.phys_addr = seg->entry.mz.phys_addr - align;
+
+               /* find true physical address */
+               mz.ioremap_addr = ioremap_addr + seg->entry.offset - align;
+
+               ms_tbl[i].entry.offset = seg->entry.offset - align;
+
+               memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone));
+       }
+
+       /* clean up the segments */
+       memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx);
+
+       if (memseg_idx < 0)
+               return -1;
+
+       mcfg = rte_eal_get_configuration()->mem_config;
+
+       fd_zero = open("/dev/zero", O_RDWR);
+
+       if (fd_zero < 0) {
+               RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno));
+               return -1;
+       }
+
+       /* create memsegs and put them into DPDK memory */
+       for (i = 0; i < (unsigned) memseg_idx; i++) {
+
+               seg = &ms_tbl[i];
+
+               ms.addr_64 = seg->entry.mz.addr_64;
+               ms.hugepage_sz = seg->entry.mz.hugepage_sz;
+               ms.len = seg->entry.mz.len;
+               ms.nchannel = rte_memory_get_nchannel();
+               ms.nrank = rte_memory_get_nrank();
+               ms.phys_addr = seg->entry.mz.phys_addr;
+               ms.ioremap_addr = seg->entry.mz.ioremap_addr;
+               ms.socket_id = seg->entry.mz.socket_id;
+
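+               /* first map /dev/zero at the required address, to check that the
+                * virtual address range is free; the probe mapping is then
+                * replaced by the actual IVSHMEM file mapped at the same address */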
+               base_addr = mmap(ms.addr, ms.len,
+                               PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0);
+
+               if (base_addr == MAP_FAILED || base_addr != ms.addr) {
+                       RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n");
+                       return -1;
+               }
+
+               fd = open(seg->path, O_RDWR);
+
+               if (fd < 0) {
+                       RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path,
+                                       strerror(errno));
+                       return -1;
+               }
+
+               munmap(ms.addr, ms.len);
+
+               base_addr = mmap(ms.addr, ms.len,
+                               PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+                               seg->entry.offset);
+
+
+               if (base_addr == MAP_FAILED || base_addr != ms.addr) {
+                       RTE_LOG(ERR, EAL, "Cannot map segment into memory: "
+                                       "expected %p got %p (%s)\n", ms.addr, base_addr,
+                                       strerror(errno));
+                       return -1;
+               }
+
+               RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at "
+                               "offset 0x%" PRIx64 "\n",
+                               ms.addr, ms.len, seg->entry.offset);
+
+               /* put the pointers back into their real positions using original
+                * alignment */
+               ms.addr_64 += seg->align;
+               ms.phys_addr += seg->align;
+               ms.ioremap_addr += seg->align;
+               ms.len -= seg->align;
+
+               /* at this point, the rest of DPDK memory is not initialized, so we
+                * expect memsegs to be empty */
+               memcpy(&mcfg->memseg[i], &ms,
+                               sizeof(struct rte_memseg));
+               memcpy(&mcfg->free_memseg[i], &ms,
+                               sizeof(struct rte_memseg));
+
+
+               /* adjust the free_memseg so that there's no free space left */
+               mcfg->free_memseg[i].ioremap_addr += mcfg->free_memseg[i].len;
+               mcfg->free_memseg[i].phys_addr += mcfg->free_memseg[i].len;
+               mcfg->free_memseg[i].addr_64 += mcfg->free_memseg[i].len;
+               mcfg->free_memseg[i].len = 0;
+
+               close(fd);
+
+               RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%" PRIx64 "\n",
+                               ms.len);
+       }
+
+       return 0;
+}
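
The function above relies on a reserve-then-overmap idiom: a private
mapping of /dev/zero first claims the exact virtual address range, then
is dropped and immediately replaced by a shared mapping of the backing
file at the same address. A minimal standalone sketch of that idiom
(the helper name and the assumption that 'want' and 'len' are
page-aligned are mine, not part of the patch):

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

static int
map_file_at(const char *path, void *want, size_t len, off_t off)
{
	void *va;
	int fd;

	/* reserve the virtual area; fails if 'want' is not available */
	fd = open("/dev/zero", O_RDWR);
	if (fd < 0)
		return -1;
	va = mmap(want, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	close(fd);
	if (va == MAP_FAILED || va != want)
		return -1;

	/* drop the placeholder and overmap the real file */
	munmap(va, len);
	fd = open(path, O_RDWR);
	if (fd < 0)
		return -1;
	va = mmap(want, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, off);
	close(fd);
	return (va == MAP_FAILED || va != want) ? -1 : 0;
}

As in the EAL code, the second mmap() avoids MAP_FIXED and instead
verifies the returned address, so an address conflict fails loudly
rather than silently clobbering an existing mapping; the window between
munmap() and the second mmap() is tolerable because this runs during
single-threaded initialization.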
+
+/* this happens at a later stage, after general EAL memory initialization */
+int
+rte_eal_ivshmem_obj_init(void)
+{
+       struct rte_ring_list* ring_list = NULL;
+       struct rte_mem_config * mcfg;
+       struct ivshmem_segment * seg;
+       struct rte_memzone * mz;
+       struct rte_ring * r;
+       unsigned i, ms, idx;
+       uint64_t offset;
+
+       /* secondary process would not need any object discovery - it'll all
+        * already be in shared config */
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL)
+               return 0;
+
+       /* check that we have an initialised ring tail queue */
+       if ((ring_list =
+            RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_RING, rte_ring_list)) == NULL) {
+               RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n");
+               return -1;
+       }
+
+       mcfg = rte_eal_get_configuration()->mem_config;
+
+       /* create memzones */
+       for (i = 0; i < ivshmem_config->segment_idx && i < RTE_MAX_MEMZONE; i++) {
+
+               seg = &ivshmem_config->segment[i];
+
+               /* add memzone */
+               if (mcfg->memzone_idx == RTE_MAX_MEMZONE) {
+                       RTE_LOG(ERR, EAL, "No more memory zones available!\n");
+                       return -1;
+               }
+
+               idx = mcfg->memzone_idx;
+
+               RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n",
+                               seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len);
+
+               memcpy(&mcfg->memzone[idx], &seg->entry.mz,
+                               sizeof(struct rte_memzone));
+
+               /* find ioremap address */
+               for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) {
+                       if (ms == RTE_MAX_MEMSEG) {
+                               RTE_LOG(ERR, EAL, "Physical address of segment not found!\n");
+                               return -1;
+                       }
+                       if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) {
+                               offset = mcfg->memzone[idx].addr_64 -
+                                                               mcfg->memseg[ms].addr_64;
+                               mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr +
+                                               offset;
+                               break;
+                       }
+               }
+
+               mcfg->memzone_idx++;
+       }
+
+       /* find rings */
+       for (i = 0; i < mcfg->memzone_idx; i++) {
+               mz = &mcfg->memzone[i];
+
+               /* check if memzone has a ring prefix */
+               if (strncmp(mz->name, RTE_RING_MZ_PREFIX,
+                               sizeof(RTE_RING_MZ_PREFIX) - 1) != 0)
+                       continue;
+
+               r = (struct rte_ring*) (mz->addr_64);
+
+               TAILQ_INSERT_TAIL(ring_list, r, next);
+
+               RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr);
+       }
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+       rte_memzone_dump();
+       rte_ring_list_dump();
+#endif
+
+       return 0;
+}
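
The ioremap fix-up in the memzone loop above is a plain base-plus-offset
translation: a zone inherits the ioremap (PCI BAR) address of the memseg
that contains it, shifted by the zone's offset inside that segment. A
sketch with simplified stand-in structs (not the real rte_memseg /
rte_memzone definitions):

#include <stdint.h>

struct seg  { uint64_t addr_64; uint64_t ioremap_addr; uint64_t len; };
struct zone { uint64_t addr_64; uint64_t ioremap_addr; uint64_t len; };

static int
fixup_ioremap(struct zone *z, const struct seg *s, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		/* is the zone fully inside segment i? */
		if (z->addr_64 >= s[i].addr_64 &&
				z->addr_64 + z->len <= s[i].addr_64 + s[i].len) {
			z->ioremap_addr = s[i].ioremap_addr +
					(z->addr_64 - s[i].addr_64);
			return 0;
		}
	}
	return -1; /* no containing segment found */
}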
+
+/* initialize ivshmem structures */
+int rte_eal_ivshmem_init(void)
+{
+       struct rte_pci_device * dev;
+       struct rte_pci_resource * res;
+       int fd, ret;
+       char path[PATH_MAX];
+
+       /* initialize everything to 0 */
+       memset(path, 0, sizeof(path));
+       ivshmem_config = NULL;
+
+       pagesz = getpagesize();
+
+       RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n");
+
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+
+               if (open_shared_config() < 0) {
+                       RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n");
+                       return -1;
+               }
+       }
+       else {
+
+               TAILQ_FOREACH(dev, &device_list, next) {
+
+                       if (is_ivshmem_device(dev)) {
+
+                               /* IVSHMEM memory is always on BAR2 */
+                               res = &dev->mem_resource[2];
+
+                               /* if we don't have a BAR2 */
+                               if (res->len == 0)
+                                       continue;
+
+                               /* construct pci device path */
+                               rte_snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH,
+                                               dev->addr.domain, dev->addr.bus, dev->addr.devid,
+                                               dev->addr.function);
+
+                               /* try to find memseg */
+                               fd = open(path, O_RDWR);
+                               if (fd < 0) {
+                                       RTE_LOG(ERR, EAL, "Could not open %s\n", path);
+                                       return -1;
+                               }
+
+                               /* check if it's a DPDK IVSHMEM device */
+                               ret = has_ivshmem_metadata(fd, res->len);
+
+                               /* is DPDK device */
+                               if (ret == 1) {
+
+                                       /* config file creation is deferred until the first
+                                        * DPDK device is found. then, it has to be created
+                                        * only once. */
+                                       if (ivshmem_config == NULL &&
+                                                       create_shared_config() < 0) {
+                                               RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n");
+                                               close(fd);
+                                               return -1;
+                                       }
+
+                                       if (read_metadata(path, sizeof(path), fd, res->len) < 0) {
+                                               RTE_LOG(ERR, EAL, "Could not read metadata from"
+                                                               " device %02x:%02x.%x!\n", dev->addr.bus,
+                                                               dev->addr.devid, dev->addr.function);
+                                               close(fd);
+                                               return -1;
+                                       }
+
+                                       if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) {
+                                               RTE_LOG(WARNING, EAL,
+                                                               "IVSHMEM PCI device limit exceeded. Increase "
+                                                               "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in "
+                                                               "your config file.\n");
+                                               break;
+                                       }
+
+                                       RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n",
+                                                       dev->addr.bus, dev->addr.devid, dev->addr.function);
+
+                                       ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr;
+                                       rte_snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path,
+                                                       sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path),
+                                                       path);
+
+                                       ivshmem_config->pci_devs_idx++;
+                               }
+                               /* failed to read */
+                               else if (ret < 0) {
+                                       RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n",
+                                                       strerror(errno));
+                                       close(fd);
+                                       return -1;
+                               }
+                               /* not a DPDK device */
+                               else
+                                       RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n");
+
+                               /* close the BAR fd */
+                               close(fd);
+                       }
+               }
+       }
+
+       /* ivshmem_config is not NULL only if config was created and/or mapped */
+       if (ivshmem_config) {
+               if (map_all_segments() < 0) {
+                       RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n");
+                       return -1;
+               }
+       }
+       else {
+               RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found!\n");
+       }
+
+       return 0;
+}
+
+#endif
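
The device scan above resolves each candidate's BAR2 through a sysfs
resource path (the exact IVSHMEM_RESOURCE_PATH format string is defined
elsewhere in this file and not shown in this excerpt). For illustration
only, a sketch using the standard Linux sysfs layout, which is an
assumption here rather than a quote of the macro:

#include <fcntl.h>
#include <limits.h>
#include <stdio.h>

/* open the file that exposes BAR2 of a PCI device via sysfs */
static int
open_bar2(unsigned int domain, unsigned int bus,
		unsigned int devid, unsigned int function)
{
	char path[PATH_MAX];

	snprintf(path, sizeof(path),
			"/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2",
			domain, bus, devid, function);
	return open(path, O_RDWR);
}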
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 3a1822e..6b78d89 100644 (file)
@@ -113,6 +113,68 @@ static uint64_t baseaddr_offset;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
 
+static uint64_t
+get_physaddr(void * virtaddr)
+{
+       int fd;
+       uint64_t page, physaddr;
+       unsigned long virt_pfn;
+       int page_size;
+
+       /* standard page size */
+       page_size = getpagesize();
+
+       fd = open("/proc/self/pagemap", O_RDONLY);
+       if (fd < 0) {
+               RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
+                       __func__, strerror(errno));
+               return (uint64_t) -1;
+       }
+
+       off_t offset;
+       virt_pfn = (unsigned long)virtaddr / page_size;
+       offset = sizeof(uint64_t) * virt_pfn;
+       if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
+               RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
+                               __func__, strerror(errno));
+               close(fd);
+               return (uint64_t) -1;
+       }
+       if (read(fd, &page, sizeof(uint64_t)) < 0) {
+               RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
+                               __func__, strerror(errno));
+               close(fd);
+               return (uint64_t) -1;
+       }
+
+       /*
+        * the pfn (page frame number) are bits 0-54 (see
+        * pagemap.txt in linux Documentation)
+        */
+       physaddr = ((page & 0x7fffffffffffffULL) * page_size);
+       close(fd);
+       return physaddr;
+}
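
Each pagemap entry is a 64-bit word: bits 0-54 carry the page frame
number and bit 63 flags whether the page is present (see pagemap.txt in
the kernel Documentation). get_physaddr() returns the page-aligned
physical address; below is a hedged sketch of the full decode, adding
the present-bit check the function above omits and the in-page offset a
caller would restore for an unaligned pointer:

#include <stdint.h>

static uint64_t
decode_pagemap_entry(uint64_t entry, uint64_t virtaddr, uint64_t page_size)
{
	/* bit 63: page present; without it the PFN field is meaningless */
	if ((entry & (1ULL << 63)) == 0)
		return (uint64_t)-1;
	/* bits 0-54: PFN; scale to bytes and restore the in-page offset */
	return (entry & 0x7fffffffffffffULL) * page_size +
			(virtaddr % page_size);
}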
+
+/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value. We find
+ * it by browsing the /proc/self/pagemap special file.
+ */
+static int
+find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+       unsigned i;
+       phys_addr_t addr;
+
+       for (i = 0; i < hpi->num_pages[0]; i++) {
+               addr = get_physaddr(hugepg_tbl[i].orig_va);
+               if (addr == (phys_addr_t) -1)
+                       return -1;
+               hugepg_tbl[i].physaddr = addr;
+       }
+       return 0;
+}
+
 /*
  * Check whether address-space layout randomization is enabled in
  * the kernel. This is important for multi-process as it can prevent
@@ -209,7 +271,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
  * map contiguous physical blocks in contiguous virtual blocks.
  */
 static int
-map_all_hugepages(struct hugepage *hugepg_tbl,
+map_all_hugepages(struct hugepage_file *hugepg_tbl,
                struct hugepage_info *hpi, int orig)
 {
        int fd;
@@ -218,15 +280,25 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
        void *vma_addr = NULL;
        size_t vma_len = 0;
 
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+       RTE_SET_USED(vma_len);
+#endif
+
        for (i = 0; i < hpi->num_pages[0]; i++) {
                size_t hugepage_sz = hpi->hugepage_sz;
 
                if (orig) {
                        hugepg_tbl[i].file_id = i;
                        hugepg_tbl[i].size = hugepage_sz;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+                       eal_get_hugefile_temp_path(hugepg_tbl[i].filepath,
+                                       sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
+                                       hugepg_tbl[i].file_id);
+#else
                        eal_get_hugefile_path(hugepg_tbl[i].filepath,
                                        sizeof(hugepg_tbl[i].filepath), hpi->hugedir,
                                        hugepg_tbl[i].file_id);
+#endif
                        hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0';
                }
 #ifndef RTE_ARCH_X86_64
@@ -239,6 +311,8 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
                        continue;
                }
 #endif
+
+#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS
                else if (vma_len == 0) {
                        unsigned j, num_pages;
 
@@ -260,6 +334,7 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
                        if (vma_addr == NULL)
                                vma_len = hugepage_sz;
                }
+#endif
 
                /* try to create hugepage file */
                fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
@@ -302,77 +377,189 @@ map_all_hugepages(struct hugepage *hugepg_tbl,
        return 0;
 }
 
-/* Unmap all hugepages from original mapping. */
-static int
-unmap_all_hugepages_orig(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
-{
-       unsigned i;
-       for (i = 0; i < hpi->num_pages[0]; i++) {
-               if (hugepg_tbl[i].orig_va) {
-                       munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz);
-                       hugepg_tbl[i].orig_va = NULL;
-               }
-       }
-       return 0;
-}
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
 
 /*
- * For each hugepage in hugepg_tbl, fill the physaddr value. We find
- * it by browsing the /proc/self/pagemap special file.
+ * Remaps all hugepages into single file segments
  */
 static int
-find_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 {
        int fd;
-       unsigned i;
-       uint64_t page;
-       unsigned long virt_pfn;
-       int page_size;
+       unsigned i = 0, j, num_pages, page_idx = 0;
+       void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL;
+       size_t vma_len = 0;
+       size_t hugepage_sz = hpi->hugepage_sz;
+       size_t total_size, offset;
+       char filepath[MAX_HUGEPAGE_PATH];
+       phys_addr_t physaddr;
+       int socket;
 
-       /* standard page size */
-       page_size = getpagesize();
+       while (i < hpi->num_pages[0]) {
 
-       fd = open("/proc/self/pagemap", O_RDONLY);
-       if (fd < 0) {
-               RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
-                       __func__, strerror(errno));
-               return -1;
-       }
+#ifndef RTE_ARCH_X86_64
+               /* for 32-bit systems, don't remap 1G pages, just reuse original
+                * map address as final map address.
+                */
+               if (hugepage_sz == RTE_PGSIZE_1G) {
+                       hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va;
+                       hugepg_tbl[i].orig_va = NULL;
+                       i++;
+                       continue;
+               }
+#endif
 
-       for (i = 0; i < hpi->num_pages[0]; i++) {
-               off_t offset;
-               virt_pfn = (unsigned long)hugepg_tbl[i].orig_va /
-                       page_size;
-               offset = sizeof(uint64_t) * virt_pfn;
-               if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
-                       RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
-                                       __func__, strerror(errno));
-                       close(fd);
+               /* reserve a virtual area for next contiguous
+                * physical block: count the number of
+                * contiguous physical pages. */
+               for (j = i+1; j < hpi->num_pages[0] ; j++) {
+                       if (hugepg_tbl[j].physaddr != hugepg_tbl[j-1].physaddr + hugepage_sz)
+                               break;
+               }
+               num_pages = j - i;
+               vma_len = num_pages * hugepage_sz;
+
+               socket = hugepg_tbl[i].socket_id;
+
+               /* get the biggest virtual memory area up to
+                * vma_len. If it fails, vma_addr is NULL, so
+                * let the kernel provide the address. */
+               vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz);
+
+               /* If we can't find a big enough virtual area, work out how many pages
+                * we are going to get */
+               if (vma_addr == NULL)
+                       j = i + 1;
+               else if (vma_len != num_pages * hugepage_sz) {
+                       num_pages = vma_len / hugepage_sz;
+                       j = i + num_pages;
+               }
+
+               hugepg_tbl[page_idx].file_id = page_idx;
+               eal_get_hugefile_path(filepath,
+                               sizeof(filepath),
+                               hpi->hugedir,
+                               hugepg_tbl[page_idx].file_id);
+
+               /* try to create hugepage file */
+               fd = open(filepath, O_CREAT | O_RDWR, 0755);
+               if (fd < 0) {
+                       RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno));
                        return -1;
                }
-               if (read(fd, &page, sizeof(uint64_t)) < 0) {
-                       RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
-                                       __func__, strerror(errno));
+
+               total_size = 0;
+               for (; i < j; i++) {
+
+                       /* unmap current segment */
+                       if (total_size > 0)
+                               munmap(vma_addr, total_size);
+
+                       /* unmap original page */
+                       munmap(hugepg_tbl[i].orig_va, hugepage_sz);
+                       unlink(hugepg_tbl[i].filepath);
+
+                       total_size += hugepage_sz;
+
+                       old_addr = vma_addr;
+
+                       /* map new, bigger segment */
+                       vma_addr = mmap(vma_addr, total_size,
+                                       PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+                       if (vma_addr == MAP_FAILED || vma_addr != old_addr) {
+                               RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno));
+                               close(fd);
+                               return -1;
+                       }
+
+                       /* touch the page. this is needed because the kernel postpones
+                        * mapping creation until the first page fault. touching pins
+                        * the page down, marks it as used and gets it into the
+                        * process' pagemap.
+                        */
+                       for (offset = 0; offset < total_size; offset += hugepage_sz)
+                               *((volatile uint8_t*) RTE_PTR_ADD(vma_addr, offset));
+               }
+
+               /* set shared flock on the file. */
+               if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+                       RTE_LOG(ERR, EAL, "%s(): Locking file failed: %s\n",
+                               __func__, strerror(errno));
                        close(fd);
                        return -1;
                }
 
-               /*
-                * the pfn (page frame number) are bits 0-54 (see
-                * pagemap.txt in linux Documentation)
+               rte_snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s",
+                               filepath);
+
+               physaddr = get_physaddr(vma_addr);
+
+               if (physaddr == (phys_addr_t) -1)
+                       return -1;
+
+               hugepg_tbl[page_idx].final_va = vma_addr;
+
+               hugepg_tbl[page_idx].physaddr = physaddr;
+
+               hugepg_tbl[page_idx].repeated = num_pages;
+
+               hugepg_tbl[page_idx].socket_id = socket;
+
+               close(fd);
+
+               /* verify the memory segment - that is, check that every VA corresponds
+                * to the physical address we expect to see
                 */
-               hugepg_tbl[i].physaddr = ((page & 0x7fffffffffffffULL) * page_size);
+               for (offset = 0; offset < vma_len; offset += hugepage_sz) {
+                       uint64_t expected_physaddr;
+
+                       expected_physaddr = hugepg_tbl[page_idx].physaddr + offset;
+                       page_addr = RTE_PTR_ADD(vma_addr, offset);
+                       physaddr = get_physaddr(page_addr);
+
+                       if (physaddr != expected_physaddr) {
+                               RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr "
+                                               "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64
+                                               " (expected 0x%" PRIx64 ")\n",
+                                               page_addr, offset, physaddr, expected_physaddr);
+                               return -1;
+                       }
+               }
+
+               /* zero out the whole segment */
+               memset(hugepg_tbl[page_idx].final_va, 0, total_size);
+
+               page_idx++;
        }
-       close(fd);
-       return 0;
+
+       /* zero out the rest */
+       memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file));
+       return page_idx;
 }
+#else /* RTE_EAL_SINGLE_FILE_SEGMENTS=n */
+
+/* Unmap all hugepages from original mapping */
+static int
+unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+       unsigned i;
+       for (i = 0; i < hpi->num_pages[0]; i++) {
+               if (hugepg_tbl[i].orig_va) {
+                       munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz);
+                       hugepg_tbl[i].orig_va = NULL;
+               }
+       }
+       return 0;
+}
+#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */
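
The touch loop inside remap_all_hugepages() is what makes the later
get_physaddr() calls meaningful: until a page has been faulted in, its
pagemap entry carries no frame number. The idiom in isolation:

#include <stddef.h>
#include <stdint.h>

/* fault in every page of a mapping by reading one byte per page;
 * the volatile access keeps the compiler from eliding the read */
static void
prefault(void *base, size_t len, size_t page_sz)
{
	size_t off;

	for (off = 0; off < len; off += page_sz)
		(void)*(volatile uint8_t *)((uintptr_t)base + off);
}

mmap()'s MAP_POPULATE flag can have a similar effect, but the explicit
loop makes the pin-and-populate behaviour obvious and keeps the code
independent of kernel-version details.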
 
 /*
  * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
  * page.
  */
 static int
-find_numasocket(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 {
        int socket_id;
        char *end, *nodestr;
@@ -455,12 +642,12 @@ error:
  * is only done at init time.
  */
 static int
-sort_by_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
+sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
 {
        unsigned i, j;
        int smallest_idx;
        uint64_t smallest_addr;
-       struct hugepage tmp;
+       struct hugepage_file tmp;
 
        for (i = 0; i < hpi->num_pages[0]; i++) {
                smallest_addr = 0;
@@ -486,10 +673,10 @@ sort_by_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi)
                }
 
                /* swap the 2 entries in the table */
-               memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage));
+               memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage_file));
                memcpy(&hugepg_tbl[smallest_idx], &hugepg_tbl[i],
-                               sizeof(struct hugepage));
-               memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage));
+                               sizeof(struct hugepage_file));
+               memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage_file));
        }
        return 0;
 }
@@ -519,8 +706,8 @@ create_shared_memory(const char *filename, const size_t mem_size)
  * destination is typically the shared memory.
  */
 static int
-copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size,
-               const struct hugepage * src, int src_size)
+copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size,
+               const struct hugepage_file * src, int src_size)
 {
        int src_pos, dst_pos = 0;
 
@@ -529,7 +716,7 @@ copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size,
                        /* error on overflow attempt */
                        if (dst_pos == dest_size)
                                return -1;
-                       memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage));
+                       memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file));
                        dst_pos++;
                }
        }
@@ -541,7 +728,7 @@ copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size,
  * ALL hugepages (not just those we need), additional unmapping needs to be done.
  */
 static int
-unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
+unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
                struct hugepage_info *hpi,
                unsigned num_hp_info)
 {
@@ -556,9 +743,16 @@ unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
        for (size = 0; size < num_hp_info; size++) {
                for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
                        unsigned pages_found = 0;
+
                        /* traverse until we have unmapped all the unused pages */
                        for (page = 0; page < nrpages; page++) {
-                               struct hugepage *hp = &hugepg_tbl[page];
+                               struct hugepage_file *hp = &hugepg_tbl[page];
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+                               /* if this page was already cleared */
+                               if (hp->final_va == NULL)
+                                       continue;
+#endif
 
                                /* find a page that matches the criteria */
                                if ((hp->size == hpi[size].hugepage_sz) &&
@@ -566,17 +760,67 @@ unmap_unneeded_hugepages(struct hugepage *hugepg_tbl,
 
                                        /* if we skipped enough pages, unmap the rest */
                                        if (pages_found == hpi[size].num_pages[socket]) {
-                                               munmap(hp->final_va, hp->size);
+                                               uint64_t unmap_len;
+
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+                                               unmap_len = hp->size * hp->repeated;
+#else
+                                               unmap_len = hp->size;
+#endif
+
+                                               /* get start addr and len of the remaining segment */
+                                               munmap(hp->final_va, (size_t) unmap_len);
+
                                                hp->final_va = NULL;
-                                               if (remove(hp->filepath) == -1) {
+                                               if (unlink(hp->filepath) == -1) {
                                                        RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
                                                                        __func__, hp->filepath, strerror(errno));
                                                        return -1;
                                                }
                                        }
-                                       /* lock the page and skip */
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+                                       /* else, check how much do we need to map */
+                                       else {
+                                               int nr_pg_left =
+                                                               hpi[size].num_pages[socket] - pages_found;
+
+                                               /* if we need enough memory to fit into the segment */
+                                               if (hp->repeated <= nr_pg_left) {
+                                                       pages_found += hp->repeated;
+                                               }
+                                               /* truncate the segment */
+                                               else {
+                                                       uint64_t final_size = nr_pg_left * hp->size;
+                                                       uint64_t seg_size = hp->repeated * hp->size;
+
+                                                       void * unmap_va = RTE_PTR_ADD(hp->final_va,
+                                                                       final_size);
+                                                       int fd;
+
+                                                       munmap(unmap_va, seg_size - final_size);
+
+                                                       fd = open(hp->filepath, O_RDWR);
+                                                       if (fd < 0) {
+                                                               RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+                                                                               hp->filepath, strerror(errno));
+                                                               return -1;
+                                                       }
+                                                       if (ftruncate(fd, final_size) < 0) {
+                                                               RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n",
+                                                                               hp->filepath, strerror(errno));
+                                                               return -1;
+                                                       }
+                                                       close(fd);
+
+                                                       pages_found += nr_pg_left;
+                                                       hp->repeated = nr_pg_left;
+                                               }
+                                       }
+#else
+                                       /* else, lock the page and skip */
                                        else
                                                pages_found++;
+#endif
 
                                } /* match page */
                        } /* foreach page */
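
The truncation branch above (single-file-segments case) shrinks an
over-allocated segment in two coupled steps: unmap the unwanted tail of
the virtual range, then ftruncate() the backing hugetlbfs file so the
pages are actually returned to the pool. Reduced to a sketch (the
helper name and the page-multiple precondition on the lengths are
assumptions):

#include <fcntl.h>
#include <stddef.h>
#include <sys/mman.h>
#include <unistd.h>

static int
shrink_segment(const char *path, void *base, size_t old_len, size_t new_len)
{
	int fd;

	/* drop the tail of the mapping past the wanted length */
	if (munmap((char *)base + new_len, old_len - new_len) < 0)
		return -1;

	/* shrink the backing file to match */
	fd = open(path, O_RDWR);
	if (fd < 0)
		return -1;
	if (ftruncate(fd, new_len) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}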
@@ -712,15 +956,18 @@ static int
 rte_eal_hugepage_init(void)
 {
        struct rte_mem_config *mcfg;
-       struct hugepage *hugepage, *tmp_hp = NULL;
+       struct hugepage_file *hugepage, *tmp_hp = NULL;
        struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
 
        uint64_t memory[RTE_MAX_NUMA_NODES];
 
        unsigned hp_offset;
        int i, j, new_memseg;
-       int nrpages, total_pages = 0;
+       int nr_hugefiles, nr_hugepages = 0;
        void *addr;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+       int new_pages_count[MAX_HUGEPAGE_SIZES];
+#endif
 
        memset(used_hp, 0, sizeof(used_hp));
 
@@ -744,7 +991,7 @@ rte_eal_hugepage_init(void)
                /* meanwhile, also initialize used_hp hugepage sizes in used_hp */
                used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz;
 
-               total_pages += internal_config.hugepage_info[i].num_pages[0];
+               nr_hugepages += internal_config.hugepage_info[i].num_pages[0];
        }
 
        /*
@@ -753,11 +1000,11 @@ rte_eal_hugepage_init(void)
         * processing done on these pages, shared memory will be created
         * at a later stage.
         */
-       tmp_hp = malloc(total_pages * sizeof(struct hugepage));
+       tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));
        if (tmp_hp == NULL)
                goto fail;
 
-       memset(tmp_hp, 0, total_pages * sizeof(struct hugepage));
+       memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));
 
        hp_offset = 0; /* where we start the current page size entries */
 
@@ -772,7 +1019,7 @@ rte_eal_hugepage_init(void)
                 */
                hpi = &internal_config.hugepage_info[i];
 
-               if (hpi->num_pages == 0)
+               if (hpi->num_pages[0] == 0)
                        continue;
 
                /* map all hugepages available */
@@ -783,7 +1030,7 @@ rte_eal_hugepage_init(void)
                }
 
                /* find physical addresses and sockets for each hugepage */
-               if (find_physaddr(&tmp_hp[hp_offset], hpi) < 0){
+               if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
                        RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
                                        (unsigned)(hpi->hugepage_sz / 0x100000));
                        goto fail;
@@ -798,6 +1045,18 @@ rte_eal_hugepage_init(void)
                if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0)
                        goto fail;
 
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+               /* remap all hugepages into single file segments */
+               new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi);
+               if (new_pages_count[i] < 0){
+                       RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
+                                       (unsigned)(hpi->hugepage_sz / 0x100000));
+                       goto fail;
+               }
+
+               /* processed a number of hugepages of this size; increment offset */
+               hp_offset += new_pages_count[i];
+#else
                /* remap all hugepages */
                if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){
                        RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n",
@@ -811,22 +1070,38 @@ rte_eal_hugepage_init(void)
 
                /* processed a number of hugepages of this size; increment offset */
                hp_offset += hpi->num_pages[0];
+#endif
        }
 
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+       nr_hugefiles = 0;
+       for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) {
+               nr_hugefiles += new_pages_count[i];
+       }
+#else
+       nr_hugefiles = nr_hugepages;
+#endif
+
+
        /* clean out the numbers of pages */
        for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++)
                for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
                        internal_config.hugepage_info[i].num_pages[j] = 0;
 
        /* get hugepages for each socket */
-       for (i = 0; i < total_pages; i++) {
+       for (i = 0; i < nr_hugefiles; i++) {
                int socket = tmp_hp[i].socket_id;
 
                /* find a hugepage info with right size and increment num_pages */
                for (j = 0; j < (int) internal_config.num_hugepage_sizes; j++) {
                        if (tmp_hp[i].size ==
                                        internal_config.hugepage_info[j].hugepage_sz) {
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+                               internal_config.hugepage_info[j].num_pages[socket] +=
+                                       tmp_hp[i].repeated;
+#else
                                internal_config.hugepage_info[j].num_pages[socket]++;
+#endif
                        }
                }
        }
@@ -836,12 +1111,12 @@ rte_eal_hugepage_init(void)
                memory[i] = internal_config.socket_mem[i];
 
        /* calculate final number of pages */
-       nrpages = calc_num_pages_per_socket(memory,
+       nr_hugepages = calc_num_pages_per_socket(memory,
                        internal_config.hugepage_info, used_hp,
                        internal_config.num_hugepage_sizes);
 
        /* error if not enough memory available */
-       if (nrpages < 0)
+       if (nr_hugepages < 0)
                goto fail;
 
        /* reporting in! */
@@ -861,12 +1136,13 @@ rte_eal_hugepage_init(void)
 
        /* create shared memory */
        hugepage = create_shared_memory(eal_hugepage_info_path(),
-                                       nrpages * sizeof(struct hugepage));
+                       nr_hugefiles * sizeof(struct hugepage_file));
 
        if (hugepage == NULL) {
                RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
                goto fail;
        }
+       memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));
 
        /*
         * unmap pages that we won't need (looks at used_hp).
@@ -883,8 +1159,8 @@ rte_eal_hugepage_init(void)
         * this procedure only copies those hugepages that have final_va
         * not NULL. has overflow protection.
         */
-       if (copy_hugepages_to_shared_mem(hugepage, nrpages,
-                       tmp_hp, total_pages) < 0) {
+       if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
+                       tmp_hp, nr_hugefiles) < 0) {
                RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
                goto fail;
        }
@@ -893,9 +1169,16 @@ rte_eal_hugepage_init(void)
        free(tmp_hp);
        tmp_hp = NULL;
 
-       memset(mcfg->memseg, 0, sizeof(mcfg->memseg));
-       j = -1;
-       for (i = 0; i < nrpages; i++) {
+       /* find earliest free memseg - this is needed because in case of IVSHMEM,
+        * segments might have already been initialized */
+       for (j = 0; j < RTE_MAX_MEMSEG; j++)
+               if (mcfg->memseg[j].addr == NULL) {
+                       /* move to previous segment and exit loop */
+                       /* step back so the first new memseg lands on this free slot */
+                       break;
+               }
+
+       for (i = 0; i < nr_hugefiles; i++) {
                new_memseg = 0;
 
                /* if this is a new section, create a new memseg */
@@ -919,7 +1202,11 @@ rte_eal_hugepage_init(void)
 
                        mcfg->memseg[j].phys_addr = hugepage[i].physaddr;
                        mcfg->memseg[j].addr = hugepage[i].final_va;
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+                       mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated;
+#else
                        mcfg->memseg[j].len = hugepage[i].size;
+#endif
                        mcfg->memseg[j].socket_id = hugepage[i].socket_id;
                        mcfg->memseg[j].hugepage_sz = hugepage[i].size;
                }
@@ -930,21 +1217,19 @@ rte_eal_hugepage_init(void)
                hugepage[i].memseg_id = j;
        }
 
-       if (i < nrpages) {
+       if (i < nr_hugefiles) {
                RTE_LOG(ERR, EAL, "Can only reserve %d pages "
                        "from %d requested\n"
                        "Current %s=%d is not enough\n"
                        "Please either increase it or request less amount "
                        "of memory.\n",
-                       i, nrpages, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
+                       i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
                        RTE_MAX_MEMSEG);
                return (-ENOMEM);
        }
-       
 
        return 0;
 
-
 fail:
        if (tmp_hp)
                free(tmp_hp);
@@ -973,7 +1258,7 @@ static int
 rte_eal_hugepage_attach(void)
 {
        const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       const struct hugepage *hp = NULL;
+       const struct hugepage_file *hp = NULL;
        unsigned num_hp = 0;
        unsigned i, s = 0; /* s used to track the segment number */
        off_t size;
@@ -1008,6 +1293,15 @@ rte_eal_hugepage_attach(void)
                if (mcfg->memseg[s].len == 0)
                        break;
 
+#ifdef RTE_LIBRTE_IVSHMEM
+               /*
+                * if segment has ioremap address set, it's an IVSHMEM segment and
+                * doesn't need mapping as it was already mapped earlier
+                */
+               if (mcfg->memseg[s].ioremap_addr != 0)
+                       continue;
+#endif
+
                /*
                 * fdzero is mmapped to get a contiguous block of virtual
                 * addresses of the appropriate memseg size.
@@ -1018,9 +1312,9 @@ rte_eal_hugepage_attach(void)
                if (base_addr == MAP_FAILED ||
                    base_addr != mcfg->memseg[s].addr) {
                        RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
-                               "in /dev/zero to requested address [%p]\n",
+                               "in /dev/zero to requested address [%p]: '%s'\n",
                                (unsigned long long)mcfg->memseg[s].len,
-                               mcfg->memseg[s].addr);
+                               mcfg->memseg[s].addr, strerror(errno));
                        if (aslr_enabled() > 0) {
                                RTE_LOG(ERR, EAL, "It is recommended to "
                                        "disable ASLR in the kernel "
@@ -1038,14 +1332,24 @@ rte_eal_hugepage_attach(void)
                goto error;
        }
 
-       num_hp = size / sizeof(struct hugepage);
-       RTE_LOG(DEBUG, EAL, "Analysing %u hugepages\n", num_hp);
+       num_hp = size / sizeof(struct hugepage_file);
+       RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp);
 
        s = 0;
        while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){
                void *addr, *base_addr;
                uintptr_t offset = 0;
-
+               size_t mapping_size;
+#ifdef RTE_LIBRTE_IVSHMEM
+               /*
+                * if segment has ioremap address set, it's an IVSHMEM segment and
+                * doesn't need mapping as it was already mapped earlier
+                */
+               if (mcfg->memseg[s].ioremap_addr != 0) {
+                       s++;
+                       continue;
+               }
+#endif
                /*
                 * free previously mapped memory so we can map the
                 * hugepages into the space
@@ -1064,16 +1368,22 @@ rte_eal_hugepage_attach(void)
                                                hp[i].filepath);
                                        goto error;
                                }
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+                               mapping_size = hp[i].size * hp[i].repeated;
+#else
+                               mapping_size = hp[i].size;
+#endif
                                addr = mmap(RTE_PTR_ADD(base_addr, offset),
-                                               hp[i].size, PROT_READ | PROT_WRITE,
-                                               MAP_SHARED | MAP_FIXED, fd, 0);
+                                               mapping_size, PROT_READ | PROT_WRITE,
+                                               MAP_SHARED, fd, 0);
                                close(fd); /* close file both on success and on failure */
-                               if (addr == MAP_FAILED) {
+                               if (addr == MAP_FAILED ||
+                                               addr != RTE_PTR_ADD(base_addr, offset)) {
                                        RTE_LOG(ERR, EAL, "Could not mmap %s\n",
                                                hp[i].filepath);
                                        goto error;
                                }
-                               offset+=hp[i].size;
+                               offset += mapping_size;
                        }
                }
                RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s,
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h b/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h
index 7ffd5cd..034e58d 100644 (file)
@@ -46,6 +46,8 @@
 #include <stdint.h>
 #include <limits.h>
 #include <unistd.h>
+#include <stdlib.h>
+
 #include <rte_string_fns.h>
 #include "eal_internal_cfg.h"
 
@@ -84,6 +86,7 @@ eal_hugepage_info_path(void)
 
 /** String format for hugepage map files. */
 #define HUGEFILE_FMT "%s/%smap_%d"
+#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d"
 
 static inline const char *
 eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
@@ -94,6 +97,17 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
        return buffer;
 }
 
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+static inline const char *
+eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
+{
+       rte_snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir,
+                       internal_config.hugefile_prefix, f_id);
+       buffer[buflen - 1] = '\0';
+       return buffer;
+}
+#endif
+
 /** define the default filename prefix for the %s values above */
 #define HUGEFILE_PREFIX_DEFAULT "rte"
 
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h b/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h
index 82dd641..064cdb0 100644 (file)
@@ -35,6 +35,8 @@
 #define RTE_LINUXAPP_HUGEPAGES_H_
 
 #include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
 
 #define MAX_HUGEPAGE_PATH PATH_MAX
 
@@ -42,7 +44,7 @@
  * Structure used to store informations about hugepages that we mapped
  * through the files in hugetlbfs.
  */
-struct hugepage {
+struct hugepage_file {
        void *orig_va;      /**< virtual addr of first mmap() */
        void *final_va;     /**< virtual addr of 2nd mmap() */
        uint64_t physaddr;  /**< physical addr */
@@ -50,6 +52,9 @@ struct hugepage {
        int socket_id;      /**< NUMA socket ID */
        int file_id;        /**< the '%d' in HUGEFILE_FMT */
        int memseg_id;      /**< the memory segment to which page belongs */
+#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS
+       int repeated;           /**< number of times the page size is repeated */
+#endif
        char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
 };
 
diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile
new file mode 100644 (file)
index 0000000..c94f926
--- /dev/null
@@ -0,0 +1,48 @@
+#   BSD LICENSE
+# 
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   All rights reserved.
+# 
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+# 
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+# 
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_ivshmem.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h
+
+# this lib needs eal and mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_ivshmem/rte_ivshmem.c b/lib/librte_ivshmem/rte_ivshmem.c
new file mode 100644 (file)
index 0000000..d62d016
--- /dev/null
@@ -0,0 +1,884 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <fcntl.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include <rte_eal_memconfig.h>
+#include <rte_memory.h>
+#include <rte_ivshmem.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_spinlock.h>
+#include <rte_malloc.h>
+
+#include "rte_ivshmem.h"
+
+#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s"
+#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s"
+#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64
+#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024
+#define IVSHMEM_MAX_PAGES (1 << 12)
+#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr)
+#define METADATA_SIZE_ALIGNED \
+       (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz))
+
+#define GET_PAGEMAP_ADDR(in,addr,dlm,err)    \
+{                                      \
+       char *end;                         \
+       errno = 0;                         \
+       addr = strtoull((in), &end, 16);   \
+       if (errno != 0 || *end != (dlm)) { \
+               RTE_LOG(ERR, EAL, err);        \
+               goto error;                    \
+       }                                  \
+       (in) = end + 1;                    \
+}
+
+static int pagesz;
+
+struct memseg_cache_entry {
+       char filepath[PATH_MAX];
+       uint64_t offset;
+       uint64_t len;
+};
+
+struct ivshmem_config {
+       struct rte_ivshmem_metadata * metadata;
+       struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES];
+               /**< account for multiple files per segment case */
+       struct flock lock;
+       rte_spinlock_t sl;
+};
+
+static struct ivshmem_config
+ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES];
+
+static rte_spinlock_t global_cfg_sl;
+
+static struct ivshmem_config *
+get_config_by_name(const char * name)
+{
+       struct rte_ivshmem_metadata * config;
+       unsigned i;
+
+       for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) {
+               config = ivshmem_global_config[i].metadata;
+               if (config == NULL)
+                       return NULL;
+               if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0)
+                       return &ivshmem_global_config[i];
+       }
+
+       return NULL;
+}
+
+static int
+overlap(const struct rte_memzone * s1, const struct rte_memzone * s2)
+{
+       uint64_t start1, end1, start2, end2;
+
+       start1 = s1->addr_64;
+       end1 = s1->addr_64 + s1->len;
+       start2 = s2->addr_64;
+       end2 = s2->addr_64 + s2->len;
+
+       if (start1 >= start2 && start1 < end2)
+               return 1;
+       if (start2 >= start1 && start2 < end1)
+               return 1;
+
+       return 0;
+}
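
overlap() is the standard half-open interval intersection test; for
non-empty ranges the two symmetric clauses collapse into one
expression, which may be easier to verify at a glance:

#include <stdint.h>

/* two half-open ranges [s1,e1) and [s2,e2) intersect iff each starts
 * before the other ends (assumes s <= e for both) */
static int
ranges_overlap(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
{
	return s1 < e2 && s2 < e1;
}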
+
+static struct rte_memzone *
+get_memzone_by_addr(const void * addr)
+{
+       struct rte_memzone * tmp, * mz;
+       struct rte_mem_config * mcfg;
+       int i;
+
+       mcfg = rte_eal_get_configuration()->mem_config;
+       mz = NULL;
+
+       /* find memzone for the ring */
+       for (i = 0; i < RTE_MAX_MEMZONE; i++) {
+               tmp = &mcfg->memzone[i];
+
+               if (tmp->addr_64 == (uint64_t) addr) {
+                       mz = tmp;
+                       break;
+               }
+       }
+
+       return mz;
+}
+
+static int
+entry_compare(const void * a, const void * b)
+{
+       const struct rte_ivshmem_metadata_entry * e1 =
+                       (const struct rte_ivshmem_metadata_entry*) a;
+       const struct rte_ivshmem_metadata_entry * e2 =
+                       (const struct rte_ivshmem_metadata_entry*) b;
+
+       /* move unallocated zones to the end */
+       /* move unallocated zones to the end */
+       if (e1->mz.addr == NULL && e2->mz.addr == NULL)
+               return 0;
+       if (e1->mz.addr == NULL)
+               return 1;
+       if (e2->mz.addr == NULL)
+               return -1;
+
+       /* qsort expects a negative return for "less than" */
+       if (e1->mz.phys_addr < e2->mz.phys_addr)
+               return -1;
+       return e1->mz.phys_addr > e2->mz.phys_addr;
+}
+
+/* fills hugepage cache entry for a given start virt_addr */
+static int
+get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e)
+{
+       uint64_t start_addr, end_addr;
+       char *start, *path_end;
+       char buf[PATH_MAX*2];
+       FILE *f;
+
+       start = NULL;
+       path_end = NULL;
+       start_addr = 0;
+
+       memset(e->filepath, 0, sizeof(e->filepath));
+
+       /* open /proc/self/maps */
+       f = fopen("/proc/self/maps", "r");
+       if (f == NULL) {
+               RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n");
+               return -1;
+       }
+
+       /* parse maps */
+       while (fgets(buf, sizeof(buf), f) != NULL) {
+
+               /* get endptr to end of start addr */
+               start = buf;
+
+               GET_PAGEMAP_ADDR(start,start_addr,'-',
+                               "Cannot find start address in maps!\n");
+
+               /* if start address is bigger than our address, skip */
+               if (start_addr > virt_addr)
+                       continue;
+
+               GET_PAGEMAP_ADDR(start,end_addr,' ',
+                               "Cannot find end address in maps!\n");
+
+               /* if end address is less than our address, skip */
+               if (end_addr <= virt_addr)
+                       continue;
+
+               /* find where the path starts */
+               start = strstr(start, "/");
+
+               if (start == NULL)
+                       continue;
+
+               /* at this point, we know that this is our map.
+                * now let's find the file */
+               path_end = strstr(start, "\n");
+               break;
+       }
+
+       if (path_end == NULL) {
+               RTE_LOG(ERR, EAL, "Hugefile path not found!\n");
+               goto error;
+       }
+
+       /* calculate offset and copy the file path */
+       rte_snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start);
+
+       e->offset = virt_addr - start_addr;
+
+       fclose(f);
+
+       return 0;
+error:
+       fclose(f);
+       return -1;
+}
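
For reference, each /proc/self/maps line has the form "start-end perms
offset dev inode path". A throwaway sketch of the same extraction using
sscanf, with a hypothetical hugetlbfs path; note that the sscanf form
breaks on paths containing spaces, which is one reason the code above
walks the line manually:

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
	const char *line =
		"7f2a40000000-7f2a80000000 rw-s 00000000 00:22 42 /mnt/huge/rtemap_0\n";
	uint64_t start, end, virt_addr = 0x7f2a40200000ULL;
	char path[4096];

	if (sscanf(line, "%" SCNx64 "-%" SCNx64 " %*s %*s %*s %*s %4095s",
			&start, &end, path) == 3 &&
			virt_addr >= start && virt_addr < end)
		printf("file %s, offset 0x%" PRIx64 "\n",
				path, virt_addr - start);
	return 0;
}

Here the cache entry would be filled with filepath "/mnt/huge/rtemap_0"
and offset 0x200000.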
+
+/*
+ * This is a complex function. What it does is the following:
+ *  1. Goes through metadata and gets list of hugepages involved
+ *  2. Sorts the hugepages by size (1G first)
+ *  3. Goes through metadata again and writes correct offsets
+ *  4. Goes through pages and finds out their filenames, offsets etc.
+ */
+static int
+build_config(struct rte_ivshmem_metadata * metadata)
+{
+       struct rte_ivshmem_metadata_entry * e_local;
+       struct memseg_cache_entry * ms_local;
+       struct rte_memseg pages[IVSHMEM_MAX_PAGES];
+       struct rte_ivshmem_metadata_entry *entry;
+       struct memseg_cache_entry * c_entry, * prev_entry;
+       struct ivshmem_config * config;
+       unsigned i, j, mz_iter, ms_iter;
+       uint64_t biggest_len;
+       int biggest_idx;
+
+       /* return error if we try to use an unknown config file */
+       config = get_config_by_name(metadata->name);
+       if (config == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name);
+               goto fail_e;
+       }
+
+       memset(pages, 0, sizeof(pages));
+
+       e_local = malloc(sizeof(config->metadata->entry));
+       if (e_local == NULL)
+               goto fail_e;
+       ms_local = malloc(sizeof(config->memseg_cache));
+       if (ms_local == NULL)
+               goto fail_ms;
+
+       /* make local copies before doing anything */
+       memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry));
+       memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache));
+
+       qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry),
+                       entry_compare);
+
+       /* first pass - collect all huge pages */
+       for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
+
+               entry = &e_local[mz_iter];
+
+               uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64,
+                               entry->mz.hugepage_sz);
+               uint64_t offset = entry->mz.addr_64 - start_addr;
+               uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset,
+                               entry->mz.hugepage_sz);
+
+               if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0)
+                       continue;
+
+               int start_page = 0;
+
+               /* find the first unused page - memzones are sorted by physical
+                * address, so we don't have to look out for holes */
+               for (i = 0; i < RTE_DIM(pages); i++) {
+
+                       /* skip if we already have this page */
+                       if (pages[i].addr_64 == start_addr) {
+                               start_addr += entry->mz.hugepage_sz;
+                               len -= entry->mz.hugepage_sz;
+                               continue;
+                       }
+                       /* we found a new page */
+                       else if (pages[i].addr_64 == 0) {
+                               start_page = i;
+                               break;
+                       }
+               }
+               if (i == RTE_DIM(pages)) {
+                       RTE_LOG(ERR, EAL, "Cannot find unused page!\n");
+                       goto fail;
+               }
+
+               /* populate however many pages the memzone has */
+               for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) {
+
+                       pages[i].addr_64 = start_addr;
+                       pages[i].len = entry->mz.hugepage_sz;
+                       start_addr += entry->mz.hugepage_sz;
+                       len -= entry->mz.hugepage_sz;
+               }
+               /* if there's still length left */
+               if (len != 0) {
+                       RTE_LOG(ERR, EAL, "Not enough space for pages!\n");
+                       goto fail;
+               }
+       }
+
+       /* second pass - sort pages by size */
+       for (i = 0; i < RTE_DIM(pages); i++) {
+
+               if (pages[i].addr == NULL)
+                       break;
+
+               biggest_len = 0;
+               biggest_idx = -1;
+
+               /*
+                * browse all entries starting at 'i', and find the
+                * entry with the biggest length
+                */
+               for (j = i; j < RTE_DIM(pages); j++) {
+                       if (pages[j].addr == NULL)
+                               break;
+                       if (biggest_len == 0 ||
+                               pages[j].len > biggest_len) {
+                               biggest_len = pages[j].len;
+                               biggest_idx = j;
+                       }
+               }
+
+               /* should not happen */
+               if (biggest_idx == -1) {
+                       RTE_LOG(ERR, EAL, "Error sorting by size!\n");
+                       goto fail;
+               }
+               if (i != (unsigned) biggest_idx) {
+                       struct rte_memseg tmp;
+
+                       memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg));
+
+                       /* we don't want to break contiguousness, so instead of just
+                        * swapping segments, we move all the preceding segments to the
+                        * right and then put the old segment @ biggest_idx in place of
+                        * segment @ i */
+                       /* count down to i+1 rather than i: j is unsigned, so
+                        * a 'j >= i' loop would wrap below zero when i == 0 */
+                       for (j = biggest_idx; j > i; j--) {
+                               memcpy(&pages[j], &pages[j-1], sizeof(struct rte_memseg));
+                               memset(&pages[j-1], 0, sizeof(struct rte_memseg));
+                       }
+
+                       /* put old biggest segment to its new place */
+                       memcpy(&pages[i], &tmp, sizeof(struct rte_memseg));
+               }
+       }
+
+       /* third pass - write correct offsets */
+       for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) {
+
+               uint64_t offset = 0;
+
+               entry = &e_local[mz_iter];
+
+               if (entry->mz.addr_64 == 0)
+                       break;
+
+               /* find page for current memzone */
+               for (i = 0; i < RTE_DIM(pages); i++) {
+                       /* we found our page */
+                       if (entry->mz.addr_64 >= pages[i].addr_64 &&
+                                       entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) {
+                               entry->offset = (entry->mz.addr_64 - pages[i].addr_64) +
+                                               offset;
+                               break;
+                       }
+                       offset += pages[i].len;
+               }
+               if (i == RTE_DIM(pages)) {
+                       RTE_LOG(ERR, EAL, "Page not found!\n");
+                       goto fail;
+               }
+       }
+
+       ms_iter = 0;
+       prev_entry = NULL;
+
+       /* fourth pass - create proper memseg cache */
+       for (i = 0; i < RTE_DIM(pages) &&
+                       ms_iter <= RTE_DIM(config->memseg_cache); i++) {
+               if (pages[i].addr_64 == 0)
+                       break;
+
+               if (ms_iter == RTE_DIM(config->memseg_cache)) {
+                       RTE_LOG(ERR, EAL, "The universe has collapsed!\n");
+                       goto fail;
+               }
+
+               c_entry = &ms_local[ms_iter];
+               c_entry->len = pages[i].len;
+
+               if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0)
+                       goto fail;
+
+               /* if previous entry has the same filename and is contiguous,
+                * clear current entry and increase previous entry's length
+                */
+               if (prev_entry != NULL &&
+                               strncmp(c_entry->filepath, prev_entry->filepath,
+                               sizeof(c_entry->filepath)) == 0 &&
+                               prev_entry->offset + prev_entry->len == c_entry->offset) {
+                       prev_entry->len += pages[i].len;
+                       memset(c_entry, 0, sizeof(struct memseg_cache_entry));
+               }
+               else {
+                       prev_entry = c_entry;
+                       ms_iter++;
+               }
+       }
+
+       /* update current configuration with new valid data */
+       memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry));
+       memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache));
+
+       free(ms_local);
+       free(e_local);
+
+       return 0;
+fail:
+       free(ms_local);
+fail_ms:
+       free(e_local);
+fail_e:
+       return -1;
+}
+
+static int
+add_memzone_to_metadata(const struct rte_memzone * mz,
+               struct ivshmem_config * config)
+{
+       struct rte_ivshmem_metadata_entry * entry;
+       unsigned i;
+
+       rte_spinlock_lock(&config->sl);
+
+       /* find free slot in this config */
+       for (i = 0; i < RTE_DIM(config->metadata->entry); i++) {
+               entry = &config->metadata->entry[i];
+
+               if (entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) {
+                       RTE_LOG(ERR, EAL, "Overlapping memzones!\n");
+                       goto fail;
+               }
+
+               /* if addr is zero, this entry slot is free */
+               if (entry->mz.addr_64 == 0) {
+                       RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n",
+                                       mz->name, mz->addr, config->metadata->name);
+                       memcpy(&entry->mz, mz, sizeof(struct rte_memzone));
+
+                       /* run config file parser */
+                       if (build_config(config->metadata) < 0)
+                               goto fail;
+
+                       break;
+               }
+       }
+
+       /* if we reached the maximum, there is no space left in this config */
+       if (i == RTE_DIM(config->metadata->entry)) {
+               RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n",
+                               config->metadata->name);
+               goto fail;
+       }
+
+       rte_spinlock_unlock(&config->sl);
+       return 0;
+fail:
+       rte_spinlock_unlock(&config->sl);
+       return -1;
+}
+
+static int
+add_ring_to_metadata(const struct rte_ring * r,
+               struct ivshmem_config * config)
+{
+       struct rte_memzone * mz;
+
+       mz = get_memzone_by_addr(r);
+
+       if (!mz) {
+               RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n");
+               return -1;
+       }
+
+       return add_memzone_to_metadata(mz, config);
+}
+
+static int
+add_mempool_to_metadata(const struct rte_mempool * mp,
+               struct ivshmem_config * config)
+{
+       struct rte_memzone * mz;
+       int ret;
+
+       mz = get_memzone_by_addr(mp);
+       ret = 0;
+
+       if (!mz) {
+               RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n");
+               return -1;
+       }
+
+       /* mempool consists of memzone and ring */
+       ret = add_memzone_to_metadata(mz, config);
+       if (ret < 0)
+               return -1;
+
+       return add_ring_to_metadata(mp->ring, config);
+}
+
+int
+rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name)
+{
+       struct ivshmem_config * config;
+
+       if (name == NULL || r == NULL)
+               return -1;
+
+       config = get_config_by_name(name);
+
+       if (config == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+               return -1;
+       }
+
+       return add_ring_to_metadata(r, config);
+}
+
+int
+rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name)
+{
+       struct ivshmem_config * config;
+
+       if (name == NULL || mz == NULL)
+               return -1;
+
+       config = get_config_by_name(name);
+
+       if (config == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+               return -1;
+       }
+
+       return add_memzone_to_metadata(mz, config);
+}
+
+int
+rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name)
+{
+       struct ivshmem_config * config;
+
+       if (name == NULL || mp == NULL)
+               return -1;
+
+       config = get_config_by_name(name);
+
+       if (config == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+               return -1;
+       }
+
+       return add_mempool_to_metadata(mp, config);
+}
+
+static inline void
+ivshmem_config_path(char *buffer, size_t bufflen, const char *name)
+{
+       rte_snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name);
+}
+
+static inline void *
+ivshmem_metadata_create(const char *name, size_t size,
+               struct flock *lock)
+{
+       int retval, fd;
+       void *metadata_addr;
+       char pathname[PATH_MAX];
+
+       ivshmem_config_path(pathname, sizeof(pathname), name);
+
+       fd = open(pathname, O_RDWR | O_CREAT, 0660);
+       if (fd < 0) {
+               RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname);
+               return NULL;
+       }
+
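+       /* the metadata file is always sized and mapped as one page-aligned
+        * unit, regardless of the size requested by the caller */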
+       size = METADATA_SIZE_ALIGNED;
+
+       retval = fcntl(fd, F_SETLK, lock);
+       if (retval < 0) {
+               close(fd);
+               RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another "
+                               "process using it?\n", pathname);
+               return NULL;
+       }
+
+       retval = ftruncate(fd, size);
+       if (retval < 0) {
+               close(fd);
+               RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname);
+               return NULL;
+       }
+
+       metadata_addr = mmap(NULL, size,
+                               PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+       if (metadata_addr == MAP_FAILED) {
+               RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname);
+
+               /* we don't care if we can't unlock; F_UNLCK is a lock type,
+                * not an fcntl command, so unlock via F_SETLK */
+               lock->l_type = F_UNLCK;
+               fcntl(fd, F_SETLK, lock);
+               close(fd);
+
+               return NULL;
+       }
+
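+       /* fd is deliberately left open: closing any descriptor for this file
+        * would drop the fcntl() lock taken above */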
+       return metadata_addr;
+}
+
+int rte_ivshmem_metadata_create(const char *name)
+{
+       struct ivshmem_config * ivshmem_config;
+       unsigned index;
+
+       if (pagesz == 0)
+               pagesz = getpagesize();
+
+       if (name == NULL)
+               return -1;
+
+       rte_spinlock_lock(&global_cfg_sl);
+
+       for (index = 0; index < RTE_DIM(ivshmem_global_config); index++) {
+               if (ivshmem_global_config[index].metadata == NULL) {
+                       ivshmem_config = &ivshmem_global_config[index];
+                       break;
+               }
+       }
+
+       if (index == RTE_DIM(ivshmem_global_config)) {
+               RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. "
+                               "Maximum has been reached\n");
+               rte_spinlock_unlock(&global_cfg_sl);
+               return -1;
+       }
+
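+       /* take a write lock over the whole page-aligned metadata area, so
+        * that two processes cannot create metadata files of the same name */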
+       ivshmem_config->lock.l_type = F_WRLCK;
+       ivshmem_config->lock.l_whence = SEEK_SET;
+
+       ivshmem_config->lock.l_start = 0;
+       ivshmem_config->lock.l_len = METADATA_SIZE_ALIGNED;
+
+       /* mmap() returns void *, so no cast is needed in C */
+       ivshmem_global_config[index].metadata = ivshmem_metadata_create(
+                       name, sizeof(struct rte_ivshmem_metadata),
+                       &ivshmem_config->lock);
+
+       if (ivshmem_global_config[index].metadata == NULL) {
+               rte_spinlock_unlock(&global_cfg_sl);
+               return -1;
+       }
+
+       /* Metadata setup */
+       memset(ivshmem_config->metadata, 0, sizeof(struct rte_ivshmem_metadata));
+       ivshmem_config->metadata->magic_number = IVSHMEM_MAGIC;
+       rte_snprintf(ivshmem_config->metadata->name,
+                       sizeof(ivshmem_config->metadata->name), "%s", name);
+
+       rte_spinlock_unlock(&global_cfg_sl);
+
+       return 0;
+}
+
+int
+rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name)
+{
+       const struct memseg_cache_entry * ms_cache, *entry;
+       struct ivshmem_config * config;
+       char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr;
+       char cfg_file_path[PATH_MAX];
+       unsigned remaining_len, tmplen, iter;
+       uint64_t shared_mem_size, zero_size, total_size;
+
+       if (buffer == NULL || name == NULL)
+               return -1;
+
+       config = get_config_by_name(name);
+
+       if (config == NULL) {
+               RTE_LOG(ERR, EAL, "Config %s not found!\n", name);
+               return -1;
+       }
+
+       rte_spinlock_lock(&config->sl);
+
+       /* prepare metadata file path */
+       rte_snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT,
+                       config->metadata->name);
+
+       ms_cache = config->memseg_cache;
+
+       cmdline_ptr = cmdline;
+       remaining_len = sizeof(cmdline);
+
+       shared_mem_size = 0;
+       iter = 0;
+
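+       /* each memseg cache entry contributes one IVSHMEM_QEMU_CMD_FD_FMT
+        * chunk (hugepage file path, plus offset and length within that file)
+        * to the device argument being built for QEMU */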
+       while (iter < RTE_DIM(config->memseg_cache) && ms_cache[iter].len != 0) {
+
+               entry = &ms_cache[iter];
+
+               /* Offset and sizes within the current pathname */
+               tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
+                               entry->filepath, entry->offset, entry->len);
+
+               shared_mem_size += entry->len;
+
+               cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
+               remaining_len -= tmplen;
+
+               if (remaining_len == 0) {
+                       RTE_LOG(ERR, EAL, "Command line too long!\n");
+                       rte_spinlock_unlock(&config->sl);
+                       return -1;
+               }
+
+               iter++;
+       }
+
+       total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED);
+       zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED;
+
+       /* add /dev/zero to command-line to fill the space */
+       tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
+                       "/dev/zero",
+                       0x0,
+                       zero_size);
+
+       cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
+       remaining_len -= tmplen;
+
+       if (remaining_len == 0) {
+               RTE_LOG(ERR, EAL, "Command line too long!\n");
+               rte_spinlock_unlock(&config->sl);
+               return -1;
+       }
+
+       /* add metadata file to the end of command-line */
+       tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT,
+                       cfg_file_path,
+                       0x0,
+                       METADATA_SIZE_ALIGNED);
+
+       cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen);
+       remaining_len -= tmplen;
+
+       if (remaining_len == 0) {
+               RTE_LOG(ERR, EAL, "Command line too long!\n");
+               rte_spinlock_unlock(&config->sl);
+               return -1;
+       }
+
+       /* if the command line we have built is bigger than the buffer supplied
+        * by the user, bail out */
+       if ((sizeof(cmdline) - remaining_len) > size) {
+               RTE_LOG(ERR, EAL, "Buffer is too short!\n");
+               rte_spinlock_unlock(&config->sl);
+               return -1;
+       }
+       /* complete the command-line */
+       rte_snprintf(buffer, size,
+                       IVSHMEM_QEMU_CMD_LINE_HEADER_FMT,
+                       total_size >> 20,
+                       cmdline);
+
+       rte_spinlock_unlock(&config->sl);
+
+       return 0;
+}
+
+void
+rte_ivshmem_metadata_dump(const char *name)
+{
+       unsigned i = 0;
+       struct ivshmem_config * config;
+       struct rte_ivshmem_metadata_entry *entry;
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+       uint64_t addr;
+       uint64_t end, hugepage_sz;
+       struct memseg_cache_entry e;
+#endif
+
+       if (name == NULL)
+               return;
+
+       /* return error if we try to use an unknown config file */
+       config = get_config_by_name(name);
+       if (config == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name);
+               return;
+       }
+
+       rte_spinlock_lock(&config->sl);
+
+       entry = &config->metadata->entry[0];
+
+       while (i < RTE_DIM(config->metadata->entry) && entry->mz.addr != NULL) {
+
+               printf("Entry %u: name:<%-20s>, phys:0x%-15"PRIx64", len:0x%-15zx, "
+                       "virt:%-15p, off:0x%-15"PRIx64"\n",
+                       i,
+                       entry->mz.name,
+                       entry->mz.phys_addr,
+                       entry->mz.len,
+                       entry->mz.addr,
+                       entry->offset);
+               i++;
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+               printf("\tHugepage files:\n");
+
+               hugepage_sz = entry->mz.hugepage_sz;
+               addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz);
+               end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr),
+                               hugepage_sz);
+
+               for (; addr < end; addr += hugepage_sz) {
+                       memset(&e, 0, sizeof(e));
+
+                       get_hugefile_by_virt_addr(addr, &e);
+
+                       printf("\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n",
+                                       addr, addr + hugepage_sz, e.offset, e.filepath);
+               }
+#endif
+               entry++;
+       }
+
+       rte_spinlock_unlock(&config->sl);
+}
diff --git a/lib/librte_ivshmem/rte_ivshmem.h b/lib/librte_ivshmem/rte_ivshmem.h
new file mode 100644 (file)
index 0000000..9ff54bb
--- /dev/null
@@ -0,0 +1,163 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_IVSHMEM_H_
+#define RTE_IVSHMEM_H_
+
+#include <rte_memzone.h>
+#include <rte_mempool.h>
+
+/**
+ * @file
+ *
+ * The RTE IVSHMEM interface provides functions to create metadata files
+ * describing memory segments to be shared via QEMU IVSHMEM.
+ */
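+
+/*
+ * A minimal host-side usage sketch; "r" is assumed to be a ring previously
+ * created with rte_ring_create(), "md1" is an arbitrary metadata name, and
+ * error checking is omitted:
+ *
+ *      char cmdline[1024];
+ *
+ *      rte_ivshmem_metadata_create("md1");
+ *      rte_ivshmem_metadata_add_ring(r, "md1");
+ *      rte_ivshmem_metadata_cmdline_generate(cmdline, sizeof(cmdline), "md1");
+ *      printf("%s\n", cmdline);    (append this to the QEMU command line)
+ */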
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IVSHMEM_MAGIC          0x0BADC0DE
+#define IVSHMEM_NAME_LEN       32
+
+/**
+ * Structure that holds IVSHMEM shared metadata entry.
+ */
+struct rte_ivshmem_metadata_entry {
+       struct rte_memzone mz;  /**< shared memzone */
+       uint64_t offset;        /**< offset of memzone within IVSHMEM device */
+};
+
+/**
+ * Structure that holds IVSHMEM metadata.
+ */
+struct rte_ivshmem_metadata {
+       int magic_number;                               /**< magic number */
+       char name[IVSHMEM_NAME_LEN];    /**< name of the metadata file */
+       struct rte_ivshmem_metadata_entry entry[RTE_LIBRTE_IVSHMEM_MAX_ENTRIES];
+                       /**< metadata entries */
+};
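+
+/*
+ * Note: rte_ivshmem_metadata_cmdline_generate() appends the metadata file
+ * itself as the last object inside the IVSHMEM device, after the shared
+ * hugepages and any /dev/zero padding.
+ */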
+
+/**
+ * Creates a metadata file with a given name
+ *
+ * @param name
+ *  Name of metadata file to be created
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_create(const char * name);
+
+/**
+ * Adds a memzone to a specific metadata file
+ *
+ * @param mz
+ *  Memzone to be added
+ * @param md_name
+ *  Name of metadata file for the memzone to be added to
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz,
+               const char * md_name);
+
+/**
+ * Adds a ring descriptor to a specific metadata file
+ *
+ * @param r
+ *  Ring descriptor to be added
+ * @param md_name
+ *  Name of metadata file for the ring to be added to
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_add_ring(const struct rte_ring * r,
+               const char * md_name);
+
+/**
+ * Adds a mempool to a specific metadata file
+ *
+ * @param mp
+ *  Mempool to be added
+ * @param md_name
+ *  Name of metadata file for the mempool to be added to
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp,
+               const char * md_name);
+
+
+/**
+ * Generates the QEMU command line for an IVSHMEM device for a given metadata
+ * file. This function is to be called after all objects have been added.
+ *
+ * @param buffer
+ *  Buffer to be filled with the command line arguments.
+ * @param size
+ *  Size of the buffer.
+ * @param name
+ *  Name of metadata file to generate QEMU command-line parameters for
+ *
+ * @return
+ *  - On success, zero
+ *  - On failure, a negative value
+ */
+int rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size,
+               const char *name);
+
+
+/**
+ * Dump all metadata entries from a given metadata file to the console.
+ *
+ * @param name
+ *  Name of the metadata file to be dumped to console.
+ */
+void rte_ivshmem_metadata_dump(const char *name);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_IVSHMEM_H_ */
index 89b1c3b..a974dc8 100644 (file)
@@ -64,6 +64,12 @@ LDLIBS += -lrte_kni
 endif
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+LDLIBS += -lrte_ivshmem
+endif
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_E1000_PMD),y)
 LDLIBS += -lrte_pmd_e1000
 endif