From: Bruce Richardson
Date: Tue, 11 Feb 2014 10:28:51 +0000 (+0000)
Subject: ivshmem: library changes for mmapping using ivshmem
X-Git-Tag: spdx-start~11032
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=40b966a211ab71e96b8e155d9058f224e7b5bbf6;p=dpdk.git

ivshmem: library changes for mmapping using ivshmem

These library changes provide a new Intel(R) DPDK feature for communicating
with virtual machines using QEMU's IVSHMEM mechanism. The feature works by
providing a command line for QEMU to map several hugepages into a single
IVSHMEM device. For the guest to know what is inside any given IVSHMEM
device (and to distinguish between Intel(R) DPDK and non-Intel(R) DPDK
IVSHMEM devices), a metadata file is also mapped into the IVSHMEM segment.
No work needs to be done by the guest application to map IVSHMEM devices
into memory; they are automatically recognized by the Intel(R) DPDK
Environment Abstraction Layer (EAL).

Changes in this patch:
* Changes to EAL to allow mapping of all hugepages in a memseg into a
  single file
* Changes to EAL to allow IVSHMEM devices to be transparently mapped into
  the process running on the guest
* New ivshmem library to create and manage metadata exported to guest VMs
* New ivshmem compilation targets
* Mempool and ring changes to allow export of structures to a VM and to
  allow a VM to attach to those structures
* New autotests to unit-test this functionality

Signed-off-by: Bruce Richardson
---

diff --git a/app/test/Makefile b/app/test/Makefile
index 39fa163dec..c065a4ccf5 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -92,6 +92,7 @@ SRCS-$(CONFIG_RTE_APP_TEST) += test_kni.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_power.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_common.c
SRCS-$(CONFIG_RTE_APP_TEST) += test_timer_perf.c
+SRCS-$(CONFIG_RTE_APP_TEST) += test_ivshmem.c
ifeq ($(CONFIG_RTE_APP_TEST),y)
SRCS-$(CONFIG_RTE_LIBRTE_ACL) += test_acl.c
@@ -107,6 +108,7 @@ CFLAGS_test_kni.o += -wd1478
else
CFLAGS_test_kni.o += -Wno-deprecated-declarations
endif
+CFLAGS += -D_GNU_SOURCE
# this application needs libraries first
DEPDIRS-$(CONFIG_RTE_APP_TEST) += lib
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index bdb7e941a7..1161a9281c 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -174,6 +174,12 @@ parallel_test_group_list = [
"Func" : default_autotest,
"Report" : None,
},
+	{
+		"Name" : "IVSHMEM autotest",
+		"Command" : "ivshmem_autotest",
+		"Func" : default_autotest,
+		"Report" : None,
+	},
{
"Name" : "Memcpy autotest",
"Command" : "memcpy_autotest",
diff --git a/app/test/commands.c b/app/test/commands.c
index 118f70d458..f09bc90754 100644
--- a/app/test/commands.c
+++ b/app/test/commands.c
@@ -184,6 +184,8 @@ static void cmd_autotest_parsed(void *parsed_result,
ret |= test_power();
if (all || !strcmp(res->autotest, "common_autotest"))
ret |= test_common();
+	if (all || !strcmp(res->autotest, "ivshmem_autotest"))
+		ret |= test_ivshmem();
#ifdef RTE_LIBRTE_PMD_RING
if (all || !strcmp(res->autotest, "ring_pmd_autotest"))
ret |= test_pmd_ring();
@@ -224,7 +226,7 @@ cmdline_parse_token_string_t cmd_autotest_autotest =
"memcpy_perf_autotest#ring_perf_autotest#"
"red_autotest#meter_autotest#sched_autotest#"
"memcpy_perf_autotest#kni_autotest#"
-	"pm_autotest#"
+	"pm_autotest#ivshmem_autotest#"
#ifdef RTE_LIBRTE_ACL
"acl_autotest#"
#endif
diff --git a/app/test/test.c b/app/test/test.c
index c87e0df3f8..3a7999bb10 100644
--- a/app/test/test.c
+++ b/app/test/test.c
@@ -86,6 +86,7 @@ do_recursive_call(void)
{
"test_memory_flags", no_action }, { "test_file_prefix", no_action }, { "test_no_huge_flag", no_action }, + { "test_ivshmem", test_ivshmem }, }; if (recursive_call == NULL) diff --git a/app/test/test.h b/app/test/test.h index 71d87d10e8..adc6212409 100644 --- a/app/test/test.h +++ b/app/test/test.h @@ -95,6 +95,7 @@ int test_kni(void); int test_power(void); int test_common(void); int test_pmd_ring(void); +int test_ivshmem(void); int test_pci_run; diff --git a/app/test/test_ivshmem.c b/app/test/test_ivshmem.c new file mode 100644 index 0000000000..52f82779c3 --- /dev/null +++ b/app/test/test_ivshmem.c @@ -0,0 +1,441 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "test.h" + +#ifdef RTE_LIBRTE_IVSHMEM + +#include +#include +#include +#include "process.h" + +#define DUPLICATE_METADATA "duplicate" +#define METADATA_NAME "metadata" +#define NONEXISTENT_METADATA "nonexistent" +#define FIRST_TEST 'a' + +#define launch_proc(ARGV) process_dup(ARGV, \ + sizeof(ARGV)/(sizeof(ARGV[0])), "test_ivshmem") + +#define ASSERT(cond,msg) do { \ + if (!(cond)) { \ + printf("**** TEST %s() failed: %s\n", \ + __func__, msg); \ + return -1; \ + } \ +} while(0) + +static char* +get_current_prefix(char * prefix, int size) +{ + char path[PATH_MAX] = {0}; + char buf[PATH_MAX] = {0}; + + /* get file for config (fd is always 3) */ + rte_snprintf(path, sizeof(path), "/proc/self/fd/%d", 3); + + /* return NULL on error */ + if (readlink(path, buf, sizeof(buf)) == -1) + return NULL; + + /* get the basename */ + rte_snprintf(buf, sizeof(buf), "%s", basename(buf)); + + /* copy string all the way from second char up to start of _config */ + rte_snprintf(prefix, size, "%.*s", + strnlen(buf, sizeof(buf)) - sizeof("_config"), &buf[1]); + + return prefix; +} + +static struct rte_ivshmem_metadata* +mmap_metadata(const char *name) +{ + int fd; + char pathname[PATH_MAX]; + struct rte_ivshmem_metadata *metadata; + + rte_snprintf(pathname, sizeof(pathname), + "/var/run/.dpdk_ivshmem_metadata_%s", name); + + fd = open(pathname, O_RDWR, 0660); + if (fd < 0) + return NULL; + + metadata = (struct rte_ivshmem_metadata*) mmap(NULL, + sizeof(struct rte_ivshmem_metadata), PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + + if (metadata == MAP_FAILED) + return NULL; + + close(fd); + + return metadata; +} + +static int +create_duplicate(void) +{ + /* create a metadata that another process will then try to overwrite */ + ASSERT (rte_ivshmem_metadata_create(DUPLICATE_METADATA) == 0, + "Creating metadata failed"); + return 0; +} + +static int +test_ivshmem_create_lots_of_memzones(void) +{ + int i; + char name[IVSHMEM_NAME_LEN]; + const struct rte_memzone *mz; + + ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0, + "Failed to create metadata"); + + for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES; i++) { + rte_snprintf(name, sizeof(name), "mz_%i", i); + + mz = rte_memzone_reserve(name, CACHE_LINE_SIZE, SOCKET_ID_ANY, 0); + ASSERT(mz != NULL, "Failed to reserve memzone"); + + ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) == 0, + "Failed to add memzone"); + } + mz = rte_memzone_reserve("one too many", CACHE_LINE_SIZE, SOCKET_ID_ANY, 0); + ASSERT(mz != NULL, "Failed to reserve memzone"); + + ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) < 0, + "Metadata should have been full"); + + return 0; +} + +static int +test_ivshmem_create_duplicate_memzone(void) +{ + const struct rte_memzone *mz; + + ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0, + "Failed to create metadata"); + + mz = rte_memzone_reserve("mz", CACHE_LINE_SIZE, SOCKET_ID_ANY, 0); + ASSERT(mz != NULL, "Failed to reserve memzone"); + + ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) == 0, + "Failed to add memzone"); + + ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) < 0, + "Added the same memzone twice"); + + return 0; +} + +static int +test_ivshmem_api_test(void) +{ + const struct rte_memzone * mz; + struct rte_mempool * mp; + struct rte_ring * r; + char buf[BUFSIZ]; + + memset(buf, 0, sizeof(buf)); + + r = rte_ring_create("ring", 1, SOCKET_ID_ANY, 0); + mp = rte_mempool_create("mempool", 1, 1, 
1, 1, NULL, NULL, NULL, NULL,
+			SOCKET_ID_ANY, 0);
+	mz = rte_memzone_reserve("memzone", 64, SOCKET_ID_ANY, 0);
+
+	ASSERT(r != NULL, "Failed to create ring");
+	ASSERT(mp != NULL, "Failed to create mempool");
+	ASSERT(mz != NULL, "Failed to reserve memzone");
+
+	/* try to create NULL metadata */
+	ASSERT(rte_ivshmem_metadata_create(NULL) < 0,
+			"Created metadata with NULL name");
+
+	/* create valid metadata to do tests on */
+	ASSERT(rte_ivshmem_metadata_create(METADATA_NAME) == 0,
+			"Failed to create metadata");
+
+	/* test adding memzone */
+	ASSERT(rte_ivshmem_metadata_add_memzone(NULL, NULL) < 0,
+			"Added NULL memzone to NULL metadata");
+	ASSERT(rte_ivshmem_metadata_add_memzone(NULL, METADATA_NAME) < 0,
+			"Added NULL memzone");
+	ASSERT(rte_ivshmem_metadata_add_memzone(mz, NULL) < 0,
+			"Added memzone to NULL metadata");
+	ASSERT(rte_ivshmem_metadata_add_memzone(mz, NONEXISTENT_METADATA) < 0,
+			"Added memzone to nonexistent metadata");
+
+	/* test adding ring */
+	ASSERT(rte_ivshmem_metadata_add_ring(NULL, NULL) < 0,
+			"Added NULL ring to NULL metadata");
+	ASSERT(rte_ivshmem_metadata_add_ring(NULL, METADATA_NAME) < 0,
+			"Added NULL ring");
+	ASSERT(rte_ivshmem_metadata_add_ring(r, NULL) < 0,
+			"Added ring to NULL metadata");
+	ASSERT(rte_ivshmem_metadata_add_ring(r, NONEXISTENT_METADATA) < 0,
+			"Added ring to nonexistent metadata");
+
+	/* test adding mempool */
+	ASSERT(rte_ivshmem_metadata_add_mempool(NULL, NULL) < 0,
+			"Added NULL mempool to NULL metadata");
+	ASSERT(rte_ivshmem_metadata_add_mempool(NULL, METADATA_NAME) < 0,
+			"Added NULL mempool");
+	ASSERT(rte_ivshmem_metadata_add_mempool(mp, NULL) < 0,
+			"Added mempool to NULL metadata");
+	ASSERT(rte_ivshmem_metadata_add_mempool(mp, NONEXISTENT_METADATA) < 0,
+			"Added mempool to nonexistent metadata");
+
+	/* test creating command line */
+	ASSERT(rte_ivshmem_metadata_cmdline_generate(NULL, sizeof(buf), METADATA_NAME) < 0,
+			"Written command line into NULL buffer");
+	ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+	ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, 0, METADATA_NAME) < 0,
+			"Written command line into small buffer");
+	ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+	ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf), NULL) < 0,
+			"Written command line for NULL metadata");
+	ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+	ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf),
+			NONEXISTENT_METADATA) < 0,
+			"Written command line for nonexistent metadata");
+	ASSERT(strnlen(buf, sizeof(buf)) == 0, "Buffer is not empty");
+
+	/* add stuff to config */
+	ASSERT(rte_ivshmem_metadata_add_memzone(mz, METADATA_NAME) == 0,
+			"Failed to add memzone to valid config");
+	ASSERT(rte_ivshmem_metadata_add_ring(r, METADATA_NAME) == 0,
+			"Failed to add ring to valid config");
+	ASSERT(rte_ivshmem_metadata_add_mempool(mp, METADATA_NAME) == 0,
+			"Failed to add mempool to valid config");
+
+	/* create config */
+	ASSERT(rte_ivshmem_metadata_cmdline_generate(buf, sizeof(buf),
+			METADATA_NAME) == 0, "Failed to write command-line");
+
+	/* check if something was written */
+	ASSERT(strnlen(buf, sizeof(buf)) != 0, "Buffer is empty");
+
+	/* make sure we don't segfault */
+	rte_ivshmem_metadata_dump(NULL);
+
+	/* dump our metadata */
+	rte_ivshmem_metadata_dump(METADATA_NAME);
+
+	return 0;
+}
+
+static int
+test_ivshmem_create_duplicate_metadata(void)
+{
+	ASSERT(rte_ivshmem_metadata_create(DUPLICATE_METADATA) < 0,
+			"Creating duplicate metadata should have failed");
+
+	return 0;
+}
+
+static int
+test_ivshmem_create_metadata_config(void)
+{
+	struct rte_ivshmem_metadata *metadata;
+
+	rte_ivshmem_metadata_create(METADATA_NAME);
+
+	metadata = mmap_metadata(METADATA_NAME);
+
+	ASSERT(metadata != MAP_FAILED, "Metadata mmapping failed");
+
+	ASSERT(metadata->magic_number == IVSHMEM_MAGIC,
+			"Magic number is not that magic");
+
+	ASSERT(strncmp(metadata->name, METADATA_NAME, sizeof(metadata->name)) == 0,
+			"Name has not been set up");
+
+	ASSERT(metadata->entry[0].offset == 0, "Offset is not initialized");
+	ASSERT(metadata->entry[0].mz.addr == 0, "mz.addr is not initialized");
+	ASSERT(metadata->entry[0].mz.len == 0, "mz.len is not initialized");
+
+	return 0;
+}
+
+static int
+test_ivshmem_create_multiple_metadata_configs(void)
+{
+	int i;
+	char name[IVSHMEM_NAME_LEN];
+	struct rte_ivshmem_metadata *metadata;
+
+	for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES / 2; i++) {
+		rte_snprintf(name, sizeof(name), "test_%d", i);
+		rte_ivshmem_metadata_create(name);
+		metadata = mmap_metadata(name);
+
+		ASSERT(metadata->magic_number == IVSHMEM_MAGIC,
+				"Magic number is not that magic");
+
+		ASSERT(strncmp(metadata->name, name, sizeof(metadata->name)) == 0,
+				"Name has not been set up");
+	}
+
+	return 0;
+}
+
+static int
+test_ivshmem_create_too_many_metadata_configs(void)
+{
+	int i;
+	char name[IVSHMEM_NAME_LEN];
+
+	for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES; i++) {
+		rte_snprintf(name, sizeof(name), "test_%d", i);
+		ASSERT(rte_ivshmem_metadata_create(name) == 0,
+				"Create config file failed");
+	}
+
+	ASSERT(rte_ivshmem_metadata_create(name) < 0,
+			"Create config file didn't fail");
+
+	return 0;
+}
+
+enum rte_ivshmem_tests {
+	_test_ivshmem_api_test = 0,
+	_test_ivshmem_create_metadata_config,
+	_test_ivshmem_create_multiple_metadata_configs,
+	_test_ivshmem_create_too_many_metadata_configs,
+	_test_ivshmem_create_duplicate_metadata,
+	_test_ivshmem_create_lots_of_memzones,
+	_test_ivshmem_create_duplicate_memzone,
+	_last_test,
+};
+
+#define RTE_IVSHMEM_TEST_ID "RTE_IVSHMEM_TEST_ID"
+
+static int
+launch_all_tests_on_secondary_processes(void)
+{
+	int ret = 0;
+	char id;
+	char testid;
+	char tmp[PATH_MAX] = {0};
+	char prefix[PATH_MAX] = {0};
+
+	get_current_prefix(tmp, sizeof(tmp));
+
+	rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp);
+
+	const char *argv[] = { prgname, "-c", "1", "-n", "3",
+			"--proc-type=secondary", prefix };
+
+	for (id = 0; id < _last_test; id++) {
+		testid = (char)(FIRST_TEST + id);
+		setenv(RTE_IVSHMEM_TEST_ID, &testid, 1);
+		if (launch_proc(argv) != 0)
+			return -1;
+	}
+	return ret;
+}
+
+int
+test_ivshmem(void)
+{
+	int testid;
+
+	/* We want to have a clean execution for every test without exposing
+	 * private global data structures in rte_ivshmem so we launch each test
+	 * on a different secondary process.
*/ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + + /* first, create metadata */ + ASSERT(create_duplicate() == 0, "Creating metadata failed"); + + return launch_all_tests_on_secondary_processes(); + } + + testid = *(getenv(RTE_IVSHMEM_TEST_ID)) - FIRST_TEST; + + printf("Secondary process running test %d \n", testid); + + switch (testid) { + case _test_ivshmem_api_test: + return test_ivshmem_api_test(); + + case _test_ivshmem_create_metadata_config: + return test_ivshmem_create_metadata_config(); + + case _test_ivshmem_create_multiple_metadata_configs: + return test_ivshmem_create_multiple_metadata_configs(); + + case _test_ivshmem_create_too_many_metadata_configs: + return test_ivshmem_create_too_many_metadata_configs(); + + case _test_ivshmem_create_duplicate_metadata: + return test_ivshmem_create_duplicate_metadata(); + + case _test_ivshmem_create_lots_of_memzones: + return test_ivshmem_create_lots_of_memzones(); + + case _test_ivshmem_create_duplicate_memzone: + return test_ivshmem_create_duplicate_memzone(); + + default: + break; + } + + return -1; +} +#else /* RTE_LIBRTE_IVSHMEM */ + +int +test_ivshmem(void) +{ + printf("This binary was not compiled with IVSHMEM support!\n"); + return 0; +} +#endif /* RTE_LIBRTE_IVSHMEM */ diff --git a/config/defconfig_x86_64-ivshmem-linuxapp-gcc b/config/defconfig_x86_64-ivshmem-linuxapp-gcc new file mode 100644 index 0000000000..2f55a6990b --- /dev/null +++ b/config/defconfig_x86_64-ivshmem-linuxapp-gcc @@ -0,0 +1,49 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +# +# use default config +# + +#include "defconfig_x86_64-default-linuxapp-gcc" + +# +# Compile IVSHMEM library +# +CONFIG_RTE_LIBRTE_IVSHMEM=y +CONFIG_RTE_LIBRTE_IVSHMEM_DEBUG=n +CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS=4 +CONFIG_RTE_LIBRTE_IVSHMEM_MAX_ENTRIES=128 +CONFIG_RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES=32 + +# Set EAL to single file segments +CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS=y \ No newline at end of file diff --git a/config/defconfig_x86_64-ivshmem-linuxapp-icc b/config/defconfig_x86_64-ivshmem-linuxapp-icc new file mode 100644 index 0000000000..14f0926cb6 --- /dev/null +++ b/config/defconfig_x86_64-ivshmem-linuxapp-icc @@ -0,0 +1,49 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +# +# use default config +# + +#include "defconfig_x86_64-default-linuxapp-icc" + +# +# Compile IVSHMEM library +# +CONFIG_RTE_LIBRTE_IVSHMEM=y +CONFIG_RTE_LIBRTE_IVSHMEM_DEBUG=n +CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS=4 +CONFIG_RTE_LIBRTE_IVSHMEM_MAX_ENTRIES=128 +CONFIG_RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES=32 + +# Set EAL to single file segments +CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS=y diff --git a/lib/Makefile b/lib/Makefile index fda306e3fa..c9f0111ad7 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni +DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem endif include $(RTE_SDK)/mk/rte.sharelib.mk diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index 4d60f8c04a..a1fcdfdfd0 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -479,11 +479,17 @@ rte_eal_memzone_init(void) rte_rwlock_write_lock(&mcfg->mlock); - /* duplicate the memsegs from config */ - memcpy(free_memseg, memseg, sizeof(struct rte_memseg) * RTE_MAX_MEMSEG); + /* fill in uninitialized free_memsegs */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if (memseg[i].addr == NULL) + break; + if (free_memseg[i].addr != NULL) + continue; + memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg)); + } /* make all zones cache-aligned */ - for (i=0; i +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_internal_cfg.h" +#include "eal_private.h" + +#define PCI_VENDOR_ID_IVSHMEM 0x1Af4 +#define PCI_DEVICE_ID_IVSHMEM 0x1110 + +#define IVSHMEM_MAGIC 0x0BADC0DE +#define IVSHMEM_METADATA_SIZE 0x1000 + +#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2" +#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config" + +#define PHYS 0x1 +#define VIRT 0x2 +#define IOREMAP 0x4 +#define FULL (PHYS|VIRT|IOREMAP) + +#define METADATA_SIZE_ALIGNED \ + (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) + +#define CONTAINS(x,y)\ + (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len)) + +#define DIM(x) (sizeof(x)/sizeof(x[0])) + +struct ivshmem_pci_device { + char path[PATH_MAX]; + phys_addr_t ioremap_addr; +}; + +/* data type to store in config */ +struct ivshmem_segment { + struct rte_ivshmem_metadata_entry entry; + uint64_t align; + char path[PATH_MAX]; +}; +struct ivshmem_shared_config { + struct ivshmem_segment segment[RTE_MAX_MEMSEG]; + uint32_t segment_idx; + struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS]; + uint32_t pci_devs_idx; +}; +static struct ivshmem_shared_config * ivshmem_config; +static int memseg_idx; +static int pagesz; + +/* Tailq heads to add rings to */ +TAILQ_HEAD(rte_ring_list, rte_ring); + +/* + * Utility functions + */ + +static int +is_ivshmem_device(struct rte_pci_device * dev) +{ + return (dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM + && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM); +} + +static void * +map_metadata(int fd, uint64_t len) +{ + size_t metadata_len = sizeof(struct rte_ivshmem_metadata); + size_t aligned_len = METADATA_SIZE_ALIGNED; + + return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, len - aligned_len); +} + +static void +unmap_metadata(void * ptr) +{ + munmap(ptr, sizeof(struct rte_ivshmem_metadata)); +} + +static int 
+has_ivshmem_metadata(int fd, uint64_t len) +{ + struct rte_ivshmem_metadata metadata; + void * ptr; + + ptr = map_metadata(fd, len); + + if (ptr == MAP_FAILED) + return -1; + + metadata = *(struct rte_ivshmem_metadata*) (ptr); + + unmap_metadata(ptr); + + return metadata.magic_number == IVSHMEM_MAGIC; +} + +static void +remove_segment(struct ivshmem_segment * ms, int len, int idx) +{ + int i; + + for (i = idx; i < len - 1; i++) + memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment)); + memset(&ms[len-1], 0, sizeof(struct ivshmem_segment)); +} + +static int +overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) +{ + uint64_t start1, end1, start2, end2; + uint64_t p_start1, p_end1, p_start2, p_end2; + uint64_t i_start1, i_end1, i_start2, i_end2; + int result = 0; + + /* gather virtual addresses */ + start1 = mz1->addr_64; + end1 = mz1->addr_64 + mz1->len; + start2 = mz2->addr_64; + end2 = mz2->addr_64 + mz2->len; + + /* gather physical addresses */ + p_start1 = mz1->phys_addr; + p_end1 = mz1->phys_addr + mz1->len; + p_start2 = mz2->phys_addr; + p_end2 = mz2->phys_addr + mz2->len; + + /* gather ioremap addresses */ + i_start1 = mz1->ioremap_addr; + i_end1 = mz1->ioremap_addr + mz1->len; + i_start2 = mz2->ioremap_addr; + i_end2 = mz2->ioremap_addr + mz2->len; + + /* check for overlap in virtual addresses */ + if (start1 >= start2 && start1 < end2) + result |= VIRT; + if (start2 >= start1 && start2 < end1) + result |= VIRT; + + /* check for overlap in physical addresses */ + if (p_start1 >= p_start2 && p_start1 < p_end2) + result |= PHYS; + if (p_start2 >= p_start1 && p_start2 < p_end1) + result |= PHYS; + + /* check for overlap in ioremap addresses */ + if (i_start1 >= i_start2 && i_start1 < i_end2) + result |= IOREMAP; + if (i_start2 >= i_start1 && i_start2 < i_end1) + result |= IOREMAP; + + return result; +} + +static int +adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2) +{ + uint64_t start1, end1, start2, end2; + uint64_t p_start1, p_end1, p_start2, p_end2; + uint64_t i_start1, i_end1, i_start2, i_end2; + int result = 0; + + /* gather virtual addresses */ + start1 = mz1->addr_64; + end1 = mz1->addr_64 + mz1->len; + start2 = mz2->addr_64; + end2 = mz2->addr_64 + mz2->len; + + /* gather physical addresses */ + p_start1 = mz1->phys_addr; + p_end1 = mz1->phys_addr + mz1->len; + p_start2 = mz2->phys_addr; + p_end2 = mz2->phys_addr + mz2->len; + + /* gather ioremap addresses */ + i_start1 = mz1->ioremap_addr; + i_end1 = mz1->ioremap_addr + mz1->len; + i_start2 = mz2->ioremap_addr; + i_end2 = mz2->ioremap_addr + mz2->len; + + /* check if segments are virtually adjacent */ + if (start1 == end2) + result |= VIRT; + if (start2 == end1) + result |= VIRT; + + /* check if segments are physically adjacent */ + if (p_start1 == p_end2) + result |= PHYS; + if (p_start2 == p_end1) + result |= PHYS; + + /* check if segments are ioremap-adjacent */ + if (i_start1 == i_end2) + result |= IOREMAP; + if (i_start2 == i_end1) + result |= IOREMAP; + + return result; +} + +static int +has_adjacent_segments(struct ivshmem_segment * ms, int len) +{ + int i, j, a; + + for (i = 0; i < len; i++) + for (j = i + 1; j < len; j++) { + a = adjacent(&ms[i].entry.mz, &ms[j].entry.mz); + + /* check if segments are adjacent virtually and/or physically but + * not ioremap (since that would indicate that they are from + * different PCI devices and thus don't need to be concatenated. 
+ */ + if ((a & (VIRT|PHYS)) > 0 && (a & IOREMAP) == 0) + return 1; + } + return 0; +} + +static int +has_overlapping_segments(struct ivshmem_segment * ms, int len) +{ + int i, j; + + for (i = 0; i < len; i++) + for (j = i + 1; j < len; j++) + if (overlap(&ms[i].entry.mz, &ms[j].entry.mz)) + return 1; + return 0; +} + +static int +seg_compare(const void * a, const void * b) +{ + const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a; + const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b; + + /* move unallocated zones to the end */ + if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL) + return 0; + if (s1->entry.mz.addr == 0) + return 1; + if (s2->entry.mz.addr == 0) + return -1; + + return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr; +} + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG +static void +entry_dump(struct rte_ivshmem_metadata_entry *e) +{ + RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr, + RTE_PTR_ADD(e->mz.addr, e->mz.len)); + RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n", + e->mz.phys_addr, + e->mz.phys_addr + e->mz.len); + RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n", + e->mz.ioremap_addr, + e->mz.ioremap_addr + e->mz.len); + RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len); + RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset); +} +#endif + + + +/* + * Actual useful code + */ + +/* read through metadata mapped from the IVSHMEM device */ +static int +read_metadata(char * path, int path_len, int fd, uint64_t flen) +{ + struct rte_ivshmem_metadata metadata; + struct rte_ivshmem_metadata_entry * entry; + int idx, i; + void * ptr; + + ptr = map_metadata(fd, flen); + + if (ptr == MAP_FAILED) + return -1; + + metadata = *(struct rte_ivshmem_metadata*) (ptr); + + unmap_metadata(ptr); + + RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name); + + idx = ivshmem_config->segment_idx; + + for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES && + idx <= RTE_MAX_MEMSEG; i++) { + + if (idx == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Not enough memory segments!\n"); + return -1; + } + + entry = &metadata.entry[i]; + + /* stop on uninitialized memzone */ + if (entry->mz.len == 0) + break; + + /* copy metadata entry */ + memcpy(&ivshmem_config->segment[idx].entry, entry, + sizeof(struct rte_ivshmem_metadata_entry)); + + /* copy path */ + rte_snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path); + + idx++; + } + ivshmem_config->segment_idx = idx; + + return 0; +} + +/* check through each segment and look for adjacent or overlapping ones. 
*/
+static int
+cleanup_segments(struct ivshmem_segment * ms, int tbl_len)
+{
+	struct ivshmem_segment * s, * tmp;
+	int i, j, concat, seg_adjacent, seg_overlapping;
+	uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2;
+
+	qsort(ms, tbl_len, sizeof(struct ivshmem_segment),
+			seg_compare);
+
+	while (has_overlapping_segments(ms, tbl_len) ||
+			has_adjacent_segments(ms, tbl_len)) {
+
+		for (i = 0; i < tbl_len; i++) {
+			s = &ms[i];
+
+			concat = 0;
+
+			for (j = i + 1; j < tbl_len; j++) {
+				tmp = &ms[j];
+
+				/* check if this segment is overlapping with existing segment,
+				 * or is adjacent to existing segment */
+				seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz);
+				seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz);
+
+				/* check if segments fully overlap or are fully adjacent */
+				if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) {
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+					RTE_LOG(DEBUG, EAL, "Concatenating segments\n");
+					RTE_LOG(DEBUG, EAL, "Segment %i:\n", i);
+					entry_dump(&s->entry);
+					RTE_LOG(DEBUG, EAL, "Segment %i:\n", j);
+					entry_dump(&tmp->entry);
+#endif
+
+					start1 = s->entry.mz.addr_64;
+					start2 = tmp->entry.mz.addr_64;
+					p_start1 = s->entry.mz.phys_addr;
+					p_start2 = tmp->entry.mz.phys_addr;
+					i_start1 = s->entry.mz.ioremap_addr;
+					i_start2 = tmp->entry.mz.ioremap_addr;
+					end1 = s->entry.mz.addr_64 + s->entry.mz.len;
+					end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len;
+
+					/* settle for minimum start address and maximum length */
+					s->entry.mz.addr_64 = RTE_MIN(start1, start2);
+					s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2);
+					s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2);
+					s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset);
+					s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64;
+					concat = 1;
+
+#ifdef RTE_LIBRTE_IVSHMEM_DEBUG
+					RTE_LOG(DEBUG, EAL, "Resulting segment:\n");
+					entry_dump(&s->entry);
+
+#endif
+				}
+				/* if segments do not fully overlap, we have an error condition.
+				 * adjacent segments can coexist.
+ */ + else if (seg_overlapping > 0) { + RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j); +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); + entry_dump(&s->entry); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); + entry_dump(&tmp->entry); +#endif + return -1; + } + if (concat) + break; + } + /* if we concatenated, remove segment at j */ + if (concat) { + remove_segment(ms, tbl_len, j); + tbl_len--; + break; + } + } + } + + return tbl_len; +} + +static int +create_shared_config(void) +{ + char path[PATH_MAX]; + int fd; + + /* build ivshmem config file path */ + rte_snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, + internal_config.hugefile_prefix); + + fd = open(path, O_CREAT | O_RDWR); + + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno)); + return -1; + } + + /* try ex-locking first - if the file is locked, we have a problem */ + if (flock(fd, LOCK_EX | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno)); + close(fd); + return -1; + } + + ftruncate(fd, sizeof(struct ivshmem_shared_config)); + + ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (ivshmem_config == MAP_FAILED) + return -1; + + memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config)); + + /* change the exclusive lock we got earlier to a shared lock */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); + return -1; + } + + close(fd); + + return 0; +} + +/* open shared config file and, if present, map the config. + * having no config file is not an error condition, as we later check if + * ivshmem_config is NULL (if it is, that means nothing was mapped). */ +static int +open_shared_config(void) +{ + char path[PATH_MAX]; + int fd; + + /* build ivshmem config file path */ + rte_snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, + internal_config.hugefile_prefix); + + fd = open(path, O_RDONLY); + + /* if the file doesn't exist, just return success */ + if (fd < 0 && errno == ENOENT) + return 0; + /* else we have an error condition */ + else if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s: %s\n", + path, strerror(errno)); + return -1; + } + + /* try ex-locking first - if the lock *does* succeed, this means it's a + * stray config file, so it should be deleted. + */ + if (flock(fd, LOCK_EX | LOCK_NB) != -1) { + + /* if we can't remove the file, something is wrong */ + if (unlink(path) < 0) { + RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path, + strerror(errno)); + return -1; + } + + /* release the lock */ + flock(fd, LOCK_UN); + close(fd); + + /* return success as having a stray config file is equivalent to not + * having config file at all. + */ + return 0; + } + + ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), + PROT_READ, MAP_SHARED, fd, 0); + + if (ivshmem_config == MAP_FAILED) + return -1; + + /* place a shared lock on config file */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); + return -1; + } + + close(fd); + + return 0; +} + +/* + * This function does the following: + * + * 1) Builds a table of ivshmem_segments with proper offset alignment + * 2) Cleans up that table so that we don't have any overlapping or adjacent + * memory segments + * 3) Creates memsegs from this table and maps them into memory. 
+ */ +static inline int +map_all_segments(void) +{ + struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG]; + struct ivshmem_pci_device * pci_dev; + struct rte_mem_config * mcfg; + struct ivshmem_segment * seg; + int fd, fd_zero; + unsigned i, j; + struct rte_memzone mz; + struct rte_memseg ms; + void * base_addr; + uint64_t align, len; + phys_addr_t ioremap_addr; + + ioremap_addr = 0; + + memset(ms_tbl, 0, sizeof(ms_tbl)); + memset(&mz, 0, sizeof(struct rte_memzone)); + memset(&ms, 0, sizeof(struct rte_memseg)); + + /* first, build a table of memsegs to map, to avoid failed mmaps due to + * overlaps + */ + for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) { + if (i == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Too many segments requested!\n"); + return -1; + } + + seg = &ivshmem_config->segment[i]; + + /* copy segment to table */ + memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment)); + + /* find ioremap addr */ + for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) { + pci_dev = &ivshmem_config->pci_devs[j]; + if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) { + ioremap_addr = pci_dev->ioremap_addr; + break; + } + } + if (ioremap_addr == 0) { + RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n"); + return -1; + } + + /* work out alignments */ + align = seg->entry.mz.addr_64 - + RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000); + len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000); + + /* save original alignments */ + ms_tbl[i].align = align; + + /* create a memory zone */ + mz.addr_64 = seg->entry.mz.addr_64 - align; + mz.len = len; + mz.hugepage_sz = seg->entry.mz.hugepage_sz; + mz.phys_addr = seg->entry.mz.phys_addr - align; + + /* find true physical address */ + mz.ioremap_addr = ioremap_addr + seg->entry.offset - align; + + ms_tbl[i].entry.offset = seg->entry.offset - align; + + memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone)); + } + + /* clean up the segments */ + memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx); + + if (memseg_idx < 0) + return -1; + + mcfg = rte_eal_get_configuration()->mem_config; + + fd_zero = open("/dev/zero", O_RDWR); + + if (fd_zero < 0) { + RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno)); + return -1; + } + + /* create memsegs and put them into DPDK memory */ + for (i = 0; i < (unsigned) memseg_idx; i++) { + + seg = &ms_tbl[i]; + + ms.addr_64 = seg->entry.mz.addr_64; + ms.hugepage_sz = seg->entry.mz.hugepage_sz; + ms.len = seg->entry.mz.len; + ms.nchannel = rte_memory_get_nchannel(); + ms.nrank = rte_memory_get_nrank(); + ms.phys_addr = seg->entry.mz.phys_addr; + ms.ioremap_addr = seg->entry.mz.ioremap_addr; + ms.socket_id = seg->entry.mz.socket_id; + + base_addr = mmap(ms.addr, ms.len, + PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0); + + if (base_addr == MAP_FAILED || base_addr != ms.addr) { + RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n"); + return -1; + } + + fd = open(seg->path, O_RDWR); + + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path, + strerror(errno)); + return -1; + } + + munmap(ms.addr, ms.len); + + base_addr = mmap(ms.addr, ms.len, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, + seg->entry.offset); + + + if (base_addr == MAP_FAILED || base_addr != ms.addr) { + RTE_LOG(ERR, EAL, "Cannot map segment into memory: " + "expected %p got %p (%s)\n", ms.addr, base_addr, + strerror(errno)); + return -1; + } + + RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at " + "offset 0x%" PRIx64 "\n", + ms.addr, ms.len, seg->entry.offset); + + /* put the 
pointers back into their real positions using original + * alignment */ + ms.addr_64 += seg->align; + ms.phys_addr += seg->align; + ms.ioremap_addr += seg->align; + ms.len -= seg->align; + + /* at this point, the rest of DPDK memory is not initialized, so we + * expect memsegs to be empty */ + memcpy(&mcfg->memseg[i], &ms, + sizeof(struct rte_memseg)); + memcpy(&mcfg->free_memseg[i], &ms, + sizeof(struct rte_memseg)); + + + /* adjust the free_memseg so that there's no free space left */ + mcfg->free_memseg[i].ioremap_addr += mcfg->free_memseg[i].len; + mcfg->free_memseg[i].phys_addr += mcfg->free_memseg[i].len; + mcfg->free_memseg[i].addr_64 += mcfg->free_memseg[i].len; + mcfg->free_memseg[i].len = 0; + + close(fd); + + RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n", + ms.len); + } + + return 0; +} + +/* this happens at a later stage, after general EAL memory initialization */ +int +rte_eal_ivshmem_obj_init(void) +{ + struct rte_ring_list* ring_list = NULL; + struct rte_mem_config * mcfg; + struct ivshmem_segment * seg; + struct rte_memzone * mz; + struct rte_ring * r; + unsigned i, ms, idx; + uint64_t offset; + + /* secondary process would not need any object discovery - it'll all + * already be in shared config */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL) + return 0; + + /* check that we have an initialised ring tail queue */ + if ((ring_list = + RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_RING, rte_ring_list)) == NULL) { + RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n"); + return -1; + } + + mcfg = rte_eal_get_configuration()->mem_config; + + /* create memzones */ + for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) { + + seg = &ivshmem_config->segment[i]; + + /* add memzone */ + if (mcfg->memzone_idx == RTE_MAX_MEMZONE) { + RTE_LOG(ERR, EAL, "No more memory zones available!\n"); + return -1; + } + + idx = mcfg->memzone_idx; + + RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n", + seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len); + + memcpy(&mcfg->memzone[idx], &seg->entry.mz, + sizeof(struct rte_memzone)); + + /* find ioremap address */ + for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) { + if (ms == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Physical address of segment not found!\n"); + return -1; + } + if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) { + offset = mcfg->memzone[idx].addr_64 - + mcfg->memseg[ms].addr_64; + mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr + + offset; + break; + } + } + + mcfg->memzone_idx++; + } + + /* find rings */ + for (i = 0; i < mcfg->memzone_idx; i++) { + mz = &mcfg->memzone[i]; + + /* check if memzone has a ring prefix */ + if (strncmp(mz->name, RTE_RING_MZ_PREFIX, + sizeof(RTE_RING_MZ_PREFIX) - 1) != 0) + continue; + + r = (struct rte_ring*) (mz->addr_64); + + TAILQ_INSERT_TAIL(ring_list, r, next); + + RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr); + } + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + rte_memzone_dump(); + rte_ring_list_dump(); +#endif + + return 0; +} + +/* initialize ivshmem structures */ +int rte_eal_ivshmem_init(void) +{ + struct rte_pci_device * dev; + struct rte_pci_resource * res; + int fd, ret; + char path[PATH_MAX]; + + /* initialize everything to 0 */ + memset(path, 0, sizeof(path)); + ivshmem_config = NULL; + + pagesz = getpagesize(); + + RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n"); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + + if (open_shared_config() < 0) { + RTE_LOG(ERR, EAL, "Could not 
open IVSHMEM config!\n"); + return -1; + } + } + else { + + TAILQ_FOREACH(dev, &device_list, next) { + + if (is_ivshmem_device(dev)) { + + /* IVSHMEM memory is always on BAR2 */ + res = &dev->mem_resource[2]; + + /* if we don't have a BAR2 */ + if (res->len == 0) + continue; + + /* construct pci device path */ + rte_snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH, + dev->addr.domain, dev->addr.bus, dev->addr.devid, + dev->addr.function); + + /* try to find memseg */ + fd = open(path, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", path); + return -1; + } + + /* check if it's a DPDK IVSHMEM device */ + ret = has_ivshmem_metadata(fd, res->len); + + /* is DPDK device */ + if (ret == 1) { + + /* config file creation is deferred until the first + * DPDK device is found. then, it has to be created + * only once. */ + if (ivshmem_config == NULL && + create_shared_config() < 0) { + RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n"); + close(fd); + return -1; + } + + if (read_metadata(path, sizeof(path), fd, res->len) < 0) { + RTE_LOG(ERR, EAL, "Could not read metadata from" + " device %02x:%02x.%x!\n", dev->addr.bus, + dev->addr.devid, dev->addr.function); + close(fd); + return -1; + } + + if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) { + RTE_LOG(WARNING, EAL, + "IVSHMEM PCI device limit exceeded. Increase " + "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in " + "your config file.\n"); + break; + } + + RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n", + dev->addr.bus, dev->addr.devid, dev->addr.function); + + ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr; + rte_snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path, + sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path), + path); + + ivshmem_config->pci_devs_idx++; + } + /* failed to read */ + else if (ret < 0) { + RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n", + strerror(errno)); + close(fd); + return -1; + } + /* not a DPDK device */ + else + RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n"); + + /* close the BAR fd */ + close(fd); + } + } + } + + /* ivshmem_config is not NULL only if config was created and/or mapped */ + if (ivshmem_config) { + if (map_all_segments() < 0) { + RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n"); + return -1; + } + } + else { + RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! 
\n"); + } + + return 0; +} + +#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 3a1822ee2e..6b78d8941d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -113,6 +113,68 @@ static uint64_t baseaddr_offset; #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" +static uint64_t +get_physaddr(void * virtaddr) +{ + int fd; + uint64_t page, physaddr; + unsigned long virt_pfn; + int page_size; + + /* standard page size */ + page_size = getpagesize(); + + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", + __func__, strerror(errno)); + return (uint64_t) -1; + } + + off_t offset; + virt_pfn = (unsigned long)virtaddr / page_size; + offset = sizeof(uint64_t) * virt_pfn; + if (lseek(fd, offset, SEEK_SET) == (off_t) -1) { + RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", + __func__, strerror(errno)); + close(fd); + return (uint64_t) -1; + } + if (read(fd, &page, sizeof(uint64_t)) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", + __func__, strerror(errno)); + close(fd); + return (uint64_t) -1; + } + + /* + * the pfn (page frame number) are bits 0-54 (see + * pagemap.txt in linux Documentation) + */ + physaddr = ((page & 0x7fffffffffffffULL) * page_size); + close(fd); + return physaddr; +} + +/* + * For each hugepage in hugepg_tbl, fill the physaddr value. We find + * it by browsing the /proc/self/pagemap special file. + */ +static int +find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned i; + phys_addr_t addr; + + for (i = 0; i < hpi->num_pages[0]; i++) { + addr = get_physaddr(hugepg_tbl[i].orig_va); + if (addr == (phys_addr_t) -1) + return -1; + hugepg_tbl[i].physaddr = addr; + } + return 0; +} + /* * Check whether address-space layout randomization is enabled in * the kernel. This is important for multi-process as it can prevent @@ -209,7 +271,7 @@ get_virtual_area(size_t *size, size_t hugepage_sz) * map continguous physical blocks in contiguous virtual blocks. 
*/ static int -map_all_hugepages(struct hugepage *hugepg_tbl, +map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, int orig) { int fd; @@ -218,15 +280,25 @@ map_all_hugepages(struct hugepage *hugepg_tbl, void *vma_addr = NULL; size_t vma_len = 0; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + RTE_SET_USED(vma_len); +#endif + for (i = 0; i < hpi->num_pages[0]; i++) { size_t hugepage_sz = hpi->hugepage_sz; if (orig) { hugepg_tbl[i].file_id = i; hugepg_tbl[i].size = hugepage_sz; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + eal_get_hugefile_temp_path(hugepg_tbl[i].filepath, + sizeof(hugepg_tbl[i].filepath), hpi->hugedir, + hugepg_tbl[i].file_id); +#else eal_get_hugefile_path(hugepg_tbl[i].filepath, sizeof(hugepg_tbl[i].filepath), hpi->hugedir, hugepg_tbl[i].file_id); +#endif hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0'; } #ifndef RTE_ARCH_X86_64 @@ -239,6 +311,8 @@ map_all_hugepages(struct hugepage *hugepg_tbl, continue; } #endif + +#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS else if (vma_len == 0) { unsigned j, num_pages; @@ -260,6 +334,7 @@ map_all_hugepages(struct hugepage *hugepg_tbl, if (vma_addr == NULL) vma_len = hugepage_sz; } +#endif /* try to create hugepage file */ fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755); @@ -302,77 +377,189 @@ map_all_hugepages(struct hugepage *hugepg_tbl, return 0; } -/* Unmap all hugepages from original mapping. */ -static int -unmap_all_hugepages_orig(struct hugepage *hugepg_tbl, struct hugepage_info *hpi) -{ - unsigned i; - for (i = 0; i < hpi->num_pages[0]; i++) { - if (hugepg_tbl[i].orig_va) { - munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz); - hugepg_tbl[i].orig_va = NULL; - } - } - return 0; -} +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS /* - * For each hugepage in hugepg_tbl, fill the physaddr value. We find - * it by browsing the /proc/self/pagemap special file. + * Remaps all hugepages into single file segments */ static int -find_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi) +remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) { int fd; - unsigned i; - uint64_t page; - unsigned long virt_pfn; - int page_size; + unsigned i = 0, j, num_pages, page_idx = 0; + void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL; + size_t vma_len = 0; + size_t hugepage_sz = hpi->hugepage_sz; + size_t total_size, offset; + char filepath[MAX_HUGEPAGE_PATH]; + phys_addr_t physaddr; + int socket; - /* standard page size */ - page_size = getpagesize(); + while (i < hpi->num_pages[0]) { - fd = open("/proc/self/pagemap", O_RDONLY); - if (fd < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", - __func__, strerror(errno)); - return -1; - } +#ifndef RTE_ARCH_X86_64 + /* for 32-bit systems, don't remap 1G pages, just reuse original + * map address as final map address. + */ + if (hugepage_sz == RTE_PGSIZE_1G){ + hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; + hugepg_tbl[i].orig_va = NULL; + i++; + continue; + } +#endif - for (i = 0; i < hpi->num_pages[0]; i++) { - off_t offset; - virt_pfn = (unsigned long)hugepg_tbl[i].orig_va / - page_size; - offset = sizeof(uint64_t) * virt_pfn; - if (lseek(fd, offset, SEEK_SET) == (off_t) -1) { - RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", - __func__, strerror(errno)); - close(fd); + /* reserve a virtual area for next contiguous + * physical block: count the number of + * contiguous physical pages. 
*/ + for (j = i+1; j < hpi->num_pages[0] ; j++) { + if (hugepg_tbl[j].physaddr != hugepg_tbl[j-1].physaddr + hugepage_sz) + break; + } + num_pages = j - i; + vma_len = num_pages * hugepage_sz; + + socket = hugepg_tbl[i].socket_id; + + /* get the biggest virtual memory area up to + * vma_len. If it fails, vma_addr is NULL, so + * let the kernel provide the address. */ + vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); + + /* If we can't find a big enough virtual area, work out how many pages + * we are going to get */ + if (vma_addr == NULL) + j = i + 1; + else if (vma_len != num_pages * hugepage_sz) { + num_pages = vma_len / hugepage_sz; + j = i + num_pages; + + } + + hugepg_tbl[page_idx].file_id = page_idx; + eal_get_hugefile_path(filepath, + sizeof(filepath), + hpi->hugedir, + hugepg_tbl[page_idx].file_id); + + /* try to create hugepage file */ + fd = open(filepath, O_CREAT | O_RDWR, 0755); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); return -1; } - if (read(fd, &page, sizeof(uint64_t)) < 0) { - RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", - __func__, strerror(errno)); + + total_size = 0; + for (;i < j; i++) { + + /* unmap current segment */ + if (total_size > 0) + munmap(vma_addr, total_size); + + /* unmap original page */ + munmap(hugepg_tbl[i].orig_va, hugepage_sz); + unlink(hugepg_tbl[i].filepath); + + total_size += hugepage_sz; + + old_addr = vma_addr; + + /* map new, bigger segment */ + vma_addr = mmap(vma_addr, total_size, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (vma_addr == MAP_FAILED || vma_addr != old_addr) { + RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); + close(fd); + return -1; + } + + /* touch the page. this is needed because kernel postpones mapping + * creation until the first page fault. with this, we pin down + * the page and it is marked as used and gets into process' pagemap. + */ + for (offset = 0; offset < total_size; offset += hugepage_sz) + *((volatile uint8_t*) RTE_PTR_ADD(vma_addr, offset)); + } + + /* set shared flock on the file. 
*/ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", + __func__, strerror(errno)); close(fd); return -1; } - /* - * the pfn (page frame number) are bits 0-54 (see - * pagemap.txt in linux Documentation) + rte_snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s", + filepath); + + physaddr = get_physaddr(vma_addr); + + if (physaddr == (phys_addr_t) -1) + return -1; + + hugepg_tbl[page_idx].final_va = vma_addr; + + hugepg_tbl[page_idx].physaddr = physaddr; + + hugepg_tbl[page_idx].repeated = num_pages; + + hugepg_tbl[page_idx].socket_id = socket; + + close(fd); + + /* verify the memory segment - that is, check that every VA corresponds + * to the physical address we expect to see */ - hugepg_tbl[i].physaddr = ((page & 0x7fffffffffffffULL) * page_size); + for (offset = 0; offset < vma_len; offset += hugepage_sz) { + uint64_t expected_physaddr; + + expected_physaddr = hugepg_tbl[page_idx].physaddr + offset; + page_addr = RTE_PTR_ADD(vma_addr, offset); + physaddr = get_physaddr(page_addr); + + if (physaddr != expected_physaddr) { + RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr " + "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64 + " (expected 0x%" PRIx64 ")\n", + page_addr, offset, physaddr, expected_physaddr); + return -1; + } + } + + /* zero out the whole segment */ + memset(hugepg_tbl[page_idx].final_va, 0, total_size); + + page_idx++; } - close(fd); - return 0; + + /* zero out the rest */ + memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file)); + return page_idx; } +#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */ + +/* Unmap all hugepages from original mapping */ +static int +unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned i; + for (i = 0; i < hpi->num_pages[0]; i++) { + if (hugepg_tbl[i].orig_va) { + munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz); + hugepg_tbl[i].orig_va = NULL; + } + } + return 0; +} +#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */ /* * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge * page. */ static int -find_numasocket(struct hugepage *hugepg_tbl, struct hugepage_info *hpi) +find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) { int socket_id; char *end, *nodestr; @@ -455,12 +642,12 @@ error: * is only done at init time. */ static int -sort_by_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi) +sort_by_physaddr(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) { unsigned i, j; int smallest_idx; uint64_t smallest_addr; - struct hugepage tmp; + struct hugepage_file tmp; for (i = 0; i < hpi->num_pages[0]; i++) { smallest_addr = 0; @@ -486,10 +673,10 @@ sort_by_physaddr(struct hugepage *hugepg_tbl, struct hugepage_info *hpi) } /* swap the 2 entries in the table */ - memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage)); + memcpy(&tmp, &hugepg_tbl[smallest_idx], sizeof(struct hugepage_file)); memcpy(&hugepg_tbl[smallest_idx], &hugepg_tbl[i], - sizeof(struct hugepage)); - memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage)); + sizeof(struct hugepage_file)); + memcpy(&hugepg_tbl[i], &tmp, sizeof(struct hugepage_file)); } return 0; } @@ -519,8 +706,8 @@ create_shared_memory(const char *filename, const size_t mem_size) * destination is typically the shared memory. 
*/ static int -copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size, - const struct hugepage * src, int src_size) +copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size, + const struct hugepage_file * src, int src_size) { int src_pos, dst_pos = 0; @@ -529,7 +716,7 @@ copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size, /* error on overflow attempt */ if (dst_pos == dest_size) return -1; - memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage)); + memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file)); dst_pos++; } } @@ -541,7 +728,7 @@ copy_hugepages_to_shared_mem(struct hugepage * dst, int dest_size, * ALL hugepages (not just those we need), additional unmapping needs to be done. */ static int -unmap_unneeded_hugepages(struct hugepage *hugepg_tbl, +unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, unsigned num_hp_info) { @@ -556,9 +743,16 @@ unmap_unneeded_hugepages(struct hugepage *hugepg_tbl, for (size = 0; size < num_hp_info; size++) { for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { unsigned pages_found = 0; + /* traverse until we have unmapped all the unused pages */ for (page = 0; page < nrpages; page++) { - struct hugepage *hp = &hugepg_tbl[page]; + struct hugepage_file *hp = &hugepg_tbl[page]; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* if this page was already cleared */ + if (hp->final_va == NULL) + continue; +#endif /* find a page that matches the criteria */ if ((hp->size == hpi[size].hugepage_sz) && @@ -566,17 +760,67 @@ unmap_unneeded_hugepages(struct hugepage *hugepg_tbl, /* if we skipped enough pages, unmap the rest */ if (pages_found == hpi[size].num_pages[socket]) { - munmap(hp->final_va, hp->size); + uint64_t unmap_len; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + unmap_len = hp->size * hp->repeated; +#else + unmap_len = hp->size; +#endif + + /* get start addr and len of the remaining segment */ + munmap(hp->final_va, (size_t) unmap_len); + hp->final_va = NULL; - if (remove(hp->filepath) == -1) { + if (unlink(hp->filepath) == -1) { RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n", __func__, hp->filepath, strerror(errno)); return -1; } } - /* lock the page and skip */ +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* else, check how much do we need to map */ + else { + int nr_pg_left = + hpi[size].num_pages[socket] - pages_found; + + /* if we need enough memory to fit into the segment */ + if (hp->repeated <= nr_pg_left) { + pages_found += hp->repeated; + } + /* truncate the segment */ + else { + uint64_t final_size = nr_pg_left * hp->size; + uint64_t seg_size = hp->repeated * hp->size; + + void * unmap_va = RTE_PTR_ADD(hp->final_va, + final_size); + int fd; + + munmap(unmap_va, seg_size - final_size); + + fd = open(hp->filepath, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + hp->filepath, strerror(errno)); + return -1; + } + if (ftruncate(fd, final_size) < 0) { + RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n", + hp->filepath, strerror(errno)); + return -1; + } + close(fd); + + pages_found += nr_pg_left; + hp->repeated = nr_pg_left; + } + } +#else + /* else, lock the page and skip */ else pages_found++; +#endif } /* match page */ } /* foreach page */ @@ -712,15 +956,18 @@ static int rte_eal_hugepage_init(void) { struct rte_mem_config *mcfg; - struct hugepage *hugepage, *tmp_hp = NULL; + struct hugepage_file *hugepage, *tmp_hp = NULL; struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; uint64_t memory[RTE_MAX_NUMA_NODES]; unsigned hp_offset; int i, j, 
new_memseg; - int nrpages, total_pages = 0; + int nr_hugefiles, nr_hugepages = 0; void *addr; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + int new_pages_count[MAX_HUGEPAGE_SIZES]; +#endif memset(used_hp, 0, sizeof(used_hp)); @@ -744,7 +991,7 @@ rte_eal_hugepage_init(void) /* meanwhile, also initialize used_hp hugepage sizes in used_hp */ used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz; - total_pages += internal_config.hugepage_info[i].num_pages[0]; + nr_hugepages += internal_config.hugepage_info[i].num_pages[0]; } /* @@ -753,11 +1000,11 @@ rte_eal_hugepage_init(void) * processing done on these pages, shared memory will be created * at a later stage. */ - tmp_hp = malloc(total_pages * sizeof(struct hugepage)); + tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file)); if (tmp_hp == NULL) goto fail; - memset(tmp_hp, 0, total_pages * sizeof(struct hugepage)); + memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file)); hp_offset = 0; /* where we start the current page size entries */ @@ -772,7 +1019,7 @@ rte_eal_hugepage_init(void) */ hpi = &internal_config.hugepage_info[i]; - if (hpi->num_pages == 0) + if (hpi->num_pages[0] == 0) continue; /* map all hugepages available */ @@ -783,7 +1030,7 @@ rte_eal_hugepage_init(void) } /* find physical addresses and sockets for each hugepage */ - if (find_physaddr(&tmp_hp[hp_offset], hpi) < 0){ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){ RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n", (unsigned)(hpi->hugepage_sz / 0x100000)); goto fail; @@ -798,6 +1045,18 @@ rte_eal_hugepage_init(void) if (sort_by_physaddr(&tmp_hp[hp_offset], hpi) < 0) goto fail; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* remap all hugepages into single file segments */ + new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi); + if (new_pages_count[i] < 0){ + RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* we have processed a num of hugepages of this size, so inc offset */ + hp_offset += new_pages_count[i]; +#else /* remap all hugepages */ if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){ RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", @@ -811,22 +1070,38 @@ rte_eal_hugepage_init(void) /* we have processed a num of hugepages of this size, so inc offset */ hp_offset += hpi->num_pages[0]; +#endif } +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + nr_hugefiles = 0; + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + nr_hugefiles += new_pages_count[i]; + } +#else + nr_hugefiles = nr_hugepages; +#endif + + /* clean out the numbers of pages */ for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) for (j = 0; j < RTE_MAX_NUMA_NODES; j++) internal_config.hugepage_info[i].num_pages[j] = 0; /* get hugepages for each socket */ - for (i = 0; i < total_pages; i++) { + for (i = 0; i < nr_hugefiles; i++) { int socket = tmp_hp[i].socket_id; /* find a hugepage info with right size and increment num_pages */ for (j = 0; j < (int) internal_config.num_hugepage_sizes; j++) { if (tmp_hp[i].size == internal_config.hugepage_info[j].hugepage_sz) { +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + internal_config.hugepage_info[j].num_pages[socket] += + tmp_hp[i].repeated; +#else internal_config.hugepage_info[j].num_pages[socket]++; +#endif } } } @@ -836,12 +1111,12 @@ rte_eal_hugepage_init(void) memory[i] = internal_config.socket_mem[i]; /* calculate final number of pages */ - nrpages = calc_num_pages_per_socket(memory, + nr_hugepages = 
calc_num_pages_per_socket(memory, internal_config.hugepage_info, used_hp, internal_config.num_hugepage_sizes); /* error if not enough memory available */ - if (nrpages < 0) + if (nr_hugepages < 0) goto fail; /* reporting in! */ @@ -861,12 +1136,13 @@ rte_eal_hugepage_init(void) /* create shared memory */ hugepage = create_shared_memory(eal_hugepage_info_path(), - nrpages * sizeof(struct hugepage)); + nr_hugefiles * sizeof(struct hugepage_file)); if (hugepage == NULL) { RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); goto fail; } + memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file)); /* * unmap pages that we won't need (looks at used_hp). @@ -883,8 +1159,8 @@ rte_eal_hugepage_init(void) * this procedure only copies those hugepages that have final_va * not NULL. has overflow protection. */ - if (copy_hugepages_to_shared_mem(hugepage, nrpages, - tmp_hp, total_pages) < 0) { + if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles, + tmp_hp, nr_hugefiles) < 0) { RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n"); goto fail; } @@ -893,9 +1169,16 @@ rte_eal_hugepage_init(void) free(tmp_hp); tmp_hp = NULL; - memset(mcfg->memseg, 0, sizeof(mcfg->memseg)); - j = -1; - for (i = 0; i < nrpages; i++) { + /* find earliest free memseg - this is needed because in case of IVSHMEM, + * segments might have already been initialized */ + for (j = 0; j < RTE_MAX_MEMSEG; j++) + if (mcfg->memseg[j].addr == NULL) { + /* move to previous segment and exit loop */ + j--; + break; + } + + for (i = 0; i < nr_hugefiles; i++) { new_memseg = 0; /* if this is a new section, create a new memseg */ @@ -919,7 +1202,11 @@ rte_eal_hugepage_init(void) mcfg->memseg[j].phys_addr = hugepage[i].physaddr; mcfg->memseg[j].addr = hugepage[i].final_va; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated; +#else mcfg->memseg[j].len = hugepage[i].size; +#endif mcfg->memseg[j].socket_id = hugepage[i].socket_id; mcfg->memseg[j].hugepage_sz = hugepage[i].size; } @@ -930,21 +1217,19 @@ rte_eal_hugepage_init(void) hugepage[i].memseg_id = j; } - if (i < nrpages) { + if (i < nr_hugefiles) { RTE_LOG(ERR, EAL, "Can only reserve %d pages " "from %d requested\n" "Current %s=%d is not enough\n" "Please either increase it or request less amount " "of memory.\n", - i, nrpages, RTE_STR(CONFIG_RTE_MAX_MEMSEG), + i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG), RTE_MAX_MEMSEG); return (-ENOMEM); } - return 0; - fail: if (tmp_hp) free(tmp_hp); @@ -973,7 +1258,7 @@ static int rte_eal_hugepage_attach(void) { const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - const struct hugepage *hp = NULL; + const struct hugepage_file *hp = NULL; unsigned num_hp = 0; unsigned i, s = 0; /* s used to track the segment number */ off_t size; @@ -1008,6 +1293,15 @@ rte_eal_hugepage_attach(void) if (mcfg->memseg[s].len == 0) break; +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * doesn't need mapping as it was already mapped earlier + */ + if (mcfg->memseg[s].ioremap_addr != 0) + continue; +#endif + /* * fdzero is mmapped to get a contiguous block of virtual * addresses of the appropriate memseg size. 
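* The reservation is later released and replaced, file by file, with MAP_SHARED mappings of the hugepage files at the same virtual addresses, so the secondary process ends up seeing the segments exactly where the primary process mapped them.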
@@ -1018,9 +1312,9 @@ rte_eal_hugepage_attach(void) if (base_addr == MAP_FAILED || base_addr != mcfg->memseg[s].addr) { RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " - "in /dev/zero to requested address [%p]\n", + "in /dev/zero to requested address [%p]: '%s'\n", (unsigned long long)mcfg->memseg[s].len, - mcfg->memseg[s].addr); + mcfg->memseg[s].addr, strerror(errno)); if (aslr_enabled() > 0) { RTE_LOG(ERR, EAL, "It is recommended to " "disable ASLR in the kernel " @@ -1038,14 +1332,24 @@ rte_eal_hugepage_attach(void) goto error; } - num_hp = size / sizeof(struct hugepage); - RTE_LOG(DEBUG, EAL, "Analysing %u hugepages\n", num_hp); + num_hp = size / sizeof(struct hugepage_file); + RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp); s = 0; while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){ void *addr, *base_addr; uintptr_t offset = 0; - + size_t mapping_size; +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * doesn't need mapping as it was already mapped earlier + */ + if (mcfg->memseg[s].ioremap_addr != 0) { + s++; + continue; + } +#endif /* * free previously mapped memory so we can map the * hugepages into the space @@ -1064,16 +1368,22 @@ rte_eal_hugepage_attach(void) hp[i].filepath); goto error; } +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + mapping_size = hp[i].size * hp[i].repeated; +#else + mapping_size = hp[i].size; +#endif addr = mmap(RTE_PTR_ADD(base_addr, offset), - hp[i].size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, fd, 0); + mapping_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); close(fd); /* close file both on success and on failure */ - if (addr == MAP_FAILED) { + if (addr == MAP_FAILED || + addr != RTE_PTR_ADD(base_addr, offset)) { RTE_LOG(ERR, EAL, "Could not mmap %s\n", hp[i].filepath); goto error; } - offset+=hp[i].size; + offset+=mapping_size; } } RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s, diff --git a/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h b/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h index 7ffd5cd087..034e58d806 100644 --- a/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h +++ b/lib/librte_eal/linuxapp/eal/include/eal_filesystem.h @@ -46,6 +46,8 @@ #include #include #include +#include + #include #include "eal_internal_cfg.h" @@ -84,6 +86,7 @@ eal_hugepage_info_path(void) /** String format for hugepage map files. 
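* e.g. "/mnt/huge/rtemap_2" for hugedir "/mnt/huge", the default "rte" prefix and file id 2 (the hugetlbfs mount point here is only an example).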
*/ #define HUGEFILE_FMT "%s/%smap_%d" +#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d" static inline const char * eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id) @@ -94,6 +97,17 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id return buffer; } +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS +static inline const char * +eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id) +{ + rte_snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir, + internal_config.hugefile_prefix, f_id); + buffer[buflen - 1] = '\0'; + return buffer; +} +#endif + /** define the default filename prefix for the %s values above */ #define HUGEFILE_PREFIX_DEFAULT "rte" diff --git a/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h b/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h index 82dd641448..064cdb0adc 100644 --- a/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h +++ b/lib/librte_eal/linuxapp/eal/include/eal_hugepages.h @@ -35,6 +35,8 @@ #define RTE_LINUXAPP_HUGEPAGES_H_ #include +#include +#include #define MAX_HUGEPAGE_PATH PATH_MAX @@ -42,7 +44,7 @@ * Structure used to store informations about hugepages that we mapped * through the files in hugetlbfs. */ -struct hugepage { +struct hugepage_file { void *orig_va; /**< virtual addr of first mmap() */ void *final_va; /**< virtual addr of 2nd mmap() */ uint64_t physaddr; /**< physical addr */ @@ -50,6 +52,9 @@ struct hugepage { int socket_id; /**< NUMA socket ID */ int file_id; /**< the '%d' in HUGEFILE_FMT */ int memseg_id; /**< the memory segment to which page belongs */ +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + int repeated; /**< number of times the page size is repeated */ +#endif char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */ }; diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile new file mode 100644 index 0000000000..c94f92652e --- /dev/null +++ b/lib/librte_ivshmem/Makefile @@ -0,0 +1,48 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_ivshmem.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ivshmem/rte_ivshmem.c b/lib/librte_ivshmem/rte_ivshmem.c new file mode 100644 index 0000000000..d62d0161d3 --- /dev/null +++ b/lib/librte_ivshmem/rte_ivshmem.c @@ -0,0 +1,884 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_ivshmem.h" + +#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s" +#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s" +#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64 +#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024 +#define IVSHMEM_MAX_PAGES (1 << 12) +#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr) +#define METADATA_SIZE_ALIGNED \ + (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) + +#define GET_PAGEMAP_ADDR(in,addr,dlm,err) \ +{ \ + char *end; \ + errno = 0; \ + addr = strtoull((in), &end, 16); \ + if (errno != 0 || *end != (dlm)) { \ + RTE_LOG(ERR, EAL, err); \ + goto error; \ + } \ + (in) = end + 1; \ +} + +static int pagesz; + +struct memseg_cache_entry { + char filepath[PATH_MAX]; + uint64_t offset; + uint64_t len; +}; + +struct ivshmem_config { + struct rte_ivshmem_metadata * metadata; + struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES]; + /**< account for multiple files per segment case */ + struct flock lock; + rte_spinlock_t sl; +}; + +static struct ivshmem_config +ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES]; + +static rte_spinlock_t global_cfg_sl; + +static struct ivshmem_config * +get_config_by_name(const char * name) +{ + struct rte_ivshmem_metadata * config; + unsigned i; + + for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) { + config = ivshmem_global_config[i].metadata; + if (config == NULL) + return NULL; + if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0) + return &ivshmem_global_config[i]; + } + + return NULL; +} + +static int +overlap(const struct rte_memzone * s1, const struct rte_memzone * s2) +{ + uint64_t start1, end1, start2, end2; + + start1 = s1->addr_64; + end1 = s1->addr_64 + s1->len; + start2 = s2->addr_64; + end2 = s2->addr_64 + s2->len; + + if (start1 >= start2 && start1 < end2) + return 1; + if (start2 >= start1 && start2 < end1) + return 1; + + return 0; +} + +static struct rte_memzone * +get_memzone_by_addr(const void * addr) +{ + struct rte_memzone * tmp, * mz; + struct rte_mem_config * mcfg; + int i; + + mcfg = rte_eal_get_configuration()->mem_config; + mz = NULL; + + /* find memzone for the ring */ + for (i = 0; i < RTE_MAX_MEMZONE; i++) { + tmp = &mcfg->memzone[i]; + + if (tmp->addr_64 == (uint64_t) addr) { + mz = tmp; + break; + } + } + + return mz; +} + +static int +entry_compare(const void * a, const void * b) +{ + const struct rte_ivshmem_metadata_entry * e1 = + (const struct rte_ivshmem_metadata_entry*) a; + const struct rte_ivshmem_metadata_entry * e2 = + (const struct rte_ivshmem_metadata_entry*) b; + + /* move unallocated zones to the end */ + if (e1->mz.addr == NULL && e2->mz.addr == NULL) + return 0; + if (e1->mz.addr == 0) + return 1; + if (e2->mz.addr == 0) + return -1; + + /* sort by physical address, ascending */ + if (e1->mz.phys_addr < e2->mz.phys_addr) + return -1; + if (e1->mz.phys_addr > e2->mz.phys_addr) + return 1; + return 0; +} + +/* fills hugepage cache entry for a given start virt_addr */ +static int +get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e) +{ + uint64_t start_addr, end_addr; + char *start, *path_end; + char buf[PATH_MAX*2]; + FILE *f; + + start = NULL; + path_end = NULL; + start_addr = 0; + + memset(e->filepath, 0, sizeof(e->filepath)); + + /* open /proc/self/maps */ + f = fopen("/proc/self/maps", "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n"); + return -1; + } + + /* parse maps */ +
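+ /* each maps line has the form: + * start-end perms offset dev inode pathname + * e.g. "7f1c40000000-7f1c40200000 rw-s 00000000 00:24 4117 /mnt/huge/rtemap_2" + * (the values in the example are illustrative) */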
while (fgets(buf, sizeof(buf), f) != NULL) { + + /* get endptr to end of start addr */ + start = buf; + + GET_PAGEMAP_ADDR(start,start_addr,'-', + "Cannot find start address in maps!\n"); + + /* if start address is bigger than our address, skip */ + if (start_addr > virt_addr) + continue; + + GET_PAGEMAP_ADDR(start,end_addr,' ', + "Cannot find end address in maps!\n"); + + /* if end address is less than our address, skip */ + if (end_addr <= virt_addr) + continue; + + /* find where the path starts */ + start = strstr(start, "/"); + + if (start == NULL) + continue; + + /* at this point, we know that this is our map. + * now let's find the file */ + path_end = strstr(start, "\n"); + break; + } + + if (path_end == NULL) { + RTE_LOG(ERR, EAL, "Hugefile path not found!\n"); + goto error; + } + + /* calculate offset and copy the file path */ + rte_snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start); + + e->offset = virt_addr - start_addr; + + fclose(f); + + return 0; +error: + fclose(f); + return -1; +} + +/* + * This is a complex function. What it does is the following: + * 1. Goes through metadata and gets list of hugepages involved + * 2. Sorts the hugepages by size (1G first) + * 3. Goes through metadata again and writes correct offsets + * 4. Goes through pages and finds out their filenames, offsets etc. + */ +static int +build_config(struct rte_ivshmem_metadata * metadata) +{ + struct rte_ivshmem_metadata_entry * e_local; + struct memseg_cache_entry * ms_local; + struct rte_memseg pages[IVSHMEM_MAX_PAGES]; + struct rte_ivshmem_metadata_entry *entry; + struct memseg_cache_entry * c_entry, * prev_entry; + struct ivshmem_config * config; + unsigned i, j, mz_iter, ms_iter; + uint64_t biggest_len; + int biggest_idx; + + /* return error if we try to use an unknown config file */ + config = get_config_by_name(metadata->name); + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name); + goto fail_e; + } + + memset(pages, 0, sizeof(pages)); + + e_local = malloc(sizeof(config->metadata->entry)); + if (e_local == NULL) + goto fail_e; + ms_local = malloc(sizeof(config->memseg_cache)); + if (ms_local == NULL) + goto fail_ms; + + + /* make local copies before doing anything */ + memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry)); + memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache)); + + qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry), + entry_compare); + + /* first pass - collect all huge pages */ + for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) { + + entry = &e_local[mz_iter]; + + uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, + entry->mz.hugepage_sz); + uint64_t offset = entry->mz.addr_64 - start_addr; + uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset, + entry->mz.hugepage_sz); + + if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0) + continue; + + int start_page; + + /* find first unused page - mz are phys_addr sorted so we don't have to + * look out for holes */ + for (i = 0; i < RTE_DIM(pages); i++) { + + /* skip if we already have this page */ + if (pages[i].addr_64 == start_addr) { + start_addr += entry->mz.hugepage_sz; + len -= entry->mz.hugepage_sz; + continue; + } + /* we found a new page */ + else if (pages[i].addr_64 == 0) { + start_page = i; + break; + } + } + if (i == RTE_DIM(pages)) { + RTE_LOG(ERR, EAL, "Cannot find unused page!\n"); + goto fail; + } + + /* populate however many pages the memzone has */ 
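+ /* e.g. a 3 MB memzone backed by 2 MB pages takes two page entries, + * or three if it straddles an extra page boundary */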
+ for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) { + + pages[i].addr_64 = start_addr; + pages[i].len = entry->mz.hugepage_sz; + start_addr += entry->mz.hugepage_sz; + len -= entry->mz.hugepage_sz; + } + /* if there's still length left */ + if (len != 0) { + RTE_LOG(ERR, EAL, "Not enough space for pages!\n"); + goto fail; + } + } + + /* second pass - sort pages by size */ + for (i = 0; i < RTE_DIM(pages); i++) { + + if (pages[i].addr == NULL) + break; + + biggest_len = 0; + biggest_idx = -1; + + /* + * browse all entries starting at 'i', and find the + * entry with the biggest length + */ + for (j = i; j < RTE_DIM(pages); j++) { + if (pages[j].addr == NULL) + break; + if (biggest_len == 0 || + pages[j].len > biggest_len) { + biggest_len = pages[j].len; + biggest_idx = j; + } + } + + /* should not happen */ + if (biggest_idx == -1) { + RTE_LOG(ERR, EAL, "Error sorting by size!\n"); + goto fail; + } + if (i != (unsigned) biggest_idx) { + struct rte_memseg tmp; + + memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg)); + + /* we don't want to break contiguousness, so instead of just + * swapping segments, we move all the preceding segments to the + * right and then put the old segment @ biggest_idx in place of + * segment @ i (j counts down only to i + 1 because it is + * unsigned) */ + for (j = biggest_idx; j > i; j--) { + memcpy(&pages[j], &pages[j-1], sizeof(struct rte_memseg)); + memset(&pages[j-1], 0, sizeof(struct rte_memseg)); + } + + /* put old biggest segment to its new place */ + memcpy(&pages[i], &tmp, sizeof(struct rte_memseg)); + } + } + + /* third pass - write correct offsets */ + for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) { + + uint64_t offset = 0; + + entry = &e_local[mz_iter]; + + if (entry->mz.addr_64 == 0) + break; + + /* find page for current memzone */ + for (i = 0; i < RTE_DIM(pages); i++) { + /* we found our page */ + if (entry->mz.addr_64 >= pages[i].addr_64 && + entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) { + entry->offset = (entry->mz.addr_64 - pages[i].addr_64) + + offset; + break; + } + offset += pages[i].len; + } + if (i == RTE_DIM(pages)) { + RTE_LOG(ERR, EAL, "Page not found!\n"); + goto fail; + } + } + + ms_iter = 0; + prev_entry = NULL; + + /* fourth pass - create proper memseg cache */ + for (i = 0; i < RTE_DIM(pages) && + ms_iter <= RTE_DIM(config->memseg_cache); i++) { + if (pages[i].addr_64 == 0) + break; + + + if (ms_iter == RTE_DIM(config->memseg_cache)) { + RTE_LOG(ERR, EAL, "Memseg cache is full!\n"); + goto fail; + } + + c_entry = &ms_local[ms_iter]; + c_entry->len = pages[i].len; + + if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0) + goto fail; + + /* if previous entry has the same filename and is contiguous, + * clear current entry and increase previous entry's length + */ + if (prev_entry != NULL && + strncmp(c_entry->filepath, prev_entry->filepath, + sizeof(c_entry->filepath)) == 0 && + prev_entry->offset + prev_entry->len == c_entry->offset) { + prev_entry->len += pages[i].len; + memset(c_entry, 0, sizeof(struct memseg_cache_entry)); + } + else { + prev_entry = c_entry; + ms_iter++; + } + } + + /* update current configuration with new valid data */ + memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry)); + memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache)); + + free(ms_local); + free(e_local); + + return 0; +fail: + free(ms_local); +fail_ms: + free(e_local); +fail_e: + return -1; +} + +static int +add_memzone_to_metadata(const struct rte_memzone * mz, + struct ivshmem_config * config) +{ + struct
rte_ivshmem_metadata_entry * entry; + unsigned i; + + rte_spinlock_lock(&config->sl); + + /* find free slot in this config */ + for (i = 0; i < RTE_DIM(config->metadata->entry); i++) { + entry = &config->metadata->entry[i]; + + if (entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) { + RTE_LOG(ERR, EAL, "Overlapping memzones!\n"); + goto fail; + } + + /* if addr is zero, the memzone is probably free */ + if (entry->mz.addr_64 == 0) { + RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n", + mz->name, mz->addr, config->metadata->name); + memcpy(&entry->mz, mz, sizeof(struct rte_memzone)); + + /* rebuild the config for this metadata file */ + if (build_config(config->metadata) < 0) + goto fail; + + break; + } + } + + /* if we reached the maximum, that means we have no place in config */ + if (i == RTE_DIM(config->metadata->entry)) { + RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n", + config->metadata->name); + goto fail; + } + + rte_spinlock_unlock(&config->sl); + return 0; +fail: + rte_spinlock_unlock(&config->sl); + return -1; +} + +static int +add_ring_to_metadata(const struct rte_ring * r, + struct ivshmem_config * config) +{ + struct rte_memzone * mz; + + mz = get_memzone_by_addr(r); + + if (!mz) { + RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n"); + return -1; + } + + return add_memzone_to_metadata(mz, config); +} + +static int +add_mempool_to_metadata(const struct rte_mempool * mp, + struct ivshmem_config * config) +{ + struct rte_memzone * mz; + int ret; + + mz = get_memzone_by_addr(mp); + ret = 0; + + if (!mz) { + RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n"); + return -1; + } + + /* mempool consists of memzone and ring */ + ret = add_memzone_to_metadata(mz, config); + if (ret < 0) + return -1; + + return add_ring_to_metadata(mp->ring, config); +} + +int +rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name) +{ + struct ivshmem_config * config; + + if (name == NULL || r == NULL) + return -1; + + config = get_config_by_name(name); + + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); + return -1; + } + + return add_ring_to_metadata(r, config); +} + +int +rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name) +{ + struct ivshmem_config * config; + + if (name == NULL || mz == NULL) + return -1; + + config = get_config_by_name(name); + + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); + return -1; + } + + return add_memzone_to_metadata(mz, config); +} + +int +rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name) +{ + struct ivshmem_config * config; + + if (name == NULL || mp == NULL) + return -1; + + config = get_config_by_name(name); + + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); + return -1; + } + + return add_mempool_to_metadata(mp, config); +} + +static inline void +ivshmem_config_path(char *buffer, size_t bufflen, const char *name) +{ + rte_snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name); +} + + + +static inline +void *ivshmem_metadata_create(const char *name, size_t size, + struct flock *lock) +{ + int retval, fd; + void *metadata_addr; + char pathname[PATH_MAX]; + + ivshmem_config_path(pathname, sizeof(pathname), name); + + fd = open(pathname, O_RDWR | O_CREAT, 0660); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname); + return NULL; + } + + size = METADATA_SIZE_ALIGNED; + + retval = fcntl(fd, F_SETLK, lock); + if (retval < 0){ + close(fd); +
RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another " + "process using it?\n", pathname); + return NULL; + } + + retval = ftruncate(fd, size); + if (retval < 0){ + close(fd); + RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname); + return NULL; + } + + metadata_addr = mmap(NULL, size, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (metadata_addr == MAP_FAILED){ + RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname); + + /* we don't care if we can't unlock */ + fcntl(fd, F_UNLCK, lock); + close(fd); + + return NULL; + } + + return metadata_addr; +} + +int rte_ivshmem_metadata_create(const char *name) +{ + struct ivshmem_config * ivshmem_config; + unsigned index; + + if (pagesz == 0) + pagesz = getpagesize(); + + if (name == NULL) + return -1; + + rte_spinlock_lock(&global_cfg_sl); + + for (index = 0; index < RTE_DIM(ivshmem_global_config); index++) { + if (ivshmem_global_config[index].metadata == NULL) { + ivshmem_config = &ivshmem_global_config[index]; + break; + } + } + + if (index == RTE_DIM(ivshmem_global_config)) { + RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. " + "Maximum has been reached\n"); + rte_spinlock_unlock(&global_cfg_sl); + return -1; + } + + ivshmem_config->lock.l_type = F_WRLCK; + ivshmem_config->lock.l_whence = SEEK_SET; + + ivshmem_config->lock.l_start = 0; + ivshmem_config->lock.l_len = METADATA_SIZE_ALIGNED; + + ivshmem_global_config[index].metadata = ((struct rte_ivshmem_metadata *) + ivshmem_metadata_create( + name, + sizeof(struct rte_ivshmem_metadata), + &ivshmem_config->lock)); + + if (ivshmem_global_config[index].metadata == NULL) { + rte_spinlock_unlock(&global_cfg_sl); + return -1; + } + + /* Metadata setup */ + memset(ivshmem_config->metadata, 0, sizeof(struct rte_ivshmem_metadata)); + ivshmem_config->metadata->magic_number = IVSHMEM_MAGIC; + rte_snprintf(ivshmem_config->metadata->name, + sizeof(ivshmem_config->metadata->name), "%s", name); + + rte_spinlock_unlock(&global_cfg_sl); + + return 0; +} + +int +rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name) +{ + const struct memseg_cache_entry * ms_cache, *entry; + struct ivshmem_config * config; + char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr; + char cfg_file_path[PATH_MAX]; + unsigned remaining_len, tmplen, iter; + uint64_t shared_mem_size, zero_size, total_size; + + if (buffer == NULL || name == NULL) + return -1; + + config = get_config_by_name(name); + + if (config == NULL) { + RTE_LOG(ERR, EAL, "Config %s not found!\n", name); + return -1; + } + + rte_spinlock_lock(&config->sl); + + /* prepare metadata file path */ + rte_snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT, + config->metadata->name); + + ms_cache = config->memseg_cache; + + cmdline_ptr = cmdline; + remaining_len = sizeof(cmdline); + + shared_mem_size = 0; + iter = 0; + + while ((ms_cache[iter].len != 0) && (iter < RTE_DIM(config->metadata->entry))) { + + entry = &ms_cache[iter]; + + /* Offset and sizes within the current pathname */ + tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT, + entry->filepath, entry->offset, entry->len); + + shared_mem_size += entry->len; + + cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen); + remaining_len -= tmplen; + + if (remaining_len == 0) { + RTE_LOG(ERR, EAL, "Command line too long!\n"); + rte_spinlock_unlock(&config->sl); + return -1; + } + + iter++; + } + + total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED); + zero_size = total_size - shared_mem_size - 
METADATA_SIZE_ALIGNED; + + /* add /dev/zero to command-line to fill the space */ + tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT, + "/dev/zero", + 0x0, + zero_size); + + cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen); + remaining_len -= tmplen; + + if (remaining_len == 0) { + RTE_LOG(ERR, EAL, "Command line too long!\n"); + rte_spinlock_unlock(&config->sl); + return -1; + } + + /* add metadata file to the end of command-line */ + tmplen = rte_snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT, + cfg_file_path, + 0x0, + METADATA_SIZE_ALIGNED); + + cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen); + remaining_len -= tmplen; + + if (remaining_len == 0) { + RTE_LOG(ERR, EAL, "Command line too long!\n"); + rte_spinlock_unlock(&config->sl); + return -1; + } + + /* if current length of the command line is bigger than the buffer supplied + * by the user, or if command-line is bigger than what IVSHMEM accepts */ + if ((sizeof(cmdline) - remaining_len) > size) { + RTE_LOG(ERR, EAL, "Buffer is too short!\n"); + rte_spinlock_unlock(&config->sl); + return -1; + } + /* complete the command-line */ + rte_snprintf(buffer, size, + IVSHMEM_QEMU_CMD_LINE_HEADER_FMT, + total_size >> 20, + cmdline); + + rte_spinlock_unlock(&config->sl); + + return 0; +} + + +void +rte_ivshmem_metadata_dump(const char *name) +{ + unsigned i = 0; + struct ivshmem_config * config; + struct rte_ivshmem_metadata_entry *entry; +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + uint64_t addr; + uint64_t end, hugepage_sz; + struct memseg_cache_entry e; +#endif + + if (name == NULL) + return; + + /* return error if we try to use an unknown config file */ + config = get_config_by_name(name); + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); + return; + } + + rte_spinlock_lock(&config->sl); + + entry = &config->metadata->entry[0]; + + while (i < RTE_DIM(config->metadata->entry) && entry->mz.addr != NULL) { + + printf("Entry %u: name:<%-20s>, phys:0x%-15lx, len:0x%-15lx, " + "virt:%-15p, off:0x%-15lx\n", + i, + entry->mz.name, + entry->mz.phys_addr, + entry->mz.len, + entry->mz.addr, + entry->offset); + i++; + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + printf("\tHugepage files:\n"); + + hugepage_sz = entry->mz.hugepage_sz; + addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz); + end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr), + hugepage_sz); + + for (; addr < end; addr += hugepage_sz) { + memset(&e, 0, sizeof(e)); + + get_hugefile_by_virt_addr(addr, &e); + + printf("\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n", + addr, addr + hugepage_sz, e.offset, e.filepath); + } +#endif + entry++; + } + + rte_spinlock_unlock(&config->sl); +} diff --git a/lib/librte_ivshmem/rte_ivshmem.h b/lib/librte_ivshmem/rte_ivshmem.h new file mode 100644 index 0000000000..9ff54bb81e --- /dev/null +++ b/lib/librte_ivshmem/rte_ivshmem.h @@ -0,0 +1,163 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution.
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_IVSHMEM_H_ +#define RTE_IVSHMEM_H_ + +#include +#include + +/** + * @file + * + * The RTE IVSHMEM interface provides functions to create metadata files + * describing memory segments to be shared via QEMU IVSHMEM. + */ + + +#ifdef __cplusplus +extern "C" { +#endif + +#define IVSHMEM_MAGIC 0x0BADC0DE +#define IVSHMEM_NAME_LEN 32 + +/** + * Structure that holds IVSHMEM shared metadata entry. + */ +struct rte_ivshmem_metadata_entry { + struct rte_memzone mz; /**< shared memzone */ + uint64_t offset; /**< offset of memzone within IVSHMEM device */ +}; + +/** + * Structure that holds IVSHMEM metadata. + */ +struct rte_ivshmem_metadata { + int magic_number; /**< magic number */ + char name[IVSHMEM_NAME_LEN]; /**< name of the metadata file */ + struct rte_ivshmem_metadata_entry entry[RTE_LIBRTE_IVSHMEM_MAX_ENTRIES]; + /**< metadata entries */ +}; + +/** + * Creates metadata file with a given name + * + * @param name + * Name of metadata file to be created + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_create(const char * name); + +/** + * Adds memzone to a specific metadata file + * + * @param mz + * Memzone to be added + * @param md_name + * Name of metadata file for the memzone to be added to + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, + const char * md_name); + +/** + * Adds a ring descriptor to a specific metadata file + * + * @param r + * Ring descriptor to be added + * @param md_name + * Name of metadata file for the ring to be added to + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_add_ring(const struct rte_ring * r, + const char * md_name); + +/** + * Adds a mempool to a specific metadata file + * + * @param mp + * Mempool to be added + * @param md_name + * Name of metadata file for the mempool to be added to + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, + const char * md_name); + + +/** + * Generates the QEMU command-line for IVSHMEM device for a given metadata file. + * This function is to be called after all the objects were added. + * + * @param buffer + * Buffer to be filled with the command line arguments. + * @param size + * Size of the buffer. 
+ * @param name + * Name of metadata file to generate QEMU command-line parameters for + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, + const char *name); + + +/** + * Dump all metadata entries from a given metadata file to the console. + * + * @param name + * Name of the metadata file to be dumped to console. + */ +void rte_ivshmem_metadata_dump(const char *name); + + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_IVSHMEM_H_ */ diff --git a/mk/rte.app.mk b/mk/rte.app.mk index 89b1c3b634..a974dc8bcc 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -64,6 +64,12 @@ LDLIBS += -lrte_kni endif endif +ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y) +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) +LDLIBS += -lrte_ivshmem endif +endif + ifeq ($(CONFIG_RTE_LIBRTE_E1000_PMD),y) LDLIBS += -lrte_pmd_e1000 endif
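---

Usage sketch (a minimal host-side example, not part of the patch itself): the flow with the new API is to create a metadata file, register the objects to be shared, then ask the library for the QEMU command line. The metadata name "vm_1", the ring name and size, and the 1024-byte buffer below are illustrative only, and error handling is abbreviated.

    #include <stdio.h>

    #include <rte_memory.h>
    #include <rte_ring.h>
    #include <rte_ivshmem.h>

    static int
    export_ring_to_guest(void)
    {
        char cmdline[1024]; /* must fit all fd:path:offset:len triplets */
        struct rte_ring *r;

        /* the ring lives in hugepage memory, so it can be shared as-is */
        r = rte_ring_create("guest_ring", 256, SOCKET_ID_ANY, 0);
        if (r == NULL)
            return -1;

        /* create a metadata file and register the ring in it */
        if (rte_ivshmem_metadata_create("vm_1") < 0)
            return -1;
        if (rte_ivshmem_metadata_add_ring(r, "vm_1") < 0)
            return -1;

        /* produce the "-device ivshmem,..." arguments to pass to QEMU */
        if (rte_ivshmem_metadata_cmdline_generate(cmdline,
                sizeof(cmdline), "vm_1") < 0)
            return -1;

        printf("%s\n", cmdline);
        return 0;
    }

Given IVSHMEM_QEMU_CMD_LINE_HEADER_FMT and IVSHMEM_QEMU_CMD_FD_FMT above, the generated string has the shape (paths and sizes illustrative):

    -device ivshmem,size=1024M,shm=fd:/mnt/huge/rtemap_0:0x0:0x40000000:/dev/zero:0x0:0x3fe00000:/var/run/.dpdk_ivshmem_metadata_vm_1:0x0:0x200000

No explicit call is needed on the guest side, since the EAL recognizes and maps DPDK IVSHMEM devices during initialization.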