From: Bruce Richardson Date: Wed, 12 Feb 2014 15:32:25 +0000 (+0000) Subject: xen: core library changes X-Git-Tag: spdx-start~11031 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=148f963fb5323c1c6b6d5cea95084deb25cc73f8;p=dpdk.git xen: core library changes Core support for using the Intel DPDK with Xen Dom0 - including EAL changes and mempool changes. These changes encompass how memory mapping is done, including support for initializing a memory pool inside an already-allocated block of memory. KNI sample app updated to use KNI close function when used with Xen. Signed-off-by: Bruce Richardson --- diff --git a/app/test-pmd/Makefile b/app/test-pmd/Makefile index 69632340a1..9882c6145e 100644 --- a/app/test-pmd/Makefile +++ b/app/test-pmd/Makefile @@ -58,6 +58,12 @@ SRCS-$(CONFIG_RTE_TEST_PMD) += csumonly.c ifeq ($(CONFIG_RTE_LIBRTE_IEEE1588),y) SRCS-$(CONFIG_RTE_TEST_PMD) += ieee1588fwd.c endif +SRCS-$(CONFIG_RTE_TEST_PMD) += mempool_anon.c + +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) +CFLAGS_mempool_anon.o := -D_GNU_SOURCE +endif +CFLAGS_cmdline.o := -D_GNU_SOURCE # this application needs libraries first DEPDIRS-$(CONFIG_RTE_TEST_PMD) += lib diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index ae0ea12f84..a7a125b66f 100644 --- a/app/test-pmd/config.c +++ b/app/test-pmd/config.c @@ -974,10 +974,12 @@ pkt_fwd_config_display(struct fwd_config *cfg) streamid_t sm_id; printf("%s packet forwarding - ports=%d - cores=%d - streams=%d - " - "NUMA support %s\n", - cfg->fwd_eng->fwd_mode_name, - cfg->nb_fwd_ports, cfg->nb_fwd_lcores, cfg->nb_fwd_streams, - numa_support == 1 ? "enabled" : "disabled"); + "NUMA support %s, MP over anonymous pages %s\n", + cfg->fwd_eng->fwd_mode_name, + cfg->nb_fwd_ports, cfg->nb_fwd_lcores, cfg->nb_fwd_streams, + numa_support == 1 ? "enabled" : "disabled", + mp_anon != 0 ? "enabled" : "disabled"); + for (lc_id = 0; lc_id < cfg->nb_fwd_lcores; lc_id++) { printf("Logical Core %u (socket %u) forwards packets on " "%d streams:", diff --git a/app/test-pmd/mempool_anon.c b/app/test-pmd/mempool_anon.c new file mode 100644 index 0000000000..f22e4f9ab9 --- /dev/null +++ b/app/test-pmd/mempool_anon.c @@ -0,0 +1,201 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include "mempool_osdep.h"
+#include
+
+#ifdef RTE_EXEC_ENV_LINUXAPP
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+
+#define PAGEMAP_FNAME "/proc/self/pagemap"
+
+/*
+ * the pfn (page frame number) is in bits 0-54 (see pagemap.txt in the linux
+ * Documentation).
+ */
+#define PAGEMAP_PFN_BITS	54
+#define PAGEMAP_PFN_MASK	RTE_LEN2MASK(PAGEMAP_PFN_BITS, phys_addr_t)
+
+
+static int
+get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz)
+{
+	int32_t fd, rc;
+	uint32_t i, nb;
+	off_t ofs;
+
+	ofs = (uintptr_t)va / pg_sz * sizeof(*pa);
+	nb = pg_num * sizeof(*pa);
+
+	if ((fd = open(PAGEMAP_FNAME, O_RDONLY)) < 0)
+		return (ENOENT);
+
+	if ((rc = pread(fd, pa, nb, ofs)) < 0 || (rc -= nb) != 0) {
+
+		RTE_LOG(ERR, USER1, "failed read of %u bytes from \'%s\' "
+			"at offset %zu, error code: %d\n",
+			nb, PAGEMAP_FNAME, (size_t)ofs, errno);
+		rc = ENOENT;
+	}
+
+	close(fd);
+
+	for (i = 0; i != pg_num; i++)
+		pa[i] = (pa[i] & PAGEMAP_PFN_MASK) * pg_sz;
+
+	return (rc);
+}
+
+struct rte_mempool *
+mempool_anon_create(const char *name, unsigned elt_num, unsigned elt_size,
+	unsigned cache_size, unsigned private_data_size,
+	rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+	rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+	int socket_id, unsigned flags)
+{
+	struct rte_mempool *mp;
+	phys_addr_t *pa;
+	char *va, *uv;
+	uint32_t n, pg_num, pg_shift, pg_sz, total_size;
+	size_t sz;
+	ssize_t usz;
+	int32_t rc;
+
+	rc = ENOMEM;
+	mp = NULL;
+
+	pg_sz = getpagesize();
+	if (rte_is_power_of_2(pg_sz) == 0) {
+		rte_errno = EINVAL;
+		return (mp);
+	}
+
+	pg_shift = rte_bsf32(pg_sz);
+
+	total_size = rte_mempool_calc_obj_size(elt_size, flags, NULL);
+
+	/* calc max memory size and max number of pages needed. */
+	sz = rte_mempool_xmem_size(elt_num, total_size, pg_shift);
+	pg_num = sz >> pg_shift;
+
+	/* get chunk of virtually contiguous memory. */
+	if ((va = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_ANONYMOUS | MAP_LOCKED,
+			-1, 0)) == MAP_FAILED) {
+		RTE_LOG(ERR, USER1, "%s(%s) failed mmap of %zu bytes, "
+			"error code: %d\n",
+			__func__, name, sz, errno);
+		rte_errno = rc;
+		return (mp);
+	}
+
+	/* extract physical mappings of the allocated memory. */
+	if ((pa = calloc(pg_num, sizeof (*pa))) != NULL &&
+			(rc = get_phys_map(va, pa, pg_num, pg_sz)) == 0) {
+
+		/*
+		 * Check that the allocated size is big enough to hold elt_num
+		 * objects and calculate how many bytes are actually required.
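+		 * (If the area is too small, rte_mempool_xmem_usage() returns
+		 * the negated count of objects that do fit; that case is
+		 * reported just below.)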
+ */ + + if ((usz = rte_mempool_xmem_usage(va, elt_num, total_size, pa, + pg_num, pg_shift)) < 0) { + + n = -usz; + rc = ENOENT; + RTE_LOG(ERR, USER1, "%s(%s) only %u objects from %u " + "requested can be created over " + "mmaped region %p of %zu bytes\n", + __func__, name, n, elt_num, va, sz); + } else { + + /* unmap unused pages if any */ + if ((size_t)usz < sz) { + + uv = va + usz; + usz = sz - usz; + + RTE_LOG(INFO, USER1, + "%s(%s): unmap unused %zu of %zu " + "mmaped bytes @%p\n", + __func__, name, (size_t)usz, sz, uv); + munmap(uv, usz); + sz -= usz; + pg_num = sz >> pg_shift; + } + + if ((mp = rte_mempool_xmem_create(name, elt_num, + elt_size, cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags, va, pa, pg_num, + pg_shift)) != NULL) + + RTE_VERIFY(elt_num == mp->size); + } + } + + if (mp == NULL) { + munmap(va, sz); + rte_errno = rc; + } + + free(pa); + return (mp); +} + +#else /* RTE_EXEC_ENV_LINUXAPP */ + + +struct rte_mempool * +mempool_anon_create(__rte_unused const char *name, + __rte_unused unsigned elt_num, __rte_unused unsigned elt_size, + __rte_unused unsigned cache_size, + __rte_unused unsigned private_data_size, + __rte_unused rte_mempool_ctor_t *mp_init, + __rte_unused void *mp_init_arg, + __rte_unused rte_mempool_obj_ctor_t *obj_init, + __rte_unused void *obj_init_arg, + __rte_unused int socket_id, __rte_unused unsigned flags) +{ + rte_errno = ENOTSUP; + return (NULL); +} + +#endif /* RTE_EXEC_ENV_LINUXAPP */ diff --git a/app/test-pmd/mempool_osdep.h b/app/test-pmd/mempool_osdep.h new file mode 100644 index 0000000000..9f4a47b112 --- /dev/null +++ b/app/test-pmd/mempool_osdep.h @@ -0,0 +1,54 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MEMPOOL_OSDEP_H_ +#define _MEMPOOL_OSDEP_H_ + +#include + +/** + * @file + * mempool OS specific header. + */ + +/* + * Create mempool over objects from mmap(..., MAP_ANONYMOUS, ...). 
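+ *
+ * Example call, mirroring how testpmd's mbuf_pool_create() uses it when the
+ * --mp-anon option is set (see the testpmd.c hunk below):
+ *
+ *	mp = mempool_anon_create(pool_name, nb_mbuf, mb_size,
+ *		(unsigned) mb_mempool_cache,
+ *		sizeof(struct rte_pktmbuf_pool_private),
+ *		testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
+ *		testpmd_mbuf_ctor, &mb_ctor_arg, socket_id, 0);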
+ */ +struct rte_mempool * +mempool_anon_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags); + +#endif /*_RTE_MEMPOOL_OSDEP_H_ */ diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c index 454961605a..e5b026c6cc 100644 --- a/app/test-pmd/parameters.c +++ b/app/test-pmd/parameters.c @@ -478,6 +478,7 @@ launch_args_parse(int argc, char** argv) { "coremask", 1, 0, 0 }, { "portmask", 1, 0, 0 }, { "numa", 0, 0, 0 }, + { "mp-anon", 0, 0, 0 }, { "port-numa-config", 1, 0, 0 }, { "ring-numa-config", 1, 0, 0 }, { "socket-num", 1, 0, 0 }, @@ -594,6 +595,9 @@ launch_args_parse(int argc, char** argv) memset(rxring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS); memset(txring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS); } + if (!strcmp(lgopts[opt_idx].name, "mp-anon")) { + mp_anon = 1; + } if (!strcmp(lgopts[opt_idx].name, "port-numa-config")) { if (parse_portnuma_config(optarg)) rte_exit(EXIT_FAILURE, diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index a827d367ec..0b376ccc4d 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -72,8 +72,12 @@ #include #include #include +#ifdef RTE_LIBRTE_PMD_XENVIRT +#include +#endif #include "testpmd.h" +#include "mempool_osdep.h" uint16_t verbose_level = 0; /**< Silent by default. */ @@ -95,6 +99,11 @@ uint8_t numa_support = 0; /**< No numa support by default */ */ uint8_t socket_num = UMA_NO_CONFIG; +/* + * Use ANONYMOUS mapped memory (might be not physically continuous) for mbufs. + */ +uint8_t mp_anon = 0; + /* * Record the Ethernet address of peer target ports to which packets are * forwarded. @@ -407,8 +416,7 @@ testpmd_mbuf_pool_ctor(struct rte_mempool *mp, return; } mbp_ctor_arg = (struct mbuf_pool_ctor_arg *) opaque_arg; - mbp_priv = (struct rte_pktmbuf_pool_private *) - ((char *)mp + sizeof(struct rte_mempool)); + mbp_priv = rte_mempool_get_priv(mp); mbp_priv->mbuf_data_room_size = mbp_ctor_arg->seg_buf_size; } @@ -429,15 +437,40 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf, mb_ctor_arg.seg_buf_size = mbp_ctor_arg.seg_buf_size; mb_size = mb_ctor_arg.seg_buf_offset + mb_ctor_arg.seg_buf_size; mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name)); - rte_mp = rte_mempool_create(pool_name, nb_mbuf, (unsigned) mb_size, + +#ifdef RTE_LIBRTE_PMD_XENVIRT + rte_mp = rte_mempool_gntalloc_create(pool_name, nb_mbuf, mb_size, + (unsigned) mb_mempool_cache, + sizeof(struct rte_pktmbuf_pool_private), + testpmd_mbuf_pool_ctor, &mbp_ctor_arg, + testpmd_mbuf_ctor, &mb_ctor_arg, + socket_id, 0); + + + +#else + if (mp_anon != 0) + rte_mp = mempool_anon_create(pool_name, nb_mbuf, mb_size, + (unsigned) mb_mempool_cache, + sizeof(struct rte_pktmbuf_pool_private), + testpmd_mbuf_pool_ctor, &mbp_ctor_arg, + testpmd_mbuf_ctor, &mb_ctor_arg, + socket_id, 0); + else + rte_mp = rte_mempool_create(pool_name, nb_mbuf, mb_size, (unsigned) mb_mempool_cache, sizeof(struct rte_pktmbuf_pool_private), testpmd_mbuf_pool_ctor, &mbp_ctor_arg, testpmd_mbuf_ctor, &mb_ctor_arg, socket_id, 0); + +#endif + if (rte_mp == NULL) { rte_exit(EXIT_FAILURE, "Creation of mbuf pool for socket %u " "failed\n", socket_id); + } else if (verbose_level > 0) { + rte_mempool_dump(rte_mp); } } @@ -1136,7 +1169,7 @@ all_ports_started(void) return 1; } -void +int start_port(portid_t pid) { int diag, need_check_link_status = 0; @@ -1146,12 +1179,12 @@ start_port(portid_t pid) if 
(test_done == 0) { printf("Please stop forwarding first\n"); - return; + return -1; } if (init_fwd_streams() < 0) { printf("Fail from init_fwd_streams()\n"); - return; + return -1; } if(dcb_config) @@ -1183,7 +1216,7 @@ start_port(portid_t pid) printf("Fail to configure port %d\n", pi); /* try to reconfigure port next time */ port->need_reconfig = 1; - return; + return -1; } } if (port->need_reconfig_queues > 0) { @@ -1212,7 +1245,7 @@ start_port(portid_t pid) printf("Fail to configure port %d tx queues\n", pi); /* try to reconfigure queues next time */ port->need_reconfig_queues = 1; - return; + return -1; } /* setup rx queues */ for (qi = 0; qi < nb_rxq; qi++) { @@ -1225,7 +1258,7 @@ start_port(portid_t pid) "No mempool allocation" "on the socket %d\n", rxring_numa[pi]); - return; + return -1; } diag = rte_eth_rx_queue_setup(pi, qi, @@ -1251,7 +1284,7 @@ start_port(portid_t pid) printf("Fail to configure port %d rx queues\n", pi); /* try to reconfigure queues next time */ port->need_reconfig_queues = 1; - return; + return -1; } } /* start port */ @@ -1280,6 +1313,7 @@ start_port(portid_t pid) printf("Please stop the ports first\n"); printf("Done\n"); + return 0; } void @@ -1732,7 +1766,8 @@ main(int argc, char** argv) nb_rxq, nb_txq); init_config(); - start_port(RTE_PORT_ALL); + if (start_port(RTE_PORT_ALL) != 0) + rte_exit(EXIT_FAILURE, "Start ports failed\n"); /* set all ports to promiscuous mode by default */ for (port_id = 0; port_id < nb_ports; port_id++) diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index b043793152..834277a611 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -271,6 +271,7 @@ extern uint8_t interactive; extern uint8_t numa_support; /**< set by "--numa" parameter */ extern uint16_t port_topology; /**< set by "--port-topology" parameter */ extern uint8_t no_flush_rx; /** #include #include -#include #include #include #include +#ifdef RTE_LIBRTE_TIMER +#include +#endif #include "test.h" @@ -82,7 +84,11 @@ do_recursive_call(void) { "test_whitelist_flag", no_action }, { "test_invalid_b_flag", no_action }, { "test_invalid_r_flag", no_action }, +#ifdef RTE_LIBRTE_XEN_DOM0 + { "test_dom0_misc_flags", no_action }, +#else { "test_misc_flags", no_action }, +#endif { "test_memory_flags", no_action }, { "test_file_prefix", no_action }, { "test_no_huge_flag", no_action }, @@ -110,7 +116,9 @@ main(int argc, char **argv) if (ret < 0) return -1; +#ifdef RTE_LIBRTE_TIMER rte_timer_subsystem_init(); +#endif argv += ret; diff --git a/app/test/test_eal_flags.c b/app/test/test_eal_flags.c index 2d09385384..0f373ef4b1 100644 --- a/app/test/test_eal_flags.c +++ b/app/test/test_eal_flags.c @@ -644,6 +644,72 @@ test_no_huge_flag(void) return 0; } +#ifdef RTE_LIBRTE_XEN_DOM0 +static int +test_dom0_misc_flags(void) +{ + char prefix[PATH_MAX], tmp[PATH_MAX]; + + if (get_current_prefix(tmp, sizeof(tmp)) == NULL) { + printf("Error - unable to get current prefix!\n"); + return -1; + } + rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp); + + /* check that some general flags don't prevent things from working. + * All cases, apart from the first, app should run. + * No futher testing of output done. 
+ */ + /* sanity check - failure with invalid option */ + const char *argv0[] = {prgname, prefix, mp_flag, "-c", "1", "--invalid-opt"}; + + /* With --no-pci */ + const char *argv1[] = {prgname, prefix, mp_flag, "-c", "1", "--no-pci"}; + /* With -v */ + const char *argv2[] = {prgname, prefix, mp_flag, "-c", "1", "-v"}; + /* With valid --syslog */ + const char *argv3[] = {prgname, prefix, mp_flag, "-c", "1", + "--syslog", "syslog"}; + /* With empty --syslog (should fail) */ + const char *argv4[] = {prgname, prefix, mp_flag, "-c", "1", "--syslog"}; + /* With invalid --syslog */ + const char *argv5[] = {prgname, prefix, mp_flag, "-c", "1", "--syslog", "error"}; + /* With no-sh-conf */ + const char *argv6[] = {prgname, "-c", "1", "-n", "2", "-m", "20", + "--no-shconf", "--file-prefix=noshconf" }; + + if (launch_proc(argv0) == 0) { + printf("Error - process ran ok with invalid flag\n"); + return -1; + } + if (launch_proc(argv1) != 0) { + printf("Error - process did not run ok with --no-pci flag\n"); + return -1; + } + if (launch_proc(argv2) != 0) { + printf("Error - process did not run ok with -v flag\n"); + return -1; + } + if (launch_proc(argv3) != 0) { + printf("Error - process did not run ok with --syslog flag\n"); + return -1; + } + if (launch_proc(argv4) == 0) { + printf("Error - process run ok with empty --syslog flag\n"); + return -1; + } + if (launch_proc(argv5) == 0) { + printf("Error - process run ok with invalid --syslog flag\n"); + return -1; + } + if (launch_proc(argv6) != 0) { + printf("Error - process did not run ok with --no-shconf flag\n"); + return -1; + } + + return 0; +} +#else static int test_misc_flags(void) { @@ -736,6 +802,10 @@ test_misc_flags(void) * effect on secondary processes) */ const char *argv10[] = {prgname, prefix, mp_flag, "-c", "1", "--huge-dir", "invalid"}; + /* try running with base-virtaddr param */ + const char *argv11[] = {prgname, "--file-prefix=virtaddr", + "-c", "1", "-n", "2", "--base-virtaddr=0x12345678"}; + if (launch_proc(argv0) == 0) { printf("Error - process ran ok with invalid flag\n"); @@ -784,8 +854,13 @@ test_misc_flags(void) printf("Error - secondary process did not run ok with invalid --huge-dir flag\n"); return -1; } + if (launch_proc(argv11) != 0) { + printf("Error - process did not run ok with --base-virtaddr parameter\n"); + return -1; + } return 0; } +#endif static int test_file_prefix(void) @@ -822,6 +897,9 @@ test_file_prefix(void) printf("Error - unable to get current prefix!\n"); return -1; } +#ifdef RTE_LIBRTE_XEN_DOM0 + return 0; +#endif /* check if files for current prefix are present */ if (process_hugefiles(prefix, HUGEPAGE_CHECK_EXISTS) != 1) { @@ -905,6 +983,7 @@ test_file_prefix(void) static int test_memory_flags(void) { + const char* mem_size = NULL; #ifdef RTE_EXEC_ENV_BSDAPP /* BSD target doesn't support prefixes at this point */ const char * prefix = ""; @@ -916,13 +995,20 @@ test_memory_flags(void) } rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp); #endif - /* valid -m flag */ - const char *argv0[] = {prgname, "-c", "10", "-n", "2", - "--file-prefix=" memtest, "-m", "2"}; +#ifdef RTE_LIBRTE_XEN_DOM0 + mem_size = "30"; +#else + mem_size = "2"; +#endif + /* valid -m flag and mp flag */ - const char *argv1[] = {prgname, prefix, mp_flag, "-c", "10", - "-n", "2", "-m", "2"}; + const char *argv0[] = {prgname, prefix, mp_flag, "-c", "10", + "-n", "2", "-m", mem_size}; + + /* valid -m flag */ + const char *argv1[] = {prgname, "-c", "10", "-n", "2", + "--file-prefix=" memtest, "-m", mem_size}; /* invalid (zero) 
--socket-mem flag */ const char *argv2[] = {prgname, "-c", "10", "-n", "2", @@ -1016,10 +1102,12 @@ test_memory_flags(void) #endif if (launch_proc(argv1) != 0) { - printf("Error - secondary process failed with valid -m flag !\n"); + printf("Error - process failed with valid -m flag!\n"); return -1; } - +#ifdef RTE_LIBRTE_XEN_DOM0 + return 0; +#endif if (launch_proc(argv2) == 0) { printf("Error - process run ok with invalid (zero) --socket-mem!\n"); return -1; @@ -1132,7 +1220,11 @@ test_eal_flags(void) return ret; } +#ifdef RTE_LIBRTE_XEN_DOM0 + ret = test_dom0_misc_flags(); +#else ret = test_misc_flags(); +#endif if (ret < 0) { printf("Error in test_misc_flags()"); return ret; diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c index 1195804e19..b948e30493 100644 --- a/app/test/test_mempool.c +++ b/app/test/test_mempool.c @@ -126,12 +126,14 @@ test_mempool_basic(void) printf("get private data\n"); if (rte_mempool_get_priv(mp) != - (char*) mp + sizeof(struct rte_mempool)) + (char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num)) return -1; printf("get physical address of an object\n"); - if (rte_mempool_virt2phy(mp, obj) != - (phys_addr_t) (mp->phys_addr + (phys_addr_t) ((char*) obj - (char*) mp))) + if (MEMPOOL_IS_CONTIG(mp) && + rte_mempool_virt2phy(mp, obj) != + (phys_addr_t) (mp->phys_addr + + (phys_addr_t) ((char*) obj - (char*) mp))) return -1; printf("put the object back\n"); @@ -428,6 +430,33 @@ test_mempool_same_name_twice_creation(void) return 0; } +/* + * BAsic test for mempool_xmem functions. + */ +static int +test_mempool_xmem_misc(void) +{ + uint32_t elt_num, total_size; + size_t sz; + ssize_t usz; + + elt_num = MAX_KEEP; + total_size = rte_mempool_calc_obj_size(MEMPOOL_ELT_SIZE, 0, NULL); + sz = rte_mempool_xmem_size(elt_num, total_size, MEMPOOL_PG_SHIFT_MAX); + + usz = rte_mempool_xmem_usage(NULL, elt_num, total_size, 0, 1, + MEMPOOL_PG_SHIFT_MAX); + + if(sz != (size_t)usz) { + printf("failure @ %s: rte_mempool_xmem_usage(%u, %u) " + "returns: %#zx, while expected: %#zx;\n", + __func__, elt_num, total_size, sz, (size_t)usz); + return (-1); + } + + return (0); +} + int test_mempool(void) { @@ -487,6 +516,9 @@ test_mempool(void) if (test_mempool_same_name_twice_creation() < 0) return -1; + if (test_mempool_xmem_misc() < 0) + return -1; + rte_mempool_list_dump(); return 0; diff --git a/config/defconfig_i686-default-linuxapp-gcc b/config/defconfig_i686-default-linuxapp-gcc index aa497032cf..62be4f8b00 100644 --- a/config/defconfig_i686-default-linuxapp-gcc +++ b/config/defconfig_i686-default-linuxapp-gcc @@ -293,6 +293,11 @@ CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n CONFIG_RTE_KNI_VHOST_DEBUG_RX=n CONFIG_RTE_KNI_VHOST_DEBUG_TX=n +# +#Compile Xen domain0 support +# +CONFIG_RTE_LIBRTE_XEN_DOM0=n + # # Enable warning directives # diff --git a/config/defconfig_i686-default-linuxapp-icc b/config/defconfig_i686-default-linuxapp-icc index 2e267546b7..7a1e787bfd 100644 --- a/config/defconfig_i686-default-linuxapp-icc +++ b/config/defconfig_i686-default-linuxapp-icc @@ -292,6 +292,11 @@ CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n CONFIG_RTE_KNI_VHOST_DEBUG_RX=n CONFIG_RTE_KNI_VHOST_DEBUG_TX=n +# +#Compile Xen domain0 support +# +CONFIG_RTE_LIBRTE_XEN_DOM0=n + # # Enable warning directives # diff --git a/config/defconfig_x86_64-default-linuxapp-gcc b/config/defconfig_x86_64-default-linuxapp-gcc index 9809ea88b8..d09e25d320 100644 --- a/config/defconfig_x86_64-default-linuxapp-gcc +++ b/config/defconfig_x86_64-default-linuxapp-gcc @@ -195,6 +195,9 @@ CONFIG_RTE_PMD_RING_MAX_TX_RINGS=16 # 
CONFIG_RTE_LIBRTE_PMD_PCAP=n
+
+CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
+
 #
 # Do prefetch of packet data within PMD driver receive function
 #
@@ -293,6 +296,11 @@ CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
 CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
 CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
 
+#
+# Compile Xen domain0 support
+#
+CONFIG_RTE_LIBRTE_XEN_DOM0=n
+
 #
 # Enable warning directives
 #
diff --git a/config/defconfig_x86_64-default-linuxapp-icc b/config/defconfig_x86_64-default-linuxapp-icc
index b182f9e2c2..3cf72d9105 100644
--- a/config/defconfig_x86_64-default-linuxapp-icc
+++ b/config/defconfig_x86_64-default-linuxapp-icc
@@ -292,6 +292,11 @@ CONFIG_RTE_KNI_VHOST_VNET_HDR_EN=n
 CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
 CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
 
+#
+# Compile Xen domain0 support
+#
+CONFIG_RTE_LIBRTE_XEN_DOM0=n
+
 #
 # Enable warning directives
 #
diff --git a/examples/kni/main.c b/examples/kni/main.c
index 2975c256d0..af59758ee2 100644
--- a/examples/kni/main.c
+++ b/examples/kni/main.c
@@ -939,6 +939,9 @@ main(int argc, char** argv)
 			continue;
 		kni_free_kni(port);
 	}
+#ifdef RTE_LIBRTE_XEN_DOM0
+	rte_kni_close();
+#endif
 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
 		if (kni_port_params_array[i]) {
 			rte_free(kni_port_params_array[i]);
diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
index 675378e138..e234e2ffe6 100644
--- a/lib/librte_eal/common/include/rte_debug.h
+++ b/lib/librte_eal/common/include/rte_debug.h
@@ -78,6 +78,11 @@ void rte_dump_registers(void);
 #define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
 #define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
 
+#define RTE_VERIFY(exp) do {                                                  \
+	if (!(exp))                                                           \
+		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
+} while (0)
+
 /*
  * Provide notification of a critical non-recoverable error and stop.
  *
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 4611dcd722..4ae3bf75a7 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -43,6 +43,10 @@
 #include
 #include
 
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#include <exec-env/rte_dom0_common.h>
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -87,6 +91,10 @@ struct rte_memseg {
 	int32_t socket_id;      /**< NUMA socket ID. */
 	uint32_t nchannel;      /**< Number of channels. */
 	uint32_t nrank;         /**< Number of ranks. */
+#ifdef RTE_LIBRTE_XEN_DOM0
+	/**< store segment MFNs */
+	uint64_t mfn[DOM0_NUM_MEMBLOCK];
+#endif
 } __attribute__((__packed__));
 
@@ -138,6 +146,42 @@ unsigned rte_memory_get_nchannel(void);
  */
 unsigned rte_memory_get_nrank(void);
 
+#ifdef RTE_LIBRTE_XEN_DOM0
+/**
+ * Return the machine address mapped to the given guest physical address,
+ * based on the MFNs stored for memory segment memseg_id.
+ *
+ * @param memseg_id
+ *   Index of the memory segment that phy_addr belongs to.
+ * @param phy_addr
+ *   The guest physical address to translate.
+ *
+ * @return
+ *   The machine address.
+ */
+phys_addr_t rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr);
+
+/**
+ * Memory init for applications running on Xen domain0.
+ *
+ * @param void
+ *
+ * @return
+ *    0: successfully
+ *    negative: error
+ */
+int rte_xen_dom0_memory_init(void);
+
+/**
+ * Attach to the memory segments of the primary process on Xen domain0.
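+ * Called from rte_eal_hugepage_attach() in secondary processes to map the
+ * segments set up by the primary (see the eal_memory.c changes below).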
+ * + * @param void + * + * @return + * 0: successfully + * negative: error + */ +int rte_xen_dom0_memory_attach(void); +#endif #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile index a2957b6383..b00e89fb2f 100644 --- a/lib/librte_eal/linuxapp/Makefile +++ b/lib/librte_eal/linuxapp/Makefile @@ -36,5 +36,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += kni endif +ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y) +DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_dom0 +endif include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 2667145930..b2124738b4 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -50,6 +50,9 @@ CFLAGS += $(WERROR_FLAGS) -O3 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_hugepage_info.c SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_memory.c +ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y) +SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_xen_memory.c +endif SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_thread.c SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_log.c SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c @@ -90,7 +93,7 @@ CFLAGS_eal_thread.o += -Wno-return-type CFLAGS_eal_hpet.o += -Wno-return-type endif -INC := rte_per_lcore.h rte_lcore.h rte_interrupts.h rte_kni_common.h +INC := rte_per_lcore.h rte_lcore.h rte_interrupts.h rte_kni_common.h rte_dom0_common.h SYMLINK-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP)-include/exec-env := \ $(addprefix include/exec-env/,$(INC)) diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 1ddfb655da..dde9126e6a 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -48,8 +48,6 @@ #include #include #include -#include -#include #include #include @@ -92,6 +90,7 @@ #define OPT_USE_DEVICE "use-device" #define OPT_SYSLOG "syslog" #define OPT_BASE_VIRTADDR "base-virtaddr" +#define OPT_XEN_DOM0 "xen-dom0" #define RTE_EAL_BLACKLIST_SIZE 0x100 @@ -335,6 +334,8 @@ eal_usage(const char *prgname) " (multiple -b options are allowed)\n" " -m MB : memory to allocate (see also --"OPT_SOCKET_MEM")\n" " -r NUM : force number of memory ranks (don't detect)\n" + " --"OPT_XEN_DOM0" : support application running on Xen Domain0 " + "without hugetlbfs\n" " --"OPT_SYSLOG" : set syslog facility\n" " --"OPT_SOCKET_MEM" : memory to allocate on specific \n" " sockets (use comma separated values)\n" @@ -409,7 +410,7 @@ eal_parse_coremask(const char *coremask) if (coremask[0] == '0' && ((coremask[1] == 'x') || (coremask[1] == 'X')) ) coremask += 2; - i = strnlen(coremask, MAX_ARG_STRLEN); + i = strnlen(coremask, PATH_MAX); while ((i > 0) && isblank(coremask[i - 1])) i--; if (i == 0) @@ -627,6 +628,7 @@ eal_parse_args(int argc, char **argv) {OPT_USE_DEVICE, 1, 0, 0}, {OPT_SYSLOG, 1, NULL, 0}, {OPT_BASE_VIRTADDR, 1, 0, 0}, + {OPT_XEN_DOM0, 0, 0, 0}, {0, 0, 0, 0} }; @@ -639,6 +641,7 @@ eal_parse_args(int argc, char **argv) internal_config.hugepage_dir = NULL; internal_config.force_sockets = 0; internal_config.syslog_facility = LOG_DAEMON; + internal_config.xen_dom0_support = 0; #ifdef RTE_LIBEAL_USE_HPET internal_config.no_hpet = 0; #else @@ -714,6 +717,16 @@ eal_parse_args(int argc, char **argv) if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) { internal_config.no_hugetlbfs = 1; } + if (!strcmp(lgopts[option_index].name, 
OPT_XEN_DOM0)) {
+	#ifdef RTE_LIBRTE_XEN_DOM0
+				internal_config.xen_dom0_support = 1;
+	#else
+				RTE_LOG(ERR, EAL, "Can't support DPDK app "
+					"running on Dom0, please configure"
+					" RTE_LIBRTE_XEN_DOM0=y\n");
+				return -1;
+	#endif
+			}
 			else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) {
 				internal_config.no_pci = 1;
 			}
@@ -810,7 +823,13 @@ eal_parse_args(int argc, char **argv)
 		eal_usage(prgname);
 		return -1;
 	}
-
+	/* --xen-dom0 doesn't make sense with --socket-mem */
+	if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
+		RTE_LOG(ERR, EAL, "Option --socket-mem cannot be specified "
+			"together with --xen-dom0!\n");
+		eal_usage(prgname);
+		return -1;
+	}
 	/* if no blacklist, parse a whitelist */
 	if (blacklist_index > 0) {
 		if (eal_dev_whitelist_exists()) {
@@ -904,6 +923,7 @@ rte_eal_init(int argc, char **argv)
 
 	if (internal_config.no_hugetlbfs == 0 &&
 			internal_config.process_type != RTE_PROC_SECONDARY &&
+			internal_config.xen_dom0_support == 0 &&
 			eal_hugepage_info_init() < 0)
 		rte_panic("Cannot get hugepage information\n");
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 6b78d8941d..fe317463e2 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -984,6 +984,17 @@ rte_eal_hugepage_init(void)
 		return 0;
 	}
 
+	/* check if app runs on Xen Dom0 */
+	if (internal_config.xen_dom0_support) {
+#ifdef RTE_LIBRTE_XEN_DOM0
+		/* use dom0_mm kernel driver to init memory */
+		if (rte_xen_dom0_memory_init() < 0)
+			return -1;
+		else
+			return 0;
+#endif
+	}
+
 	/* calculate total number of hugepages available. at this point we haven't
 	 * yet started sorting them so they all are on socket 0 */
@@ -1271,6 +1282,17 @@ rte_eal_hugepage_attach(void)
 				"into secondary processes\n");
 	}
 
+	if (internal_config.xen_dom0_support) {
+#ifdef RTE_LIBRTE_XEN_DOM0
+		if (rte_xen_dom0_memory_attach() < 0) {
+			RTE_LOG(ERR, EAL, "Failed to attach memory segments of primary "
+				"process\n");
+			return -1;
+		}
+		return 0;
+#endif
+	}
+
 	fd_zero = open("/dev/zero", O_RDONLY);
 	if (fd_zero < 0) {
 		RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index d0a729be5d..af9415dd15 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -493,14 +493,14 @@ pci_uio_map_resource(struct rte_pci_device *dev)
 	 * or uio:uioX */
 	rte_snprintf(dirname, sizeof(dirname),
-			"/sys/bus/pci/devices/" PCI_PRI_FMT "/uio",
+			SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
 			loc->domain, loc->bus, loc->devid, loc->function);
 
 	dir = opendir(dirname);
 	if (dir == NULL) {
 		/* retry with the parent directory */
 		rte_snprintf(dirname, sizeof(dirname),
-				"/sys/bus/pci/devices/" PCI_PRI_FMT,
+				SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
 				loc->domain, loc->bus, loc->devid, loc->function);
 		dir = opendir(dirname);
diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
new file mode 100644
index 0000000000..2f52258f35
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
@@ -0,0 +1,370 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include + +#define PAGE_SIZE RTE_PGSIZE_4K +#define DEFAUL_DOM0_NAME "dom0-mem" + +static int xen_fd = -1; +static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB"; + +/* + * Try to mmap *size bytes in /dev/zero. If it is succesful, return the + * pointer to the mmap'd area and keep *size unmodified. Else, retry + * with a smaller zone: decrease *size by mem_size until it reaches + * 0. In this case, return NULL. Note: this function returns an address + * which is a multiple of mem_size size. + */ +static void * +xen_get_virtual_area(size_t *size, size_t mem_size) +{ + void *addr; + int fd; + long aligned_addr; + + RTE_LOG(INFO, EAL, "Ask a virtual area of 0x%zu bytes\n", *size); + + fd = open("/dev/zero", O_RDONLY); + if (fd < 0){ + RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); + return NULL; + } + do { + addr = mmap(NULL, (*size) + mem_size, PROT_READ, + MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) + *size -= mem_size; + } while (addr == MAP_FAILED && *size > 0); + + if (addr == MAP_FAILED) { + close(fd); + RTE_LOG(INFO, EAL, "Cannot get a virtual area\n"); + return NULL; + } + + munmap(addr, (*size) + mem_size); + close(fd); + + /* align addr to a mem_size boundary */ + aligned_addr = (uintptr_t)addr; + aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size); + addr = (void *)(aligned_addr); + + RTE_LOG(INFO, EAL, "Virtual area found at %p (size = 0x%zx)\n", + addr, *size); + + return addr; +} + +/** + * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm + * /memsize-mB/memsize file, and the size unit is mB. 
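+ * (The path actually read is built from sys_dir_path above, i.e.
+ * /sys/kernel/mm/dom0-mm/memsize-mB/memsize. For example, writing 2048 to
+ * that file reserves 2048 MB; the value must be even, since memory is
+ * managed in 2 MB blocks.)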
+ */
+static int
+get_xen_memory_size(void)
+{
+	char path[PATH_MAX];
+	unsigned long mem_size = 0;
+	static const char *file_name;
+
+	file_name = "memsize";
+	rte_snprintf(path, sizeof(path), "%s/%s",
+			sys_dir_path, file_name);
+
+	if (eal_parse_sysfs_value(path, &mem_size) < 0)
+		return -1;
+
+	if (mem_size == 0)
+		rte_exit(EXIT_FAILURE, "XEN-DOM0: the %s/%s was not"
+			" configured.\n", sys_dir_path, file_name);
+	if (mem_size % 2)
+		rte_exit(EXIT_FAILURE, "XEN-DOM0: the %s/%s must be"
+			" an even number.\n", sys_dir_path, file_name);
+
+	if (mem_size > DOM0_CONFIG_MEMSIZE)
+		rte_exit(EXIT_FAILURE, "XEN-DOM0: the %s/%s should not be larger"
+			" than %d MB\n", sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE);
+
+	return mem_size;
+}
+
+/**
+ * Calculate the MFN (machine frame number) for a physical address in
+ * Xen Dom0, based on the MFNs stored in the memory segment.
+ */
+phys_addr_t
+rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
+{
+	int mfn_id;
+	uint64_t mfn, mfn_offset;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg *memseg = mcfg->memseg;
+
+	mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
+
+	/* the MFNs are contiguous within a 2M block */
+	mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) %
+		RTE_PGSIZE_2M / PAGE_SIZE;
+	mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id];
+
+	/* return machine address */
+	return (mfn * PAGE_SIZE + phy_addr % PAGE_SIZE);
+}
+
+int
+rte_xen_dom0_memory_init(void)
+{
+	void *vir_addr, *vma_addr = NULL;
+	int err, ret = 0;
+	uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
+	size_t vma_len = 0;
+	struct memory_info meminfo;
+	struct memseg_info seginfo[RTE_MAX_MEMSEG];
+	int flags, page_size = getpagesize();
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg *memseg = mcfg->memseg;
+	uint64_t total_mem = internal_config.memory;
+
+	memset(seginfo, 0, sizeof(seginfo));
+	memset(&meminfo, 0, sizeof(struct memory_info));
+
+	mem_size = get_xen_memory_size();
+	requested = (unsigned) (total_mem / 0x100000);
+	if (requested > mem_size)
+		/* if we didn't satisfy total memory requirements */
+		rte_exit(EXIT_FAILURE, "Not enough memory available! Requested: %uMB,"
+			" available: %uMB\n", requested, mem_size);
+	else if (total_mem != 0)
+		mem_size = requested;
+
+	/* Check FD and open once */
+	if (xen_fd < 0) {
+		xen_fd = open(DOM0_MM_DEV, O_RDWR);
+		if (xen_fd < 0) {
+			RTE_LOG(ERR, EAL, "Cannot open %s\n", DOM0_MM_DEV);
+			return -1;
+		}
+	}
+
+	meminfo.size = mem_size;
+
+	/* construct memory management name for Dom0 */
+	rte_snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
+		internal_config.hugefile_prefix, DEFAUL_DOM0_NAME);
+
+	/* Notify kernel driver to allocate memory */
+	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "XEN DOM0: failed to get memory\n");
+		err = -EIO;
+		goto fail;
+	}
+
+	/* Get number of memory segments from driver */
+	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "XEN DOM0: failed to get memseg count.\n");
+		err = -EIO;
+		goto fail;
+	}
+
+	if (num_memseg > RTE_MAX_MEMSEG) {
+		RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater"
+			" than max memseg %d.\n", num_memseg, RTE_MAX_MEMSEG);
+		err = -EIO;
+		goto fail;
+	}
+
+	/* get information on all memory segments */
+	ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL, "XEN DOM0: failed to get memseg info.\n");
+		err = -EIO;
+		goto fail;
+	}
+
+	/* map all memory segments to contiguous user space */
+	for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++)
+	{
+		vma_len = seginfo[memseg_idx].size;
+
+		/*
+		 * get the biggest virtual memory area up to vma_len. If it fails,
+		 * vma_addr is NULL, so let the kernel provide the address.
+		 */
+		vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
+		if (vma_addr == NULL) {
+			flags = MAP_SHARED;
+			vma_len = RTE_PGSIZE_2M;
+		} else
+			flags = MAP_SHARED | MAP_FIXED;
+
+		seginfo[memseg_idx].size = vma_len;
+		vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
+			PROT_READ|PROT_WRITE, flags, xen_fd,
+			memseg_idx * page_size);
+		if (vir_addr == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "XEN DOM0: Could not mmap %s\n",
+				DOM0_MM_DEV);
+			err = -EIO;
+			goto fail;
+		}
+
+		memseg[memseg_idx].addr = vir_addr;
+		memseg[memseg_idx].phys_addr = page_size *
+			seginfo[memseg_idx].pfn;
+		memseg[memseg_idx].len = seginfo[memseg_idx].size;
+		for (i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
+			memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];
+
+		/* MFNs are contiguous within 2M, so assume that page size is 2M */
+		memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;
+
+		memseg[memseg_idx].nchannel = mcfg->nchannel;
+		memseg[memseg_idx].nrank = mcfg->nrank;
+
+		/* NUMA is not supported in Xen Dom0, so only set socket 0 */
+		memseg[memseg_idx].socket_id = 0;
+	}
+
+	return 0;
+fail:
+	if (xen_fd > 0) {
+		close(xen_fd);
+		xen_fd = -1;
+	}
+	return err;
+}
+
+/*
+ * This creates the memory mappings in the secondary process to match that of
+ * the server process.
It goes through each memory segment in the DPDK runtime + * configuration, mapping them in order to form a contiguous block in the + * virtual memory space + */ +int +rte_xen_dom0_memory_attach(void) +{ + const struct rte_mem_config *mcfg; + unsigned s = 0; /* s used to track the segment number */ + int xen_fd = -1; + int ret = -1; + void *vir_addr; + char name[DOM0_NAME_MAX] = {0}; + int page_size = getpagesize(); + + mcfg = rte_eal_get_configuration()->mem_config; + + /* Check FD and open once */ + if (xen_fd < 0) { + xen_fd = open(DOM0_MM_DEV, O_RDWR); + if (xen_fd < 0) { + RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); + goto error; + } + } + + /* construct memory mangement name for Dom0 */ + rte_snprintf(name, DOM0_NAME_MAX, "%s-%s", + internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); + /* attach to memory segments of primary process */ + ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name); + if (ret) { + RTE_LOG(ERR, EAL,"attach memory segments fail.\n"); + goto error; + } + + /* map all segments into memory to make sure we get the addrs */ + for (s = 0; s < RTE_MAX_MEMSEG; ++s) { + + /* + * the first memory segment with len==0 is the one that + * follows the last valid segment. + */ + if (mcfg->memseg[s].len == 0) + break; + + vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, + PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd, + s * page_size); + if (vir_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in %s to requested address [%p]\n", + (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV, + mcfg->memseg[s].addr); + goto error; + } + } + return 0; + +error: + if (xen_fd >= 0) { + close(xen_fd); + xen_fd = -1; + } + return -1; +} diff --git a/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h b/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h index 0a2eddda5e..643db7cfcc 100644 --- a/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h +++ b/lib/librte_eal/linuxapp/eal/include/eal_internal_cfg.h @@ -63,6 +63,7 @@ struct internal_config { volatile unsigned force_nchannel; /**< force number of channels */ volatile unsigned force_nrank; /**< force number of ranks */ volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ + volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/ volatile unsigned no_pci; /**< true to disable PCI */ volatile unsigned no_hpet; /**< true to disable HPET */ volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h new file mode 100644 index 0000000000..ea05d58492 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h @@ -0,0 +1,107 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Contact Information: + * Intel Corporation + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_DOM0_COMMON_H_ +#define _RTE_DOM0_COMMON_H_ + +#ifdef __KERNEL__ +#include +#endif + +#define DOM0_NAME_MAX 256 +#define DOM0_MM_DEV "/dev/dom0_mm" + +#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */ +#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ +#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Maximum nb. of memory block(2M). */ +#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ +#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ + +#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) +#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) +#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) +#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) + +/** + * A structure used to store memory information. + */ +struct memory_info { + char name[DOM0_NAME_MAX]; + uint64_t size; +}; + +/** + * A structure used to store memory segment information. + */ +struct memseg_info { + uint32_t idx; + uint64_t pfn; + uint64_t size; + uint64_t mfn[DOM0_NUM_MEMBLOCK]; +}; + +/** + * A structure used to store memory block information. 
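+ * Each entry describes one 2 MB block (DOM0_MEMBLOCK_SIZE): its kernel
+ * virtual address, guest pfn and machine mfn. exchange_flag records whether
+ * the block was exchanged for machine-contiguous pages and therefore must be
+ * freed with xen_destroy_contiguous_region() (see dom0_memory_free() in
+ * dom0_mm_misc.c).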
+ */ +struct memblock_info { + uint8_t exchange_flag; + uint64_t vir_addr; + uint64_t pfn; + uint64_t mfn; +}; +#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c index 61c004b836..24f93a5279 100644 --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -30,6 +30,10 @@ #include #include +#ifdef CONFIG_XEN_DOM0 +#include +#endif + /** * MSI-X related macros, copy from linux/pci_regs.h in kernel 2.6.39, * but none of them in kernel 2.6.35. @@ -312,6 +316,48 @@ spin_unlock: return ret; } +#ifdef CONFIG_XEN_DOM0 +static int +igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma) +{ + int idx; + idx = (int)vma->vm_pgoff; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot.pgprot |= _PAGE_IOMAP; + + return remap_pfn_range(vma, + vma->vm_start, + info->mem[idx].addr >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +/** + * This is uio device mmap method which will use igbuio mmap for Xen + * Dom0 enviroment. + */ +static int +igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma) +{ + int idx; + + if (vma->vm_pgoff >= MAX_UIO_MAPS) + return -EINVAL; + if(info->mem[vma->vm_pgoff].size == 0) + return -EINVAL; + + idx = (int)vma->vm_pgoff; + switch (info->mem[idx].memtype) { + case UIO_MEM_PHYS: + return igbuio_dom0_mmap_phys(info, vma); + case UIO_MEM_LOGICAL: + case UIO_MEM_VIRTUAL: + default: + return -EINVAL; + } +} +#endif + /* Remap pci resources described by bar #pci_bar in uio resource n. */ static int igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info, @@ -462,6 +508,11 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) udev->info.version = "0.1"; udev->info.handler = igbuio_pci_irqhandler; udev->info.irqcontrol = igbuio_pci_irqcontrol; +#ifdef CONFIG_XEN_DOM0 + /* check if the driver run on Xen Dom0 */ + if (xen_initial_domain()) + udev->info.mmap = igbuio_dom0_pci_mmap; +#endif udev->info.priv = udev; udev->pdev = dev; udev->mode = 0; /* set the default value for interrupt mode */ diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile new file mode 100644 index 0000000000..42f5478140 --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = rte_dom0_mm + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50 +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h +MODULE_CFLAGS += -Wall -Werror + +# this lib needs main eal +DEPDIRS-y += lib/librte_eal/linuxapp/eal + +# +# all source are stored in SRCS-y +# + +SRCS-y += dom0_mm_misc.c + +include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h new file mode 100644 index 0000000000..50995c248f --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h @@ -0,0 +1,99 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#ifndef _DOM0_MM_DEV_H_ +#define _DOM0_MM_DEV_H_ + +#include +#include +#include +#include +#include + +#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/ +#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/ + +/** + * A structure describing the private information for a dom0 device. + */ +struct dom0_mm_dev { + struct miscdevice miscdev; + uint32_t allocated_memsize; + uint32_t num_mem_ctx; + uint32_t config_memsize; + struct dom0_mm_data *mm_data[NUM_MEM_CTX]; + struct mutex data_lock; +}; + +struct dom0_mm_data{ + uint8_t fail_times; + uint32_t refcnt; + uint32_t num_memseg; /**< Number of memory segment. */ + uint32_t mem_size; /**< Size of requesting memory. */ + char name[DOM0_NAME_MAX]; + + /** Storing memory block information.*/ + struct memblock_info block_info[DOM0_NUM_MEMBLOCK]; + + /** Storing memory segment information.*/ + struct memseg_info seg_info[DOM0_NUM_MEMSEG]; +}; + +#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args) +#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args) +#endif diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c new file mode 100644 index 0000000000..87fa3e63d6 --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c @@ -0,0 +1,620 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include "dom0_mm_dev.h" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0"); + +static struct dom0_mm_dev dom0_dev; +static struct kobject *dom0_kobj = NULL; + +static int dom0_open(struct inode *inode, struct file *file); +static int dom0_release(struct inode *inode, struct file *file); +static int dom0_ioctl(struct file *file, unsigned int ioctl_num, + unsigned long ioctl_param); +static int dom0_mmap(struct file *file, struct vm_area_struct *vma); +static int dom0_memory_free(struct dom0_mm_data *mm_data); + +static const struct file_operations data_fops = { + .owner = THIS_MODULE, + .open = dom0_open, + .release = dom0_release, + .mmap = dom0_mmap, + .unlocked_ioctl = (void *)dom0_ioctl, +}; + +static ssize_t +show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, 10, "%u\n", dom0_dev.allocated_memsize); +} + +static ssize_t +show_memsize(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize); +} + +static ssize_t +store_memsize(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int err = 0; + unsigned long mem_size; + + if (0 != strict_strtoul(buf, 0, &mem_size)) + return -EINVAL; + + mutex_lock(&dom0_dev.data_lock); + if (0 == mem_size) { + err = -EINVAL; + goto fail; + } else if (mem_size < dom0_dev.allocated_memsize || + mem_size > DOM0_CONFIG_MEMSIZE) { + XEN_ERR("configure memory size fail\n"); + err = -EINVAL; + goto fail; + } else + dom0_dev.config_memsize = mem_size; + +fail: + mutex_unlock(&dom0_dev.data_lock); + return err ? 
err : count; +} + +static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize); +static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL); + +static struct attribute *dev_attrs[] = { + &dev_attr_memsize.attr, + &dev_attr_memsize_rsvd.attr, + NULL, +}; + +/* the memory size unit is MB */ +static const struct attribute_group dev_attr_grp = { + .name = "memsize-mB", + .attrs = dev_attrs, +}; + + +static void +sort_viraddr(struct memblock_info *mb, int cnt) +{ + int i,j; + uint64_t tmp_pfn; + uint64_t tmp_viraddr; + + /*sort virtual address and pfn */ + for(i = 0; i < cnt; i ++) { + for(j = cnt - 1; j > i; j--) { + if(mb[j].pfn < mb[j - 1].pfn) { + tmp_pfn = mb[j - 1].pfn; + mb[j - 1].pfn = mb[j].pfn; + mb[j].pfn = tmp_pfn; + + tmp_viraddr = mb[j - 1].vir_addr; + mb[j - 1].vir_addr = mb[j].vir_addr; + mb[j].vir_addr = tmp_viraddr; + } + } + } +} + +static int +dom0_find_memdata(const char * mem_name) +{ + unsigned i; + int idx = -1; + for(i = 0; i< NUM_MEM_CTX; i++) { + if(dom0_dev.mm_data[i] == NULL) + continue; + if (!strncmp(dom0_dev.mm_data[i]->name, mem_name, + sizeof(char) * DOM0_NAME_MAX)) { + idx = i; + break; + } + } + + return idx; +} + +static int +dom0_find_mempos(const char * mem_name) +{ + unsigned i; + int idx = -1; + + for(i = 0; i< NUM_MEM_CTX; i++) { + if(dom0_dev.mm_data[i] == NULL){ + idx = i; + break; + } + } + + return idx; +} + +static int +dom0_memory_free(struct dom0_mm_data * mm_data) +{ + int idx; + uint64_t vstart, vaddr; + uint32_t i, num_block, size; + + if (!xen_pv_domain()) + return -1; + + /* each memory block is 2M */ + num_block = mm_data->mem_size / 2; + if (num_block == 0) + return -1; + + /* free memory and destory contiguous region in Xen*/ + for (i = 0; i< num_block; i++) { + vstart = mm_data->block_info[i].vir_addr; + if (vstart) { + if (mm_data->block_info[i].exchange_flag) + xen_destroy_contiguous_region(vstart, + DOM0_CONTIG_NUM_ORDER); + + size = DOM0_MEMBLOCK_SIZE; + vaddr = vstart; + while (size > 0) { + ClearPageReserved(virt_to_page(vaddr)); + vaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + free_pages(vstart, DOM0_CONTIG_NUM_ORDER); + } + } + + /* reset global memory data */ + idx = dom0_find_memdata(mm_data->name); + if (idx >= 0) { + dom0_dev.allocated_memsize -= mm_data->mem_size; + dom0_dev.mm_data[idx] = NULL; + dom0_dev.num_mem_ctx--; + } + memset(mm_data, 0, sizeof(struct dom0_mm_data)); + vfree(mm_data); + + return 0; +} + +/** + * Find all memory segments in which physical addresses are contiguous. 
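+ * In effect, consecutive 2MB blocks whose pfns differ by exactly
+ * DOM0_MEMBLOCK_SIZE / PAGE_SIZE (512 with 4KB pages) are merged into
+ * one segment; e.g. blocks with pfns {A, A+512, A+1024, B} yield one
+ * 6MB segment followed by a new segment starting at B.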
+ */
+static void
+find_memseg(int count, struct dom0_mm_data *mm_data)
+{
+	int i = 0;
+	int j, k, idx = 0;
+	uint64_t zone_len, pfn, num_block;
+
+	while (i < count) {
+		if (mm_data->block_info[i].exchange_flag == 0) {
+			i++;
+			continue;
+		}
+		k = 0;
+		pfn = mm_data->block_info[i].pfn;
+		mm_data->seg_info[idx].pfn = pfn;
+		mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
+
+		for (j = i + 1; j < count; j++) {
+
+			/* a block whose exchange failed ends the segment */
+			if (mm_data->block_info[j].exchange_flag == 0)
+				break;
+
+			if (mm_data->block_info[j].pfn !=
+				(mm_data->block_info[j - 1].pfn +
+					DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
+				break;
+			++k;
+			mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
+		}
+
+		num_block = j - i;
+		zone_len = num_block * DOM0_MEMBLOCK_SIZE;
+		mm_data->seg_info[idx].size = zone_len;
+
+		XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
+		i = i + num_block;
+		idx++;
+		if (idx == DOM0_NUM_MEMSEG)
+			break;
+	}
+	mm_data->num_memseg = idx;
+}
+
+static int
+dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data)
+{
+	uint64_t pfn, vstart, vaddr;
+	uint32_t i, num_block, size;
+	int idx;
+
+	/* memory is allocated in 2MB blocks */
+	num_block = meminfo->size / 2;
+
+	for (i = 0; i < num_block; i++) {
+		vstart = (unsigned long)
+			__get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
+		if (vstart == 0) {
+			XEN_ERR("memory allocation failed\n");
+			mm_data->mem_size = 2 * i;
+			dom0_memory_free(mm_data);
+			return -ENOMEM;
+		}
+
+		size = DOM0_MEMBLOCK_SIZE;
+		vaddr = vstart;
+		while (size > 0) {
+			SetPageReserved(virt_to_page(vaddr));
+			vaddr += PAGE_SIZE;
+			size -= PAGE_SIZE;
+		}
+		pfn = virt_to_pfn(vstart);
+		mm_data->block_info[i].pfn = pfn;
+		mm_data->block_info[i].vir_addr = vstart;
+	}
+
+	sort_viraddr(mm_data->block_info, num_block);
+
+	for (i = 0; i < num_block; i++) {
+
+		/*
+		 * This API exchanges MFNs to obtain a block of contiguous
+		 * physical addresses; the maximum block size is 2MB.
+		 */
+		if (xen_create_contiguous_region(mm_data->block_info[i].vir_addr,
+				DOM0_CONTIG_NUM_ORDER, 0) == 0) {
+			mm_data->block_info[i].exchange_flag = 1;
+			mm_data->block_info[i].mfn =
+				pfn_to_mfn(mm_data->block_info[i].pfn);
+		} else {
+			XEN_ERR("memory exchange failed\n");
+			mm_data->block_info[i].exchange_flag = 0;
+			mm_data->fail_times++;
+			if (mm_data->fail_times > MAX_EXCHANGE_FAIL_TIME) {
+				mm_data->mem_size = meminfo->size;
+				dom0_memory_free(mm_data);
+				return -1;
+			}
+		}
+	}
+
+	find_memseg(num_block, mm_data);
+
+	/* update private memory data */
+	mm_data->refcnt++;
+	mm_data->mem_size = meminfo->size;
+	memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
+	mm_data->name[DOM0_NAME_MAX - 1] = '\0';
+
+	/* update global memory data */
+	idx = dom0_find_mempos(meminfo->name);
+	if (idx < 0) {
+		dom0_memory_free(mm_data);
+		return -1;
+	}
+
+	dom0_dev.mm_data[idx] = mm_data;
+	dom0_dev.num_mem_ctx++;
+	dom0_dev.allocated_memsize += mm_data->mem_size;
+
+	return 0;
+}
+
+static int
+dom0_check_memory(struct memory_info *meminfo)
+{
+	int idx;
+	uint64_t mem_size;
+
+	/* round the memory size up to the next even number.
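+	 * Dom0 memory is carved into 2MB blocks (num_block = size / 2,
+	 * with size given in MB), so an odd request cannot be represented;
+	 * e.g. a request for 31MB is treated as 32MB, i.e. 16 blocks.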
+	 */
+	if (meminfo->size % 2)
+		++meminfo->size;
+
+	mem_size = meminfo->size;
+	if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
+		XEN_ERR("Memory data space is full in Dom0 driver\n");
+		return -1;
+	}
+	idx = dom0_find_memdata(meminfo->name);
+	if (idx >= 0) {
+		XEN_ERR("Memory data name %s already exists in Dom0 driver.\n",
+			meminfo->name);
+		return -1;
+	}
+	if ((dom0_dev.allocated_memsize + mem_size) >
+			dom0_dev.config_memsize) {
+		XEN_ERR("total memory size can't exceed configured memory size.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __init
+dom0_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	/* Setup the misc device */
+	dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
+	dom0_dev.miscdev.name = "dom0_mm";
+	dom0_dev.miscdev.fops = &data_fops;
+
+	/* register misc char device */
+	if (misc_register(&dom0_dev.miscdev) != 0) {
+		XEN_ERR("Misc device registration failed\n");
+		return -EPERM;
+	}
+
+	mutex_init(&dom0_dev.data_lock);
+	dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
+
+	if (!dom0_kobj) {
+		XEN_ERR("dom0-mm object creation failed\n");
+		misc_deregister(&dom0_dev.miscdev);
+		return -ENOMEM;
+	}
+
+	if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
+		sysfs_remove_group(dom0_kobj, &dev_attr_grp);
+		kobject_put(dom0_kobj);
+		misc_deregister(&dom0_dev.miscdev);
+		return -EPERM;
+	}
+
+	XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
+	return 0;
+}
+
+static void __exit
+dom0_exit(void)
+{
+	sysfs_remove_group(dom0_kobj, &dev_attr_grp);
+	kobject_put(dom0_kobj);
+	misc_deregister(&dom0_dev.miscdev);
+
+	XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
+}
+
+static int
+dom0_open(struct inode *inode, struct file *file)
+{
+	file->private_data = NULL;
+
+	XEN_PRINT("/dev/dom0_mm opened\n");
+	return 0;
+}
+
+static int
+dom0_release(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+	struct dom0_mm_data *mm_data = file->private_data;
+
+	if (mm_data == NULL)
+		return ret;
+
+	mutex_lock(&dom0_dev.data_lock);
+	if (--mm_data->refcnt == 0)
+		ret = dom0_memory_free(mm_data);
+	mutex_unlock(&dom0_dev.data_lock);
+
+	file->private_data = NULL;
+	XEN_PRINT("/dev/dom0_mm closed\n");
+	return ret;
+}
+
+static int
+dom0_mmap(struct file *file, struct vm_area_struct *vm)
+{
+	int status = 0;
+	uint32_t idx = vm->vm_pgoff;
+	uint64_t pfn, size = vm->vm_end - vm->vm_start;
+	struct dom0_mm_data *mm_data = file->private_data;
+
+	if (mm_data == NULL)
+		return -EINVAL;
+
+	mutex_lock(&dom0_dev.data_lock);
+	if (idx >= mm_data->num_memseg) {
+		mutex_unlock(&dom0_dev.data_lock);
+		return -EINVAL;
+	}
+
+	if (size > mm_data->seg_info[idx].size) {
+		mutex_unlock(&dom0_dev.data_lock);
+		return -EINVAL;
+	}
+
+	XEN_PRINT("mmap memseg idx=%d, size=0x%llx\n", idx, size);
+
+	pfn = mm_data->seg_info[idx].pfn;
+	mutex_unlock(&dom0_dev.data_lock);
+
+	status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
+
+	return status;
+}
+
+static int
+dom0_ioctl(struct file *file,
+	unsigned int ioctl_num,
+	unsigned long ioctl_param)
+{
+	int idx, ret;
+	char name[DOM0_NAME_MAX] = {0};
+	struct memory_info meminfo;
+	struct dom0_mm_data *mm_data = file->private_data;
+
+	XEN_PRINT("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
+
+	/**
+	 * Switch according to the ioctl called
+	 */
+	switch (_IOC_NR(ioctl_num)) {
+	case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
+		ret = copy_from_user(&meminfo, (void *)ioctl_param,
+			sizeof(struct memory_info));
+		if (ret)
+			return -EFAULT;
+
+		if (mm_data != NULL) {
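+			/* private_data already holds a memseg context:
+			 * only one PREPARE_MEMSEG is accepted per open fd */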
XEN_ERR("Cannot create memory segment for the same" + " file descriptor\n"); + return -EINVAL; + } + + /* Allocate private data */ + mm_data = vmalloc(sizeof(struct dom0_mm_data)); + if (!mm_data) { + XEN_ERR("Unable to allocate device private data\n"); + return -ENOMEM; + } + memset(mm_data, 0, sizeof(struct dom0_mm_data)); + + mutex_lock(&dom0_dev.data_lock); + /* check if we can allocate memory*/ + if (dom0_check_memory(&meminfo) < 0) { + mutex_unlock(&dom0_dev.data_lock); + vfree(mm_data); + return -EINVAL; + } + + /* allocate memories and created memory segments*/ + if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) { + XEN_ERR("create memory segment fail.\n"); + mutex_unlock(&dom0_dev.data_lock); + return -EIO; + } + + file->private_data = mm_data; + mutex_unlock(&dom0_dev.data_lock); + break; + + /* support multiple process in term of memory mapping*/ + case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG): + ret = copy_from_user(name, (void *)ioctl_param, + sizeof(char) * DOM0_NAME_MAX); + if (ret) + return -EFAULT; + + mutex_lock(&dom0_dev.data_lock); + idx = dom0_find_memdata(name); + if (idx < 0) { + mutex_unlock(&dom0_dev.data_lock); + return -EINVAL; + } + + mm_data = dom0_dev.mm_data[idx]; + mm_data->refcnt++; + file->private_data = mm_data; + mutex_unlock(&dom0_dev.data_lock); + break; + + case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG): + ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg, + sizeof(int)); + if (ret) + return -EFAULT; + break; + + case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO): + ret = copy_to_user((void *)ioctl_param, + &mm_data->seg_info[0], + sizeof(struct memseg_info) * + mm_data->num_memseg); + if (ret) + return -EFAULT; + break; + default: + XEN_PRINT("IOCTL default \n"); + break; + } + + return 0; +} + +module_init(dom0_init); +module_exit(dom0_exit); diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index f5b6b054e7..c1c64bf554 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -757,8 +757,7 @@ rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, (int) sizeof(struct rte_pktmbuf_pool_private)); return (-ENOSPC); } - mbp_priv = (struct rte_pktmbuf_pool_private *) - ((char *)mp + sizeof(struct rte_mempool)); + mbp_priv = rte_mempool_get_priv(mp); if ((uint32_t) (mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) { PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d " diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile index 877f71548f..d8bbc16dbf 100644 --- a/lib/librte_mempool/Makefile +++ b/lib/librte_mempool/Makefile @@ -37,8 +37,10 @@ LIB = librte_mempool.a CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 # all source are stored in SRCS-y -SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) := rte_mempool.c - +SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c +ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y) +SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_dom0_mempool.c +endif # install includes SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h diff --git a/lib/librte_mempool/rte_dom0_mempool.c b/lib/librte_mempool/rte_dom0_mempool.c new file mode 100644 index 0000000000..046821129a --- /dev/null +++ b/lib/librte_mempool/rte_dom0_mempool.c @@ -0,0 +1,134 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_mempool.h" + +static void +get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, + uint32_t pg_sz, uint32_t memseg_id) +{ + uint32_t i; + uint64_t virt_addr, mfn_id; + struct rte_mem_config *mcfg; + uint32_t page_size = getpagesize(); + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + virt_addr =(uintptr_t) mcfg->memseg[memseg_id].addr; + + for (i = 0; i != pg_num; i++) { + mfn_id = ((uintptr_t)va + i * pg_sz - virt_addr) / RTE_PGSIZE_2M; + pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size; + } +} + +/* create the mempool for supporting Dom0 */ +struct rte_mempool * +rte_dom0_mempool_create(const char *name, unsigned elt_num, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags) +{ + struct rte_mempool *mp = NULL; + phys_addr_t *pa; + char *va; + size_t sz; + uint32_t pg_num, pg_shift, pg_sz, total_size; + const struct rte_memzone *mz; + char mz_name[RTE_MEMZONE_NAMESIZE]; + int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; + + pg_sz = RTE_PGSIZE_2M; + + pg_shift = rte_bsf32(pg_sz); + total_size = rte_mempool_calc_obj_size(elt_size, flags, NULL); + + /* calc max memory size and max number of pages needed. */ + sz = rte_mempool_xmem_size(elt_num, total_size, pg_shift) + + RTE_PGSIZE_2M; + pg_num = sz >> pg_shift; + + /* extract physical mappings of the allocated memory. 
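+	 * One phys_addr_t entry per 2MB page is needed, hence the
+	 * calloc(pg_num, ...) below; the table is later passed to
+	 * rte_mempool_xmem_create() as its paddr[] argument.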
+	 */
+	pa = calloc(pg_num, sizeof (*pa));
+	if (pa == NULL)
+		return mp;
+
+	rte_snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, name);
+	mz = rte_memzone_reserve(mz_name, sz, socket_id, mz_flags);
+	if (mz == NULL) {
+		free(pa);
+		return mp;
+	}
+
+	va = (char *)RTE_ALIGN_CEIL((uintptr_t)mz->addr, RTE_PGSIZE_2M);
+	/* extract physical mappings of the allocated memory. */
+	get_phys_map(va, pa, pg_num, pg_sz, mz->memseg_id);
+
+	mp = rte_mempool_xmem_create(name, elt_num, elt_size,
+		cache_size, private_data_size,
+		mp_init, mp_init_arg,
+		obj_init, obj_init_arg,
+		socket_id, flags, va, pa, pg_num, pg_shift);
+
+	free(pa);
+
+	return (mp);
+}
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 8b25981051..1132440793 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -119,6 +119,232 @@ static unsigned optimize_object_size(unsigned obj_size)
 	return new_obj_size * CACHE_LINE_SIZE;
 }
 
+static void
+mempool_add_elem(struct rte_mempool *mp, void *obj, uint32_t obj_idx,
+	rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
+{
+	struct rte_mempool **mpp;
+
+	obj = (char *)obj + mp->header_size;
+
+	/* set mempool ptr in header */
+	mpp = __mempool_from_obj(obj);
+	*mpp = mp;
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	__mempool_write_header_cookie(obj, 1);
+	__mempool_write_trailer_cookie(obj);
+#endif
+	/* call the initializer */
+	if (obj_init)
+		obj_init(mp, obj_init_arg, obj, obj_idx);
+
+	/* enqueue in ring */
+	rte_ring_sp_enqueue(mp->ring, obj);
+}
+
+uint32_t
+rte_mempool_obj_iter(void *vaddr, uint32_t elt_num, size_t elt_sz, size_t align,
+	const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
+	rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg)
+{
+	uint32_t i, j, k;
+	uint32_t pgn;
+	uintptr_t end, start, va;
+	uintptr_t pg_sz;
+
+	pg_sz = (uintptr_t)1 << pg_shift;
+	va = (uintptr_t)vaddr;
+
+	i = 0;
+	j = 0;
+
+	while (i != elt_num && j != pg_num) {
+
+		start = RTE_ALIGN_CEIL(va, align);
+		end = start + elt_sz;
+
+		pgn = (end >> pg_shift) - (start >> pg_shift);
+		pgn += j;
+
+		/* do we have enough space left for the next element? */
+		if (pgn >= pg_num)
+			break;
+
+		for (k = j;
+				k != pgn &&
+				paddr[k] + pg_sz == paddr[k + 1];
+				k++)
+			;
+
+		/*
+		 * If the pages spanned by the next element are physically
+		 * contiguous, place the element there; otherwise skip to
+		 * the start of the next page and leave that chunk unused.
+		 */
+		if (k == pgn) {
+			if (obj_iter != NULL)
+				obj_iter(obj_iter_arg, (void *)start,
+					(void *)end, i);
+			va = end;
+			j = pgn;
+			i++;
+		} else {
+			va = RTE_ALIGN_CEIL((va + 1), pg_sz);
+			j++;
+		}
+	}
+
+	return (i);
+}
+
+/*
+ * Populate mempool with the objects.
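+ * Population is driven by rte_mempool_obj_iter() above: for each
+ * element that fits in a physically contiguous run of pages,
+ * mempool_obj_populate() enqueues the object in the ring via
+ * mempool_add_elem() and records the last used byte in elt_va_end.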
+ */
+
+struct mempool_populate_arg {
+	struct rte_mempool *mp;
+	rte_mempool_obj_ctor_t *obj_init;
+	void *obj_init_arg;
+};
+
+static void
+mempool_obj_populate(void *arg, void *start, void *end, uint32_t idx)
+{
+	struct mempool_populate_arg *pa = arg;
+
+	mempool_add_elem(pa->mp, start, idx, pa->obj_init, pa->obj_init_arg);
+	pa->mp->elt_va_end = (uintptr_t)end;
+}
+
+static void
+mempool_populate(struct rte_mempool *mp, size_t num, size_t align,
+	rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
+{
+	uint32_t elt_sz;
+	struct mempool_populate_arg arg;
+
+	elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
+	arg.mp = mp;
+	arg.obj_init = obj_init;
+	arg.obj_init_arg = obj_init_arg;
+
+	mp->size = rte_mempool_obj_iter((void *)mp->elt_va_start,
+		num, elt_sz, align,
+		mp->elt_pa, mp->pg_num, mp->pg_shift,
+		mempool_obj_populate, &arg);
+}
+
+uint32_t
+rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
+	struct rte_mempool_objsz *sz)
+{
+	struct rte_mempool_objsz lsz;
+
+	sz = (sz != NULL) ? sz : &lsz;
+
+	/*
+	 * The header holds at least the pointer to the pool and,
+	 * optionally, a 64-bit cookie.
+	 */
+	sz->header_size = 0;
+	sz->header_size += sizeof(struct rte_mempool *); /* ptr to pool */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	sz->header_size += sizeof(uint64_t); /* cookie */
+#endif
+	if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0)
+		sz->header_size = RTE_ALIGN_CEIL(sz->header_size,
+			CACHE_LINE_SIZE);
+
+	/* trailer contains the cookie in debug mode */
+	sz->trailer_size = 0;
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+	sz->trailer_size += sizeof(uint64_t); /* cookie */
+#endif
+	/* element size is at least 8-byte aligned */
+	sz->elt_size = RTE_ALIGN_CEIL(elt_size, sizeof(uint64_t));
+
+	/* expand trailer to next cache line */
+	if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) {
+		sz->total_size = sz->header_size + sz->elt_size +
+			sz->trailer_size;
+		sz->trailer_size += ((CACHE_LINE_SIZE -
+			(sz->total_size & CACHE_LINE_MASK)) &
+			CACHE_LINE_MASK);
+	}
+
+	/*
+	 * increase trailer to add padding between objects in order to
+	 * spread them across memory channels/ranks
+	 */
+	if ((flags & MEMPOOL_F_NO_SPREAD) == 0) {
+		unsigned new_size;
+		new_size = optimize_object_size(sz->header_size + sz->elt_size +
+			sz->trailer_size);
+		sz->trailer_size = new_size - sz->header_size - sz->elt_size;
+	}
+
+	/* this is the size of an object, including header and trailer */
+	sz->total_size = sz->header_size + sz->elt_size + sz->trailer_size;
+
+	return (sz->total_size);
+}
+
+
+/*
+ * Calculate the maximum amount of memory required to store the given
+ * number of objects.
+ */
+size_t
+rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, uint32_t pg_shift)
+{
+	size_t n, pg_num, pg_sz, sz;
+
+	pg_sz = (size_t)1 << pg_shift;
+
+	if ((n = pg_sz / elt_sz) > 0) {
+		pg_num = (elt_num + n - 1) / n;
+		sz = pg_num << pg_shift;
+	} else {
+		sz = RTE_ALIGN_CEIL(elt_sz, pg_sz) * elt_num;
+	}
+
+	return (sz);
+}
+
+/*
+ * Calculate how much memory would actually be required with the
+ * given memory footprint to store the required number of elements.
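+ * A return of -n indicates that only n of the requested elt_num
+ * objects fit into the supplied pages, letting callers tell an
+ * undersized buffer apart from success.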
+ */ +static void +mempool_lelem_iter(void *arg, __rte_unused void *start, void *end, + __rte_unused uint32_t idx) +{ + *(uintptr_t *)arg = (uintptr_t)end; +} + +ssize_t +rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift) +{ + uint32_t n; + uintptr_t va, uv; + size_t pg_sz, usz; + + pg_sz = (size_t)1 << pg_shift; + va = (uintptr_t)vaddr; + uv = va; + + if ((n = rte_mempool_obj_iter(vaddr, elt_num, elt_sz, 1, + paddr, pg_num, pg_shift, mempool_lelem_iter, + &uv)) != elt_num) { + return (-n); + } + + uv = RTE_ALIGN_CEIL(uv, pg_sz); + usz = uv - va; + return (usz); +} + /* create the mempool */ struct rte_mempool * rte_mempool_create(const char *name, unsigned n, unsigned elt_size, @@ -126,18 +352,48 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, rte_mempool_ctor_t *mp_init, void *mp_init_arg, rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, int socket_id, unsigned flags) +{ +#ifdef RTE_LIBRTE_XEN_DOM0 + return (rte_dom0_mempool_create(name, n, elt_size, + cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags)); +#else + return (rte_mempool_xmem_create(name, n, elt_size, + cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags, + NULL, NULL, MEMPOOL_PG_NUM_DEFAULT, MEMPOOL_PG_SHIFT_MAX)); +#endif +} + +/* + * Create the mempool over already allocated chunk of memory. + * That external memory buffer can consists of physically disjoint pages. + * Setting vaddr to NULL, makes mempool to fallback to original behaviour + * and allocate space for mempool and it's elements as one big chunk of + * physically continuos memory. + * */ +struct rte_mempool * +rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags, void *vaddr, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift) { char mz_name[RTE_MEMZONE_NAMESIZE]; char rg_name[RTE_RING_NAMESIZE]; struct rte_mempool *mp = NULL; struct rte_ring *r; const struct rte_memzone *mz; - size_t mempool_size, total_elt_size; + size_t mempool_size; int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; int rg_flags = 0; - uint32_t header_size, trailer_size; - unsigned i; - void *obj; + void *obj; + struct rte_mempool_objsz objsz; /* compilation-time checks */ RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) & @@ -156,13 +412,26 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, #endif /* check that we have an initialised tail queue */ - if (RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_MEMPOOL, rte_mempool_list) == NULL) { + if (RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_MEMPOOL, + rte_mempool_list) == NULL) { rte_errno = E_RTE_NO_TAILQ; return NULL; } /* asked cache too big */ - if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE){ + if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) { + rte_errno = EINVAL; + return NULL; + } + + /* check that we have both VA and PA */ + if (vaddr != NULL && paddr == NULL) { + rte_errno = EINVAL; + return NULL; + } + + /* Check that pg_num and pg_shift parameters are valid. 
*/ + if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) { rte_errno = EINVAL; return NULL; } @@ -177,6 +446,9 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, if (flags & MEMPOOL_F_SC_GET) rg_flags |= RING_F_SC_DEQ; + /* calculate mempool object sizes. */ + rte_mempool_calc_obj_size(elt_size, flags, &objsz); + rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK); /* allocate the ring that will be used to store objects */ @@ -189,53 +461,21 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, goto exit; /* - * In header, we have at least the pointer to the pool, and - * optionaly a 64 bits cookie. - */ - header_size = 0; - header_size += sizeof(struct rte_mempool *); /* ptr to pool */ -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG - header_size += sizeof(uint64_t); /* cookie */ -#endif - if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) - header_size = (header_size + CACHE_LINE_MASK) & (~CACHE_LINE_MASK); - - /* trailer contains the cookie in debug mode */ - trailer_size = 0; -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG - trailer_size += sizeof(uint64_t); /* cookie */ -#endif - /* element size is 8 bytes-aligned at least */ - elt_size = (elt_size + 7) & (~7); - - /* expand trailer to next cache line */ - if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) { - total_elt_size = header_size + elt_size + trailer_size; - trailer_size += ((CACHE_LINE_SIZE - - (total_elt_size & CACHE_LINE_MASK)) & - CACHE_LINE_MASK); - } - - /* - * increase trailer to add padding between objects in order to - * spread them accross memory channels/ranks + * reserve a memory zone for this mempool: private data is + * cache-aligned */ - if ((flags & MEMPOOL_F_NO_SPREAD) == 0) { - unsigned new_size; - new_size = optimize_object_size(header_size + elt_size + - trailer_size); - trailer_size = new_size - header_size - elt_size; - } - - /* this is the size of an object, including header and trailer */ - total_elt_size = header_size + elt_size + trailer_size; - - /* reserve a memory zone for this mempool: private data is - * cache-aligned */ private_data_size = (private_data_size + CACHE_LINE_MASK) & (~CACHE_LINE_MASK); - mempool_size = total_elt_size * n + - sizeof(struct rte_mempool) + private_data_size; + + /* + * If user provided an external memory buffer, then use it to + * store mempool objects. Otherwise reserve memzone big enough to + * hold mempool header and metadata plus mempool objects. + */ + mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size; + if (vaddr == NULL) + mempool_size += (size_t)objsz.total_size * n; + rte_snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name); mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags); @@ -255,39 +495,42 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, mp->ring = r; mp->size = n; mp->flags = flags; - mp->elt_size = elt_size; - mp->header_size = header_size; - mp->trailer_size = trailer_size; + mp->elt_size = objsz.elt_size; + mp->header_size = objsz.header_size; + mp->trailer_size = objsz.trailer_size; mp->cache_size = cache_size; - mp->cache_flushthresh = (uint32_t)(cache_size * CACHE_FLUSHTHRESH_MULTIPLIER); + mp->cache_flushthresh = (uint32_t) + (cache_size * CACHE_FLUSHTHRESH_MULTIPLIER); mp->private_data_size = private_data_size; + /* calculate address of the first element for continuous mempool. */ + obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) + + private_data_size; + + /* populate address translation fields. 
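+	 * Once these are set, rte_mempool_virt2phy() resolves an object
+	 * address as elt_pa[off >> pg_shift] + (off & pg_mask), with off
+	 * taken relative to elt_va_start.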
*/ + mp->pg_num = pg_num; + mp->pg_shift = pg_shift; + mp->pg_mask = RTE_LEN2MASK(mp->pg_shift, typeof(mp->pg_mask)); + + /* mempool elements allocated together with mempool */ + if (vaddr == NULL) { + mp->elt_va_start = (uintptr_t)obj; + mp->elt_pa[0] = mp->phys_addr + + (mp->elt_va_start - (uintptr_t)mp); + + /* mempool elements in a separate chunk of memory. */ + } else { + mp->elt_va_start = (uintptr_t)vaddr; + memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num); + } + + mp->elt_va_end = mp->elt_va_start; + /* call the initializer */ if (mp_init) mp_init(mp, mp_init_arg); - /* fill the headers and trailers, and add objects in ring */ - obj = (char *)mp + sizeof(struct rte_mempool) + private_data_size; - for (i = 0; i < n; i++) { - struct rte_mempool **mpp; - obj = (char *)obj + header_size; - - /* set mempool ptr in header */ - mpp = __mempool_from_obj(obj); - *mpp = mp; - -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG - __mempool_write_header_cookie(obj, 1); - __mempool_write_trailer_cookie(obj); -#endif - /* call the initializer */ - if (obj_init) - obj_init(mp, obj_init_arg, obj, i); - - /* enqueue in ring */ - rte_ring_sp_enqueue(mp->ring, obj); - obj = (char *)obj + elt_size + trailer_size; - } + mempool_populate(mp, n, 1, obj_init, obj_init_arg); RTE_EAL_TAILQ_INSERT_TAIL(RTE_TAILQ_MEMPOOL, rte_mempool_list, mp); @@ -355,21 +598,56 @@ rte_mempool_dump_cache(const struct rte_mempool *mp) #ifndef __INTEL_COMPILER #pragma GCC diagnostic ignored "-Wcast-qual" #endif + +struct mempool_audit_arg { + const struct rte_mempool *mp; + uintptr_t obj_end; + uint32_t obj_num; +}; + static void -mempool_audit_cookies(const struct rte_mempool *mp) +mempool_obj_audit(void *arg, void *start, void *end, uint32_t idx) { - unsigned i; + struct mempool_audit_arg *pa = arg; void *obj; - void * const *obj_table; - - obj = (char *)mp + sizeof(struct rte_mempool) + mp->private_data_size; - for (i = 0; i < mp->size; i++) { - obj = (char *)obj + mp->header_size; - obj_table = &obj; - __mempool_check_cookies(mp, obj_table, 1, 2); - obj = (char *)obj + mp->elt_size + mp->trailer_size; + + obj = (char *)start + pa->mp->header_size; + pa->obj_end = (uintptr_t)end; + pa->obj_num = idx + 1; + __mempool_check_cookies(pa->mp, &obj, 1, 2); +} + +static void +mempool_audit_cookies(const struct rte_mempool *mp) +{ + uint32_t elt_sz, num; + struct mempool_audit_arg arg; + + elt_sz = mp->elt_size + mp->header_size + mp->trailer_size; + + arg.mp = mp; + arg.obj_end = mp->elt_va_start; + arg.obj_num = 0; + + num = rte_mempool_obj_iter((void *)mp->elt_va_start, + mp->size, elt_sz, 1, + mp->elt_pa, mp->pg_num, mp->pg_shift, + mempool_obj_audit, &arg); + + if (num != mp->size) { + rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) " + "iterated only over %u elements\n", + mp, mp->size, num); + } else if (arg.obj_end != mp->elt_va_end || arg.obj_num != mp->size) { + rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) " + "last callback va_end: %#tx (%#tx expeceted), " + "num of objects: %u (%u expected)\n", + mp, mp->size, + arg.obj_end, mp->elt_va_end, + arg.obj_num, mp->size); } } + #ifndef __INTEL_COMPILER #pragma GCC diagnostic error "-Wcast-qual" #endif @@ -422,6 +700,7 @@ rte_mempool_dump(const struct rte_mempool *mp) printf("mempool <%s>@%p\n", mp->name, mp); printf(" flags=%x\n", mp->flags); printf(" ring=<%s>@%p\n", mp->ring->name, mp->ring); + printf(" phys_addr=0x%" PRIx64 "\n", mp->phys_addr); printf(" size=%"PRIu32"\n", mp->size); printf(" header_size=%"PRIu32"\n", mp->header_size); printf(" elt_size=%"PRIu32"\n", 
mp->elt_size); @@ -429,6 +708,19 @@ rte_mempool_dump(const struct rte_mempool *mp) printf(" total_obj_size=%"PRIu32"\n", mp->header_size + mp->elt_size + mp->trailer_size); + printf(" private_data_size=%"PRIu32"\n", mp->private_data_size); + printf(" pg_num=%"PRIu32"\n", mp->pg_num); + printf(" pg_shift=%"PRIu32"\n", mp->pg_shift); + printf(" pg_mask=%#tx\n", mp->pg_mask); + printf(" elt_va_start=%#tx\n", mp->elt_va_start); + printf(" elt_va_end=%#tx\n", mp->elt_va_end); + printf(" elt_pa[0]=0x%" PRIx64 "\n", mp->elt_pa[0]); + + if (mp->size != 0) + printf(" avg bytes/object=%#Lf\n", + (long double)(mp->elt_va_end - mp->elt_va_start) / + mp->size); + cache_count = rte_mempool_dump_cache(mp); common_count = rte_ring_count(mp->ring); if ((cache_count + common_count) > mp->size) diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h index 94c75cbab0..ae13f8b0d0 100644 --- a/lib/librte_mempool/rte_mempool.h +++ b/lib/librte_mempool/rte_mempool.h @@ -108,14 +108,36 @@ struct rte_mempool_cache { } __rte_cache_aligned; #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ +struct rte_mempool_objsz { + uint32_t elt_size; /**< Size of an element. */ + uint32_t header_size; /**< Size of header (before elt). */ + uint32_t trailer_size; /**< Size of trailer (after elt). */ + uint32_t total_size; + /**< Total size of an object (header + elt + trailer). */ +}; + #define RTE_MEMPOOL_NAMESIZE 32 /**< Maximum length of a memory pool. */ #define RTE_MEMPOOL_MZ_PREFIX "MP_" /* "MP_" */ #define RTE_MEMPOOL_MZ_FORMAT RTE_MEMPOOL_MZ_PREFIX "%s" +#ifdef RTE_LIBRTE_XEN_DOM0 + +/* "_MP_elt" */ +#define RTE_MEMPOOL_OBJ_NAME "%s_" RTE_MEMPOOL_MZ_PREFIX "elt" + +#else + #define RTE_MEMPOOL_OBJ_NAME RTE_MEMPOOL_MZ_FORMAT +#endif /* RTE_LIBRTE_XEN_DOM0 */ + +#define MEMPOOL_PG_SHIFT_MAX (sizeof(uintptr_t) * CHAR_BIT - 1) + +/** Mempool over one chunk of physically continuous memory */ +#define MEMPOOL_PG_NUM_DEFAULT 1 + /** * The RTE mempool structure. */ @@ -128,7 +150,8 @@ struct rte_mempool { int flags; /**< Flags of the mempool. */ uint32_t size; /**< Size of the mempool. */ uint32_t cache_size; /**< Size of per-lcore local cache. */ - uint32_t cache_flushthresh; /**< Threshold before we flush excess elements. */ + uint32_t cache_flushthresh; + /**< Threshold before we flush excess elements. */ uint32_t elt_size; /**< Size of an element. */ uint32_t header_size; /**< Size of header (before elt). */ @@ -145,6 +168,20 @@ struct rte_mempool { /** Per-lcore statistics. */ struct rte_mempool_debug_stats stats[RTE_MAX_LCORE]; #endif + + /* Address translation support, starts from next cache line. */ + + /** Number of elements in the elt_pa array. */ + uint32_t pg_num __rte_cache_aligned; + uint32_t pg_shift; /**< LOG2 of the physical pages. */ + uintptr_t pg_mask; /**< physical page mask value. */ + uintptr_t elt_va_start; + /**< Virtual address of the first mempool object. */ + uintptr_t elt_va_end; + /**< Virtual address of the mempool object. */ + phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT]; + /**< Array of physical pages addresses for the mempool objects buffer. */ + } __rte_cache_aligned; #define MEMPOOL_F_NO_SPREAD 0x0001 /**< Do not spread in memory. */ @@ -171,6 +208,24 @@ struct rte_mempool { #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) #endif +/** + * Calculates size of the mempool header. + * @param mp + * Pointer to the memory pool. + * @param pgn + * Number of page used to store mempool objects. 
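+ * @return
+ *   sizeof(struct rte_mempool) plus room for the (pgn - 1) page
+ *   addresses that do not fit in the built-in elt_pa[] slot, with the
+ *   extra area rounded up to a cache line; for pgn == 1 this is just
+ *   the plain structure size.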
+ */ +#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \ + RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \ + sizeof ((mp)->elt_pa[0]), CACHE_LINE_SIZE)) + +/** + * Returns TRUE if whole mempool is allocated in one contiguous block of memory. + */ +#define MEMPOOL_IS_CONTIG(mp) \ + ((mp)->pg_num == MEMPOOL_PG_NUM_DEFAULT && \ + (mp)->phys_addr == (mp)->elt_pa[0]) + /** * @internal Get a pointer to a mempool pointer in the object header. * @param obj @@ -331,6 +386,49 @@ static inline void __mempool_check_cookies(const struct rte_mempool *mp, #define __mempool_check_cookies(mp, obj_table_const, n, free) do {} while(0) #endif /* RTE_LIBRTE_MEMPOOL_DEBUG */ +/** + * An mempool's object iterator callback function. + */ +typedef void (*rte_mempool_obj_iter_t)(void * /*obj_iter_arg*/, + void * /*obj_start*/, + void * /*obj_end*/, + uint32_t /*obj_index */); + +/* + * Iterates across objects of the given size and alignment in the + * provided chunk of memory. The given memory buffer can consist of + * disjoint physical pages. + * For each object calls the provided callback (if any). + * Used to populate mempool, walk through all elements of the mempool, + * estimate how many elements of the given size could be created in the given + * memory buffer. + * @param vaddr + * Virtual address of the memory buffer. + * @param elt_num + * Maximum number of objects to iterate through. + * @param elt_sz + * Size of each object. + * @param paddr + * Array of phyiscall addresses of the pages that comprises given memory + * buffer. + * @param pg_num + * Number of elements in the paddr array. + * @param pg_shift + * LOG2 of the physical pages size. + * @param obj_iter + * Object iterator callback function (could be NULL). + * @param obj_iter_arg + * User defined Prameter for the object iterator callback function. + * + * @return + * Number of objects iterated through. + */ + +uint32_t rte_mempool_obj_iter(void *vaddr, + uint32_t elt_num, size_t elt_sz, size_t align, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, + rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg); + /** * An object constructor callback function for mempool. * @@ -354,6 +452,8 @@ typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *); * * This function uses ``memzone_reserve()`` to allocate memory. The * pool contains n elements of elt_size. Its size is set to n. + * All elements of the mempool are allocated together with the mempool header, + * in one physically continuous chunk of memory. * * @param name * The name of the mempool. @@ -436,6 +536,199 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size, rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, int socket_id, unsigned flags); +/** + * Creates a new mempool named *name* in memory. + * + * This function uses ``memzone_reserve()`` to allocate memory. The + * pool contains n elements of elt_size. Its size is set to n. + * Depending on the input parameters, mempool elements can be either allocated + * together with the mempool header, or an externally provided memory buffer + * could be used to store mempool objects. In later case, that external + * memory buffer can consist of set of disjoint phyiscal pages. + * + * @param name + * The name of the mempool. + * @param n + * The number of elements in the mempool. The optimum size (in terms of + * memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param elt_size + * The size of each element. 
+ * @param cache_size + * If cache_size is non-zero, the rte_mempool library will try to + * limit the accesses to the common lockless pool, by maintaining a + * per-lcore object cache. This argument must be lower or equal to + * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose + * cache_size to have "n modulo cache_size == 0": if this is + * not the case, some elements will always stay in the pool and will + * never be used. The access to the per-lcore table is of course + * faster than the multi-producer/consumer pool. The cache can be + * disabled if the cache_size argument is set to 0; it can be useful to + * avoid loosing objects in cache. Note that even if not used, the + * memory space for cache is always reserved in a mempool structure, + * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * @param private_data_size + * The size of the private data appended after the mempool + * structure. This is useful for storing some private data after the + * mempool structure, as is done for rte_mbuf_pool for example. + * @param mp_init + * A function pointer that is called for initialization of the pool, + * before object initialization. The user can initialize the private + * data in this function if needed. This parameter can be NULL if + * not needed. + * @param mp_init_arg + * An opaque pointer to data that can be used in the mempool + * constructor function. + * @param obj_init + * A function pointer that is called for each object at + * initialization of the pool. The user can set some meta data in + * objects if needed. This parameter can be NULL if not needed. + * The obj_init() function takes the mempool pointer, the init_arg, + * the object pointer and the object number as parameters. + * @param obj_init_arg + * An opaque pointer to data that can be used as an argument for + * each call to the object constructor function. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The *flags* arguments is an OR of following flags: + * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread + * between channels in RAM: the pool allocator will add padding + * between objects depending on the hardware configuration. See + * Memory alignment constraints for details. If this flag is set, + * the allocator will just align them to a cache line. + * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are + * cache-aligned. This flag removes this constraint, and no + * padding will be present between objects. This flag implies + * MEMPOOL_F_NO_SPREAD. + * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior + * when using rte_mempool_put() or rte_mempool_put_bulk() is + * "single-producer". Otherwise, it is "multi-producers". + * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior + * when using rte_mempool_get() or rte_mempool_get_bulk() is + * "single-consumer". Otherwise, it is "multi-consumers". + * @param vaddr + * Virtual address of the externally allocated memory buffer. + * Will be used to store mempool objects. + * @param paddr + * Array of phyiscall addresses of the pages that comprises given memory + * buffer. + * @param pg_num + * Number of elements in the paddr array. + * @param pg_shift + * LOG2 of the physical pages size. + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. 
Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - E_RTE_NO_TAILQ - no tailq list could be got for the ring or mempool list + * - EINVAL - cache size provided is too large + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags, void *vaddr, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift); + +#ifdef RTE_LIBRTE_XEN_DOM0 +/** + * Creates a new mempool named *name* in memory on Xen Dom0. + * + * This function uses ``rte_mempool_xmem_create()`` to allocate memory. The + * pool contains n elements of elt_size. Its size is set to n. + * All elements of the mempool are allocated together with the mempool header, + * and memory buffer can consist of set of disjoint phyiscal pages. + * + * @param name + * The name of the mempool. + * @param n + * The number of elements in the mempool. The optimum size (in terms of + * memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param elt_size + * The size of each element. + * @param cache_size + * If cache_size is non-zero, the rte_mempool library will try to + * limit the accesses to the common lockless pool, by maintaining a + * per-lcore object cache. This argument must be lower or equal to + * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose + * cache_size to have "n modulo cache_size == 0": if this is + * not the case, some elements will always stay in the pool and will + * never be used. The access to the per-lcore table is of course + * faster than the multi-producer/consumer pool. The cache can be + * disabled if the cache_size argument is set to 0; it can be useful to + * avoid loosing objects in cache. Note that even if not used, the + * memory space for cache is always reserved in a mempool structure, + * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * @param private_data_size + * The size of the private data appended after the mempool + * structure. This is useful for storing some private data after the + * mempool structure, as is done for rte_mbuf_pool for example. + * @param mp_init + * A function pointer that is called for initialization of the pool, + * before object initialization. The user can initialize the private + * data in this function if needed. This parameter can be NULL if + * not needed. + * @param mp_init_arg + * An opaque pointer to data that can be used in the mempool + * constructor function. + * @param obj_init + * A function pointer that is called for each object at + * initialization of the pool. The user can set some meta data in + * objects if needed. This parameter can be NULL if not needed. + * The obj_init() function takes the mempool pointer, the init_arg, + * the object pointer and the object number as parameters. + * @param obj_init_arg + * An opaque pointer to data that can be used as an argument for + * each call to the object constructor function. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. 
The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The *flags* arguments is an OR of following flags: + * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread + * between channels in RAM: the pool allocator will add padding + * between objects depending on the hardware configuration. See + * Memory alignment constraints for details. If this flag is set, + * the allocator will just align them to a cache line. + * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are + * cache-aligned. This flag removes this constraint, and no + * padding will be present between objects. This flag implies + * MEMPOOL_F_NO_SPREAD. + * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior + * when using rte_mempool_put() or rte_mempool_put_bulk() is + * "single-producer". Otherwise, it is "multi-producers". + * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior + * when using rte_mempool_get() or rte_mempool_get_bulk() is + * "single-consumer". Otherwise, it is "multi-consumers". + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - E_RTE_NO_TAILQ - no tailq list could be got for the ring or mempool list + * - EINVAL - cache size provided is too large + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_dom0_mempool_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags); +#endif + /** * Dump the status of the mempool to the console. * @@ -959,13 +1252,13 @@ rte_mempool_empty(const struct rte_mempool *mp) * @return * The physical address of the elt element. */ -static inline phys_addr_t rte_mempool_virt2phy(const struct rte_mempool *mp, - const void *elt) +static inline phys_addr_t +rte_mempool_virt2phy(const struct rte_mempool *mp, const void *elt) { uintptr_t off; - off = (const char *)elt - (const char *)mp; - return mp->phys_addr + off; + off = (const char *)elt - (const char *)mp->elt_va_start; + return (mp->elt_pa[off >> mp->pg_shift] + (off & mp->pg_mask)); } @@ -991,7 +1284,7 @@ void rte_mempool_audit(const struct rte_mempool *mp); */ static inline void *rte_mempool_get_priv(struct rte_mempool *mp) { - return (char *)mp + sizeof(struct rte_mempool); + return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num); } /** @@ -1005,13 +1298,73 @@ void rte_mempool_list_dump(void); * @param name * The name of the mempool. * @return - * The pointer to the mempool matching the name, or NULL if not found.NULL on error + * The pointer to the mempool matching the name, or NULL if not found. + * NULL on error * with rte_errno set appropriately. Possible rte_errno values include: * - ENOENT - required entry not available to return. * */ struct rte_mempool *rte_mempool_lookup(const char *name); +/** + * Given a desired size of the mempool element and mempool flags, + * caluclates header, trailer, body and total sizes of the mempool object. + * @param elt_size + * The size of each element. 
+ * @param flags + * The flags used for the mempool creation. + * Consult rte_mempool_create() for more information about possible values. + * The size of each element. + * @return + * Total size of the mempool object. + */ +uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags, + struct rte_mempool_objsz *sz); + +/** + * Calculate maximum amount of memory required to store given number of objects. + * Assumes that the memory buffer will be alligned at page boundary. + * Note, that if object size is bigger then page size, then it assumes that + * we have a subsets of physically continuous pages big enough to store + * at least one object. + * @param elt_num + * Number of elements. + * @param elt_sz + * The size of each element. + * @param pg_shift + * LOG2 of the physical pages size. + * @return + * Required memory size aligned at page boundary. + */ +size_t rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, + uint32_t pg_shift); + +/** + * Calculate how much memory would be actually required with the given + * memory footprint to store required number of objects. + * @param vaddr + * Virtual address of the externally allocated memory buffer. + * Will be used to store mempool objects. + * @param elt_num + * Number of elements. + * @param elt_sz + * The size of each element. + * @param paddr + * Array of phyiscall addresses of the pages that comprises given memory + * buffer. + * @param pg_num + * Number of elements in the paddr array. + * @param pg_shift + * LOG2 of the physical pages size. + * @return + * Number of bytes needed to store given number of objects, + * aligned to the given page size. + * If provided memory buffer is not big enough: + * (-1) * actual number of elemnts that can be stored in that buffer. + */ +ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift); + #ifdef __cplusplus } #endif diff --git a/lib/librte_pmd_e1000/em_rxtx.c b/lib/librte_pmd_e1000/em_rxtx.c index e4a13dca5a..ca886cf8c7 100644 --- a/lib/librte_pmd_e1000/em_rxtx.c +++ b/lib/librte_pmd_e1000/em_rxtx.c @@ -1101,7 +1101,12 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name, if ((mz = rte_memzone_lookup(z_name)) != 0) return (mz); +#ifdef RTE_LIBRTE_XEN_DOM0 + return rte_memzone_reserve_bounded(z_name, ring_size, + socket_id, 0, CACHE_LINE_SIZE, RTE_PGSIZE_2M); +#else return rte_memzone_reserve(z_name, ring_size, socket_id, 0); +#endif } static void @@ -1277,7 +1282,11 @@ eth_em_tx_queue_setup(struct rte_eth_dev *dev, txq->port_id = dev->data->port_id; txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(queue_idx)); +#ifndef RTE_LIBRTE_XEN_DOM0 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr; +#else + txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr); +#endif txq->tx_ring = (struct e1000_data_desc *) tz->addr; PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n", @@ -1404,8 +1413,12 @@ eth_em_rx_queue_setup(struct rte_eth_dev *dev, 0 : ETHER_CRC_LEN); rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(queue_idx)); - rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(queue_idx)); + rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(queue_idx)); +#ifndef RTE_LIBRTE_XEN_DOM0 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr; +#else + rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr); +#endif rxq->rx_ring = (struct e1000_rx_desc *) rz->addr; PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n", diff --git 
a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c index 9e3bec5d86..42955397d1 100644 --- a/lib/librte_pmd_e1000/igb_rxtx.c +++ b/lib/librte_pmd_e1000/igb_rxtx.c @@ -1086,8 +1086,13 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name, if (mz) return mz; +#ifdef RTE_LIBRTE_XEN_DOM0 + return rte_memzone_reserve_bounded(z_name, ring_size, + socket_id, 0, IGB_ALIGN, RTE_PGSIZE_2M); +#else return rte_memzone_reserve_aligned(z_name, ring_size, socket_id, 0, IGB_ALIGN); +#endif } static void @@ -1240,9 +1245,12 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev, txq->port_id = dev->data->port_id; txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx)); +#ifndef RTE_LIBRTE_XEN_DOM0 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr; - txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr; - +#else + txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr); +#endif + txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr; /* Allocate software ring */ txq->sw_ring = rte_zmalloc("txq->sw_ring", sizeof(struct igb_tx_entry) * nb_desc, @@ -1372,7 +1380,11 @@ eth_igb_rx_queue_setup(struct rte_eth_dev *dev, } rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx)); rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx)); +#ifndef RTE_LIBRTE_XEN_DOM0 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr; +#else + rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr); +#endif rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr; /* Allocate software ring. */ @@ -1838,8 +1850,7 @@ eth_igb_rx_init(struct rte_eth_dev *dev) /* * Configure RX buffer size. */ - mbp_priv = (struct rte_pktmbuf_pool_private *) - ((char *)rxq->mb_pool + sizeof(struct rte_mempool)); + mbp_priv = rte_mempool_get_priv(rxq->mb_pool); buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM); if (buf_size >= 1024) { @@ -2093,8 +2104,7 @@ eth_igbvf_rx_init(struct rte_eth_dev *dev) /* * Configure RX buffer size. 
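+	 * The value is read through rte_mempool_get_priv() below because,
+	 * with the page-table-carrying header, the private area no longer
+	 * sits at a fixed sizeof(struct rte_mempool) offset.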
*/ - mbp_priv = (struct rte_pktmbuf_pool_private *) - ((char *)rxq->mb_pool + sizeof(struct rte_mempool)); + mbp_priv = rte_mempool_get_priv(rxq->mb_pool); buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM); if (buf_size >= 1024) { diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c index 3ff22a7f69..d5775807ea 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c @@ -1758,8 +1758,13 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name, if (mz) return mz; +#ifdef RTE_LIBRTE_XEN_DOM0 + return rte_memzone_reserve_bounded(z_name, ring_size, + socket_id, 0, IXGBE_ALIGN, RTE_PGSIZE_2M); +#else return rte_memzone_reserve_aligned(z_name, ring_size, - socket_id, 0, IXGBE_ALIGN); + socket_id, 0, IXGBE_ALIGN); +#endif } static void @@ -1971,8 +1976,11 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev, txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx)); else txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx)); - +#ifndef RTE_LIBRTE_XEN_DOM0 txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr; +#else + txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr); +#endif txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr; /* Allocate software ring */ @@ -2221,8 +2229,11 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev, rxq->rdh_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx)); } - +#ifndef RTE_LIBRTE_XEN_DOM0 rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr; +#else + rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr); +#endif rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr; /* @@ -3440,8 +3451,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev) * The value is in 1 KB resolution. Valid values can be from * 1 KB to 16 KB. */ - mbp_priv = (struct rte_pktmbuf_pool_private *) - ((char *)rxq->mb_pool + sizeof(struct rte_mempool)); + mbp_priv = rte_mempool_get_priv(rxq->mb_pool); buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM); srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) & @@ -3712,8 +3722,7 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev) * The value is in 1 KB resolution. Valid values can be from * 1 KB to 16 KB. */ - mbp_priv = (struct rte_pktmbuf_pool_private *) - ((char *)rxq->mb_pool + sizeof(struct rte_mempool)); + mbp_priv = rte_mempool_get_priv(rxq->mb_pool); buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM); srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) & diff --git a/lib/librte_pmd_pcap/rte_eth_pcap.c b/lib/librte_pmd_pcap/rte_eth_pcap.c index 068eadfa3c..ff6d8b012b 100644 --- a/lib/librte_pmd_pcap/rte_eth_pcap.c +++ b/lib/librte_pmd_pcap/rte_eth_pcap.c @@ -140,8 +140,7 @@ eth_pcap_rx(void *queue, break; /* Now get the space available for data in the mbuf */ - mbp_priv = (struct rte_pktmbuf_pool_private *) - ((char *)pcap_q->mb_pool + sizeof(struct rte_mempool)); + mbp_priv = rte_mempool_get_priv(pcap_q->mb_pool); buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);