ifeq ($(CONFIG_RTE_LIBRTE_IEEE1588),y)
SRCS-$(CONFIG_RTE_TEST_PMD) += ieee1588fwd.c
endif
+SRCS-$(CONFIG_RTE_TEST_PMD) += mempool_anon.c
+
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+CFLAGS_mempool_anon.o := -D_GNU_SOURCE
+endif
+CFLAGS_cmdline.o := -D_GNU_SOURCE
# this application needs libraries first
DEPDIRS-$(CONFIG_RTE_TEST_PMD) += lib
streamid_t sm_id;
printf("%s packet forwarding - ports=%d - cores=%d - streams=%d - "
- "NUMA support %s\n",
- cfg->fwd_eng->fwd_mode_name,
- cfg->nb_fwd_ports, cfg->nb_fwd_lcores, cfg->nb_fwd_streams,
- numa_support == 1 ? "enabled" : "disabled");
+ "NUMA support %s, MP over anonymous pages %s\n",
+ cfg->fwd_eng->fwd_mode_name,
+ cfg->nb_fwd_ports, cfg->nb_fwd_lcores, cfg->nb_fwd_streams,
+ numa_support == 1 ? "enabled" : "disabled",
+ mp_anon != 0 ? "enabled" : "disabled");
+
for (lc_id = 0; lc_id < cfg->nb_fwd_lcores; lc_id++) {
printf("Logical Core %u (socket %u) forwards packets on "
"%d streams:",
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "mempool_osdep.h"
+#include <rte_errno.h>
+
+#ifdef RTE_EXEC_ENV_LINUXAPP
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+
+#define PAGEMAP_FNAME "/proc/self/pagemap"
+
+/*
+ * the PFN (page frame number) is held in bits 0-54 (see pagemap.txt in the
+ * linux Documentation).
+ */
+#define PAGEMAP_PFN_BITS 54
+#define PAGEMAP_PFN_MASK RTE_LEN2MASK(PAGEMAP_PFN_BITS, phys_addr_t)
+
+
+static int
+get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, uint32_t pg_sz)
+{
+ int32_t fd, rc;
+ uint32_t i, nb;
+ off_t ofs;
+
+ ofs = (uintptr_t)va / pg_sz * sizeof(*pa);
+ nb = pg_num * sizeof(*pa);
+
+ if ((fd = open(PAGEMAP_FNAME, O_RDONLY)) < 0)
+ return (ENOENT);
+
+ if ((rc = pread(fd, pa, nb, ofs)) < 0 || (rc -= nb) != 0) {
+
+ RTE_LOG(ERR, USER1, "failed read of %u bytes from \'%s\' "
+ "at offset %zu, error code: %d\n",
+ nb, PAGEMAP_FNAME, (size_t)ofs, errno);
+ rc = ENOENT;
+ }
+
+ close(fd);
+
+ for (i = 0; i != pg_num; i++)
+ pa[i] = (pa[i] & PAGEMAP_PFN_MASK) * pg_sz;
+
+ return (rc);
+}
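For reference, a minimal standalone sketch of the same /proc/self/pagemap lookup for a single virtual address (assuming a 4KB page size and the bits 0-54 PFN layout noted above; translate_one is a hypothetical helper, not part of this patch):

    #include <sys/types.h>
    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>

    /* Hypothetical helper: translate one virtual address to a physical
     * address via /proc/self/pagemap. Returns 0 on success. */
    static int
    translate_one(const void *va, uint64_t *pa)
    {
            uint64_t entry, pg_sz = (uint64_t)getpagesize();
            off_t ofs = (uintptr_t)va / pg_sz * sizeof(entry);
            int fd = open("/proc/self/pagemap", O_RDONLY);

            if (fd < 0)
                    return -1;
            if (pread(fd, &entry, sizeof(entry), ofs) != (ssize_t)sizeof(entry)) {
                    close(fd);
                    return -1;
            }
            close(fd);
            /* bits 0-54 hold the PFN; the page offset comes from the VA */
            *pa = (entry & ((1ULL << 55) - 1)) * pg_sz + (uintptr_t)va % pg_sz;
            return 0;
    }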
+
+struct rte_mempool *
+mempool_anon_create(const char *name, unsigned elt_num, unsigned elt_size,
+ unsigned cache_size, unsigned private_data_size,
+ rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+ rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+ int socket_id, unsigned flags)
+{
+ struct rte_mempool *mp;
+ phys_addr_t *pa;
+ char *va, *uv;
+ uint32_t n, pg_num, pg_shift, pg_sz, total_size;
+ size_t sz;
+ ssize_t usz;
+ int32_t rc;
+
+ rc = ENOMEM;
+ mp = NULL;
+
+ pg_sz = getpagesize();
+ if (rte_is_power_of_2(pg_sz) == 0) {
+ rte_errno = EINVAL;
+ return (mp);
+ }
+
+ pg_shift = rte_bsf32(pg_sz);
+
+ total_size = rte_mempool_calc_obj_size(elt_size, flags, NULL);
+
+ /* calc max memory size and max number of pages needed. */
+ sz = rte_mempool_xmem_size(elt_num, total_size, pg_shift);
+ pg_num = sz >> pg_shift;
+
+ /* get a chunk of virtually contiguous memory. */
+ if ((va = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_LOCKED,
+ -1, 0)) == MAP_FAILED) {
+ RTE_LOG(ERR, USER1, "%s(%s) failed mmap of %zu bytes, "
+ "error code: %d\n",
+ __func__, name, sz, errno);
+ rte_errno = rc;
+ return (mp);
+ }
+
+ /* extract physical mappings of the allocated memory. */
+ if ((pa = calloc(pg_num, sizeof (*pa))) != NULL &&
+ (rc = get_phys_map(va, pa, pg_num, pg_sz)) == 0) {
+
+ /*
+ * Check that allocated size is big enough to hold elt_num
+ * objects and calculate how many bytes are actually required.
+ */
+
+ if ((usz = rte_mempool_xmem_usage(va, elt_num, total_size, pa,
+ pg_num, pg_shift)) < 0) {
+
+ n = -usz;
+ rc = ENOENT;
+ RTE_LOG(ERR, USER1, "%s(%s) only %u objects from %u "
+ "requested can be created over "
+ "mmaped region %p of %zu bytes\n",
+ __func__, name, n, elt_num, va, sz);
+ } else {
+
+ /* unmap unused pages if any */
+ if ((size_t)usz < sz) {
+
+ uv = va + usz;
+ usz = sz - usz;
+
+ RTE_LOG(INFO, USER1,
+ "%s(%s): unmap unused %zu of %zu "
+ "mmaped bytes @%p\n",
+ __func__, name, (size_t)usz, sz, uv);
+ munmap(uv, usz);
+ sz -= usz;
+ pg_num = sz >> pg_shift;
+ }
+
+ if ((mp = rte_mempool_xmem_create(name, elt_num,
+ elt_size, cache_size, private_data_size,
+ mp_init, mp_init_arg,
+ obj_init, obj_init_arg,
+ socket_id, flags, va, pa, pg_num,
+ pg_shift)) != NULL)
+ RTE_VERIFY(elt_num == mp->size);
+ }
+ }
+
+ if (mp == NULL) {
+ munmap(va, sz);
+ rte_errno = rc;
+ }
+
+ free(pa);
+ return (mp);
+}
+
+#else /* RTE_EXEC_ENV_LINUXAPP */
+
+
+struct rte_mempool *
+mempool_anon_create(__rte_unused const char *name,
+ __rte_unused unsigned elt_num, __rte_unused unsigned elt_size,
+ __rte_unused unsigned cache_size,
+ __rte_unused unsigned private_data_size,
+ __rte_unused rte_mempool_ctor_t *mp_init,
+ __rte_unused void *mp_init_arg,
+ __rte_unused rte_mempool_obj_ctor_t *obj_init,
+ __rte_unused void *obj_init_arg,
+ __rte_unused int socket_id, __rte_unused unsigned flags)
+{
+ rte_errno = ENOTSUP;
+ return (NULL);
+}
+
+#endif /* RTE_EXEC_ENV_LINUXAPP */
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MEMPOOL_OSDEP_H_
+#define _MEMPOOL_OSDEP_H_
+
+#include <rte_mempool.h>
+
+/**
+ * @file
+ * mempool OS specific header.
+ */
+
+/*
+ * Create mempool over objects from mmap(..., MAP_ANONYMOUS, ...).
+ */
+struct rte_mempool *
+mempool_anon_create(const char *name, unsigned n, unsigned elt_size,
+ unsigned cache_size, unsigned private_data_size,
+ rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+ rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+ int socket_id, unsigned flags);
+
+#endif /* _MEMPOOL_OSDEP_H_ */
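A minimal usage sketch for the new API (illustrative pool name and sizes; testpmd's real call site appears later in this patch):

    /* Illustrative only: 1024 objects of 2KB each, no per-lcore cache,
     * no private data, no constructors, any NUMA socket. */
    struct rte_mempool *mp = mempool_anon_create("anon_pool_0",
                    1024, 2048, 0, 0,
                    NULL, NULL,     /* pool constructor + arg */
                    NULL, NULL,     /* per-object constructor + arg */
                    SOCKET_ID_ANY, 0);
    if (mp == NULL)
            rte_exit(EXIT_FAILURE, "anon pool failed: %d\n", rte_errno);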
{ "coremask", 1, 0, 0 },
{ "portmask", 1, 0, 0 },
{ "numa", 0, 0, 0 },
+ { "mp-anon", 0, 0, 0 },
{ "port-numa-config", 1, 0, 0 },
{ "ring-numa-config", 1, 0, 0 },
{ "socket-num", 1, 0, 0 },
memset(rxring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS);
memset(txring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS);
}
+ if (!strcmp(lgopts[opt_idx].name, "mp-anon")) {
+ mp_anon = 1;
+ }
if (!strcmp(lgopts[opt_idx].name, "port-numa-config")) {
if (parse_portnuma_config(optarg))
rte_exit(EXIT_FAILURE,
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_string_fns.h>
+#ifdef RTE_LIBRTE_PMD_XENVIRT
+#include <rte_eth_xenvirt.h>
+#endif
#include "testpmd.h"
+#include "mempool_osdep.h"
uint16_t verbose_level = 0; /**< Silent by default. */
*/
uint8_t socket_num = UMA_NO_CONFIG;
+/*
+ * Use ANONYMOUS mapped memory (might not be physically contiguous) for mbufs.
+ */
+uint8_t mp_anon = 0;
+
/*
* Record the Ethernet address of peer target ports to which packets are
* forwarded.
return;
}
mbp_ctor_arg = (struct mbuf_pool_ctor_arg *) opaque_arg;
- mbp_priv = (struct rte_pktmbuf_pool_private *)
- ((char *)mp + sizeof(struct rte_mempool));
+ mbp_priv = rte_mempool_get_priv(mp);
mbp_priv->mbuf_data_room_size = mbp_ctor_arg->seg_buf_size;
}
mb_ctor_arg.seg_buf_size = mbp_ctor_arg.seg_buf_size;
mb_size = mb_ctor_arg.seg_buf_offset + mb_ctor_arg.seg_buf_size;
mbuf_poolname_build(socket_id, pool_name, sizeof(pool_name));
- rte_mp = rte_mempool_create(pool_name, nb_mbuf, (unsigned) mb_size,
+
+#ifdef RTE_LIBRTE_PMD_XENVIRT
+ rte_mp = rte_mempool_gntalloc_create(pool_name, nb_mbuf, mb_size,
+ (unsigned) mb_mempool_cache,
+ sizeof(struct rte_pktmbuf_pool_private),
+ testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
+ testpmd_mbuf_ctor, &mb_ctor_arg,
+ socket_id, 0);
+
+#else
+ if (mp_anon != 0)
+ rte_mp = mempool_anon_create(pool_name, nb_mbuf, mb_size,
+ (unsigned) mb_mempool_cache,
+ sizeof(struct rte_pktmbuf_pool_private),
+ testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
+ testpmd_mbuf_ctor, &mb_ctor_arg,
+ socket_id, 0);
+ else
+ rte_mp = rte_mempool_create(pool_name, nb_mbuf, mb_size,
(unsigned) mb_mempool_cache,
sizeof(struct rte_pktmbuf_pool_private),
testpmd_mbuf_pool_ctor, &mbp_ctor_arg,
testpmd_mbuf_ctor, &mb_ctor_arg,
socket_id, 0);
+
+#endif
+
if (rte_mp == NULL) {
rte_exit(EXIT_FAILURE, "Creation of mbuf pool for socket %u "
"failed\n", socket_id);
+ } else if (verbose_level > 0) {
+ rte_mempool_dump(rte_mp);
}
}
return 1;
}
-void
+int
start_port(portid_t pid)
{
int diag, need_check_link_status = 0;
if (test_done == 0) {
printf("Please stop forwarding first\n");
- return;
+ return -1;
}
if (init_fwd_streams() < 0) {
printf("Fail from init_fwd_streams()\n");
- return;
+ return -1;
}
if(dcb_config)
printf("Fail to configure port %d\n", pi);
/* try to reconfigure port next time */
port->need_reconfig = 1;
- return;
+ return -1;
}
}
if (port->need_reconfig_queues > 0) {
printf("Fail to configure port %d tx queues\n", pi);
/* try to reconfigure queues next time */
port->need_reconfig_queues = 1;
- return;
+ return -1;
}
/* setup rx queues */
for (qi = 0; qi < nb_rxq; qi++) {
"No mempool allocation"
"on the socket %d\n",
rxring_numa[pi]);
- return;
+ return -1;
}
diag = rte_eth_rx_queue_setup(pi, qi,
printf("Fail to configure port %d rx queues\n", pi);
/* try to reconfigure queues next time */
port->need_reconfig_queues = 1;
- return;
+ return -1;
}
}
/* start port */
printf("Please stop the ports first\n");
printf("Done\n");
+ return 0;
}
void
nb_rxq, nb_txq);
init_config();
- start_port(RTE_PORT_ALL);
+ if (start_port(RTE_PORT_ALL) != 0)
+ rte_exit(EXIT_FAILURE, "Start ports failed\n");
/* set all ports to promiscuous mode by default */
for (port_id = 0; port_id < nb_ports; port_id++)
extern uint8_t numa_support; /**< set by "--numa" parameter */
extern uint16_t port_topology; /**< set by "--port-topology" parameter */
extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
+extern uint8_t mp_anon; /**< set by "--mp-anon" parameter */
#ifdef RTE_NIC_BYPASS
extern uint32_t bypass_timeout; /**< Store the NIC bypass watchdog timeout */
void stop_packet_forwarding(void);
void init_port_config(void);
int init_port_dcb_config(portid_t pid,struct dcb_config *dcb_conf);
-void start_port(portid_t pid);
+int start_port(portid_t pid);
void stop_port(portid_t pid);
void close_port(portid_t pid);
int all_ports_stopped(void);
static inline int
process_dup(const char *const argv[], int numargs, const char *env_value)
{
+ int num;
+#ifdef RTE_LIBRTE_XEN_DOM0
+ char *argv_cpy[numargs + 2];
+#else
char *argv_cpy[numargs + 1];
+#endif
int i, fd, status;
char path[32];
/* make a copy of the arguments to be passed to exec */
for (i = 0; i < numargs; i++)
argv_cpy[i] = strdup(argv[i]);
+ #ifdef RTE_LIBRTE_XEN_DOM0
+ argv_cpy[i] = strdup("--xen-dom0");
+ argv_cpy[i + 1] = NULL;
+ num = numargs + 1;
+ #else
argv_cpy[i] = NULL;
+ num = numargs;
+ #endif
/* close all open file descriptors, check /proc/self/fd to only
* call close on open fds. Exclude fds 0, 1 and 2*/
close(fd);
}
printf("Running binary with argv[]:");
- for (i = 0; i < numargs; i++)
+ for (i = 0; i < num; i++)
printf("'%s' ", argv_cpy[i]);
printf("\n");
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
-#include <rte_timer.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_string_fns.h>
+#ifdef RTE_LIBRTE_TIMER
+#include <rte_timer.h>
+#endif
#include "test.h"
{ "test_whitelist_flag", no_action },
{ "test_invalid_b_flag", no_action },
{ "test_invalid_r_flag", no_action },
+#ifdef RTE_LIBRTE_XEN_DOM0
+ { "test_dom0_misc_flags", no_action },
+#else
{ "test_misc_flags", no_action },
+#endif
{ "test_memory_flags", no_action },
{ "test_file_prefix", no_action },
{ "test_no_huge_flag", no_action },
if (ret < 0)
return -1;
+#ifdef RTE_LIBRTE_TIMER
rte_timer_subsystem_init();
+#endif
argv += ret;
return 0;
}
+#ifdef RTE_LIBRTE_XEN_DOM0
+static int
+test_dom0_misc_flags(void)
+{
+ char prefix[PATH_MAX], tmp[PATH_MAX];
+
+ if (get_current_prefix(tmp, sizeof(tmp)) == NULL) {
+ printf("Error - unable to get current prefix!\n");
+ return -1;
+ }
+ rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp);
+
+ /* check that some general flags don't prevent things from working.
+ * In all cases apart from the first, the app should run.
+ * No further testing of the output is done.
+ */
+ /* sanity check - failure with invalid option */
+ const char *argv0[] = {prgname, prefix, mp_flag, "-c", "1", "--invalid-opt"};
+
+ /* With --no-pci */
+ const char *argv1[] = {prgname, prefix, mp_flag, "-c", "1", "--no-pci"};
+ /* With -v */
+ const char *argv2[] = {prgname, prefix, mp_flag, "-c", "1", "-v"};
+ /* With valid --syslog */
+ const char *argv3[] = {prgname, prefix, mp_flag, "-c", "1",
+ "--syslog", "syslog"};
+ /* With empty --syslog (should fail) */
+ const char *argv4[] = {prgname, prefix, mp_flag, "-c", "1", "--syslog"};
+ /* With invalid --syslog */
+ const char *argv5[] = {prgname, prefix, mp_flag, "-c", "1", "--syslog", "error"};
+ /* With no-sh-conf */
+ const char *argv6[] = {prgname, "-c", "1", "-n", "2", "-m", "20",
+ "--no-shconf", "--file-prefix=noshconf" };
+
+ if (launch_proc(argv0) == 0) {
+ printf("Error - process ran ok with invalid flag\n");
+ return -1;
+ }
+ if (launch_proc(argv1) != 0) {
+ printf("Error - process did not run ok with --no-pci flag\n");
+ return -1;
+ }
+ if (launch_proc(argv2) != 0) {
+ printf("Error - process did not run ok with -v flag\n");
+ return -1;
+ }
+ if (launch_proc(argv3) != 0) {
+ printf("Error - process did not run ok with --syslog flag\n");
+ return -1;
+ }
+ if (launch_proc(argv4) == 0) {
+ printf("Error - process run ok with empty --syslog flag\n");
+ return -1;
+ }
+ if (launch_proc(argv5) == 0) {
+ printf("Error - process run ok with invalid --syslog flag\n");
+ return -1;
+ }
+ if (launch_proc(argv6) != 0) {
+ printf("Error - process did not run ok with --no-shconf flag\n");
+ return -1;
+ }
+
+ return 0;
+}
+#else
static int
test_misc_flags(void)
{
* effect on secondary processes) */
const char *argv10[] = {prgname, prefix, mp_flag, "-c", "1", "--huge-dir", "invalid"};
+ /* try running with base-virtaddr param */
+ const char *argv11[] = {prgname, "--file-prefix=virtaddr",
+ "-c", "1", "-n", "2", "--base-virtaddr=0x12345678"};
+
if (launch_proc(argv0) == 0) {
printf("Error - process ran ok with invalid flag\n");
printf("Error - secondary process did not run ok with invalid --huge-dir flag\n");
return -1;
}
+ if (launch_proc(argv11) != 0) {
+ printf("Error - process did not run ok with --base-virtaddr parameter\n");
+ return -1;
+ }
return 0;
}
+#endif
static int
test_file_prefix(void)
printf("Error - unable to get current prefix!\n");
return -1;
}
+#ifdef RTE_LIBRTE_XEN_DOM0
+ return 0;
+#endif
/* check if files for current prefix are present */
if (process_hugefiles(prefix, HUGEPAGE_CHECK_EXISTS) != 1) {
static int
test_memory_flags(void)
{
+ const char *mem_size = NULL;
#ifdef RTE_EXEC_ENV_BSDAPP
/* BSD target doesn't support prefixes at this point */
const char * prefix = "";
}
rte_snprintf(prefix, sizeof(prefix), "--file-prefix=%s", tmp);
#endif
- /* valid -m flag */
- const char *argv0[] = {prgname, "-c", "10", "-n", "2",
- "--file-prefix=" memtest, "-m", "2"};
+#ifdef RTE_LIBRTE_XEN_DOM0
+ mem_size = "30";
+#else
+ mem_size = "2";
+#endif
+
/* valid -m flag and mp flag */
- const char *argv1[] = {prgname, prefix, mp_flag, "-c", "10",
- "-n", "2", "-m", "2"};
+ const char *argv0[] = {prgname, prefix, mp_flag, "-c", "10",
+ "-n", "2", "-m", mem_size};
+
+ /* valid -m flag */
+ const char *argv1[] = {prgname, "-c", "10", "-n", "2",
+ "--file-prefix=" memtest, "-m", mem_size};
/* invalid (zero) --socket-mem flag */
const char *argv2[] = {prgname, "-c", "10", "-n", "2",
#endif
if (launch_proc(argv1) != 0) {
- printf("Error - secondary process failed with valid -m flag !\n");
+ printf("Error - process failed with valid -m flag!\n");
return -1;
}
-
+#ifdef RTE_LIBRTE_XEN_DOM0
+ return 0;
+#endif
if (launch_proc(argv2) == 0) {
printf("Error - process run ok with invalid (zero) --socket-mem!\n");
return -1;
return ret;
}
+#ifdef RTE_LIBRTE_XEN_DOM0
+ ret = test_dom0_misc_flags();
+#else
ret = test_misc_flags();
+#endif
if (ret < 0) {
printf("Error in test_misc_flags()");
return ret;
printf("get private data\n");
if (rte_mempool_get_priv(mp) !=
- (char*) mp + sizeof(struct rte_mempool))
+ (char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
return -1;
printf("get physical address of an object\n");
- if (rte_mempool_virt2phy(mp, obj) !=
- (phys_addr_t) (mp->phys_addr + (phys_addr_t) ((char*) obj - (char*) mp)))
+ if (MEMPOOL_IS_CONTIG(mp) &&
+ rte_mempool_virt2phy(mp, obj) !=
+ (phys_addr_t) (mp->phys_addr +
+ (phys_addr_t) ((char*) obj - (char*) mp)))
return -1;
printf("put the object back\n");
return 0;
}
+/*
+ * Basic test for the mempool_xmem functions.
+ */
+static int
+test_mempool_xmem_misc(void)
+{
+ uint32_t elt_num, total_size;
+ size_t sz;
+ ssize_t usz;
+
+ elt_num = MAX_KEEP;
+ total_size = rte_mempool_calc_obj_size(MEMPOOL_ELT_SIZE, 0, NULL);
+ sz = rte_mempool_xmem_size(elt_num, total_size, MEMPOOL_PG_SHIFT_MAX);
+
+ usz = rte_mempool_xmem_usage(NULL, elt_num, total_size, 0, 1,
+ MEMPOOL_PG_SHIFT_MAX);
+
+ if (sz != (size_t)usz) {
+ printf("failure @ %s: rte_mempool_xmem_usage(%u, %u) "
+ "returns: %#zx, while expected: %#zx;\n",
+ __func__, elt_num, total_size, (size_t)usz, sz);
+ return (-1);
+ }
+
+ return (0);
+}
+
int
test_mempool(void)
{
if (test_mempool_same_name_twice_creation() < 0)
return -1;
+ if (test_mempool_xmem_misc() < 0)
+ return -1;
+
rte_mempool_list_dump();
return 0;
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
+#
+# Compile Xen Dom0 support
+#
+CONFIG_RTE_LIBRTE_XEN_DOM0=n
+
#
# Enable warning directives
#
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
+#
+# Compile Xen Dom0 support
+#
+CONFIG_RTE_LIBRTE_XEN_DOM0=n
+
#
# Enable warning directives
#
#
CONFIG_RTE_LIBRTE_PMD_PCAP=n
+
+CONFIG_RTE_LIBRTE_PMD_XENVIRT=n
+
#
# Do prefetch of packet data within PMD driver receive function
#
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
+#
+# Compile Xen Dom0 support
+#
+CONFIG_RTE_LIBRTE_XEN_DOM0=n
+
#
# Enable warning directives
#
CONFIG_RTE_KNI_VHOST_DEBUG_RX=n
CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
+#
+# Compile Xen Dom0 support
+#
+CONFIG_RTE_LIBRTE_XEN_DOM0=n
+
#
# Enable warning directives
#
continue;
kni_free_kni(port);
}
+#ifdef RTE_LIBRTE_XEN_DOM0
+ rte_kni_close();
+#endif
for (i = 0; i < RTE_MAX_ETHPORTS; i++)
if (kni_port_params_array[i]) {
rte_free(kni_port_params_array[i]);
#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__)
#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy")
+#define RTE_VERIFY(exp) do { \
+ if (!(exp)) \
+ rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
+} while (0)
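The new macro is used in mempool_anon.c above to assert a postcondition, e.g.:

    /* panics with the failing expression and line number if false */
    RTE_VERIFY(elt_num == mp->size);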
+
/*
* Provide notification of a critical non-recoverable error and stop.
*
#include <stdint.h>
#include <stddef.h>
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#include <exec-env/rte_dom0_common.h>
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
int32_t socket_id; /**< NUMA socket ID. */
uint32_t nchannel; /**< Number of channels. */
uint32_t nrank; /**< Number of ranks. */
+#ifdef RTE_LIBRTE_XEN_DOM0
+ uint64_t mfn[DOM0_NUM_MEMBLOCK]; /**< store segment MFNs */
+#endif
} __attribute__((__packed__));
*/
unsigned rte_memory_get_nrank(void);
+#ifdef RTE_LIBRTE_XEN_DOM0
+/**
+ * Return the machine address backing a given physical address on
+ * Xen Dom0.
+ *
+ * @param memseg_id
+ *   Index of the memory segment that contains the physical address.
+ * @param phy_addr
+ *   The physical address to translate.
+ *
+ * @return
+ *   The machine address.
+ */
+phys_addr_t rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr);
+
+/**
+ * Initialize memory for an application running on Xen Dom0.
+ *
+ * @param void
+ *
+ * @return
+ *   0 on success.
+ *   Negative on error.
+ */
+int rte_xen_dom0_memory_init(void);
+
+/**
+ * Attach to the memory segments of the primary process on Xen Dom0.
+ *
+ * @param void
+ *
+ * @return
+ *   0 on success.
+ *   Negative on error.
+ */
+int rte_xen_dom0_memory_attach(void);
+#endif
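A sketch of the intended call split, mirroring the EAL changes later in this patch: the primary process sets up the Dom0 mappings and secondary processes attach to them.

    /* sketch: pick init vs. attach based on the process type */
    int ret;
    if (rte_eal_process_type() == RTE_PROC_PRIMARY)
            ret = rte_xen_dom0_memory_init();
    else
            ret = rte_xen_dom0_memory_attach();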
#ifdef __cplusplus
}
#endif
ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += kni
endif
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
+DIRS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += xen_dom0
+endif
include $(RTE_SDK)/mk/rte.subdir.mk
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) := eal.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_memory.c
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_xen_memory.c
+endif
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_thread.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_log.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_pci.c
CFLAGS_eal_hpet.o += -Wno-return-type
endif
-INC := rte_per_lcore.h rte_lcore.h rte_interrupts.h rte_kni_common.h
+INC := rte_per_lcore.h rte_lcore.h rte_interrupts.h rte_kni_common.h rte_dom0_common.h
SYMLINK-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP)-include/exec-env := \
$(addprefix include/exec-env/,$(INC))
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/io.h>
-#include <sys/user.h>
-#include <linux/binfmts.h>
#include <rte_common.h>
#include <rte_debug.h>
#define OPT_USE_DEVICE "use-device"
#define OPT_SYSLOG "syslog"
#define OPT_BASE_VIRTADDR "base-virtaddr"
+#define OPT_XEN_DOM0 "xen-dom0"
#define RTE_EAL_BLACKLIST_SIZE 0x100
" (multiple -b options are allowed)\n"
" -m MB : memory to allocate (see also --"OPT_SOCKET_MEM")\n"
" -r NUM : force number of memory ranks (don't detect)\n"
+ " --"OPT_XEN_DOM0" : support application running on Xen Domain0 "
+ "without hugetlbfs\n"
" --"OPT_SYSLOG" : set syslog facility\n"
" --"OPT_SOCKET_MEM" : memory to allocate on specific \n"
" sockets (use comma separated values)\n"
if (coremask[0] == '0' && ((coremask[1] == 'x')
|| (coremask[1] == 'X')) )
coremask += 2;
- i = strnlen(coremask, MAX_ARG_STRLEN);
+ i = strnlen(coremask, PATH_MAX);
while ((i > 0) && isblank(coremask[i - 1]))
i--;
if (i == 0)
{OPT_USE_DEVICE, 1, 0, 0},
{OPT_SYSLOG, 1, NULL, 0},
{OPT_BASE_VIRTADDR, 1, 0, 0},
+ {OPT_XEN_DOM0, 0, 0, 0},
{0, 0, 0, 0}
};
internal_config.hugepage_dir = NULL;
internal_config.force_sockets = 0;
internal_config.syslog_facility = LOG_DAEMON;
+ internal_config.xen_dom0_support = 0;
#ifdef RTE_LIBEAL_USE_HPET
internal_config.no_hpet = 0;
#else
if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) {
internal_config.no_hugetlbfs = 1;
}
+ if (!strcmp(lgopts[option_index].name, OPT_XEN_DOM0)) {
+ #ifdef RTE_LIBRTE_XEN_DOM0
+ internal_config.xen_dom0_support = 1;
+ #else
+ RTE_LOG(ERR, EAL, "Can't support DPDK app "
+ "running on Dom0, please configure"
+ " RTE_LIBRTE_XEN_DOM0=y\n");
+ return -1;
+ #endif
+ }
else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) {
internal_config.no_pci = 1;
}
eal_usage(prgname);
return -1;
}
-
+ /* --xen-dom0 doesn't make sense with --socket-mem */
+ if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
+ RTE_LOG(ERR, EAL, "Options --socket-mem cannot be specified "
+ "together with --xen_dom0!\n");
+ eal_usage(prgname);
+ return -1;
+ }
/* if no blacklist, parse a whitelist */
if (blacklist_index > 0) {
if (eal_dev_whitelist_exists()) {
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
+ internal_config.xen_dom0_support == 0 &&
eal_hugepage_info_init() < 0)
rte_panic("Cannot get hugepage information\n");
return 0;
}
+ /* check if the app runs on Xen Dom0 */
+ if (internal_config.xen_dom0_support) {
+#ifdef RTE_LIBRTE_XEN_DOM0
+ /* use dom0_mm kernel driver to init memory */
+ if (rte_xen_dom0_memory_init() < 0)
+ return -1;
+ else
+ return 0;
+#endif
+ }
+
/* calculate total number of hugepages available. at this point we haven't
* yet started sorting them so they all are on socket 0 */
"into secondary processes\n");
}
+ if (internal_config.xen_dom0_support) {
+#ifdef RTE_LIBRTE_XEN_DOM0
+ if (rte_xen_dom0_memory_attach() < 0) {
+ RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay "
+ "process\n");
+ return -1;
+ }
+ return 0;
+#endif
+ }
+
fd_zero = open("/dev/zero", O_RDONLY);
if (fd_zero < 0) {
RTE_LOG(ERR, EAL, "Could not open /dev/zero\n");
* or uio:uioX */
rte_snprintf(dirname, sizeof(dirname),
- "/sys/bus/pci/devices/" PCI_PRI_FMT "/uio",
+ SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio",
loc->domain, loc->bus, loc->devid, loc->function);
dir = opendir(dirname);
if (dir == NULL) {
/* retry with the parent directory */
rte_snprintf(dirname, sizeof(dirname),
- "/sys/bus/pci/devices/" PCI_PRI_FMT,
+ SYSFS_PCI_DEVICES "/" PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
dir = opendir(dirname);
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/file.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_common.h>
+#include <rte_string_fns.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_filesystem.h"
+#include <exec-env/rte_dom0_common.h>
+
+#define PAGE_SIZE RTE_PGSIZE_4K
+#define DEFAULT_DOM0_NAME "dom0-mem"
+
+static int xen_fd = -1;
+static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB";
+
+/*
+ * Try to mmap *size bytes from /dev/zero. If it is successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Else, retry
+ * with a smaller zone: decrease *size by mem_size until it reaches
+ * 0. In that case, return NULL. Note: this function returns an address
+ * which is a multiple of mem_size.
+ */
+static void *
+xen_get_virtual_area(size_t *size, size_t mem_size)
+{
+ void *addr;
+ int fd;
+ uintptr_t aligned_addr;
+
+ RTE_LOG(INFO, EAL, "Ask a virtual area of 0x%zu bytes\n", *size);
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd < 0){
+ RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n");
+ return NULL;
+ }
+ do {
+ addr = mmap(NULL, (*size) + mem_size, PROT_READ,
+ MAP_PRIVATE, fd, 0);
+ if (addr == MAP_FAILED)
+ *size -= mem_size;
+ } while (addr == MAP_FAILED && *size > 0);
+
+ if (addr == MAP_FAILED) {
+ close(fd);
+ RTE_LOG(INFO, EAL, "Cannot get a virtual area\n");
+ return NULL;
+ }
+
+ munmap(addr, (*size) + mem_size);
+ close(fd);
+
+ /* align addr to a mem_size boundary */
+ aligned_addr = (uintptr_t)addr;
+ aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size);
+ addr = (void *)(aligned_addr);
+
+ RTE_LOG(INFO, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+ addr, *size);
+
+ return addr;
+}
+
+/**
+ * Get the memory size configuration from the
+ * /sys/kernel/mm/dom0-mm/memsize-mB/memsize file; the size unit is MB.
+ */
+static int
+get_xen_memory_size(void)
+{
+ char path[PATH_MAX];
+ unsigned long mem_size = 0;
+ static const char *file_name;
+
+ file_name = "memsize";
+ rte_snprintf(path, sizeof(path), "%s/%s",
+ sys_dir_path, file_name);
+
+ if (eal_parse_sysfs_value(path, &mem_size) < 0)
+ return -1;
+
+ if (mem_size == 0)
+ rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not"
+ " configured.\n",sys_dir_path, file_name);
+ if (mem_size % 2)
+ rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be"
+ " even number.\n",sys_dir_path, file_name);
+
+ if (mem_size > DOM0_CONFIG_MEMSIZE)
+ rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger"
+ " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE);
+
+ return mem_size;
+}
+
+/**
+ * Calculate the MFN (machine frame number) for a physical address in Xen Dom0.
+ */
+phys_addr_t
+rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
+{
+ int mfn_id;
+ uint64_t mfn, mfn_offset;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg *memseg = mcfg->memseg;
+
+ mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M;
+
+ /* the MFNs are contiguous within each 2M block */
+ mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) %
+ RTE_PGSIZE_2M / PAGE_SIZE;
+ mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id];
+
+ /* return the machine address */
+ return (mfn * PAGE_SIZE + phy_addr % PAGE_SIZE);
+}
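A worked example of the arithmetic above (illustrative addresses; 4KB pages, MFNs contiguous per 2MB block):

    /* Assume memseg[0].phys_addr == 0x100000000 and
     * phy_addr == 0x100030000, i.e. 0x30000 bytes into segment 0:
     *   mfn_id     = 0x30000 / RTE_PGSIZE_2M          = 0
     *   mfn_offset = (0x30000 % RTE_PGSIZE_2M) / 4096 = 0x30
     *   mfn        = memseg[0].mfn[0] + 0x30
     *   machine    = mfn * 4096 + 0x100030000 % 4096  = mfn * 4096
     */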
+
+int
+rte_xen_dom0_memory_init(void)
+{
+ void *vir_addr, *vma_addr = NULL;
+ int err, ret = 0;
+ uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0;
+ size_t vma_len = 0;
+ struct memory_info meminfo;
+ struct memseg_info seginfo[RTE_MAX_MEMSEG];
+ int flags, page_size = getpagesize();
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg *memseg = mcfg->memseg;
+ uint64_t total_mem = internal_config.memory;
+
+ memset(seginfo, 0, sizeof(seginfo));
+ memset(&meminfo, 0, sizeof(struct memory_info));
+
+ mem_size = get_xen_memory_size();
+ requested = (unsigned) (total_mem / 0x100000);
+ if (requested > mem_size)
+ /* if we didn't satisfy total memory requirements */
+ rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB,"
+ " available: %uMB\n", requested, mem_size);
+ else if (total_mem != 0)
+ mem_size = requested;
+
+ /* Check FD and open once */
+ if (xen_fd < 0) {
+ xen_fd = open(DOM0_MM_DEV, O_RDWR);
+ if (xen_fd < 0) {
+ RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
+ return -1;
+ }
+ }
+
+ meminfo.size = mem_size;
+
+ /* construct the memory management name for Dom0 */
+ rte_snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s",
+ internal_config.hugefile_prefix, DEFAULT_DOM0_NAME);
+
+ /* Notify kernel driver to allocate memory */
+ ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n");
+ err = -EIO;
+ goto fail;
+ }
+
+ /* Get number of memory segment from driver */
+ ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n");
+ err = -EIO;
+ goto fail;
+ }
+
+ if(num_memseg > RTE_MAX_MEMSEG){
+ RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater"
+ " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG);
+ err = -EIO;
+ goto fail;
+ }
+
+ /* get information about all memory segments */
+ ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n");
+ err = -EIO;
+ goto fail;
+ }
+
+ /* map all memory segments to contiguous user space */
+ for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++) {
+ vma_len = seginfo[memseg_idx].size;
+
+ /**
+ * get the biggest virtual memory area up to vma_len. If it fails,
+ * vma_addr is NULL, so let the kernel provide the address.
+ */
+ vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M);
+ if (vma_addr == NULL) {
+ flags = MAP_SHARED;
+ vma_len = RTE_PGSIZE_2M;
+ } else
+ flags = MAP_SHARED | MAP_FIXED;
+
+ seginfo[memseg_idx].size = vma_len;
+ vir_addr = mmap(vma_addr, seginfo[memseg_idx].size,
+ PROT_READ|PROT_WRITE, flags, xen_fd,
+ memseg_idx * page_size);
+ if (vir_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n",
+ DOM0_MM_DEV);
+ err = -EIO;
+ goto fail;
+ }
+
+ memseg[memseg_idx].addr = vir_addr;
+ memseg[memseg_idx].phys_addr = page_size *
+ seginfo[memseg_idx].pfn;
+ memseg[memseg_idx].len = seginfo[memseg_idx].size;
+ for (i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++)
+ memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i];
+
+ /* MFNs are contiguous in 2M, so assume that the page size is 2M */
+ memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M;
+
+ memseg[memseg_idx].nchannel = mcfg->nchannel;
+ memseg[memseg_idx].nrank = mcfg->nrank;
+
+ /* NUMA is not supported in Xen Dom0, so only set socket 0 */
+ memseg[memseg_idx].socket_id = 0;
+ }
+
+ return 0;
+fail:
+ if (xen_fd > 0) {
+ close(xen_fd);
+ xen_fd = -1;
+ }
+ return err;
+}
+
+/*
+ * This creates the memory mappings in the secondary process to match that of
+ * the server process. It goes through each memory segment in the DPDK runtime
+ * configuration, mapping them in order to form a contiguous block in the
+ * virtual memory space
+ */
+int
+rte_xen_dom0_memory_attach(void)
+{
+ const struct rte_mem_config *mcfg;
+ unsigned s = 0; /* s used to track the segment number */
+ int xen_fd = -1;
+ int ret = -1;
+ void *vir_addr;
+ char name[DOM0_NAME_MAX] = {0};
+ int page_size = getpagesize();
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* Check FD and open once */
+ if (xen_fd < 0) {
+ xen_fd = open(DOM0_MM_DEV, O_RDWR);
+ if (xen_fd < 0) {
+ RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV);
+ goto error;
+ }
+ }
+
+ /* construct the memory management name for Dom0 */
+ rte_snprintf(name, DOM0_NAME_MAX, "%s-%s",
+ internal_config.hugefile_prefix, DEFAULT_DOM0_NAME);
+ /* attach to memory segments of primary process */
+ ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name);
+ if (ret) {
+ RTE_LOG(ERR, EAL,"attach memory segments fail.\n");
+ goto error;
+ }
+
+ /* map all segments into memory to make sure we get the addrs */
+ for (s = 0; s < RTE_MAX_MEMSEG; ++s) {
+
+ /*
+ * the first memory segment with len==0 is the one that
+ * follows the last valid segment.
+ */
+ if (mcfg->memseg[s].len == 0)
+ break;
+
+ vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
+ PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd,
+ s * page_size);
+ if (vir_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes "
+ "in %s to requested address [%p]\n",
+ (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV,
+ mcfg->memseg[s].addr);
+ goto error;
+ }
+ }
+ return 0;
+
+error:
+ if (xen_fd >= 0) {
+ close(xen_fd);
+ xen_fd = -1;
+ }
+ return -1;
+}
volatile unsigned force_nchannel; /**< force number of channels */
volatile unsigned force_nrank; /**< force number of ranks */
volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */
+ volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
volatile unsigned no_pci; /**< true to disable PCI */
volatile unsigned no_hpet; /**< true to disable HPET */
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
--- /dev/null
+/*-
+ * This file is provided under a dual BSD/LGPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ * Copyright(c) 2007-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_DOM0_COMMON_H_
+#define _RTE_DOM0_COMMON_H_
+
+#ifdef __KERNEL__
+#include <linux/if.h>
+#endif
+
+#define DOM0_NAME_MAX 256
+#define DOM0_MM_DEV "/dev/dom0_mm"
+
+#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */
+#define DOM0_NUM_MEMSEG 512 /**< Maximum number of memory segments. */
+#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Size of a memory block (2MB). */
+#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum configurable memory size (4GB), in MB. */
+#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum number of 2MB memory blocks. */
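These constants are mutually consistent: 2^9 4KB pages form one 2MB block, and the 4096MB budget holds 4096 / 2 = 2048 blocks. A hypothetical compile-time check (not part of this patch) would make that explicit:

    /* hypothetical consistency checks; the arrays get size -1 on mismatch */
    typedef char dom0_block_size_check[
            (DOM0_MEMBLOCK_SIZE == (1 << DOM0_CONTIG_NUM_ORDER) * 4096) ? 1 : -1];
    typedef char dom0_num_block_check[
            (DOM0_NUM_MEMBLOCK == DOM0_CONFIG_MEMSIZE / 2) ? 1 : -1];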
+
+#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1, struct memory_info)
+#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2, char *)
+#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int)
+#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *)
+
+/**
+ * A structure used to store memory information.
+ */
+struct memory_info {
+ char name[DOM0_NAME_MAX];
+ uint64_t size;
+};
+
+/**
+ * A structure used to store memory segment information.
+ */
+struct memseg_info {
+ uint32_t idx;
+ uint64_t pfn;
+ uint64_t size;
+ uint64_t mfn[DOM0_NUM_MEMBLOCK];
+};
+
+/**
+ * A structure used to store memory block information.
+ */
+struct memblock_info {
+ uint8_t exchange_flag;
+ uint64_t vir_addr;
+ uint64_t pfn;
+ uint64_t mfn;
+};
+#endif /* _RTE_DOM0_COMMON_H_ */
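The intended user-space sequence against these ioctls, condensed from what rte_xen_dom0_memory_init() in this patch does (error handling omitted; the name and size are illustrative):

    static struct memseg_info si[DOM0_NUM_MEMSEG];  /* static: large table */
    struct memory_info mi = { .name = "app-dom0-mem", .size = 64 }; /* MB */
    int num = 0, fd = open(DOM0_MM_DEV, O_RDWR);

    ioctl(fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &mi);   /* kernel allocates */
    ioctl(fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num);  /* segment count */
    ioctl(fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, si);   /* pfn/size/mfn table */
    /* segment i is mapped at file offset i * page_size */
    void *va = mmap(NULL, si[0].size, PROT_READ | PROT_WRITE,
                    MAP_SHARED, fd, 0);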
#include <linux/msi.h>
#include <linux/version.h>
+#ifdef CONFIG_XEN_DOM0
+#include <xen/xen.h>
+#endif
+
/**
* MSI-X related macros, copy from linux/pci_regs.h in kernel 2.6.39,
* but none of them in kernel 2.6.35.
return ret;
}
+#ifdef CONFIG_XEN_DOM0
+static int
+igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma)
+{
+ int idx;
+ idx = (int)vma->vm_pgoff;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_page_prot.pgprot |= _PAGE_IOMAP;
+
+ return remap_pfn_range(vma,
+ vma->vm_start,
+ info->mem[idx].addr >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
+/**
+ * This is the uio device mmap method, which uses the igbuio mmap scheme
+ * for the Xen Dom0 environment.
+ */
+static int
+igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
+{
+ int idx;
+
+ if (vma->vm_pgoff >= MAX_UIO_MAPS)
+ return -EINVAL;
+ if(info->mem[vma->vm_pgoff].size == 0)
+ return -EINVAL;
+
+ idx = (int)vma->vm_pgoff;
+ switch (info->mem[idx].memtype) {
+ case UIO_MEM_PHYS:
+ return igbuio_dom0_mmap_phys(info, vma);
+ case UIO_MEM_LOGICAL:
+ case UIO_MEM_VIRTUAL:
+ default:
+ return -EINVAL;
+ }
+}
+#endif
+
/* Remap pci resources described by bar #pci_bar in uio resource n. */
static int
igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
udev->info.version = "0.1";
udev->info.handler = igbuio_pci_irqhandler;
udev->info.irqcontrol = igbuio_pci_irqcontrol;
+#ifdef CONFIG_XEN_DOM0
+ /* check if the driver runs on Xen Dom0 */
+ if (xen_initial_domain())
+ udev->info.mmap = igbuio_dom0_pci_mmap;
+#endif
udev->info.priv = udev;
udev->pdev = dev;
udev->mode = 0; /* set the default value for interrupt mode */
--- /dev/null
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# module name and path
+#
+MODULE = rte_dom0_mm
+
+#
+# CFLAGS
+#
+MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50
+MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
+MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
+MODULE_CFLAGS += -Wall -Werror
+
+# this lib needs main eal
+DEPDIRS-y += lib/librte_eal/linuxapp/eal
+
+#
+# all source are stored in SRCS-y
+#
+
+SRCS-y += dom0_mm_misc.c
+
+include $(RTE_SDK)/mk/rte.module.mk
--- /dev/null
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _DOM0_MM_DEV_H_
+#define _DOM0_MM_DEV_H_
+
+#include <linux/wait.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <exec-env/rte_dom0_common.h>
+
+#define NUM_MEM_CTX 256 /**< Maximum number of memory contexts. */
+#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum number of allowed exchange failures. */
+
+/**
+ * A structure describing the private information for a dom0 device.
+ */
+struct dom0_mm_dev {
+ struct miscdevice miscdev;
+ uint32_t allocated_memsize;
+ uint32_t num_mem_ctx;
+ uint32_t config_memsize;
+ struct dom0_mm_data *mm_data[NUM_MEM_CTX];
+ struct mutex data_lock;
+};
+
+struct dom0_mm_data {
+ uint8_t fail_times;
+ uint32_t refcnt;
+ uint32_t num_memseg; /**< Number of memory segments. */
+ uint32_t mem_size; /**< Size of the requested memory, in MB. */
+ char name[DOM0_NAME_MAX];
+
+ /** Memory block information. */
+ struct memblock_info block_info[DOM0_NUM_MEMBLOCK];
+
+ /** Memory segment information. */
+ struct memseg_info seg_info[DOM0_NUM_MEMSEG];
+};
+
+#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args)
+#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args)
+#endif
--- /dev/null
+/*-
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/xen-ops.h>
+#include <xen/interface/memory.h>
+
+#include <rte_config.h>
+#include <exec-env/rte_dom0_common.h>
+
+#include "dom0_mm_dev.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0");
+
+static struct dom0_mm_dev dom0_dev;
+static struct kobject *dom0_kobj = NULL;
+
+static int dom0_open(struct inode *inode, struct file *file);
+static int dom0_release(struct inode *inode, struct file *file);
+static int dom0_ioctl(struct file *file, unsigned int ioctl_num,
+ unsigned long ioctl_param);
+static int dom0_mmap(struct file *file, struct vm_area_struct *vma);
+static int dom0_memory_free(struct dom0_mm_data *mm_data);
+
+static const struct file_operations data_fops = {
+ .owner = THIS_MODULE,
+ .open = dom0_open,
+ .release = dom0_release,
+ .mmap = dom0_mmap,
+ .unlocked_ioctl = (void *)dom0_ioctl,
+};
+
+static ssize_t
+show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return snprintf(buf, 10, "%u\n", dom0_dev.allocated_memsize);
+}
+
+static ssize_t
+show_memsize(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize);
+}
+
+static ssize_t
+store_memsize(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int err = 0;
+ unsigned long mem_size;
+
+ if (0 != strict_strtoul(buf, 0, &mem_size))
+ return -EINVAL;
+
+ mutex_lock(&dom0_dev.data_lock);
+ if (0 == mem_size) {
+ err = -EINVAL;
+ goto fail;
+ } else if (mem_size < dom0_dev.allocated_memsize ||
+ mem_size > DOM0_CONFIG_MEMSIZE) {
+ XEN_ERR("configure memory size fail\n");
+ err = -EINVAL;
+ goto fail;
+ } else
+ dom0_dev.config_memsize = mem_size;
+
+fail:
+ mutex_unlock(&dom0_dev.data_lock);
+ return err ? err : count;
+}
+
+static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize);
+static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL);
+
+static struct attribute *dev_attrs[] = {
+ &dev_attr_memsize.attr,
+ &dev_attr_memsize_rsvd.attr,
+ NULL,
+};
+
+/* the memory size unit is MB */
+static const struct attribute_group dev_attr_grp = {
+ .name = "memsize-mB",
+ .attrs = dev_attrs,
+};
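With this attribute group, an administrator configures the reservable size through sysfs before starting a DPDK application; the value must be even and at most DOM0_CONFIG_MEMSIZE, matching the path the EAL reads above:

    /* e.g. from a root shell:
     *   echo 2048 > /sys/kernel/mm/dom0-mm/memsize-mB/memsize
     *   cat /sys/kernel/mm/dom0-mm/memsize-mB/memsize_rsvd
     */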
+
+static void
+sort_viraddr(struct memblock_info *mb, int cnt)
+{
+ int i, j;
+ uint64_t tmp_pfn;
+ uint64_t tmp_viraddr;
+
+ /* bubble-sort blocks by pfn, keeping virtual addresses paired */
+ for (i = 0; i < cnt; i++) {
+ for (j = cnt - 1; j > i; j--) {
+ if (mb[j].pfn < mb[j - 1].pfn) {
+ tmp_pfn = mb[j - 1].pfn;
+ mb[j - 1].pfn = mb[j].pfn;
+ mb[j].pfn = tmp_pfn;
+
+ tmp_viraddr = mb[j - 1].vir_addr;
+ mb[j - 1].vir_addr = mb[j].vir_addr;
+ mb[j].vir_addr = tmp_viraddr;
+ }
+ }
+ }
+}
+
+static int
+dom0_find_memdata(const char *mem_name)
+{
+ unsigned i;
+ int idx = -1;
+ for(i = 0; i< NUM_MEM_CTX; i++) {
+ if(dom0_dev.mm_data[i] == NULL)
+ continue;
+ if (!strncmp(dom0_dev.mm_data[i]->name, mem_name,
+ sizeof(char) * DOM0_NAME_MAX)) {
+ idx = i;
+ break;
+ }
+ }
+
+ return idx;
+}
+
+static int
+dom0_find_mempos(const char *mem_name)
+{
+ unsigned i;
+ int idx = -1;
+
+ for(i = 0; i< NUM_MEM_CTX; i++) {
+ if(dom0_dev.mm_data[i] == NULL){
+ idx = i;
+ break;
+ }
+ }
+
+ return idx;
+}
+
+static int
+dom0_memory_free(struct dom0_mm_data *mm_data)
+{
+ int idx;
+ uint64_t vstart, vaddr;
+ uint32_t i, num_block, size;
+
+ if (!xen_pv_domain())
+ return -1;
+
+ /* each memory block is 2M */
+ num_block = mm_data->mem_size / 2;
+ if (num_block == 0)
+ return -1;
+
+ /* free the memory and destroy the contiguous region in Xen */
+ for (i = 0; i < num_block; i++) {
+ vstart = mm_data->block_info[i].vir_addr;
+ if (vstart) {
+ if (mm_data->block_info[i].exchange_flag)
+ xen_destroy_contiguous_region(vstart,
+ DOM0_CONTIG_NUM_ORDER);
+
+ size = DOM0_MEMBLOCK_SIZE;
+ vaddr = vstart;
+ while (size > 0) {
+ ClearPageReserved(virt_to_page(vaddr));
+ vaddr += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ free_pages(vstart, DOM0_CONTIG_NUM_ORDER);
+ }
+ }
+
+ /* reset global memory data */
+ idx = dom0_find_memdata(mm_data->name);
+ if (idx >= 0) {
+ dom0_dev.allocated_memsize -= mm_data->mem_size;
+ dom0_dev.mm_data[idx] = NULL;
+ dom0_dev.num_mem_ctx--;
+ }
+ memset(mm_data, 0, sizeof(struct dom0_mm_data));
+ vfree(mm_data);
+
+ return 0;
+}
+
+/**
+ * Find all memory segments in which physical addresses are contiguous.
+ */
+static void
+find_memseg(int count, struct dom0_mm_data *mm_data)
+{
+ int i = 0;
+ int j, k, idx = 0;
+ uint64_t zone_len, pfn, num_block;
+
+ while(i < count) {
+ if (mm_data->block_info[i].exchange_flag == 0) {
+ i++;
+ continue;
+ }
+ k = 0;
+ pfn = mm_data->block_info[i].pfn;
+ mm_data->seg_info[idx].pfn = pfn;
+ mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn;
+
+ for (j = i + 1; j < count; j++) {
+
+ /* stop merging at a block whose MFN exchange failed */
+ if (mm_data->block_info[j].exchange_flag == 0)
+ break;
+
+ if (mm_data->block_info[j].pfn !=
+ (mm_data->block_info[j - 1].pfn +
+ DOM0_MEMBLOCK_SIZE / PAGE_SIZE))
+ break;
+ ++k;
+ mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn;
+ }
+
+ num_block = j - i;
+ zone_len = num_block * DOM0_MEMBLOCK_SIZE;
+ mm_data->seg_info[idx].size = zone_len;
+
+ XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len);
+ i += num_block;
+ idx++;
+ if (idx == DOM0_NUM_MEMSEG)
+ break;
+ }
+ mm_data->num_memseg = idx;
+}
+
+static int
+dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data)
+{
+ uint64_t pfn, vstart, vaddr;
+ uint32_t i, num_block, size;
+ int idx;
+
+ /* allocate the memory in blocks of 2M each */
+ num_block = meminfo->size / 2;
+
+ for (i = 0; i< num_block; i++) {
+ vstart = (unsigned long)
+ __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER);
+ if (vstart == 0) {
+ XEN_ERR("allocate memory fail.\n");
+ mm_data->mem_size = 2 * i;
+ dom0_memory_free(mm_data);
+ return -ENOMEM;
+ }
+
+ size = DOM0_MEMBLOCK_SIZE;
+ vaddr = vstart;
+ while (size > 0) {
+ SetPageReserved(virt_to_page(vaddr));
+ vaddr += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ pfn = virt_to_pfn(vstart);
+ mm_data->block_info[i].pfn = pfn;
+ mm_data->block_info[i].vir_addr = vstart;
+ }
+
+ sort_viraddr(mm_data->block_info, num_block);
+
+ for (i = 0; i< num_block; i++) {
+
+ /*
+ * This API exchanges MFNs to obtain a block of contiguous
+ * machine (physical) addresses; its maximum size is 2M.
+ */
+ if (xen_create_contiguous_region(mm_data->block_info[i].vir_addr,
+ DOM0_CONTIG_NUM_ORDER, 0) == 0) {
+ mm_data->block_info[i].exchange_flag = 1;
+ mm_data->block_info[i].mfn =
+ pfn_to_mfn(mm_data->block_info[i].pfn);
+ } else {
+ XEN_ERR("exchange memeory fail\n");
+ mm_data->block_info[i].exchange_flag = 0;
+ mm_data->fail_times++;
+ if (mm_data->fail_times > MAX_EXCHANGE_FAIL_TIME) {
+ mm_data->mem_size = meminfo->size;
+ dom0_memory_free(mm_data);
+ return -1;
+ }
+ }
+ }
+
+ find_memseg(num_block, mm_data);
+
+ /* update private memory data */
+ mm_data->refcnt++;
+ mm_data->mem_size = meminfo->size;
+ memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX);
+ mm_data->name[DOM0_NAME_MAX -1] = '\0';
+
+ /* update global memory data */
+ idx = dom0_find_mempos(meminfo->name);
+ if (idx < 0) {
+ dom0_memory_free(mm_data);
+ return -1;
+ }
+
+ dom0_dev.mm_data[idx] = mm_data;
+ dom0_dev.num_mem_ctx++;
+ dom0_dev.allocated_memsize += mm_data->mem_size;
+
+ return 0;
+}
+
+static int
+dom0_check_memory(struct memory_info *meminfo)
+{
+ int idx;
+ uint64_t mem_size;
+
+ /* round memory size to the next even number. */
+ if (meminfo->size % 2)
+ ++meminfo->size;
+
+ mem_size = meminfo->size;
+ if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) {
+ XEN_ERR("Memory data space is full in Dom0 driver\n");
+ return -1;
+ }
+ idx = dom0_find_memdata(meminfo->name);
+ if (idx >= 0) {
+ XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n",
+ meminfo->name);
+ return -1;
+ }
+ if ((dom0_dev.allocated_memsize + mem_size) >
+ dom0_dev.config_memsize) {
+ XEN_ERR("total memory size can't be larger than config memory size.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int __init
+dom0_init(void)
+{
+ if (!xen_domain())
+ return -ENODEV;
+
+ /* Setup the misc device */
+ dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR;
+ dom0_dev.miscdev.name = "dom0_mm";
+ dom0_dev.miscdev.fops = &data_fops;
+
+ /* register misc char device */
+ if (misc_register(&dom0_dev.miscdev) != 0) {
+ XEN_ERR("Misc device registration failed\n");
+ return -EPERM;
+ }
+
+ mutex_init(&dom0_dev.data_lock);
+ dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj);
+
+ if (!dom0_kobj) {
+ XEN_ERR("dom0-mm object creation failed\n");
+ misc_deregister(&dom0_dev.miscdev);
+ return -ENOMEM;
+ }
+
+ if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) {
+ sysfs_remove_group(dom0_kobj, &dev_attr_grp);
+ kobject_put(dom0_kobj);
+ misc_deregister(&dom0_dev.miscdev);
+ return -EPERM;
+ }
+
+ XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n");
+ return 0;
+}
+
+static void __exit
+dom0_exit(void)
+{
+ sysfs_remove_group(dom0_kobj, &dev_attr_grp);
+ kobject_put(dom0_kobj);
+ misc_deregister(&dom0_dev.miscdev);
+
+ XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n");
+}
+
+static int
+dom0_open(struct inode *inode, struct file *file)
+{
+ file->private_data = NULL;
+
+ XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n");
+ return 0;
+}
+
+static int
+dom0_release(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+ struct dom0_mm_data *mm_data = file->private_data;
+
+ if (mm_data == NULL)
+ return ret;
+
+ mutex_lock(&dom0_dev.data_lock);
+ if (--mm_data->refcnt == 0)
+ ret = dom0_memory_free(mm_data);
+ mutex_unlock(&dom0_dev.data_lock);
+
+ file->private_data = NULL;
+ XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n");
+ return ret;
+}
+
+static int
+dom0_mmap(struct file *file, struct vm_area_struct *vm)
+{
+ int status = 0;
+ uint32_t idx = vm->vm_pgoff;
+ uint64_t pfn, size = vm->vm_end - vm->vm_start;
+ struct dom0_mm_data *mm_data = file->private_data;
+
+ if (mm_data == NULL)
+ return -EINVAL;
+
+ mutex_lock(&dom0_dev.data_lock);
+ if (idx >= mm_data->num_memseg) {
+ mutex_unlock(&dom0_dev.data_lock);
+ return -EINVAL;
+ }
+
+ if (size > mm_data->seg_info[idx].size){
+ mutex_unlock(&dom0_dev.data_lock);
+ return -EINVAL;
+ }
+
+ XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size);
+
+ pfn = mm_data->seg_info[idx].pfn;
+ mutex_unlock(&dom0_dev.data_lock);
+
+ status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED);
+
+ return status;
+}
+
+static int
+dom0_ioctl(struct file *file,
+ unsigned int ioctl_num,
+ unsigned long ioctl_param)
+{
+ int idx, ret;
+ char name[DOM0_NAME_MAX] = {0};
+ struct memory_info meminfo;
+ struct dom0_mm_data *mm_data = file->private_data;
+
+ XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
+
+ /* dispatch on the ioctl number */
+ switch (_IOC_NR(ioctl_num)) {
+ case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG):
+ ret = copy_from_user(&meminfo, (void *)ioctl_param,
+ sizeof(struct memory_info));
+ if (ret)
+ return -EFAULT;
+
+ if (mm_data != NULL) {
+ XEN_ERR("Cannot create memory segment for the same"
+ " file descriptor\n");
+ return -EINVAL;
+ }
+
+ /* Allocate private data */
+ mm_data = vmalloc(sizeof(struct dom0_mm_data));
+ if (!mm_data) {
+ XEN_ERR("Unable to allocate device private data\n");
+ return -ENOMEM;
+ }
+ memset(mm_data, 0, sizeof(struct dom0_mm_data));
+
+ mutex_lock(&dom0_dev.data_lock);
+ /* check whether we can allocate this much memory */
+ if (dom0_check_memory(&meminfo) < 0) {
+ mutex_unlock(&dom0_dev.data_lock);
+ vfree(mm_data);
+ return -EINVAL;
+ }
+
+ /* allocate memory and create the memory segments */
+ if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) {
+ XEN_ERR("failed to create memory segments\n");
+ mutex_unlock(&dom0_dev.data_lock);
+ return -EIO;
+ }
+
+ file->private_data = mm_data;
+ mutex_unlock(&dom0_dev.data_lock);
+ break;
+
+ /* support multiple processes in terms of memory mapping */
+ case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG):
+ ret = copy_from_user(name, (void *)ioctl_param,
+ sizeof(char) * DOM0_NAME_MAX);
+ if (ret)
+ return -EFAULT;
+
+ mutex_lock(&dom0_dev.data_lock);
+ idx = dom0_find_memdata(name);
+ if (idx < 0) {
+ mutex_unlock(&dom0_dev.data_lock);
+ return -EINVAL;
+ }
+
+ mm_data = dom0_dev.mm_data[idx];
+ mm_data->refcnt++;
+ file->private_data = mm_data;
+ mutex_unlock(&dom0_dev.data_lock);
+ break;
+
+ case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG):
+ ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg,
+ sizeof(int));
+ if (ret)
+ return -EFAULT;
+ break;
+
+ case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO):
+ ret = copy_to_user((void *)ioctl_param,
+ &mm_data->seg_info[0],
+ sizeof(struct memseg_info) *
+ mm_data->num_memseg);
+ if (ret)
+ return -EFAULT;
+ break;
+ default:
+ XEN_PRINT("IOCTL default \n");
+ break;
+ }
+
+ return 0;
+}
+
+module_init(dom0_init);
+module_exit(dom0_exit);
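
Putting the pieces together, here is a user-space sketch of the /dev/dom0_mm protocol implemented above. The layout of struct memory_info and the RTE_DOM0_IOCTL_* request codes live in rte_dom0_common.h; only the two fields this driver actually dereferences (name, size in MB) are relied upon, and the 2 MB mapping length assumes the first segment holds at least one block:

    #include <fcntl.h>
    #include <string.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>
    #include <exec-env/rte_dom0_common.h>

    static void *
    map_first_memseg(const char *name, uint64_t size_mb)
    {
        struct memory_info meminfo;
        int fd, num_memseg = 0;
        void *va;

        fd = open("/dev/dom0_mm", O_RDWR);
        if (fd < 0)
            return NULL;

        memset(&meminfo, 0, sizeof(meminfo));
        strncpy(meminfo.name, name, DOM0_NAME_MAX - 1);
        meminfo.size = size_mb;

        /* allocate + exchange the 2M blocks, build the memsegs */
        if (ioctl(fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo) < 0 ||
            ioctl(fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg) < 0 ||
            num_memseg == 0) {
            close(fd);
            return NULL;
        }

        /* vm_pgoff selects the memseg index in dom0_mmap() above;
         * offset 0 maps segment 0, length is one 2M block */
        va = mmap(NULL, 2 * 1024 * 1024, PROT_READ | PROT_WRITE,
                  MAP_SHARED, fd, 0);
        return va == MAP_FAILED ? NULL : va;
    }
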
(int) sizeof(struct rte_pktmbuf_pool_private));
return (-ENOSPC);
}
- mbp_priv = (struct rte_pktmbuf_pool_private *)
- ((char *)mp + sizeof(struct rte_mempool));
+ mbp_priv = rte_mempool_get_priv(mp);
if ((uint32_t) (mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM) <
dev_info.min_rx_bufsize) {
PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d "
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) := rte_mempool.c
-
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c
+ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y)
+SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_dom0_mempool.c
+endif
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h
--- /dev/null
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_spinlock.h>
+
+#include "rte_mempool.h"
+
+static void
+get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num,
+ uint32_t pg_sz, uint32_t memseg_id)
+{
+ uint32_t i;
+ uint64_t virt_addr, mfn_id;
+ struct rte_mem_config *mcfg;
+ uint32_t page_size = getpagesize();
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+ virt_addr = (uintptr_t)mcfg->memseg[memseg_id].addr;
+
+ for (i = 0; i != pg_num; i++) {
+ mfn_id = ((uintptr_t)va + i * pg_sz - virt_addr) / RTE_PGSIZE_2M;
+ pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size;
+ }
+}
+
+/* create the mempool for supporting Dom0 */
+struct rte_mempool *
+rte_dom0_mempool_create(const char *name, unsigned elt_num, unsigned elt_size,
+ unsigned cache_size, unsigned private_data_size,
+ rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+ rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+ int socket_id, unsigned flags)
+{
+ struct rte_mempool *mp = NULL;
+ phys_addr_t *pa;
+ char *va;
+ size_t sz;
+ uint32_t pg_num, pg_shift, pg_sz, total_size;
+ const struct rte_memzone *mz;
+ char mz_name[RTE_MEMZONE_NAMESIZE];
+ int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
+
+ pg_sz = RTE_PGSIZE_2M;
+
+ pg_shift = rte_bsf32(pg_sz);
+ total_size = rte_mempool_calc_obj_size(elt_size, flags, NULL);
+
+ /* calc max memory size and max number of pages needed. */
+ sz = rte_mempool_xmem_size(elt_num, total_size, pg_shift) +
+ RTE_PGSIZE_2M;
+ pg_num = sz >> pg_shift;
+
+ /* allocate an array to hold the physical address of each page. */
+ pa = calloc(pg_num, sizeof(*pa));
+ if (pa == NULL)
+ return mp;
+
+ rte_snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, name);
+ mz = rte_memzone_reserve(mz_name, sz, socket_id, mz_flags);
+ if (mz == NULL) {
+ free(pa);
+ return mp;
+ }
+
+ va = (char *)RTE_ALIGN_CEIL((uintptr_t)mz->addr, RTE_PGSIZE_2M);
+ /* extract physical mappings of the allocated memory. */
+ get_phys_map(va, pa, pg_num, pg_sz, mz->memseg_id);
+
+ mp = rte_mempool_xmem_create(name, elt_num, elt_size,
+ cache_size, private_data_size,
+ mp_init, mp_init_arg,
+ obj_init, obj_init_arg,
+ socket_id, flags, va, pa, pg_num, pg_shift);
+
+ free(pa);
+
+ return (mp);
+}
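
With the #ifdef that the rte_mempool.c hunk below adds to rte_mempool_create(), existing applications pick this path up transparently on a CONFIG_RTE_LIBRTE_XEN_DOM0=y build. A sketch with illustrative sizing; the pktmbuf constructors are the stock ones from rte_mbuf.h:

    #include <rte_mempool.h>
    #include <rte_mbuf.h>
    #include <rte_lcore.h>

    /* illustrative mbuf pool; on Dom0 the elements land in the
     * MFN-exchanged 2M blocks, invisibly to this call site */
    static struct rte_mempool *
    make_pool(void)
    {
        return rte_mempool_create("mbuf_pool",
                8192 - 1,                /* 2^q - 1 elements */
                2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
                32,                      /* per-lcore cache */
                sizeof(struct rte_pktmbuf_pool_private),
                rte_pktmbuf_pool_init, NULL,
                rte_pktmbuf_init, NULL,
                rte_socket_id(), 0);
    }
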
return new_obj_size * CACHE_LINE_SIZE;
}
+static void
+mempool_add_elem(struct rte_mempool *mp, void *obj, uint32_t obj_idx,
+ rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
+{
+ struct rte_mempool **mpp;
+
+ obj = (char *)obj + mp->header_size;
+
+ /* set mempool ptr in header */
+ mpp = __mempool_from_obj(obj);
+ *mpp = mp;
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ __mempool_write_header_cookie(obj, 1);
+ __mempool_write_trailer_cookie(obj);
+#endif
+ /* call the initializer */
+ if (obj_init)
+ obj_init(mp, obj_init_arg, obj, obj_idx);
+
+ /* enqueue in ring */
+ rte_ring_sp_enqueue(mp->ring, obj);
+}
+
+uint32_t
+rte_mempool_obj_iter(void *vaddr, uint32_t elt_num, size_t elt_sz, size_t align,
+ const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
+ rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg)
+{
+ uint32_t i, j, k;
+ uint32_t pgn;
+ uintptr_t end, start, va;
+ uintptr_t pg_sz;
+
+ pg_sz = (uintptr_t)1 << pg_shift;
+ va = (uintptr_t)vaddr;
+
+ i = 0;
+ j = 0;
+
+ while (i != elt_num && j != pg_num) {
+
+ start = RTE_ALIGN_CEIL(va, align);
+ end = start + elt_sz;
+
+ pgn = (end >> pg_shift) - (start >> pg_shift);
+ pgn += j;
+
+ /* is there enough space left for the next element? */
+ if (pgn >= pg_num)
+ break;
+
+ for (k = j;
+ k != pgn &&
+ paddr[k] + pg_sz == paddr[k + 1];
+ k++)
+ ;
+
+ /*
+ * if the next pgn pages are physically contiguous,
+ * use them for the next element.
+ * otherwise, skip to the next page and leave the gap unused.
+ */
+ if (k == pgn) {
+ if (obj_iter != NULL)
+ obj_iter(obj_iter_arg, (void *)start,
+ (void *)end, i);
+ va = end;
+ j = pgn;
+ i++;
+ } else {
+ va = RTE_ALIGN_CEIL((va + 1), pg_sz);
+ j++;
+ }
+ }
+
+ return (i);
+}
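
The iterator's callback contract is thin: each invocation hands over the [start, end) byte span laid out for one object, plus its index. A sketch of a dry-run counter over a caller-supplied buffer (all parameter values are the caller's; an alignment of 1 packs elements as tightly as page boundaries allow):

    /* counts how many elements fit; rte_mempool_obj_iter() returns the
     * same number, so a callback is only needed when the spans matter */
    static void
    count_obj(void *arg, void *start, void *end, uint32_t idx)
    {
        (void)start;
        (void)end;
        (void)idx;
        (*(uint32_t *)arg)++;
    }

    static uint32_t
    dry_run(void *vaddr, uint32_t elt_num, size_t elt_sz,
            const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
    {
        uint32_t cnt = 0;

        rte_mempool_obj_iter(vaddr, elt_num, elt_sz, 1,
                paddr, pg_num, pg_shift, count_obj, &cnt);
        return cnt;
    }
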
+
+/*
+ * Populate mempool with the objects.
+ */
+
+struct mempool_populate_arg {
+ struct rte_mempool *mp;
+ rte_mempool_obj_ctor_t *obj_init;
+ void *obj_init_arg;
+};
+
+static void
+mempool_obj_populate(void *arg, void *start, void *end, uint32_t idx)
+{
+ struct mempool_populate_arg *pa = arg;
+
+ mempool_add_elem(pa->mp, start, idx, pa->obj_init, pa->obj_init_arg);
+ pa->mp->elt_va_end = (uintptr_t)end;
+}
+
+static void
+mempool_populate(struct rte_mempool *mp, size_t num, size_t align,
+ rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg)
+{
+ uint32_t elt_sz;
+ struct mempool_populate_arg arg;
+
+ elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
+ arg.mp = mp;
+ arg.obj_init = obj_init;
+ arg.obj_init_arg = obj_init_arg;
+
+ mp->size = rte_mempool_obj_iter((void *)mp->elt_va_start,
+ num, elt_sz, align,
+ mp->elt_pa, mp->pg_num, mp->pg_shift,
+ mempool_obj_populate, &arg);
+}
+
+uint32_t
+rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
+ struct rte_mempool_objsz *sz)
+{
+ struct rte_mempool_objsz lsz;
+
+ sz = (sz != NULL) ? sz : &lsz;
+
+ /*
+ * In header, we have at least the pointer to the pool, and
+ * optionally a 64-bit cookie.
+ */
+ sz->header_size = 0;
+ sz->header_size += sizeof(struct rte_mempool *); /* ptr to pool */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ sz->header_size += sizeof(uint64_t); /* cookie */
+#endif
+ if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0)
+ sz->header_size = RTE_ALIGN_CEIL(sz->header_size,
+ CACHE_LINE_SIZE);
+
+ /* trailer contains the cookie in debug mode */
+ sz->trailer_size = 0;
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+ sz->trailer_size += sizeof(uint64_t); /* cookie */
+#endif
+ /* element size is aligned to at least 8 bytes */
+ sz->elt_size = RTE_ALIGN_CEIL(elt_size, sizeof(uint64_t));
+
+ /* expand trailer to next cache line */
+ if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) {
+ sz->total_size = sz->header_size + sz->elt_size +
+ sz->trailer_size;
+ sz->trailer_size += ((CACHE_LINE_SIZE -
+ (sz->total_size & CACHE_LINE_MASK)) &
+ CACHE_LINE_MASK);
+ }
+
+ /*
+ * increase trailer to add padding between objects in order to
+ * spread them across memory channels/ranks
+ */
+ if ((flags & MEMPOOL_F_NO_SPREAD) == 0) {
+ unsigned new_size;
+ new_size = optimize_object_size(sz->header_size + sz->elt_size +
+ sz->trailer_size);
+ sz->trailer_size = new_size - sz->header_size - sz->elt_size;
+ }
+
+ /* this is the size of an object, including header and trailer */
+ sz->total_size = sz->header_size + sz->elt_size + sz->trailer_size;
+
+ return (sz->total_size);
+}
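
A worked example may help; it assumes a 64-byte cache line, debug cookies compiled out, and default flags:

    /*
     * elt_size = 120 -> rounded up to 8 bytes:      elt_size     = 120
     * header: pool pointer (8) -> cache-aligned:    header_size  = 64
     * 64 + 120 = 184 -> pad to the next line:       trailer_size = 8
     * running total: 64 + 120 + 8                 = 192
     *
     * with MEMPOOL_F_NO_SPREAD clear, optimize_object_size() may then
     * grow the trailer further to spread objects across memory
     * channels/ranks, so 192 is only a lower bound for total_size.
     */
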
+
+
+/*
+ * Calculate the maximum amount of memory required to store a given number of objects.
+ */
+size_t
+rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, uint32_t pg_shift)
+{
+ size_t n, pg_num, pg_sz, sz;
+
+ pg_sz = (size_t)1 << pg_shift;
+
+ if ((n = pg_sz / elt_sz) > 0) {
+ pg_num = (elt_num + n - 1) / n;
+ sz = pg_num << pg_shift;
+ } else {
+ sz = RTE_ALIGN_CEIL(elt_sz, pg_sz) * elt_num;
+ }
+
+ return (sz);
+}
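
For instance (a sketch, sizes illustrative): with elt_sz = 192 bytes and pg_shift = 21 (2M pages), n = 2097152 / 192 = 10922 objects fit per page; for elt_num = 100000 that gives pg_num = ceil(100000 / 10922) = 10 pages, hence sz = 10 << 21 = 20M. Objects never straddle a page boundary, which is why this is a maximum rather than an exact footprint.
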
+
+/*
+ * Calculate how much memory would actually be required with the
+ * given memory footprint to store the required number of elements.
+ */
+static void
+mempool_lelem_iter(void *arg, __rte_unused void *start, void *end,
+ __rte_unused uint32_t idx)
+{
+ *(uintptr_t *)arg = (uintptr_t)end;
+}
+
+ssize_t
+rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
+ const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
+{
+ uint32_t n;
+ uintptr_t va, uv;
+ size_t pg_sz, usz;
+
+ pg_sz = (size_t)1 << pg_shift;
+ va = (uintptr_t)vaddr;
+ uv = va;
+
+ if ((n = rte_mempool_obj_iter(vaddr, elt_num, elt_sz, 1,
+ paddr, pg_num, pg_shift, mempool_lelem_iter,
+ &uv)) != elt_num) {
+ return (-n);
+ }
+
+ uv = RTE_ALIGN_CEIL(uv, pg_sz);
+ usz = uv - va;
+ return (usz);
+}
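
Note the sign convention here: if the buffer can hold every element, the return value is the page-aligned number of bytes actually consumed; if, say, only 500 of 1000 requested elements fit, the call returns -500.
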
+
/* create the mempool */
struct rte_mempool *
rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
rte_mempool_ctor_t *mp_init, void *mp_init_arg,
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags)
+{
+#ifdef RTE_LIBRTE_XEN_DOM0
+ return (rte_dom0_mempool_create(name, n, elt_size,
+ cache_size, private_data_size,
+ mp_init, mp_init_arg,
+ obj_init, obj_init_arg,
+ socket_id, flags));
+#else
+ return (rte_mempool_xmem_create(name, n, elt_size,
+ cache_size, private_data_size,
+ mp_init, mp_init_arg,
+ obj_init, obj_init_arg,
+ socket_id, flags,
+ NULL, NULL, MEMPOOL_PG_NUM_DEFAULT, MEMPOOL_PG_SHIFT_MAX));
+#endif
+}
+
+/*
+ * Create the mempool over an already allocated chunk of memory.
+ * That external memory buffer can consist of physically disjoint pages.
+ * Setting vaddr to NULL makes the mempool fall back to the original
+ * behaviour and allocate space for the mempool and its elements as one
+ * big chunk of physically contiguous memory.
+ */
+struct rte_mempool *
+rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
+ unsigned cache_size, unsigned private_data_size,
+ rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+ rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+ int socket_id, unsigned flags, void *vaddr,
+ const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift)
{
char mz_name[RTE_MEMZONE_NAMESIZE];
char rg_name[RTE_RING_NAMESIZE];
struct rte_mempool *mp = NULL;
struct rte_ring *r;
const struct rte_memzone *mz;
- size_t mempool_size, total_elt_size;
+ size_t mempool_size;
int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
int rg_flags = 0;
- uint32_t header_size, trailer_size;
- unsigned i;
- void *obj;
+ void *obj;
+ struct rte_mempool_objsz objsz;
/* compilation-time checks */
RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
#endif
/* check that we have an initialised tail queue */
- if (RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_MEMPOOL, rte_mempool_list) == NULL) {
+ if (RTE_TAILQ_LOOKUP_BY_IDX(RTE_TAILQ_MEMPOOL,
+ rte_mempool_list) == NULL) {
rte_errno = E_RTE_NO_TAILQ;
return NULL;
}
/* asked cache too big */
- if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE){
+ if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* check that we have both VA and PA */
+ if (vaddr != NULL && paddr == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* Check that pg_num and pg_shift parameters are valid. */
+ if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) {
rte_errno = EINVAL;
return NULL;
}
if (flags & MEMPOOL_F_SC_GET)
rg_flags |= RING_F_SC_DEQ;
+ /* calculate mempool object sizes. */
+ rte_mempool_calc_obj_size(elt_size, flags, &objsz);
+
rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);
/* allocate the ring that will be used to store objects */
goto exit;
/*
- * In header, we have at least the pointer to the pool, and
- * optionaly a 64 bits cookie.
- */
- header_size = 0;
- header_size += sizeof(struct rte_mempool *); /* ptr to pool */
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
- header_size += sizeof(uint64_t); /* cookie */
-#endif
- if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0)
- header_size = (header_size + CACHE_LINE_MASK) & (~CACHE_LINE_MASK);
-
- /* trailer contains the cookie in debug mode */
- trailer_size = 0;
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
- trailer_size += sizeof(uint64_t); /* cookie */
-#endif
- /* element size is 8 bytes-aligned at least */
- elt_size = (elt_size + 7) & (~7);
-
- /* expand trailer to next cache line */
- if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) {
- total_elt_size = header_size + elt_size + trailer_size;
- trailer_size += ((CACHE_LINE_SIZE -
- (total_elt_size & CACHE_LINE_MASK)) &
- CACHE_LINE_MASK);
- }
-
- /*
- * increase trailer to add padding between objects in order to
- * spread them accross memory channels/ranks
+ * reserve a memory zone for this mempool: private data is
+ * cache-aligned
*/
- if ((flags & MEMPOOL_F_NO_SPREAD) == 0) {
- unsigned new_size;
- new_size = optimize_object_size(header_size + elt_size +
- trailer_size);
- trailer_size = new_size - header_size - elt_size;
- }
-
- /* this is the size of an object, including header and trailer */
- total_elt_size = header_size + elt_size + trailer_size;
-
- /* reserve a memory zone for this mempool: private data is
- * cache-aligned */
private_data_size = (private_data_size +
CACHE_LINE_MASK) & (~CACHE_LINE_MASK);
- mempool_size = total_elt_size * n +
- sizeof(struct rte_mempool) + private_data_size;
+
+ /*
+ * If user provided an external memory buffer, then use it to
+ * store mempool objects. Otherwise reserve memzone big enough to
+ * hold mempool header and metadata plus mempool objects.
+ */
+ mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+ if (vaddr == NULL)
+ mempool_size += (size_t)objsz.total_size * n;
+
rte_snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name);
mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);
mp->ring = r;
mp->size = n;
mp->flags = flags;
- mp->elt_size = elt_size;
- mp->header_size = header_size;
- mp->trailer_size = trailer_size;
+ mp->elt_size = objsz.elt_size;
+ mp->header_size = objsz.header_size;
+ mp->trailer_size = objsz.trailer_size;
mp->cache_size = cache_size;
- mp->cache_flushthresh = (uint32_t)(cache_size * CACHE_FLUSHTHRESH_MULTIPLIER);
+ mp->cache_flushthresh = (uint32_t)
+ (cache_size * CACHE_FLUSHTHRESH_MULTIPLIER);
mp->private_data_size = private_data_size;
+ /* calculate address of the first element for a contiguous mempool. */
+ obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+ private_data_size;
+
+ /* populate address translation fields. */
+ mp->pg_num = pg_num;
+ mp->pg_shift = pg_shift;
+ mp->pg_mask = RTE_LEN2MASK(mp->pg_shift, typeof(mp->pg_mask));
+
+ /* mempool elements allocated together with mempool */
+ if (vaddr == NULL) {
+ mp->elt_va_start = (uintptr_t)obj;
+ mp->elt_pa[0] = mp->phys_addr +
+ (mp->elt_va_start - (uintptr_t)mp);
+
+ /* mempool elements in a separate chunk of memory. */
+ } else {
+ mp->elt_va_start = (uintptr_t)vaddr;
+ memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
+ }
+
+ mp->elt_va_end = mp->elt_va_start;
+
/* call the initializer */
if (mp_init)
mp_init(mp, mp_init_arg);
- /* fill the headers and trailers, and add objects in ring */
- obj = (char *)mp + sizeof(struct rte_mempool) + private_data_size;
- for (i = 0; i < n; i++) {
- struct rte_mempool **mpp;
- obj = (char *)obj + header_size;
-
- /* set mempool ptr in header */
- mpp = __mempool_from_obj(obj);
- *mpp = mp;
-
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
- __mempool_write_header_cookie(obj, 1);
- __mempool_write_trailer_cookie(obj);
-#endif
- /* call the initializer */
- if (obj_init)
- obj_init(mp, obj_init_arg, obj, i);
-
- /* enqueue in ring */
- rte_ring_sp_enqueue(mp->ring, obj);
- obj = (char *)obj + elt_size + trailer_size;
- }
+ mempool_populate(mp, n, 1, obj_init, obj_init_arg);
RTE_EAL_TAILQ_INSERT_TAIL(RTE_TAILQ_MEMPOOL, rte_mempool_list, mp);
#ifndef __INTEL_COMPILER
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
+
+struct mempool_audit_arg {
+ const struct rte_mempool *mp;
+ uintptr_t obj_end;
+ uint32_t obj_num;
+};
+
static void
-mempool_audit_cookies(const struct rte_mempool *mp)
+mempool_obj_audit(void *arg, void *start, void *end, uint32_t idx)
{
- unsigned i;
+ struct mempool_audit_arg *pa = arg;
void *obj;
- void * const *obj_table;
-
- obj = (char *)mp + sizeof(struct rte_mempool) + mp->private_data_size;
- for (i = 0; i < mp->size; i++) {
- obj = (char *)obj + mp->header_size;
- obj_table = &obj;
- __mempool_check_cookies(mp, obj_table, 1, 2);
- obj = (char *)obj + mp->elt_size + mp->trailer_size;
+
+ obj = (char *)start + pa->mp->header_size;
+ pa->obj_end = (uintptr_t)end;
+ pa->obj_num = idx + 1;
+ __mempool_check_cookies(pa->mp, &obj, 1, 2);
+}
+
+static void
+mempool_audit_cookies(const struct rte_mempool *mp)
+{
+ uint32_t elt_sz, num;
+ struct mempool_audit_arg arg;
+
+ elt_sz = mp->elt_size + mp->header_size + mp->trailer_size;
+
+ arg.mp = mp;
+ arg.obj_end = mp->elt_va_start;
+ arg.obj_num = 0;
+
+ num = rte_mempool_obj_iter((void *)mp->elt_va_start,
+ mp->size, elt_sz, 1,
+ mp->elt_pa, mp->pg_num, mp->pg_shift,
+ mempool_obj_audit, &arg);
+
+ if (num != mp->size) {
+ rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
+ "iterated only over %u elements\n",
+ mp, mp->size, num);
+ } else if (arg.obj_end != mp->elt_va_end || arg.obj_num != mp->size) {
+ rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) "
+ "last callback va_end: %#tx (%#tx expeceted), "
+ "num of objects: %u (%u expected)\n",
+ mp, mp->size,
+ arg.obj_end, mp->elt_va_end,
+ arg.obj_num, mp->size);
}
}
+
#ifndef __INTEL_COMPILER
#pragma GCC diagnostic error "-Wcast-qual"
#endif
printf("mempool <%s>@%p\n", mp->name, mp);
printf(" flags=%x\n", mp->flags);
printf(" ring=<%s>@%p\n", mp->ring->name, mp->ring);
+ printf(" phys_addr=0x%" PRIx64 "\n", mp->phys_addr);
printf(" size=%"PRIu32"\n", mp->size);
printf(" header_size=%"PRIu32"\n", mp->header_size);
printf(" elt_size=%"PRIu32"\n", mp->elt_size);
printf(" total_obj_size=%"PRIu32"\n",
mp->header_size + mp->elt_size + mp->trailer_size);
+ printf(" private_data_size=%"PRIu32"\n", mp->private_data_size);
+ printf(" pg_num=%"PRIu32"\n", mp->pg_num);
+ printf(" pg_shift=%"PRIu32"\n", mp->pg_shift);
+ printf(" pg_mask=%#tx\n", mp->pg_mask);
+ printf(" elt_va_start=%#tx\n", mp->elt_va_start);
+ printf(" elt_va_end=%#tx\n", mp->elt_va_end);
+ printf(" elt_pa[0]=0x%" PRIx64 "\n", mp->elt_pa[0]);
+
+ if (mp->size != 0)
+ printf(" avg bytes/object=%#Lf\n",
+ (long double)(mp->elt_va_end - mp->elt_va_start) /
+ mp->size);
+
cache_count = rte_mempool_dump_cache(mp);
common_count = rte_ring_count(mp->ring);
if ((cache_count + common_count) > mp->size)
} __rte_cache_aligned;
#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+struct rte_mempool_objsz {
+ uint32_t elt_size; /**< Size of an element. */
+ uint32_t header_size; /**< Size of header (before elt). */
+ uint32_t trailer_size; /**< Size of trailer (after elt). */
+ uint32_t total_size;
+ /**< Total size of an object (header + elt + trailer). */
+};
+
#define RTE_MEMPOOL_NAMESIZE 32 /**< Maximum length of a memory pool. */
#define RTE_MEMPOOL_MZ_PREFIX "MP_"
/* "MP_<name>" */
#define RTE_MEMPOOL_MZ_FORMAT RTE_MEMPOOL_MZ_PREFIX "%s"
+#ifdef RTE_LIBRTE_XEN_DOM0
+
+/* "<name>_MP_elt" */
+#define RTE_MEMPOOL_OBJ_NAME "%s_" RTE_MEMPOOL_MZ_PREFIX "elt"
+
+#else
+
#define RTE_MEMPOOL_OBJ_NAME RTE_MEMPOOL_MZ_FORMAT
+#endif /* RTE_LIBRTE_XEN_DOM0 */
+
+#define MEMPOOL_PG_SHIFT_MAX (sizeof(uintptr_t) * CHAR_BIT - 1)
+
+/** Mempool over one chunk of physically contiguous memory */
+#define MEMPOOL_PG_NUM_DEFAULT 1
+
/**
* The RTE mempool structure.
*/
int flags; /**< Flags of the mempool. */
uint32_t size; /**< Size of the mempool. */
uint32_t cache_size; /**< Size of per-lcore local cache. */
- uint32_t cache_flushthresh; /**< Threshold before we flush excess elements. */
+ uint32_t cache_flushthresh;
+ /**< Threshold before we flush excess elements. */
uint32_t elt_size; /**< Size of an element. */
uint32_t header_size; /**< Size of header (before elt). */
/** Per-lcore statistics. */
struct rte_mempool_debug_stats stats[RTE_MAX_LCORE];
#endif
+
+ /* Address translation support, starts from next cache line. */
+
+ /** Number of elements in the elt_pa array. */
+ uint32_t pg_num __rte_cache_aligned;
+ uint32_t pg_shift; /**< LOG2 of the physical pages. */
+ uintptr_t pg_mask; /**< physical page mask value. */
+ uintptr_t elt_va_start;
+ /**< Virtual address of the first mempool object. */
+ uintptr_t elt_va_end;
+ /**< Virtual address of the <size + 1> mempool object. */
+ phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT];
+ /**< Array of physical pages addresses for the mempool objects buffer. */
+
} __rte_cache_aligned;
#define MEMPOOL_F_NO_SPREAD 0x0001 /**< Do not spread in memory. */
#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
#endif
+/**
+ * Calculate the size of the mempool header.
+ * @param mp
+ * Pointer to the memory pool.
+ * @param pgn
+ * Number of pages used to store mempool objects.
+ */
+#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \
+ RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
+ sizeof ((mp)->elt_pa[0]), CACHE_LINE_SIZE))
+
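Since elt_pa[] already holds MEMPOOL_PG_NUM_DEFAULT (one) entry inside struct rte_mempool, the macro only adds space for the pages beyond the first. Illustrative values, assuming an 8-byte phys_addr_t and a 64-byte cache line:

    /*
     * pgn = 1    -> sizeof(*mp) + 0
     * pgn = 1024 -> sizeof(*mp) + RTE_ALIGN_CEIL(1023 * 8, 64)
     *             = sizeof(*mp) + 8192
     */
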
+/**
+ * Return TRUE if the whole mempool is allocated in one contiguous block of memory.
+ */
+#define MEMPOOL_IS_CONTIG(mp) \
+ ((mp)->pg_num == MEMPOOL_PG_NUM_DEFAULT && \
+ (mp)->phys_addr == (mp)->elt_pa[0])
+
/**
* @internal Get a pointer to a mempool pointer in the object header.
* @param obj
#define __mempool_check_cookies(mp, obj_table_const, n, free) do {} while(0)
#endif /* RTE_LIBRTE_MEMPOOL_DEBUG */
+/**
+ * A mempool object iterator callback function.
+ */
+typedef void (*rte_mempool_obj_iter_t)(void * /*obj_iter_arg*/,
+ void * /*obj_start*/,
+ void * /*obj_end*/,
+ uint32_t /*obj_index*/);
+
+/**
+ * Iterates across objects of the given size and alignment in the
+ * provided chunk of memory. The given memory buffer can consist of
+ * disjoint physical pages.
+ * For each object calls the provided callback (if any).
+ * Used to populate the mempool, walk through all elements of the mempool,
+ * or estimate how many elements of the given size could be created in the
+ * given memory buffer.
+ * @param vaddr
+ * Virtual address of the memory buffer.
+ * @param elt_num
+ * Maximum number of objects to iterate through.
+ * @param elt_sz
+ * Size of each object.
+ * @param paddr
+ * Array of physical addresses of the pages that comprise the given
+ * memory buffer.
+ * @param pg_num
+ * Number of elements in the paddr array.
+ * @param pg_shift
+ * LOG2 of the physical page size.
+ * @param obj_iter
+ * Object iterator callback function (can be NULL).
+ * @param obj_iter_arg
+ * User-defined parameter for the object iterator callback function.
+ *
+ * @return
+ * Number of objects iterated through.
+ */
+uint32_t rte_mempool_obj_iter(void *vaddr,
+ uint32_t elt_num, size_t elt_sz, size_t align,
+ const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
+ rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg);
+
/**
* An object constructor callback function for mempool.
*
*
* This function uses ``memzone_reserve()`` to allocate memory. The
* pool contains n elements of elt_size. Its size is set to n.
+ * All elements of the mempool are allocated together with the mempool header,
+ * in one physically contiguous chunk of memory.
*
* @param name
* The name of the mempool.
rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
int socket_id, unsigned flags);
+/**
+ * Creates a new mempool named *name* in memory.
+ *
+ * This function uses ``memzone_reserve()`` to allocate memory. The
+ * pool contains n elements of elt_size. Its size is set to n.
+ * Depending on the input parameters, mempool elements can be either allocated
+ * together with the mempool header, or an externally provided memory buffer
+ * can be used to store mempool objects. In the latter case, that external
+ * memory buffer can consist of a set of disjoint physical pages.
+ *
+ * @param name
+ * The name of the mempool.
+ * @param n
+ * The number of elements in the mempool. The optimum size (in terms of
+ * memory usage) for a mempool is when n is a power of two minus one:
+ * n = (2^q - 1).
+ * @param elt_size
+ * The size of each element.
+ * @param cache_size
+ * If cache_size is non-zero, the rte_mempool library will try to
+ * limit the accesses to the common lockless pool, by maintaining a
+ * per-lcore object cache. This argument must be less than or equal to
+ * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose
+ * cache_size to have "n modulo cache_size == 0": if this is
+ * not the case, some elements will always stay in the pool and will
+ * never be used. The access to the per-lcore table is of course
+ * faster than the multi-producer/consumer pool. The cache can be
+ * disabled if the cache_size argument is set to 0; it can be useful to
+ * avoid losing objects in cache. Note that even if not used, the
+ * memory space for cache is always reserved in a mempool structure,
+ * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
+ * @param private_data_size
+ * The size of the private data appended after the mempool
+ * structure. This is useful for storing some private data after the
+ * mempool structure, as is done for rte_mbuf_pool for example.
+ * @param mp_init
+ * A function pointer that is called for initialization of the pool,
+ * before object initialization. The user can initialize the private
+ * data in this function if needed. This parameter can be NULL if
+ * not needed.
+ * @param mp_init_arg
+ * An opaque pointer to data that can be used in the mempool
+ * constructor function.
+ * @param obj_init
+ * A function pointer that is called for each object at
+ * initialization of the pool. The user can set some meta data in
+ * objects if needed. This parameter can be NULL if not needed.
+ * The obj_init() function takes the mempool pointer, the init_arg,
+ * the object pointer and the object number as parameters.
+ * @param obj_init_arg
+ * An opaque pointer to data that can be used as an argument for
+ * each call to the object constructor function.
+ * @param socket_id
+ * The *socket_id* argument is the socket identifier in the case of
+ * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The *flags* argument is an OR of the following flags:
+ * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread
+ * between channels in RAM: the pool allocator will add padding
+ * between objects depending on the hardware configuration. See
+ * Memory alignment constraints for details. If this flag is set,
+ * the allocator will just align them to a cache line.
+ * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are
+ * cache-aligned. This flag removes this constraint, and no
+ * padding will be present between objects. This flag implies
+ * MEMPOOL_F_NO_SPREAD.
+ * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior
+ * when using rte_mempool_put() or rte_mempool_put_bulk() is
+ * "single-producer". Otherwise, it is "multi-producers".
+ * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
+ * when using rte_mempool_get() or rte_mempool_get_bulk() is
+ * "single-consumer". Otherwise, it is "multi-consumers".
+ * @param vaddr
+ * Virtual address of the externally allocated memory buffer.
+ * Will be used to store mempool objects.
+ * @param paddr
+ * Array of physical addresses of the pages that comprise the given
+ * memory buffer.
+ * @param pg_num
+ * Number of elements in the paddr array.
+ * @param pg_shift
+ * LOG2 of the physical page size.
+ * @return
+ * The pointer to the newly allocated mempool, on success. NULL on error
+ * with rte_errno set appropriately. Possible rte_errno values include:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - E_RTE_NO_TAILQ - no tailq list could be got for the ring or mempool list
+ * - EINVAL - cache size provided is too large
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_mempool *
+rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
+ unsigned cache_size, unsigned private_data_size,
+ rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+ rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+ int socket_id, unsigned flags, void *vaddr,
+ const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift);
+
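A sketch of the external-buffer path (names and sizing illustrative; the caller owns 'va' and must fill pa[] with the physical address of every page backing it, e.g. sized via rte_mempool_xmem_size() on the total object size from rte_mempool_calc_obj_size()):

    static struct rte_mempool *
    make_ext_pool(const char *name, unsigned n, unsigned elt_size,
                  void *va, const phys_addr_t pa[], uint32_t pg_num,
                  uint32_t pg_shift)
    {
        return rte_mempool_xmem_create(name, n, elt_size,
                32, 0,                 /* cache, private data: illustrative */
                NULL, NULL, NULL, NULL,
                rte_socket_id(), 0,
                va, pa, pg_num, pg_shift);
    }
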
+#ifdef RTE_LIBRTE_XEN_DOM0
+/**
+ * Creates a new mempool named *name* in memory on Xen Dom0.
+ *
+ * This function uses ``rte_mempool_xmem_create()`` to allocate memory. The
+ * pool contains n elements of elt_size. Its size is set to n.
+ * All elements of the mempool are allocated together with the mempool header,
+ * and the memory buffer can consist of a set of disjoint physical pages.
+ *
+ * @param name
+ * The name of the mempool.
+ * @param n
+ * The number of elements in the mempool. The optimum size (in terms of
+ * memory usage) for a mempool is when n is a power of two minus one:
+ * n = (2^q - 1).
+ * @param elt_size
+ * The size of each element.
+ * @param cache_size
+ * If cache_size is non-zero, the rte_mempool library will try to
+ * limit the accesses to the common lockless pool, by maintaining a
+ * per-lcore object cache. This argument must be less than or equal to
+ * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose
+ * cache_size to have "n modulo cache_size == 0": if this is
+ * not the case, some elements will always stay in the pool and will
+ * never be used. The access to the per-lcore table is of course
+ * faster than the multi-producer/consumer pool. The cache can be
+ * disabled if the cache_size argument is set to 0; it can be useful to
+ * avoid losing objects in cache. Note that even if not used, the
+ * memory space for cache is always reserved in a mempool structure,
+ * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0.
+ * @param private_data_size
+ * The size of the private data appended after the mempool
+ * structure. This is useful for storing some private data after the
+ * mempool structure, as is done for rte_mbuf_pool for example.
+ * @param mp_init
+ * A function pointer that is called for initialization of the pool,
+ * before object initialization. The user can initialize the private
+ * data in this function if needed. This parameter can be NULL if
+ * not needed.
+ * @param mp_init_arg
+ * An opaque pointer to data that can be used in the mempool
+ * constructor function.
+ * @param obj_init
+ * A function pointer that is called for each object at
+ * initialization of the pool. The user can set some meta data in
+ * objects if needed. This parameter can be NULL if not needed.
+ * The obj_init() function takes the mempool pointer, the init_arg,
+ * the object pointer and the object number as parameters.
+ * @param obj_init_arg
+ * An opaque pointer to data that can be used as an argument for
+ * each call to the object constructor function.
+ * @param socket_id
+ * The *socket_id* argument is the socket identifier in the case of
+ * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
+ * constraint for the reserved zone.
+ * @param flags
+ * The *flags* argument is an OR of the following flags:
+ * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread
+ * between channels in RAM: the pool allocator will add padding
+ * between objects depending on the hardware configuration. See
+ * Memory alignment constraints for details. If this flag is set,
+ * the allocator will just align them to a cache line.
+ * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are
+ * cache-aligned. This flag removes this constraint, and no
+ * padding will be present between objects. This flag implies
+ * MEMPOOL_F_NO_SPREAD.
+ * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior
+ * when using rte_mempool_put() or rte_mempool_put_bulk() is
+ * "single-producer". Otherwise, it is "multi-producers".
+ * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior
+ * when using rte_mempool_get() or rte_mempool_get_bulk() is
+ * "single-consumer". Otherwise, it is "multi-consumers".
+ * @return
+ * The pointer to the newly allocated mempool, on success. NULL on error
+ * with rte_errno set appropriately. Possible rte_errno values include:
+ * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
+ * - E_RTE_SECONDARY - function was called from a secondary process instance
+ * - E_RTE_NO_TAILQ - no tailq list could be got for the ring or mempool list
+ * - EINVAL - cache size provided is too large
+ * - ENOSPC - the maximum number of memzones has already been allocated
+ * - EEXIST - a memzone with the same name already exists
+ * - ENOMEM - no appropriate memory area found in which to create memzone
+ */
+struct rte_mempool *
+rte_dom0_mempool_create(const char *name, unsigned n, unsigned elt_size,
+ unsigned cache_size, unsigned private_data_size,
+ rte_mempool_ctor_t *mp_init, void *mp_init_arg,
+ rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg,
+ int socket_id, unsigned flags);
+#endif
+
/**
* Dump the status of the mempool to the console.
*
* @return
* The physical address of the elt element.
*/
-static inline phys_addr_t rte_mempool_virt2phy(const struct rte_mempool *mp,
- const void *elt)
+static inline phys_addr_t
+rte_mempool_virt2phy(const struct rte_mempool *mp, const void *elt)
{
uintptr_t off;
- off = (const char *)elt - (const char *)mp;
- return mp->phys_addr + off;
+ off = (const char *)elt - (const char *)mp->elt_va_start;
+ return (mp->elt_pa[off >> mp->pg_shift] + (off & mp->pg_mask));
}
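
A worked example of the translation, assuming pg_shift = 21 (2M pages) so that pg_mask = 0x1fffff:

    /*
     * object sits 5M past elt_va_start:
     *   off        = 0x500000
     *   page index = off >> 21      = 2
     *   page off   = off & 0x1fffff = 0x100000
     *   phys       = elt_pa[2] + 0x100000
     *
     * for the default one-chunk mempool (pg_num == 1, pg_shift ==
     * MEMPOOL_PG_SHIFT_MAX) every offset masks through, and this
     * degenerates to elt_pa[0] + off.
     */
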
*/
static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
{
- return (char *)mp + sizeof(struct rte_mempool);
+ return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
}
/**
* @param name
* The name of the mempool.
* @return
- * The pointer to the mempool matching the name, or NULL if not found.NULL on error
+ * The pointer to the mempool matching the name, or NULL if not found;
+ * NULL is also returned on error,
* with rte_errno set appropriately. Possible rte_errno values include:
* - ENOENT - required entry not available to return.
*
*/
struct rte_mempool *rte_mempool_lookup(const char *name);
+/**
+ * Given a desired size of the mempool element and mempool flags,
+ * calculate the header, trailer, body and total sizes of the mempool object.
+ * @param elt_size
+ * The size of each element.
+ * @param flags
+ * The flags used for the mempool creation.
+ * Consult rte_mempool_create() for more information about possible values.
+ * @param sz
+ * Structure filled in with the computed header, trailer, body and total
+ * sizes; can be NULL, in which case only the total size is returned.
+ * @return
+ * Total size of the mempool object.
+ */
+uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
+ struct rte_mempool_objsz *sz);
+
+/**
+ * Calculate maximum amount of memory required to store given number of objects.
+ * Assumes that the memory buffer will be aligned at the page boundary.
+ * Note that if the object size is bigger than the page size, then it assumes
+ * that we have a subset of physically contiguous pages big enough to store
+ * at least one object.
+ * @param elt_num
+ * Number of elements.
+ * @param elt_sz
+ * The size of each element.
+ * @param pg_shift
+ * LOG2 of the physical page size.
+ * @return
+ * Required memory size aligned at page boundary.
+ */
+size_t rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz,
+ uint32_t pg_shift);
+
+/**
+ * Calculate how much memory would be actually required with the given
+ * memory footprint to store required number of objects.
+ * @param vaddr
+ * Virtual address of the externally allocated memory buffer.
+ * Will be used to store mempool objects.
+ * @param elt_num
+ * Number of elements.
+ * @param elt_sz
+ * The size of each element.
+ * @param paddr
+ * Array of physical addresses of the pages that comprise the given
+ * memory buffer.
+ * @param pg_num
+ * Number of elements in the paddr array.
+ * @param pg_shift
+ * LOG2 of the physical page size.
+ * @return
+ * Number of bytes needed to store the given number of objects,
+ * aligned to the given page size.
+ * If the provided memory buffer is not big enough:
+ * (-1) * actual number of elements that can be stored in that buffer.
+ */
+ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz,
+ const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift);
+
#ifdef __cplusplus
}
#endif
if ((mz = rte_memzone_lookup(z_name)) != 0)
return (mz);
+#ifdef RTE_LIBRTE_XEN_DOM0
+ return rte_memzone_reserve_bounded(z_name, ring_size,
+ socket_id, 0, CACHE_LINE_SIZE, RTE_PGSIZE_2M);
+#else
return rte_memzone_reserve(z_name, ring_size, socket_id, 0);
+#endif
}
static void
txq->port_id = dev->data->port_id;
txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(queue_idx));
+#ifndef RTE_LIBRTE_XEN_DOM0
txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
+#else
+ txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+#endif
txq->tx_ring = (struct e1000_data_desc *) tz->addr;
PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
0 : ETHER_CRC_LEN);
rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(queue_idx));
- rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(queue_idx));
+ rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(queue_idx));
+#ifndef RTE_LIBRTE_XEN_DOM0
rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
+#else
+ rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+#endif
rxq->rx_ring = (struct e1000_rx_desc *) rz->addr;
PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64"\n",
if (mz)
return mz;
+#ifdef RTE_LIBRTE_XEN_DOM0
+ return rte_memzone_reserve_bounded(z_name, ring_size,
+ socket_id, 0, IGB_ALIGN, RTE_PGSIZE_2M);
+#else
return rte_memzone_reserve_aligned(z_name, ring_size,
socket_id, 0, IGB_ALIGN);
+#endif
}
static void
txq->port_id = dev->data->port_id;
txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
+#ifndef RTE_LIBRTE_XEN_DOM0
txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
- txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
-
+#else
+ txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+#endif
+ txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
/* Allocate software ring */
txq->sw_ring = rte_zmalloc("txq->sw_ring",
sizeof(struct igb_tx_entry) * nb_desc,
}
rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
+#ifndef RTE_LIBRTE_XEN_DOM0
rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
+#else
+ rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+#endif
rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
/* Allocate software ring. */
/*
* Configure RX buffer size.
*/
- mbp_priv = (struct rte_pktmbuf_pool_private *)
- ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
+ mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);
if (buf_size >= 1024) {
/*
* Configure RX buffer size.
*/
- mbp_priv = (struct rte_pktmbuf_pool_private *)
- ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
+ mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);
if (buf_size >= 1024) {
if (mz)
return mz;
+#ifdef RTE_LIBRTE_XEN_DOM0
+ return rte_memzone_reserve_bounded(z_name, ring_size,
+ socket_id, 0, IXGBE_ALIGN, RTE_PGSIZE_2M);
+#else
return rte_memzone_reserve_aligned(z_name, ring_size,
- socket_id, 0, IXGBE_ALIGN);
+ socket_id, 0, IXGBE_ALIGN);
+#endif
}
static void
txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
else
txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
-
+#ifndef RTE_LIBRTE_XEN_DOM0
txq->tx_ring_phys_addr = (uint64_t) tz->phys_addr;
+#else
+ txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
+#endif
txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
/* Allocate software ring */
rxq->rdh_reg_addr =
IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
}
-
+#ifndef RTE_LIBRTE_XEN_DOM0
rxq->rx_ring_phys_addr = (uint64_t) rz->phys_addr;
+#else
+ rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
+#endif
rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
/*
* The value is in 1 KB resolution. Valid values can be from
* 1 KB to 16 KB.
*/
- mbp_priv = (struct rte_pktmbuf_pool_private *)
- ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
+ mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);
srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
* The value is in 1 KB resolution. Valid values can be from
* 1 KB to 16 KB.
*/
- mbp_priv = (struct rte_pktmbuf_pool_private *)
- ((char *)rxq->mb_pool + sizeof(struct rte_mempool));
+ mbp_priv = rte_mempool_get_priv(rxq->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);
srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
break;
/* Now get the space available for data in the mbuf */
- mbp_priv = (struct rte_pktmbuf_pool_private *)
- ((char *)pcap_q->mb_pool + sizeof(struct rte_mempool));
+ mbp_priv = rte_mempool_get_priv(pcap_q->mb_pool);
buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
RTE_PKTMBUF_HEADROOM);