#include <syslog.h>
#include <getopt.h>
#include <sys/file.h>
+#include <dirent.h>
#include <fcntl.h>
+#include <fnmatch.h>
#include <stddef.h>
#include <errno.h>
#include <limits.h>
#include <rte_atomic.h>
#include <malloc_heap.h>
#include <rte_vfio.h>
+#include <rte_option.h>
#include "eal_private.h"
#include "eal_thread.h"
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
-/* Return user provided mbuf pool ops name */
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void)
+/* platform-specific runtime dir: holds the prefix-specific path assembled by
+ * eal_create_runtime_dir() and handed out by rte_eal_get_runtime_dir()
+ */
+static char runtime_dir[PATH_MAX];
+/* base directory eal_create_runtime_dir() starts from when getuid() == 0 */
+static const char *default_runtime_dir = "/var/run";
+
+/*
+ * Build the prefix-specific runtime directory path and create it on disk.
+ *
+ * The base directory is default_runtime_dir when getuid() returns 0;
+ * otherwise it is $XDG_RUNTIME_DIR if that variable is set, else "/tmp".
+ * The final path "<base>/dpdk/<hugefile_prefix>" is left in runtime_dir.
+ * Both directory levels are created with mode 0700; a level that already
+ * exists (EEXIST) is accepted.
+ *
+ * Returns 0 on success, -1 when a path does not fit its buffer or a
+ * directory cannot be created.
+ */
+int
+eal_create_runtime_dir(void)
{
- return internal_config.user_mbuf_pool_ops_name;
+	const char *directory = default_runtime_dir;
+	const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR");
+	const char *fallback = "/tmp";
+	char tmp[PATH_MAX];
+	int ret;
+
+	if (getuid() != 0) {
+		/* try XDG path first, fall back to /tmp */
+		if (xdg_runtime_dir != NULL)
+			directory = xdg_runtime_dir;
+		else
+			directory = fallback;
+	}
+	/* create DPDK subdirectory under runtime dir */
+	ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory);
+	/* snprintf() returns the length the complete string would have had,
+	 * so any value >= the buffer size means the path was truncated
+	 */
+	if (ret < 0 || (size_t)ret >= sizeof(tmp)) {
+		RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n");
+		return -1;
+	}
+
+	/* create prefix-specific subdirectory under DPDK runtime dir */
+	ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
+			tmp, internal_config.hugefile_prefix);
+	if (ret < 0 || (size_t)ret >= sizeof(runtime_dir)) {
+		RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
+		return -1;
+	}
+
+	/* create the path if it doesn't exist. no "mkdir -p" here, so do it
+	 * step by step.
+	 */
+	ret = mkdir(tmp, 0700);
+	if (ret < 0 && errno != EEXIST) {
+		RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+			tmp, strerror(errno));
+		return -1;
+	}
+
+	ret = mkdir(runtime_dir, 0700);
+	if (ret < 0 && errno != EEXIST) {
+		RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+			runtime_dir, strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove unused files from the runtime directory.
+ *
+ * Takes an exclusive flock() on runtime_dir, then walks its entries. Every
+ * entry whose name matches "fbarray_*" or "mp_socket_*" is opened and probed
+ * with a non-blocking exclusive flock(); if that lock is obtained the entry
+ * is unlinked. Entries that cannot be opened or locked are left in place
+ * (presumably still in use by another process - not verifiable from here).
+ *
+ * Returns 0 on success, -1 if the directory cannot be opened, locked or
+ * read.
+ */
+int
+eal_clean_runtime_dir(void)
+{
+	static const char * const filters[] = {
+		"fbarray_*",
+		"mp_socket_*"
+	};
+	DIR *dir;
+	struct dirent *dirent;
+	int dir_fd, fd, lck_result;
+	/* errno of the failing call; logging and closedir() may overwrite
+	 * errno before the final error message is printed
+	 */
+	int saved_errno = 0;
+
+	/* open directory */
+	dir = opendir(runtime_dir);
+	if (!dir) {
+		saved_errno = errno;
+		RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n",
+			runtime_dir);
+		goto error;
+	}
+	dir_fd = dirfd(dir);
+
+	/* lock the directory before doing anything, to avoid races */
+	if (flock(dir_fd, LOCK_EX) < 0) {
+		saved_errno = errno;
+		RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n",
+			runtime_dir);
+		goto error;
+	}
+
+	/* readdir() leaves errno untouched at end-of-directory, so clear it
+	 * first to avoid reporting a stale error code
+	 */
+	errno = 0;
+	dirent = readdir(dir);
+	if (!dirent) {
+		saved_errno = errno;
+		RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n",
+			runtime_dir);
+		goto error;
+	}
+
+	for (; dirent != NULL; dirent = readdir(dir)) {
+		unsigned int f_idx;
+		bool skip = true;
+
+		/* skip files that don't match the patterns */
+		for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) {
+			const char *filter = filters[f_idx];
+
+			if (fnmatch(filter, dirent->d_name, 0) == 0) {
+				skip = false;
+				break;
+			}
+		}
+		if (skip)
+			continue;
+
+		/* try and lock the file */
+		fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+
+		/* skip to next file */
+		if (fd == -1)
+			continue;
+
+		/* non-blocking lock */
+		lck_result = flock(fd, LOCK_EX | LOCK_NB);
+
+		/* if lock succeeds, remove the file */
+		if (lck_result != -1)
+			unlinkat(dir_fd, dirent->d_name, 0);
+		close(fd);
+	}
+
+	/* closedir closes dir_fd and drops the lock */
+	closedir(dir);
+	return 0;
+
+error:
+	if (dir)
+		closedir(dir);
+
+	RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n",
+		strerror(saved_errno));
+
+	return -1;
}
-/* Return mbuf pool ops name */
const char *
-rte_eal_mbuf_default_mempool_ops(void)
+rte_eal_get_runtime_dir(void)
{
- if (internal_config.user_mbuf_pool_ops_name == NULL)
- return RTE_MBUF_DEFAULT_MEMPOOL_OPS;
+ return runtime_dir; /* static buffer filled in by eal_create_runtime_dir() */
+}
+/* Return user provided mbuf pool ops name, i.e. whatever is currently stored
+ * in internal_config.user_mbuf_pool_ops_name
+ */
+const char *
+rte_eal_mbuf_user_pool_ops(void)
+{
return internal_config.user_mbuf_pool_ops_name;
}
* processes could later map the config into this exact location */
rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
+ rte_config.mem_config->dma_maskbits = 0;
+
}
/* attach to an existing shared memory config */
enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
const char *pathname = eal_runtime_config_path();
- /* if we can open the file but not get a write-lock we are a secondary
- * process. NOTE: if we get a file handle back, we keep that open
- * and don't close it to prevent a race condition between multiple opens */
- if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
- (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
- ptype = RTE_PROC_SECONDARY;
+ /* if there is no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
return ptype;
}
+/* publish the memory-mode flags of internal_config in the shared mem config */
+static void
+eal_update_mem_config(void)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct rte_mem_config *shared = cfg->mem_config;
+
+	shared->legacy_mem = internal_config.legacy_mem;
+	shared->single_file_segments = internal_config.single_file_segments;
+}
+}
+
+/* load the memory-mode flags of the shared mem config into internal_config */
+static void
+eal_update_internal_config(void)
+{
+	struct rte_config *cfg = rte_eal_get_configuration();
+	struct rte_mem_config *shared = cfg->mem_config;
+
+	internal_config.legacy_mem = shared->legacy_mem;
+	internal_config.single_file_segments = shared->single_file_segments;
+}
+}
+
/* Sets up rte_config structure with the pointer to shared memory config.*/
static void
rte_config_init(void)
switch (rte_config.process_type){
case RTE_PROC_PRIMARY:
rte_eal_config_create();
+ eal_update_mem_config();
break;
case RTE_PROC_SECONDARY:
rte_eal_config_attach();
rte_eal_mcfg_wait_complete(rte_config.mem_config);
rte_eal_config_reattach();
+ eal_update_internal_config();
break;
case RTE_PROC_AUTO:
case RTE_PROC_INVALID:
eal_common_usage();
printf("EAL Linux options:\n"
" --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n"
+ " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n"
" --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n"
" --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n"
" --"OPT_BASE_VIRTADDR" Base virtual address\n"
" --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n"
" --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n"
" --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n"
+ " --"OPT_MATCH_ALLOCATIONS" Free hugepages exactly as allocated\n"
"\n");
/* Allow the application to print its usage message too if hook is set */
if ( rte_application_usage_hook ) {
}
static int
-eal_parse_socket_mem(char *socket_mem)
+eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
{
char * arg[RTE_MAX_NUMA_NODES];
char *end;
int arg_num, i, len;
uint64_t total_mem = 0;
- len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
+ len = strnlen(strval, SOCKET_MEM_STRLEN);
if (len == SOCKET_MEM_STRLEN) {
RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
return -1;
}
/* all other error cases will be caught later */
- if (!isdigit(socket_mem[len-1]))
+ if (!isdigit(strval[len-1]))
return -1;
/* split the optarg into separate socket values */
- arg_num = rte_strsplit(socket_mem, len,
+ arg_num = rte_strsplit(strval, len,
arg, RTE_MAX_NUMA_NODES, ',');
/* if split failed, or 0 arguments */
if (arg_num <= 0)
return -1;
- internal_config.force_sockets = 1;
-
/* parse each defined socket option */
errno = 0;
for (i = 0; i < arg_num; i++) {
+ uint64_t val;
end = NULL;
- internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
+ val = strtoull(arg[i], &end, 10);
/* check for invalid input */
if ((errno != 0) ||
(arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
return -1;
- internal_config.socket_mem[i] *= 1024ULL;
- internal_config.socket_mem[i] *= 1024ULL;
- total_mem += internal_config.socket_mem[i];
+ val <<= 20;
+ total_mem += val;
+ socket_arg[i] = val;
}
/* check if we have a positive amount of total memory */
argvopt = argv;
optind = 1;
+ opterr = 0;
while ((opt = getopt_long(argc, argvopt, eal_short_options,
eal_long_options, &option_index)) != EOF) {
- /* getopt is not happy, stop right now */
+ /*
+ * getopt didn't recognise the option, so let's parse the
+ * registered options to see if the flag is valid
+ */
if (opt == '?') {
+ ret = rte_option_parse(argv[optind-1]);
+ if (ret == 0)
+ continue;
+
eal_usage(prgname);
ret = -1;
goto out;
break;
case OPT_SOCKET_MEM_NUM:
- if (eal_parse_socket_mem(optarg) < 0) {
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_mem) < 0) {
RTE_LOG(ERR, EAL, "invalid parameters for --"
OPT_SOCKET_MEM "\n");
eal_usage(prgname);
ret = -1;
goto out;
}
+ internal_config.force_sockets = 1;
+ break;
+
+ case OPT_SOCKET_LIMIT_NUM:
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_limit) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SOCKET_LIMIT "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ internal_config.force_socket_limits = 1;
break;
case OPT_BASE_VIRTADDR_NUM:
break;
case OPT_MBUF_POOL_OPS_NAME_NUM:
- internal_config.user_mbuf_pool_ops_name = optarg;
+ internal_config.user_mbuf_pool_ops_name =
+ strdup(optarg);
+ break;
+
+ case OPT_MATCH_ALLOCATIONS_NUM:
+ internal_config.match_allocations = 1;
break;
default:
}
}
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
if (eal_adjust_config(&internal_config) != 0) {
ret = -1;
goto out;
{
int *socket_id = arg;
- if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
- return 1;
+ if (msl->external)
+ return 0;
- return 0;
+ return *socket_id == msl->socket_id;
}
static void
int i, fctret, ret;
pthread_t thread_id;
static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
- const char *logid;
+ const char *p;
+ static char logid[PATH_MAX];
char cpuset[RTE_CPU_AFFINITY_STR_LEN];
char thread_name[RTE_MAX_THREAD_NAME_LEN];
return -1;
}
- logid = strrchr(argv[0], '/');
- logid = strdup(logid ? logid + 1: argv[0]);
-
+ p = strrchr(argv[0], '/');
+ strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid));
thread_id = pthread_self();
eal_reset_internal_config(&internal_config);
}
if (eal_plugins_init() < 0) {
- rte_eal_init_alert("Cannot init plugins\n");
+ rte_eal_init_alert("Cannot init plugins");
rte_errno = EINVAL;
rte_atomic32_clear(&run_once);
return -1;
return -1;
}
+ rte_config_init();
+
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread");
+ return -1;
+ }
+
+ /* Put mp channel init before bus scan so that we can init the vdev
+ * bus through mp channel in the secondary process before the bus scan.
+ */
+ if (rte_mp_channel_init() < 0) {
+ rte_eal_init_alert("failed to init mp channel");
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ rte_errno = EFAULT;
+ return -1;
+ }
+ }
+
+ /* register multi-process action callbacks for hotplug */
+ if (rte_mp_dev_hotplug_init() < 0) {
+ rte_eal_init_alert("failed to register mp callback for hotplug");
+ return -1;
+ }
+
if (rte_bus_scan()) {
- rte_eal_init_alert("Cannot scan the buses for devices\n");
+ rte_eal_init_alert("Cannot scan the buses for devices");
rte_errno = ENODEV;
rte_atomic32_clear(&run_once);
return -1;
}
- /* autodetect the iova mapping mode (default is iova_pa) */
- rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
-
- /* Workaround for KNI which requires physical address to work */
- if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
- rte_eal_check_module("rte_kni") == 1) {
- rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
- RTE_LOG(WARNING, EAL,
- "Some devices want IOVA as VA but PA will be used because.. "
- "KNI module inserted\n");
+ /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
+ if (internal_config.iova_mode == RTE_IOVA_DC) {
+ /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
+ rte_eal_get_configuration()->iova_mode =
+ rte_bus_get_iommu_class();
+
+ /* Workaround for KNI which requires physical address to work */
+ if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
+ rte_eal_check_module("rte_kni") == 1) {
+ rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
+ RTE_LOG(WARNING, EAL,
+ "Some devices want IOVA as VA but PA will be used because.. "
+ "KNI module inserted\n");
+ }
+ } else {
+ rte_eal_get_configuration()->iova_mode =
+ internal_config.iova_mode;
}
if (internal_config.no_hugetlbfs == 0) {
rte_srand(rte_rdtsc());
- rte_config_init();
-
if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) {
rte_eal_init_alert("Cannot init logging.");
rte_errno = ENOMEM;
return -1;
}
- if (rte_mp_channel_init() < 0) {
- rte_eal_init_alert("failed to init mp channel\n");
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- rte_errno = EFAULT;
- return -1;
- }
- }
-
#ifdef VFIO_PRESENT
if (rte_eal_vfio_setup() < 0) {
- rte_eal_init_alert("Cannot init VFIO\n");
+ rte_eal_init_alert("Cannot init VFIO");
rte_errno = EAGAIN;
rte_atomic32_clear(&run_once);
return -1;
* initialize memzones first.
*/
if (rte_eal_memzone_init() < 0) {
- rte_eal_init_alert("Cannot init memzone\n");
+ rte_eal_init_alert("Cannot init memzone");
rte_errno = ENODEV;
return -1;
}
if (rte_eal_memory_init() < 0) {
- rte_eal_init_alert("Cannot init memory\n");
+ rte_eal_init_alert("Cannot init memory");
rte_errno = ENOMEM;
return -1;
}
eal_hugedirs_unlock();
if (rte_eal_malloc_heap_init() < 0) {
- rte_eal_init_alert("Cannot init malloc heap\n");
+ rte_eal_init_alert("Cannot init malloc heap");
rte_errno = ENODEV;
return -1;
}
if (rte_eal_tailqs_init() < 0) {
- rte_eal_init_alert("Cannot init tail queues for objects\n");
+ rte_eal_init_alert("Cannot init tail queues for objects");
rte_errno = EFAULT;
return -1;
}
if (rte_eal_alarm_init() < 0) {
- rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+ rte_eal_init_alert("Cannot init interrupt-handling thread");
/* rte_eal_alarm_init sets rte_errno on failure. */
return -1;
}
if (rte_eal_timer_init() < 0) {
- rte_eal_init_alert("Cannot init HPET or TSC timers\n");
+ rte_eal_init_alert("Cannot init HPET or TSC timers");
rte_errno = ENOTSUP;
return -1;
}
eal_thread_init_master(rte_config.master_lcore);
- ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
- RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
- rte_config.master_lcore, (int)thread_id, cpuset,
+ RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
+ rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
ret == 0 ? "" : "...");
- if (rte_eal_intr_init() < 0) {
- rte_eal_init_alert("Cannot init interrupt-handling thread\n");
- return -1;
- }
-
RTE_LCORE_FOREACH_SLAVE(i) {
/*
rte_panic("Cannot create thread\n");
/* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ snprintf(thread_name, sizeof(thread_name),
"lcore-slave-%d", i);
ret = rte_thread_setname(lcore_config[i].thread_id,
thread_name);
/* initialize services so vdevs register service during bus_probe. */
ret = rte_service_init();
if (ret) {
- rte_eal_init_alert("rte_service_init() failed\n");
+ rte_eal_init_alert("rte_service_init() failed");
rte_errno = ENOEXEC;
return -1;
}
/* Probe all the buses and devices/drivers on them */
if (rte_bus_probe()) {
- rte_eal_init_alert("Cannot probe devices\n");
+ rte_eal_init_alert("Cannot probe devices");
rte_errno = ENOTSUP;
return -1;
}
return -1;
}
+ /*
+ * Clean up unused files in runtime directory. We do this at the end of
+ * init and not at the beginning because we want to clean stuff up
+ * whether we are primary or secondary process, but we cannot remove
+ * primary process' files because secondary should be able to run even
+ * if primary process is dead.
+ */
+ if (eal_clean_runtime_dir() < 0) {
+ rte_eal_init_alert("Cannot clear runtime directory\n");
+ return -1;
+ }
+
rte_eal_mcfg_complete();
+ /* Call each registered callback, if enabled */
+ rte_option_init();
+
return fctret;
}
+/*
+ * Memseg walk callback (passed to rte_memseg_walk() by rte_eal_cleanup()):
+ * clears RTE_MEMSEG_FLAG_DO_NOT_FREE on the visited memseg.
+ *
+ * Memsegs of external memseg lists are left untouched. The walk hands in a
+ * const memseg, so the writable one is looked up by address first; if that
+ * lookup yields nothing the segment is skipped. Always returns 0.
+ */
+static int
+mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+		void *arg __rte_unused)
+{
+	/* ms is const, so find this memseg */
+	struct rte_memseg *found;
+
+	if (msl->external)
+		return 0;
+
+	found = rte_mem_virt2memseg(ms->addr, msl);
+	/* lookup can fail with NULL; nothing to mark in that case */
+	if (found == NULL)
+		return 0;
+
+	found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
+
+	return 0;
+}
+
int __rte_experimental
rte_eal_cleanup(void)
{
+ /* if we're in a primary process, we need to mark hugepages as freeable
+ * so that finalization can release them back to the system.
+ * mark_freeable() clears RTE_MEMSEG_FLAG_DO_NOT_FREE on the memsegs of
+ * non-external memseg lists; the walk's return value is not checked.
+ */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_memseg_walk(mark_freeable, NULL);
rte_service_finalize();
return 0;
}