X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal.c;h=633e3b8f8c42ccffa5ee0565081eac0e0afa2383;hb=83a73c5fef66;hp=61dc70a93e805f626c32ebc7b24ad842d23782a2;hpb=b6a468ad41d59205ae5b60cf5c8212e130c3e5d7;p=dpdk.git

diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 61dc70a93e..21afa73b5c 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -1,35 +1,6 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation.
+ * Copyright(c) 2012-2014 6WIND S.A.
*/ #include @@ -39,66 +10,59 @@ #include #include #include +#include #include +#include #include #include #include #include -#include #include #include +#include +#if defined(RTE_ARCH_X86) +#include +#endif +#include #include #include #include -#include #include -#include #include +#include +#include #include #include +#include #include #include #include #include #include #include -#include -#include +#include +#include +#include #include +#include +#include +#include #include "eal_private.h" #include "eal_thread.h" #include "eal_internal_cfg.h" #include "eal_filesystem.h" #include "eal_hugepages.h" - -#define OPT_HUGE_DIR "huge-dir" -#define OPT_PROC_TYPE "proc-type" -#define OPT_NO_SHCONF "no-shconf" -#define OPT_NO_HPET "no-hpet" -#define OPT_NO_PCI "no-pci" -#define OPT_NO_HUGE "no-huge" -#define OPT_FILE_PREFIX "file-prefix" -#define OPT_SOCKET_MEM "socket-mem" - -#define RTE_EAL_BLACKLIST_SIZE 0x100 +#include "eal_options.h" +#include "eal_vfio.h" #define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) #define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) -#define GET_BLACKLIST_FIELD(in, fd, lim, dlm) \ -{ \ - unsigned long val; \ - char *end; \ - errno = 0; \ - val = strtoul((in), &end, 16); \ - if (errno != 0 || end[0] != (dlm) || val > (lim)) \ - return (-EINVAL); \ - (fd) = (typeof (fd))val; \ - (in) = end + 1; \ -} +/* Allow the application to print its usage message too if set */ +static rte_usage_hook_t rte_application_usage_hook = NULL; /* early configuration structure, when memory config is not mmapped */ static struct rte_mem_config early_mem_config; @@ -110,8 +74,8 @@ static int mem_cfg_fd = -1; static struct flock wr_lock = { .l_type = F_WRLCK, .l_whence = SEEK_SET, - .l_start = offsetof(struct rte_mem_config, memseg), - .l_len = sizeof(early_mem_config.memseg), + .l_start = offsetof(struct rte_mem_config, memsegs), + .l_len = sizeof(early_mem_config.memsegs), }; /* Address of global and public configuration */ @@ -119,14 +83,32 @@ static struct rte_config rte_config = { .mem_config = &early_mem_config, }; -static struct rte_pci_addr eal_dev_blacklist[RTE_EAL_BLACKLIST_SIZE]; - /* internal configuration (per-core) */ struct lcore_config lcore_config[RTE_MAX_LCORE]; /* internal configuration */ struct internal_config internal_config; +/* used by rte_rdtsc() */ +int rte_cycles_vmware_tsc_map; + +/* Return user provided mbuf pool ops name */ +const char * __rte_experimental +rte_eal_mbuf_user_pool_ops(void) +{ + return internal_config.user_mbuf_pool_ops_name; +} + +/* Return mbuf pool ops name */ +const char * +rte_eal_mbuf_default_mempool_ops(void) +{ + if (internal_config.user_mbuf_pool_ops_name == NULL) + return RTE_MBUF_DEFAULT_MEMPOOL_OPS; + + return internal_config.user_mbuf_pool_ops_name; +} + /* Return a pointer to the configuration structure */ struct rte_config * rte_eal_get_configuration(void) @@ -134,6 +116,12 @@ rte_eal_get_configuration(void) return &rte_config; } +enum rte_iova_mode +rte_eal_iova_mode(void) +{ + return rte_eal_get_configuration()->iova_mode; +} + /* parse a sysfs (or other) file containing one integer value */ int eal_parse_sysfs_value(const char *filename, unsigned long *val) @@ -183,6 +171,14 @@ rte_eal_config_create(void) if (internal_config.no_shconf) return; + /* map the config before hugepage address so that we don't waste a page */ + if (internal_config.base_virtaddr != 0) + rte_mem_cfg_addr = (void *) + RTE_ALIGN_FLOOR(internal_config.base_virtaddr - + sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE)); + else + rte_mem_cfg_addr 
= NULL; + if (mem_cfg_fd < 0){ mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660); if (mem_cfg_fd < 0) @@ -202,44 +198,85 @@ rte_eal_config_create(void) "process running?\n", pathname); } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); if (rte_mem_cfg_addr == MAP_FAILED){ rte_panic("Cannot mmap memory for rte_config\n"); } - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; - memcpy(rte_config.mem_config, &early_mem_config, - sizeof(early_mem_config)); + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); + rte_config.mem_config = rte_mem_cfg_addr; + + /* store address of the config in the config itself so that secondary + * processes could later map the config into this exact location */ + rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; + } /* attach to an existing shared memory config */ static void rte_eal_config_attach(void) { - void *rte_mem_cfg_addr; + struct rte_mem_config *mem_config; + const char *pathname = eal_runtime_config_path(); if (internal_config.no_shconf) return; if (mem_cfg_fd < 0){ - mem_cfg_fd = open(pathname, O_RDONLY); + mem_cfg_fd = open(pathname, O_RDWR); if (mem_cfg_fd < 0) rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), PROT_READ, - MAP_SHARED, mem_cfg_fd, 0); + /* map it as read-only first */ + mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), + PROT_READ, MAP_SHARED, mem_cfg_fd, 0); + if (mem_config == MAP_FAILED) + rte_panic("Cannot mmap memory for rte_config! error %i (%s)\n", + errno, strerror(errno)); + + rte_config.mem_config = mem_config; +} + +/* reattach the shared config at exact memory location primary process has it */ +static void +rte_eal_config_reattach(void) +{ + struct rte_mem_config *mem_config; + void *rte_mem_cfg_addr; + + if (internal_config.no_shconf) + return; + + /* save the address primary process has mapped shared config to */ + rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr; + + /* unmap original config */ + munmap(rte_config.mem_config, sizeof(struct rte_mem_config)); + + /* remap the config at proper address */ + mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, + sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, + mem_cfg_fd, 0); + if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) { + if (mem_config != MAP_FAILED) + /* errno is stale, don't use */ + rte_panic("Cannot mmap memory for rte_config at [%p], got [%p]" + " - please use '--base-virtaddr' option\n", + rte_mem_cfg_addr, mem_config); + else + rte_panic("Cannot mmap memory for rte_config! 
error %i (%s)\n", + errno, strerror(errno)); + } close(mem_cfg_fd); - if (rte_mem_cfg_addr == MAP_FAILED) - rte_panic("Cannot mmap memory for rte_config\n"); - rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + rte_config.mem_config = mem_config; } /* Detect if we are a primary or a secondary process */ -static enum rte_proc_type_t +enum rte_proc_type_t eal_proc_type_detect(void) { enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; @@ -262,11 +299,7 @@ eal_proc_type_detect(void) static void rte_config_init(void) { - /* set the magic in configuration structure */ - rte_config.magic = RTE_MAGIC; - rte_config.process_type = (internal_config.process_type == RTE_PROC_AUTO) ? - eal_proc_type_detect() : /* for auto, detect the type */ - internal_config.process_type; /* otherwise use what's already set */ + rte_config.process_type = internal_config.process_type; switch (rte_config.process_type){ case RTE_PROC_PRIMARY: @@ -274,6 +307,8 @@ rte_config_init(void) break; case RTE_PROC_SECONDARY: rte_eal_config_attach(); + rte_eal_mcfg_wait_complete(rte_config.mem_config); + rte_eal_config_reattach(); break; case RTE_PROC_AUTO: case RTE_PROC_INVALID: @@ -281,67 +316,59 @@ rte_config_init(void) } } +/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ +static void +eal_hugedirs_unlock(void) +{ + int i; + + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + { + /* skip uninitialized */ + if (internal_config.hugepage_info[i].lock_descriptor < 0) + continue; + /* unlock hugepage file */ + flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN); + close(internal_config.hugepage_info[i].lock_descriptor); + /* reset the field */ + internal_config.hugepage_info[i].lock_descriptor = -1; + } +} + /* display usage */ static void eal_usage(const char *prgname) { - printf("\nUsage: %s -c COREMASK -n NUM [-m NB] [-r NUM] [-b ]" - "[--proc-type primary|secondary|auto] \n\n" - "EAL options:\n" - " -c COREMASK : A hexadecimal bitmask of cores to run on\n" - " -n NUM : Number of memory channels\n" - " -v : Display version information on startup\n" - " -b : to prevent EAL from using specified " - "PCI device\n" - " (multiple -b options are allowed)\n" - " -m MB : memory to allocate (see also --"OPT_SOCKET_MEM")\n" - " -r NUM : force number of memory ranks (don't detect)\n" - " --"OPT_SOCKET_MEM" : memory to allocate on specific \n" - " sockets (use comma separated values)\n" - " --"OPT_HUGE_DIR" : directory where hugetlbfs is mounted\n" - " --"OPT_PROC_TYPE" : type of this process\n" - " --"OPT_FILE_PREFIX": prefix for hugepage filenames\n" - "\nEAL options for DEBUG use only:\n" - " --"OPT_NO_HUGE" : use malloc instead of hugetlbfs\n" - " --"OPT_NO_PCI" : disable pci\n" - " --"OPT_NO_HPET" : disable hpet\n" - " --"OPT_NO_SHCONF": no shared config (mmap'd files)\n\n", - prgname); + printf("\nUsage: %s ", prgname); + eal_common_usage(); + printf("EAL Linux options:\n" + " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" + " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" + " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" + " --"OPT_BASE_VIRTADDR" Base virtual address\n" + " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" + " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" + " --"OPT_LEGACY_MEM" Legacy memory mode (no dynamic allocation, contiguous segments)\n" + " --"OPT_SINGLE_FILE_SEGMENTS" Put all hugepage memory in single files\n" + "\n"); + /* Allow the application to print its usage message too if 
hook is set */ + if ( rte_application_usage_hook ) { + printf("===== Application Usage =====\n\n"); + rte_application_usage_hook(prgname); + } } -/* - * Parse the coremask given as argument (hexadecimal string) and fill - * the global configuration (core role and core count) with the parsed - * value. - */ -static int -eal_parse_coremask(const char *coremask) +/* Set a per-application usage message */ +rte_usage_hook_t +rte_set_application_usage_hook( rte_usage_hook_t usage_func ) { - struct rte_config *cfg = rte_eal_get_configuration(); - unsigned i; - char *end = NULL; - unsigned long long cm; - unsigned count = 0; - - /* parse hexadecimal string */ - cm = strtoull(coremask, &end, 16); - if ((coremask[0] == '\0') || (end == NULL) || (*end != '\0') || (cm == 0)) - return -1; - - RTE_LOG(DEBUG, EAL, "coremask set to %llx\n", cm); - /* set core role and core count */ - for (i = 0; i < RTE_MAX_LCORE; i++) { - if ((1ULL << i) & cm) { - if (count == 0) - cfg->master_lcore = i; - cfg->lcore_role[i] = ROLE_RTE; - count++; - } - else { - cfg->lcore_role[i] = ROLE_OFF; - } - } - return 0; + rte_usage_hook_t old_func; + + /* Will be NULL on the first call to denote the last usage routine. */ + old_func = rte_application_usage_hook; + rte_application_usage_hook = usage_func; + + return old_func; } static int @@ -394,273 +421,292 @@ eal_parse_socket_mem(char *socket_mem) return 0; } -static inline uint64_t -eal_get_hugepage_mem_size(void) +static int +eal_parse_base_virtaddr(const char *arg) { - uint64_t size = 0; - unsigned i, j; + char *end; + uint64_t addr; - for (i = 0; i < internal_config.num_hugepage_sizes; i++) { - struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) { - for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { - size += hpi->hugepage_sz * hpi->num_pages[j]; - } - } - } + errno = 0; + addr = strtoull(arg, &end, 16); - return (size); -} + /* check for errors */ + if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0')) + return -1; -static enum rte_proc_type_t -eal_parse_proc_type(const char *arg) -{ - if (strncasecmp(arg, "primary", sizeof("primary")) == 0) - return RTE_PROC_PRIMARY; - if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0) - return RTE_PROC_SECONDARY; - if (strncasecmp(arg, "auto", sizeof("auto")) == 0) - return RTE_PROC_AUTO; + /* make sure we don't exceed 32-bit boundary on 32-bit target */ +#ifndef RTE_ARCH_64 + if (addr >= UINTPTR_MAX) + return -1; +#endif - return RTE_PROC_INVALID; + /* align the addr on 16M boundary, 16MB is the minimum huge page + * size on IBM Power architecture. If the addr is aligned to 16MB, + * it can align to 2MB for x86. 
So this alignment can also be used + * on x86 */ + internal_config.base_virtaddr = + RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M); + + return 0; } static int -eal_parse_blacklist(const char *input, struct rte_pci_addr *dev2bl) +eal_parse_vfio_intr(const char *mode) { - GET_BLACKLIST_FIELD(input, dev2bl->domain, UINT16_MAX, ':'); - GET_BLACKLIST_FIELD(input, dev2bl->bus, UINT8_MAX, ':'); - GET_BLACKLIST_FIELD(input, dev2bl->devid, UINT8_MAX, '.'); - GET_BLACKLIST_FIELD(input, dev2bl->function, UINT8_MAX, 0); - return (0); + unsigned i; + static struct { + const char *name; + enum rte_intr_mode value; + } map[] = { + { "legacy", RTE_INTR_MODE_LEGACY }, + { "msi", RTE_INTR_MODE_MSI }, + { "msix", RTE_INTR_MODE_MSIX }, + }; + + for (i = 0; i < RTE_DIM(map); i++) { + if (!strcmp(mode, map[i].name)) { + internal_config.vfio_intr_mode = map[i].value; + return 0; + } + } + return -1; } -static ssize_t -eal_parse_blacklist_opt(const char *optarg, size_t idx) +/* Parse the arguments for --log-level only */ +static void +eal_log_level_parse(int argc, char **argv) { - if (idx >= sizeof (eal_dev_blacklist) / sizeof (eal_dev_blacklist[0])) { - RTE_LOG(ERR, EAL, - "%s - too many devices to blacklist...\n", - optarg); - return (-EINVAL); - } else if (eal_parse_blacklist(optarg, eal_dev_blacklist + idx) != 0) { - RTE_LOG(ERR, EAL, - "%s - invalid device to blacklist...\n", - optarg); - return (-EINVAL); + int opt; + char **argvopt; + int option_index; + const int old_optind = optind; + const int old_optopt = optopt; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + int ret; + + /* getopt is not happy, stop right now */ + if (opt == '?') + break; + + ret = (opt == OPT_LOG_LEVEL_NUM) ? 
+ eal_parse_common_option(opt, optarg, &internal_config) : 0; + + /* common parser is not happy */ + if (ret < 0) + break; } - idx += 1; - return (idx); + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optarg = old_optarg; } - /* Parse the argument given in the command line of the application */ static int eal_parse_args(int argc, char **argv) { - int opt, ret, i; + int opt, ret; char **argvopt; int option_index; - int coremask_ok = 0; - ssize_t blacklist_index = 0;; char *prgname = argv[0]; - static struct option lgopts[] = { - {OPT_NO_HUGE, 0, 0, 0}, - {OPT_NO_PCI, 0, 0, 0}, - {OPT_NO_HPET, 0, 0, 0}, - {OPT_HUGE_DIR, 1, 0, 0}, - {OPT_NO_SHCONF, 0, 0, 0}, - {OPT_PROC_TYPE, 1, 0, 0}, - {OPT_FILE_PREFIX, 1, 0, 0}, - {OPT_SOCKET_MEM, 1, 0, 0}, - {0, 0, 0, 0} - }; + const int old_optind = optind; + const int old_optopt = optopt; + char * const old_optarg = optarg; argvopt = argv; + optind = 1; - internal_config.memory = 0; - internal_config.force_nrank = 0; - internal_config.force_nchannel = 0; - internal_config.hugefile_prefix = HUGEFILE_PREFIX_DEFAULT; - internal_config.hugepage_dir = NULL; - internal_config.force_sockets = 0; -#ifdef RTE_LIBEAL_USE_HPET - internal_config.no_hpet = 0; -#else - internal_config.no_hpet = 1; -#endif - /* zero out the NUMA config */ - for (i = 0; i < RTE_MAX_NUMA_NODES; i++) - internal_config.socket_mem[i] = 0; + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { - while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v", - lgopts, &option_index)) != EOF) { + /* getopt is not happy, stop right now */ + if (opt == '?') { + eal_usage(prgname); + ret = -1; + goto out; + } + + ret = eal_parse_common_option(opt, optarg, &internal_config); + /* common parser is not happy */ + if (ret < 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + /* common parser handled this option */ + if (ret == 0) + continue; switch (opt) { - /* blacklist */ - case 'b': - if ((blacklist_index = eal_parse_blacklist_opt(optarg, - blacklist_index)) < 0) { - eal_usage(prgname); - return (-1); - } + case 'h': + eal_usage(prgname); + exit(EXIT_SUCCESS); + + case OPT_HUGE_DIR_NUM: + internal_config.hugepage_dir = strdup(optarg); + break; + + case OPT_FILE_PREFIX_NUM: + internal_config.hugefile_prefix = strdup(optarg); break; - /* coremask */ - case 'c': - if (eal_parse_coremask(optarg) < 0) { - RTE_LOG(ERR, EAL, "invalid coremask\n"); + + case OPT_SOCKET_MEM_NUM: + if (eal_parse_socket_mem(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SOCKET_MEM "\n"); eal_usage(prgname); - return -1; + ret = -1; + goto out; } - coremask_ok = 1; break; - /* size of memory */ - case 'm': - internal_config.memory = atoi(optarg); - internal_config.memory *= 1024ULL; - internal_config.memory *= 1024ULL; - break; - /* force number of channels */ - case 'n': - internal_config.force_nchannel = atoi(optarg); - if (internal_config.force_nchannel == 0 || - internal_config.force_nchannel > 4) { - RTE_LOG(ERR, EAL, "invalid channel number\n"); + + case OPT_BASE_VIRTADDR_NUM: + if (eal_parse_base_virtaddr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_BASE_VIRTADDR "\n"); eal_usage(prgname); - return -1; + ret = -1; + goto out; } break; - /* force number of ranks */ - case 'r': - internal_config.force_nrank = atoi(optarg); - if (internal_config.force_nrank == 0 || - internal_config.force_nrank > 16) { - RTE_LOG(ERR, EAL, "invalid rank number\n"); + + case OPT_VFIO_INTR_NUM: + if 
(eal_parse_vfio_intr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_VFIO_INTR "\n"); eal_usage(prgname); - return -1; + ret = -1; + goto out; } break; - case 'v': - /* since message is explicitly requested by user, we - * write message at highest log level so it can always be seen - * even if info or warning messages are disabled */ - RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version()); + + case OPT_CREATE_UIO_DEV_NUM: + internal_config.create_uio_dev = 1; break; - /* long options */ - case 0: - if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) { - internal_config.no_hugetlbfs = 1; - } - else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) { - internal_config.no_pci = 1; - } - else if (!strcmp(lgopts[option_index].name, OPT_NO_HPET)) { - internal_config.no_hpet = 1; - } - else if (!strcmp(lgopts[option_index].name, OPT_NO_SHCONF)) { - internal_config.no_shconf = 1; - } - else if (!strcmp(lgopts[option_index].name, OPT_HUGE_DIR)) { - internal_config.hugepage_dir = optarg; - } - else if (!strcmp(lgopts[option_index].name, OPT_PROC_TYPE)) { - internal_config.process_type = eal_parse_proc_type(optarg); - } - else if (!strcmp(lgopts[option_index].name, OPT_FILE_PREFIX)) { - internal_config.hugefile_prefix = optarg; - } - else if (!strcmp(lgopts[option_index].name, OPT_SOCKET_MEM)) { - if (eal_parse_socket_mem(optarg) < 0) { - RTE_LOG(ERR, EAL, "invalid parameters for --" - OPT_SOCKET_MEM "\n"); - eal_usage(prgname); - return -1; - } - } + case OPT_MBUF_POOL_OPS_NAME_NUM: + internal_config.user_mbuf_pool_ops_name = optarg; break; default: + if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { + RTE_LOG(ERR, EAL, "Option %c is not supported " + "on Linux\n", opt); + } else if (opt >= OPT_LONG_MIN_NUM && + opt < OPT_LONG_MAX_NUM) { + RTE_LOG(ERR, EAL, "Option %s is not supported " + "on Linux\n", + eal_long_options[option_index].name); + } else { + RTE_LOG(ERR, EAL, "Option %d is not supported " + "on Linux\n", opt); + } eal_usage(prgname); - return -1; + ret = -1; + goto out; } } - /* sanity checks */ - if (!coremask_ok) { - RTE_LOG(ERR, EAL, "coremask not specified\n"); - eal_usage(prgname); - return -1; - } - if (internal_config.process_type == RTE_PROC_AUTO){ - internal_config.process_type = eal_proc_type_detect(); - } - if (internal_config.process_type == RTE_PROC_INVALID){ - RTE_LOG(ERR, EAL, "Invalid process type specified\n"); - eal_usage(prgname); - return -1; + if (eal_adjust_config(&internal_config) != 0) { + ret = -1; + goto out; } - if (internal_config.process_type == RTE_PROC_PRIMARY && - internal_config.force_nchannel == 0) { - RTE_LOG(ERR, EAL, "Number of memory channels (-n) not specified\n"); - eal_usage(prgname); - return -1; - } - if (index(internal_config.hugefile_prefix,'%') != NULL){ - RTE_LOG(ERR, EAL, "Invalid char, '%%', in '"OPT_FILE_PREFIX"' option\n"); - eal_usage(prgname); - return -1; - } - if (internal_config.memory > 0 && internal_config.force_sockets == 1) { - RTE_LOG(ERR, EAL, "Options -m and --socket-mem cannot be specified " - "at the same time\n"); - eal_usage(prgname); - return -1; - } - /* --no-huge doesn't make sense with either -m or --socket-mem */ - if (internal_config.no_hugetlbfs && - (internal_config.memory > 0 || - internal_config.force_sockets == 1)) { - RTE_LOG(ERR, EAL, "Options -m or --socket-mem cannot be specified " - "together with --no-huge!\n"); + + /* sanity checks */ + if (eal_check_common_options(&internal_config) != 0) { eal_usage(prgname); - return -1; + ret = -1; + goto out; } - if (blacklist_index > 0) - 
rte_eal_pci_set_blacklist(eal_dev_blacklist, blacklist_index); - if (optind >= 0) argv[optind-1] = prgname; + ret = optind-1; - /* if no memory amounts were requested, this will result in 0 and - * will be overriden later, right after eal_hugepage_info_init() */ - for (i = 0; i < RTE_MAX_NUMA_NODES; i++) - internal_config.memory += internal_config.socket_mem[i]; +out: + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optarg = old_optarg; - ret = optind-1; - optind = 0; /* reset getopt lib */ return ret; } +static int +check_socket(const struct rte_memseg_list *msl, void *arg) +{ + int *socket_id = arg; + + if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0) + return 1; + + return 0; +} + static void eal_check_mem_on_local_socket(void) { - const struct rte_memseg *ms; - int i, socket_id; + int socket_id; socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); - ms = rte_eal_get_physmem_layout(); + if (rte_memseg_list_walk(check_socket, &socket_id) == 0) + RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n"); +} + +static int +sync_func(__attribute__((unused)) void *arg) +{ + return 0; +} - for (i = 0; i < RTE_MAX_MEMSEG; i++) - if (ms[i].socket_id == socket_id && - ms[i].len > 0) - return; +inline static void +rte_eal_mcfg_complete(void) +{ + /* ALL shared mem_config related INIT DONE */ + if (rte_config.process_type == RTE_PROC_PRIMARY) + rte_config.mem_config->magic = RTE_MAGIC; - RTE_LOG(WARNING, EAL, "WARNING: Master core has no " - "memory on local socket!\n"); + internal_config.init_complete = 1; +} + +/* + * Request iopl privilege for all RPL, returns 0 on success + * iopl() call is mostly for the i386 architecture. For other architectures, + * return -1 to indicate IO privilege can't be changed in this way. + */ +int +rte_eal_iopl_init(void) +{ +#if defined(RTE_ARCH_X86) + if (iopl(3) != 0) + return -1; +#endif + return 0; +} + +#ifdef VFIO_PRESENT +static int rte_eal_vfio_setup(void) +{ + if (rte_vfio_enable("vfio")) + return -1; + + return 0; +} +#endif + +static void rte_eal_init_alert(const char *msg) +{ + fprintf(stderr, "EAL: FATAL: %s\n", msg); + RTE_LOG(ERR, EAL, "%s\n", msg); } /* Launch threads, called at application init(). */ @@ -669,61 +715,194 @@ rte_eal_init(int argc, char **argv) { int i, fctret, ret; pthread_t thread_id; + static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0); + const char *logid; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + /* checks if the machine is adequate */ + if (!rte_cpu_is_supported()) { + rte_eal_init_alert("unsupported cpu type."); + rte_errno = ENOTSUP; + return -1; + } + + if (!rte_atomic32_test_and_set(&run_once)) { + rte_eal_init_alert("already called initialization."); + rte_errno = EALREADY; + return -1; + } + + logid = strrchr(argv[0], '/'); + logid = strdup(logid ? 
logid + 1: argv[0]); thread_id = pthread_self(); - if (rte_eal_log_early_init() < 0) - rte_panic("Cannot init early logs\n"); + eal_reset_internal_config(&internal_config); + + /* set log level as early as possible */ + eal_log_level_parse(argc, argv); + + if (rte_eal_cpu_init() < 0) { + rte_eal_init_alert("Cannot detect lcores."); + rte_errno = ENOTSUP; + return -1; + } fctret = eal_parse_args(argc, argv); - if (fctret < 0) - exit(1); + if (fctret < 0) { + rte_eal_init_alert("Invalid 'command line' arguments."); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } + + if (eal_plugins_init() < 0) { + rte_eal_init_alert("Cannot init plugins\n"); + rte_errno = EINVAL; + rte_atomic32_clear(&run_once); + return -1; + } + + if (eal_option_device_parse()) { + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } + + if (rte_bus_scan()) { + rte_eal_init_alert("Cannot scan the buses for devices\n"); + rte_errno = ENODEV; + rte_atomic32_clear(&run_once); + return -1; + } - if (eal_hugepage_info_init() < 0) - rte_panic("Cannot get hugepage information\n"); + /* autodetect the iova mapping mode (default is iova_pa) */ + rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class(); + + /* Workaround for KNI which requires physical address to work */ + if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA && + rte_eal_check_module("rte_kni") == 1) { + rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA; + RTE_LOG(WARNING, EAL, + "Some devices want IOVA as VA but PA will be used because.. " + "KNI module inserted\n"); + } + + if (internal_config.no_hugetlbfs == 0) { + /* rte_config isn't initialized yet */ + ret = internal_config.process_type == RTE_PROC_PRIMARY ? + eal_hugepage_info_init() : + eal_hugepage_info_read(); + if (ret < 0) { + rte_eal_init_alert("Cannot get hugepage information."); + rte_errno = EACCES; + rte_atomic32_clear(&run_once); + return -1; + } + } if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; - else - internal_config.memory = eal_get_hugepage_mem_size(); + } + + if (internal_config.vmware_tsc_map == 1) { +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + rte_cycles_vmware_tsc_map = 1; + RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " + "you must have monitor_control.pseudo_perfctr = TRUE\n"); +#else + RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " + "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); +#endif } rte_srand(rte_rdtsc()); - rte_config_init(); - if (rte_eal_cpu_init() < 0) - rte_panic("Cannot detect lcores\n"); + rte_config_init(); - if (rte_eal_memory_init() < 0) - rte_panic("Cannot init memory\n"); + if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) { + rte_eal_init_alert("Cannot init logging."); + rte_errno = ENOMEM; + rte_atomic32_clear(&run_once); + return -1; + } - if (rte_eal_memzone_init() < 0) - rte_panic("Cannot init memzone\n"); + if (rte_mp_channel_init() < 0) { + rte_eal_init_alert("failed to init mp channel\n"); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_errno = EFAULT; + return -1; + } + } - if (rte_eal_tailqs_init() < 0) - rte_panic("Cannot init tail queues for objects\n"); +#ifdef VFIO_PRESENT + if (rte_eal_vfio_setup() < 0) { + rte_eal_init_alert("Cannot init VFIO\n"); + rte_errno = EAGAIN; + rte_atomic32_clear(&run_once); + return -1; + } +#endif + /* in secondary processes, memory init may allocate additional fbarrays + * not present in primary processes, 
so to avoid any potential issues, + * initialize memzones first. + */ + if (rte_eal_memzone_init() < 0) { + rte_eal_init_alert("Cannot init memzone\n"); + rte_errno = ENODEV; + return -1; + } - if (rte_eal_log_init() < 0) - rte_panic("Cannot init logs\n"); + if (rte_eal_memory_init() < 0) { + rte_eal_init_alert("Cannot init memory\n"); + rte_errno = ENOMEM; + return -1; + } - if (rte_eal_alarm_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + /* the directories are locked during eal_hugepage_info_init */ + eal_hugedirs_unlock(); - if (rte_eal_intr_init() < 0) - rte_panic("Cannot init interrupt-handling thread\n"); + if (rte_eal_malloc_heap_init() < 0) { + rte_eal_init_alert("Cannot init malloc heap\n"); + rte_errno = ENODEV; + return -1; + } - if (rte_eal_hpet_init() < 0) - rte_panic("Cannot init HPET\n"); + if (rte_eal_tailqs_init() < 0) { + rte_eal_init_alert("Cannot init tail queues for objects\n"); + rte_errno = EFAULT; + return -1; + } - if (rte_eal_pci_init() < 0) - rte_panic("Cannot init PCI\n"); + if (rte_eal_alarm_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + /* rte_eal_alarm_init sets rte_errno on failure. */ + return -1; + } - RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n", - rte_config.master_lcore, (int)thread_id); + if (rte_eal_timer_init() < 0) { + rte_eal_init_alert("Cannot init HPET or TSC timers\n"); + rte_errno = ENOTSUP; + return -1; + } eal_check_mem_on_local_socket(); + eal_thread_init_master(rte_config.master_lcore); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n", + rte_config.master_lcore, (int)thread_id, cpuset, + ret == 0 ? "" : "..."); + + if (rte_eal_intr_init() < 0) { + rte_eal_init_alert("Cannot init interrupt-handling thread\n"); + return -1; + } + RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -742,23 +921,130 @@ rte_eal_init(int argc, char **argv) eal_thread_loop, NULL); if (ret != 0) rte_panic("Cannot create thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "lcore-slave-%d", i); + ret = rte_thread_setname(lcore_config[i].thread_id, + thread_name); + if (ret != 0) + RTE_LOG(DEBUG, EAL, + "Cannot set name for lcore thread\n"); } - eal_thread_init_master(rte_config.master_lcore); + /* + * Launch a dummy function on all slave lcores, so that master lcore + * knows they are all ready when this function returns. + */ + rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); + rte_eal_mp_wait_lcore(); + + /* initialize services so vdevs register service during bus_probe. */ + ret = rte_service_init(); + if (ret) { + rte_eal_init_alert("rte_service_init() failed\n"); + rte_errno = ENOEXEC; + return -1; + } + + /* Probe all the buses and devices/drivers on them */ + if (rte_bus_probe()) { + rte_eal_init_alert("Cannot probe devices\n"); + rte_errno = ENOTSUP; + return -1; + } + +#ifdef VFIO_PRESENT + /* Register mp action after probe() so that we got enough info */ + if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0) + return -1; +#endif + + /* initialize default service/lcore mappings and start running. Ignore + * -ENOTSUP, as it indicates no service coremask passed to EAL. 
+ */ + ret = rte_service_start_with_defaults(); + if (ret < 0 && ret != -ENOTSUP) { + rte_errno = ENOEXEC; + return -1; + } + + rte_eal_mcfg_complete(); return fctret; } +int __rte_experimental +rte_eal_cleanup(void) +{ + rte_service_finalize(); + return 0; +} + /* get core role */ enum rte_lcore_role_t rte_eal_lcore_role(unsigned lcore_id) { - return (rte_config.lcore_role[lcore_id]); + return rte_config.lcore_role[lcore_id]; } enum rte_proc_type_t rte_eal_process_type(void) { - return (rte_config.process_type); + return rte_config.process_type; +} + +int rte_eal_has_hugepages(void) +{ + return ! internal_config.no_hugetlbfs; +} + +int rte_eal_has_pci(void) +{ + return !internal_config.no_pci; +} + +int rte_eal_create_uio_dev(void) +{ + return internal_config.create_uio_dev; } +enum rte_intr_mode +rte_eal_vfio_intr_mode(void) +{ + return internal_config.vfio_intr_mode; +} + +int +rte_eal_check_module(const char *module_name) +{ + char sysfs_mod_name[PATH_MAX]; + struct stat st; + int n; + + if (NULL == module_name) + return -1; + + /* Check if there is sysfs mounted */ + if (stat("/sys/module", &st) != 0) { + RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + + /* A module might be built-in, therefore try sysfs */ + n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name); + if (n < 0 || n > PATH_MAX) { + RTE_LOG(DEBUG, EAL, "Could not format module path\n"); + return -1; + } + + if (stat(sysfs_mod_name, &st) != 0) { + RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n", + sysfs_mod_name, errno, strerror(errno)); + return 0; + } + + /* Module has been found */ + return 1; +}
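
Note on the shared-config attach/reattach introduced by this patch: rte_eal_config_create() now stores the virtual address of the primary's rte_mem_config mapping inside the config itself (mem_cfg_addr); a secondary process first maps the file read-only, waits for the primary to finish (rte_eal_mcfg_wait_complete), then remaps it at that exact address in rte_eal_config_reattach(), failing with a hint to use --base-virtaddr if the kernel returns a different address. The sketch below shows the same remap-at-recorded-address pattern in plain POSIX; the file layout and all names (shared_cfg, self_addr, attach_shared_cfg) are illustrative, not DPDK APIs.

#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

struct shared_cfg {
	uintptr_t self_addr;                      /* address the primary mapped this at */
	char payload[4096 - sizeof(uintptr_t)];   /* rest of the shared state */
};

static struct shared_cfg *
attach_shared_cfg(const char *path)
{
	int fd = open(path, O_RDWR);
	if (fd < 0)
		return NULL;

	/* first mapping anywhere, only to read the recorded address */
	struct shared_cfg *tmp = mmap(NULL, sizeof(*tmp), PROT_READ,
			MAP_SHARED, fd, 0);
	if (tmp == MAP_FAILED) {
		close(fd);
		return NULL;
	}
	void *wanted = (void *)tmp->self_addr;
	munmap(tmp, sizeof(*tmp));

	/* second mapping at the address the primary used, so absolute
	 * pointers stored inside the shared area stay valid */
	struct shared_cfg *cfg = mmap(wanted, sizeof(*cfg),
			PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	close(fd);
	if (cfg == MAP_FAILED)
		return NULL;
	if (cfg != wanted) {
		munmap(cfg, sizeof(*cfg));
		return NULL;
	}
	return cfg;
}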
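
Note on eal_parse_base_virtaddr(): the value is parsed as hexadecimal with strtoull() and then rounded up to a 16 MB boundary (RTE_PGSIZE_16M), the minimum huge page size on IBM Power and also a valid 2 MB alignment on x86. A standalone sketch of that parse-and-align step, spelling out what RTE_PTR_ALIGN_CEIL does (the 32-bit UINTPTR_MAX check from the patch is omitted here):

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

#define PGSIZE_16M (16ULL * 1024 * 1024)

/* Parse a hexadecimal base address and round it up to the next 16 MB
 * boundary; returns 0 and fills *out on success, -1 on bad input. */
static int
parse_base_virtaddr(const char *arg, uint64_t *out)
{
	char *end = NULL;
	uint64_t addr;

	errno = 0;
	addr = strtoull(arg, &end, 16);
	if (errno != 0 || arg[0] == '\0' || end == NULL || *end != '\0')
		return -1;

	/* equivalent of RTE_PTR_ALIGN_CEIL(addr, RTE_PGSIZE_16M) */
	*out = (addr + PGSIZE_16M - 1) & ~(PGSIZE_16M - 1);
	return 0;
}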
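
Note on eal_log_level_parse() and eal_parse_args(): argv is now walked twice, once early for the log level and once for the full option set, so each pass saves and restores getopt's globals (optind, optopt, optarg) instead of resetting optind to 0 as the old code did. A reduced sketch of that save/scan/restore pattern; the --verbose option is only an example:

#include <getopt.h>
#include <stdio.h>

/* Scan argv once for a single early option without disturbing a later,
 * full getopt_long() pass over the same argv. */
static int
early_scan_verbose(int argc, char **argv)
{
	static const struct option longopts[] = {
		{ "verbose", no_argument, NULL, 'v' },
		{ 0, 0, 0, 0 }
	};
	int verbose = 0;
	int opt;

	/* save getopt's global state */
	const int old_optind = optind;
	const int old_optopt = optopt;
	char *const old_optarg = optarg;

	optind = 1;
	while ((opt = getopt_long(argc, argv, "v", longopts, NULL)) != EOF) {
		if (opt == '?')
			break;          /* unknown options are handled by the later pass */
		if (opt == 'v')
			verbose = 1;
	}

	/* restore getopt's state so the main parser can start from scratch */
	optind = old_optind;
	optopt = old_optopt;
	optarg = old_optarg;

	return verbose;
}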
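
Note on error handling in rte_eal_init(): most failures now go through rte_eal_init_alert(), set rte_errno and return -1 rather than calling rte_panic(), and rte_eal_cleanup() (experimental in this release, so the caller may need ALLOW_EXPERIMENTAL_API) is provided for teardown. A minimal caller might look like this; the error text is the application's choice:

#include <stdio.h>
#include <rte_eal.h>
#include <rte_errno.h>

int
main(int argc, char **argv)
{
	/* rte_eal_init() returns the number of parsed EAL arguments,
	 * or -1 with rte_errno set on failure */
	int ret = rte_eal_init(argc, argv);
	if (ret < 0) {
		fprintf(stderr, "EAL init failed: %s\n", rte_strerror(rte_errno));
		return 1;
	}

	/* ... application set-up and lcore launches go here ... */

	rte_eal_cleanup();
	return 0;
}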
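
Note on the usage hook: eal_usage() now invokes an application-supplied callback after printing the EAL options, registered with rte_set_application_usage_hook(). A small example of registering one; the option names printed are arbitrary:

#include <stdio.h>
#include <rte_eal.h>

/* application part of the usage text; EAL prints its own options first */
static void
app_usage(const char *prgname)
{
	printf("Usage: %s [EAL options] -- [application options]\n", prgname);
	printf("  --foo        example application option\n");
}

/* call this before rte_eal_init() so that '-h' or a bad option
 * prints both the EAL usage and the application usage */
static void
register_app_usage(void)
{
	rte_set_application_usage_hook(app_usage);
}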
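
Note on eal_hugedirs_unlock(): eal_hugepage_info_init() takes flock() locks on the hugepage directories and records the descriptors in internal_config; once memory initialisation is done they are released with LOCK_UN, closed, and the field is reset to -1. A generic sketch of the same unlock loop, with an illustrative descriptor table:

#include <sys/file.h>
#include <unistd.h>

#define MAX_DIR_LOCKS 4

/* descriptors of flock()-ed directories; -1 means "slot unused" */
static int dir_lock_fds[MAX_DIR_LOCKS] = { -1, -1, -1, -1 };

static void
unlock_dirs(void)
{
	for (int i = 0; i < MAX_DIR_LOCKS; i++) {
		if (dir_lock_fds[i] < 0)
			continue;
		flock(dir_lock_fds[i], LOCK_UN);   /* drop the advisory lock */
		close(dir_lock_fds[i]);
		dir_lock_fds[i] = -1;
	}
}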