X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=lib%2Flibrte_eal%2Flinuxapp%2Feal%2Feal.c;h=9ba3d1dbd81021ec649968eb25a9ed987b975990;hb=e987449c9fce2ec6210be3d8bad680d08d68c9dc;hp=8d82cc3b72a4afda152f18e1ec4fee23169cafe5;hpb=af75078fece3615088e561357c1e97603e43a5fe;p=dpdk.git diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 8d82cc3b72..9ba3d1dbd8 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2012 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,7 +30,6 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * version: DPDK.L.1.2.3-3 */ #include @@ -40,8 +39,9 @@ #include #include #include +#include #include -#include +#include #include #include #include @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -67,25 +68,32 @@ #include #include #include +#include +#include #include "eal_private.h" #include "eal_thread.h" #include "eal_internal_cfg.h" -#include "eal_fs_paths.h" +#include "eal_filesystem.h" #include "eal_hugepages.h" #define OPT_HUGE_DIR "huge-dir" #define OPT_PROC_TYPE "proc-type" #define OPT_NO_SHCONF "no-shconf" #define OPT_NO_HPET "no-hpet" +#define OPT_VMWARE_TSC_MAP "vmware-tsc-map" #define OPT_NO_PCI "no-pci" #define OPT_NO_HUGE "no-huge" #define OPT_FILE_PREFIX "file-prefix" +#define OPT_SOCKET_MEM "socket-mem" +#define OPT_SYSLOG "syslog" #define RTE_EAL_BLACKLIST_SIZE 0x100 #define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) +#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) + #define GET_BLACKLIST_FIELD(in, fd, lim, dlm) \ { \ unsigned long val; \ @@ -98,6 +106,8 @@ (in) = end + 1; \ } +/* Allow the application to print its usage message too if set */ +static rte_usage_hook_t rte_application_usage_hook = NULL; /* early configuration structure, when memory config is not mmapped */ static struct rte_mem_config early_mem_config; @@ -125,6 +135,9 @@ struct lcore_config lcore_config[RTE_MAX_LCORE]; /* internal configuration */ struct internal_config internal_config; +/* used by rte_rdtsc() */ +int rte_cycles_vmware_tsc_map; + /* Return a pointer to the configuration structure */ struct rte_config * rte_eal_get_configuration(void) @@ -132,6 +145,38 @@ rte_eal_get_configuration(void) return &rte_config; } +/* parse a sysfs (or other) file containing one integer value */ +int +eal_parse_sysfs_value(const char *filename, unsigned long *val) +{ + FILE *f; + char buf[BUFSIZ]; + char *end = NULL; + + if ((f = fopen(filename, "r")) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", + __func__, filename); + return -1; + } + + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + *val = strtoul(buf, &end, 0); + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + fclose(f); + return 0; +} + + /* create memory configuration in shared/mmap memory. Take out * a write lock on the memsegs, so we can auto-detect primary/secondary. * This means we never close the file while running (auto-close on exit). @@ -169,14 +214,13 @@ rte_eal_config_create(void) } rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), - PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); if (rte_mem_cfg_addr == MAP_FAILED){ rte_panic("Cannot mmap memory for rte_config\n"); } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; - memcpy(rte_config.mem_config, &early_mem_config, - sizeof(early_mem_config)); } /* attach to an existing shared memory config */ @@ -190,13 +234,13 @@ rte_eal_config_attach(void) return; if (mem_cfg_fd < 0){ - mem_cfg_fd = open(pathname, O_RDONLY); + mem_cfg_fd = open(pathname, O_RDWR); if (mem_cfg_fd < 0) rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); } - rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), PROT_READ, - MAP_SHARED, mem_cfg_fd, 0); + rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); close(mem_cfg_fd); if (rte_mem_cfg_addr == MAP_FAILED) rte_panic("Cannot mmap memory for rte_config\n"); @@ -240,6 +284,7 @@ rte_config_init(void) break; case RTE_PROC_SECONDARY: rte_eal_config_attach(); + rte_eal_mcfg_wait_complete(rte_config.mem_config); break; case RTE_PROC_AUTO: case RTE_PROC_INVALID: @@ -247,6 +292,25 @@ rte_config_init(void) } } +/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ +static void +eal_hugedirs_unlock(void) +{ + int i; + + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + { + /* skip uninitialized */ + if (internal_config.hugepage_info[i].lock_descriptor == 0) + continue; + /* unlock hugepage file */ + flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN); + close(internal_config.hugepage_info[i].lock_descriptor); + /* reset the field */ + internal_config.hugepage_info[i].lock_descriptor = 0; + } +} + /* display usage */ static void eal_usage(const char *prgname) @@ -254,22 +318,46 @@ eal_usage(const char *prgname) printf("\nUsage: %s -c COREMASK -n NUM [-m NB] [-r NUM] [-b ]" "[--proc-type primary|secondary|auto] \n\n" "EAL options:\n" - " -c COREMASK: A hexadecimal bitmask of cores to run on\n" - " -n NUM : Number of memory channels\n" - " -v : Display version information on startup\n" - " -b : to prevent EAL from using specified PCI device\n" - " (multiple -b options are alowed)\n" - " -m MB : memory to allocate (default = size of hugemem)\n" - " -r NUM : force number of memory ranks (don't detect)\n" - " --"OPT_HUGE_DIR" : directory where hugetlbfs is mounted\n" - " --"OPT_PROC_TYPE": type of this process\n" + " -c COREMASK : A hexadecimal bitmask of cores to run on\n" + " -n NUM : Number of memory channels\n" + " -v : Display version information on startup\n" + " -b : to prevent EAL from using specified " + "PCI device\n" + " (multiple -b options are allowed)\n" + " -m MB : memory to allocate (see also --"OPT_SOCKET_MEM")\n" + " -r NUM : force number of memory ranks (don't detect)\n" + " --"OPT_SYSLOG" : set syslog facility\n" + " --"OPT_SOCKET_MEM" : memory to allocate on specific \n" + " sockets (use comma separated values)\n" + " --"OPT_HUGE_DIR" : directory where hugetlbfs is mounted\n" + " --"OPT_PROC_TYPE" : type of this process\n" " --"OPT_FILE_PREFIX": prefix for hugepage filenames\n" + " --"OPT_VMWARE_TSC_MAP": use VMware TSC map instead of " + "native RDTSC\n" "\nEAL options for DEBUG use only:\n" " --"OPT_NO_HUGE" : use malloc instead of hugetlbfs\n" " --"OPT_NO_PCI" : disable pci\n" " --"OPT_NO_HPET" : disable hpet\n" " --"OPT_NO_SHCONF": no shared config (mmap'd files)\n\n", prgname); + /* Allow the application to print its usage message too if hook is set */ + if ( rte_application_usage_hook ) { + printf("===== Application Usage =====\n\n"); + rte_application_usage_hook(prgname); + } +} + +/* Set a per-application usage message */ +rte_usage_hook_t +rte_set_application_usage_hook( rte_usage_hook_t usage_func ) +{ + rte_usage_hook_t old_func; + + /* Will be NULL on the first call to denote the last usage routine. */ + old_func = rte_application_usage_hook; + rte_application_usage_hook = usage_func; + + return old_func; } /* @@ -307,19 +395,111 @@ eal_parse_coremask(const char *coremask) return 0; } -static inline uint64_t +static int +eal_parse_syslog(const char *facility) +{ + int i; + static struct { + const char *name; + int value; + } map[] = { + { "auth", LOG_AUTH }, + { "cron", LOG_CRON }, + { "daemon", LOG_DAEMON }, + { "ftp", LOG_FTP }, + { "kern", LOG_KERN }, + { "lpr", LOG_LPR }, + { "mail", LOG_MAIL }, + { "news", LOG_NEWS }, + { "syslog", LOG_SYSLOG }, + { "user", LOG_USER }, + { "uucp", LOG_UUCP }, + { "local0", LOG_LOCAL0 }, + { "local1", LOG_LOCAL1 }, + { "local2", LOG_LOCAL2 }, + { "local3", LOG_LOCAL3 }, + { "local4", LOG_LOCAL4 }, + { "local5", LOG_LOCAL5 }, + { "local6", LOG_LOCAL6 }, + { "local7", LOG_LOCAL7 }, + { NULL, 0 } + }; + + for (i = 0; map[i].name; i++) { + if (!strcmp(facility, map[i].name)) { + internal_config.syslog_facility = map[i].value; + return 0; + } + } + return -1; +} + +static int +eal_parse_socket_mem(char *socket_mem) +{ + char * arg[RTE_MAX_NUMA_NODES]; + char *end; + int arg_num, i, len; + uint64_t total_mem = 0; + + len = strnlen(socket_mem, SOCKET_MEM_STRLEN); + if (len == SOCKET_MEM_STRLEN) { + RTE_LOG(ERR, EAL, "--socket-mem is too long\n"); + return -1; + } + + /* all other error cases will be caught later */ + if (!isdigit(socket_mem[len-1])) + return -1; + + /* split the optarg into separate socket values */ + arg_num = rte_strsplit(socket_mem, len, + arg, RTE_MAX_NUMA_NODES, ','); + + /* if split failed, or 0 arguments */ + if (arg_num <= 0) + return -1; + + internal_config.force_sockets = 1; + + /* parse each defined socket option */ + errno = 0; + for (i = 0; i < arg_num; i++) { + end = NULL; + internal_config.socket_mem[i] = strtoull(arg[i], &end, 10); + + /* check for invalid input */ + if ((errno != 0) || + (arg[i][0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + internal_config.socket_mem[i] *= 1024ULL; + internal_config.socket_mem[i] *= 1024ULL; + total_mem += internal_config.socket_mem[i]; + } + + /* check if we have a positive amount of total memory */ + if (total_mem == 0) + return -1; + + return 0; +} + +static inline size_t eal_get_hugepage_mem_size(void) { uint64_t size = 0; - unsigned i; + unsigned i, j; - for (i = 0; i < internal_config.num_hugepage_sizes; i++){ + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { struct hugepage_info *hpi = &internal_config.hugepage_info[i]; - if (hpi->hugedir != NULL) - size += hpi->hugepage_sz * hpi->num_pages; + if (hpi->hugedir != NULL) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + size += hpi->hugepage_sz * hpi->num_pages[j]; + } + } } - return (size); + return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; } static enum rte_proc_type_t @@ -369,7 +549,7 @@ eal_parse_blacklist_opt(const char *optarg, size_t idx) static int eal_parse_args(int argc, char **argv) { - int opt, ret; + int opt, ret, i; char **argvopt; int option_index; int coremask_ok = 0; @@ -379,10 +559,13 @@ eal_parse_args(int argc, char **argv) {OPT_NO_HUGE, 0, 0, 0}, {OPT_NO_PCI, 0, 0, 0}, {OPT_NO_HPET, 0, 0, 0}, + {OPT_VMWARE_TSC_MAP, 0, 0, 0}, {OPT_HUGE_DIR, 1, 0, 0}, {OPT_NO_SHCONF, 0, 0, 0}, {OPT_PROC_TYPE, 1, 0, 0}, {OPT_FILE_PREFIX, 1, 0, 0}, + {OPT_SOCKET_MEM, 1, 0, 0}, + {OPT_SYSLOG, 1, NULL, 0}, {0, 0, 0, 0} }; @@ -393,11 +576,22 @@ eal_parse_args(int argc, char **argv) internal_config.force_nchannel = 0; internal_config.hugefile_prefix = HUGEFILE_PREFIX_DEFAULT; internal_config.hugepage_dir = NULL; + internal_config.force_sockets = 0; + internal_config.syslog_facility = LOG_DAEMON; #ifdef RTE_LIBEAL_USE_HPET internal_config.no_hpet = 0; #else internal_config.no_hpet = 1; #endif + /* zero out the NUMA config */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_config.socket_mem[i] = 0; + + /* zero out hugedir descriptors */ + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + internal_config.hugepage_info[i].lock_descriptor = 0; + + internal_config.vmware_tsc_map = 0; while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v", lgopts, &option_index)) != EOF) { @@ -464,6 +658,9 @@ eal_parse_args(int argc, char **argv) else if (!strcmp(lgopts[option_index].name, OPT_NO_HPET)) { internal_config.no_hpet = 1; } + else if (!strcmp(lgopts[option_index].name, OPT_VMWARE_TSC_MAP)) { + internal_config.vmware_tsc_map = 1; + } else if (!strcmp(lgopts[option_index].name, OPT_NO_SHCONF)) { internal_config.no_shconf = 1; } @@ -476,6 +673,22 @@ eal_parse_args(int argc, char **argv) else if (!strcmp(lgopts[option_index].name, OPT_FILE_PREFIX)) { internal_config.hugefile_prefix = optarg; } + else if (!strcmp(lgopts[option_index].name, OPT_SOCKET_MEM)) { + if (eal_parse_socket_mem(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SOCKET_MEM "\n"); + eal_usage(prgname); + return -1; + } + } + else if (!strcmp(lgopts[option_index].name, OPT_SYSLOG)) { + if (eal_parse_syslog(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SYSLOG "\n"); + eal_usage(prgname); + return -1; + } + } break; default: @@ -509,6 +722,21 @@ eal_parse_args(int argc, char **argv) eal_usage(prgname); return -1; } + if (internal_config.memory > 0 && internal_config.force_sockets == 1) { + RTE_LOG(ERR, EAL, "Options -m and --socket-mem cannot be specified " + "at the same time\n"); + eal_usage(prgname); + return -1; + } + /* --no-huge doesn't make sense with either -m or --socket-mem */ + if (internal_config.no_hugetlbfs && + (internal_config.memory > 0 || + internal_config.force_sockets == 1)) { + RTE_LOG(ERR, EAL, "Options -m or --socket-mem cannot be specified " + "together with --no-huge!\n"); + eal_usage(prgname); + return -1; + } if (blacklist_index > 0) rte_eal_pci_set_blacklist(eal_dev_blacklist, blacklist_index); @@ -516,17 +744,59 @@ eal_parse_args(int argc, char **argv) if (optind >= 0) argv[optind-1] = prgname; + /* if no memory amounts were requested, this will result in 0 and + * will be overriden later, right after eal_hugepage_info_init() */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_config.memory += internal_config.socket_mem[i]; + ret = optind-1; optind = 0; /* reset getopt lib */ return ret; } +static void +eal_check_mem_on_local_socket(void) +{ + const struct rte_memseg *ms; + int i, socket_id; + + socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); + + ms = rte_eal_get_physmem_layout(); + + for (i = 0; i < RTE_MAX_MEMSEG; i++) + if (ms[i].socket_id == socket_id && + ms[i].len > 0) + return; + + RTE_LOG(WARNING, EAL, "WARNING: Master core has no " + "memory on local socket!\n"); +} + +static int +sync_func(__attribute__((unused)) void *arg) +{ + return 0; +} + +inline static void +rte_eal_mcfg_complete(void) +{ + /* ALL shared mem_config related INIT DONE */ + if (rte_config.process_type == RTE_PROC_PRIMARY) + rte_config.mem_config->magic = RTE_MAGIC; +} + /* Launch threads, called at application init(). */ int rte_eal_init(int argc, char **argv) { int i, fctret, ret; pthread_t thread_id; + static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0); + + if (!rte_atomic32_test_and_set(&run_once)) + return -1; thread_id = pthread_self(); @@ -537,32 +807,49 @@ rte_eal_init(int argc, char **argv) if (fctret < 0) exit(1); - if (eal_hugepage_info_init() < 0) + if (internal_config.no_hugetlbfs == 0 && + internal_config.process_type != RTE_PROC_SECONDARY && + eal_hugepage_info_init() < 0) rte_panic("Cannot get hugepage information\n"); - if (internal_config.memory == 0) { + if (internal_config.memory == 0 && internal_config.force_sockets == 0) { if (internal_config.no_hugetlbfs) internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; else internal_config.memory = eal_get_hugepage_mem_size(); } + if (internal_config.vmware_tsc_map == 1) { +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + rte_cycles_vmware_tsc_map = 1; + RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " + "you must have monitor_control.pseudo_perfctr = TRUE\n"); +#else + RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " + "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); +#endif + } + rte_srand(rte_rdtsc()); - rte_config_init(); + rte_config_init(); + if (rte_eal_cpu_init() < 0) rte_panic("Cannot detect lcores\n"); if (rte_eal_memory_init() < 0) rte_panic("Cannot init memory\n"); + /* the directories are locked during eal_hugepage_info_init */ + eal_hugedirs_unlock(); + if (rte_eal_memzone_init() < 0) rte_panic("Cannot init memzone\n"); if (rte_eal_tailqs_init() < 0) rte_panic("Cannot init tail queues for objects\n"); - if (rte_eal_log_init() < 0) + if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0) rte_panic("Cannot init logs\n"); if (rte_eal_alarm_init() < 0) @@ -571,8 +858,8 @@ rte_eal_init(int argc, char **argv) if (rte_eal_intr_init() < 0) rte_panic("Cannot init interrupt-handling thread\n"); - if (rte_eal_hpet_init() < 0) - rte_panic("Cannot init HPET\n"); + if (rte_eal_timer_init() < 0) + rte_panic("Cannot init HPET or TSC timers\n"); if (rte_eal_pci_init() < 0) rte_panic("Cannot init PCI\n"); @@ -580,6 +867,10 @@ rte_eal_init(int argc, char **argv) RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n", rte_config.master_lcore, (int)thread_id); + eal_check_mem_on_local_socket(); + + rte_eal_mcfg_complete(); + RTE_LCORE_FOREACH_SLAVE(i) { /* @@ -602,6 +893,13 @@ rte_eal_init(int argc, char **argv) eal_thread_init_master(rte_config.master_lcore); + /* + * Launch a dummy function on all slave lcores, so that master lcore + * knows they are all ready when this function returns. + */ + rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); + rte_eal_mp_wait_lcore(); + return fctret; }