4 * Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * version: DPDK.L.1.2.3-3
50 #include <sys/queue.h>
52 #include <rte_common.h>
53 #include <rte_debug.h>
54 #include <rte_memory.h>
55 #include <rte_memzone.h>
56 #include <rte_launch.h>
57 #include <rte_tailq.h>
59 #include <rte_per_lcore.h>
60 #include <rte_lcore.h>
62 #include <rte_random.h>
63 #include <rte_cycles.h>
64 #include <rte_string_fns.h>
65 #include <rte_cpuflags.h>
66 #include <rte_interrupts.h>
68 #include <rte_common.h>
69 #include <rte_version.h>
71 #include "eal_private.h"
72 #include "eal_thread.h"
73 #include "eal_internal_cfg.h"
74 #include "eal_fs_paths.h"
75 #include "eal_hugepages.h"
/* Names of the long command-line options; they are used both in the
 * getopt_long() option table of eal_parse_args() and in the usage text. */
77 #define OPT_HUGE_DIR "huge-dir"
78 #define OPT_PROC_TYPE "proc-type"
79 #define OPT_NO_SHCONF "no-shconf"
80 #define OPT_NO_HPET "no-hpet"
81 #define OPT_NO_PCI "no-pci"
82 #define OPT_NO_HUGE "no-huge"
83 #define OPT_FILE_PREFIX "file-prefix"
/* Number of entries (0x100 = 256) in the eal_dev_blacklist array. */
85 #define RTE_EAL_BLACKLIST_SIZE 0x100
/* Memory size (64 MiB) that rte_eal_init() selects when no size was
 * requested and hugetlbfs use is disabled. */
87 #define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
/*
 * GET_BLACKLIST_FIELD(in, fd, lim, dlm): parse one hexadecimal field from
 * the string (in) with strtoul() (base 16) and store it into (fd).
 * A field is rejected when errno is non-zero, when the character that
 * follows the number is not the delimiter (dlm), or when the value is larger
 * than (lim). An accepted value is cast to the type of (fd) and assigned.
 * Used by eal_parse_blacklist() for each part of a PCI address.
 * NOTE(review): the lines that declare val/end, handle a rejected field and
 * finish the macro are not visible in this excerpt -- confirm against the
 * complete definition.
 */
89 #define GET_BLACKLIST_FIELD(in, fd, lim, dlm) \
94 val = strtoul((in), &end, 16); \
95 if (errno != 0 || end[0] != (dlm) || val > (lim)) \
97 (fd) = (typeof (fd))val; \
101 /* early configuration structure, when memory config is not mmapped */
102 static struct rte_mem_config early_mem_config;
104 /* define fd variable here, because file needs to be kept open for the
105 * duration of the program, as we hold a write lock on it in the primary proc */
106 static int mem_cfg_fd = -1;
/*
 * Lock description handed to fcntl(F_SETLK) by rte_eal_config_create() and
 * eal_proc_type_detect(). The locked range is measured from the start of the
 * file (SEEK_SET) and covers only the memseg member of struct rte_mem_config:
 * l_start is that member's offset and l_len is its size.
 * NOTE(review): the l_type initializer is not visible in this excerpt; the
 * variable name and surrounding comments indicate a write lock -- confirm.
 */
108 static struct flock wr_lock = {
110 .l_whence = SEEK_SET,
111 .l_start = offsetof(struct rte_mem_config, memseg),
112 .l_len = sizeof(early_mem_config.memseg),
115 /* Address of global and public configuration */
/* mem_config starts out pointing at the static early_mem_config and is
 * redirected to the mmap'ed region by rte_eal_config_create() or
 * rte_eal_config_attach(). */
116 static struct rte_config rte_config = {
117 .mem_config = &early_mem_config,
/* PCI addresses given on the command line to be blacklisted; filled by
 * eal_parse_blacklist_opt() and passed to rte_eal_pci_set_blacklist() at the
 * end of eal_parse_args(). */
120 static struct rte_pci_addr eal_dev_blacklist[RTE_EAL_BLACKLIST_SIZE];
122 /* internal configuration (per-core) */
123 struct lcore_config lcore_config[RTE_MAX_LCORE];
125 /* internal configuration */
126 struct internal_config internal_config;
128 /* Return a pointer to the configuration structure */
/* eal_parse_coremask() stores the result in a `struct rte_config *`.
 * NOTE(review): the return type and the body are not visible in this
 * excerpt; presumably the address of the file-scope rte_config is
 * returned -- confirm. */
130 rte_eal_get_configuration(void)
135 /* create memory configuration in shared/mmap memory. Take out
136 * a write lock on the memsegs, so we can auto-detect primary/secondary.
137 * This means we never close the file while running (auto-close on exit).
138 * We also don't lock the whole file, so that in future we can use read-locks
139 * on other parts, e.g. memzones, to detect if there are running secondary
 * processes. */
/*
 * Create the shared memory configuration: open (creating if needed) the
 * file named by eal_runtime_config_path(), resize it to hold one
 * struct rte_mem_config, take wr_lock on it, map it shared and writable,
 * point rte_config.mem_config at the mapping and copy early_mem_config
 * into it.
 * Open, resize and mmap problems are reported with rte_panic(); a lock
 * failure is reported with rte_exit(EXIT_FAILURE, ...).
 * NOTE(review): the return type, the braces and the conditions guarding
 * most of the error calls are not visible in this excerpt.
 */
142 rte_eal_config_create(void)
144 void *rte_mem_cfg_addr;
147 const char *pathname = eal_runtime_config_path();
/* NOTE(review): the statement guarded by this test is not visible;
 * presumably the function returns early when shared config is disabled. */
149 if (internal_config.no_shconf)
/* Mode 0660: read/write for owner and group only.
 * NOTE(review): mem_cfg_fd may already hold a descriptor opened by
 * eal_proc_type_detect(); it is overwritten here without being closed. */
153 mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
155 rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
/* make the file exactly as large as the structure that will be mapped */
158 retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
161 rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
/* F_SETLK does not wait: if the range is already locked by another process
 * the call fails at once, which is reported as a second primary process. */
164 retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
167 rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
168 "process running?\n", pathname);
171 rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
172 PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
174 if (rte_mem_cfg_addr == MAP_FAILED){
175 rte_panic("Cannot mmap memory for rte_config\n");
/* switch from the static early structure to the shared mapping, carrying
 * over whatever was already stored in early_mem_config */
177 rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
178 memcpy(rte_config.mem_config, &early_mem_config,
179 sizeof(early_mem_config));
182 /* attach to an existing shared memory config */
/*
 * Opens the file named by eal_runtime_config_path() read-only, maps one
 * struct rte_mem_config from it (PROT_READ, MAP_SHARED) and points
 * rte_config.mem_config at the mapping. No lock is taken and nothing is
 * copied; because the mapping is read-only, the configuration cannot be
 * modified through this pointer.
 * Open and mmap problems are reported with rte_panic().
 * NOTE(review): the return type, the braces and the condition guarding the
 * first rte_panic() are not visible in this excerpt.
 */
184 rte_eal_config_attach(void)
186 void *rte_mem_cfg_addr;
187 const char *pathname = eal_runtime_config_path();
/* NOTE(review): the statement guarded by this test is not visible;
 * presumably the function returns early when shared config is disabled. */
189 if (internal_config.no_shconf)
/* NOTE(review): mem_cfg_fd may already hold a descriptor opened by
 * eal_proc_type_detect(); it is overwritten here without being closed. */
193 mem_cfg_fd = open(pathname, O_RDONLY);
195 rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
198 rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), PROT_READ,
199 MAP_SHARED, mem_cfg_fd, 0);
201 if (rte_mem_cfg_addr == MAP_FAILED)
202 rte_panic("Cannot mmap memory for rte_config\n");
204 rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
207 /* Detect if we are a primary or a secondary process */
/*
 * The result defaults to RTE_PROC_PRIMARY and becomes RTE_PROC_SECONDARY
 * only when the runtime config file can be opened read/write AND the
 * non-blocking F_SETLK request for wr_lock fails. If the file cannot be
 * opened at all, the process is therefore treated as primary.
 * Side effects: stores the descriptor returned by open() in mem_cfg_fd
 * (left open on purpose, see the comment below) and logs the detected type.
 * NOTE(review): the braces and the return statement are not visible in this
 * excerpt; presumably ptype is returned.
 */
208 static enum rte_proc_type_t
209 eal_proc_type_detect(void)
211 enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
212 const char *pathname = eal_runtime_config_path();
214 /* if we can open the file but not get a write-lock we are a secondary
215 * process. NOTE: if we get a file handle back, we keep that open
216 * and don't close it to prevent a race condition between multiple opens */
217 if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
218 (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
219 ptype = RTE_PROC_SECONDARY;
221 RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
222 ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
227 /* Sets up rte_config structure with the pointer to shared memory config.*/
/*
 * Stores RTE_MAGIC in rte_config.magic, decides the process type (running
 * eal_proc_type_detect() when the configured type is RTE_PROC_AUTO, otherwise
 * taking the configured value as is) and then dispatches on it:
 * primary -> rte_eal_config_create(), secondary -> rte_eal_config_attach(),
 * invalid -> rte_panic().
 * NOTE(review): the return type, the braces, the break statements and any
 * further case labels of the switch are not visible in this excerpt.
 */
229 rte_config_init(void)
231 /* set the magic in configuration structure */
232 rte_config.magic = RTE_MAGIC;
233 rte_config.process_type = (internal_config.process_type == RTE_PROC_AUTO) ?
234 eal_proc_type_detect() : /* for auto, detect the type */
235 internal_config.process_type; /* otherwise use what's already set */
237 switch (rte_config.process_type){
238 case RTE_PROC_PRIMARY:
239 rte_eal_config_create();
241 case RTE_PROC_SECONDARY:
242 rte_eal_config_attach();
245 case RTE_PROC_INVALID:
246 rte_panic("Invalid process type\n");
/*
 * Print the EAL command-line help with printf(). The text is one format
 * string built from adjacent literals and the OPT_* option-name macros; its
 * only conversion is the %s in the "Usage:" line.
 * NOTE(review): the return type, the braces and the argument list that
 * follows the format string are not visible in this excerpt; presumably
 * prgname is the argument for %s.
 * NOTE(review): the help text contains the typo "alowed" (should be
 * "allowed"), and the -v option is described but missing from the synopsis
 * line.
 */
252 eal_usage(const char *prgname)
254 printf("\nUsage: %s -c COREMASK -n NUM [-m NB] [-r NUM] [-b <domain:bus:devid.func>]"
255 "[--proc-type primary|secondary|auto] \n\n"
257 " -c COREMASK: A hexadecimal bitmask of cores to run on\n"
258 " -n NUM : Number of memory channels\n"
259 " -v : Display version information on startup\n"
260 " -b <domain:bus:devid.func>: to prevent EAL from using specified PCI device\n"
261 " (multiple -b options are alowed)\n"
262 " -m MB : memory to allocate (default = size of hugemem)\n"
263 " -r NUM : force number of memory ranks (don't detect)\n"
264 " --"OPT_HUGE_DIR" : directory where hugetlbfs is mounted\n"
265 " --"OPT_PROC_TYPE": type of this process\n"
266 " --"OPT_FILE_PREFIX": prefix for hugepage filenames\n"
267 "\nEAL options for DEBUG use only:\n"
268 " --"OPT_NO_HUGE" : use malloc instead of hugetlbfs\n"
269 " --"OPT_NO_PCI" : disable pci\n"
270 " --"OPT_NO_HPET" : disable hpet\n"
271 " --"OPT_NO_SHCONF": no shared config (mmap'd files)\n\n",
276 /* Parse the coremask given as argument (hexadecimal string) and fill
277 * the global configuration (core role and core count) with the parsed
 * value. */
/*
 * Convert the hexadecimal string `coremask` with strtoull() and record, for
 * every lcore index below RTE_MAX_LCORE, whether its bit is set:
 * set bits get ROLE_RTE, and ROLE_OFF is assigned for the others.
 * master_lcore is also assigned inside the set-bit branch.
 * The input is rejected when it is empty, when any character is left
 * unparsed, or when the resulting mask is zero.
 * NOTE(review): the return type, the braces, the rejection statement, the
 * condition guarding the master_lcore assignment and any core counting are
 * not visible in this excerpt.
 */
281 eal_parse_coremask(const char *coremask)
283 struct rte_config *cfg = rte_eal_get_configuration();
286 unsigned long long cm;
289 /* parse hexadecimal string */
290 cm = strtoull(coremask, &end, 16);
291 if ((coremask[0] == '\0') || (end == NULL) || (*end != '\0') || (cm == 0))
294 RTE_LOG(DEBUG, EAL, "coremask set to %llx\n", cm);
295 /* set core role and core count */
/* NOTE(review): cm is an unsigned long long, so the shift below is only
 * defined while i is smaller than its bit width (typically 64); confirm
 * that RTE_MAX_LCORE never exceeds that. */
296 for (i = 0; i < RTE_MAX_LCORE; i++) {
297 if ((1ULL << i) & cm) {
299 cfg->master_lcore = i;
300 cfg->lcore_role[i] = ROLE_RTE;
304 cfg->lcore_role[i] = ROLE_OFF;
/*
 * Add up hugepage_sz * num_pages over the first
 * internal_config.num_hugepage_sizes entries of
 * internal_config.hugepage_info, skipping entries whose hugedir is NULL.
 * rte_eal_init() uses the result as the default memory size.
 * NOTE(review): the declarations of `size` and `i`, the braces and the
 * return statement are not visible in this excerpt; presumably `size`
 * starts at 0 and is returned.
 */
310 static inline uint64_t
311 eal_get_hugepage_mem_size(void)
316 for (i = 0; i < internal_config.num_hugepage_sizes; i++){
317 struct hugepage_info *hpi = &internal_config.hugepage_info[i];
/* only count page sizes that have a hugedir recorded */
318 if (hpi->hugedir != NULL)
319 size += hpi->hugepage_sz * hpi->num_pages;
/*
 * Translate the argument of the --proc-type option into a process type.
 * "primary", "secondary" and "auto" are recognised without regard to case;
 * anything else yields RTE_PROC_INVALID.
 * The comparison length is sizeof() of the literal, which includes the
 * terminating NUL, so the whole argument must match: a longer string that
 * merely starts with one of the keywords is not accepted.
 */
325 static enum rte_proc_type_t
326 eal_parse_proc_type(const char *arg)
328 if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
329 return RTE_PROC_PRIMARY;
330 if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
331 return RTE_PROC_SECONDARY;
332 if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
333 return RTE_PROC_AUTO;
335 return RTE_PROC_INVALID;
/*
 * Parse a PCI address written as hexadecimal fields in the form
 * domain:bus:devid.function into *dev2bl, using GET_BLACKLIST_FIELD once
 * per field. Field limits: domain <= UINT16_MAX; bus, devid and function
 * <= UINT8_MAX. The last field must be followed by the end of the string
 * (delimiter 0).
 * The caller, eal_parse_blacklist_opt(), treats a non-zero result as a
 * parse failure.
 * NOTE(review): the return type, the braces and the return statement are not
 * visible here, and how `input` advances from one field to the next depends
 * on the parts of GET_BLACKLIST_FIELD that are not visible either.
 */
339 eal_parse_blacklist(const char *input, struct rte_pci_addr *dev2bl)
341 GET_BLACKLIST_FIELD(input, dev2bl->domain, UINT16_MAX, ':');
342 GET_BLACKLIST_FIELD(input, dev2bl->bus, UINT8_MAX, ':');
343 GET_BLACKLIST_FIELD(input, dev2bl->devid, UINT8_MAX, '.');
344 GET_BLACKLIST_FIELD(input, dev2bl->function, UINT8_MAX, 0);
/*
 * Handle one blacklist option argument: check that slot `idx` still fits in
 * eal_dev_blacklist and, if so, parse `optarg` into that slot with
 * eal_parse_blacklist(). Each failure case has its own message
 * ("too many devices" / "invalid device").
 * eal_parse_args() assigns the result to its blacklist index and treats a
 * negative value as an error.
 * NOTE(review): the return type, the logging calls that own the two format
 * strings and all return statements are not visible in this excerpt;
 * presumably the next free index is returned on success -- confirm.
 */
349 eal_parse_blacklist_opt(const char *optarg, size_t idx)
351 if (idx >= sizeof (eal_dev_blacklist) / sizeof (eal_dev_blacklist[0])) {
353 "%s - too many devices to blacklist...\n",
356 } else if (eal_parse_blacklist(optarg, eal_dev_blacklist + idx) != 0) {
358 "%s - invalid device to blacklist...\n",
368 /* Parse the argument given in the command line of the application */
/*
 * Resets the relevant internal_config fields to their defaults, then walks
 * the command line with getopt_long() (short options "b:c:m:n:r:v" plus the
 * long options in lgopts) and stores the results in internal_config, the
 * coremask-derived configuration and eal_dev_blacklist. After the loop the
 * combination of options is validated, the collected blacklist is handed to
 * rte_eal_pci_set_blacklist(), and getopt state is reset.
 * NOTE(review): the return type, several local declarations, the switch and
 * its case labels, the error returns and the final return are not visible
 * in this excerpt; comments below about which option a statement belongs to
 * are inferred from the usage text and should be confirmed.
 */
370 eal_parse_args(int argc, char **argv)
/* NOTE(review): stray second ';' on the next line (harmless empty
 * statement). */
376 ssize_t blacklist_index = 0;;
377 char *prgname = argv[0];
/* every long option has val == 0, so the handler further down tells them
 * apart by comparing lgopts[option_index].name */
378 static struct option lgopts[] = {
379 {OPT_NO_HUGE, 0, 0, 0},
380 {OPT_NO_PCI, 0, 0, 0},
381 {OPT_NO_HPET, 0, 0, 0},
382 {OPT_HUGE_DIR, 1, 0, 0},
383 {OPT_NO_SHCONF, 0, 0, 0},
384 {OPT_PROC_TYPE, 1, 0, 0},
385 {OPT_FILE_PREFIX, 1, 0, 0},
/* defaults applied before any option is parsed */
391 internal_config.memory = 0;
392 internal_config.force_nrank = 0;
393 internal_config.force_nchannel = 0;
394 internal_config.hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
395 internal_config.hugepage_dir = NULL;
396 #ifdef RTE_LIBEAL_USE_HPET
397 internal_config.no_hpet = 0;
399 internal_config.no_hpet = 1;
402 while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v",
403 lgopts, &option_index)) != EOF) {
/* blacklist entry: the helper's result becomes the new index, negative
 * means failure */
408 if ((blacklist_index = eal_parse_blacklist_opt(optarg,
409 blacklist_index)) < 0) {
416 if (eal_parse_coremask(optarg) < 0) {
417 RTE_LOG(ERR, EAL, "invalid coremask\n");
/* memory size: the argument is multiplied by 1024 twice, i.e. it is taken
 * as megabytes and stored as bytes.
 * NOTE(review): atoi() cannot report malformed or out-of-range input. */
425 internal_config.memory = atoi(optarg);
426 internal_config.memory *= 1024ULL;
427 internal_config.memory *= 1024ULL;
429 /* force number of channels */
/* accepted range is 1..4; 0 also covers input atoi() could not convert */
431 internal_config.force_nchannel = atoi(optarg);
432 if (internal_config.force_nchannel == 0 ||
433 internal_config.force_nchannel > 4) {
434 RTE_LOG(ERR, EAL, "invalid channel number\n");
439 /* force number of ranks */
/* accepted range is 1..16 */
441 internal_config.force_nrank = atoi(optarg);
442 if (internal_config.force_nrank == 0 ||
443 internal_config.force_nrank > 16) {
444 RTE_LOG(ERR, EAL, "invalid rank number\n");
450 /* since message is explicitly requested by user, we
451 * write message at highest log level so it can always be seen
452 * even if info or warning messages are disabled */
453 RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
/* long options, identified by name */
458 if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) {
459 internal_config.no_hugetlbfs = 1;
461 else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) {
462 internal_config.no_pci = 1;
464 else if (!strcmp(lgopts[option_index].name, OPT_NO_HPET)) {
465 internal_config.no_hpet = 1;
467 else if (!strcmp(lgopts[option_index].name, OPT_NO_SHCONF)) {
468 internal_config.no_shconf = 1;
/* optarg is stored as a pointer, not copied */
470 else if (!strcmp(lgopts[option_index].name, OPT_HUGE_DIR)) {
471 internal_config.hugepage_dir = optarg;
/* an unrecognised value becomes RTE_PROC_INVALID and is rejected after the
 * loop */
473 else if (!strcmp(lgopts[option_index].name, OPT_PROC_TYPE)) {
474 internal_config.process_type = eal_parse_proc_type(optarg);
/* optarg is stored as a pointer, not copied */
476 else if (!strcmp(lgopts[option_index].name, OPT_FILE_PREFIX)) {
477 internal_config.hugefile_prefix = optarg;
489 RTE_LOG(ERR, EAL, "coremask not specified\n");
/* resolve "auto" right away so the checks below see a concrete type */
493 if (internal_config.process_type == RTE_PROC_AUTO){
494 internal_config.process_type = eal_proc_type_detect();
496 if (internal_config.process_type == RTE_PROC_INVALID){
497 RTE_LOG(ERR, EAL, "Invalid process type specified\n");
/* the channel count is mandatory only for a primary process */
501 if (internal_config.process_type == RTE_PROC_PRIMARY &&
502 internal_config.force_nchannel == 0) {
503 RTE_LOG(ERR, EAL, "Number of memory channels (-n) not specified\n");
/* reject '%' in the hugepage file prefix; index() is the legacy equivalent
 * of strchr() */
507 if (index(internal_config.hugefile_prefix,'%') != NULL){
508 RTE_LOG(ERR, EAL, "Invalid char, '%%', in '"OPT_FILE_PREFIX"' option\n");
513 if (blacklist_index > 0)
514 rte_eal_pci_set_blacklist(eal_dev_blacklist, blacklist_index);
/* put the program name in the slot just before the first unparsed argument
 * NOTE(review): presumably so the caller can continue parsing from there
 * with a conventional argv[0] -- confirm against the return value. */
517 argv[optind-1] = prgname;
520 optind = 0; /* reset getopt lib */
524 /* Launch threads, called at application init(). */
/*
 * Initialises the EAL in a fixed order: early logging, command-line parsing,
 * hugepage information, default memory size, random seed, CPU/lcore
 * detection, memory, memzones, tail queues, logging, alarms, interrupts,
 * HPET and PCI. Every visible failure check ends in rte_panic(). Afterwards,
 * for each slave lcore, two pipes and one thread running eal_thread_loop()
 * are created, and finally the master lcore is initialised.
 * NOTE(review): the return type, local declarations, the handling of fctret,
 * the return value and some statements between the visible lines are not
 * part of this excerpt; in particular no call to rte_config_init() is
 * visible here.
 */
526 rte_eal_init(int argc, char **argv)
531 thread_id = pthread_self();
533 if (rte_eal_log_early_init() < 0)
534 rte_panic("Cannot init early logs\n");
536 fctret = eal_parse_args(argc, argv);
540 if (eal_hugepage_info_init() < 0)
541 rte_panic("Cannot get hugepage information\n");
/* no memory size requested: use the fixed fallback without hugetlbfs,
 * otherwise the total size of the available hugepages */
543 if (internal_config.memory == 0) {
544 if (internal_config.no_hugetlbfs)
545 internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
547 internal_config.memory = eal_get_hugepage_mem_size();
/* seed the random number generator from the timestamp counter */
550 rte_srand(rte_rdtsc());
553 if (rte_eal_cpu_init() < 0)
554 rte_panic("Cannot detect lcores\n");
556 if (rte_eal_memory_init() < 0)
557 rte_panic("Cannot init memory\n");
559 if (rte_eal_memzone_init() < 0)
560 rte_panic("Cannot init memzone\n");
562 if (rte_eal_tailqs_init() < 0)
563 rte_panic("Cannot init tail queues for objects\n");
565 if (rte_eal_log_init() < 0)
566 rte_panic("Cannot init logs\n");
/* NOTE(review): this panic text is identical to the one used for
 * rte_eal_intr_init() below, so an alarm-init failure is reported as an
 * interrupt-thread failure; looks like a copy-paste. */
568 if (rte_eal_alarm_init() < 0)
569 rte_panic("Cannot init interrupt-handling thread\n");
571 if (rte_eal_intr_init() < 0)
572 rte_panic("Cannot init interrupt-handling thread\n");
574 if (rte_eal_hpet_init() < 0)
575 rte_panic("Cannot init HPET\n");
577 if (rte_eal_pci_init() < 0)
578 rte_panic("Cannot init PCI\n");
/* NOTE(review): thread_id comes from pthread_self() and is cast to int for
 * the %x conversion; the value may be truncated where pthread_t is wider. */
580 RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",
581 rte_config.master_lcore, (int)thread_id);
583 RTE_LCORE_FOREACH_SLAVE(i) {
586 * create communication pipes between master thread
589 if (pipe(lcore_config[i].pipe_master2slave) < 0)
590 rte_panic("Cannot create pipe\n");
591 if (pipe(lcore_config[i].pipe_slave2master) < 0)
592 rte_panic("Cannot create pipe\n");
/* the lcore is marked WAIT before its thread is started */
594 lcore_config[i].state = WAIT;
596 /* create a thread for each lcore */
/* default thread attributes, no argument passed to eal_thread_loop() */
597 ret = pthread_create(&lcore_config[i].thread_id, NULL,
598 eal_thread_loop, NULL);
600 rte_panic("Cannot create thread\n");
603 eal_thread_init_master(rte_config.master_lcore);
/*
 * Return the role recorded in rte_config.lcore_role for `lcore_id`.
 * lcore_id is used as an array index without any range check, so the caller
 * must pass an index that is valid for that array.
 */
609 enum rte_lcore_role_t
610 rte_eal_lcore_role(unsigned lcore_id)
612 return (rte_config.lcore_role[lcore_id]);
/*
 * Return the process type stored in rte_config.process_type, which is
 * assigned by rte_config_init().
 * NOTE(review): the return type line is not visible in this excerpt;
 * presumably enum rte_proc_type_t.
 */
616 rte_eal_process_type(void)
618 return (rte_config.process_type);