eal: allow to blacklist address without domain prefix
[dpdk.git] / lib / librte_eal / linuxapp / eal / eal.c
1 /*-
2  *   BSD LICENSE
3  * 
4  *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  * 
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  * 
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  * 
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stdint.h>
37 #include <string.h>
38 #include <stdarg.h>
39 #include <unistd.h>
40 #include <pthread.h>
41 #include <syslog.h>
42 #include <getopt.h>
43 #include <sys/file.h>
44 #include <stddef.h>
45 #include <errno.h>
46 #include <limits.h>
47 #include <errno.h>
48 #include <sys/mman.h>
49 #include <sys/queue.h>
50 #include <sys/user.h>
51 #include <linux/binfmts.h>
52
53 #include <rte_common.h>
54 #include <rte_debug.h>
55 #include <rte_memory.h>
56 #include <rte_memzone.h>
57 #include <rte_launch.h>
58 #include <rte_tailq.h>
59 #include <rte_eal.h>
60 #include <rte_eal_memconfig.h>
61 #include <rte_per_lcore.h>
62 #include <rte_lcore.h>
63 #include <rte_log.h>
64 #include <rte_random.h>
65 #include <rte_cycles.h>
66 #include <rte_string_fns.h>
67 #include <rte_cpuflags.h>
68 #include <rte_interrupts.h>
69 #include <rte_pci.h>
70 #include <rte_common.h>
71 #include <rte_version.h>
72 #include <rte_atomic.h>
73 #include <malloc_heap.h>
74
75 #include "eal_private.h"
76 #include "eal_thread.h"
77 #include "eal_internal_cfg.h"
78 #include "eal_filesystem.h"
79 #include "eal_hugepages.h"
80
/* Names of the long ("--name") EAL options matched in eal_parse_args() */
#define OPT_HUGE_DIR    "huge-dir"
#define OPT_PROC_TYPE   "proc-type"
#define OPT_NO_SHCONF   "no-shconf"
#define OPT_NO_HPET     "no-hpet"
#define OPT_VMWARE_TSC_MAP   "vmware-tsc-map"
#define OPT_NO_PCI      "no-pci"
#define OPT_NO_HUGE     "no-huge"
#define OPT_FILE_PREFIX "file-prefix"
#define OPT_SOCKET_MEM  "socket-mem"
#define OPT_SYSLOG      "syslog"

/* Number of entries in eal_dev_blacklist[], i.e. how many -b options fit */
#define RTE_EAL_BLACKLIST_SIZE  0x100

/* 64 MiB. NOTE(review): presumably the amount of memory used when running
 * without hugepages; the use site is not in this part of the file - confirm */
#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)

/* A --socket-mem argument of this length or longer is rejected */
#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)

/* Number of coremask bits carried by one hexadecimal digit */
#define BITS_PER_HEX 4
99
/*
 * Parse one hexadecimal field of an address string.
 *   in  - string cursor; on success it is moved just past the delimiter
 *   fd  - lvalue that receives the parsed value, cast to its own type
 *   lim - largest value accepted for the field
 *   dlm - character that must directly follow the digits
 * If strtoul() sets errno, the delimiter does not match or the value is
 * above lim, the macro executes "return (-EINVAL)" in the ENCLOSING
 * function, so it may only be expanded inside functions that report
 * errors that way.
 */
#define GET_BLACKLIST_FIELD(in, fd, lim, dlm)                   \
{                                                               \
        unsigned long val;                                      \
        char *end;                                              \
        errno = 0;                                              \
        val = strtoul((in), &end, 16);                          \
        if (errno != 0 || end[0] != (dlm) || val > (lim))       \
                return (-EINVAL);                               \
        (fd) = (typeof (fd))val;                                \
        (in) = end + 1;                                         \
}
111
/* Allow the application to print its usage message too if set
 * (installed through rte_set_application_usage_hook(), called by eal_usage()) */
static rte_usage_hook_t rte_application_usage_hook = NULL;
/* early configuration structure, when memory config is not mmapped;
 * rte_config.mem_config points here until a shared mapping replaces it */
static struct rte_mem_config early_mem_config;

/* define fd variable here, because file needs to be kept open for the
 * duration of the program, as we hold a write lock on it in the primary proc.
 * A negative value means the runtime config file has not been opened. */
static int mem_cfg_fd = -1;

/* write lock covering only the memseg member of struct rte_mem_config
 * inside the runtime config file (not the whole file) */
static struct flock wr_lock = {
                .l_type = F_WRLCK,
                .l_whence = SEEK_SET,
                .l_start = offsetof(struct rte_mem_config, memseg),
                .l_len = sizeof(early_mem_config.memseg),
};

/* Address of global and public configuration */
static struct rte_config rte_config = {
                .mem_config = &early_mem_config,
};

/* PCI addresses collected from the -b options by eal_parse_blacklist_opt() */
static struct rte_pci_addr eal_dev_blacklist[RTE_EAL_BLACKLIST_SIZE];

/* internal configuration (per-core) */
struct lcore_config lcore_config[RTE_MAX_LCORE];

/* internal configuration */
struct internal_config internal_config;

/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
143
144 /* Return a pointer to the configuration structure */
145 struct rte_config *
146 rte_eal_get_configuration(void)
147 {
148         return &rte_config;
149 }
150
151 /* parse a sysfs (or other) file containing one integer value */
152 int
153 eal_parse_sysfs_value(const char *filename, unsigned long *val)
154 {
155         FILE *f;
156         char buf[BUFSIZ];
157         char *end = NULL;
158
159         if ((f = fopen(filename, "r")) == NULL) {
160                 RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
161                         __func__, filename);
162                 return -1;
163         }
164
165         if (fgets(buf, sizeof(buf), f) == NULL) {
166                 RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
167                         __func__, filename);
168                 fclose(f);
169                 return -1;
170         }
171         *val = strtoul(buf, &end, 0);
172         if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
173                 RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
174                                 __func__, filename);
175                 fclose(f);
176                 return -1;
177         }
178         fclose(f);
179         return 0;
180 }
181
182
183 /* create memory configuration in shared/mmap memory. Take out
184  * a write lock on the memsegs, so we can auto-detect primary/secondary.
185  * This means we never close the file while running (auto-close on exit).
186  * We also don't lock the whole file, so that in future we can use read-locks
187  * on other parts, e.g. memzones, to detect if there are running secondary
188  * processes. */
189 static void
190 rte_eal_config_create(void)
191 {
192         void *rte_mem_cfg_addr;
193         int retval;
194
195         const char *pathname = eal_runtime_config_path();
196
197         if (internal_config.no_shconf)
198                 return;
199
200         if (mem_cfg_fd < 0){
201                 mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
202                 if (mem_cfg_fd < 0)
203                         rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
204         }
205
206         retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
207         if (retval < 0){
208                 close(mem_cfg_fd);
209                 rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
210         }
211
212         retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
213         if (retval < 0){
214                 close(mem_cfg_fd);
215                 rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
216                                 "process running?\n", pathname);
217         }
218
219         rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
220                                 PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
221
222         if (rte_mem_cfg_addr == MAP_FAILED){
223                 rte_panic("Cannot mmap memory for rte_config\n");
224         }
225         memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
226         rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
227 }
228
229 /* attach to an existing shared memory config */
230 static void
231 rte_eal_config_attach(void)
232 {
233         void *rte_mem_cfg_addr;
234         const char *pathname = eal_runtime_config_path();
235
236         if (internal_config.no_shconf)
237                 return;
238
239         if (mem_cfg_fd < 0){
240                 mem_cfg_fd = open(pathname, O_RDWR);
241                 if (mem_cfg_fd < 0)
242                         rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
243         }
244
245         rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), 
246                                 PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
247         close(mem_cfg_fd);
248         if (rte_mem_cfg_addr == MAP_FAILED)
249                 rte_panic("Cannot mmap memory for rte_config\n");
250
251         rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
252 }
253
254 /* Detect if we are a primary or a secondary process */
255 static enum rte_proc_type_t
256 eal_proc_type_detect(void)
257 {
258         enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
259         const char *pathname = eal_runtime_config_path();
260
261         /* if we can open the file but not get a write-lock we are a secondary
262          * process. NOTE: if we get a file handle back, we keep that open
263          * and don't close it to prevent a race condition between multiple opens */
264         if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
265                         (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
266                 ptype = RTE_PROC_SECONDARY;
267
268         RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
269                         ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
270
271         return ptype;
272 }
273
274 /* Sets up rte_config structure with the pointer to shared memory config.*/
275 static void
276 rte_config_init(void)
277 {
278         /* set the magic in configuration structure */
279         rte_config.magic = RTE_MAGIC;
280         rte_config.process_type = (internal_config.process_type == RTE_PROC_AUTO) ?
281                         eal_proc_type_detect() : /* for auto, detect the type */
282                         internal_config.process_type; /* otherwise use what's already set */
283
284         switch (rte_config.process_type){
285         case RTE_PROC_PRIMARY:
286                 rte_eal_config_create();
287                 break;
288         case RTE_PROC_SECONDARY:
289                 rte_eal_config_attach();
290                 rte_eal_mcfg_wait_complete(rte_config.mem_config);
291                 break;
292         case RTE_PROC_AUTO:
293         case RTE_PROC_INVALID:
294                 rte_panic("Invalid process type\n");
295         }
296 }
297
298 /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
299 static void
300 eal_hugedirs_unlock(void)
301 {
302         int i;
303
304         for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
305         {
306                 /* skip uninitialized */
307                 if (internal_config.hugepage_info[i].lock_descriptor == 0)
308                         continue;
309                 /* unlock hugepage file */
310                 flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
311                 close(internal_config.hugepage_info[i].lock_descriptor);
312                 /* reset the field */
313                 internal_config.hugepage_info[i].lock_descriptor = 0;
314         }
315 }
316
/*
 * Display usage.
 *
 * Prints the EAL command-line help on stdout; prgname is substituted into
 * the "Usage:" line. If an application usage hook has been registered it is
 * called afterwards with the same prgname.
 *
 * NOTE(review): the text documents -b as <domain:bus:devid.func> only, while
 * eal_parse_blacklist_opt() also tries eal_parse_pci_BDF() - presumably the
 * form without a domain; consider updating the help text.
 */
static void
eal_usage(const char *prgname)
{
        printf("\nUsage: %s -c COREMASK -n NUM [-m NB] [-r NUM] [-b <domain:bus:devid.func>]"
               "[--proc-type primary|secondary|auto] \n\n"
               "EAL options:\n"
               "  -c COREMASK  : A hexadecimal bitmask of cores to run on\n"
               "  -n NUM       : Number of memory channels\n"
                   "  -v           : Display version information on startup\n"
               "  -b <domain:bus:devid.func>: to prevent EAL from using specified "
           "PCI device\n"
               "                 (multiple -b options are allowed)\n"
               "  -m MB        : memory to allocate (see also --"OPT_SOCKET_MEM")\n"
               "  -r NUM       : force number of memory ranks (don't detect)\n"
               "  --"OPT_SYSLOG"     : set syslog facility\n"
               "  --"OPT_SOCKET_MEM" : memory to allocate on specific \n"
                   "                 sockets (use comma separated values)\n"
               "  --"OPT_HUGE_DIR"   : directory where hugetlbfs is mounted\n"
               "  --"OPT_PROC_TYPE"  : type of this process\n"
               "  --"OPT_FILE_PREFIX": prefix for hugepage filenames\n"
               "  --"OPT_VMWARE_TSC_MAP": use VMware TSC map instead of "
                           "native RDTSC\n"
               "\nEAL options for DEBUG use only:\n"
               "  --"OPT_NO_HUGE"  : use malloc instead of hugetlbfs\n"
               "  --"OPT_NO_PCI"   : disable pci\n"
               "  --"OPT_NO_HPET"  : disable hpet\n"
               "  --"OPT_NO_SHCONF": no shared config (mmap'd files)\n"
               "\n",
               prgname);
        /* Allow the application to print its usage message too if hook is set */
        if ( rte_application_usage_hook ) {
                printf("===== Application Usage =====\n\n");
                rte_application_usage_hook(prgname);
        }
}
353
354 /* Set a per-application usage message */
355 rte_usage_hook_t
356 rte_set_application_usage_hook( rte_usage_hook_t usage_func )
357 {
358         rte_usage_hook_t        old_func;
359
360         /* Will be NULL on the first call to denote the last usage routine. */
361         old_func                                        = rte_application_usage_hook;
362         rte_application_usage_hook      = usage_func;
363
364         return old_func;
365 }
366
/*
 * Coremask parsing. xdigit2val() converts a single hexadecimal digit to
 * its numeric value; eal_parse_coremask() parses the coremask given as
 * argument (hexadecimal string) and fills the global configuration (core
 * role and core count) with the parsed value.
 */
/*
 * Numeric value (0-15) of one hexadecimal digit character.
 * The character is not validated here: anything that is neither a decimal
 * digit nor an upper-case letter is handled as a lower-case hex letter.
 */
static int xdigit2val(unsigned char c)
{
        if (isdigit(c))
                return c - '0';
        if (isupper(c))
                return c - 'A' + 10;
        return c - 'a' + 10;
}
383 static int
384 eal_parse_coremask(const char *coremask)
385 {
386         struct rte_config *cfg = rte_eal_get_configuration();
387         int i, j, idx = 0 ;
388         unsigned count = 0;
389         char c;
390         int val;
391
392         if (coremask == NULL)
393                 return -1;
394         /* Remove all blank characters ahead and after .
395          * Remove 0x/0X if exists.
396          */
397         while (isblank(*coremask))
398                 coremask++;
399         if (coremask[0] == '0' && ((coremask[1] == 'x')
400                 ||  (coremask[1] == 'X')) )
401                 coremask += 2;
402         i = strnlen(coremask, MAX_ARG_STRLEN);
403         while ((i > 0) && isblank(coremask[i - 1]))
404                 i--;
405         if (i == 0)
406                 return -1;
407
408         for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
409                 c = coremask[i];
410                 if (isxdigit(c) == 0) {
411                         /* invalid characters */
412                         return (-1);
413                 }
414                 val = xdigit2val(c);
415                 for(j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++) {
416                         if((1 << j) & val) {
417                                 cfg->lcore_role[idx] = ROLE_RTE;
418                                 if(count == 0)
419                                         cfg->master_lcore = idx;
420                                 count++;
421                         } else  {
422                                 cfg->lcore_role[idx] = ROLE_OFF;
423                         }
424                 }
425         }
426         for(; i >= 0; i--)
427                 if(coremask[i] != '0')
428                         return -1;
429         for(; idx < RTE_MAX_LCORE; idx++)
430                 cfg->lcore_role[idx] = ROLE_OFF;
431         if(count == 0)
432                 return -1;
433         return 0;
434 }
435
436 static int
437 eal_parse_syslog(const char *facility)
438 {
439         int i;
440         static struct {
441                 const char *name;
442                 int value;
443         } map[] = {
444                 { "auth", LOG_AUTH },
445                 { "cron", LOG_CRON },
446                 { "daemon", LOG_DAEMON },
447                 { "ftp", LOG_FTP },
448                 { "kern", LOG_KERN },
449                 { "lpr", LOG_LPR },
450                 { "mail", LOG_MAIL },
451                 { "news", LOG_NEWS },
452                 { "syslog", LOG_SYSLOG },
453                 { "user", LOG_USER },
454                 { "uucp", LOG_UUCP },
455                 { "local0", LOG_LOCAL0 },
456                 { "local1", LOG_LOCAL1 },
457                 { "local2", LOG_LOCAL2 },
458                 { "local3", LOG_LOCAL3 },
459                 { "local4", LOG_LOCAL4 },
460                 { "local5", LOG_LOCAL5 },
461                 { "local6", LOG_LOCAL6 },
462                 { "local7", LOG_LOCAL7 },
463                 { NULL, 0 }
464         };
465
466         for (i = 0; map[i].name; i++) {
467                 if (!strcmp(facility, map[i].name)) {
468                         internal_config.syslog_facility = map[i].value;
469                         return 0;
470                 }
471         }
472         return -1;
473 }
474
475 static int
476 eal_parse_socket_mem(char *socket_mem)
477 {
478         char * arg[RTE_MAX_NUMA_NODES];
479         char *end;
480         int arg_num, i, len;
481         uint64_t total_mem = 0;
482
483         len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
484         if (len == SOCKET_MEM_STRLEN) {
485                 RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
486                 return -1;
487         }
488
489         /* all other error cases will be caught later */
490         if (!isdigit(socket_mem[len-1]))
491                 return -1;
492
493         /* split the optarg into separate socket values */
494         arg_num = rte_strsplit(socket_mem, len,
495                         arg, RTE_MAX_NUMA_NODES, ',');
496
497         /* if split failed, or 0 arguments */
498         if (arg_num <= 0)
499                 return -1;
500
501         internal_config.force_sockets = 1;
502
503         /* parse each defined socket option */
504         errno = 0;
505         for (i = 0; i < arg_num; i++) {
506                 end = NULL;
507                 internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
508
509                 /* check for invalid input */
510                 if ((errno != 0)  ||
511                                 (arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
512                         return -1;
513                 internal_config.socket_mem[i] *= 1024ULL;
514                 internal_config.socket_mem[i] *= 1024ULL;
515                 total_mem += internal_config.socket_mem[i];
516         }
517
518         /* check if we have a positive amount of total memory */
519         if (total_mem == 0)
520                 return -1;
521
522         return 0;
523 }
524
525 static inline size_t
526 eal_get_hugepage_mem_size(void)
527 {
528         uint64_t size = 0;
529         unsigned i, j;
530
531         for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
532                 struct hugepage_info *hpi = &internal_config.hugepage_info[i];
533                 if (hpi->hugedir != NULL) {
534                         for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
535                                 size += hpi->hugepage_sz * hpi->num_pages[j];
536                         }
537                 }
538         }
539
540         return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
541 }
542
543 static enum rte_proc_type_t
544 eal_parse_proc_type(const char *arg)
545 {
546         if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
547                 return RTE_PROC_PRIMARY;
548         if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
549                 return RTE_PROC_SECONDARY;
550         if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
551                 return RTE_PROC_AUTO;
552
553         return RTE_PROC_INVALID;
554 }
555
556 static ssize_t
557 eal_parse_blacklist_opt(const char *optarg, size_t idx)
558 {
559         if (idx >= sizeof (eal_dev_blacklist) / sizeof (eal_dev_blacklist[0])) {
560                 RTE_LOG(ERR, EAL, "%s - too many devices to blacklist...\n", optarg);
561                 return (-EINVAL);
562         } else if (eal_parse_pci_DomBDF(optarg, eal_dev_blacklist + idx) < 0 &&
563                         eal_parse_pci_BDF(optarg, eal_dev_blacklist + idx) < 0) {
564                 RTE_LOG(ERR, EAL, "%s - invalid device to blacklist...\n", optarg);
565                 return (-EINVAL);
566         }
567
568         idx += 1;
569         return (idx);
570 }
571
/*
 * Parse the argument given in the command line of the application.
 *
 * Resets the relevant fields of internal_config to their defaults, then
 * processes the EAL options with getopt_long() and runs the sanity checks
 * that involve several options.
 *
 * On success argv[optind-1] is overwritten with the program name and the
 * index of that element (optind-1) is returned; getopt's optind is reset
 * to 0 before returning.
 * On any error the usage text is printed and -1 is returned.
 */
static int
eal_parse_args(int argc, char **argv)
{
        int opt, ret, i;
        char **argvopt;
        int option_index;
        int coremask_ok = 0;
        ssize_t blacklist_index = 0;
        char *prgname = argv[0];
        /* all long options use flag == NULL and val == 0, so getopt_long()
         * returns 0 for them and option_index tells which one was seen */
        static struct option lgopts[] = {
                {OPT_NO_HUGE, 0, 0, 0},
                {OPT_NO_PCI, 0, 0, 0},
                {OPT_NO_HPET, 0, 0, 0},
                {OPT_VMWARE_TSC_MAP, 0, 0, 0},
                {OPT_HUGE_DIR, 1, 0, 0},
                {OPT_NO_SHCONF, 0, 0, 0},
                {OPT_PROC_TYPE, 1, 0, 0},
                {OPT_FILE_PREFIX, 1, 0, 0},
                {OPT_SOCKET_MEM, 1, 0, 0},
                {OPT_SYSLOG, 1, NULL, 0},
                {0, 0, 0, 0}
        };

        argvopt = argv;

        /* defaults, possibly overridden by the options below */
        internal_config.memory = 0;
        internal_config.force_nrank = 0;
        internal_config.force_nchannel = 0;
        internal_config.hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
        internal_config.hugepage_dir = NULL;
        internal_config.force_sockets = 0;
        internal_config.syslog_facility = LOG_DAEMON;
#ifdef RTE_LIBEAL_USE_HPET
        internal_config.no_hpet = 0;
#else
        internal_config.no_hpet = 1;
#endif
        /* zero out the NUMA config */
        for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
                internal_config.socket_mem[i] = 0;

        /* zero out hugedir descriptors */
        for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
                internal_config.hugepage_info[i].lock_descriptor = 0;

        internal_config.vmware_tsc_map = 0;

        while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v",
                                  lgopts, &option_index)) != EOF) {

                switch (opt) {
                /* blacklist */
                case 'b':
                        /* blacklist_index doubles as the count of entries
                         * stored so far; negative means parse failure */
                        if ((blacklist_index = eal_parse_blacklist_opt(optarg,
                            blacklist_index)) < 0) {
                                eal_usage(prgname);
                                return (-1);
                        }
                        break;
                /* coremask */
                case 'c':
                        if (eal_parse_coremask(optarg) < 0) {
                                RTE_LOG(ERR, EAL, "invalid coremask\n");
                                eal_usage(prgname);
                                return -1;
                        }
                        coremask_ok = 1;
                        break;
                /* size of memory */
                case 'm':
                        /* given in megabytes, stored in bytes;
                         * NOTE(review): atoi() gives no error indication,
                         * so a malformed value is not rejected here */
                        internal_config.memory = atoi(optarg);
                        internal_config.memory *= 1024ULL;
                        internal_config.memory *= 1024ULL;
                        break;
                /* force number of channels */
                case 'n':
                        internal_config.force_nchannel = atoi(optarg);
                        if (internal_config.force_nchannel == 0 ||
                            internal_config.force_nchannel > 4) {
                                RTE_LOG(ERR, EAL, "invalid channel number\n");
                                eal_usage(prgname);
                                return -1;
                        }
                        break;
                /* force number of ranks */
                case 'r':
                        internal_config.force_nrank = atoi(optarg);
                        if (internal_config.force_nrank == 0 ||
                            internal_config.force_nrank > 16) {
                                RTE_LOG(ERR, EAL, "invalid rank number\n");
                                eal_usage(prgname);
                                return -1;
                        }
                        break;
                case 'v':
                        /* since message is explicitly requested by user, we
                         * write message at highest log level so it can always be seen
                         * even if info or warning messages are disabled */
                        RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
                        break;

                /* long options */
                case 0:
                        if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) {
                                internal_config.no_hugetlbfs = 1;
                        }
                        else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) {
                                internal_config.no_pci = 1;
                        }
                        else if (!strcmp(lgopts[option_index].name, OPT_NO_HPET)) {
                                internal_config.no_hpet = 1;
                        }
                        else if (!strcmp(lgopts[option_index].name, OPT_VMWARE_TSC_MAP)) {
                                internal_config.vmware_tsc_map = 1;
                        }
                        else if (!strcmp(lgopts[option_index].name, OPT_NO_SHCONF)) {
                                internal_config.no_shconf = 1;
                        }
                        else if (!strcmp(lgopts[option_index].name, OPT_HUGE_DIR)) {
                                internal_config.hugepage_dir = optarg;
                        }
                        else if (!strcmp(lgopts[option_index].name, OPT_PROC_TYPE)) {
                                /* an unknown name yields RTE_PROC_INVALID,
                                 * which is rejected in the sanity checks */
                                internal_config.process_type = eal_parse_proc_type(optarg);
                        }
                        else if (!strcmp(lgopts[option_index].name, OPT_FILE_PREFIX)) {
                                internal_config.hugefile_prefix = optarg;
                        }
                        else if (!strcmp(lgopts[option_index].name, OPT_SOCKET_MEM)) {
                                if (eal_parse_socket_mem(optarg) < 0) {
                                        RTE_LOG(ERR, EAL, "invalid parameters for --"
                                                        OPT_SOCKET_MEM "\n");
                                        eal_usage(prgname);
                                        return -1;
                                }
                        }
                        else if (!strcmp(lgopts[option_index].name, OPT_SYSLOG)) {
                                if (eal_parse_syslog(optarg) < 0) {
                                        RTE_LOG(ERR, EAL, "invalid parameters for --"
                                                        OPT_SYSLOG "\n");
                                        eal_usage(prgname);
                                        return -1;
                                }
                        }
                        break;

                default:
                        eal_usage(prgname);
                        return -1;
                }
        }

        /* sanity checks */
        if (!coremask_ok) {
                RTE_LOG(ERR, EAL, "coremask not specified\n");
                eal_usage(prgname);
                return -1;
        }
        /* resolve "auto" right away so the checks below see the real type */
        if (internal_config.process_type == RTE_PROC_AUTO){
                internal_config.process_type = eal_proc_type_detect();
        }
        if (internal_config.process_type == RTE_PROC_INVALID){
                RTE_LOG(ERR, EAL, "Invalid process type specified\n");
                eal_usage(prgname);
                return -1;
        }
        /* -n is mandatory for a primary process only */
        if (internal_config.process_type == RTE_PROC_PRIMARY &&
                        internal_config.force_nchannel == 0) {
                RTE_LOG(ERR, EAL, "Number of memory channels (-n) not specified\n");
                eal_usage(prgname);
                return -1;
        }
        /* NOTE(review): '%' is refused presumably because the prefix ends up
         * in a printf-style format string elsewhere - confirm */
        if (index(internal_config.hugefile_prefix,'%') != NULL){
                RTE_LOG(ERR, EAL, "Invalid char, '%%', in '"OPT_FILE_PREFIX"' option\n");
                eal_usage(prgname);
                return -1;
        }
        if (internal_config.memory > 0 && internal_config.force_sockets == 1) {
                RTE_LOG(ERR, EAL, "Options -m and --socket-mem cannot be specified "
                                "at the same time\n");
                eal_usage(prgname);
                return -1;
        }
        /* --no-huge doesn't make sense with either -m or --socket-mem */
        if (internal_config.no_hugetlbfs &&
                        (internal_config.memory > 0 ||
                                        internal_config.force_sockets == 1)) {
                RTE_LOG(ERR, EAL, "Options -m or --socket-mem cannot be specified "
                                "together with --no-huge!\n");
                eal_usage(prgname);
                return -1;
        }

        /* hand the collected -b addresses over to the PCI layer */
        if (blacklist_index > 0)
                rte_eal_pci_set_blacklist(eal_dev_blacklist, blacklist_index);

        /* put the program name in the slot just before the first argument
         * that was not consumed here */
        if (optind >= 0)
                argv[optind-1] = prgname;

        /* if no memory amounts were requested, this will result in 0 and
         * will be overriden later, right after eal_hugepage_info_init() */
        for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
                internal_config.memory += internal_config.socket_mem[i];

        ret = optind-1;
        optind = 0; /* reset getopt lib */
        return ret;
}
780
781 static void
782 eal_check_mem_on_local_socket(void)
783 {
784         const struct rte_memseg *ms;
785         int i, socket_id;
786
787         socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
788
789         ms = rte_eal_get_physmem_layout();
790
791         for (i = 0; i < RTE_MAX_MEMSEG; i++)
792                 if (ms[i].socket_id == socket_id &&
793                                 ms[i].len > 0)
794                         return;
795
796         RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
797                         "memory on local socket!\n");
798 }
799
/*
 * Dummy function launched on the slave lcores by rte_eal_init(); it does
 * nothing and always returns 0. The argument is ignored.
 */
static int
sync_func(void *arg)
{
	(void)arg;

	return 0;
}
805
806 inline static void 
807 rte_eal_mcfg_complete(void)
808 {
809         /* ALL shared mem_config related INIT DONE */
810         if (rte_config.process_type == RTE_PROC_PRIMARY)
811                 rte_config.mem_config->magic = RTE_MAGIC;
812 }
813
814 /* Launch threads, called at application init(). */
815 int
816 rte_eal_init(int argc, char **argv)
817 {
818         int i, fctret, ret;
819         pthread_t thread_id;
820         static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
821
822         if (!rte_atomic32_test_and_set(&run_once))
823                 return -1;
824
825         thread_id = pthread_self();
826
827         if (rte_eal_log_early_init() < 0)
828                 rte_panic("Cannot init early logs\n");
829
830         fctret = eal_parse_args(argc, argv);
831         if (fctret < 0)
832                 exit(1);
833
834         if (internal_config.no_hugetlbfs == 0 &&
835                         internal_config.process_type != RTE_PROC_SECONDARY &&
836                         eal_hugepage_info_init() < 0)
837                 rte_panic("Cannot get hugepage information\n");
838
839         if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
840                 if (internal_config.no_hugetlbfs)
841                         internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
842                 else
843                         internal_config.memory = eal_get_hugepage_mem_size();
844         }
845
846         if (internal_config.vmware_tsc_map == 1) {
847 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
848                 rte_cycles_vmware_tsc_map = 1;
849                 RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
850                                 "you must have monitor_control.pseudo_perfctr = TRUE\n");
851 #else
852                 RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
853                                 "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
854 #endif
855         }
856
857         rte_srand(rte_rdtsc());
858
859         rte_config_init();
860         
861         if (rte_eal_cpu_init() < 0)
862                 rte_panic("Cannot detect lcores\n");
863
864         if (rte_eal_memory_init() < 0)
865                 rte_panic("Cannot init memory\n");
866
867         /* the directories are locked during eal_hugepage_info_init */
868         eal_hugedirs_unlock();
869         
870         if (rte_eal_memzone_init() < 0)
871                 rte_panic("Cannot init memzone\n");
872
873         if (rte_eal_tailqs_init() < 0)
874                 rte_panic("Cannot init tail queues for objects\n");
875
876         if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
877                 rte_panic("Cannot init logs\n");
878
879         if (rte_eal_alarm_init() < 0)
880                 rte_panic("Cannot init interrupt-handling thread\n");
881
882         if (rte_eal_intr_init() < 0)
883                 rte_panic("Cannot init interrupt-handling thread\n");
884
885         if (rte_eal_timer_init() < 0)
886                 rte_panic("Cannot init HPET or TSC timers\n");
887
888         if (rte_eal_pci_init() < 0)
889                 rte_panic("Cannot init PCI\n");
890
891         RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",
892                 rte_config.master_lcore, (int)thread_id);
893
894         eal_check_mem_on_local_socket();
895
896         rte_eal_mcfg_complete();
897
898         RTE_LCORE_FOREACH_SLAVE(i) {
899
900                 /*
901                  * create communication pipes between master thread
902                  * and children
903                  */
904                 if (pipe(lcore_config[i].pipe_master2slave) < 0)
905                         rte_panic("Cannot create pipe\n");
906                 if (pipe(lcore_config[i].pipe_slave2master) < 0)
907                         rte_panic("Cannot create pipe\n");
908
909                 lcore_config[i].state = WAIT;
910
911                 /* create a thread for each lcore */
912                 ret = pthread_create(&lcore_config[i].thread_id, NULL,
913                                      eal_thread_loop, NULL);
914                 if (ret != 0)
915                         rte_panic("Cannot create thread\n");
916         }
917
918         eal_thread_init_master(rte_config.master_lcore);
919
920         /*
921          * Launch a dummy function on all slave lcores, so that master lcore
922          * knows they are all ready when this function returns.
923          */
924         rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
925         rte_eal_mp_wait_lcore();
926
927         return fctret;
928 }
929
930 /* get core role */
931 enum rte_lcore_role_t
932 rte_eal_lcore_role(unsigned lcore_id)
933 {
934         return (rte_config.lcore_role[lcore_id]);
935 }
936
937 enum rte_proc_type_t
938 rte_eal_process_type(void)
939 {
940         return (rte_config.process_type);
941 }
942