eal: rework CPU mask parsing
[dpdk.git] / lib / librte_eal / linuxapp / eal / eal.c
1 /*-
2  *   BSD LICENSE
3  * 
4  *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  * 
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  * 
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  * 
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stdint.h>
37 #include <string.h>
38 #include <stdarg.h>
39 #include <unistd.h>
40 #include <pthread.h>
41 #include <syslog.h>
42 #include <getopt.h>
43 #include <sys/file.h>
44 #include <stddef.h>
45 #include <errno.h>
46 #include <limits.h>
47 #include <errno.h>
48 #include <sys/mman.h>
49 #include <sys/queue.h>
50 #include <sys/user.h>
51 #include <linux/binfmts.h>
52
53 #include <rte_common.h>
54 #include <rte_debug.h>
55 #include <rte_memory.h>
56 #include <rte_memzone.h>
57 #include <rte_launch.h>
58 #include <rte_tailq.h>
59 #include <rte_eal.h>
60 #include <rte_eal_memconfig.h>
61 #include <rte_per_lcore.h>
62 #include <rte_lcore.h>
63 #include <rte_log.h>
64 #include <rte_random.h>
65 #include <rte_cycles.h>
66 #include <rte_string_fns.h>
67 #include <rte_cpuflags.h>
68 #include <rte_interrupts.h>
69 #include <rte_pci.h>
70 #include <rte_common.h>
71 #include <rte_version.h>
72 #include <rte_atomic.h>
73 #include <malloc_heap.h>
74
75 #include "eal_private.h"
76 #include "eal_thread.h"
77 #include "eal_internal_cfg.h"
78 #include "eal_filesystem.h"
79 #include "eal_hugepages.h"
80
/*
 * Names of the long command-line options. They are listed in the lgopts[]
 * table of eal_parse_args() and given on the command line with a leading
 * "--".
 */
#define OPT_HUGE_DIR    "huge-dir"
#define OPT_PROC_TYPE   "proc-type"
#define OPT_NO_SHCONF   "no-shconf"
#define OPT_NO_HPET     "no-hpet"
#define OPT_VMWARE_TSC_MAP   "vmware-tsc-map"
#define OPT_NO_PCI      "no-pci"
#define OPT_NO_HUGE     "no-huge"
#define OPT_FILE_PREFIX "file-prefix"
#define OPT_SOCKET_MEM  "socket-mem"
#define OPT_SYSLOG      "syslog"

/* Capacity of eal_dev_blacklist[]: maximum number of devices given with -b */
#define RTE_EAL_BLACKLIST_SIZE  0x100

/* 64 MB. NOTE(review): presumably the amount of memory used when hugepages
 * are disabled (--no-huge) -- confirm at the point of use. */
#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)

/* Length limit for the --socket-mem argument: eal_parse_socket_mem() rejects
 * strings of this length or longer (10 characters per NUMA node). */
#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)

/* Number of coremask bits carried by one hexadecimal digit */
#define BITS_PER_HEX 4
99
/*
 * Parse one hexadecimal field of a PCI address ("domain:bus:devid.func").
 *
 * in  - cursor into the string being parsed (a "const char *" lvalue); on
 *       success it is advanced past the delimiter that ends the field
 * fd  - lvalue receiving the parsed value, converted to its own type
 * lim - largest value accepted for the field
 * dlm - character that must immediately follow the field (0 for the
 *       terminating NUL of the last field)
 *
 * If the field is empty (no digit consumed), malformed, not followed by
 * dlm, or larger than lim, the macro executes "return (-EINVAL)" in the
 * CALLING function, so it may only be used inside functions that return
 * an integer type.
 *
 * Wrapped in do { } while (0) so that an invocation followed by ';' is
 * exactly one statement. __typeof__ is used instead of typeof so the
 * macro also builds in strict ISO C mode.
 */
#define GET_BLACKLIST_FIELD(in, fd, lim, dlm)                   \
do {                                                            \
        unsigned long field_val;                                \
        char *field_end;                                        \
        errno = 0;                                              \
        field_val = strtoul((in), &field_end, 16);              \
        if (errno != 0 || field_end == (in) ||                  \
            field_end[0] != (dlm) || field_val > (lim))         \
                return (-EINVAL);                               \
        (fd) = (__typeof__ (fd))field_val;                      \
        (in) = field_end + 1;                                   \
} while (0)
111
/* Optional application usage callback, installed through
 * rte_set_application_usage_hook(). When set, eal_usage() calls it after
 * printing the EAL options so the application can print its usage too. */
static rte_usage_hook_t rte_application_usage_hook = NULL;
/* early configuration structure, used while the memory config is not yet
 * mmapped; rte_eal_config_create() copies it into the shared mapping */
static struct rte_mem_config early_mem_config;

/* define fd variable here, because file needs to be kept open for the
 * duration of the program, as we hold a write lock on it in the primary proc.
 * A negative value means the file has not been opened. */
static int mem_cfg_fd = -1;

/* Write lock restricted to the memseg member of struct rte_mem_config; it is
 * requested with fcntl(F_SETLK) on the runtime configuration file */
static struct flock wr_lock = {
                .l_type = F_WRLCK,
                .l_whence = SEEK_SET,
                .l_start = offsetof(struct rte_mem_config, memseg),
                .l_len = sizeof(early_mem_config.memseg),
};

/* Global and public configuration, handed out by
 * rte_eal_get_configuration(); mem_config starts out pointing at the early
 * (non-shared) memory configuration */
static struct rte_config rte_config = {
                .mem_config = &early_mem_config,
};

/* PCI devices named with -b, filled in by eal_parse_blacklist_opt() */
static struct rte_pci_addr eal_dev_blacklist[RTE_EAL_BLACKLIST_SIZE];

/* internal configuration (per-core) */
struct lcore_config lcore_config[RTE_MAX_LCORE];

/* internal configuration */
struct internal_config internal_config;

/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
143
144 /* Return a pointer to the configuration structure */
145 struct rte_config *
146 rte_eal_get_configuration(void)
147 {
148         return &rte_config;
149 }
150
151 /* parse a sysfs (or other) file containing one integer value */
152 int
153 eal_parse_sysfs_value(const char *filename, unsigned long *val)
154 {
155         FILE *f;
156         char buf[BUFSIZ];
157         char *end = NULL;
158
159         if ((f = fopen(filename, "r")) == NULL) {
160                 RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
161                         __func__, filename);
162                 return -1;
163         }
164
165         if (fgets(buf, sizeof(buf), f) == NULL) {
166                 RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
167                         __func__, filename);
168                 fclose(f);
169                 return -1;
170         }
171         *val = strtoul(buf, &end, 0);
172         if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
173                 RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
174                                 __func__, filename);
175                 fclose(f);
176                 return -1;
177         }
178         fclose(f);
179         return 0;
180 }
181
182
183 /* create memory configuration in shared/mmap memory. Take out
184  * a write lock on the memsegs, so we can auto-detect primary/secondary.
185  * This means we never close the file while running (auto-close on exit).
186  * We also don't lock the whole file, so that in future we can use read-locks
187  * on other parts, e.g. memzones, to detect if there are running secondary
188  * processes. */
189 static void
190 rte_eal_config_create(void)
191 {
192         void *rte_mem_cfg_addr;
193         int retval;
194
195         const char *pathname = eal_runtime_config_path();
196
197         if (internal_config.no_shconf)
198                 return;
199
200         if (mem_cfg_fd < 0){
201                 mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
202                 if (mem_cfg_fd < 0)
203                         rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
204         }
205
206         retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
207         if (retval < 0){
208                 close(mem_cfg_fd);
209                 rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
210         }
211
212         retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
213         if (retval < 0){
214                 close(mem_cfg_fd);
215                 rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
216                                 "process running?\n", pathname);
217         }
218
219         rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
220                                 PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
221
222         if (rte_mem_cfg_addr == MAP_FAILED){
223                 rte_panic("Cannot mmap memory for rte_config\n");
224         }
225         memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
226         rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
227 }
228
229 /* attach to an existing shared memory config */
230 static void
231 rte_eal_config_attach(void)
232 {
233         void *rte_mem_cfg_addr;
234         const char *pathname = eal_runtime_config_path();
235
236         if (internal_config.no_shconf)
237                 return;
238
239         if (mem_cfg_fd < 0){
240                 mem_cfg_fd = open(pathname, O_RDWR);
241                 if (mem_cfg_fd < 0)
242                         rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
243         }
244
245         rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), 
246                                 PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
247         close(mem_cfg_fd);
248         if (rte_mem_cfg_addr == MAP_FAILED)
249                 rte_panic("Cannot mmap memory for rte_config\n");
250
251         rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
252 }
253
254 /* Detect if we are a primary or a secondary process */
255 static enum rte_proc_type_t
256 eal_proc_type_detect(void)
257 {
258         enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
259         const char *pathname = eal_runtime_config_path();
260
261         /* if we can open the file but not get a write-lock we are a secondary
262          * process. NOTE: if we get a file handle back, we keep that open
263          * and don't close it to prevent a race condition between multiple opens */
264         if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
265                         (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
266                 ptype = RTE_PROC_SECONDARY;
267
268         RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
269                         ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
270
271         return ptype;
272 }
273
274 /* Sets up rte_config structure with the pointer to shared memory config.*/
275 static void
276 rte_config_init(void)
277 {
278         /* set the magic in configuration structure */
279         rte_config.magic = RTE_MAGIC;
280         rte_config.process_type = (internal_config.process_type == RTE_PROC_AUTO) ?
281                         eal_proc_type_detect() : /* for auto, detect the type */
282                         internal_config.process_type; /* otherwise use what's already set */
283
284         switch (rte_config.process_type){
285         case RTE_PROC_PRIMARY:
286                 rte_eal_config_create();
287                 break;
288         case RTE_PROC_SECONDARY:
289                 rte_eal_config_attach();
290                 rte_eal_mcfg_wait_complete(rte_config.mem_config);
291                 break;
292         case RTE_PROC_AUTO:
293         case RTE_PROC_INVALID:
294                 rte_panic("Invalid process type\n");
295         }
296 }
297
298 /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
299 static void
300 eal_hugedirs_unlock(void)
301 {
302         int i;
303
304         for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
305         {
306                 /* skip uninitialized */
307                 if (internal_config.hugepage_info[i].lock_descriptor == 0)
308                         continue;
309                 /* unlock hugepage file */
310                 flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
311                 close(internal_config.hugepage_info[i].lock_descriptor);
312                 /* reset the field */
313                 internal_config.hugepage_info[i].lock_descriptor = 0;
314         }
315 }
316
317 /* display usage */
318 static void
319 eal_usage(const char *prgname)
320 {
321         printf("\nUsage: %s -c COREMASK -n NUM [-m NB] [-r NUM] [-b <domain:bus:devid.func>]"
322                "[--proc-type primary|secondary|auto] \n\n"
323                "EAL options:\n"
324                "  -c COREMASK  : A hexadecimal bitmask of cores to run on\n"
325                "  -n NUM       : Number of memory channels\n"
326                    "  -v           : Display version information on startup\n"
327                "  -b <domain:bus:devid.func>: to prevent EAL from using specified "
328            "PCI device\n"
329                "                 (multiple -b options are allowed)\n"
330                "  -m MB        : memory to allocate (see also --"OPT_SOCKET_MEM")\n"
331                "  -r NUM       : force number of memory ranks (don't detect)\n"
332                "  --"OPT_SYSLOG"     : set syslog facility\n"
333                "  --"OPT_SOCKET_MEM" : memory to allocate on specific \n"
334                    "                 sockets (use comma separated values)\n"
335                "  --"OPT_HUGE_DIR"   : directory where hugetlbfs is mounted\n"
336                "  --"OPT_PROC_TYPE"  : type of this process\n"
337                "  --"OPT_FILE_PREFIX": prefix for hugepage filenames\n"
338                "  --"OPT_VMWARE_TSC_MAP": use VMware TSC map instead of "
339                            "native RDTSC\n"
340                "\nEAL options for DEBUG use only:\n"
341                "  --"OPT_NO_HUGE"  : use malloc instead of hugetlbfs\n"
342                "  --"OPT_NO_PCI"   : disable pci\n"
343                "  --"OPT_NO_HPET"  : disable hpet\n"
344                "  --"OPT_NO_SHCONF": no shared config (mmap'd files)\n"
345                "\n",
346                prgname);
347         /* Allow the application to print its usage message too if hook is set */
348         if ( rte_application_usage_hook ) {
349                 printf("===== Application Usage =====\n\n");
350                 rte_application_usage_hook(prgname);
351         }
352 }
353
354 /* Set a per-application usage message */
355 rte_usage_hook_t
356 rte_set_application_usage_hook( rte_usage_hook_t usage_func )
357 {
358         rte_usage_hook_t        old_func;
359
360         /* Will be NULL on the first call to denote the last usage routine. */
361         old_func                                        = rte_application_usage_hook;
362         rte_application_usage_hook      = usage_func;
363
364         return old_func;
365 }
366
/*
 * Coremask parsing. xdigit2val() converts a single hexadecimal digit to
 * its value; eal_parse_coremask() parses the coremask given as argument
 * (hexadecimal string) and fills the global configuration (role of every
 * lcore and master lcore) with the parsed value.
 */
/*
 * Convert one hexadecimal digit character to its numeric value (0 to 15).
 * No validation is done here: the result is meaningful only for a
 * character that isxdigit() accepts.
 */
static int xdigit2val(unsigned char c)
{
        if (isdigit(c))
                return c - '0';

        /* letters: 'A'..'F' and 'a'..'f' both map to 10..15 */
        return c - (isupper(c) ? 'A' : 'a') + 10;
}
383 static int
384 eal_parse_coremask(const char *coremask)
385 {
386         struct rte_config *cfg = rte_eal_get_configuration();
387         int i, j, idx = 0 ;
388         unsigned count = 0;
389         char c;
390         int val;
391
392         if (coremask == NULL)
393                 return -1;
394         /* Remove all blank characters ahead and after .
395          * Remove 0x/0X if exists.
396          */
397         while (isblank(*coremask))
398                 coremask++;
399         if (coremask[0] == '0' && ((coremask[1] == 'x')
400                 ||  (coremask[1] == 'X')) )
401                 coremask += 2;
402         i = strnlen(coremask, MAX_ARG_STRLEN);
403         while ((i > 0) && isblank(coremask[i - 1]))
404                 i--;
405         if (i == 0)
406                 return -1;
407
408         for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
409                 c = coremask[i];
410                 if (isxdigit(c) == 0) {
411                         /* invalid characters */
412                         return (-1);
413                 }
414                 val = xdigit2val(c);
415                 for(j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++) {
416                         if((1 << j) & val) {
417                                 cfg->lcore_role[idx] = ROLE_RTE;
418                                 if(count == 0)
419                                         cfg->master_lcore = idx;
420                                 count++;
421                         } else  {
422                                 cfg->lcore_role[idx] = ROLE_OFF;
423                         }
424                 }
425         }
426         for(; i >= 0; i--)
427                 if(coremask[i] != '0')
428                         return -1;
429         for(; idx < RTE_MAX_LCORE; idx++)
430                 cfg->lcore_role[idx] = ROLE_OFF;
431         if(count == 0)
432                 return -1;
433         return 0;
434 }
435
436 static int
437 eal_parse_syslog(const char *facility)
438 {
439         int i;
440         static struct {
441                 const char *name;
442                 int value;
443         } map[] = {
444                 { "auth", LOG_AUTH },
445                 { "cron", LOG_CRON },
446                 { "daemon", LOG_DAEMON },
447                 { "ftp", LOG_FTP },
448                 { "kern", LOG_KERN },
449                 { "lpr", LOG_LPR },
450                 { "mail", LOG_MAIL },
451                 { "news", LOG_NEWS },
452                 { "syslog", LOG_SYSLOG },
453                 { "user", LOG_USER },
454                 { "uucp", LOG_UUCP },
455                 { "local0", LOG_LOCAL0 },
456                 { "local1", LOG_LOCAL1 },
457                 { "local2", LOG_LOCAL2 },
458                 { "local3", LOG_LOCAL3 },
459                 { "local4", LOG_LOCAL4 },
460                 { "local5", LOG_LOCAL5 },
461                 { "local6", LOG_LOCAL6 },
462                 { "local7", LOG_LOCAL7 },
463                 { NULL, 0 }
464         };
465
466         for (i = 0; map[i].name; i++) {
467                 if (!strcmp(facility, map[i].name)) {
468                         internal_config.syslog_facility = map[i].value;
469                         return 0;
470                 }
471         }
472         return -1;
473 }
474
475 static int
476 eal_parse_socket_mem(char *socket_mem)
477 {
478         char * arg[RTE_MAX_NUMA_NODES];
479         char *end;
480         int arg_num, i, len;
481         uint64_t total_mem = 0;
482
483         len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
484         if (len == SOCKET_MEM_STRLEN) {
485                 RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
486                 return -1;
487         }
488
489         /* all other error cases will be caught later */
490         if (!isdigit(socket_mem[len-1]))
491                 return -1;
492
493         /* split the optarg into separate socket values */
494         arg_num = rte_strsplit(socket_mem, len,
495                         arg, RTE_MAX_NUMA_NODES, ',');
496
497         /* if split failed, or 0 arguments */
498         if (arg_num <= 0)
499                 return -1;
500
501         internal_config.force_sockets = 1;
502
503         /* parse each defined socket option */
504         errno = 0;
505         for (i = 0; i < arg_num; i++) {
506                 end = NULL;
507                 internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
508
509                 /* check for invalid input */
510                 if ((errno != 0)  ||
511                                 (arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
512                         return -1;
513                 internal_config.socket_mem[i] *= 1024ULL;
514                 internal_config.socket_mem[i] *= 1024ULL;
515                 total_mem += internal_config.socket_mem[i];
516         }
517
518         /* check if we have a positive amount of total memory */
519         if (total_mem == 0)
520                 return -1;
521
522         return 0;
523 }
524
525 static inline size_t
526 eal_get_hugepage_mem_size(void)
527 {
528         uint64_t size = 0;
529         unsigned i, j;
530
531         for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
532                 struct hugepage_info *hpi = &internal_config.hugepage_info[i];
533                 if (hpi->hugedir != NULL) {
534                         for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
535                                 size += hpi->hugepage_sz * hpi->num_pages[j];
536                         }
537                 }
538         }
539
540         return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
541 }
542
543 static enum rte_proc_type_t
544 eal_parse_proc_type(const char *arg)
545 {
546         if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
547                 return RTE_PROC_PRIMARY;
548         if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
549                 return RTE_PROC_SECONDARY;
550         if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
551                 return RTE_PROC_AUTO;
552
553         return RTE_PROC_INVALID;
554 }
555
556 static int
557 eal_parse_blacklist(const char *input,  struct rte_pci_addr *dev2bl)
558 {
559         GET_BLACKLIST_FIELD(input, dev2bl->domain, UINT16_MAX, ':');
560         GET_BLACKLIST_FIELD(input, dev2bl->bus, UINT8_MAX, ':');
561         GET_BLACKLIST_FIELD(input, dev2bl->devid, UINT8_MAX, '.');
562         GET_BLACKLIST_FIELD(input, dev2bl->function, UINT8_MAX, 0);
563         return (0);
564 }
565
566 static ssize_t
567 eal_parse_blacklist_opt(const char *optarg, size_t idx)
568 {
569         if (idx >= sizeof (eal_dev_blacklist) / sizeof (eal_dev_blacklist[0])) {
570                 RTE_LOG(ERR, EAL,
571                     "%s - too many devices to blacklist...\n",
572                     optarg);
573                 return (-EINVAL);
574         } else if (eal_parse_blacklist(optarg, eal_dev_blacklist + idx) != 0) {
575                 RTE_LOG(ERR, EAL,
576                     "%s - invalid device to blacklist...\n",
577                     optarg);
578                 return (-EINVAL);
579         }
580
581         idx += 1;
582         return (idx);
583 }
584
585 /* Parse the argument given in the command line of the application */
586 static int
587 eal_parse_args(int argc, char **argv)
588 {
589         int opt, ret, i;
590         char **argvopt;
591         int option_index;
592         int coremask_ok = 0;
593         ssize_t blacklist_index = 0;;
594         char *prgname = argv[0];
595         static struct option lgopts[] = {
596                 {OPT_NO_HUGE, 0, 0, 0},
597                 {OPT_NO_PCI, 0, 0, 0},
598                 {OPT_NO_HPET, 0, 0, 0},
599                 {OPT_VMWARE_TSC_MAP, 0, 0, 0},
600                 {OPT_HUGE_DIR, 1, 0, 0},
601                 {OPT_NO_SHCONF, 0, 0, 0},
602                 {OPT_PROC_TYPE, 1, 0, 0},
603                 {OPT_FILE_PREFIX, 1, 0, 0},
604                 {OPT_SOCKET_MEM, 1, 0, 0},
605                 {OPT_SYSLOG, 1, NULL, 0},
606                 {0, 0, 0, 0}
607         };
608
609         argvopt = argv;
610
611         internal_config.memory = 0;
612         internal_config.force_nrank = 0;
613         internal_config.force_nchannel = 0;
614         internal_config.hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
615         internal_config.hugepage_dir = NULL;
616         internal_config.force_sockets = 0;
617         internal_config.syslog_facility = LOG_DAEMON;
618 #ifdef RTE_LIBEAL_USE_HPET
619         internal_config.no_hpet = 0;
620 #else
621         internal_config.no_hpet = 1;
622 #endif
623         /* zero out the NUMA config */
624         for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
625                 internal_config.socket_mem[i] = 0;
626
627         /* zero out hugedir descriptors */
628         for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
629                 internal_config.hugepage_info[i].lock_descriptor = 0;
630
631         internal_config.vmware_tsc_map = 0;
632
633         while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v",
634                                   lgopts, &option_index)) != EOF) {
635
636                 switch (opt) {
637                 /* blacklist */
638                 case 'b':
639                         if ((blacklist_index = eal_parse_blacklist_opt(optarg,
640                             blacklist_index)) < 0) {
641                                 eal_usage(prgname);
642                                 return (-1);
643                         }
644                         break;
645                 /* coremask */
646                 case 'c':
647                         if (eal_parse_coremask(optarg) < 0) {
648                                 RTE_LOG(ERR, EAL, "invalid coremask\n");
649                                 eal_usage(prgname);
650                                 return -1;
651                         }
652                         coremask_ok = 1;
653                         break;
654                 /* size of memory */
655                 case 'm':
656                         internal_config.memory = atoi(optarg);
657                         internal_config.memory *= 1024ULL;
658                         internal_config.memory *= 1024ULL;
659                         break;
660                 /* force number of channels */
661                 case 'n':
662                         internal_config.force_nchannel = atoi(optarg);
663                         if (internal_config.force_nchannel == 0 ||
664                             internal_config.force_nchannel > 4) {
665                                 RTE_LOG(ERR, EAL, "invalid channel number\n");
666                                 eal_usage(prgname);
667                                 return -1;
668                         }
669                         break;
670                 /* force number of ranks */
671                 case 'r':
672                         internal_config.force_nrank = atoi(optarg);
673                         if (internal_config.force_nrank == 0 ||
674                             internal_config.force_nrank > 16) {
675                                 RTE_LOG(ERR, EAL, "invalid rank number\n");
676                                 eal_usage(prgname);
677                                 return -1;
678                         }
679                         break;
680                 case 'v':
681                         /* since message is explicitly requested by user, we
682                          * write message at highest log level so it can always be seen
683                          * even if info or warning messages are disabled */
684                         RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
685                         break;
686
687                 /* long options */
688                 case 0:
689                         if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) {
690                                 internal_config.no_hugetlbfs = 1;
691                         }
692                         else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) {
693                                 internal_config.no_pci = 1;
694                         }
695                         else if (!strcmp(lgopts[option_index].name, OPT_NO_HPET)) {
696                                 internal_config.no_hpet = 1;
697                         }
698                         else if (!strcmp(lgopts[option_index].name, OPT_VMWARE_TSC_MAP)) {
699                                 internal_config.vmware_tsc_map = 1;
700                         }
701                         else if (!strcmp(lgopts[option_index].name, OPT_NO_SHCONF)) {
702                                 internal_config.no_shconf = 1;
703                         }
704                         else if (!strcmp(lgopts[option_index].name, OPT_HUGE_DIR)) {
705                                 internal_config.hugepage_dir = optarg;
706                         }
707                         else if (!strcmp(lgopts[option_index].name, OPT_PROC_TYPE)) {
708                                 internal_config.process_type = eal_parse_proc_type(optarg);
709                         }
710                         else if (!strcmp(lgopts[option_index].name, OPT_FILE_PREFIX)) {
711                                 internal_config.hugefile_prefix = optarg;
712                         }
713                         else if (!strcmp(lgopts[option_index].name, OPT_SOCKET_MEM)) {
714                                 if (eal_parse_socket_mem(optarg) < 0) {
715                                         RTE_LOG(ERR, EAL, "invalid parameters for --"
716                                                         OPT_SOCKET_MEM "\n");
717                                         eal_usage(prgname);
718                                         return -1;
719                                 }
720                         }
721                         else if (!strcmp(lgopts[option_index].name, OPT_SYSLOG)) {
722                                 if (eal_parse_syslog(optarg) < 0) {
723                                         RTE_LOG(ERR, EAL, "invalid parameters for --"
724                                                         OPT_SYSLOG "\n");
725                                         eal_usage(prgname);
726                                         return -1;
727                                 }
728                         }
729                         break;
730
731                 default:
732                         eal_usage(prgname);
733                         return -1;
734                 }
735         }
736
737         /* sanity checks */
738         if (!coremask_ok) {
739                 RTE_LOG(ERR, EAL, "coremask not specified\n");
740                 eal_usage(prgname);
741                 return -1;
742         }
743         if (internal_config.process_type == RTE_PROC_AUTO){
744                 internal_config.process_type = eal_proc_type_detect();
745         }
746         if (internal_config.process_type == RTE_PROC_INVALID){
747                 RTE_LOG(ERR, EAL, "Invalid process type specified\n");
748                 eal_usage(prgname);
749                 return -1;
750         }
751         if (internal_config.process_type == RTE_PROC_PRIMARY &&
752                         internal_config.force_nchannel == 0) {
753                 RTE_LOG(ERR, EAL, "Number of memory channels (-n) not specified\n");
754                 eal_usage(prgname);
755                 return -1;
756         }
757         if (index(internal_config.hugefile_prefix,'%') != NULL){
758                 RTE_LOG(ERR, EAL, "Invalid char, '%%', in '"OPT_FILE_PREFIX"' option\n");
759                 eal_usage(prgname);
760                 return -1;
761         }
762         if (internal_config.memory > 0 && internal_config.force_sockets == 1) {
763                 RTE_LOG(ERR, EAL, "Options -m and --socket-mem cannot be specified "
764                                 "at the same time\n");
765                 eal_usage(prgname);
766                 return -1;
767         }
768         /* --no-huge doesn't make sense with either -m or --socket-mem */
769         if (internal_config.no_hugetlbfs &&
770                         (internal_config.memory > 0 ||
771                                         internal_config.force_sockets == 1)) {
772                 RTE_LOG(ERR, EAL, "Options -m or --socket-mem cannot be specified "
773                                 "together with --no-huge!\n");
774                 eal_usage(prgname);
775                 return -1;
776         }
777
778         if (blacklist_index > 0)
779                 rte_eal_pci_set_blacklist(eal_dev_blacklist, blacklist_index);
780
781         if (optind >= 0)
782                 argv[optind-1] = prgname;
783
784         /* if no memory amounts were requested, this will result in 0 and
785          * will be overriden later, right after eal_hugepage_info_init() */
786         for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
787                 internal_config.memory += internal_config.socket_mem[i];
788
789         ret = optind-1;
790         optind = 0; /* reset getopt lib */
791         return ret;
792 }
793
794 static void
795 eal_check_mem_on_local_socket(void)
796 {
797         const struct rte_memseg *ms;
798         int i, socket_id;
799
800         socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
801
802         ms = rte_eal_get_physmem_layout();
803
804         for (i = 0; i < RTE_MAX_MEMSEG; i++)
805                 if (ms[i].socket_id == socket_id &&
806                                 ms[i].len > 0)
807                         return;
808
809         RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
810                         "memory on local socket!\n");
811 }
812
/*
 * Do-nothing job: ignores its argument and always reports success (0).
 * rte_eal_init() launches it on the slave lcores as a readiness check.
 */
static int
sync_func(void *arg)
{
	(void)arg; /* intentionally unused */

	return 0;
}
818
819 inline static void 
820 rte_eal_mcfg_complete(void)
821 {
822         /* ALL shared mem_config related INIT DONE */
823         if (rte_config.process_type == RTE_PROC_PRIMARY)
824                 rte_config.mem_config->magic = RTE_MAGIC;
825 }
826
827 /* Launch threads, called at application init(). */
828 int
829 rte_eal_init(int argc, char **argv)
830 {
831         int i, fctret, ret;
832         pthread_t thread_id;
833         static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
834
835         if (!rte_atomic32_test_and_set(&run_once))
836                 return -1;
837
838         thread_id = pthread_self();
839
840         if (rte_eal_log_early_init() < 0)
841                 rte_panic("Cannot init early logs\n");
842
843         fctret = eal_parse_args(argc, argv);
844         if (fctret < 0)
845                 exit(1);
846
847         if (internal_config.no_hugetlbfs == 0 &&
848                         internal_config.process_type != RTE_PROC_SECONDARY &&
849                         eal_hugepage_info_init() < 0)
850                 rte_panic("Cannot get hugepage information\n");
851
852         if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
853                 if (internal_config.no_hugetlbfs)
854                         internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
855                 else
856                         internal_config.memory = eal_get_hugepage_mem_size();
857         }
858
859         if (internal_config.vmware_tsc_map == 1) {
860 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
861                 rte_cycles_vmware_tsc_map = 1;
862                 RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
863                                 "you must have monitor_control.pseudo_perfctr = TRUE\n");
864 #else
865                 RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
866                                 "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
867 #endif
868         }
869
870         rte_srand(rte_rdtsc());
871
872         rte_config_init();
873         
874         if (rte_eal_cpu_init() < 0)
875                 rte_panic("Cannot detect lcores\n");
876
877         if (rte_eal_memory_init() < 0)
878                 rte_panic("Cannot init memory\n");
879
880         /* the directories are locked during eal_hugepage_info_init */
881         eal_hugedirs_unlock();
882         
883         if (rte_eal_memzone_init() < 0)
884                 rte_panic("Cannot init memzone\n");
885
886         if (rte_eal_tailqs_init() < 0)
887                 rte_panic("Cannot init tail queues for objects\n");
888
889         if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0)
890                 rte_panic("Cannot init logs\n");
891
892         if (rte_eal_alarm_init() < 0)
893                 rte_panic("Cannot init interrupt-handling thread\n");
894
895         if (rte_eal_intr_init() < 0)
896                 rte_panic("Cannot init interrupt-handling thread\n");
897
898         if (rte_eal_timer_init() < 0)
899                 rte_panic("Cannot init HPET or TSC timers\n");
900
901         if (rte_eal_pci_init() < 0)
902                 rte_panic("Cannot init PCI\n");
903
904         RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",
905                 rte_config.master_lcore, (int)thread_id);
906
907         eal_check_mem_on_local_socket();
908
909         rte_eal_mcfg_complete();
910
911         RTE_LCORE_FOREACH_SLAVE(i) {
912
913                 /*
914                  * create communication pipes between master thread
915                  * and children
916                  */
917                 if (pipe(lcore_config[i].pipe_master2slave) < 0)
918                         rte_panic("Cannot create pipe\n");
919                 if (pipe(lcore_config[i].pipe_slave2master) < 0)
920                         rte_panic("Cannot create pipe\n");
921
922                 lcore_config[i].state = WAIT;
923
924                 /* create a thread for each lcore */
925                 ret = pthread_create(&lcore_config[i].thread_id, NULL,
926                                      eal_thread_loop, NULL);
927                 if (ret != 0)
928                         rte_panic("Cannot create thread\n");
929         }
930
931         eal_thread_init_master(rte_config.master_lcore);
932
933         /*
934          * Launch a dummy function on all slave lcores, so that master lcore
935          * knows they are all ready when this function returns.
936          */
937         rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
938         rte_eal_mp_wait_lcore();
939
940         return fctret;
941 }
942
943 /* get core role */
944 enum rte_lcore_role_t
945 rte_eal_lcore_role(unsigned lcore_id)
946 {
947         return (rte_config.lcore_role[lcore_id]);
948 }
949
950 enum rte_proc_type_t
951 rte_eal_process_type(void)
952 {
953         return (rte_config.process_type);
954 }
955