72b4dd7b13330587eb89932fe453e6622d225334
[dpdk.git] / lib / librte_eal / linuxapp / eal / eal.c
1 /*-
2  *   BSD LICENSE
3  * 
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   Copyright(c) 2012-2013 6WIND S.A.
6  *   All rights reserved.
7  * 
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  * 
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  * 
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdint.h>
38 #include <string.h>
39 #include <stdarg.h>
40 #include <unistd.h>
41 #include <pthread.h>
42 #include <syslog.h>
43 #include <getopt.h>
44 #include <sys/file.h>
45 #include <fcntl.h>
46 #include <dlfcn.h>
47 #include <stddef.h>
48 #include <errno.h>
49 #include <limits.h>
50 #include <errno.h>
51 #include <sys/mman.h>
52 #include <sys/queue.h>
53 #include <sys/io.h>
54
55 #include <rte_common.h>
56 #include <rte_debug.h>
57 #include <rte_memory.h>
58 #include <rte_memzone.h>
59 #include <rte_launch.h>
60 #include <rte_tailq.h>
61 #include <rte_eal.h>
62 #include <rte_eal_memconfig.h>
63 #include <rte_per_lcore.h>
64 #include <rte_lcore.h>
65 #include <rte_log.h>
66 #include <rte_random.h>
67 #include <rte_cycles.h>
68 #include <rte_string_fns.h>
69 #include <rte_cpuflags.h>
70 #include <rte_interrupts.h>
71 #include <rte_pci.h>
72 #include <rte_common.h>
73 #include <rte_version.h>
74 #include <rte_atomic.h>
75 #include <malloc_heap.h>
76 #include <rte_eth_ring.h>
77
78 #include "eal_private.h"
79 #include "eal_thread.h"
80 #include "eal_internal_cfg.h"
81 #include "eal_filesystem.h"
82 #include "eal_hugepages.h"
83
84 #define OPT_HUGE_DIR    "huge-dir"
85 #define OPT_PROC_TYPE   "proc-type"
86 #define OPT_NO_SHCONF   "no-shconf"
87 #define OPT_NO_HPET     "no-hpet"
88 #define OPT_VMWARE_TSC_MAP   "vmware-tsc-map"
89 #define OPT_NO_PCI      "no-pci"
90 #define OPT_NO_HUGE     "no-huge"
91 #define OPT_FILE_PREFIX "file-prefix"
92 #define OPT_SOCKET_MEM  "socket-mem"
93 #define OPT_USE_DEVICE  "use-device"
94 #define OPT_SYSLOG      "syslog"
95 #define OPT_BASE_VIRTADDR   "base-virtaddr"
96 #define OPT_XEN_DOM0    "xen-dom0"
97 #define OPT_CREATE_UIO_DEV "create-uio-dev"
98
99 #define RTE_EAL_BLACKLIST_SIZE  0x100
100
101 #define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
102
103 #define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10)
104
105 #define HIGHEST_RPL 3
106
107 #define BITS_PER_HEX 4
108
109 /* Allow the application to print its usage message too if set */
110 static rte_usage_hook_t rte_application_usage_hook = NULL;
111
112 TAILQ_HEAD(shared_driver_list, shared_driver);
113
114 /* Definition for shared object drivers. */
115 struct shared_driver {
116         TAILQ_ENTRY(shared_driver) next;
117
118         char    name[PATH_MAX];
119         void*   lib_handle;
120 };
121
122 /* List of external loadable drivers */
123 static struct shared_driver_list solib_list =
124 TAILQ_HEAD_INITIALIZER(solib_list);
125
126 /* early configuration structure, when memory config is not mmapped */
127 static struct rte_mem_config early_mem_config;
128
129 /* define fd variable here, because file needs to be kept open for the
130  * duration of the program, as we hold a write lock on it in the primary proc */
131 static int mem_cfg_fd = -1;
132
133 static struct flock wr_lock = {
134                 .l_type = F_WRLCK,
135                 .l_whence = SEEK_SET,
136                 .l_start = offsetof(struct rte_mem_config, memseg),
137                 .l_len = sizeof(early_mem_config.memseg),
138 };
139
140 /* Address of global and public configuration */
141 static struct rte_config rte_config = {
142                 .mem_config = &early_mem_config,
143 };
144
145 static struct rte_pci_addr eal_dev_blacklist[RTE_EAL_BLACKLIST_SIZE];
146
147 /* internal configuration (per-core) */
148 struct lcore_config lcore_config[RTE_MAX_LCORE];
149
150 /* internal configuration */
151 struct internal_config internal_config;
152
153 /* used by rte_rdtsc() */
154 int rte_cycles_vmware_tsc_map;
155
156 /* Return a pointer to the configuration structure */
157 struct rte_config *
158 rte_eal_get_configuration(void)
159 {
160         return &rte_config;
161 }
162
163 /* parse a sysfs (or other) file containing one integer value */
164 int
165 eal_parse_sysfs_value(const char *filename, unsigned long *val)
166 {
167         FILE *f;
168         char buf[BUFSIZ];
169         char *end = NULL;
170
171         if ((f = fopen(filename, "r")) == NULL) {
172                 RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
173                         __func__, filename);
174                 return -1;
175         }
176
177         if (fgets(buf, sizeof(buf), f) == NULL) {
178                 RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
179                         __func__, filename);
180                 fclose(f);
181                 return -1;
182         }
183         *val = strtoul(buf, &end, 0);
184         if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
185                 RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
186                                 __func__, filename);
187                 fclose(f);
188                 return -1;
189         }
190         fclose(f);
191         return 0;
192 }
193
194
195 /* create memory configuration in shared/mmap memory. Take out
196  * a write lock on the memsegs, so we can auto-detect primary/secondary.
197  * This means we never close the file while running (auto-close on exit).
198  * We also don't lock the whole file, so that in future we can use read-locks
199  * on other parts, e.g. memzones, to detect if there are running secondary
200  * processes. */
201 static void
202 rte_eal_config_create(void)
203 {
204         void *rte_mem_cfg_addr;
205         int retval;
206
207         const char *pathname = eal_runtime_config_path();
208
209         if (internal_config.no_shconf)
210                 return;
211
212         if (mem_cfg_fd < 0){
213                 mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
214                 if (mem_cfg_fd < 0)
215                         rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
216         }
217
218         retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
219         if (retval < 0){
220                 close(mem_cfg_fd);
221                 rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
222         }
223
224         retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
225         if (retval < 0){
226                 close(mem_cfg_fd);
227                 rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
228                                 "process running?\n", pathname);
229         }
230
231         rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
232                                 PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
233
234         if (rte_mem_cfg_addr == MAP_FAILED){
235                 rte_panic("Cannot mmap memory for rte_config\n");
236         }
237         memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
238         rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
239 }
240
241 /* attach to an existing shared memory config */
242 static void
243 rte_eal_config_attach(void)
244 {
245         void *rte_mem_cfg_addr;
246         const char *pathname = eal_runtime_config_path();
247
248         if (internal_config.no_shconf)
249                 return;
250
251         if (mem_cfg_fd < 0){
252                 mem_cfg_fd = open(pathname, O_RDWR);
253                 if (mem_cfg_fd < 0)
254                         rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
255         }
256
257         rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), 
258                                 PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
259         close(mem_cfg_fd);
260         if (rte_mem_cfg_addr == MAP_FAILED)
261                 rte_panic("Cannot mmap memory for rte_config\n");
262
263         rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
264 }
265
266 /* Detect if we are a primary or a secondary process */
267 static enum rte_proc_type_t
268 eal_proc_type_detect(void)
269 {
270         enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
271         const char *pathname = eal_runtime_config_path();
272
273         /* if we can open the file but not get a write-lock we are a secondary
274          * process. NOTE: if we get a file handle back, we keep that open
275          * and don't close it to prevent a race condition between multiple opens */
276         if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
277                         (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
278                 ptype = RTE_PROC_SECONDARY;
279
280         RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
281                         ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
282
283         return ptype;
284 }
285
286 /* Sets up rte_config structure with the pointer to shared memory config.*/
287 static void
288 rte_config_init(void)
289 {
290         /* set the magic in configuration structure */
291         rte_config.magic = RTE_MAGIC;
292         rte_config.process_type = (internal_config.process_type == RTE_PROC_AUTO) ?
293                         eal_proc_type_detect() : /* for auto, detect the type */
294                         internal_config.process_type; /* otherwise use what's already set */
295
296         switch (rte_config.process_type){
297         case RTE_PROC_PRIMARY:
298                 rte_eal_config_create();
299                 break;
300         case RTE_PROC_SECONDARY:
301                 rte_eal_config_attach();
302                 rte_eal_mcfg_wait_complete(rte_config.mem_config);
303                 break;
304         case RTE_PROC_AUTO:
305         case RTE_PROC_INVALID:
306                 rte_panic("Invalid process type\n");
307         }
308 }
309
310 /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
311 static void
312 eal_hugedirs_unlock(void)
313 {
314         int i;
315
316         for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
317         {
318                 /* skip uninitialized */
319                 if (internal_config.hugepage_info[i].lock_descriptor < 0)
320                         continue;
321                 /* unlock hugepage file */
322                 flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
323                 close(internal_config.hugepage_info[i].lock_descriptor);
324                 /* reset the field */
325                 internal_config.hugepage_info[i].lock_descriptor = -1;
326         }
327 }
328
329 /* display usage */
330 static void
331 eal_usage(const char *prgname)
332 {
333         printf("\nUsage: %s -c COREMASK -n NUM [-m NB] [-r NUM] [-b <domain:bus:devid.func>]"
334                "[--proc-type primary|secondary|auto] \n\n"
335                "EAL options:\n"
336                "  -c COREMASK  : A hexadecimal bitmask of cores to run on\n"
337                "  -n NUM       : Number of memory channels\n"
338                    "  -v           : Display version information on startup\n"
339                "  -d LIB.so    : add driver (can be used multiple times)\n"
340                "  -b <domain:bus:devid.func>: to prevent EAL from using specified "
341            "PCI device\n"
342                "                 (multiple -b options are allowed)\n"
343                "  -m MB        : memory to allocate (see also --"OPT_SOCKET_MEM")\n"
344                "  -r NUM       : force number of memory ranks (don't detect)\n"
345                "  --"OPT_XEN_DOM0" : support application running on Xen Domain0 "
346                            "without hugetlbfs\n"
347                "  --"OPT_SYSLOG"     : set syslog facility\n"
348                "  --"OPT_SOCKET_MEM" : memory to allocate on specific \n"
349                    "                 sockets (use comma separated values)\n"
350                "  --"OPT_HUGE_DIR"   : directory where hugetlbfs is mounted\n"
351                "  --"OPT_PROC_TYPE"  : type of this process\n"
352                "  --"OPT_FILE_PREFIX": prefix for hugepage filenames\n"
353                "  --"OPT_USE_DEVICE": use the specified ethernet device(s) only. "
354                            "Use comma-separate <[domain:]bus:devid.func> values.\n"
355                "               [NOTE: Cannot be used with -b option]\n"
356                "  --"OPT_VMWARE_TSC_MAP": use VMware TSC map instead of "
357                            "native RDTSC\n"
358                "  --"OPT_BASE_VIRTADDR": specify base virtual address\n"
359                "  --"OPT_CREATE_UIO_DEV": create /dev/uioX (usually done by hotplug)\n"
360                "\nEAL options for DEBUG use only:\n"
361                "  --"OPT_NO_HUGE"  : use malloc instead of hugetlbfs\n"
362                "  --"OPT_NO_PCI"   : disable pci\n"
363                "  --"OPT_NO_HPET"  : disable hpet\n"
364                "  --"OPT_NO_SHCONF": no shared config (mmap'd files)\n"
365                "\n",
366                prgname);
367         /* Allow the application to print its usage message too if hook is set */
368         if ( rte_application_usage_hook ) {
369                 printf("===== Application Usage =====\n\n");
370                 rte_application_usage_hook(prgname);
371         }
372 }
373
374 /* Set a per-application usage message */
375 rte_usage_hook_t
376 rte_set_application_usage_hook( rte_usage_hook_t usage_func )
377 {
378         rte_usage_hook_t        old_func;
379
380         /* Will be NULL on the first call to denote the last usage routine. */
381         old_func                                        = rte_application_usage_hook;
382         rte_application_usage_hook      = usage_func;
383
384         return old_func;
385 }
386
/*
 * Coremask parsing: the coremask is given as a hexadecimal string and is
 * used to fill the global configuration (core role and core count).
 */

/*
 * Convert a single hexadecimal digit character to its numeric value.
 * No validation is done here: the caller must only pass characters for
 * which isxdigit() is true, the result is meaningless for anything else.
 */
static int xdigit2val(unsigned char c)
{
	if (isdigit(c))
		return c - '0';

	/* letters: pick the base that matches the letter's case */
	return c - (isupper(c) ? 'A' : 'a') + 10;
}
403 static int
404 eal_parse_coremask(const char *coremask)
405 {
406         struct rte_config *cfg = rte_eal_get_configuration();
407         int i, j, idx = 0 ;
408         unsigned count = 0;
409         char c;
410         int val;
411
412         if (coremask == NULL)
413                 return -1;
414         /* Remove all blank characters ahead and after .
415          * Remove 0x/0X if exists.
416          */
417         while (isblank(*coremask))
418                 coremask++;
419         if (coremask[0] == '0' && ((coremask[1] == 'x')
420                 ||  (coremask[1] == 'X')) )
421                 coremask += 2;
422         i = strnlen(coremask, PATH_MAX);
423         while ((i > 0) && isblank(coremask[i - 1]))
424                 i--;
425         if (i == 0)
426                 return -1;
427
428         for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
429                 c = coremask[i];
430                 if (isxdigit(c) == 0) {
431                         /* invalid characters */
432                         return (-1);
433                 }
434                 val = xdigit2val(c);
435                 for(j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++) {
436                         if((1 << j) & val) {
437                                 cfg->lcore_role[idx] = ROLE_RTE;
438                                 if(count == 0)
439                                         cfg->master_lcore = idx;
440                                 count++;
441                         } else  {
442                                 cfg->lcore_role[idx] = ROLE_OFF;
443                         }
444                 }
445         }
446         for(; i >= 0; i--)
447                 if(coremask[i] != '0')
448                         return -1;
449         for(; idx < RTE_MAX_LCORE; idx++)
450                 cfg->lcore_role[idx] = ROLE_OFF;
451         if(count == 0)
452                 return -1;
453         /* Update the count of enabled logical cores of the EAL configuration */
454         cfg->lcore_count = count;
455         return 0;
456 }
457
458 static int
459 eal_parse_syslog(const char *facility)
460 {
461         int i;
462         static struct {
463                 const char *name;
464                 int value;
465         } map[] = {
466                 { "auth", LOG_AUTH },
467                 { "cron", LOG_CRON },
468                 { "daemon", LOG_DAEMON },
469                 { "ftp", LOG_FTP },
470                 { "kern", LOG_KERN },
471                 { "lpr", LOG_LPR },
472                 { "mail", LOG_MAIL },
473                 { "news", LOG_NEWS },
474                 { "syslog", LOG_SYSLOG },
475                 { "user", LOG_USER },
476                 { "uucp", LOG_UUCP },
477                 { "local0", LOG_LOCAL0 },
478                 { "local1", LOG_LOCAL1 },
479                 { "local2", LOG_LOCAL2 },
480                 { "local3", LOG_LOCAL3 },
481                 { "local4", LOG_LOCAL4 },
482                 { "local5", LOG_LOCAL5 },
483                 { "local6", LOG_LOCAL6 },
484                 { "local7", LOG_LOCAL7 },
485                 { NULL, 0 }
486         };
487
488         for (i = 0; map[i].name; i++) {
489                 if (!strcmp(facility, map[i].name)) {
490                         internal_config.syslog_facility = map[i].value;
491                         return 0;
492                 }
493         }
494         return -1;
495 }
496
497 static int
498 eal_parse_socket_mem(char *socket_mem)
499 {
500         char * arg[RTE_MAX_NUMA_NODES];
501         char *end;
502         int arg_num, i, len;
503         uint64_t total_mem = 0;
504
505         len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
506         if (len == SOCKET_MEM_STRLEN) {
507                 RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
508                 return -1;
509         }
510
511         /* all other error cases will be caught later */
512         if (!isdigit(socket_mem[len-1]))
513                 return -1;
514
515         /* split the optarg into separate socket values */
516         arg_num = rte_strsplit(socket_mem, len,
517                         arg, RTE_MAX_NUMA_NODES, ',');
518
519         /* if split failed, or 0 arguments */
520         if (arg_num <= 0)
521                 return -1;
522
523         internal_config.force_sockets = 1;
524
525         /* parse each defined socket option */
526         errno = 0;
527         for (i = 0; i < arg_num; i++) {
528                 end = NULL;
529                 internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
530
531                 /* check for invalid input */
532                 if ((errno != 0)  ||
533                                 (arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
534                         return -1;
535                 internal_config.socket_mem[i] *= 1024ULL;
536                 internal_config.socket_mem[i] *= 1024ULL;
537                 total_mem += internal_config.socket_mem[i];
538         }
539
540         /* check if we have a positive amount of total memory */
541         if (total_mem == 0)
542                 return -1;
543
544         return 0;
545 }
546
547 static int
548 eal_parse_base_virtaddr(const char *arg)
549 {
550         char *end;
551         uint64_t addr;
552
553         addr = strtoull(arg, &end, 16);
554
555         /* check for errors */
556         if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0'))
557                 return -1;
558
559         /* make sure we don't exceed 32-bit boundary on 32-bit target */
560 #ifndef RTE_ARCH_X86_64
561         if (addr >= UINTPTR_MAX)
562                 return -1;
563 #endif
564
565         /* align the addr on 2M boundary */
566         internal_config.base_virtaddr = RTE_PTR_ALIGN_CEIL((uintptr_t)addr,
567                                                            RTE_PGSIZE_2M);
568
569         return 0;
570 }
571
572 static inline size_t
573 eal_get_hugepage_mem_size(void)
574 {
575         uint64_t size = 0;
576         unsigned i, j;
577
578         for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
579                 struct hugepage_info *hpi = &internal_config.hugepage_info[i];
580                 if (hpi->hugedir != NULL) {
581                         for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
582                                 size += hpi->hugepage_sz * hpi->num_pages[j];
583                         }
584                 }
585         }
586
587         return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
588 }
589
590 static enum rte_proc_type_t
591 eal_parse_proc_type(const char *arg)
592 {
593         if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
594                 return RTE_PROC_PRIMARY;
595         if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
596                 return RTE_PROC_SECONDARY;
597         if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
598                 return RTE_PROC_AUTO;
599
600         return RTE_PROC_INVALID;
601 }
602
603 static ssize_t
604 eal_parse_blacklist_opt(const char *optarg, size_t idx)
605 {
606         if (idx >= sizeof (eal_dev_blacklist) / sizeof (eal_dev_blacklist[0])) {
607                 RTE_LOG(ERR, EAL, "%s - too many devices to blacklist...\n", optarg);
608                 return (-EINVAL);
609         } else if (eal_parse_pci_DomBDF(optarg, eal_dev_blacklist + idx) < 0 &&
610                         eal_parse_pci_BDF(optarg, eal_dev_blacklist + idx) < 0) {
611                 RTE_LOG(ERR, EAL, "%s - invalid device to blacklist...\n", optarg);
612                 return (-EINVAL);
613         }
614
615         idx += 1;
616         return (idx);
617 }
618
619 /* Parse the argument given in the command line of the application */
620 static int
621 eal_parse_args(int argc, char **argv)
622 {
623         int opt, ret, i;
624         char **argvopt;
625         int option_index;
626         int coremask_ok = 0;
627         ssize_t blacklist_index = 0;
628         char *prgname = argv[0];
629         static struct option lgopts[] = {
630                 {OPT_NO_HUGE, 0, 0, 0},
631                 {OPT_NO_PCI, 0, 0, 0},
632                 {OPT_NO_HPET, 0, 0, 0},
633                 {OPT_VMWARE_TSC_MAP, 0, 0, 0},
634                 {OPT_HUGE_DIR, 1, 0, 0},
635                 {OPT_NO_SHCONF, 0, 0, 0},
636                 {OPT_PROC_TYPE, 1, 0, 0},
637                 {OPT_FILE_PREFIX, 1, 0, 0},
638                 {OPT_SOCKET_MEM, 1, 0, 0},
639                 {OPT_USE_DEVICE, 1, 0, 0},
640                 {OPT_SYSLOG, 1, NULL, 0},
641                 {OPT_BASE_VIRTADDR, 1, 0, 0},
642                 {OPT_XEN_DOM0, 0, 0, 0},
643                 {OPT_CREATE_UIO_DEV, 1, NULL, 0},
644                 {0, 0, 0, 0}
645         };
646         struct shared_driver *solib;
647
648         argvopt = argv;
649
650         internal_config.memory = 0;
651         internal_config.force_nrank = 0;
652         internal_config.force_nchannel = 0;
653         internal_config.hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
654         internal_config.hugepage_dir = NULL;
655         internal_config.force_sockets = 0;
656         internal_config.syslog_facility = LOG_DAEMON;
657         internal_config.xen_dom0_support = 0;
658 #ifdef RTE_LIBEAL_USE_HPET
659         internal_config.no_hpet = 0;
660 #else
661         internal_config.no_hpet = 1;
662 #endif
663         /* zero out the NUMA config */
664         for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
665                 internal_config.socket_mem[i] = 0;
666
667         /* zero out hugedir descriptors */
668         for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
669                 internal_config.hugepage_info[i].lock_descriptor = -1;
670
671         internal_config.vmware_tsc_map = 0;
672         internal_config.base_virtaddr = 0;
673
674         while ((opt = getopt_long(argc, argvopt, "b:c:d:m:n:r:v",
675                                   lgopts, &option_index)) != EOF) {
676
677                 switch (opt) {
678                 /* blacklist */
679                 case 'b':
680                         if ((blacklist_index = eal_parse_blacklist_opt(optarg,
681                             blacklist_index)) < 0) {
682                                 eal_usage(prgname);
683                                 return (-1);
684                         }
685                         break;
686                 /* coremask */
687                 case 'c':
688                         if (eal_parse_coremask(optarg) < 0) {
689                                 RTE_LOG(ERR, EAL, "invalid coremask\n");
690                                 eal_usage(prgname);
691                                 return -1;
692                         }
693                         coremask_ok = 1;
694                         break;
695                 /* force loading of external driver */
696                 case 'd':
697                         solib = malloc(sizeof(*solib));
698                         if (solib == NULL) {
699                                 RTE_LOG(ERR, EAL, "malloc(solib) failed\n");
700                                 return -1;
701                         }
702                         memset(solib, 0, sizeof(*solib));
703                         strncpy(solib->name, optarg, PATH_MAX-1);
704                         solib->name[PATH_MAX-1] = 0;
705                         TAILQ_INSERT_TAIL(&solib_list, solib, next);
706                         break;
707                 /* size of memory */
708                 case 'm':
709                         internal_config.memory = atoi(optarg);
710                         internal_config.memory *= 1024ULL;
711                         internal_config.memory *= 1024ULL;
712                         break;
713                 /* force number of channels */
714                 case 'n':
715                         internal_config.force_nchannel = atoi(optarg);
716                         if (internal_config.force_nchannel == 0 ||
717                             internal_config.force_nchannel > 4) {
718                                 RTE_LOG(ERR, EAL, "invalid channel number\n");
719                                 eal_usage(prgname);
720                                 return -1;
721                         }
722                         break;
723                 /* force number of ranks */
724                 case 'r':
725                         internal_config.force_nrank = atoi(optarg);
726                         if (internal_config.force_nrank == 0 ||
727                             internal_config.force_nrank > 16) {
728                                 RTE_LOG(ERR, EAL, "invalid rank number\n");
729                                 eal_usage(prgname);
730                                 return -1;
731                         }
732                         break;
733                 case 'v':
734                         /* since message is explicitly requested by user, we
735                          * write message at highest log level so it can always be seen
736                          * even if info or warning messages are disabled */
737                         RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
738                         break;
739
740                 /* long options */
741                 case 0:
742                         if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) {
743                                 internal_config.no_hugetlbfs = 1;
744                         }
745                         if (!strcmp(lgopts[option_index].name, OPT_XEN_DOM0)) {
746                 #ifdef RTE_LIBRTE_XEN_DOM0
747                                 internal_config.xen_dom0_support = 1;
748                 #else
749                                 RTE_LOG(ERR, EAL, "Can't support DPDK app "
750                                         "running on Dom0, please configure"
751                                         " RTE_LIBRTE_XEN_DOM0=y\n");
752                                 return -1;
753                 #endif 
754                         }
755                         else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) {
756                                 internal_config.no_pci = 1;
757                         }
758                         else if (!strcmp(lgopts[option_index].name, OPT_NO_HPET)) {
759                                 internal_config.no_hpet = 1;
760                         }
761                         else if (!strcmp(lgopts[option_index].name, OPT_VMWARE_TSC_MAP)) {
762                                 internal_config.vmware_tsc_map = 1;
763                         }
764                         else if (!strcmp(lgopts[option_index].name, OPT_NO_SHCONF)) {
765                                 internal_config.no_shconf = 1;
766                         }
767                         else if (!strcmp(lgopts[option_index].name, OPT_HUGE_DIR)) {
768                                 internal_config.hugepage_dir = optarg;
769                         }
770                         else if (!strcmp(lgopts[option_index].name, OPT_PROC_TYPE)) {
771                                 internal_config.process_type = eal_parse_proc_type(optarg);
772                         }
773                         else if (!strcmp(lgopts[option_index].name, OPT_FILE_PREFIX)) {
774                                 internal_config.hugefile_prefix = optarg;
775                         }
776                         else if (!strcmp(lgopts[option_index].name, OPT_SOCKET_MEM)) {
777                                 if (eal_parse_socket_mem(optarg) < 0) {
778                                         RTE_LOG(ERR, EAL, "invalid parameters for --"
779                                                         OPT_SOCKET_MEM "\n");
780                                         eal_usage(prgname);
781                                         return -1;
782                                 }
783                         }
784                         else if (!strcmp(lgopts[option_index].name, OPT_USE_DEVICE)) {
785                                 eal_dev_whitelist_add_entry(optarg);
786                         }
787                         else if (!strcmp(lgopts[option_index].name, OPT_SYSLOG)) {
788                                 if (eal_parse_syslog(optarg) < 0) {
789                                         RTE_LOG(ERR, EAL, "invalid parameters for --"
790                                                         OPT_SYSLOG "\n");
791                                         eal_usage(prgname);
792                                         return -1;
793                                 }
794                         }
795                         else if (!strcmp(lgopts[option_index].name, OPT_BASE_VIRTADDR)) {
796                                 if (eal_parse_base_virtaddr(optarg) < 0) {
797                                         RTE_LOG(ERR, EAL, "invalid parameter for --"
798                                                         OPT_BASE_VIRTADDR "\n");
799                                         eal_usage(prgname);
800                                         return -1;
801                                 }
802                         }
803                         else if (!strcmp(lgopts[option_index].name, OPT_CREATE_UIO_DEV)) {
804                                 internal_config.create_uio_dev = 1;
805                         }
806                         break;
807
808                 default:
809                         eal_usage(prgname);
810                         return -1;
811                 }
812         }
813
814         /* sanity checks */
815         if (!coremask_ok) {
816                 RTE_LOG(ERR, EAL, "coremask not specified\n");
817                 eal_usage(prgname);
818                 return -1;
819         }
820         if (internal_config.process_type == RTE_PROC_AUTO){
821                 internal_config.process_type = eal_proc_type_detect();
822         }
823         if (internal_config.process_type == RTE_PROC_INVALID){
824                 RTE_LOG(ERR, EAL, "Invalid process type specified\n");
825                 eal_usage(prgname);
826                 return -1;
827         }
828         if (internal_config.process_type == RTE_PROC_PRIMARY &&
829                         internal_config.force_nchannel == 0) {
830                 RTE_LOG(ERR, EAL, "Number of memory channels (-n) not specified\n");
831                 eal_usage(prgname);
832                 return -1;
833         }
834         if (index(internal_config.hugefile_prefix,'%') != NULL){
835                 RTE_LOG(ERR, EAL, "Invalid char, '%%', in '"OPT_FILE_PREFIX"' option\n");
836                 eal_usage(prgname);
837                 return -1;
838         }
839         if (internal_config.memory > 0 && internal_config.force_sockets == 1) {
840                 RTE_LOG(ERR, EAL, "Options -m and --socket-mem cannot be specified "
841                                 "at the same time\n");
842                 eal_usage(prgname);
843                 return -1;
844         }
845         /* --no-huge doesn't make sense with either -m or --socket-mem */
846         if (internal_config.no_hugetlbfs &&
847                         (internal_config.memory > 0 ||
848                                         internal_config.force_sockets == 1)) {
849                 RTE_LOG(ERR, EAL, "Options -m or --socket-mem cannot be specified "
850                                 "together with --no-huge!\n");
851                 eal_usage(prgname);
852                 return -1;
853         }
854         /* --xen-dom0 doesn't make sense with --socket-mem */
855         if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
856                 RTE_LOG(ERR, EAL, "Options --socket-mem cannot be specified "
857                                         "together with --xen_dom0!\n");
858                 eal_usage(prgname);
859                 return -1;
860         }
861         /* if no blacklist, parse a whitelist */
862         if (blacklist_index > 0) {
863                 if (eal_dev_whitelist_exists()) {
864                         RTE_LOG(ERR, EAL, "Error: blacklist [-b] and whitelist "
865                                         "[--use-device] options cannot be used at the same time\n");
866                         eal_usage(prgname);
867                         return -1;
868                 }
869                 rte_eal_pci_set_blacklist(eal_dev_blacklist, blacklist_index);
870         } else {
871                 if (eal_dev_whitelist_exists() && eal_dev_whitelist_parse() < 0) {
872                         RTE_LOG(ERR,EAL, "Error parsing whitelist[--use-device] options\n");
873                         return -1;
874                 }
875         }
876
877         if (optind >= 0)
878                 argv[optind-1] = prgname;
879
880         /* if no memory amounts were requested, this will result in 0 and
881          * will be overriden later, right after eal_hugepage_info_init() */
882         for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
883                 internal_config.memory += internal_config.socket_mem[i];
884
885         ret = optind-1;
886         optind = 0; /* reset getopt lib */
887         return ret;
888 }
889
890 static void
891 eal_check_mem_on_local_socket(void)
892 {
893         const struct rte_memseg *ms;
894         int i, socket_id;
895
896         socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
897
898         ms = rte_eal_get_physmem_layout();
899
900         for (i = 0; i < RTE_MAX_MEMSEG; i++)
901                 if (ms[i].socket_id == socket_id &&
902                                 ms[i].len > 0)
903                         return;
904
905         RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
906                         "memory on local socket!\n");
907 }
908
/*
 * Dummy lcore function: ignores its argument and always returns 0.
 */
static int
sync_func(void *arg)
{
	/* the argument is intentionally not used */
	(void)arg;

	return 0;
}
914
915 inline static void 
916 rte_eal_mcfg_complete(void)
917 {
918         /* ALL shared mem_config related INIT DONE */
919         if (rte_config.process_type == RTE_PROC_PRIMARY)
920                 rte_config.mem_config->magic = RTE_MAGIC;
921 }
922
923 /*
924  * Request iopl priviledge for all RPL, returns 0 on success
925  */
926 static int
927 rte_eal_iopl_init(void)
928 {
929         return iopl(HIGHEST_RPL);
930 }
931
932 /* Launch threads, called at application init(). */
933 int
934 rte_eal_init(int argc, char **argv)
935 {
936         int i, fctret, ret;
937         pthread_t thread_id;
938         static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
939         struct shared_driver *solib = NULL;
940         const char *logid;
941
942         if (!rte_atomic32_test_and_set(&run_once))
943                 return -1;
944
945         logid = strrchr(argv[0], '/');
946         logid = strdup(logid ? logid + 1: argv[0]);
947
948         thread_id = pthread_self();
949
950         if (rte_eal_log_early_init() < 0)
951                 rte_panic("Cannot init early logs\n");
952
953         if (rte_eal_cpu_init() < 0)
954                 rte_panic("Cannot detect lcores\n");
955
956         fctret = eal_parse_args(argc, argv);
957         if (fctret < 0)
958                 exit(1);
959
960         if (internal_config.no_hugetlbfs == 0 &&
961                         internal_config.process_type != RTE_PROC_SECONDARY &&
962                         internal_config.xen_dom0_support == 0 &&
963                         eal_hugepage_info_init() < 0)
964                 rte_panic("Cannot get hugepage information\n");
965
966         if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
967                 if (internal_config.no_hugetlbfs)
968                         internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
969                 else
970                         internal_config.memory = eal_get_hugepage_mem_size();
971         }
972
973         if (internal_config.vmware_tsc_map == 1) {
974 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
975                 rte_cycles_vmware_tsc_map = 1;
976                 RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
977                                 "you must have monitor_control.pseudo_perfctr = TRUE\n");
978 #else
979                 RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
980                                 "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
981 #endif
982         }
983
984         rte_srand(rte_rdtsc());
985
986         rte_config_init();
987
988         if (rte_eal_iopl_init() == 0)
989                 rte_config.flags |= EAL_FLG_HIGH_IOPL;
990         
991         if (rte_eal_pci_init() < 0)
992                 rte_panic("Cannot init PCI\n");
993
994 #ifdef RTE_LIBRTE_IVSHMEM
995         if (rte_eal_ivshmem_init() < 0)
996                 rte_panic("Cannot init IVSHMEM\n");
997 #endif
998
999         if (rte_eal_memory_init() < 0)
1000                 rte_panic("Cannot init memory\n");
1001
1002         /* the directories are locked during eal_hugepage_info_init */
1003         eal_hugedirs_unlock();
1004         
1005         if (rte_eal_memzone_init() < 0)
1006                 rte_panic("Cannot init memzone\n");
1007
1008         if (rte_eal_tailqs_init() < 0)
1009                 rte_panic("Cannot init tail queues for objects\n");
1010
1011 #ifdef RTE_LIBRTE_IVSHMEM
1012         if (rte_eal_ivshmem_obj_init() < 0)
1013                 rte_panic("Cannot init IVSHMEM objects\n");
1014 #endif
1015
1016         if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
1017                 rte_panic("Cannot init logs\n");
1018
1019         if (rte_eal_alarm_init() < 0)
1020                 rte_panic("Cannot init interrupt-handling thread\n");
1021
1022         if (rte_eal_intr_init() < 0)
1023                 rte_panic("Cannot init interrupt-handling thread\n");
1024
1025         if (rte_eal_timer_init() < 0)
1026                 rte_panic("Cannot init HPET or TSC timers\n");
1027
1028         eal_check_mem_on_local_socket();
1029
1030         rte_eal_mcfg_complete();
1031
1032         if (rte_eal_non_pci_ethdev_init() < 0)
1033                 rte_panic("Cannot init non-PCI eth_devs\n");
1034
1035         TAILQ_FOREACH(solib, &solib_list, next) {
1036                 solib->lib_handle = dlopen(solib->name, RTLD_NOW);
1037                 if ((solib->lib_handle == NULL) && (solib->name[0] != '/')) {
1038                         /* relative path: try again with "./" prefix */
1039                         char sopath[PATH_MAX];
1040                         snprintf(sopath, sizeof(sopath), "./%s", solib->name);
1041                         solib->lib_handle = dlopen(sopath, RTLD_NOW);
1042                 }
1043                 if (solib->lib_handle == NULL)
1044                         RTE_LOG(WARNING, EAL, "%s\n", dlerror());
1045         }
1046
1047         RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",
1048                 rte_config.master_lcore, (int)thread_id);
1049
1050         RTE_LCORE_FOREACH_SLAVE(i) {
1051
1052                 /*
1053                  * create communication pipes between master thread
1054                  * and children
1055                  */
1056                 if (pipe(lcore_config[i].pipe_master2slave) < 0)
1057                         rte_panic("Cannot create pipe\n");
1058                 if (pipe(lcore_config[i].pipe_slave2master) < 0)
1059                         rte_panic("Cannot create pipe\n");
1060
1061                 lcore_config[i].state = WAIT;
1062
1063                 /* create a thread for each lcore */
1064                 ret = pthread_create(&lcore_config[i].thread_id, NULL,
1065                                      eal_thread_loop, NULL);
1066                 if (ret != 0)
1067                         rte_panic("Cannot create thread\n");
1068         }
1069
1070         eal_thread_init_master(rte_config.master_lcore);
1071
1072         /*
1073          * Launch a dummy function on all slave lcores, so that master lcore
1074          * knows they are all ready when this function returns.
1075          */
1076         rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
1077         rte_eal_mp_wait_lcore();
1078
1079         return fctret;
1080 }
1081
1082 /* get core role */
1083 enum rte_lcore_role_t
1084 rte_eal_lcore_role(unsigned lcore_id)
1085 {
1086         return (rte_config.lcore_role[lcore_id]);
1087 }
1088
1089 enum rte_proc_type_t
1090 rte_eal_process_type(void)
1091 {
1092         return (rte_config.process_type);
1093 }
1094
1095 int rte_eal_has_hugepages(void)
1096 {
1097         return ! internal_config.no_hugetlbfs;
1098 }