eal: remove unused macros
[dpdk.git] / lib / librte_eal / linuxapp / eal / eal.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   Copyright(c) 2012-2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdint.h>
38 #include <string.h>
39 #include <stdarg.h>
40 #include <unistd.h>
41 #include <pthread.h>
42 #include <syslog.h>
43 #include <getopt.h>
44 #include <sys/file.h>
45 #include <fcntl.h>
46 #include <dlfcn.h>
47 #include <stddef.h>
48 #include <errno.h>
49 #include <limits.h>
50 #include <errno.h>
51 #include <sys/mman.h>
52 #include <sys/queue.h>
53 #include <sys/io.h>
54
55 #include <rte_common.h>
56 #include <rte_debug.h>
57 #include <rte_memory.h>
58 #include <rte_memzone.h>
59 #include <rte_launch.h>
60 #include <rte_tailq.h>
61 #include <rte_eal.h>
62 #include <rte_eal_memconfig.h>
63 #include <rte_per_lcore.h>
64 #include <rte_lcore.h>
65 #include <rte_log.h>
66 #include <rte_random.h>
67 #include <rte_cycles.h>
68 #include <rte_string_fns.h>
69 #include <rte_cpuflags.h>
70 #include <rte_interrupts.h>
71 #include <rte_pci.h>
72 #include <rte_devargs.h>
73 #include <rte_common.h>
74 #include <rte_version.h>
75 #include <rte_atomic.h>
76 #include <malloc_heap.h>
77 #include <rte_eth_ring.h>
78 #include <rte_dev.h>
79
80 #include "eal_private.h"
81 #include "eal_thread.h"
82 #include "eal_internal_cfg.h"
83 #include "eal_filesystem.h"
84 #include "eal_hugepages.h"
85
/* Names of the EAL long command-line options (without the leading "--") */
#define OPT_HUGE_DIR		"huge-dir"
#define OPT_PROC_TYPE		"proc-type"
#define OPT_NO_SHCONF		"no-shconf"
#define OPT_NO_HPET		"no-hpet"
#define OPT_VMWARE_TSC_MAP	"vmware-tsc-map"
#define OPT_NO_PCI		"no-pci"
#define OPT_NO_HUGE		"no-huge"
#define OPT_FILE_PREFIX		"file-prefix"
#define OPT_SOCKET_MEM		"socket-mem"
#define OPT_USE_DEVICE		"use-device"
#define OPT_PCI_WHITELIST	"pci-whitelist"
#define OPT_PCI_BLACKLIST	"pci-blacklist"
#define OPT_VDEV		"vdev"
#define OPT_SYSLOG		"syslog"
#define OPT_BASE_VIRTADDR	"base-virtaddr"
#define OPT_XEN_DOM0		"xen-dom0"
#define OPT_CREATE_UIO_DEV	"create-uio-dev"
#define OPT_VFIO_INTR		"vfio-intr"

/* 64 * 1024 * 1024 bytes */
#define MEMSIZE_IF_NO_HUGE_PAGE	(64ULL * 1024ULL * 1024ULL)

/* Exclusive upper bound on the accepted length of the --socket-mem string */
#define SOCKET_MEM_STRLEN	(RTE_MAX_NUMA_NODES * 10)

#define HIGHEST_RPL		3

/* Number of mask bits carried by one hexadecimal digit */
#define BITS_PER_HEX		4
112
113 /* Allow the application to print its usage message too if set */
114 static rte_usage_hook_t rte_application_usage_hook = NULL;
115
116 TAILQ_HEAD(shared_driver_list, shared_driver);
117
118 /* Definition for shared object drivers. */
119 struct shared_driver {
120         TAILQ_ENTRY(shared_driver) next;
121
122         char    name[PATH_MAX];
123         void*   lib_handle;
124 };
125
126 /* List of external loadable drivers */
127 static struct shared_driver_list solib_list =
128 TAILQ_HEAD_INITIALIZER(solib_list);
129
130 /* early configuration structure, when memory config is not mmapped */
131 static struct rte_mem_config early_mem_config;
132
133 /* define fd variable here, because file needs to be kept open for the
134  * duration of the program, as we hold a write lock on it in the primary proc */
135 static int mem_cfg_fd = -1;
136
137 static struct flock wr_lock = {
138                 .l_type = F_WRLCK,
139                 .l_whence = SEEK_SET,
140                 .l_start = offsetof(struct rte_mem_config, memseg),
141                 .l_len = sizeof(early_mem_config.memseg),
142 };
143
144 /* Address of global and public configuration */
145 static struct rte_config rte_config = {
146                 .mem_config = &early_mem_config,
147 };
148
149 /* internal configuration (per-core) */
150 struct lcore_config lcore_config[RTE_MAX_LCORE];
151
152 /* internal configuration */
153 struct internal_config internal_config;
154
155 /* used by rte_rdtsc() */
156 int rte_cycles_vmware_tsc_map;
157
158 /* Return a pointer to the configuration structure */
159 struct rte_config *
160 rte_eal_get_configuration(void)
161 {
162         return &rte_config;
163 }
164
165 /* parse a sysfs (or other) file containing one integer value */
166 int
167 eal_parse_sysfs_value(const char *filename, unsigned long *val)
168 {
169         FILE *f;
170         char buf[BUFSIZ];
171         char *end = NULL;
172
173         if ((f = fopen(filename, "r")) == NULL) {
174                 RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
175                         __func__, filename);
176                 return -1;
177         }
178
179         if (fgets(buf, sizeof(buf), f) == NULL) {
180                 RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
181                         __func__, filename);
182                 fclose(f);
183                 return -1;
184         }
185         *val = strtoul(buf, &end, 0);
186         if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
187                 RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
188                                 __func__, filename);
189                 fclose(f);
190                 return -1;
191         }
192         fclose(f);
193         return 0;
194 }
195
196
197 /* create memory configuration in shared/mmap memory. Take out
198  * a write lock on the memsegs, so we can auto-detect primary/secondary.
199  * This means we never close the file while running (auto-close on exit).
200  * We also don't lock the whole file, so that in future we can use read-locks
201  * on other parts, e.g. memzones, to detect if there are running secondary
202  * processes. */
203 static void
204 rte_eal_config_create(void)
205 {
206         void *rte_mem_cfg_addr;
207         int retval;
208
209         const char *pathname = eal_runtime_config_path();
210
211         if (internal_config.no_shconf)
212                 return;
213
214         /* map the config before hugepage address so that we don't waste a page */
215         if (internal_config.base_virtaddr != 0)
216                 rte_mem_cfg_addr = (void *)
217                         RTE_ALIGN_FLOOR(internal_config.base_virtaddr -
218                         sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE));
219         else
220                 rte_mem_cfg_addr = NULL;
221
222         if (mem_cfg_fd < 0){
223                 mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660);
224                 if (mem_cfg_fd < 0)
225                         rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
226         }
227
228         retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config));
229         if (retval < 0){
230                 close(mem_cfg_fd);
231                 rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname);
232         }
233
234         retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock);
235         if (retval < 0){
236                 close(mem_cfg_fd);
237                 rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary "
238                                 "process running?\n", pathname);
239         }
240
241         rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config),
242                                 PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
243
244         if (rte_mem_cfg_addr == MAP_FAILED){
245                 rte_panic("Cannot mmap memory for rte_config\n");
246         }
247         memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
248         rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
249
250         /* store address of the config in the config itself so that secondary
251          * processes could later map the config into this exact location */
252         rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
253
254 }
255
256 /* attach to an existing shared memory config */
257 static void
258 rte_eal_config_attach(void)
259 {
260         struct rte_mem_config *mem_config;
261
262         const char *pathname = eal_runtime_config_path();
263
264         if (internal_config.no_shconf)
265                 return;
266
267         if (mem_cfg_fd < 0){
268                 mem_cfg_fd = open(pathname, O_RDWR);
269                 if (mem_cfg_fd < 0)
270                         rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
271         }
272
273         /* map it as read-only first */
274         mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config),
275                         PROT_READ, MAP_SHARED, mem_cfg_fd, 0);
276         if (mem_config == MAP_FAILED)
277                 rte_panic("Cannot mmap memory for rte_config\n");
278
279         rte_config.mem_config = mem_config;
280 }
281
282 /* reattach the shared config at exact memory location primary process has it */
283 static void
284 rte_eal_config_reattach(void)
285 {
286         struct rte_mem_config *mem_config;
287         void *rte_mem_cfg_addr;
288
289         if (internal_config.no_shconf)
290                 return;
291
292         /* save the address primary process has mapped shared config to */
293         rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr;
294
295         /* unmap original config */
296         munmap(rte_config.mem_config, sizeof(struct rte_mem_config));
297
298         /* remap the config at proper address */
299         mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr,
300                         sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED,
301                         mem_cfg_fd, 0);
302         close(mem_cfg_fd);
303         if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr)
304                 rte_panic("Cannot mmap memory for rte_config\n");
305
306         rte_config.mem_config = mem_config;
307 }
308
309 /* Detect if we are a primary or a secondary process */
310 static enum rte_proc_type_t
311 eal_proc_type_detect(void)
312 {
313         enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
314         const char *pathname = eal_runtime_config_path();
315
316         /* if we can open the file but not get a write-lock we are a secondary
317          * process. NOTE: if we get a file handle back, we keep that open
318          * and don't close it to prevent a race condition between multiple opens */
319         if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
320                         (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
321                 ptype = RTE_PROC_SECONDARY;
322
323         RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
324                         ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
325
326         return ptype;
327 }
328
329 /* Sets up rte_config structure with the pointer to shared memory config.*/
330 static void
331 rte_config_init(void)
332 {
333         rte_config.process_type = (internal_config.process_type == RTE_PROC_AUTO) ?
334                         eal_proc_type_detect() : /* for auto, detect the type */
335                         internal_config.process_type; /* otherwise use what's already set */
336
337         switch (rte_config.process_type){
338         case RTE_PROC_PRIMARY:
339                 rte_eal_config_create();
340                 break;
341         case RTE_PROC_SECONDARY:
342                 rte_eal_config_attach();
343                 rte_eal_mcfg_wait_complete(rte_config.mem_config);
344                 rte_eal_config_reattach();
345                 break;
346         case RTE_PROC_AUTO:
347         case RTE_PROC_INVALID:
348                 rte_panic("Invalid process type\n");
349         }
350 }
351
352 /* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
353 static void
354 eal_hugedirs_unlock(void)
355 {
356         int i;
357
358         for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
359         {
360                 /* skip uninitialized */
361                 if (internal_config.hugepage_info[i].lock_descriptor < 0)
362                         continue;
363                 /* unlock hugepage file */
364                 flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
365                 close(internal_config.hugepage_info[i].lock_descriptor);
366                 /* reset the field */
367                 internal_config.hugepage_info[i].lock_descriptor = -1;
368         }
369 }
370
371 /* display usage */
372 static void
373 eal_usage(const char *prgname)
374 {
375         printf("\nUsage: %s -c COREMASK -n NUM [-m NB] [-r NUM] [-b <domain:bus:devid.func>]"
376                "[--proc-type primary|secondary|auto] \n\n"
377                "EAL options:\n"
378                "  -c COREMASK  : A hexadecimal bitmask of cores to run on\n"
379                "  -n NUM       : Number of memory channels\n"
380                    "  -v           : Display version information on startup\n"
381                "  -d LIB.so    : add driver (can be used multiple times)\n"
382                "  -m MB        : memory to allocate (see also --"OPT_SOCKET_MEM")\n"
383                "  -r NUM       : force number of memory ranks (don't detect)\n"
384                "  --"OPT_XEN_DOM0" : support application running on Xen Domain0 "
385                            "without hugetlbfs\n"
386                "  --"OPT_SYSLOG"     : set syslog facility\n"
387                "  --"OPT_SOCKET_MEM" : memory to allocate on specific \n"
388                    "                 sockets (use comma separated values)\n"
389                "  --"OPT_HUGE_DIR"   : directory where hugetlbfs is mounted\n"
390                "  --"OPT_PROC_TYPE"  : type of this process\n"
391                "  --"OPT_FILE_PREFIX": prefix for hugepage filenames\n"
392                "  --"OPT_PCI_BLACKLIST", -b: add a PCI device in black list.\n"
393                "               Prevent EAL from using this PCI device. The argument\n"
394                "               format is <domain:bus:devid.func>.\n"
395                "  --"OPT_PCI_WHITELIST", -w: add a PCI device in white list.\n"
396                "               Only use the specified PCI devices. The argument format\n"
397                "               is <[domain:]bus:devid.func>. This option can be present\n"
398                "               several times (once per device).\n"
399                "               [NOTE: PCI whitelist cannot be used with -b option]\n"
400                "  --"OPT_VDEV": add a virtual device.\n"
401                "               The argument format is <driver><id>[,key=val,...]\n"
402                "               (ex: --vdev=eth_pcap0,iface=eth2).\n"
403                "  --"OPT_VMWARE_TSC_MAP": use VMware TSC map instead of native RDTSC\n"
404                "  --"OPT_BASE_VIRTADDR": specify base virtual address\n"
405                "  --"OPT_VFIO_INTR": specify desired interrupt mode for VFIO "
406                            "(legacy|msi|msix)\n"
407                "  --"OPT_CREATE_UIO_DEV": create /dev/uioX (usually done by hotplug)\n"
408                "\nEAL options for DEBUG use only:\n"
409                "  --"OPT_NO_HUGE"  : use malloc instead of hugetlbfs\n"
410                "  --"OPT_NO_PCI"   : disable pci\n"
411                "  --"OPT_NO_HPET"  : disable hpet\n"
412                "  --"OPT_NO_SHCONF": no shared config (mmap'd files)\n"
413                "\n",
414                prgname);
415         /* Allow the application to print its usage message too if hook is set */
416         if ( rte_application_usage_hook ) {
417                 printf("===== Application Usage =====\n\n");
418                 rte_application_usage_hook(prgname);
419         }
420 }
421
422 /* Set a per-application usage message */
423 rte_usage_hook_t
424 rte_set_application_usage_hook( rte_usage_hook_t usage_func )
425 {
426         rte_usage_hook_t        old_func;
427
428         /* Will be NULL on the first call to denote the last usage routine. */
429         old_func                                        = rte_application_usage_hook;
430         rte_application_usage_hook      = usage_func;
431
432         return old_func;
433 }
434
/*
 * Coremask parsing: the coremask is given as a hexadecimal string; it is
 * parsed and the global configuration (core role and core count) is filled
 * with the parsed value.
 */

/*
 * Convert one hexadecimal digit character to its numeric value.
 * The caller is expected to have checked the character with isxdigit();
 * anything that is neither a decimal digit nor an upper-case letter is
 * treated as a lower-case hexadecimal letter.
 */
static int
xdigit2val(unsigned char c)
{
	if (isdigit(c))
		return c - '0';
	if (isupper(c))
		return c - 'A' + 10;
	return c - 'a' + 10;
}
451 static int
452 eal_parse_coremask(const char *coremask)
453 {
454         struct rte_config *cfg = rte_eal_get_configuration();
455         int i, j, idx = 0 ;
456         unsigned count = 0;
457         char c;
458         int val;
459
460         if (coremask == NULL)
461                 return -1;
462         /* Remove all blank characters ahead and after .
463          * Remove 0x/0X if exists.
464          */
465         while (isblank(*coremask))
466                 coremask++;
467         if (coremask[0] == '0' && ((coremask[1] == 'x')
468                 ||  (coremask[1] == 'X')) )
469                 coremask += 2;
470         i = strnlen(coremask, PATH_MAX);
471         while ((i > 0) && isblank(coremask[i - 1]))
472                 i--;
473         if (i == 0)
474                 return -1;
475
476         for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) {
477                 c = coremask[i];
478                 if (isxdigit(c) == 0) {
479                         /* invalid characters */
480                         return (-1);
481                 }
482                 val = xdigit2val(c);
483                 for(j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++) {
484                         if((1 << j) & val) {
485                                 if (!lcore_config[idx].detected) {
486                                         RTE_LOG(ERR, EAL, "lcore %u "
487                                                 "unavailable\n", idx);
488                                         return -1;
489                                 }
490                                 cfg->lcore_role[idx] = ROLE_RTE;
491                                 if(count == 0)
492                                         cfg->master_lcore = idx;
493                                 count++;
494                         } else  {
495                                 cfg->lcore_role[idx] = ROLE_OFF;
496                         }
497                 }
498         }
499         for(; i >= 0; i--)
500                 if(coremask[i] != '0')
501                         return -1;
502         for(; idx < RTE_MAX_LCORE; idx++)
503                 cfg->lcore_role[idx] = ROLE_OFF;
504         if(count == 0)
505                 return -1;
506         /* Update the count of enabled logical cores of the EAL configuration */
507         cfg->lcore_count = count;
508         return 0;
509 }
510
511 static int
512 eal_parse_syslog(const char *facility)
513 {
514         int i;
515         static struct {
516                 const char *name;
517                 int value;
518         } map[] = {
519                 { "auth", LOG_AUTH },
520                 { "cron", LOG_CRON },
521                 { "daemon", LOG_DAEMON },
522                 { "ftp", LOG_FTP },
523                 { "kern", LOG_KERN },
524                 { "lpr", LOG_LPR },
525                 { "mail", LOG_MAIL },
526                 { "news", LOG_NEWS },
527                 { "syslog", LOG_SYSLOG },
528                 { "user", LOG_USER },
529                 { "uucp", LOG_UUCP },
530                 { "local0", LOG_LOCAL0 },
531                 { "local1", LOG_LOCAL1 },
532                 { "local2", LOG_LOCAL2 },
533                 { "local3", LOG_LOCAL3 },
534                 { "local4", LOG_LOCAL4 },
535                 { "local5", LOG_LOCAL5 },
536                 { "local6", LOG_LOCAL6 },
537                 { "local7", LOG_LOCAL7 },
538                 { NULL, 0 }
539         };
540
541         for (i = 0; map[i].name; i++) {
542                 if (!strcmp(facility, map[i].name)) {
543                         internal_config.syslog_facility = map[i].value;
544                         return 0;
545                 }
546         }
547         return -1;
548 }
549
550 static int
551 eal_parse_socket_mem(char *socket_mem)
552 {
553         char * arg[RTE_MAX_NUMA_NODES];
554         char *end;
555         int arg_num, i, len;
556         uint64_t total_mem = 0;
557
558         len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
559         if (len == SOCKET_MEM_STRLEN) {
560                 RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
561                 return -1;
562         }
563
564         /* all other error cases will be caught later */
565         if (!isdigit(socket_mem[len-1]))
566                 return -1;
567
568         /* split the optarg into separate socket values */
569         arg_num = rte_strsplit(socket_mem, len,
570                         arg, RTE_MAX_NUMA_NODES, ',');
571
572         /* if split failed, or 0 arguments */
573         if (arg_num <= 0)
574                 return -1;
575
576         internal_config.force_sockets = 1;
577
578         /* parse each defined socket option */
579         errno = 0;
580         for (i = 0; i < arg_num; i++) {
581                 end = NULL;
582                 internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
583
584                 /* check for invalid input */
585                 if ((errno != 0)  ||
586                                 (arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
587                         return -1;
588                 internal_config.socket_mem[i] *= 1024ULL;
589                 internal_config.socket_mem[i] *= 1024ULL;
590                 total_mem += internal_config.socket_mem[i];
591         }
592
593         /* check if we have a positive amount of total memory */
594         if (total_mem == 0)
595                 return -1;
596
597         return 0;
598 }
599
600 static int
601 eal_parse_base_virtaddr(const char *arg)
602 {
603         char *end;
604         uint64_t addr;
605
606         errno = 0;
607         addr = strtoull(arg, &end, 16);
608
609         /* check for errors */
610         if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0'))
611                 return -1;
612
613         /* make sure we don't exceed 32-bit boundary on 32-bit target */
614 #ifndef RTE_ARCH_X86_64
615         if (addr >= UINTPTR_MAX)
616                 return -1;
617 #endif
618
619         /* align the addr on 2M boundary */
620         internal_config.base_virtaddr = RTE_PTR_ALIGN_CEIL((uintptr_t)addr,
621                                                            RTE_PGSIZE_2M);
622
623         return 0;
624 }
625
626 static int
627 eal_parse_vfio_intr(const char *mode)
628 {
629         unsigned i;
630         static struct {
631                 const char *name;
632                 enum rte_intr_mode value;
633         } map[] = {
634                 { "legacy", RTE_INTR_MODE_LEGACY },
635                 { "msi", RTE_INTR_MODE_MSI },
636                 { "msix", RTE_INTR_MODE_MSIX },
637         };
638
639         for (i = 0; i < RTE_DIM(map); i++) {
640                 if (!strcmp(mode, map[i].name)) {
641                         internal_config.vfio_intr_mode = map[i].value;
642                         return 0;
643                 }
644         }
645         return -1;
646 }
647
648 static inline size_t
649 eal_get_hugepage_mem_size(void)
650 {
651         uint64_t size = 0;
652         unsigned i, j;
653
654         for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
655                 struct hugepage_info *hpi = &internal_config.hugepage_info[i];
656                 if (hpi->hugedir != NULL) {
657                         for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
658                                 size += hpi->hugepage_sz * hpi->num_pages[j];
659                         }
660                 }
661         }
662
663         return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
664 }
665
666 static enum rte_proc_type_t
667 eal_parse_proc_type(const char *arg)
668 {
669         if (strncasecmp(arg, "primary", sizeof("primary")) == 0)
670                 return RTE_PROC_PRIMARY;
671         if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0)
672                 return RTE_PROC_SECONDARY;
673         if (strncasecmp(arg, "auto", sizeof("auto")) == 0)
674                 return RTE_PROC_AUTO;
675
676         return RTE_PROC_INVALID;
677 }
678
679 /* Parse the argument given in the command line of the application */
680 static int
681 eal_parse_args(int argc, char **argv)
682 {
683         int opt, ret, i;
684         char **argvopt;
685         int option_index;
686         int coremask_ok = 0;
687         char *prgname = argv[0];
688         static struct option lgopts[] = {
689                 {OPT_NO_HUGE, 0, 0, 0},
690                 {OPT_NO_PCI, 0, 0, 0},
691                 {OPT_NO_HPET, 0, 0, 0},
692                 {OPT_VMWARE_TSC_MAP, 0, 0, 0},
693                 {OPT_HUGE_DIR, 1, 0, 0},
694                 {OPT_NO_SHCONF, 0, 0, 0},
695                 {OPT_PROC_TYPE, 1, 0, 0},
696                 {OPT_FILE_PREFIX, 1, 0, 0},
697                 {OPT_SOCKET_MEM, 1, 0, 0},
698                 {OPT_PCI_WHITELIST, 1, 0, 0},
699                 {OPT_PCI_BLACKLIST, 1, 0, 0},
700                 {OPT_VDEV, 1, 0, 0},
701                 {OPT_SYSLOG, 1, NULL, 0},
702                 {OPT_VFIO_INTR, 1, NULL, 0},
703                 {OPT_BASE_VIRTADDR, 1, 0, 0},
704                 {OPT_XEN_DOM0, 0, 0, 0},
705                 {OPT_CREATE_UIO_DEV, 1, NULL, 0},
706                 {0, 0, 0, 0}
707         };
708         struct shared_driver *solib;
709
710         argvopt = argv;
711
712         internal_config.memory = 0;
713         internal_config.force_nrank = 0;
714         internal_config.force_nchannel = 0;
715         internal_config.hugefile_prefix = HUGEFILE_PREFIX_DEFAULT;
716         internal_config.hugepage_dir = NULL;
717         internal_config.force_sockets = 0;
718         internal_config.syslog_facility = LOG_DAEMON;
719         internal_config.xen_dom0_support = 0;
720         /* if set to NONE, interrupt mode is determined automatically */
721         internal_config.vfio_intr_mode = RTE_INTR_MODE_NONE;
722 #ifdef RTE_LIBEAL_USE_HPET
723         internal_config.no_hpet = 0;
724 #else
725         internal_config.no_hpet = 1;
726 #endif
727         /* zero out the NUMA config */
728         for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
729                 internal_config.socket_mem[i] = 0;
730
731         /* zero out hugedir descriptors */
732         for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
733                 internal_config.hugepage_info[i].lock_descriptor = -1;
734
735         internal_config.vmware_tsc_map = 0;
736         internal_config.base_virtaddr = 0;
737
738         while ((opt = getopt_long(argc, argvopt, "b:w:c:d:m:n:r:v",
739                                   lgopts, &option_index)) != EOF) {
740
741                 switch (opt) {
742                 /* blacklist */
743                 case 'b':
744                         if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI,
745                                         optarg) < 0) {
746                                 eal_usage(prgname);
747                                 return (-1);
748                         }
749                         break;
750                 /* whitelist */
751                 case 'w':
752                         if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_PCI,
753                                         optarg) < 0) {
754                                 eal_usage(prgname);
755                                 return -1;
756                         }
757                         break;
758                 /* coremask */
759                 case 'c':
760                         if (eal_parse_coremask(optarg) < 0) {
761                                 RTE_LOG(ERR, EAL, "invalid coremask\n");
762                                 eal_usage(prgname);
763                                 return -1;
764                         }
765                         coremask_ok = 1;
766                         break;
767                 /* force loading of external driver */
768                 case 'd':
769                         solib = malloc(sizeof(*solib));
770                         if (solib == NULL) {
771                                 RTE_LOG(ERR, EAL, "malloc(solib) failed\n");
772                                 return -1;
773                         }
774                         memset(solib, 0, sizeof(*solib));
775                         strncpy(solib->name, optarg, PATH_MAX-1);
776                         solib->name[PATH_MAX-1] = 0;
777                         TAILQ_INSERT_TAIL(&solib_list, solib, next);
778                         break;
779                 /* size of memory */
780                 case 'm':
781                         internal_config.memory = atoi(optarg);
782                         internal_config.memory *= 1024ULL;
783                         internal_config.memory *= 1024ULL;
784                         break;
785                 /* force number of channels */
786                 case 'n':
787                         internal_config.force_nchannel = atoi(optarg);
788                         if (internal_config.force_nchannel == 0 ||
789                             internal_config.force_nchannel > 4) {
790                                 RTE_LOG(ERR, EAL, "invalid channel number\n");
791                                 eal_usage(prgname);
792                                 return -1;
793                         }
794                         break;
795                 /* force number of ranks */
796                 case 'r':
797                         internal_config.force_nrank = atoi(optarg);
798                         if (internal_config.force_nrank == 0 ||
799                             internal_config.force_nrank > 16) {
800                                 RTE_LOG(ERR, EAL, "invalid rank number\n");
801                                 eal_usage(prgname);
802                                 return -1;
803                         }
804                         break;
805                 case 'v':
806                         /* since message is explicitly requested by user, we
807                          * write message at highest log level so it can always be seen
808                          * even if info or warning messages are disabled */
809                         RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version());
810                         break;
811
812                 /* long options */
813                 case 0:
814                         if (!strcmp(lgopts[option_index].name, OPT_NO_HUGE)) {
815                                 internal_config.no_hugetlbfs = 1;
816                         }
817                         if (!strcmp(lgopts[option_index].name, OPT_XEN_DOM0)) {
818                 #ifdef RTE_LIBRTE_XEN_DOM0
819                                 internal_config.xen_dom0_support = 1;
820                 #else
821                                 RTE_LOG(ERR, EAL, "Can't support DPDK app "
822                                         "running on Dom0, please configure"
823                                         " RTE_LIBRTE_XEN_DOM0=y\n");
824                                 return -1;
825                 #endif
826                         }
827                         else if (!strcmp(lgopts[option_index].name, OPT_NO_PCI)) {
828                                 internal_config.no_pci = 1;
829                         }
830                         else if (!strcmp(lgopts[option_index].name, OPT_NO_HPET)) {
831                                 internal_config.no_hpet = 1;
832                         }
833                         else if (!strcmp(lgopts[option_index].name, OPT_VMWARE_TSC_MAP)) {
834                                 internal_config.vmware_tsc_map = 1;
835                         }
836                         else if (!strcmp(lgopts[option_index].name, OPT_NO_SHCONF)) {
837                                 internal_config.no_shconf = 1;
838                         }
839                         else if (!strcmp(lgopts[option_index].name, OPT_HUGE_DIR)) {
840                                 internal_config.hugepage_dir = optarg;
841                         }
842                         else if (!strcmp(lgopts[option_index].name, OPT_PROC_TYPE)) {
843                                 internal_config.process_type = eal_parse_proc_type(optarg);
844                         }
845                         else if (!strcmp(lgopts[option_index].name, OPT_FILE_PREFIX)) {
846                                 internal_config.hugefile_prefix = optarg;
847                         }
848                         else if (!strcmp(lgopts[option_index].name, OPT_SOCKET_MEM)) {
849                                 if (eal_parse_socket_mem(optarg) < 0) {
850                                         RTE_LOG(ERR, EAL, "invalid parameters for --"
851                                                         OPT_SOCKET_MEM "\n");
852                                         eal_usage(prgname);
853                                         return -1;
854                                 }
855                         }
856                         else if (!strcmp(lgopts[option_index].name, OPT_USE_DEVICE)) {
857                                 printf("The --use-device option is deprecated, please use\n"
858                                         "--whitelist or --vdev instead.\n");
859                                 eal_usage(prgname);
860                                 return -1;
861                         }
862                         else if (!strcmp(lgopts[option_index].name, OPT_PCI_BLACKLIST)) {
863                                 if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI,
864                                                 optarg) < 0) {
865                                         eal_usage(prgname);
866                                         return -1;
867                                 }
868                         }
869                         else if (!strcmp(lgopts[option_index].name, OPT_PCI_WHITELIST)) {
870                                 if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_PCI,
871                                                 optarg) < 0) {
872                                         eal_usage(prgname);
873                                         return -1;
874                                 }
875                         }
876                         else if (!strcmp(lgopts[option_index].name, OPT_VDEV)) {
877                                 if (rte_eal_devargs_add(RTE_DEVTYPE_VIRTUAL,
878                                                 optarg) < 0) {
879                                         eal_usage(prgname);
880                                         return -1;
881                                 }
882                         }
883                         else if (!strcmp(lgopts[option_index].name, OPT_SYSLOG)) {
884                                 if (eal_parse_syslog(optarg) < 0) {
885                                         RTE_LOG(ERR, EAL, "invalid parameters for --"
886                                                         OPT_SYSLOG "\n");
887                                         eal_usage(prgname);
888                                         return -1;
889                                 }
890                         }
891                         else if (!strcmp(lgopts[option_index].name, OPT_BASE_VIRTADDR)) {
892                                 if (eal_parse_base_virtaddr(optarg) < 0) {
893                                         RTE_LOG(ERR, EAL, "invalid parameter for --"
894                                                         OPT_BASE_VIRTADDR "\n");
895                                         eal_usage(prgname);
896                                         return -1;
897                                 }
898                         }
899                         else if (!strcmp(lgopts[option_index].name, OPT_VFIO_INTR)) {
900                                 if (eal_parse_vfio_intr(optarg) < 0) {
901                                         RTE_LOG(ERR, EAL, "invalid parameters for --"
902                                                         OPT_VFIO_INTR "\n");
903                                         eal_usage(prgname);
904                                         return -1;
905                                 }
906                         }
907                         else if (!strcmp(lgopts[option_index].name, OPT_CREATE_UIO_DEV)) {
908                                 internal_config.create_uio_dev = 1;
909                         }
910                         break;
911
912                 default:
913                         eal_usage(prgname);
914                         return -1;
915                 }
916         }
917
918         /* sanity checks */
919         if (!coremask_ok) {
920                 RTE_LOG(ERR, EAL, "coremask not specified\n");
921                 eal_usage(prgname);
922                 return -1;
923         }
924         if (internal_config.process_type == RTE_PROC_AUTO){
925                 internal_config.process_type = eal_proc_type_detect();
926         }
927         if (internal_config.process_type == RTE_PROC_INVALID){
928                 RTE_LOG(ERR, EAL, "Invalid process type specified\n");
929                 eal_usage(prgname);
930                 return -1;
931         }
932         if (internal_config.process_type == RTE_PROC_PRIMARY &&
933                         internal_config.force_nchannel == 0) {
934                 RTE_LOG(ERR, EAL, "Number of memory channels (-n) not specified\n");
935                 eal_usage(prgname);
936                 return -1;
937         }
938         if (index(internal_config.hugefile_prefix,'%') != NULL){
939                 RTE_LOG(ERR, EAL, "Invalid char, '%%', in '"OPT_FILE_PREFIX"' option\n");
940                 eal_usage(prgname);
941                 return -1;
942         }
943         if (internal_config.memory > 0 && internal_config.force_sockets == 1) {
944                 RTE_LOG(ERR, EAL, "Options -m and --socket-mem cannot be specified "
945                                 "at the same time\n");
946                 eal_usage(prgname);
947                 return -1;
948         }
949         /* --no-huge doesn't make sense with either -m or --socket-mem */
950         if (internal_config.no_hugetlbfs &&
951                         (internal_config.memory > 0 ||
952                                         internal_config.force_sockets == 1)) {
953                 RTE_LOG(ERR, EAL, "Options -m or --socket-mem cannot be specified "
954                                 "together with --no-huge!\n");
955                 eal_usage(prgname);
956                 return -1;
957         }
958         /* --xen-dom0 doesn't make sense with --socket-mem */
959         if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) {
960                 RTE_LOG(ERR, EAL, "Options --socket-mem cannot be specified "
961                                         "together with --xen_dom0!\n");
962                 eal_usage(prgname);
963                 return -1;
964         }
965
966         if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 &&
967                 rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) {
968                 RTE_LOG(ERR, EAL, "Error: blacklist [-b] and whitelist "
969                         "[-w] options cannot be used at the same time\n");
970                 eal_usage(prgname);
971                 return -1;
972         }
973
974         if (optind >= 0)
975                 argv[optind-1] = prgname;
976
977         /* if no memory amounts were requested, this will result in 0 and
978          * will be overriden later, right after eal_hugepage_info_init() */
979         for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
980                 internal_config.memory += internal_config.socket_mem[i];
981
982         ret = optind-1;
983         optind = 0; /* reset getopt lib */
984         return ret;
985 }
986
987 static void
988 eal_check_mem_on_local_socket(void)
989 {
990         const struct rte_memseg *ms;
991         int i, socket_id;
992
993         socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
994
995         ms = rte_eal_get_physmem_layout();
996
997         for (i = 0; i < RTE_MAX_MEMSEG; i++)
998                 if (ms[i].socket_id == socket_id &&
999                                 ms[i].len > 0)
1000                         return;
1001
1002         RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
1003                         "memory on local socket!\n");
1004 }
1005
/*
 * No-op function launched on the slave lcores during init: the argument
 * is ignored and success (0) is always reported.
 */
static int
sync_func(void *arg __attribute__((unused)))
{
	return 0;
}
1011
1012 inline static void
1013 rte_eal_mcfg_complete(void)
1014 {
1015         /* ALL shared mem_config related INIT DONE */
1016         if (rte_config.process_type == RTE_PROC_PRIMARY)
1017                 rte_config.mem_config->magic = RTE_MAGIC;
1018 }
1019
1020 /*
1021  * Request iopl privilege for all RPL, returns 0 on success
1022  */
1023 static int
1024 rte_eal_iopl_init(void)
1025 {
1026         return iopl(HIGHEST_RPL);
1027 }
1028
1029 /* Launch threads, called at application init(). */
1030 int
1031 rte_eal_init(int argc, char **argv)
1032 {
1033         int i, fctret, ret;
1034         pthread_t thread_id;
1035         static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
1036         struct shared_driver *solib = NULL;
1037         const char *logid;
1038
1039         if (!rte_atomic32_test_and_set(&run_once))
1040                 return -1;
1041
1042         logid = strrchr(argv[0], '/');
1043         logid = strdup(logid ? logid + 1: argv[0]);
1044
1045         thread_id = pthread_self();
1046
1047         if (rte_eal_log_early_init() < 0)
1048                 rte_panic("Cannot init early logs\n");
1049
1050         if (rte_eal_cpu_init() < 0)
1051                 rte_panic("Cannot detect lcores\n");
1052
1053         fctret = eal_parse_args(argc, argv);
1054         if (fctret < 0)
1055                 exit(1);
1056
1057         if (internal_config.no_hugetlbfs == 0 &&
1058                         internal_config.process_type != RTE_PROC_SECONDARY &&
1059                         internal_config.xen_dom0_support == 0 &&
1060                         eal_hugepage_info_init() < 0)
1061                 rte_panic("Cannot get hugepage information\n");
1062
1063         if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
1064                 if (internal_config.no_hugetlbfs)
1065                         internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
1066                 else
1067                         internal_config.memory = eal_get_hugepage_mem_size();
1068         }
1069
1070         if (internal_config.vmware_tsc_map == 1) {
1071 #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT
1072                 rte_cycles_vmware_tsc_map = 1;
1073                 RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, "
1074                                 "you must have monitor_control.pseudo_perfctr = TRUE\n");
1075 #else
1076                 RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because "
1077                                 "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n");
1078 #endif
1079         }
1080
1081         rte_srand(rte_rdtsc());
1082
1083         rte_config_init();
1084
1085         if (rte_eal_iopl_init() == 0)
1086                 rte_config.flags |= EAL_FLG_HIGH_IOPL;
1087
1088         if (rte_eal_pci_init() < 0)
1089                 rte_panic("Cannot init PCI\n");
1090
1091 #ifdef RTE_LIBRTE_IVSHMEM
1092         if (rte_eal_ivshmem_init() < 0)
1093                 rte_panic("Cannot init IVSHMEM\n");
1094 #endif
1095
1096         if (rte_eal_memory_init() < 0)
1097                 rte_panic("Cannot init memory\n");
1098
1099         /* the directories are locked during eal_hugepage_info_init */
1100         eal_hugedirs_unlock();
1101
1102         if (rte_eal_memzone_init() < 0)
1103                 rte_panic("Cannot init memzone\n");
1104
1105         if (rte_eal_tailqs_init() < 0)
1106                 rte_panic("Cannot init tail queues for objects\n");
1107
1108 #ifdef RTE_LIBRTE_IVSHMEM
1109         if (rte_eal_ivshmem_obj_init() < 0)
1110                 rte_panic("Cannot init IVSHMEM objects\n");
1111 #endif
1112
1113         if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
1114                 rte_panic("Cannot init logs\n");
1115
1116         if (rte_eal_alarm_init() < 0)
1117                 rte_panic("Cannot init interrupt-handling thread\n");
1118
1119         if (rte_eal_intr_init() < 0)
1120                 rte_panic("Cannot init interrupt-handling thread\n");
1121
1122         if (rte_eal_timer_init() < 0)
1123                 rte_panic("Cannot init HPET or TSC timers\n");
1124
1125         eal_check_mem_on_local_socket();
1126
1127         rte_eal_mcfg_complete();
1128
1129         TAILQ_FOREACH(solib, &solib_list, next) {
1130                 RTE_LOG(INFO, EAL, "open shared lib %s\n", solib->name);
1131                 solib->lib_handle = dlopen(solib->name, RTLD_NOW);
1132                 if (solib->lib_handle == NULL)
1133                         RTE_LOG(WARNING, EAL, "%s\n", dlerror());
1134         }
1135
1136         eal_thread_init_master(rte_config.master_lcore);
1137
1138         RTE_LOG(DEBUG, EAL, "Master core %u is ready (tid=%x)\n",
1139                 rte_config.master_lcore, (int)thread_id);
1140
1141         if (rte_eal_dev_init(PMD_INIT_PRE_PCI_PROBE) < 0)
1142                 rte_panic("Cannot init pmd devices\n");
1143
1144         RTE_LCORE_FOREACH_SLAVE(i) {
1145
1146                 /*
1147                  * create communication pipes between master thread
1148                  * and children
1149                  */
1150                 if (pipe(lcore_config[i].pipe_master2slave) < 0)
1151                         rte_panic("Cannot create pipe\n");
1152                 if (pipe(lcore_config[i].pipe_slave2master) < 0)
1153                         rte_panic("Cannot create pipe\n");
1154
1155                 lcore_config[i].state = WAIT;
1156
1157                 /* create a thread for each lcore */
1158                 ret = pthread_create(&lcore_config[i].thread_id, NULL,
1159                                      eal_thread_loop, NULL);
1160                 if (ret != 0)
1161                         rte_panic("Cannot create thread\n");
1162         }
1163
1164         /*
1165          * Launch a dummy function on all slave lcores, so that master lcore
1166          * knows they are all ready when this function returns.
1167          */
1168         rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
1169         rte_eal_mp_wait_lcore();
1170
1171         /* Probe & Initialize PCI devices */
1172         if (rte_eal_pci_probe())
1173                         rte_panic("Cannot probe PCI\n");
1174
1175         /* Initialize any outstanding devices */
1176         if (rte_eal_dev_init(PMD_INIT_POST_PCI_PROBE) < 0)
1177                 rte_panic("Cannot init pmd devices\n");
1178
1179         return fctret;
1180 }
1181
1182 /* get core role */
1183 enum rte_lcore_role_t
1184 rte_eal_lcore_role(unsigned lcore_id)
1185 {
1186         return (rte_config.lcore_role[lcore_id]);
1187 }
1188
1189 enum rte_proc_type_t
1190 rte_eal_process_type(void)
1191 {
1192         return (rte_config.process_type);
1193 }
1194
1195 int rte_eal_has_hugepages(void)
1196 {
1197         return ! internal_config.no_hugetlbfs;
1198 }