lib/eal/linux/eal_memory.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation.
3  * Copyright(c) 2013 6WIND S.A.
4  */
5
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <stdbool.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include <stdint.h>
12 #include <inttypes.h>
13 #include <string.h>
14 #include <sys/mman.h>
15 #include <sys/stat.h>
16 #include <sys/file.h>
17 #include <sys/resource.h>
18 #include <unistd.h>
19 #include <limits.h>
20 #include <signal.h>
21 #include <setjmp.h>
22 #ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */
23 #define MEMFD_SUPPORTED
24 #endif
25 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
26 #include <numa.h>
27 #include <numaif.h>
28 #endif
29
30 #include <rte_errno.h>
31 #include <rte_log.h>
32 #include <rte_memory.h>
33 #include <rte_eal.h>
34 #include <rte_lcore.h>
35 #include <rte_common.h>
36
37 #include "eal_private.h"
38 #include "eal_memalloc.h"
39 #include "eal_memcfg.h"
40 #include "eal_internal_cfg.h"
41 #include "eal_filesystem.h"
42 #include "eal_hugepages.h"
43 #include "eal_options.h"
44
45 #define PFN_MASK_SIZE   8
46
47 /**
48  * @file
49  * Huge page mapping under Linux
50  *
51  * To reserve a big contiguous amount of memory, we use the hugepage
52  * feature of Linux. For that, we need to have hugetlbfs mounted. This
53  * code will create many files in the hugetlbfs directory (one per page) and
54  * map them into virtual memory. For each page, we will retrieve its
55  * physical address and remap it in order to have a virtually contiguous
56  * zone as well as a physically contiguous zone.
57  */
58
59 static int phys_addrs_available = -1;
60
61 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
62
63 uint64_t eal_get_baseaddr(void)
64 {
65         /*
66          * The Linux kernel uses a really high address as the starting
67          * address for serving mmap() calls. If some devices have addressing
68          * limitations and the IOVA mode is VA, this starting address is
69          * likely too high for those devices. However, it is possible to use
70          * a lower address in the process virtual address space, as with
71          * 64 bits there is a lot of available space.
72          *
73          * Currently known limitations are 39 or 40 bits. Setting the starting
74          * address at 4GB implies there are 508GB or 1020GB for mapping the
75          * available hugepages. This is likely enough for most systems, although
76          * a device with addressing limitations should call
77          * rte_mem_check_dma_mask to ensure all memory is within the supported
78          * range.
79          */
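        /*
         * E.g. with a 39-bit limit, 2^39 = 512GB; subtracting the 4GB base
         * address leaves 508GB. With 40 bits, 1024GB - 4GB = 1020GB.
         */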
80         return 0x100000000ULL;
81 }
82
83 /*
84  * Get physical address of any mapped virtual address in the current process.
85  */
86 phys_addr_t
87 rte_mem_virt2phy(const void *virtaddr)
88 {
89         int fd, retval;
90         uint64_t page, physaddr;
91         unsigned long virt_pfn;
92         int page_size;
93         off_t offset;
94
95         if (phys_addrs_available == 0)
96                 return RTE_BAD_IOVA;
97
98         /* standard page size */
99         page_size = getpagesize();
100
101         fd = open("/proc/self/pagemap", O_RDONLY);
102         if (fd < 0) {
103                 RTE_LOG(INFO, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
104                         __func__, strerror(errno));
105                 return RTE_BAD_IOVA;
106         }
107
108         virt_pfn = (unsigned long)virtaddr / page_size;
109         offset = sizeof(uint64_t) * virt_pfn;
110         if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
111                 RTE_LOG(INFO, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
112                                 __func__, strerror(errno));
113                 close(fd);
114                 return RTE_BAD_IOVA;
115         }
116
117         retval = read(fd, &page, PFN_MASK_SIZE);
118         close(fd);
119         if (retval < 0) {
120                 RTE_LOG(INFO, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
121                                 __func__, strerror(errno));
122                 return RTE_BAD_IOVA;
123         } else if (retval != PFN_MASK_SIZE) {
124                 RTE_LOG(INFO, EAL, "%s(): read %d bytes from /proc/self/pagemap "
125                                 "but expected %d:\n",
126                                 __func__, retval, PFN_MASK_SIZE);
127                 return RTE_BAD_IOVA;
128         }
129
130         /*
131          * the pfn (page frame number) is in bits 0-54 (see
132          * pagemap.txt in the Linux Documentation)
133          */
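        /*
         * A zero pfn usually means the page is not present, or that the
         * kernel hid the pfn from an unprivileged reader (newer kernels
         * require CAP_SYS_ADMIN to expose pfns in pagemap).
         */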
134         if ((page & 0x7fffffffffffffULL) == 0)
135                 return RTE_BAD_IOVA;
136
137         physaddr = ((page & 0x7fffffffffffffULL) * page_size)
138                 + ((unsigned long)virtaddr % page_size);
139
140         return physaddr;
141 }
142
143 rte_iova_t
144 rte_mem_virt2iova(const void *virtaddr)
145 {
146         if (rte_eal_iova_mode() == RTE_IOVA_VA)
147                 return (uintptr_t)virtaddr;
148         return rte_mem_virt2phy(virtaddr);
149 }
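/*
 * Usage sketch (illustrative, assuming 'buf' points into DPDK-managed memory
 * and its bus address is needed before programming a device):
 *
 *     rte_iova_t iova = rte_mem_virt2iova(buf);
 *     if (iova == RTE_BAD_IOVA)
 *         return -1;
 */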
150
151 /*
152  * For each hugepage in hugepg_tbl, fill the physaddr value. We find
153  * it by browsing the /proc/self/pagemap special file.
154  */
155 static int
156 find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
157 {
158         unsigned int i;
159         phys_addr_t addr;
160
161         for (i = 0; i < hpi->num_pages[0]; i++) {
162                 addr = rte_mem_virt2phy(hugepg_tbl[i].orig_va);
163                 if (addr == RTE_BAD_PHYS_ADDR)
164                         return -1;
165                 hugepg_tbl[i].physaddr = addr;
166         }
167         return 0;
168 }
169
170 /*
171  * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
172  */
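/*
 * These are dummy, monotonically increasing addresses used when real physical
 * addresses are unavailable or not needed (e.g. IOVA-as-VA mode); 'addr' is
 * static so the sequence continues across hugepage sizes.
 */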
173 static int
174 set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
175 {
176         unsigned int i;
177         static phys_addr_t addr;
178
179         for (i = 0; i < hpi->num_pages[0]; i++) {
180                 hugepg_tbl[i].physaddr = addr;
181                 addr += hugepg_tbl[i].size;
182         }
183         return 0;
184 }
185
186 /*
187  * Check whether address-space layout randomization is enabled in
188  * the kernel. This is important for multi-process, as it can prevent
189  * two processes from mapping data to the same virtual address.
190  * Returns:
191  *    0 - address space randomization disabled
192  *    1/2 - address space randomization enabled
193  *    negative error code on error
194  */
195 static int
196 aslr_enabled(void)
197 {
198         char c;
199         int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY);
200         if (fd < 0)
201                 return -errno;
202         retval = read(fd, &c, 1);
203         close(fd);
204         if (retval < 0)
205                 return -errno;
206         if (retval == 0)
207                 return -EIO;
208         switch (c) {
209                 case '0' : return 0;
210                 case '1' : return 1;
211                 case '2' : return 2;
212                 default: return -EINVAL;
213         }
214 }
215
216 static sigjmp_buf huge_jmpenv;
217
218 static void huge_sigbus_handler(int signo __rte_unused)
219 {
220         siglongjmp(huge_jmpenv, 1);
221 }
222
223 /* Put sigsetjmp into a wrapper function to avoid compiler errors. Any non-volatile,
224  * non-static local variable in the stack frame calling sigsetjmp might be
225  * clobbered by a call to longjmp.
226  */
227 static int huge_wrap_sigsetjmp(void)
228 {
229         return sigsetjmp(huge_jmpenv, 1);
230 }
231
232 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
233 /* Callback for numa library. */
234 void numa_error(char *where)
235 {
236         RTE_LOG(ERR, EAL, "%s failed: %s\n", where, strerror(errno));
237 }
238 #endif
239
240 /*
241  * Mmap all hugepages in the hugepage table: for each page, first open a
242  * file in hugetlbfs, then mmap() hugepage_sz bytes of it. The resulting
243  * virtual address is stored in hugepg_tbl[i].orig_va; the pages are later
244  * remapped into their final location (stored in hugepg_tbl[i].final_va)
245  * by remap_segment().
246  */
247 static unsigned
248 map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
249                   uint64_t *essential_memory __rte_unused)
250 {
251         int fd;
252         unsigned i;
253         void *virtaddr;
254 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
255         int node_id = -1;
256         int essential_prev = 0;
257         int oldpolicy;
258         struct bitmask *oldmask = NULL;
259         bool have_numa = true;
260         unsigned long maxnode = 0;
261         const struct internal_config *internal_conf =
262                 eal_get_internal_configuration();
263
264         /* Check if kernel supports NUMA. */
265         if (numa_available() != 0) {
266                 RTE_LOG(DEBUG, EAL, "NUMA is not supported.\n");
267                 have_numa = false;
268         }
269
270         if (have_numa) {
271                 RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n");
272                 oldmask = numa_allocate_nodemask();
273                 if (get_mempolicy(&oldpolicy, oldmask->maskp,
274                                   oldmask->size + 1, 0, 0) < 0) {
275                         RTE_LOG(ERR, EAL,
276                                 "Failed to get current mempolicy: %s. "
277                                 "Assuming MPOL_DEFAULT.\n", strerror(errno));
278                         oldpolicy = MPOL_DEFAULT;
279                 }
280                 for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
281                         if (internal_conf->socket_mem[i])
282                                 maxnode = i + 1;
283         }
284 #endif
285
286         for (i = 0; i < hpi->num_pages[0]; i++) {
287                 struct hugepage_file *hf = &hugepg_tbl[i];
288                 uint64_t hugepage_sz = hpi->hugepage_sz;
289
290 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
291                 if (maxnode) {
292                         unsigned int j;
293
294                         for (j = 0; j < maxnode; j++)
295                                 if (essential_memory[j])
296                                         break;
297
298                         if (j == maxnode) {
299                                 node_id = (node_id + 1) % maxnode;
300                                 while (!internal_conf->socket_mem[node_id]) {
301                                         node_id++;
302                                         node_id %= maxnode;
303                                 }
304                                 essential_prev = 0;
305                         } else {
306                                 node_id = j;
307                                 essential_prev = essential_memory[j];
308
309                                 if (essential_memory[j] < hugepage_sz)
310                                         essential_memory[j] = 0;
311                                 else
312                                         essential_memory[j] -= hugepage_sz;
313                         }
314
315                         RTE_LOG(DEBUG, EAL,
316                                 "Setting policy MPOL_PREFERRED for socket %d\n",
317                                 node_id);
318                         numa_set_preferred(node_id);
319                 }
320 #endif
321
322                 hf->file_id = i;
323                 hf->size = hugepage_sz;
324                 eal_get_hugefile_path(hf->filepath, sizeof(hf->filepath),
325                                 hpi->hugedir, hf->file_id);
326                 hf->filepath[sizeof(hf->filepath) - 1] = '\0';
327
328                 /* try to create hugepage file */
329                 fd = open(hf->filepath, O_CREAT | O_RDWR, 0600);
330                 if (fd < 0) {
331                         RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
332                                         strerror(errno));
333                         goto out;
334                 }
335
336                 /* map the segment and populate the page tables; the kernel
337                  * fills this segment with zeros. We don't care where this
338                  * gets mapped - we already have contiguous memory areas
339                  * ready for us to map into.
340                  */
341                 virtaddr = mmap(NULL, hugepage_sz, PROT_READ | PROT_WRITE,
342                                 MAP_SHARED | MAP_POPULATE, fd, 0);
343                 if (virtaddr == MAP_FAILED) {
344                         RTE_LOG(DEBUG, EAL, "%s(): mmap failed: %s\n", __func__,
345                                         strerror(errno));
346                         close(fd);
347                         goto out;
348                 }
349
350                 hf->orig_va = virtaddr;
351
352                 /* In Linux, hugetlb limitations such as cgroup limits are
353                  * enforced at fault time instead of at mmap() time, even
354                  * with the MAP_POPULATE option. The kernel will send a
355                  * SIGBUS signal. To avoid being killed, save the stack
356                  * environment here; if SIGBUS happens, we can jump back
357                  * to it.
358                  */
359                 if (huge_wrap_sigsetjmp()) {
360                         RTE_LOG(DEBUG, EAL, "SIGBUS: Cannot mmap more "
361                                 "hugepages of size %u MB\n",
362                                 (unsigned int)(hugepage_sz / 0x100000));
363                         munmap(virtaddr, hugepage_sz);
364                         close(fd);
365                         unlink(hugepg_tbl[i].filepath);
366 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
367                         if (maxnode)
368                                 essential_memory[node_id] =
369                                         essential_prev;
370 #endif
371                         goto out;
372                 }
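                /* touch the page so it is faulted in right here, where the
                 * sigsetjmp context above can catch a resulting SIGBUS
                 */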
373                 *(int *)virtaddr = 0;
374
375                 /* set shared lock on the file. */
376                 if (flock(fd, LOCK_SH) < 0) {
377                         RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n",
378                                 __func__, strerror(errno));
379                         close(fd);
380                         goto out;
381                 }
382
383                 close(fd);
384         }
385
386 out:
387 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
388         if (maxnode) {
389                 RTE_LOG(DEBUG, EAL,
390                         "Restoring previous memory policy: %d\n", oldpolicy);
391                 if (oldpolicy == MPOL_DEFAULT) {
392                         numa_set_localalloc();
393                 } else if (set_mempolicy(oldpolicy, oldmask->maskp,
394                                          oldmask->size + 1) < 0) {
395                         RTE_LOG(ERR, EAL, "Failed to restore mempolicy: %s\n",
396                                 strerror(errno));
397                         numa_set_localalloc();
398                 }
399         }
400         if (oldmask != NULL)
401                 numa_free_cpumask(oldmask);
402 #endif
403         return i;
404 }
405
406 /*
407  * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge
408  * page.
409  */
410 static int
411 find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
412 {
413         int socket_id;
414         char *end, *nodestr;
415         unsigned i, hp_count = 0;
416         uint64_t virt_addr;
417         char buf[BUFSIZ];
418         char hugedir_str[PATH_MAX];
419         FILE *f;
420
421         f = fopen("/proc/self/numa_maps", "r");
422         if (f == NULL) {
423                 RTE_LOG(NOTICE, EAL, "NUMA support not available;"
424                         " consider that all memory is in socket_id 0\n");
425                 return 0;
426         }
427
428         snprintf(hugedir_str, sizeof(hugedir_str),
429                         "%s/%s", hpi->hugedir, eal_get_hugefile_prefix());
430
431         /* parse numa map */
432         while (fgets(buf, sizeof(buf), f) != NULL) {
433
434                 /* ignore non huge page */
435                 if (strstr(buf, " huge ") == NULL &&
436                                 strstr(buf, hugedir_str) == NULL)
437                         continue;
438
439                 /* get zone addr */
440                 virt_addr = strtoull(buf, &end, 16);
441                 if (virt_addr == 0 || end == buf) {
442                         RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
443                         goto error;
444                 }
445
446                 /* get node id (socket id) */
447                 nodestr = strstr(buf, " N");
448                 if (nodestr == NULL) {
449                         RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
450                         goto error;
451                 }
452                 nodestr += 2;
453                 end = strstr(nodestr, "=");
454                 if (end == NULL) {
455                         RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
456                         goto error;
457                 }
458                 end[0] = '\0';
459                 end = NULL;
460
461                 socket_id = strtoul(nodestr, &end, 0);
462                 if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) {
463                         RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__);
464                         goto error;
465                 }
466
467                 /* if we find this page in our mappings, set socket_id */
468                 for (i = 0; i < hpi->num_pages[0]; i++) {
469                         void *va = (void *)(unsigned long)virt_addr;
470                         if (hugepg_tbl[i].orig_va == va) {
471                                 hugepg_tbl[i].socket_id = socket_id;
472                                 hp_count++;
473 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
474                                 RTE_LOG(DEBUG, EAL,
475                                         "Hugepage %s is on socket %d\n",
476                                         hugepg_tbl[i].filepath, socket_id);
477 #endif
478                         }
479                 }
480         }
481
482         if (hp_count < hpi->num_pages[0])
483                 goto error;
484
485         fclose(f);
486         return 0;
487
488 error:
489         fclose(f);
490         return -1;
491 }
492
493 static int
494 cmp_physaddr(const void *a, const void *b)
495 {
496 #ifndef RTE_ARCH_PPC_64
497         const struct hugepage_file *p1 = a;
498         const struct hugepage_file *p2 = b;
499 #else
500         /* PowerPC needs memory sorted in reverse order from x86 */
501         const struct hugepage_file *p1 = b;
502         const struct hugepage_file *p2 = a;
503 #endif
504         if (p1->physaddr < p2->physaddr)
505                 return -1;
506         else if (p1->physaddr > p2->physaddr)
507                 return 1;
508         else
509                 return 0;
510 }
511
512 /*
513  * Uses mmap to create a shared memory area for storage of data.
514  * Used in this file to store the hugepage file map on disk.
515  */
516 static void *
517 create_shared_memory(const char *filename, const size_t mem_size)
518 {
519         void *retval;
520         int fd;
521         const struct internal_config *internal_conf =
522                 eal_get_internal_configuration();
523
524         /* if no shared files mode is used, create anonymous memory instead */
525         if (internal_conf->no_shconf) {
526                 retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
527                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
528                 if (retval == MAP_FAILED)
529                         return NULL;
530                 return retval;
531         }
532
533         fd = open(filename, O_CREAT | O_RDWR, 0600);
534         if (fd < 0)
535                 return NULL;
536         if (ftruncate(fd, mem_size) < 0) {
537                 close(fd);
538                 return NULL;
539         }
540         retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
541         close(fd);
542         if (retval == MAP_FAILED)
543                 return NULL;
544         return retval;
545 }
546
547 /*
548  * this copies *active* hugepages from one hugepage table to another.
549  * destination is typically the shared memory.
550  */
551 static int
552 copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size,
553                 const struct hugepage_file * src, int src_size)
554 {
555         int src_pos, dst_pos = 0;
556
557         for (src_pos = 0; src_pos < src_size; src_pos++) {
558                 if (src[src_pos].orig_va != NULL) {
559                         /* error on overflow attempt */
560                         if (dst_pos == dest_size)
561                                 return -1;
562                         memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file));
563                         dst_pos++;
564                 }
565         }
566         return 0;
567 }
568
569 static int
570 unlink_hugepage_files(struct hugepage_file *hugepg_tbl,
571                 unsigned num_hp_info)
572 {
573         unsigned socket, size;
574         int page, nrpages = 0;
575         const struct internal_config *internal_conf =
576                 eal_get_internal_configuration();
577
578         /* get total number of hugepages */
579         for (size = 0; size < num_hp_info; size++)
580                 for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
581                         nrpages +=
582                         internal_conf->hugepage_info[size].num_pages[socket];
583
584         for (page = 0; page < nrpages; page++) {
585                 struct hugepage_file *hp = &hugepg_tbl[page];
586
587                 if (hp->orig_va != NULL && unlink(hp->filepath)) {
588                         RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n",
589                                 __func__, hp->filepath, strerror(errno));
590                 }
591         }
592         return 0;
593 }
594
595 /*
596  * Unmaps hugepages that are not going to be used. Since we originally allocate
597  * ALL hugepages (not just those we need), additional unmapping needs to be done.
598  */
599 static int
600 unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl,
601                 struct hugepage_info *hpi,
602                 unsigned num_hp_info)
603 {
604         unsigned socket, size;
605         int page, nrpages = 0;
606         const struct internal_config *internal_conf =
607                 eal_get_internal_configuration();
608
609         /* get total number of hugepages */
610         for (size = 0; size < num_hp_info; size++)
611                 for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++)
612                         nrpages += internal_conf->hugepage_info[size].num_pages[socket];
613
614         for (size = 0; size < num_hp_info; size++) {
615                 for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
616                         unsigned pages_found = 0;
617
618                         /* traverse until we have unmapped all the unused pages */
619                         for (page = 0; page < nrpages; page++) {
620                                 struct hugepage_file *hp = &hugepg_tbl[page];
621
622                                 /* find a page that matches the criteria */
623                                 if ((hp->size == hpi[size].hugepage_sz) &&
624                                                 (hp->socket_id == (int) socket)) {
625
626                                         /* if we have already kept enough pages, unmap the rest */
627                                         if (pages_found == hpi[size].num_pages[socket]) {
628                                                 uint64_t unmap_len;
629
630                                                 unmap_len = hp->size;
631
632                                                 /* get start addr and len of the remaining segment */
633                                                 munmap(hp->orig_va,
634                                                         (size_t)unmap_len);
635
636                                                 hp->orig_va = NULL;
637                                                 if (unlink(hp->filepath) == -1) {
638                                                         RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n",
639                                                                         __func__, hp->filepath, strerror(errno));
640                                                         return -1;
641                                                 }
642                                         } else {
643                                                 /* this page is needed, keep it and continue */
644                                                 pages_found++;
645                                         }
646
647                                 } /* match page */
648                         } /* foreach page */
649                 } /* foreach socket */
650         } /* foreach pagesize */
651
652         return 0;
653 }
654
655 static int
656 remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
657 {
658         struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
659         struct rte_memseg_list *msl;
660         struct rte_fbarray *arr;
661         int cur_page, seg_len;
662         unsigned int msl_idx;
663         int ms_idx;
664         uint64_t page_sz;
665         size_t memseg_len;
666         int socket_id;
667 #ifndef RTE_ARCH_64
668         const struct internal_config *internal_conf =
669                 eal_get_internal_configuration();
670 #endif
671         page_sz = hugepages[seg_start].size;
672         socket_id = hugepages[seg_start].socket_id;
673         seg_len = seg_end - seg_start;
674
675         RTE_LOG(DEBUG, EAL, "Attempting to map %" PRIu64 "M on socket %i\n",
676                         (seg_len * page_sz) >> 20ULL, socket_id);
677
678         /* find free space in memseg lists */
679         for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
680                 bool empty;
681                 msl = &mcfg->memsegs[msl_idx];
682                 arr = &msl->memseg_arr;
683
684                 if (msl->page_sz != page_sz)
685                         continue;
686                 if (msl->socket_id != socket_id)
687                         continue;
688
689                 /* leave space for a hole if array is not empty */
690                 empty = arr->count == 0;
691                 ms_idx = rte_fbarray_find_next_n_free(arr, 0,
692                                 seg_len + (empty ? 0 : 1));
693
694                 /* memseg list is full? */
695                 if (ms_idx < 0)
696                         continue;
697
698                 /* leave some space between memsegs, they are not IOVA
699                  * contiguous, so they shouldn't be VA contiguous either.
700                  */
701                 if (!empty)
702                         ms_idx++;
703                 break;
704         }
705         if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
706                 RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
707                                 RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
708                                 RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
709                 return -1;
710         }
711
712 #ifdef RTE_ARCH_PPC_64
713         /* for PPC64 we go through the list backwards */
714         for (cur_page = seg_end - 1; cur_page >= seg_start;
715                         cur_page--, ms_idx++) {
716 #else
717         for (cur_page = seg_start; cur_page < seg_end; cur_page++, ms_idx++) {
718 #endif
719                 struct hugepage_file *hfile = &hugepages[cur_page];
720                 struct rte_memseg *ms = rte_fbarray_get(arr, ms_idx);
721                 void *addr;
722                 int fd;
723
724                 fd = open(hfile->filepath, O_RDWR);
725                 if (fd < 0) {
726                         RTE_LOG(ERR, EAL, "Could not open '%s': %s\n",
727                                         hfile->filepath, strerror(errno));
728                         return -1;
729                 }
730                 /* set shared lock on the file. */
731                 if (flock(fd, LOCK_SH) < 0) {
732                         RTE_LOG(DEBUG, EAL, "Could not lock '%s': %s\n",
733                                         hfile->filepath, strerror(errno));
734                         close(fd);
735                         return -1;
736                 }
737                 memseg_len = (size_t)page_sz;
738                 addr = RTE_PTR_ADD(msl->base_va, ms_idx * memseg_len);
739
740                 /* we know this address is already mmapped by memseg list, so
741                  * using MAP_FIXED here is safe
742                  */
743                 addr = mmap(addr, page_sz, PROT_READ | PROT_WRITE,
744                                 MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, 0);
745                 if (addr == MAP_FAILED) {
746                         RTE_LOG(ERR, EAL, "Couldn't remap '%s': %s\n",
747                                         hfile->filepath, strerror(errno));
748                         close(fd);
749                         return -1;
750                 }
751
752                 /* we have a new address, so unmap previous one */
753 #ifndef RTE_ARCH_64
754                 /* in 32-bit legacy mode, we have already unmapped the page */
755                 if (!internal_conf->legacy_mem)
756                         munmap(hfile->orig_va, page_sz);
757 #else
758                 munmap(hfile->orig_va, page_sz);
759 #endif
760
761                 hfile->orig_va = NULL;
762                 hfile->final_va = addr;
763
764                 /* rewrite physical addresses in IOVA as VA mode */
765                 if (rte_eal_iova_mode() == RTE_IOVA_VA)
766                         hfile->physaddr = (uintptr_t)addr;
767
768                 /* set up memseg data */
769                 ms->addr = addr;
770                 ms->hugepage_sz = page_sz;
771                 ms->len = memseg_len;
772                 ms->iova = hfile->physaddr;
773                 ms->socket_id = hfile->socket_id;
774                 ms->nchannel = rte_memory_get_nchannel();
775                 ms->nrank = rte_memory_get_nrank();
776
777                 rte_fbarray_set_used(arr, ms_idx);
778
779                 /* store segment fd internally */
780                 if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0)
781                         RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n",
782                                 rte_strerror(rte_errno));
783         }
784         RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
785                         (seg_len * page_sz) >> 20, socket_id);
786         return 0;
787 }
788
789 static uint64_t
790 get_mem_amount(uint64_t page_sz, uint64_t max_mem)
791 {
792         uint64_t area_sz, max_pages;
793
794         /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
795         max_pages = RTE_MAX_MEMSEG_PER_LIST;
796         max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
797
798         area_sz = RTE_MIN(page_sz * max_pages, max_mem);
799
800         /* make sure the list isn't smaller than the page size */
801         area_sz = RTE_MAX(area_sz, page_sz);
802
803         return RTE_ALIGN(area_sz, page_sz);
804 }
805
806 static int
807 memseg_list_free(struct rte_memseg_list *msl)
808 {
809         if (rte_fbarray_destroy(&msl->memseg_arr)) {
810                 RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
811                 return -1;
812         }
813         memset(msl, 0, sizeof(*msl));
814         return 0;
815 }
816
817 /*
818  * Our VA space is not preallocated yet, so preallocate it here. We need to know
819  * how many segments there are in order to map all pages into one address space,
820  * and leave appropriate holes between segments so that rte_malloc does not
821  * concatenate them into one big segment.
822  *
823  * We also need to unmap the original pages to free up address space.
824  */
825 static int __rte_unused
826 prealloc_segments(struct hugepage_file *hugepages, int n_pages)
827 {
828         struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
829         int cur_page, seg_start_page, end_seg, new_memseg;
830         unsigned int hpi_idx, socket, i;
831         int n_contig_segs, n_segs;
832         int msl_idx;
833         const struct internal_config *internal_conf =
834                 eal_get_internal_configuration();
835
836         /* before we preallocate segments, we need to free up our VA space.
837          * we're not removing files, and we already have information about
838          * PA-contiguousness, so it is safe to unmap everything.
839          */
840         for (cur_page = 0; cur_page < n_pages; cur_page++) {
841                 struct hugepage_file *hpi = &hugepages[cur_page];
842                 munmap(hpi->orig_va, hpi->size);
843                 hpi->orig_va = NULL;
844         }
845
846         /* we cannot know how many page sizes and sockets we have discovered, so
847          * loop over all of them
848          */
849         for (hpi_idx = 0; hpi_idx < internal_conf->num_hugepage_sizes;
850                         hpi_idx++) {
851                 uint64_t page_sz =
852                         internal_conf->hugepage_info[hpi_idx].hugepage_sz;
853
854                 for (i = 0; i < rte_socket_count(); i++) {
855                         struct rte_memseg_list *msl;
856
857                         socket = rte_socket_id_by_idx(i);
858                         n_contig_segs = 0;
859                         n_segs = 0;
860                         seg_start_page = -1;
861
862                         for (cur_page = 0; cur_page < n_pages; cur_page++) {
863                                 struct hugepage_file *prev, *cur;
864                                 int prev_seg_start_page = -1;
865
866                                 cur = &hugepages[cur_page];
867                                 prev = cur_page == 0 ? NULL :
868                                                 &hugepages[cur_page - 1];
869
870                                 new_memseg = 0;
871                                 end_seg = 0;
872
873                                 if (cur->size == 0)
874                                         end_seg = 1;
875                                 else if (cur->socket_id != (int) socket)
876                                         end_seg = 1;
877                                 else if (cur->size != page_sz)
878                                         end_seg = 1;
879                                 else if (cur_page == 0)
880                                         new_memseg = 1;
881 #ifdef RTE_ARCH_PPC_64
882                                 /* On the PPC64 architecture, mmap always starts
883                                  * from higher addresses to lower addresses. Here,
884                                  * physical addresses are in descending order.
885                                  */
886                                 else if ((prev->physaddr - cur->physaddr) !=
887                                                 cur->size)
888                                         new_memseg = 1;
889 #else
890                                 else if ((cur->physaddr - prev->physaddr) !=
891                                                 cur->size)
892                                         new_memseg = 1;
893 #endif
894                                 if (new_memseg) {
895                                         /* if we're already inside a segment,
896                                          * new segment means end of current one
897                                          */
898                                         if (seg_start_page != -1) {
899                                                 end_seg = 1;
900                                                 prev_seg_start_page =
901                                                                 seg_start_page;
902                                         }
903                                         seg_start_page = cur_page;
904                                 }
905
906                                 if (end_seg) {
907                                         if (prev_seg_start_page != -1) {
908                                                 /* we've found a new segment */
909                                                 n_contig_segs++;
910                                                 n_segs += cur_page -
911                                                         prev_seg_start_page;
912                                         } else if (seg_start_page != -1) {
913                                                 /* we didn't find new segment,
914                                                  * but did end current one
915                                                  */
916                                                 n_contig_segs++;
917                                                 n_segs += cur_page -
918                                                                 seg_start_page;
919                                                 seg_start_page = -1;
920                                                 continue;
921                                         } else {
922                                                 /* we're skipping this page */
923                                                 continue;
924                                         }
925                                 }
926                                 /* segment continues */
927                         }
928                         /* check if we missed last segment */
929                         if (seg_start_page != -1) {
930                                 n_contig_segs++;
931                                 n_segs += cur_page - seg_start_page;
932                         }
933
934                         /* if no segments were found, do not preallocate */
935                         if (n_segs == 0)
936                                 continue;
937
938                         /* we now have total number of pages that we will
939                          * allocate for this segment list. add separator pages
940                          * to the total count, and preallocate VA space.
941                          */
942                         n_segs += n_contig_segs - 1;
943
944                         /* now, preallocate VA space for these segments */
945
946                         /* first, find suitable memseg list for this */
947                         for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
948                                         msl_idx++) {
949                                 msl = &mcfg->memsegs[msl_idx];
950
951                                 if (msl->base_va != NULL)
952                                         continue;
953                                 break;
954                         }
955                         if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
956                                 RTE_LOG(ERR, EAL, "Not enough space in memseg lists, please increase %s\n",
957                                         RTE_STR(RTE_MAX_MEMSEG_LISTS));
958                                 return -1;
959                         }
960
961                         /* now, allocate fbarray itself */
962                         if (eal_memseg_list_init(msl, page_sz, n_segs,
963                                         socket, msl_idx, true) < 0)
964                                 return -1;
965
966                         /* finally, allocate VA space */
967                         if (eal_memseg_list_alloc(msl, 0) < 0) {
968                                 RTE_LOG(ERR, EAL, "Cannot preallocate 0x%"PRIx64"kB hugepages\n",
969                                         page_sz >> 10);
970                                 return -1;
971                         }
972                 }
973         }
974         return 0;
975 }
976
977 /*
978  * We cannot reallocate memseg lists on the fly because PPC64 stores pages
979  * backwards; therefore, we have to process the entire memseg first before
980  * remapping it into memseg list VA space.
981  */
982 static int
983 remap_needed_hugepages(struct hugepage_file *hugepages, int n_pages)
984 {
985         int cur_page, seg_start_page, new_memseg, ret;
986
987         seg_start_page = 0;
988         for (cur_page = 0; cur_page < n_pages; cur_page++) {
989                 struct hugepage_file *prev, *cur;
990
991                 new_memseg = 0;
992
993                 cur = &hugepages[cur_page];
994                 prev = cur_page == 0 ? NULL : &hugepages[cur_page - 1];
995
996                 /* if size is zero, no more pages left */
997                 if (cur->size == 0)
998                         break;
999
1000                 if (cur_page == 0)
1001                         new_memseg = 1;
1002                 else if (cur->socket_id != prev->socket_id)
1003                         new_memseg = 1;
1004                 else if (cur->size != prev->size)
1005                         new_memseg = 1;
1006 #ifdef RTE_ARCH_PPC_64
1007                 /* On the PPC64 architecture, mmap always starts from higher
1008                  * addresses to lower addresses. Here, physical addresses are in
1009                  * descending order.
1010                  */
1011                 else if ((prev->physaddr - cur->physaddr) != cur->size)
1012                         new_memseg = 1;
1013 #else
1014                 else if ((cur->physaddr - prev->physaddr) != cur->size)
1015                         new_memseg = 1;
1016 #endif
1017
1018                 if (new_memseg) {
1019                         /* if this isn't the first time, remap segment */
1020                         if (cur_page != 0) {
1021                                 ret = remap_segment(hugepages, seg_start_page,
1022                                                 cur_page);
1023                                 if (ret != 0)
1024                                         return -1;
1025                         }
1026                         /* remember where we started */
1027                         seg_start_page = cur_page;
1028                 }
1029                 /* continuation of previous memseg */
1030         }
1031         /* we were stopped, but we didn't remap the last segment, do it now */
1032         if (cur_page != 0) {
1033                 ret = remap_segment(hugepages, seg_start_page,
1034                                 cur_page);
1035                 if (ret != 0)
1036                         return -1;
1037         }
1038         return 0;
1039 }
1040
1041 static inline size_t
1042 eal_get_hugepage_mem_size(void)
1043 {
1044         uint64_t size = 0;
1045         unsigned i, j;
1046         struct internal_config *internal_conf =
1047                 eal_get_internal_configuration();
1048
1049         for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
1050                 struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
1051                 if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
1052                         for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
1053                                 size += hpi->hugepage_sz * hpi->num_pages[j];
1054                         }
1055                 }
1056         }
1057
1058         return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX;
1059 }
1060
1061 static struct sigaction huge_action_old;
1062 static int huge_need_recover;
1063
1064 static void
1065 huge_register_sigbus(void)
1066 {
1067         sigset_t mask;
1068         struct sigaction action;
1069
1070         sigemptyset(&mask);
1071         sigaddset(&mask, SIGBUS);
1072         action.sa_flags = 0;
1073         action.sa_mask = mask;
1074         action.sa_handler = huge_sigbus_handler;
1075
1076         huge_need_recover = !sigaction(SIGBUS, &action, &huge_action_old);
1077 }
1078
1079 static void
1080 huge_recover_sigbus(void)
1081 {
1082         if (huge_need_recover) {
1083                 sigaction(SIGBUS, &huge_action_old, NULL);
1084                 huge_need_recover = 0;
1085         }
1086 }
1087
1088 /*
1089  * Prepare physical memory mapping: fill the configuration structure with
1090  * this information; return 0 on success.
1091  *  1. map N huge pages in separate files in hugetlbfs
1092  *  2. find associated physical addr
1093  *  3. find associated NUMA socket ID
1094  *  4. sort all huge pages by physical address
1095  *  5. remap these N huge pages in the correct order
1096  *  6. unmap the first mapping
1097  *  7. fill memsegs in configuration with contiguous zones
1098  */
1099 static int
1100 eal_legacy_hugepage_init(void)
1101 {
1102         struct rte_mem_config *mcfg;
1103         struct hugepage_file *hugepage = NULL, *tmp_hp = NULL;
1104         struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
1105         struct internal_config *internal_conf =
1106                 eal_get_internal_configuration();
1107
1108         uint64_t memory[RTE_MAX_NUMA_NODES];
1109
1110         unsigned hp_offset;
1111         int i, j;
1112         int nr_hugefiles, nr_hugepages = 0;
1113         void *addr;
1114
1115         memset(used_hp, 0, sizeof(used_hp));
1116
1117         /* get pointer to global configuration */
1118         mcfg = rte_eal_get_configuration()->mem_config;
1119
1120         /* hugetlbfs can be disabled */
1121         if (internal_conf->no_hugetlbfs) {
1122                 void *prealloc_addr;
1123                 size_t mem_sz;
1124                 struct rte_memseg_list *msl;
1125                 int n_segs, fd, flags;
1126 #ifdef MEMFD_SUPPORTED
1127                 int memfd;
1128 #endif
1129                 uint64_t page_sz;
1130
1131                 /* nohuge mode is legacy mode */
1132                 internal_conf->legacy_mem = 1;
1133
1134                 /* nohuge mode is single-file segments mode */
1135                 internal_conf->single_file_segments = 1;
1136
1137                 /* create a memseg list */
1138                 msl = &mcfg->memsegs[0];
1139
1140                 mem_sz = internal_conf->memory;
1141                 page_sz = RTE_PGSIZE_4K;
1142                 n_segs = mem_sz / page_sz;
1143
1144                 if (eal_memseg_list_init_named(
1145                                 msl, "nohugemem", page_sz, n_segs, 0, true)) {
1146                         return -1;
1147                 }
1148
1149                 /* set up parameters for anonymous mmap */
1150                 fd = -1;
1151                 flags = MAP_PRIVATE | MAP_ANONYMOUS;
1152
1153 #ifdef MEMFD_SUPPORTED
1154                 /* create a memfd and store it in the segment fd table */
1155                 memfd = memfd_create("nohuge", 0);
1156                 if (memfd < 0) {
1157                         RTE_LOG(DEBUG, EAL, "Cannot create memfd: %s\n",
1158                                         strerror(errno));
1159                         RTE_LOG(DEBUG, EAL, "Falling back to anonymous map\n");
1160                 } else {
1161                         /* we got an fd - now resize it */
1162                         if (ftruncate(memfd, internal_conf->memory) < 0) {
1163                                 RTE_LOG(ERR, EAL, "Cannot resize memfd: %s\n",
1164                                                 strerror(errno));
1165                                 RTE_LOG(ERR, EAL, "Falling back to anonymous map\n");
1166                                 close(memfd);
1167                         } else {
1168                                 /* creating memfd-backed file was successful.
1169                                  * we want changes to memfd to be visible to
1170                                  * other processes (such as vhost backend), so
1171                                  * map it as shared memory.
1172                                  */
1173                                 RTE_LOG(DEBUG, EAL, "Using memfd for anonymous memory\n");
1174                                 fd = memfd;
1175                                 flags = MAP_SHARED;
1176                         }
1177                 }
1178 #endif
1179                 /* preallocate address space for the memory, so that it can be
1180                  * fit into the DMA mask.
1181                  */
1182                 if (eal_memseg_list_alloc(msl, 0)) {
1183                         RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
1184                         return -1;
1185                 }
1186
1187                 prealloc_addr = msl->base_va;
1188                 addr = mmap(prealloc_addr, mem_sz, PROT_READ | PROT_WRITE,
1189                                 flags | MAP_FIXED, fd, 0);
1190                 if (addr == MAP_FAILED || addr != prealloc_addr) {
1191                         RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
1192                                         strerror(errno));
1193                         munmap(prealloc_addr, mem_sz);
1194                         return -1;
1195                 }
1196
1197                 /* we're in single-file segments mode, so only the segment list
1198                  * fd needs to be set up.
1199                  */
1200                 if (fd != -1) {
1201                         if (eal_memalloc_set_seg_list_fd(0, fd) < 0) {
1202                                 RTE_LOG(ERR, EAL, "Cannot set up segment list fd\n");
1203                                 /* not a serious error, proceed */
1204                         }
1205                 }
1206
1207                 eal_memseg_list_populate(msl, addr, n_segs);
1208
1209                 if (mcfg->dma_maskbits &&
1210                     rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
1211                         RTE_LOG(ERR, EAL,
1212                                 "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n",
1213                                 __func__);
1214                         if (rte_eal_iova_mode() == RTE_IOVA_VA &&
1215                             rte_eal_using_phys_addrs())
1216                                 RTE_LOG(ERR, EAL,
1217                                         "%s(): Please try initializing EAL with --iova-mode=pa parameter.\n",
1218                                         __func__);
1219                         goto fail;
1220                 }
1221                 return 0;
1222         }
1223
1224         /* calculate the total number of hugepages available. At this point we
1225          * haven't yet started sorting them, so they are all on socket 0 */
1226         for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++) {
1227                 /* meanwhile, also initialize used_hp hugepage sizes in used_hp */
1228                 used_hp[i].hugepage_sz = internal_conf->hugepage_info[i].hugepage_sz;
1229
1230                 nr_hugepages += internal_conf->hugepage_info[i].num_pages[0];
1231         }
1232
1233         /*
1234          * allocate a memory area for hugepage table.
1235          * this isn't shared memory yet. due to the fact that we need some
1236          * processing done on these pages, shared memory will be created
1237          * at a later stage.
1238          */
1239         tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file));
1240         if (tmp_hp == NULL)
1241                 goto fail;
1242
1243         memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file));
1244
1245         hp_offset = 0; /* where we start the current page size entries */
1246
1247         huge_register_sigbus();
1248
1249         /* make a copy of socket_mem, needed for balanced allocation. */
1250         for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
1251                 memory[i] = internal_conf->socket_mem[i];
1252
1253         /* map all hugepages and sort them */
1254         for (i = 0; i < (int)internal_conf->num_hugepage_sizes; i++) {
1255                 unsigned pages_old, pages_new;
1256                 struct hugepage_info *hpi;
1257
1258                 /*
1259                  * we don't yet mark hugepages as used at this stage, so
1260                  * we just map all hugepages available to the system
1261                  * all hugepages are still located on socket 0
1262                  */
1263                 hpi = &internal_conf->hugepage_info[i];
1264
1265                 if (hpi->num_pages[0] == 0)
1266                         continue;
1267
1268                 /* map all hugepages available */
1269                 pages_old = hpi->num_pages[0];
1270                 pages_new = map_all_hugepages(&tmp_hp[hp_offset], hpi, memory);
1271                 if (pages_new < pages_old) {
1272                         RTE_LOG(DEBUG, EAL,
1273                                 "%d not %d hugepages of size %u MB allocated\n",
1274                                 pages_new, pages_old,
1275                                 (unsigned)(hpi->hugepage_sz / 0x100000));
1276
1277                         int pages = pages_old - pages_new;
1278
1279                         nr_hugepages -= pages;
1280                         hpi->num_pages[0] = pages_new;
1281                         if (pages_new == 0)
1282                                 continue;
1283                 }
1284
1285                 if (rte_eal_using_phys_addrs() &&
1286                                 rte_eal_iova_mode() != RTE_IOVA_VA) {
1287                         /* find physical addresses for each hugepage */
1288                         if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
1289                                 RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
1290                                         "for %u MB pages\n",
1291                                         (unsigned int)(hpi->hugepage_sz / 0x100000));
1292                                 goto fail;
1293                         }
1294                 } else {
1295                         /* set physical addresses for each hugepage */
1296                         if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
1297                                 RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
1298                                         "for %u MB pages\n",
1299                                         (unsigned int)(hpi->hugepage_sz / 0x100000));
1300                                 goto fail;
1301                         }
1302                 }
1303
1304                 if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
1305                         RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n",
1306                                         (unsigned)(hpi->hugepage_sz / 0x100000));
1307                         goto fail;
1308                 }
1309
1310                 qsort(&tmp_hp[hp_offset], hpi->num_pages[0],
1311                       sizeof(struct hugepage_file), cmp_physaddr);
1312
1313                 /* we have processed a num of hugepages of this size, so inc offset */
1314                 hp_offset += hpi->num_pages[0];
1315         }
1316
1317         huge_recover_sigbus();
1318
1319         if (internal_conf->memory == 0 && internal_conf->force_sockets == 0)
1320                 internal_conf->memory = eal_get_hugepage_mem_size();
1321
1322         nr_hugefiles = nr_hugepages;
1323
1324
1325         /* clean out the numbers of pages */
1326         for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++)
1327                 for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
1328                         internal_conf->hugepage_info[i].num_pages[j] = 0;
1329
1330         /* get hugepages for each socket */
1331         for (i = 0; i < nr_hugefiles; i++) {
1332                 int socket = tmp_hp[i].socket_id;
1333
1334                 /* find a hugepage info with right size and increment num_pages */
1335                 const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES,
1336                                 (int)internal_conf->num_hugepage_sizes);
1337                 for (j = 0; j < nb_hpsizes; j++) {
1338                         if (tmp_hp[i].size ==
1339                                         internal_conf->hugepage_info[j].hugepage_sz) {
1340                                 internal_conf->hugepage_info[j].num_pages[socket]++;
1341                         }
1342                 }
1343         }
1344
1345         /* make a copy of socket_mem, needed for number of pages calculation */
1346         for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
1347                 memory[i] = internal_conf->socket_mem[i];
1348
1349         /* calculate final number of pages */
1350         nr_hugepages = eal_dynmem_calc_num_pages_per_socket(memory,
1351                         internal_conf->hugepage_info, used_hp,
1352                         internal_conf->num_hugepage_sizes);
1353
1354         /* error if not enough memory available */
1355         if (nr_hugepages < 0)
1356                 goto fail;
1357
1358         /* report the number of pages requested from each socket */
1359         for (i = 0; i < (int) internal_conf->num_hugepage_sizes; i++) {
1360                 for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
1361                         if (used_hp[i].num_pages[j] > 0) {
1362                                 RTE_LOG(DEBUG, EAL,
1363                                         "Requesting %u pages of size %uMB"
1364                                         " from socket %i\n",
1365                                         used_hp[i].num_pages[j],
1366                                         (unsigned)
1367                                         (used_hp[i].hugepage_sz / 0x100000),
1368                                         j);
1369                         }
1370                 }
1371         }
1372
1373         /* create shared memory */
1374         hugepage = create_shared_memory(eal_hugepage_data_path(),
1375                         nr_hugefiles * sizeof(struct hugepage_file));
1376
1377         if (hugepage == NULL) {
1378                 RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
1379                 goto fail;
1380         }
1381         memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file));
1382
1383         /*
1384          * unmap pages that we won't need (looks at used_hp).
1385          * also, sets final_va to NULL on pages that were unmapped.
1386          */
1387         if (unmap_unneeded_hugepages(tmp_hp, used_hp,
1388                         internal_conf->num_hugepage_sizes) < 0) {
1389                 RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n");
1390                 goto fail;
1391         }
1392
1393         /*
1394          * copy the hugepage data from the malloc'd hugepage table to the
1395          * actual shared memory. only hugepages whose orig_va is not NULL
1396          * are copied, and the copy is bounds-checked to prevent overflow.
1397          */
1398         if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles,
1399                         tmp_hp, nr_hugefiles) < 0) {
1400                 RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n");
1401                 goto fail;
1402         }
1403
1404 #ifndef RTE_ARCH_64
1405         /* for legacy 32-bit mode, we did not preallocate VA space, so do it */
1406         if (internal_conf->legacy_mem &&
1407                         prealloc_segments(hugepage, nr_hugefiles)) {
1408                 RTE_LOG(ERR, EAL, "Could not preallocate VA space for hugepages\n");
1409                 goto fail;
1410         }
1411 #endif
1412
1413         /* remap all pages we do need into memseg list VA space, so that those
1414          * pages become first-class citizens in the DPDK memory subsystem
1415          */
1416         if (remap_needed_hugepages(hugepage, nr_hugefiles)) {
1417                 RTE_LOG(ERR, EAL, "Couldn't remap hugepage files into memseg lists\n");
1418                 goto fail;
1419         }
1420
1421         /* free the hugepage backing files */
1422         if (internal_conf->hugepage_file.unlink_before_mapping &&
1423                 unlink_hugepage_files(tmp_hp, internal_conf->num_hugepage_sizes) < 0) {
1424                 RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n");
1425                 goto fail;
1426         }
1427
1428         /* free the temporary hugepage table */
1429         free(tmp_hp);
1430         tmp_hp = NULL;
1431
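        /* the hugepage table has been copied into the shared data file, so our
         * private mapping of it is no longer needed; secondary processes will
         * map that file themselves in eal_legacy_hugepage_attach().
         */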
1432         munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
1433         hugepage = NULL;
1434
1435         /* we're not going to allocate more pages, so release VA space for
1436          * unused memseg lists
1437          */
1438         for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
1439                 struct rte_memseg_list *msl = &mcfg->memsegs[i];
1440                 size_t mem_sz;
1441
1442                 /* skip inactive lists */
1443                 if (msl->base_va == NULL)
1444                         continue;
1445                 /* skip lists where there is at least one page allocated */
1446                 if (msl->memseg_arr.count > 0)
1447                         continue;
1448                 /* this is an unused list, deallocate it */
1449                 mem_sz = msl->len;
1450                 munmap(msl->base_va, mem_sz);
1451                 msl->base_va = NULL;
1452                 msl->heap = 0;
1453
1454                 /* destroy backing fbarray */
1455                 rte_fbarray_destroy(&msl->memseg_arr);
1456         }
1457
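        /* if a DMA mask has been registered, verify that the IOVAs of the
         * allocated memory fit within it.
         */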
1458         if (mcfg->dma_maskbits &&
1459             rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
1460                 RTE_LOG(ERR, EAL,
1461                         "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n",
1462                         __func__);
1463                 goto fail;
1464         }
1465
1466         return 0;
1467
1468 fail:
1469         huge_recover_sigbus();
1470         free(tmp_hp);
1471         if (hugepage != NULL)
1472                 munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
1473
1474         return -1;
1475 }
1476
1477 /*
1478  * uses fstat to report the size of a file on disk (returns 0 on failure)
1479  */
1480 static off_t
1481 getFileSize(int fd)
1482 {
1483         struct stat st;
1484         if (fstat(fd, &st) < 0)
1485                 return 0;
1486         return st.st_size;
1487 }
1488
1489 /*
1490  * This creates the memory mappings in the secondary process to match those of
1491  * the primary process. It goes through each memory segment in the DPDK runtime
1492  * configuration and finds the hugepages which form that segment, mapping them
1493  * in order to form a contiguous block in the virtual memory space.
1494  */
1495 static int
1496 eal_legacy_hugepage_attach(void)
1497 {
1498         struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1499         struct hugepage_file *hp = NULL;
1500         unsigned int num_hp = 0;
1501         unsigned int i = 0;
1502         unsigned int cur_seg;
1503         off_t size = 0;
1504         int fd, fd_hugepage = -1;
1505
1506         if (aslr_enabled() > 0) {
1507                 RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization "
1508                                 "(ASLR) is enabled in the kernel.\n");
1509                 RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory "
1510                                 "into secondary processes\n");
1511         }
1512
1513         fd_hugepage = open(eal_hugepage_data_path(), O_RDONLY);
1514         if (fd_hugepage < 0) {
1515                 RTE_LOG(ERR, EAL, "Could not open %s\n",
1516                                 eal_hugepage_data_path());
1517                 goto error;
1518         }
1519
1520         size = getFileSize(fd_hugepage);
1521         hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0);
1522         if (hp == MAP_FAILED) {
1523                 RTE_LOG(ERR, EAL, "Could not mmap %s\n",
1524                                 eal_hugepage_data_path());
1525                 goto error;
1526         }
1527
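        /* the file written by the primary process is an array of
         * struct hugepage_file entries; derive the entry count from its size.
         */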
1528         num_hp = size / sizeof(struct hugepage_file);
1529         RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp);
1530
1531         /* map all segments into memory to make sure we get the right
1532          * addresses. the segments themselves are already in the memseg list
1533          * (which is shared and has its VA space preallocated), so we just
1534          * need to map everything at the correct addresses.
1535          */
1536         for (i = 0; i < num_hp; i++) {
1537                 struct hugepage_file *hf = &hp[i];
1538                 size_t map_sz = hf->size;
1539                 void *map_addr = hf->final_va;
1540                 int msl_idx, ms_idx;
1541                 struct rte_memseg_list *msl;
1542                 struct rte_memseg *ms;
1543
1544                 /* if size is zero, no more pages left */
1545                 if (map_sz == 0)
1546                         break;
1547
1548                 fd = open(hf->filepath, O_RDWR);
1549                 if (fd < 0) {
1550                         RTE_LOG(ERR, EAL, "Could not open %s: %s\n",
1551                                 hf->filepath, strerror(errno));
1552                         goto error;
1553                 }
1554
1555                 map_addr = mmap(map_addr, map_sz, PROT_READ | PROT_WRITE,
1556                                 MAP_SHARED | MAP_FIXED, fd, 0);
1557                 if (map_addr == MAP_FAILED) {
1558                         RTE_LOG(ERR, EAL, "Could not map %s: %s\n",
1559                                 hf->filepath, strerror(errno));
1560                         goto fd_error;
1561                 }
1562
1563                 /* set shared lock on the file. */
1564                 if (flock(fd, LOCK_SH) < 0) {
1565                         RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n",
1566                                 __func__, strerror(errno));
1567                         goto mmap_error;
1568                 }
1569
1570                 /* find segment data */
1571                 msl = rte_mem_virt2memseg_list(map_addr);
1572                 if (msl == NULL) {
1573                         RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg list\n",
1574                                 __func__);
1575                         goto mmap_error;
1576                 }
1577                 ms = rte_mem_virt2memseg(map_addr, msl);
1578                 if (ms == NULL) {
1579                         RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg\n",
1580                                 __func__);
1581                         goto mmap_error;
1582                 }
1583
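                /* derive the list and segment indices within the shared config,
                 * so that the file descriptor can be stored per segment below.
                 */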
1584                 msl_idx = msl - mcfg->memsegs;
1585                 ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
1586                 if (ms_idx < 0) {
1587                         RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg idx\n",
1588                                 __func__);
1589                         goto mmap_error;
1590                 }
1591
1592                 /* store segment fd internally */
1593                 if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0)
1594                         RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n",
1595                                 rte_strerror(rte_errno));
1596         }
1597         /* unmap the hugepage config file, since we are done using it */
1598         munmap(hp, size);
1599         close(fd_hugepage);
1600         return 0;
1601
1602 mmap_error:
1603         munmap(hp[i].final_va, hp[i].size);
1604 fd_error:
1605         close(fd);
1606 error:
1607         /* unwind mmap's done so far */
1608         for (cur_seg = 0; cur_seg < i; cur_seg++)
1609                 munmap(hp[cur_seg].final_va, hp[cur_seg].size);
1610
1611         if (hp != NULL && hp != MAP_FAILED)
1612                 munmap(hp, size);
1613         if (fd_hugepage >= 0)
1614                 close(fd_hugepage);
1615         return -1;
1616 }
1617
1618 static int
1619 eal_hugepage_attach(void)
1620 {
1621         if (eal_memalloc_sync_with_primary()) {
1622                 RTE_LOG(ERR, EAL, "Could not map memory from primary process\n");
1623                 if (aslr_enabled() > 0)
1624                         RTE_LOG(ERR, EAL, "It is recommended to disable ASLR in the kernel and retry running both primary and secondary processes\n");
1625                 return -1;
1626         }
1627         return 0;
1628 }
1629
1630 int
1631 rte_eal_hugepage_init(void)
1632 {
1633         const struct internal_config *internal_conf =
1634                 eal_get_internal_configuration();
1635
1636         return internal_conf->legacy_mem ?
1637                         eal_legacy_hugepage_init() :
1638                         eal_dynmem_hugepage_init();
1639 }
1640
1641 int
1642 rte_eal_hugepage_attach(void)
1643 {
1644         const struct internal_config *internal_conf =
1645                 eal_get_internal_configuration();
1646
1647         return internal_conf->legacy_mem ?
1648                         eal_legacy_hugepage_attach() :
1649                         eal_hugepage_attach();
1650 }
1651
1652 int
1653 rte_eal_using_phys_addrs(void)
1654 {
1655         if (phys_addrs_available == -1) {
1656                 uint64_t tmp = 0;
1657
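                /* probe by translating the address of a local variable; this
                 * fails e.g. for unprivileged processes that are not allowed to
                 * read physical frame numbers from the kernel.
                 */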
1658                 if (rte_eal_has_hugepages() != 0 &&
1659                     rte_mem_virt2phy(&tmp) != RTE_BAD_PHYS_ADDR)
1660                         phys_addrs_available = 1;
1661                 else
1662                         phys_addrs_available = 0;
1663         }
1664         return phys_addrs_available;
1665 }
1666
1667 static int __rte_unused
1668 memseg_primary_init_32(void)
1669 {
1670         struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1671         int active_sockets, hpi_idx, msl_idx = 0;
1672         unsigned int socket_id, i;
1673         struct rte_memseg_list *msl;
1674         uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem;
1675         uint64_t max_mem;
1676         struct internal_config *internal_conf =
1677                 eal_get_internal_configuration();
1678
1679         /* no-huge does not need this at all */
1680         if (internal_conf->no_hugetlbfs)
1681                 return 0;
1682
1683         /* this is a giant hack, but desperate times call for desperate
1684          * measures. in legacy 32-bit mode, we cannot preallocate VA space,
1685          * because having upwards of 2 gigabytes of VA space already mapped will
1686          * interfere with our ability to map and sort hugepages.
1687          *
1688          * therefore, in legacy 32-bit mode, we will be initializing memseg
1689          * lists much later - in eal_memory.c, right after we unmap all the
1690          * unneeded pages. this will not affect secondary processes, as those
1691          * should be able to mmap the space without (too many) problems.
1692          */
1693         if (internal_conf->legacy_mem)
1694                 return 0;
1695
1696         /* 32-bit mode is a very special case. we cannot know in advance where
1697          * the user will want to allocate their memory, so we have to use some
1698          * heuristics.
1699          */
1700         active_sockets = 0;
1701         total_requested_mem = 0;
1702         if (internal_conf->force_sockets)
1703                 for (i = 0; i < rte_socket_count(); i++) {
1704                         uint64_t mem;
1705
1706                         socket_id = rte_socket_id_by_idx(i);
1707                         mem = internal_conf->socket_mem[socket_id];
1708
1709                         if (mem == 0)
1710                                 continue;
1711
1712                         active_sockets++;
1713                         total_requested_mem += mem;
1714                 }
1715         else
1716                 total_requested_mem = internal_conf->memory;
1717
1718         max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
1719         if (total_requested_mem > max_mem) {
1720                 RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n",
1721                                 (unsigned int)(max_mem >> 20));
1722                 return -1;
1723         }
1724         total_extra_mem = max_mem - total_requested_mem;
1725         extra_mem_per_socket = active_sockets == 0 ? total_extra_mem :
1726                         total_extra_mem / active_sockets;
1727
1728         /* the allocation logic is a little bit convoluted, but here's how it
1729          * works, in a nutshell:
1730          *  - if the user hasn't specified on which sockets to allocate memory
1731          *    via --socket-mem, we allocate all memory on the main lcore's socket.
1732          *  - if the user has specified sockets to allocate memory on, there may
1733          *    be some "unused" memory left (e.g. if the --socket-mem amounts do
1734          *    not add up to the 2 gigabyte limit), so that memory is added
1735          *    equally to all sockets that are in use.
1736          *
1737          * page sizes are sorted in descending order, so we can safely assume
1738          * that bigger page sizes are handed out first.
1739          */
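        /* for example (illustrative): with a 2 gigabyte limit and --socket-mem
         * requesting 512M on socket 0 only, socket 0 gets VA space preallocated
         * for 512M plus the remaining ~1.5G of headroom, while nothing is
         * preallocated on the other sockets.
         */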
1740
1741         /* create memseg lists */
1742         for (i = 0; i < rte_socket_count(); i++) {
1743                 int hp_sizes = (int) internal_conf->num_hugepage_sizes;
1744                 uint64_t max_socket_mem, cur_socket_mem;
1745                 unsigned int main_lcore_socket;
1746                 struct rte_config *cfg = rte_eal_get_configuration();
1747                 bool skip;
1748
1749                 socket_id = rte_socket_id_by_idx(i);
1750
1751 #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
1752                 /* we can still sort pages by socket in legacy mode */
1753                 if (!internal_conf->legacy_mem && socket_id > 0)
1754                         break;
1755 #endif
1756
1757                 /* if we didn't specifically request memory on this socket */
1758                 skip = active_sockets != 0 &&
1759                                 internal_conf->socket_mem[socket_id] == 0;
1760                 /* ...or if we didn't specifically request memory on *any*
1761                  * socket, and this is not the main lcore's socket
1762                  */
1763                 main_lcore_socket = rte_lcore_to_socket_id(cfg->main_lcore);
1764                 skip |= active_sockets == 0 && socket_id != main_lcore_socket;
1765
1766                 if (skip) {
1767                         RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n",
1768                                         socket_id);
1769                         continue;
1770                 }
1771
1772                 /* max amount of memory on this socket */
1773                 max_socket_mem = (active_sockets != 0 ?
1774                                         internal_conf->socket_mem[socket_id] :
1775                                         internal_conf->memory) +
1776                                         extra_mem_per_socket;
1777                 cur_socket_mem = 0;
1778
1779                 for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) {
1780                         uint64_t max_pagesz_mem, cur_pagesz_mem = 0;
1781                         uint64_t hugepage_sz;
1782                         struct hugepage_info *hpi;
1783                         int type_msl_idx, max_segs, total_segs = 0;
1784
1785                         hpi = &internal_conf->hugepage_info[hpi_idx];
1786                         hugepage_sz = hpi->hugepage_sz;
1787
1788                         /* check if pages are actually available */
1789                         if (hpi->num_pages[socket_id] == 0)
1790                                 continue;
1791
1792                         max_segs = RTE_MAX_MEMSEG_PER_TYPE;
1793                         max_pagesz_mem = max_socket_mem - cur_socket_mem;
1794
1795                         /* make it a multiple of the hugepage size */
1796                         max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem,
1797                                         hugepage_sz);
1798
1799                         RTE_LOG(DEBUG, EAL, "Attempting to preallocate "
1800                                         "%" PRIu64 "M on socket %i\n",
1801                                         max_pagesz_mem >> 20, socket_id);
1802
1803                         type_msl_idx = 0;
1804                         while (cur_pagesz_mem < max_pagesz_mem &&
1805                                         total_segs < max_segs) {
1806                                 uint64_t cur_mem;
1807                                 unsigned int n_segs;
1808
1809                                 if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
1810                                         RTE_LOG(ERR, EAL,
1811                                                 "No more space in memseg lists, please increase %s\n",
1812                                                 RTE_STR(RTE_MAX_MEMSEG_LISTS));
1813                                         return -1;
1814                                 }
1815
1816                                 msl = &mcfg->memsegs[msl_idx];
1817
1818                                 cur_mem = get_mem_amount(hugepage_sz,
1819                                                 max_pagesz_mem);
1820                                 n_segs = cur_mem / hugepage_sz;
1821
1822                                 if (eal_memseg_list_init(msl, hugepage_sz,
1823                                                 n_segs, socket_id, type_msl_idx,
1824                                                 true)) {
1825                                         /* failing to allocate a memseg list is
1826                                          * a serious error.
1827                                          */
1828                                         RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
1829                                         return -1;
1830                                 }
1831
1832                                 if (eal_memseg_list_alloc(msl, 0)) {
1833                                         /* if we couldn't allocate VA space, we
1834                                          * can try with smaller page sizes.
1835                                          */
1836                                         RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
1837                                         /* deallocate memseg list */
1838                                         if (memseg_list_free(msl))
1839                                                 return -1;
1840                                         break;
1841                                 }
1842
1843                                 total_segs += msl->memseg_arr.len;
1844                                 cur_pagesz_mem = total_segs * hugepage_sz;
1845                                 type_msl_idx++;
1846                                 msl_idx++;
1847                         }
1848                         cur_socket_mem += cur_pagesz_mem;
1849                 }
1850                 if (cur_socket_mem == 0) {
1851                         RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
1852                                 socket_id);
1853                         return -1;
1854                 }
1855         }
1856
1857         return 0;
1858 }
1859
1860 static int __rte_unused
1861 memseg_primary_init(void)
1862 {
1863         return eal_dynmem_memseg_lists_init();
1864 }
1865
1866 static int
1867 memseg_secondary_init(void)
1868 {
1869         struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1870         int msl_idx = 0;
1871         struct rte_memseg_list *msl;
1872
1873         for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
1874
1875                 msl = &mcfg->memsegs[msl_idx];
1876
1877                 /* skip empty and external memseg lists */
1878                 if (msl->memseg_arr.len == 0 || msl->external)
1879                         continue;
1880
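                /* attaching maps the fbarray backing file created by the
                 * primary, so segment state is shared between processes.
                 */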
1881                 if (rte_fbarray_attach(&msl->memseg_arr)) {
1882                         RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
1883                         return -1;
1884                 }
1885
1886                 /* preallocate VA space */
1887                 if (eal_memseg_list_alloc(msl, 0)) {
1888                         RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
1889                         return -1;
1890                 }
1891         }
1892
1893         return 0;
1894 }
1895
1896 int
1897 rte_eal_memseg_init(void)
1898 {
1899         /* increase rlimit to maximum */
1900         struct rlimit lim;
1901
1902 #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
1903         const struct internal_config *internal_conf =
1904                 eal_get_internal_configuration();
1905 #endif
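        /* note: DPDK can keep one open file descriptor per hugepage-backed
         * segment, so the default open-file limit is easily exceeded; try to
         * raise it as far as permitted.
         */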
1906         if (getrlimit(RLIMIT_NOFILE, &lim) == 0) {
1907                 /* set limit to maximum */
1908                 lim.rlim_cur = lim.rlim_max;
1909
1910                 if (setrlimit(RLIMIT_NOFILE, &lim) < 0) {
1911                         RTE_LOG(DEBUG, EAL, "Setting maximum number of open files failed: %s\n",
1912                                         strerror(errno));
1913                 } else {
1914                         RTE_LOG(DEBUG, EAL, "Setting maximum number of open files to %"
1915                                         PRIu64 "\n",
1916                                         (uint64_t)lim.rlim_cur);
1917                 }
1918         } else {
1919                 RTE_LOG(ERR, EAL, "Cannot get current resource limits\n");
1920         }
1921 #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
1922         if (!internal_conf->legacy_mem && rte_socket_count() > 1) {
1923                 RTE_LOG(WARNING, EAL, "DPDK is running on a NUMA system, but is compiled without NUMA support.\n");
1924                 RTE_LOG(WARNING, EAL, "This will have adverse consequences for performance and usability.\n");
1925                 RTE_LOG(WARNING, EAL, "Please use --"OPT_LEGACY_MEM" option, or recompile with NUMA support.\n");
1926         }
1927 #endif
1928
1929         return rte_eal_process_type() == RTE_PROC_PRIMARY ?
1930 #ifndef RTE_ARCH_64
1931                         memseg_primary_init_32() :
1932 #else
1933                         memseg_primary_init() :
1934 #endif
1935                         memseg_secondary_init();
1936 }