65ea670f94fe3bdc0ef8da0f1e6b6b3c2131cf6f
[dpdk.git] / lib / librte_eal / bsdapp / eal / eal_memory.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 #include <sys/mman.h>
5 #include <unistd.h>
6 #include <sys/types.h>
7 #include <sys/sysctl.h>
8 #include <inttypes.h>
9 #include <errno.h>
10 #include <string.h>
11 #include <fcntl.h>
12
13 #include <rte_eal.h>
14 #include <rte_eal_memconfig.h>
15 #include <rte_errno.h>
16 #include <rte_log.h>
17 #include <rte_string_fns.h>
18 #include "eal_private.h"
19 #include "eal_internal_cfg.h"
20 #include "eal_filesystem.h"
21
/* System page size as reported by sysconf(); the call is made on every use. */
#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
23
24 /*
25  * Get physical address of any mapped virtual address in the current process.
26  */
27 phys_addr_t
28 rte_mem_virt2phy(const void *virtaddr)
29 {
30         /* XXX not implemented. This function is only used by
31          * rte_mempool_virt2iova() when hugepages are disabled. */
32         (void)virtaddr;
33         return RTE_BAD_IOVA;
34 }
35 rte_iova_t
36 rte_mem_virt2iova(const void *virtaddr)
37 {
38         return rte_mem_virt2phy(virtaddr);
39 }
40
41 int
42 rte_eal_hugepage_init(void)
43 {
44         struct rte_mem_config *mcfg;
45         uint64_t total_mem = 0;
46         void *addr;
47         unsigned int i, j, seg_idx = 0;
48
49         /* get pointer to global configuration */
50         mcfg = rte_eal_get_configuration()->mem_config;
51
52         /* for debug purposes, hugetlbfs can be disabled */
53         if (internal_config.no_hugetlbfs) {
54                 struct rte_memseg_list *msl;
55                 struct rte_fbarray *arr;
56                 struct rte_memseg *ms;
57                 uint64_t page_sz;
58                 int n_segs, cur_seg;
59
60                 /* create a memseg list */
61                 msl = &mcfg->memsegs[0];
62
63                 page_sz = RTE_PGSIZE_4K;
64                 n_segs = internal_config.memory / page_sz;
65
66                 if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
67                                 sizeof(struct rte_memseg))) {
68                         RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
69                         return -1;
70                 }
71
72                 addr = mmap(NULL, internal_config.memory,
73                                 PROT_READ | PROT_WRITE,
74                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
75                 if (addr == MAP_FAILED) {
76                         RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
77                                         strerror(errno));
78                         return -1;
79                 }
80                 msl->base_va = addr;
81                 msl->page_sz = page_sz;
82                 msl->len = internal_config.memory;
83                 msl->socket_id = 0;
84
85                 /* populate memsegs. each memseg is 1 page long */
86                 for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
87                         arr = &msl->memseg_arr;
88
89                         ms = rte_fbarray_get(arr, cur_seg);
90                         if (rte_eal_iova_mode() == RTE_IOVA_VA)
91                                 ms->iova = (uintptr_t)addr;
92                         else
93                                 ms->iova = RTE_BAD_IOVA;
94                         ms->addr = addr;
95                         ms->hugepage_sz = page_sz;
96                         ms->len = page_sz;
97                         ms->socket_id = 0;
98
99                         rte_fbarray_set_used(arr, cur_seg);
100
101                         addr = RTE_PTR_ADD(addr, page_sz);
102                 }
103                 return 0;
104         }
105
106         /* map all hugepages and sort them */
107         for (i = 0; i < internal_config.num_hugepage_sizes; i ++){
108                 struct hugepage_info *hpi;
109                 rte_iova_t prev_end = 0;
110                 int prev_ms_idx = -1;
111                 uint64_t page_sz, mem_needed;
112                 unsigned int n_pages, max_pages;
113
114                 hpi = &internal_config.hugepage_info[i];
115                 page_sz = hpi->hugepage_sz;
116                 max_pages = hpi->num_pages[0];
117                 mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
118                                 page_sz);
119
120                 n_pages = RTE_MIN(mem_needed / page_sz, max_pages);
121
122                 for (j = 0; j < n_pages; j++) {
123                         struct rte_memseg_list *msl;
124                         struct rte_fbarray *arr;
125                         struct rte_memseg *seg;
126                         int msl_idx, ms_idx;
127                         rte_iova_t physaddr;
128                         int error;
129                         size_t sysctl_size = sizeof(physaddr);
130                         char physaddr_str[64];
131                         bool is_adjacent;
132
133                         /* first, check if this segment is IOVA-adjacent to
134                          * the previous one.
135                          */
136                         snprintf(physaddr_str, sizeof(physaddr_str),
137                                         "hw.contigmem.physaddr.%d", j);
138                         error = sysctlbyname(physaddr_str, &physaddr,
139                                         &sysctl_size, NULL, 0);
140                         if (error < 0) {
141                                 RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
142                                                 "from %s\n", j, hpi->hugedir);
143                                 return -1;
144                         }
145
146                         is_adjacent = prev_end != 0 && physaddr == prev_end;
147                         prev_end = physaddr + hpi->hugepage_sz;
148
149                         for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
150                                         msl_idx++) {
151                                 bool empty, need_hole;
152                                 msl = &mcfg->memsegs[msl_idx];
153                                 arr = &msl->memseg_arr;
154
155                                 if (msl->page_sz != page_sz)
156                                         continue;
157
158                                 empty = arr->count == 0;
159
160                                 /* we need a hole if this isn't an empty memseg
161                                  * list, and if previous segment was not
162                                  * adjacent to current one.
163                                  */
164                                 need_hole = !empty && !is_adjacent;
165
166                                 /* we need 1, plus hole if not adjacent */
167                                 ms_idx = rte_fbarray_find_next_n_free(arr,
168                                                 0, 1 + (need_hole ? 1 : 0));
169
170                                 /* memseg list is full? */
171                                 if (ms_idx < 0)
172                                         continue;
173
174                                 if (need_hole && prev_ms_idx == ms_idx - 1)
175                                         ms_idx++;
176                                 prev_ms_idx = ms_idx;
177
178                                 break;
179                         }
180                         if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
181                                 RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
182                                         RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
183                                         RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
184                                 return -1;
185                         }
186                         arr = &msl->memseg_arr;
187                         seg = rte_fbarray_get(arr, ms_idx);
188
189                         addr = RTE_PTR_ADD(msl->base_va,
190                                         (size_t)msl->page_sz * ms_idx);
191
192                         /* address is already mapped in memseg list, so using
193                          * MAP_FIXED here is safe.
194                          */
195                         addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
196                                         MAP_SHARED | MAP_FIXED,
197                                         hpi->lock_descriptor,
198                                         j * EAL_PAGE_SIZE);
199                         if (addr == MAP_FAILED) {
200                                 RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
201                                                 j, hpi->hugedir);
202                                 return -1;
203                         }
204
205                         seg->addr = addr;
206                         seg->iova = physaddr;
207                         seg->hugepage_sz = page_sz;
208                         seg->len = page_sz;
209                         seg->nchannel = mcfg->nchannel;
210                         seg->nrank = mcfg->nrank;
211                         seg->socket_id = 0;
212
213                         rte_fbarray_set_used(arr, ms_idx);
214
215                         RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
216                                         PRIx64", len %zu\n",
217                                         seg_idx++, addr, physaddr, page_sz);
218
219                         total_mem += seg->len;
220                 }
221                 if (total_mem >= internal_config.memory)
222                         break;
223         }
224         if (total_mem < internal_config.memory) {
225                 RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
226                                 "requested: %" PRIu64 "M "
227                                 "available: %" PRIu64 "M\n",
228                                 internal_config.memory >> 20, total_mem >> 20);
229                 return -1;
230         }
231         return 0;
232 }
233
234 struct attach_walk_args {
235         int fd_hugepage;
236         int seg_idx;
237 };
238 static int
239 attach_segment(const struct rte_memseg_list *msl __rte_unused,
240                 const struct rte_memseg *ms, void *arg)
241 {
242         struct attach_walk_args *wa = arg;
243         void *addr;
244
245         addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
246                         MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
247                         wa->seg_idx * EAL_PAGE_SIZE);
248         if (addr == MAP_FAILED || addr != ms->addr)
249                 return -1;
250         wa->seg_idx++;
251
252         return 0;
253 }
254
255 int
256 rte_eal_hugepage_attach(void)
257 {
258         const struct hugepage_info *hpi;
259         int fd_hugepage = -1;
260         unsigned int i;
261
262         hpi = &internal_config.hugepage_info[0];
263
264         for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
265                 const struct hugepage_info *cur_hpi = &hpi[i];
266                 struct attach_walk_args wa;
267
268                 memset(&wa, 0, sizeof(wa));
269
270                 /* Obtain a file descriptor for contiguous memory */
271                 fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
272                 if (fd_hugepage < 0) {
273                         RTE_LOG(ERR, EAL, "Could not open %s\n",
274                                         cur_hpi->hugedir);
275                         goto error;
276                 }
277                 wa.fd_hugepage = fd_hugepage;
278                 wa.seg_idx = 0;
279
280                 /* Map the contiguous memory into each memory segment */
281                 if (rte_memseg_walk(attach_segment, &wa) < 0) {
282                         RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
283                                 wa.seg_idx, cur_hpi->hugedir);
284                         goto error;
285                 }
286
287                 close(fd_hugepage);
288                 fd_hugepage = -1;
289         }
290
291         /* hugepage_info is no longer required */
292         return 0;
293
294 error:
295         if (fd_hugepage >= 0)
296                 close(fd_hugepage);
297         return -1;
298 }
299
/* Always reports 0. */
int
rte_eal_using_phys_addrs(void)
{
	const int using_phys_addrs = 0;

	return using_phys_addrs;
}
305
306 static uint64_t
307 get_mem_amount(uint64_t page_sz, uint64_t max_mem)
308 {
309         uint64_t area_sz, max_pages;
310
311         /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
312         max_pages = RTE_MAX_MEMSEG_PER_LIST;
313         max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
314
315         area_sz = RTE_MIN(page_sz * max_pages, max_mem);
316
317         /* make sure the list isn't smaller than the page size */
318         area_sz = RTE_MAX(area_sz, page_sz);
319
320         return RTE_ALIGN(area_sz, page_sz);
321 }
322
323 #define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
324 static int
325 alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
326                 int n_segs, int socket_id, int type_msl_idx)
327 {
328         char name[RTE_FBARRAY_NAME_LEN];
329
330         snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
331                  type_msl_idx);
332         if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
333                         sizeof(struct rte_memseg))) {
334                 RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
335                         rte_strerror(rte_errno));
336                 return -1;
337         }
338
339         msl->page_sz = page_sz;
340         msl->socket_id = socket_id;
341         msl->base_va = NULL;
342
343         RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
344                         (size_t)page_sz >> 10, socket_id);
345
346         return 0;
347 }
348
349 static int
350 alloc_va_space(struct rte_memseg_list *msl)
351 {
352         uint64_t page_sz;
353         size_t mem_sz;
354         void *addr;
355         int flags = 0;
356
357 #ifdef RTE_ARCH_PPC_64
358         flags |= MAP_HUGETLB;
359 #endif
360
361         page_sz = msl->page_sz;
362         mem_sz = page_sz * msl->memseg_arr.len;
363
364         addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
365         if (addr == NULL) {
366                 if (rte_errno == EADDRNOTAVAIL)
367                         RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
368                                 (unsigned long long)mem_sz, msl->base_va);
369                 else
370                         RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
371                 return -1;
372         }
373         msl->base_va = addr;
374         msl->len = mem_sz;
375
376         return 0;
377 }
378
379
380 static int
381 memseg_primary_init(void)
382 {
383         struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
384         int hpi_idx, msl_idx = 0;
385         struct rte_memseg_list *msl;
386         uint64_t max_mem, total_mem;
387
388         /* no-huge does not need this at all */
389         if (internal_config.no_hugetlbfs)
390                 return 0;
391
392         /* FreeBSD has an issue where core dump will dump the entire memory
393          * contents, including anonymous zero-page memory. Therefore, while we
394          * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
395          * also be further limiting total memory amount to whatever memory is
396          * available to us through contigmem driver (plus spacing blocks).
397          *
398          * so, at each stage, we will be checking how much memory we are
399          * preallocating, and adjust all the values accordingly.
400          */
401
402         max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
403         total_mem = 0;
404
405         /* create memseg lists */
406         for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
407                         hpi_idx++) {
408                 uint64_t max_type_mem, total_type_mem = 0;
409                 uint64_t avail_mem;
410                 int type_msl_idx, max_segs, avail_segs, total_segs = 0;
411                 struct hugepage_info *hpi;
412                 uint64_t hugepage_sz;
413
414                 hpi = &internal_config.hugepage_info[hpi_idx];
415                 hugepage_sz = hpi->hugepage_sz;
416
417                 /* no NUMA support on FreeBSD */
418
419                 /* check if we've already exceeded total memory amount */
420                 if (total_mem >= max_mem)
421                         break;
422
423                 /* first, calculate theoretical limits according to config */
424                 max_type_mem = RTE_MIN(max_mem - total_mem,
425                         (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
426                 max_segs = RTE_MAX_MEMSEG_PER_TYPE;
427
428                 /* now, limit all of that to whatever will actually be
429                  * available to us, because without dynamic allocation support,
430                  * all of that extra memory will be sitting there being useless
431                  * and slowing down core dumps in case of a crash.
432                  *
433                  * we need (N*2)-1 segments because we cannot guarantee that
434                  * each segment will be IOVA-contiguous with the previous one,
435                  * so we will allocate more and put spaces inbetween segments
436                  * that are non-contiguous.
437                  */
438                 avail_segs = (hpi->num_pages[0] * 2) - 1;
439                 avail_mem = avail_segs * hugepage_sz;
440
441                 max_type_mem = RTE_MIN(avail_mem, max_type_mem);
442                 max_segs = RTE_MIN(avail_segs, max_segs);
443
444                 type_msl_idx = 0;
445                 while (total_type_mem < max_type_mem &&
446                                 total_segs < max_segs) {
447                         uint64_t cur_max_mem, cur_mem;
448                         unsigned int n_segs;
449
450                         if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
451                                 RTE_LOG(ERR, EAL,
452                                         "No more space in memseg lists, please increase %s\n",
453                                         RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
454                                 return -1;
455                         }
456
457                         msl = &mcfg->memsegs[msl_idx++];
458
459                         cur_max_mem = max_type_mem - total_type_mem;
460
461                         cur_mem = get_mem_amount(hugepage_sz,
462                                         cur_max_mem);
463                         n_segs = cur_mem / hugepage_sz;
464
465                         if (alloc_memseg_list(msl, hugepage_sz, n_segs,
466                                         0, type_msl_idx))
467                                 return -1;
468
469                         total_segs += msl->memseg_arr.len;
470                         total_type_mem = total_segs * hugepage_sz;
471                         type_msl_idx++;
472
473                         if (alloc_va_space(msl)) {
474                                 RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
475                                 return -1;
476                         }
477                 }
478                 total_mem += total_type_mem;
479         }
480         return 0;
481 }
482
483 static int
484 memseg_secondary_init(void)
485 {
486         struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
487         int msl_idx = 0;
488         struct rte_memseg_list *msl;
489
490         for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
491
492                 msl = &mcfg->memsegs[msl_idx];
493
494                 /* skip empty memseg lists */
495                 if (msl->memseg_arr.len == 0)
496                         continue;
497
498                 if (rte_fbarray_attach(&msl->memseg_arr)) {
499                         RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
500                         return -1;
501                 }
502
503                 /* preallocate VA space */
504                 if (alloc_va_space(msl)) {
505                         RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
506                         return -1;
507                 }
508         }
509
510         return 0;
511 }
512
513 int
514 rte_eal_memseg_init(void)
515 {
516         return rte_eal_process_type() == RTE_PROC_PRIMARY ?
517                         memseg_primary_init() :
518                         memseg_secondary_init();
519 }