eal: hide shared memory config
lib/librte_eal/freebsd/eal/eal_memory.c (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>

#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_memcfg.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
	/* XXX not implemented. This function is only used by
	 * rte_mempool_virt2iova() when hugepages are disabled. */
	(void)virtaddr;
	return RTE_BAD_IOVA;
}

rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
	return rte_mem_virt2phy(virtaddr);
}
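
/*
 * Note (illustrative): unlike the Linux EAL, which can resolve physical
 * addresses through /proc/self/pagemap, FreeBSD offers no such interface
 * here, so e.g. rte_mem_virt2iova() on a plain malloc()'d buffer yields
 * RTE_BAD_IOVA. IOVA information for DPDK memory comes from the contigmem
 * driver instead (see rte_eal_hugepage_init() below).
 */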

int
rte_eal_hugepage_init(void)
{
	struct rte_mem_config *mcfg;
	uint64_t total_mem = 0;
	void *addr;
	unsigned int i, j, seg_idx = 0;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* for debug purposes, hugetlbfs can be disabled */
	if (internal_config.no_hugetlbfs) {
		struct rte_memseg_list *msl;
		struct rte_fbarray *arr;
		struct rte_memseg *ms;
		uint64_t page_sz;
		int n_segs, cur_seg;

		/* create a memseg list */
		msl = &mcfg->memsegs[0];

		page_sz = RTE_PGSIZE_4K;
		n_segs = internal_config.memory / page_sz;

		if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
				sizeof(struct rte_memseg))) {
			RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
			return -1;
		}

		addr = mmap(NULL, internal_config.memory,
				PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}
		msl->base_va = addr;
		msl->page_sz = page_sz;
		msl->len = internal_config.memory;
		msl->socket_id = 0;

		/* populate memsegs. each memseg is 1 page long */
		for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
			arr = &msl->memseg_arr;

			ms = rte_fbarray_get(arr, cur_seg);
			if (rte_eal_iova_mode() == RTE_IOVA_VA)
				ms->iova = (uintptr_t)addr;
			else
				ms->iova = RTE_BAD_IOVA;
			ms->addr = addr;
			ms->hugepage_sz = page_sz;
			ms->len = page_sz;
			ms->socket_id = 0;

			rte_fbarray_set_used(arr, cur_seg);

			addr = RTE_PTR_ADD(addr, page_sz);
		}
		return 0;
	}
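
	/*
	 * Worked example for the branch above (illustrative): running with
	 * "--no-huge -m 64" requests 64 MB, so n_segs = 64 MB / 4 KB = 16384
	 * memsegs, all carved out of a single anonymous mapping, all on
	 * socket 0, with IOVA equal to VA in RTE_IOVA_VA mode and
	 * RTE_BAD_IOVA otherwise.
	 */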

	/* map all hugepages and sort them */
	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		struct hugepage_info *hpi;
		rte_iova_t prev_end = 0;
		int prev_ms_idx = -1;
		uint64_t page_sz, mem_needed;
		unsigned int n_pages, max_pages;

		hpi = &internal_config.hugepage_info[i];
		page_sz = hpi->hugepage_sz;
		max_pages = hpi->num_pages[0];
		mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
				page_sz);

		n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

		for (j = 0; j < n_pages; j++) {
			struct rte_memseg_list *msl;
			struct rte_fbarray *arr;
			struct rte_memseg *seg;
			int msl_idx, ms_idx;
			rte_iova_t physaddr;
			int error;
			size_t sysctl_size = sizeof(physaddr);
			char physaddr_str[64];
			bool is_adjacent;

			/* first, check if this segment is IOVA-adjacent to
			 * the previous one.
			 */
			snprintf(physaddr_str, sizeof(physaddr_str),
					"hw.contigmem.physaddr.%d", j);
			error = sysctlbyname(physaddr_str, &physaddr,
					&sysctl_size, NULL, 0);
			if (error < 0) {
				RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
						"from %s\n", j, hpi->hugedir);
				return -1;
			}

			is_adjacent = prev_end != 0 && physaddr == prev_end;
			prev_end = physaddr + hpi->hugepage_sz;
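
			/*
			 * Example (illustrative): if buffer j-1 ends at IOVA
			 * 0x40200000 and this buffer starts at 0x80000000,
			 * the two are not adjacent, so the slot search below
			 * leaves a one-segment hole so the VA layout does not
			 * suggest IOVA contiguity that does not exist.
			 */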
			for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
					msl_idx++) {
				bool empty, need_hole;
				msl = &mcfg->memsegs[msl_idx];
				arr = &msl->memseg_arr;

				if (msl->page_sz != page_sz)
					continue;

				empty = arr->count == 0;

				/* we need a hole if this isn't an empty memseg
				 * list, and if previous segment was not
				 * adjacent to current one.
				 */
				need_hole = !empty && !is_adjacent;

				/* we need 1, plus hole if not adjacent */
				ms_idx = rte_fbarray_find_next_n_free(arr,
						0, 1 + (need_hole ? 1 : 0));

				/* memseg list is full? */
				if (ms_idx < 0)
					continue;

				if (need_hole && prev_ms_idx == ms_idx - 1)
					ms_idx++;
				prev_ms_idx = ms_idx;

				break;
			}
			if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
					RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
					RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
				return -1;
			}
			arr = &msl->memseg_arr;
			seg = rte_fbarray_get(arr, ms_idx);

			addr = RTE_PTR_ADD(msl->base_va,
					(size_t)msl->page_sz * ms_idx);

			/* address is already mapped in memseg list, so using
			 * MAP_FIXED here is safe.
			 */
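			/* The contigmem driver is understood to interpret the
			 * mmap file offset as a buffer index in units of the
			 * system page size, so offset j * EAL_PAGE_SIZE
			 * selects buffer j.
			 */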
			addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
					MAP_SHARED | MAP_FIXED,
					hpi->lock_descriptor,
					j * EAL_PAGE_SIZE);
			if (addr == MAP_FAILED) {
				RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
						j, hpi->hugedir);
				return -1;
			}

			seg->addr = addr;
			seg->iova = physaddr;
			seg->hugepage_sz = page_sz;
			seg->len = page_sz;
			seg->nchannel = mcfg->nchannel;
			seg->nrank = mcfg->nrank;
			seg->socket_id = 0;

			rte_fbarray_set_used(arr, ms_idx);

			RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
					PRIx64", len %zu\n",
					seg_idx++, addr, physaddr, page_sz);

			total_mem += seg->len;
		}
		if (total_mem >= internal_config.memory)
			break;
	}
	if (total_mem < internal_config.memory) {
		RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
				"requested: %" PRIu64 "M "
				"available: %" PRIu64 "M\n",
				internal_config.memory >> 20, total_mem >> 20);
		return -1;
	}
	return 0;
}
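
/*
 * Usage sketch (illustrative only; dump_seg is a hypothetical callback, not
 * part of this file): after rte_eal_hugepage_init() an application can
 * inspect the resulting segments with rte_memseg_walk(), whose callback
 * signature matches attach_segment() below.
 *
 *	static int
 *	dump_seg(const struct rte_memseg_list *msl,
 *			const struct rte_memseg *ms, void *arg __rte_unused)
 *	{
 *		if (msl->external)
 *			return 0;
 *		printf("seg @ %p, iova 0x%" PRIx64 ", len %zu\n",
 *				ms->addr, ms->iova, ms->len);
 *		return 0;
 *	}
 *
 *	rte_memseg_walk(dump_seg, NULL);
 */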

struct attach_walk_args {
	int fd_hugepage;
	int seg_idx;
};

static int
attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)
{
	struct attach_walk_args *wa = arg;
	void *addr;

	if (msl->external)
		return 0;

	addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
			wa->seg_idx * EAL_PAGE_SIZE);
	if (addr == MAP_FAILED || addr != ms->addr)
		return -1;
	wa->seg_idx++;

	return 0;
}

int
rte_eal_hugepage_attach(void)
{
	const struct hugepage_info *hpi;
	int fd_hugepage = -1;
	unsigned int i;

	hpi = &internal_config.hugepage_info[0];

	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		const struct hugepage_info *cur_hpi = &hpi[i];
		struct attach_walk_args wa;

		memset(&wa, 0, sizeof(wa));

		/* Obtain a file descriptor for contiguous memory */
		fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
		if (fd_hugepage < 0) {
			RTE_LOG(ERR, EAL, "Could not open %s\n",
					cur_hpi->hugedir);
			goto error;
		}
		wa.fd_hugepage = fd_hugepage;
		wa.seg_idx = 0;

		/* Map the contiguous memory into each memory segment */
		if (rte_memseg_walk(attach_segment, &wa) < 0) {
			RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
				wa.seg_idx, cur_hpi->hugedir);
			goto error;
		}

		close(fd_hugepage);
		fd_hugepage = -1;
	}

	/* hugepage_info is no longer required */
	return 0;

error:
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}

int
rte_eal_using_phys_addrs(void)
{
	return 0;
}

static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
	uint64_t area_sz, max_pages;

	/* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
	max_pages = RTE_MAX_MEMSEG_PER_LIST;
	max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

	area_sz = RTE_MIN(page_sz * max_pages, max_mem);

	/* make sure the list isn't smaller than the page size */
	area_sz = RTE_MAX(area_sz, page_sz);

	return RTE_ALIGN(area_sz, page_sz);
}
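
/*
 * Worked example (illustrative, assuming the common defaults of
 * RTE_MAX_MEMSEG_PER_LIST = 8192 and RTE_MAX_MEM_MB_PER_LIST = 32768):
 * for 2 MB pages and an unconstrained max_mem, a list is sized at
 * RTE_MIN(2 MB * 8192, 32 GB) = 16 GB, i.e. 8192 segments.
 */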

#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
static int
alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
		int n_segs, int socket_id, int type_msl_idx)
{
	char name[RTE_FBARRAY_NAME_LEN];

	snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
		 type_msl_idx);
	if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
			sizeof(struct rte_memseg))) {
		RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
			rte_strerror(rte_errno));
		return -1;
	}

	msl->page_sz = page_sz;
	msl->socket_id = socket_id;
	msl->base_va = NULL;

	RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
			(size_t)page_sz >> 10, socket_id);

	return 0;
}
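
/*
 * Example of the resulting fbarray name (illustrative): 2 MB pages on
 * socket 0, first list of that type, yield "memseg-2048k-0-0".
 */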

static int
alloc_va_space(struct rte_memseg_list *msl)
{
	uint64_t page_sz;
	size_t mem_sz;
	void *addr;
	int flags = 0;

#ifdef RTE_ARCH_PPC_64
	flags |= MAP_HUGETLB;
#endif

	page_sz = msl->page_sz;
	mem_sz = page_sz * msl->memseg_arr.len;

	addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
	if (addr == NULL) {
		if (rte_errno == EADDRNOTAVAIL)
			RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
				(unsigned long long)mem_sz, msl->base_va);
		else
			RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
		return -1;
	}
	msl->base_va = addr;
	msl->len = mem_sz;

	return 0;
}

static int
memseg_primary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int hpi_idx, msl_idx = 0;
	struct rte_memseg_list *msl;
	uint64_t max_mem, total_mem;

	/* no-huge does not need this at all */
	if (internal_config.no_hugetlbfs)
		return 0;

	/* FreeBSD has an issue where core dump will dump the entire memory
	 * contents, including anonymous zero-page memory. Therefore, while we
	 * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
	 * also be further limiting total memory amount to whatever memory is
	 * available to us through contigmem driver (plus spacing blocks).
	 *
	 * so, at each stage, we will be checking how much memory we are
	 * preallocating, and adjust all the values accordingly.
	 */

	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	total_mem = 0;

	/* create memseg lists */
	for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
			hpi_idx++) {
		uint64_t max_type_mem, total_type_mem = 0;
		uint64_t avail_mem;
		int type_msl_idx, max_segs, avail_segs, total_segs = 0;
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_config.hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		/* no NUMA support on FreeBSD */

		/* check if we've already exceeded total memory amount */
		if (total_mem >= max_mem)
			break;

		/* first, calculate theoretical limits according to config */
		max_type_mem = RTE_MIN(max_mem - total_mem,
			(uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
		max_segs = RTE_MAX_MEMSEG_PER_TYPE;

		/* now, limit all of that to whatever will actually be
		 * available to us, because without dynamic allocation support,
		 * all of that extra memory will be sitting there being useless
		 * and slowing down core dumps in case of a crash.
		 *
		 * we need (N*2)-1 segments because we cannot guarantee that
		 * each segment will be IOVA-contiguous with the previous one,
		 * so we will allocate more and put spaces in between segments
		 * that are non-contiguous.
		 */
		avail_segs = (hpi->num_pages[0] * 2) - 1;
		avail_mem = avail_segs * hugepage_sz;

		max_type_mem = RTE_MIN(avail_mem, max_type_mem);
		max_segs = RTE_MIN(avail_segs, max_segs);
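
		/*
		 * Example of the sizing above (illustrative): with 64
		 * contigmem buffers of 2 MB, avail_segs = (64 * 2) - 1 = 127
		 * and avail_mem = 254 MB, enough to leave a spacing hole
		 * after every buffer in the worst case where none of them
		 * are IOVA-contiguous.
		 */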

		type_msl_idx = 0;
		while (total_type_mem < max_type_mem &&
				total_segs < max_segs) {
			uint64_t cur_max_mem, cur_mem;
			unsigned int n_segs;

			if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL,
					"No more space in memseg lists, please increase %s\n",
					RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
				return -1;
			}

			msl = &mcfg->memsegs[msl_idx++];

			cur_max_mem = max_type_mem - total_type_mem;

			cur_mem = get_mem_amount(hugepage_sz,
					cur_max_mem);
			n_segs = cur_mem / hugepage_sz;

			if (alloc_memseg_list(msl, hugepage_sz, n_segs,
					0, type_msl_idx))
				return -1;

			total_segs += msl->memseg_arr.len;
			total_type_mem = total_segs * hugepage_sz;
			type_msl_idx++;

			if (alloc_va_space(msl)) {
				RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
				return -1;
			}
		}
		total_mem += total_type_mem;
	}
	return 0;
}

static int
memseg_secondary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int msl_idx = 0;
	struct rte_memseg_list *msl;

	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {

		msl = &mcfg->memsegs[msl_idx];

		/* skip empty memseg lists */
		if (msl->memseg_arr.len == 0)
			continue;

		if (rte_fbarray_attach(&msl->memseg_arr)) {
			RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
			return -1;
		}

		/* preallocate VA space */
		if (alloc_va_space(msl)) {
			RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
			return -1;
		}
	}

	return 0;
}

int
rte_eal_memseg_init(void)
{
	return rte_eal_process_type() == RTE_PROC_PRIMARY ?
			memseg_primary_init() :
			memseg_secondary_init();
}