lib/librte_eal/freebsd/eal_memory.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>

#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_memcfg.h"
#include "eal_options.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))
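
/*
 * Note on the FreeBSD memory model (added summary, not from the original
 * sources): unlike Linux, the FreeBSD EAL does not use hugetlbfs. Physically
 * contiguous memory is pre-reserved by the contigmem kernel module; each
 * reserved buffer is mmap()ed from the contigmem device (hpi->hugedir /
 * hpi->lock_descriptor below) and its physical address is read from a
 * "hw.contigmem.physaddr.<index>" sysctl.
 */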

uint64_t eal_get_baseaddr(void)
{
        /*
         * FreeBSD may allocate something in the space we will be mapping
         * things into before we get a chance to do so, so use a base address
         * (the 64 GiB mark) that is far away from where malloc() et al.
         * usually map things.
         */
        return 0x1000000000ULL;
}

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
        /* XXX not implemented. This function is only used by
         * rte_mempool_virt2iova() when hugepages are disabled. */
        (void)virtaddr;
        return RTE_BAD_IOVA;
}

rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
        return rte_mem_virt2phy(virtaddr);
}
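
/*
 * Added note (not in the original source): physical addresses for DPDK-owned
 * memory are not obtained through the stubs above on FreeBSD;
 * rte_eal_hugepage_init() below records each segment's IOVA directly from
 * the contigmem sysctls instead.
 */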

int
rte_eal_hugepage_init(void)
{
        struct rte_mem_config *mcfg;
        uint64_t total_mem = 0;
        void *addr;
        unsigned int i, j, seg_idx = 0;

        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;

        /* for debug purposes, hugetlbfs can be disabled */
        if (internal_config.no_hugetlbfs) {
                struct rte_memseg_list *msl;
                struct rte_fbarray *arr;
                struct rte_memseg *ms;
                uint64_t page_sz;
                int n_segs, cur_seg;

                /* create a memseg list */
                msl = &mcfg->memsegs[0];

                page_sz = RTE_PGSIZE_4K;
                n_segs = internal_config.memory / page_sz;

                if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
                                sizeof(struct rte_memseg))) {
                        RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
                        return -1;
                }

                addr = mmap(NULL, internal_config.memory,
                                PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                if (addr == MAP_FAILED) {
                        RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
                                        strerror(errno));
                        return -1;
                }
                msl->base_va = addr;
                msl->page_sz = page_sz;
                msl->len = internal_config.memory;
                msl->socket_id = 0;
                msl->heap = 1;

                /* populate memsegs. each memseg is 1 page long */
                for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
                        arr = &msl->memseg_arr;

                        ms = rte_fbarray_get(arr, cur_seg);
                        if (rte_eal_iova_mode() == RTE_IOVA_VA)
                                ms->iova = (uintptr_t)addr;
                        else
                                ms->iova = RTE_BAD_IOVA;
                        ms->addr = addr;
                        ms->hugepage_sz = page_sz;
                        ms->len = page_sz;
                        ms->socket_id = 0;

                        rte_fbarray_set_used(arr, cur_seg);

                        addr = RTE_PTR_ADD(addr, page_sz);
                }
                return 0;
        }

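        /*
         * Illustrative note (added, not from the original source): in the
         * hugepage path below, buffer j of each contigmem page size is
         * mapped from the contigmem device at file offset j * EAL_PAGE_SIZE
         * (the driver appears to index buffers by system-page-sized offsets),
         * and its physical address is read from the
         * "hw.contigmem.physaddr.<j>" sysctl so that seg->iova can be filled.
         */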
        /* map all hugepages and sort them */
        for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
                struct hugepage_info *hpi;
                rte_iova_t prev_end = 0;
                int prev_ms_idx = -1;
                uint64_t page_sz, mem_needed;
                unsigned int n_pages, max_pages;

                hpi = &internal_config.hugepage_info[i];
                page_sz = hpi->hugepage_sz;
                max_pages = hpi->num_pages[0];
                mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
                                page_sz);

                n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

                for (j = 0; j < n_pages; j++) {
                        struct rte_memseg_list *msl;
                        struct rte_fbarray *arr;
                        struct rte_memseg *seg;
                        int msl_idx, ms_idx;
                        rte_iova_t physaddr;
                        int error;
                        size_t sysctl_size = sizeof(physaddr);
                        char physaddr_str[64];
                        bool is_adjacent;

                        /* first, check if this segment is IOVA-adjacent to
                         * the previous one.
                         */
                        snprintf(physaddr_str, sizeof(physaddr_str),
                                        "hw.contigmem.physaddr.%d", j);
                        error = sysctlbyname(physaddr_str, &physaddr,
                                        &sysctl_size, NULL, 0);
                        if (error < 0) {
                                RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
                                                "from %s\n", j, hpi->hugedir);
                                return -1;
                        }

                        is_adjacent = prev_end != 0 && physaddr == prev_end;
                        prev_end = physaddr + hpi->hugepage_sz;

                        for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
                                        msl_idx++) {
                                bool empty, need_hole;
                                msl = &mcfg->memsegs[msl_idx];
                                arr = &msl->memseg_arr;

                                if (msl->page_sz != page_sz)
                                        continue;

                                empty = arr->count == 0;

                                /* we need a hole if this isn't an empty memseg
                                 * list, and if the previous segment was not
                                 * adjacent to the current one.
                                 */
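                                /* Example (added for clarity): if the previous
                                 * buffer ended at IOVA 0x140000000 and this
                                 * one starts there, the two are adjacent and
                                 * can occupy consecutive slots; otherwise one
                                 * slot is left unused so that a VA gap mirrors
                                 * the IOVA gap.
                                 */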
                                need_hole = !empty && !is_adjacent;

                                /* we need 1, plus hole if not adjacent */
                                ms_idx = rte_fbarray_find_next_n_free(arr,
                                                0, 1 + (need_hole ? 1 : 0));

                                /* memseg list is full? */
                                if (ms_idx < 0)
                                        continue;

                                if (need_hole && prev_ms_idx == ms_idx - 1)
                                        ms_idx++;
                                prev_ms_idx = ms_idx;

                                break;
                        }
                        if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
                                RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
                                        RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
                                        RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
                                return -1;
                        }
                        arr = &msl->memseg_arr;
                        seg = rte_fbarray_get(arr, ms_idx);

                        addr = RTE_PTR_ADD(msl->base_va,
                                        (size_t)msl->page_sz * ms_idx);

                        /* address is already mapped in memseg list, so using
                         * MAP_FIXED here is safe.
                         */
                        addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
                                        MAP_SHARED | MAP_FIXED,
                                        hpi->lock_descriptor,
                                        j * EAL_PAGE_SIZE);
                        if (addr == MAP_FAILED) {
                                RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
                                                j, hpi->hugedir);
                                return -1;
                        }

                        seg->addr = addr;
                        seg->iova = physaddr;
                        seg->hugepage_sz = page_sz;
                        seg->len = page_sz;
                        seg->nchannel = mcfg->nchannel;
                        seg->nrank = mcfg->nrank;
                        seg->socket_id = 0;

                        rte_fbarray_set_used(arr, ms_idx);

                        RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
                                        PRIx64", len %zu\n",
                                        seg_idx++, addr, physaddr, page_sz);

                        total_mem += seg->len;
                }
                if (total_mem >= internal_config.memory)
                        break;
        }
        if (total_mem < internal_config.memory) {
                RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
                                "requested: %" PRIu64 "M "
                                "available: %" PRIu64 "M\n",
                                internal_config.memory >> 20, total_mem >> 20);
                return -1;
        }
        return 0;
}

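/*
 * Added note (not in the original source): the walk callback and
 * rte_eal_hugepage_attach() below are the secondary-process counterpart of
 * rte_eal_hugepage_init(); they re-open the contigmem device(s) and map the
 * same buffers at the virtual addresses already recorded in the shared
 * memseg lists (hence MAP_FIXED at ms->addr).
 */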
struct attach_walk_args {
        int fd_hugepage;
        int seg_idx;
};

static int
attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
                void *arg)
{
        struct attach_walk_args *wa = arg;
        void *addr;

        if (msl->external)
                return 0;

        addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
                        wa->seg_idx * EAL_PAGE_SIZE);
        if (addr == MAP_FAILED || addr != ms->addr)
                return -1;
        wa->seg_idx++;

        return 0;
}

int
rte_eal_hugepage_attach(void)
{
        const struct hugepage_info *hpi;
        int fd_hugepage = -1;
        unsigned int i;

        hpi = &internal_config.hugepage_info[0];

        for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
                const struct hugepage_info *cur_hpi = &hpi[i];
                struct attach_walk_args wa;

                memset(&wa, 0, sizeof(wa));

                /* Obtain a file descriptor for contiguous memory */
                fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
                if (fd_hugepage < 0) {
                        RTE_LOG(ERR, EAL, "Could not open %s\n",
                                        cur_hpi->hugedir);
                        goto error;
                }
                wa.fd_hugepage = fd_hugepage;
                wa.seg_idx = 0;

                /* Map the contiguous memory into each memory segment */
                if (rte_memseg_walk(attach_segment, &wa) < 0) {
                        RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
                                wa.seg_idx, cur_hpi->hugedir);
                        goto error;
                }

                close(fd_hugepage);
                fd_hugepage = -1;
        }

        /* hugepage_info is no longer required */
        return 0;

error:
        if (fd_hugepage >= 0)
                close(fd_hugepage);
        return -1;
}

int
rte_eal_using_phys_addrs(void)
{
        return 0;
}

static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
        uint64_t area_sz, max_pages;

        /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
        max_pages = RTE_MAX_MEMSEG_PER_LIST;
        max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

        area_sz = RTE_MIN(page_sz * max_pages, max_mem);

        /* make sure the list isn't smaller than the page size */
        area_sz = RTE_MAX(area_sz, page_sz);

        return RTE_ALIGN(area_sz, page_sz);
}
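
/*
 * Worked example (added; assumes the common default config values
 * RTE_MAX_MEMSEG_PER_LIST = 8192 and RTE_MAX_MEM_MB_PER_LIST = 32768):
 * for 2 MB pages and a large max_mem, page_sz * max_pages is 16 GB while the
 * per-list cap is 32 GB, so get_mem_amount() returns 16 GB; for 1 GB pages
 * the page-count limit alone would allow 8 TB, so the 32 GB per-list cap
 * wins and 32 GB is returned.
 */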

#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
static int
alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
                int n_segs, int socket_id, int type_msl_idx)
{
        char name[RTE_FBARRAY_NAME_LEN];

        snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
                 type_msl_idx);
        if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
                        sizeof(struct rte_memseg))) {
                RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
                        rte_strerror(rte_errno));
                return -1;
        }

        msl->page_sz = page_sz;
        msl->socket_id = socket_id;
        msl->base_va = NULL;

        RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
                        (size_t)page_sz >> 10, socket_id);

        return 0;
}

static int
alloc_va_space(struct rte_memseg_list *msl)
{
        uint64_t page_sz;
        size_t mem_sz;
        void *addr;
        int flags = 0;

#ifdef RTE_ARCH_PPC_64
        flags |= MAP_HUGETLB;
#endif

        page_sz = msl->page_sz;
        mem_sz = page_sz * msl->memseg_arr.len;

        addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
        if (addr == NULL) {
                if (rte_errno == EADDRNOTAVAIL)
                        RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - "
                                "please use '--" OPT_BASE_VIRTADDR "' option\n",
                                (unsigned long long)mem_sz, msl->base_va);
                else
                        RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
                return -1;
        }
        msl->base_va = addr;
        msl->len = mem_sz;

        return 0;
}

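/*
 * Added context (not in the original source): memseg_primary_init() only
 * pre-creates the memseg lists and reserves their VA space; the contigmem
 * buffers themselves are mapped into these lists later, when EAL memory
 * init calls rte_eal_hugepage_init() (or rte_eal_hugepage_attach() in a
 * secondary process).
 */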
static int
memseg_primary_init(void)
{
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        int hpi_idx, msl_idx = 0;
        struct rte_memseg_list *msl;
        uint64_t max_mem, total_mem;

        /* no-huge does not need this at all */
        if (internal_config.no_hugetlbfs)
                return 0;

        /* FreeBSD has an issue where a core dump will dump the entire memory
         * contents, including anonymous zero-page memory. Therefore, while we
         * limit the total amount of memory to RTE_MAX_MEM_MB, we also further
         * limit it to whatever memory is available to us through the contigmem
         * driver (plus spacing blocks).
         *
         * So, at each stage, we check how much memory we are preallocating
         * and adjust all the values accordingly.
         */

        max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
        total_mem = 0;

        /* create memseg lists */
        for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
                        hpi_idx++) {
                uint64_t max_type_mem, total_type_mem = 0;
                uint64_t avail_mem;
                int type_msl_idx, max_segs, avail_segs, total_segs = 0;
                struct hugepage_info *hpi;
                uint64_t hugepage_sz;

                hpi = &internal_config.hugepage_info[hpi_idx];
                hugepage_sz = hpi->hugepage_sz;

                /* no NUMA support on FreeBSD */

                /* check if we've already exceeded total memory amount */
                if (total_mem >= max_mem)
                        break;

                /* first, calculate theoretical limits according to config */
                max_type_mem = RTE_MIN(max_mem - total_mem,
                        (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
                max_segs = RTE_MAX_MEMSEG_PER_TYPE;

                /* now, limit all of that to whatever will actually be
                 * available to us, because without dynamic allocation support,
                 * all of that extra memory will be sitting there being useless
                 * and slowing down core dumps in case of a crash.
                 *
                 * we need (N*2)-1 segments because we cannot guarantee that
                 * each segment will be IOVA-contiguous with the previous one,
                 * so we will allocate more and put spaces in between segments
                 * that are non-contiguous.
                 */
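                /* Worked example (added for clarity): with N = 4 contigmem
                 * buffers the worst case is buf,hole,buf,hole,buf,hole,buf,
                 * i.e. 7 = (4*2)-1 memseg slots.
                 */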
                avail_segs = (hpi->num_pages[0] * 2) - 1;
                avail_mem = avail_segs * hugepage_sz;

                max_type_mem = RTE_MIN(avail_mem, max_type_mem);
                max_segs = RTE_MIN(avail_segs, max_segs);

                type_msl_idx = 0;
                while (total_type_mem < max_type_mem &&
                                total_segs < max_segs) {
                        uint64_t cur_max_mem, cur_mem;
                        unsigned int n_segs;

                        if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
                                RTE_LOG(ERR, EAL,
                                        "No more space in memseg lists, please increase %s\n",
                                        RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
                                return -1;
                        }

                        msl = &mcfg->memsegs[msl_idx++];

                        cur_max_mem = max_type_mem - total_type_mem;

                        cur_mem = get_mem_amount(hugepage_sz,
                                        cur_max_mem);
                        n_segs = cur_mem / hugepage_sz;

                        if (alloc_memseg_list(msl, hugepage_sz, n_segs,
                                        0, type_msl_idx))
                                return -1;

                        total_segs += msl->memseg_arr.len;
                        total_type_mem = total_segs * hugepage_sz;
                        type_msl_idx++;

                        if (alloc_va_space(msl)) {
                                RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
                                return -1;
                        }
                }
                total_mem += total_type_mem;
        }
        return 0;
}

static int
memseg_secondary_init(void)
{
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        int msl_idx = 0;
        struct rte_memseg_list *msl;

        for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
                msl = &mcfg->memsegs[msl_idx];

                /* skip empty memseg lists */
                if (msl->memseg_arr.len == 0)
                        continue;

                if (rte_fbarray_attach(&msl->memseg_arr)) {
                        RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
                        return -1;
                }

                /* preallocate VA space */
                if (alloc_va_space(msl)) {
                        RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
                        return -1;
                }
        }

        return 0;
}

int
rte_eal_memseg_init(void)
{
        return rte_eal_process_type() == RTE_PROC_PRIMARY ?
                        memseg_primary_init() :
                        memseg_secondary_init();
}