mem: make base address hint OS specific
lib/librte_eal/freebsd/eal/eal_memory.c (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>

#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_memcfg.h"
#include "eal_options.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))

uint64_t eal_get_baseaddr(void)
{
        /*
         * FreeBSD may allocate something in the space where we will be
         * mapping things before we get a chance to do so, so use a base
         * address that's far away from where malloc() et al. usually map
         * things.
         */
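        /* 0x1000000000 == 64 GiB */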
        return 0x1000000000ULL;
}

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
        /* XXX not implemented. This function is only used by
         * rte_mempool_virt2iova() when hugepages are disabled. */
        (void)virtaddr;
        return RTE_BAD_IOVA;
}

rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
        return rte_mem_virt2phy(virtaddr);
}

int
rte_eal_hugepage_init(void)
{
        struct rte_mem_config *mcfg;
        uint64_t total_mem = 0;
        void *addr;
        unsigned int i, j, seg_idx = 0;

        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;

        /* for debug purposes, hugetlbfs can be disabled */
        if (internal_config.no_hugetlbfs) {
                struct rte_memseg_list *msl;
                struct rte_fbarray *arr;
                struct rte_memseg *ms;
                uint64_t page_sz;
                int n_segs, cur_seg;

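                /*
                 * --no-huge mode: back all requested memory with a single
                 * anonymous mapping of regular 4K pages, described by one
                 * memseg list; IOVAs are only meaningful when running in
                 * RTE_IOVA_VA mode.
                 */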
                /* create a memseg list */
                msl = &mcfg->memsegs[0];

                page_sz = RTE_PGSIZE_4K;
                n_segs = internal_config.memory / page_sz;

                if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
                                sizeof(struct rte_memseg))) {
                        RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
                        return -1;
                }

                addr = mmap(NULL, internal_config.memory,
                                PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                if (addr == MAP_FAILED) {
                        RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
                                        strerror(errno));
                        return -1;
                }
                msl->base_va = addr;
                msl->page_sz = page_sz;
                msl->len = internal_config.memory;
                msl->socket_id = 0;

                /* populate memsegs. each memseg is 1 page long */
                for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
                        arr = &msl->memseg_arr;

                        ms = rte_fbarray_get(arr, cur_seg);
                        if (rte_eal_iova_mode() == RTE_IOVA_VA)
                                ms->iova = (uintptr_t)addr;
                        else
                                ms->iova = RTE_BAD_IOVA;
                        ms->addr = addr;
                        ms->hugepage_sz = page_sz;
                        ms->len = page_sz;
                        ms->socket_id = 0;

                        rte_fbarray_set_used(arr, cur_seg);

                        addr = RTE_PTR_ADD(addr, page_sz);
                }
                return 0;
        }

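        /*
         * Hugepage path: physically contiguous buffers are pre-reserved by
         * the contigmem(4) kernel module. Each buffer's physical address is
         * exported through the hw.contigmem.physaddr.<index> sysctl, and the
         * buffer itself is mapped from the contigmem device descriptor.
         */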
        /* map all hugepages */
        for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
                struct hugepage_info *hpi;
                rte_iova_t prev_end = 0;
                int prev_ms_idx = -1;
                uint64_t page_sz, mem_needed;
                unsigned int n_pages, max_pages;

                hpi = &internal_config.hugepage_info[i];
                page_sz = hpi->hugepage_sz;
                max_pages = hpi->num_pages[0];
                mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
                                page_sz);

                n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

                for (j = 0; j < n_pages; j++) {
                        struct rte_memseg_list *msl;
                        struct rte_fbarray *arr;
                        struct rte_memseg *seg;
                        int msl_idx, ms_idx;
                        rte_iova_t physaddr;
                        int error;
                        size_t sysctl_size = sizeof(physaddr);
                        char physaddr_str[64];
                        bool is_adjacent;

                        /* first, check if this segment is IOVA-adjacent to
                         * the previous one.
                         */
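                        /* the contigmem module publishes the physical address
                         * of buffer j as the hw.contigmem.physaddr.<j> sysctl.
                         */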
                        snprintf(physaddr_str, sizeof(physaddr_str),
                                        "hw.contigmem.physaddr.%d", j);
                        error = sysctlbyname(physaddr_str, &physaddr,
                                        &sysctl_size, NULL, 0);
                        if (error < 0) {
                                RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
                                                "from %s\n", j, hpi->hugedir);
                                return -1;
                        }

                        is_adjacent = prev_end != 0 && physaddr == prev_end;
                        prev_end = physaddr + hpi->hugepage_sz;

                        for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
                                        msl_idx++) {
                                bool empty, need_hole;
                                msl = &mcfg->memsegs[msl_idx];
                                arr = &msl->memseg_arr;

                                if (msl->page_sz != page_sz)
                                        continue;

                                empty = arr->count == 0;

                                /* we need a hole if this isn't an empty memseg
                                 * list, and if the previous segment was not
                                 * adjacent to the current one.
                                 */
                                need_hole = !empty && !is_adjacent;

                                /* we need 1, plus hole if not adjacent */
                                ms_idx = rte_fbarray_find_next_n_free(arr,
                                                0, 1 + (need_hole ? 1 : 0));

                                /* memseg list is full? */
                                if (ms_idx < 0)
                                        continue;

                                if (need_hole && prev_ms_idx == ms_idx - 1)
                                        ms_idx++;
                                prev_ms_idx = ms_idx;

                                break;
                        }
                        if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
                                RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
                                        RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
                                        RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
                                return -1;
                        }
                        arr = &msl->memseg_arr;
                        seg = rte_fbarray_get(arr, ms_idx);

                        addr = RTE_PTR_ADD(msl->base_va,
                                        (size_t)msl->page_sz * ms_idx);

                        /* address is already mapped in memseg list, so using
                         * MAP_FIXED here is safe.
                         */
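                        /* contigmem hands out buffer j at file offset
                         * j * EAL_PAGE_SIZE within the device descriptor.
                         */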
                        addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
                                        MAP_SHARED | MAP_FIXED,
                                        hpi->lock_descriptor,
                                        j * EAL_PAGE_SIZE);
                        if (addr == MAP_FAILED) {
                                RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
                                                j, hpi->hugedir);
                                return -1;
                        }

                        seg->addr = addr;
                        seg->iova = physaddr;
                        seg->hugepage_sz = page_sz;
                        seg->len = page_sz;
                        seg->nchannel = mcfg->nchannel;
                        seg->nrank = mcfg->nrank;
                        seg->socket_id = 0;

                        rte_fbarray_set_used(arr, ms_idx);

                        RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
                                        PRIx64", len %zu\n",
                                        seg_idx++, addr, physaddr, page_sz);

                        total_mem += seg->len;
                }
                if (total_mem >= internal_config.memory)
                        break;
        }
        if (total_mem < internal_config.memory) {
                RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
                                "requested: %" PRIu64 "M "
                                "available: %" PRIu64 "M\n",
                                internal_config.memory >> 20, total_mem >> 20);
                return -1;
        }
        return 0;
}

struct attach_walk_args {
        int fd_hugepage;
        int seg_idx;
};
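
/*
 * Per-memseg callback for rte_memseg_walk(): re-map each contigmem buffer
 * at the exact virtual address recorded in the shared memseg list, so that
 * pointers stay valid across processes.
 */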
static int
attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
                void *arg)
{
        struct attach_walk_args *wa = arg;
        void *addr;

        if (msl->external)
                return 0;

        addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
                        wa->seg_idx * EAL_PAGE_SIZE);
        if (addr == MAP_FAILED || addr != ms->addr)
                return -1;
        wa->seg_idx++;

        return 0;
}

int
rte_eal_hugepage_attach(void)
{
        const struct hugepage_info *hpi;
        int fd_hugepage = -1;
        unsigned int i;

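        /* open each contigmem device discovered by the primary process and
         * map its buffers at the addresses recorded in the memseg lists.
         */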
        hpi = &internal_config.hugepage_info[0];

        for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
                const struct hugepage_info *cur_hpi = &hpi[i];
                struct attach_walk_args wa;

                memset(&wa, 0, sizeof(wa));

                /* Obtain a file descriptor for contiguous memory */
                fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
                if (fd_hugepage < 0) {
                        RTE_LOG(ERR, EAL, "Could not open %s\n",
                                        cur_hpi->hugedir);
                        goto error;
                }
                wa.fd_hugepage = fd_hugepage;
                wa.seg_idx = 0;

                /* Map the contiguous memory into each memory segment */
                if (rte_memseg_walk(attach_segment, &wa) < 0) {
                        RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
                                wa.seg_idx, cur_hpi->hugedir);
                        goto error;
                }

                close(fd_hugepage);
                fd_hugepage = -1;
        }

        /* hugepage_info is no longer required */
        return 0;

error:
        if (fd_hugepage >= 0)
                close(fd_hugepage);
        return -1;
}

int
rte_eal_using_phys_addrs(void)
{
        return 0;
}

static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
        uint64_t area_sz, max_pages;

        /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
        max_pages = RTE_MAX_MEMSEG_PER_LIST;
        max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

        area_sz = RTE_MIN(page_sz * max_pages, max_mem);

        /* make sure the list isn't smaller than the page size */
        area_sz = RTE_MAX(area_sz, page_sz);

        return RTE_ALIGN(area_sz, page_sz);
}

#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
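/* e.g. "memseg-2048k-0-0": first list of 2M pages for socket 0 */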
static int
alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
                int n_segs, int socket_id, int type_msl_idx)
{
        char name[RTE_FBARRAY_NAME_LEN];

        snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
                 type_msl_idx);
        if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
                        sizeof(struct rte_memseg))) {
                RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
                        rte_strerror(rte_errno));
                return -1;
        }

        msl->page_sz = page_sz;
        msl->socket_id = socket_id;
        msl->base_va = NULL;

        RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
                        (size_t)page_sz >> 10, socket_id);

        return 0;
}

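/*
 * Reserve one contiguous range of virtual address space for an entire memseg
 * list; the individual hugepages are mapped into this window later with
 * MAP_FIXED (see rte_eal_hugepage_init()/rte_eal_hugepage_attach()).
 */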
static int
alloc_va_space(struct rte_memseg_list *msl)
{
        uint64_t page_sz;
        size_t mem_sz;
        void *addr;
        int flags = 0;

#ifdef RTE_ARCH_PPC_64
        flags |= MAP_HUGETLB;
#endif

        page_sz = msl->page_sz;
        mem_sz = page_sz * msl->memseg_arr.len;

        addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
        if (addr == NULL) {
                if (rte_errno == EADDRNOTAVAIL)
                        RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - "
                                "please use '--" OPT_BASE_VIRTADDR "' option\n",
                                (unsigned long long)mem_sz, msl->base_va);
                else
                        RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
                return -1;
        }
        msl->base_va = addr;
        msl->len = mem_sz;

        return 0;
}

static int
memseg_primary_init(void)
{
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        int hpi_idx, msl_idx = 0;
        struct rte_memseg_list *msl;
        uint64_t max_mem, total_mem;

        /* no-huge does not need this at all */
        if (internal_config.no_hugetlbfs)
                return 0;

        /* FreeBSD has an issue where a core dump will dump the entire memory
         * contents, including anonymous zero-page memory. Therefore, while we
         * will be limiting the total amount of memory to RTE_MAX_MEM_MB, we
         * will also further limit the total memory amount to whatever memory
         * is available to us through the contigmem driver (plus spacing
         * blocks).
         *
         * So, at each stage, we will be checking how much memory we are
         * preallocating, and adjusting all the values accordingly.
         */

        max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
        total_mem = 0;

        /* create memseg lists */
        for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
                        hpi_idx++) {
                uint64_t max_type_mem, total_type_mem = 0;
                uint64_t avail_mem;
                int type_msl_idx, max_segs, avail_segs, total_segs = 0;
                struct hugepage_info *hpi;
                uint64_t hugepage_sz;

                hpi = &internal_config.hugepage_info[hpi_idx];
                hugepage_sz = hpi->hugepage_sz;

                /* no NUMA support on FreeBSD */

                /* check if we've already exceeded total memory amount */
                if (total_mem >= max_mem)
                        break;

                /* first, calculate theoretical limits according to config */
                max_type_mem = RTE_MIN(max_mem - total_mem,
                        (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
                max_segs = RTE_MAX_MEMSEG_PER_TYPE;

                /* now, limit all of that to whatever will actually be
                 * available to us, because without dynamic allocation support,
                 * all of that extra memory will be sitting there being useless
                 * and slowing down core dumps in case of a crash.
                 *
                 * we need (N*2)-1 segments because we cannot guarantee that
                 * each segment will be IOVA-contiguous with the previous one,
                 * so we will allocate more and put spaces in between segments
                 * that are non-contiguous.
                 */
                avail_segs = (hpi->num_pages[0] * 2) - 1;
                avail_mem = avail_segs * hugepage_sz;
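                /* e.g. 4 contigmem buffers -> room for (4 * 2) - 1 = 7
                 * segments, leaving space for a hole between any two
                 * non-adjacent buffers.
                 */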

                max_type_mem = RTE_MIN(avail_mem, max_type_mem);
                max_segs = RTE_MIN(avail_segs, max_segs);

                type_msl_idx = 0;
                while (total_type_mem < max_type_mem &&
                                total_segs < max_segs) {
                        uint64_t cur_max_mem, cur_mem;
                        unsigned int n_segs;

                        if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
                                RTE_LOG(ERR, EAL,
                                        "No more space in memseg lists, please increase %s\n",
                                        RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
                                return -1;
                        }

                        msl = &mcfg->memsegs[msl_idx++];

                        cur_max_mem = max_type_mem - total_type_mem;

                        cur_mem = get_mem_amount(hugepage_sz,
                                        cur_max_mem);
                        n_segs = cur_mem / hugepage_sz;

                        if (alloc_memseg_list(msl, hugepage_sz, n_segs,
                                        0, type_msl_idx))
                                return -1;

                        total_segs += msl->memseg_arr.len;
                        total_type_mem = total_segs * hugepage_sz;
                        type_msl_idx++;

                        if (alloc_va_space(msl)) {
                                RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
                                return -1;
                        }
                }
                total_mem += total_type_mem;
        }
        return 0;
}

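/*
 * In a secondary process, only attach to the memseg list metadata created by
 * the primary and reserve matching VA windows; the hugepages themselves are
 * mapped later in rte_eal_hugepage_attach().
 */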
static int
memseg_secondary_init(void)
{
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        int msl_idx = 0;
        struct rte_memseg_list *msl;

        for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {

                msl = &mcfg->memsegs[msl_idx];

                /* skip empty memseg lists */
                if (msl->memseg_arr.len == 0)
                        continue;

                if (rte_fbarray_attach(&msl->memseg_arr)) {
                        RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
                        return -1;
                }

                /* preallocate VA space */
                if (alloc_va_space(msl)) {
                        RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
                        return -1;
                }
        }

        return 0;
}

int
rte_eal_memseg_init(void)
{
        return rte_eal_process_type() == RTE_PROC_PRIMARY ?
                        memseg_primary_init() :
                        memseg_secondary_init();
}