mem: extract common dynamic memory allocation
[dpdk.git] / lib / librte_eal / freebsd / eal_memory.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>

#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_memcfg.h"
#include "eal_options.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))

uint64_t eal_get_baseaddr(void)
{
	/*
	 * FreeBSD may allocate something in the space we will be mapping
	 * things into before we get a chance to do so, so use a base address
	 * that's far away from where malloc() et al usually map things.
	 */
	return 0x1000000000ULL;
}

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
	/* XXX not implemented. This function is only used by
	 * rte_mempool_virt2iova() when hugepages are disabled. */
	(void)virtaddr;
	return RTE_BAD_IOVA;
}

rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
	return rte_mem_virt2phy(virtaddr);
}

int
rte_eal_hugepage_init(void)
{
	struct rte_mem_config *mcfg;
	uint64_t total_mem = 0;
	void *addr;
	unsigned int i, j, seg_idx = 0;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* for debug purposes, hugetlbfs can be disabled */
	if (internal_config.no_hugetlbfs) {
		struct rte_memseg_list *msl;
		uint64_t mem_sz, page_sz;
		int n_segs;

		/* create a memseg list */
		msl = &mcfg->memsegs[0];

		mem_sz = internal_config.memory;
		page_sz = RTE_PGSIZE_4K;
		n_segs = mem_sz / page_sz;

		if (eal_memseg_list_init_named(
				msl, "nohugemem", page_sz, n_segs, 0, true)) {
			return -1;
		}

		addr = mmap(NULL, mem_sz, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}

		msl->base_va = addr;
		msl->len = mem_sz;

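		/* fill in and mark used every 4K segment in the list; all of
		 * them are backed by the single anonymous mapping above.
		 */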
		eal_memseg_list_populate(msl, addr, n_segs);

		return 0;
	}

	/* map all contigmem buffers and place them in memseg lists */
	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		struct hugepage_info *hpi;
		rte_iova_t prev_end = 0;
		int prev_ms_idx = -1;
		uint64_t page_sz, mem_needed;
		unsigned int n_pages, max_pages;

		hpi = &internal_config.hugepage_info[i];
		page_sz = hpi->hugepage_sz;
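		/* num_pages is indexed by NUMA socket, but there is no NUMA
		 * support on FreeBSD, so all contigmem buffers are accounted
		 * to socket 0.
		 */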
		max_pages = hpi->num_pages[0];
		mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
				page_sz);

		n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

		for (j = 0; j < n_pages; j++) {
			struct rte_memseg_list *msl;
			struct rte_fbarray *arr;
			struct rte_memseg *seg;
			int msl_idx, ms_idx;
			rte_iova_t physaddr;
			int error;
			size_t sysctl_size = sizeof(physaddr);
			char physaddr_str[64];
			bool is_adjacent;

			/* first, check if this segment is IOVA-adjacent to
			 * the previous one.
			 */
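			/* the contigmem driver exposes the physical address of
			 * each of its buffers through the
			 * hw.contigmem.physaddr.<index> sysctl, which is what
			 * we query here.
			 */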
			snprintf(physaddr_str, sizeof(physaddr_str),
					"hw.contigmem.physaddr.%d", j);
			error = sysctlbyname(physaddr_str, &physaddr,
					&sysctl_size, NULL, 0);
			if (error < 0) {
				RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
						"from %s\n", j, hpi->hugedir);
				return -1;
			}

			is_adjacent = prev_end != 0 && physaddr == prev_end;
			prev_end = physaddr + hpi->hugepage_sz;

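			/* find a memseg list of the right page size that has
			 * room for this segment (plus a hole if one is needed).
			 */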
			for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
					msl_idx++) {
				bool empty, need_hole;
				msl = &mcfg->memsegs[msl_idx];
				arr = &msl->memseg_arr;

				if (msl->page_sz != page_sz)
					continue;

				empty = arr->count == 0;

				/* we need a hole if this isn't an empty memseg
				 * list, and if the previous segment was not
				 * adjacent to the current one.
				 */
				need_hole = !empty && !is_adjacent;

				/* we need 1 slot, plus a hole if not adjacent */
				ms_idx = rte_fbarray_find_next_n_free(arr,
						0, 1 + (need_hole ? 1 : 0));

				/* memseg list is full? */
				if (ms_idx < 0)
					continue;

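				/* if the free run starts right after the
				 * previous segment, skip one slot so the two
				 * segments do not end up VA-contiguous, since
				 * their IOVAs are not contiguous either.
				 */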
				if (need_hole && prev_ms_idx == ms_idx - 1)
					ms_idx++;
				prev_ms_idx = ms_idx;

				break;
			}
			if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
					RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
					RTE_STR(CONFIG_RTE_MAX_MEM_MB_PER_TYPE));
				return -1;
			}
			arr = &msl->memseg_arr;
			seg = rte_fbarray_get(arr, ms_idx);

			addr = RTE_PTR_ADD(msl->base_va,
					(size_t)msl->page_sz * ms_idx);

			/* address is already mapped in memseg list, so using
			 * MAP_FIXED here is safe.
			 */
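			/* the contigmem driver selects which of its buffers to
			 * map based on the page-aligned file offset, so buffer
			 * j is requested by passing offset j * EAL_PAGE_SIZE.
			 */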
			addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
					MAP_SHARED | MAP_FIXED,
					hpi->lock_descriptor,
					j * EAL_PAGE_SIZE);
			if (addr == MAP_FAILED) {
				RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
						j, hpi->hugedir);
				return -1;
			}

			seg->addr = addr;
			seg->iova = physaddr;
			seg->hugepage_sz = page_sz;
			seg->len = page_sz;
			seg->nchannel = mcfg->nchannel;
			seg->nrank = mcfg->nrank;
			seg->socket_id = 0;

			rte_fbarray_set_used(arr, ms_idx);

			RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
					PRIx64", len %zu\n",
					seg_idx++, addr, physaddr, page_sz);

			total_mem += seg->len;
		}
		if (total_mem >= internal_config.memory)
			break;
	}
	if (total_mem < internal_config.memory) {
		RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
				"requested: %" PRIu64 "M "
				"available: %" PRIu64 "M\n",
				internal_config.memory >> 20, total_mem >> 20);
		return -1;
	}
	return 0;
}

struct attach_walk_args {
	int fd_hugepage;
	int seg_idx;
};

static int
attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)
{
	struct attach_walk_args *wa = arg;
	void *addr;

	if (msl->external)
		return 0;

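	/* map the same contigmem buffer at the exact address the primary
	 * process used; the VA range was already reserved when the secondary
	 * process attached to the memseg lists, so MAP_FIXED is safe here.
	 */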
	addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
			wa->seg_idx * EAL_PAGE_SIZE);
	if (addr == MAP_FAILED || addr != ms->addr)
		return -1;
	wa->seg_idx++;

	return 0;
}

int
rte_eal_hugepage_attach(void)
{
	const struct hugepage_info *hpi;
	int fd_hugepage = -1;
	unsigned int i;

	hpi = &internal_config.hugepage_info[0];

	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
		const struct hugepage_info *cur_hpi = &hpi[i];
		struct attach_walk_args wa;

		memset(&wa, 0, sizeof(wa));

		/* Obtain a file descriptor for contiguous memory */
		fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
		if (fd_hugepage < 0) {
			RTE_LOG(ERR, EAL, "Could not open %s\n",
					cur_hpi->hugedir);
			goto error;
		}
		wa.fd_hugepage = fd_hugepage;
		wa.seg_idx = 0;

		/* Map the contiguous memory into each memory segment */
		if (rte_memseg_walk(attach_segment, &wa) < 0) {
			RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
				wa.seg_idx, cur_hpi->hugedir);
			goto error;
		}

		close(fd_hugepage);
		fd_hugepage = -1;
	}

	/* hugepage_info is no longer required */
	return 0;

error:
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}

int
rte_eal_using_phys_addrs(void)
{
	return 0;
}

static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
	uint64_t area_sz, max_pages;

	/* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
	max_pages = RTE_MAX_MEMSEG_PER_LIST;
	max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

	area_sz = RTE_MIN(page_sz * max_pages, max_mem);

	/* make sure the list isn't smaller than the page size */
	area_sz = RTE_MAX(area_sz, page_sz);

	return RTE_ALIGN(area_sz, page_sz);
}

static int
memseg_list_alloc(struct rte_memseg_list *msl)
{
	int flags = 0;

#ifdef RTE_ARCH_PPC_64
	flags |= EAL_RESERVE_HUGEPAGES;
#endif
	return eal_memseg_list_alloc(msl, flags);
}

static int
memseg_primary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int hpi_idx, msl_idx = 0;
	struct rte_memseg_list *msl;
	uint64_t max_mem, total_mem;

	/* no-huge does not need this at all */
	if (internal_config.no_hugetlbfs)
		return 0;

	/* FreeBSD has an issue where a core dump will dump the entire memory
	 * contents, including anonymous zero-page memory. Therefore, while we
	 * will be limiting the total amount of memory to RTE_MAX_MEM_MB, we
	 * will also further limit it to whatever memory is available to us
	 * through the contigmem driver (plus spacing blocks).
	 *
	 * So, at each stage, we check how much memory we are preallocating and
	 * adjust all the values accordingly.
	 */

	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	total_mem = 0;

	/* create memseg lists */
	for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
			hpi_idx++) {
		uint64_t max_type_mem, total_type_mem = 0;
		uint64_t avail_mem;
		int type_msl_idx, max_segs, avail_segs, total_segs = 0;
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_config.hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		/* no NUMA support on FreeBSD */

		/* check if we've already exceeded total memory amount */
		if (total_mem >= max_mem)
			break;

		/* first, calculate theoretical limits according to config */
		max_type_mem = RTE_MIN(max_mem - total_mem,
			(uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
		max_segs = RTE_MAX_MEMSEG_PER_TYPE;

		/* now, limit all of that to whatever will actually be
		 * available to us, because without dynamic allocation support,
		 * all of that extra memory will be sitting there being useless
		 * and slowing down core dumps in case of a crash.
		 *
		 * we need (N*2)-1 segments because we cannot guarantee that
		 * each segment will be IOVA-contiguous with the previous one,
		 * so we will allocate more and put spaces between segments
		 * that are non-contiguous.
		 */
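		/* for example, with four contigmem buffers the worst case is
		 * seg,hole,seg,hole,seg,hole,seg, i.e. (4 * 2) - 1 = 7 array
		 * entries.
		 */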
		avail_segs = (hpi->num_pages[0] * 2) - 1;
		avail_mem = avail_segs * hugepage_sz;

		max_type_mem = RTE_MIN(avail_mem, max_type_mem);
		max_segs = RTE_MIN(avail_segs, max_segs);

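		/* create as many memseg lists as needed to cover this type's
		 * share of memory; each list is capped by
		 * RTE_MAX_MEMSEG_PER_LIST and RTE_MAX_MEM_MB_PER_LIST (see
		 * get_mem_amount() above).
		 */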
		type_msl_idx = 0;
		while (total_type_mem < max_type_mem &&
				total_segs < max_segs) {
			uint64_t cur_max_mem, cur_mem;
			unsigned int n_segs;

			if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL,
					"No more space in memseg lists, please increase %s\n",
					RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
				return -1;
			}

			msl = &mcfg->memsegs[msl_idx++];

			cur_max_mem = max_type_mem - total_type_mem;

			cur_mem = get_mem_amount(hugepage_sz,
					cur_max_mem);
			n_segs = cur_mem / hugepage_sz;

			if (eal_memseg_list_init(msl, hugepage_sz, n_segs,
					0, type_msl_idx, false))
				return -1;

			total_segs += msl->memseg_arr.len;
			total_type_mem = total_segs * hugepage_sz;
			type_msl_idx++;

			if (memseg_list_alloc(msl)) {
				RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
				return -1;
			}
		}
		total_mem += total_type_mem;
	}
	return 0;
}

static int
memseg_secondary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int msl_idx = 0;
	struct rte_memseg_list *msl;

	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {

		msl = &mcfg->memsegs[msl_idx];

		/* skip empty memseg lists */
		if (msl->memseg_arr.len == 0)
			continue;

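		/* attach to the fbarray that the primary process created in
		 * shared memory for this memseg list.
		 */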
		if (rte_fbarray_attach(&msl->memseg_arr)) {
			RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
			return -1;
		}

		/* preallocate VA space */
		if (memseg_list_alloc(msl)) {
			RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
			return -1;
		}
	}

	return 0;
}

int
rte_eal_memseg_init(void)
{
	return rte_eal_process_type() == RTE_PROC_PRIMARY ?
			memseg_primary_init() :
			memseg_secondary_init();
}