eal: ignore failure of naming a control thread
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 54329dc..e941e5d 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -34,7 +34,7 @@
 
 #define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
 
-static uint64_t baseaddr_offset;
+static void *next_baseaddr;
 static uint64_t system_page_sz;
 
 void *
@@ -56,9 +56,12 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
        allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
        unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
 
-       if (requested_addr == NULL && internal_config.base_virtaddr != 0) {
-               requested_addr = (void *) (internal_config.base_virtaddr +
-                               (size_t)baseaddr_offset);
+       if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
+                       rte_eal_process_type() == RTE_PROC_PRIMARY)
+               next_baseaddr = (void *) internal_config.base_virtaddr;
+
+       if (requested_addr == NULL && next_baseaddr != NULL) {
+               requested_addr = next_baseaddr;
                requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
                addr_is_hint = true;
        }
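
The hunk above replaces the running baseaddr_offset with a next_baseaddr cursor: the primary process seeds it from --base-virtaddr and, later in this patch, advances it past each successful reservation so consecutive mappings stay laid out back to back. A minimal standalone sketch of that cursor pattern follows; it uses plain mmap() and made-up names (g_next_hint, reserve_at_hint), not DPDK symbols, and assumes page_sz is a power of two.

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

/* Illustration only: a cursor that starts at a configured base address and
 * moves past every successful reservation, so consecutive mappings end up
 * adjacent to each other.
 */
static void *g_next_hint;

static void *
reserve_at_hint(size_t len, void *base, size_t page_sz)
{
        void *hint, *va;

        if (g_next_hint == NULL)
                g_next_hint = base;     /* first call: start at the base address */

        /* align the hint up to the requested page size */
        hint = (void *)(((uintptr_t)g_next_hint + page_sz - 1) &
                        ~((uintptr_t)page_sz - 1));

        va = mmap(hint, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (va == MAP_FAILED)
                return NULL;

        /* advance the cursor only when the kernel respected the hint;
         * the real code logs a warning when it did not.
         */
        if (va == hint)
                g_next_hint = (char *)va + len;

        return va;
}

Seeding the cursor only in the primary process, as the hunk does, matches the multi-process model: secondary processes attach at whatever addresses the primary already chose.
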
@@ -70,13 +73,18 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
         * system page size is the same as requested page size.
         */
        no_align = (requested_addr != NULL &&
-               ((uintptr_t)requested_addr & (page_sz - 1)) == 0) ||
+               ((uintptr_t)requested_addr & (page_sz - 1))) ||
                page_sz == system_page_sz;
 
        do {
                map_sz = no_align ? *size : *size + page_sz;
+               if (map_sz > SIZE_MAX) {
+                       RTE_LOG(ERR, EAL, "Map size too big\n");
+                       rte_errno = E2BIG;
+                       return NULL;
+               }
 
-               mapped_addr = mmap(requested_addr, map_sz, PROT_READ,
+               mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ,
                                mmap_flags, -1, 0);
                if (mapped_addr == MAP_FAILED && allow_shrink)
                        *size -= page_sz;
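
The new map_sz check only makes sense because map_sz is no longer a plain size_t here (the (size_t) cast on the mmap() call implies it is now a wider type): on a 32-bit build, *size plus the extra alignment page can exceed what mmap() can accept, so the sum is done in 64 bits and rejected with E2BIG before being cast back down. A small hedged sketch of that guard; try_reserve is a made-up name for this illustration.

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

/* Compute "size plus alignment slack" in 64 bits, refuse it when it cannot
 * be represented as a size_t (only possible on 32-bit builds), and cast
 * down only after the check.
 */
static void *
try_reserve(size_t size, size_t page_sz, int need_align)
{
        uint64_t map_sz = need_align ? (uint64_t)size + page_sz : size;

        if (map_sz > SIZE_MAX) {
                errno = E2BIG;  /* the patch reports this through rte_errno */
                return NULL;
        }
        return mmap(NULL, (size_t)map_sz, PROT_NONE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}
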
@@ -111,15 +119,39 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
                RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
                        requested_addr, aligned_addr);
                RTE_LOG(WARNING, EAL, "   This may cause issues with mapping memory into secondary processes\n");
+       } else if (next_baseaddr != NULL) {
+               next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
        }
 
        RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
                aligned_addr, *size);
 
-       if (unmap)
+       if (unmap) {
                munmap(mapped_addr, map_sz);
+       } else if (!no_align) {
+               void *map_end, *aligned_end;
+               size_t before_len, after_len;
+
+               /* when we reserve space with alignment, we add alignment to
+                * mapping size. On 32-bit, if 1GB alignment was requested, this
+                * would waste 1GB of address space, which is a luxury we cannot
+                * afford. so, if alignment was performed, check if any unneeded
+                * address space can be unmapped back.
+                */
 
-       baseaddr_offset += *size;
+               map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
+               aligned_end = RTE_PTR_ADD(aligned_addr, *size);
+
+               /* unmap space before aligned mmap address */
+               before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
+               if (before_len > 0)
+                       munmap(mapped_addr, before_len);
+
+               /* unmap space after aligned end mmap address */
+               after_len = RTE_PTR_DIFF(map_end, aligned_end);
+               if (after_len > 0)
+                       munmap(aligned_end, after_len);
+       }
 
        return aligned_addr;
 }
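
The else-if branch added above does what the new comment describes: when extra space was reserved so the start of the area could be aligned, the slack before and after the aligned window is handed back with munmap(), so 32-bit targets do not waste up to a full alignment's worth of address space. A self-contained sketch of the same reserve, align, trim pattern, using plain POSIX calls and a made-up helper name; page_sz is assumed to be a power of two.

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

static void *
reserve_aligned(size_t size, size_t page_sz)
{
        size_t map_sz = size + page_sz;   /* extra page for alignment slack */
        char *base, *aligned, *aligned_end, *map_end;

        base = mmap(NULL, map_sz, PROT_NONE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (base == MAP_FAILED)
                return NULL;

        aligned = (char *)(((uintptr_t)base + page_sz - 1) &
                        ~((uintptr_t)page_sz - 1));
        aligned_end = aligned + size;
        map_end = base + map_sz;

        /* give back the unused space in front of the aligned window... */
        if (aligned > base)
                munmap(base, (size_t)(aligned - base));
        /* ...and behind it */
        if (map_end > aligned_end)
                munmap(aligned_end, (size_t)(map_end - aligned_end));

        return aligned;
}

The two munmap() calls are independent: either side may be empty when mmap() happens to return an already aligned address.
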
@@ -141,6 +173,17 @@ get_mem_amount(uint64_t page_sz, uint64_t max_mem)
        return RTE_ALIGN(area_sz, page_sz);
 }
 
+static int
+free_memseg_list(struct rte_memseg_list *msl)
+{
+       if (rte_fbarray_destroy(&msl->memseg_arr)) {
+               RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
+               return -1;
+       }
+       memset(msl, 0, sizeof(*msl));
+       return 0;
+}
+
 static int
 alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
                uint64_t max_mem, int socket_id, int type_msl_idx)
@@ -318,6 +361,10 @@ memseg_primary_init_32(void)
                        hpi = &internal_config.hugepage_info[hpi_idx];
                        hugepage_sz = hpi->hugepage_sz;
 
+                       /* check if pages are actually available */
+                       if (hpi->num_pages[socket_id] == 0)
+                               continue;
+
                        max_segs = RTE_MAX_MEMSEG_PER_TYPE;
                        max_pagesz_mem = max_socket_mem - cur_socket_mem;
 
@@ -339,24 +386,41 @@ memseg_primary_init_32(void)
                                        return -1;
                                }
 
-                               msl = &mcfg->memsegs[msl_idx++];
+                               msl = &mcfg->memsegs[msl_idx];
 
                                if (alloc_memseg_list(msl, hugepage_sz,
                                                max_pagesz_mem, socket_id,
-                                               type_msl_idx))
+                                               type_msl_idx)) {
+                                       /* failing to allocate a memseg list is
+                                        * a serious error.
+                                        */
+                                       RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
                                        return -1;
+                               }
+
+                               if (alloc_va_space(msl)) {
+                                       /* if we couldn't allocate VA space, we
+                                        * can try with smaller page sizes.
+                                        */
+                                       RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
+                                       /* deallocate memseg list */
+                                       if (free_memseg_list(msl))
+                                               return -1;
+                                       break;
+                               }
 
                                total_segs += msl->memseg_arr.len;
                                cur_pagesz_mem = total_segs * hugepage_sz;
                                type_msl_idx++;
-
-                               if (alloc_va_space(msl)) {
-                                       RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
-                                       return -1;
-                               }
+                               msl_idx++;
                        }
                        cur_socket_mem += cur_pagesz_mem;
                }
+               if (cur_socket_mem == 0) {
+                       RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
+                               socket_id);
+                       return -1;
+               }
        }
 
        return 0;
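
The reworked loop above changes the failure policy for 32-bit preallocation: a failed alloc_memseg_list() stays fatal, but a failed alloc_va_space() now frees the just-created list and falls through to the next, smaller hugepage size, and only a socket that ends up with no memory at all (cur_socket_mem == 0) aborts initialization. A compact sketch of that fallback pattern follows; make_list(), make_va_space() and destroy_list() are placeholder stubs, not DPDK functions.

#include <stdint.h>

/* Placeholder stubs standing in for alloc_memseg_list(), alloc_va_space()
 * and free_memseg_list().
 */
static int make_list(uint64_t page_sz) { (void)page_sz; return 0; }
static int make_va_space(uint64_t page_sz)
{
        /* pretend only sizes up to 256M fit in the 32-bit address space */
        return page_sz > (256ULL << 20) ? -1 : 0;
}
static void destroy_list(uint64_t page_sz) { (void)page_sz; }

static int
reserve_for_socket(const uint64_t *page_sizes, int n_sizes)
{
        uint64_t chosen = 0;
        int i;

        /* page_sizes is expected to be ordered from largest to smallest */
        for (i = 0; i < n_sizes && chosen == 0; i++) {
                if (make_list(page_sizes[i]) < 0)
                        return -1;              /* list setup failure is fatal */
                if (make_va_space(page_sizes[i]) < 0) {
                        destroy_list(page_sizes[i]);
                        continue;               /* fall back to a smaller size */
                }
                chosen = page_sizes[i];
        }
        return chosen == 0 ? -1 : 0;            /* nothing fit at any page size */
}
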
@@ -397,6 +461,9 @@ memseg_primary_init(void)
                                break;
 #endif
 
+                       if (total_mem >= max_mem)
+                               break;
+
                        max_type_mem = RTE_MIN(max_mem - total_mem,
                                (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
                        max_segs = RTE_MAX_MEMSEG_PER_TYPE;
@@ -472,6 +539,9 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl)
        void *start, *end;
        int ms_idx;
 
+       if (msl == NULL)
+               return NULL;
+
        /* a memseg list was specified, check if it's the right one */
        start = msl->base_va;
        end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);
@@ -629,7 +699,8 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
  * is in eal_common_memalloc.c, like all other memalloc internals.
  */
 int __rte_experimental
-rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb)
+rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
+               void *arg)
 {
        /* FreeBSD boots with legacy mem enabled by default */
        if (internal_config.legacy_mem) {
@@ -637,11 +708,11 @@ rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb)
                rte_errno = ENOTSUP;
                return -1;
        }
-       return eal_memalloc_mem_event_callback_register(name, clb);
+       return eal_memalloc_mem_event_callback_register(name, clb, arg);
 }
 
 int __rte_experimental
-rte_mem_event_callback_unregister(const char *name)
+rte_mem_event_callback_unregister(const char *name, void *arg)
 {
        /* FreeBSD boots with legacy mem enabled by default */
        if (internal_config.legacy_mem) {
@@ -649,7 +720,7 @@ rte_mem_event_callback_unregister(const char *name)
                rte_errno = ENOTSUP;
                return -1;
        }
-       return eal_memalloc_mem_event_callback_unregister(name);
+       return eal_memalloc_mem_event_callback_unregister(name, arg);
 }
 
 int __rte_experimental
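
The two callback hunks above extend rte_mem_event_callback_register() and rte_mem_event_callback_unregister() with a user argument that is carried through to the eal_memalloc layer. A hedged usage sketch, assuming the rte_mem_event_callback_t typedef gains a matching trailing void *arg parameter in this same series.

#include <stdio.h>
#include <rte_memory.h>

static void
mem_event_cb(enum rte_mem_event event, const void *addr, size_t len, void *arg)
{
        const char *who = arg;

        printf("[%s] %s at %p, len %zu\n", who,
                event == RTE_MEM_EVENT_ALLOC ? "alloc" : "free", addr, len);
}

static char ctx_a[] = "pool-a";
static char ctx_b[] = "pool-b";

static void
register_callbacks(void)
{
        /* the same function can serve several registrations, told apart
         * by the user argument passed here
         */
        rte_mem_event_callback_register("sketch-a", mem_event_cb, ctx_a);
        rte_mem_event_callback_register("sketch-b", mem_event_cb, ctx_b);
}

static void
unregister_callbacks(void)
{
        /* the argument is passed again so the matching registration is removed */
        rte_mem_event_callback_unregister("sketch-a", ctx_a);
        rte_mem_event_callback_unregister("sketch-b", ctx_b);
}
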
@@ -864,6 +935,9 @@ rte_eal_memory_init(void)
        if (retval < 0)
                goto fail;
 
+       if (eal_memalloc_init() < 0)
+               goto fail;
+
        retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
                        rte_eal_hugepage_init() :
                        rte_eal_hugepage_attach();