1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright (c) 2020 Dmitry Kozlyuk
8 #include <rte_eal_paging.h>
11 #include "eal_internal_cfg.h"
12 #include "eal_memalloc.h"
13 #include "eal_memcfg.h"
14 #include "eal_options.h"
15 #include "eal_private.h"
16 #include "eal_windows.h"
18 #include <rte_virt2phys.h>
20 /* MinGW-w64 headers lack VirtualAlloc2() in some distributions.
21 * Note: definitions are copied verbatim from Microsoft documentation
22 * and don't follow DPDK code style.
 */
24 #ifndef MEM_EXTENDED_PARAMETER_TYPE_BITS

/* Width of the Type bit-field in MEM_EXTENDED_PARAMETER below. */
26 #define MEM_EXTENDED_PARAMETER_TYPE_BITS 4

28 /* https://docs.microsoft.com/en-us/windows/win32/api/winnt/ne-winnt-mem_extended_parameter_type */
29 typedef enum MEM_EXTENDED_PARAMETER_TYPE {
30 MemExtendedParameterInvalidType,
31 MemExtendedParameterAddressRequirements,
32 MemExtendedParameterNumaNode,
33 MemExtendedParameterPartitionHandle,
34 MemExtendedParameterUserPhysicalHandle,
35 MemExtendedParameterAttributeFlags,
36 MemExtendedParameterMax
37 } *PMEM_EXTENDED_PARAMETER_TYPE;

39 /* https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-mem_extended_parameter */
40 typedef struct MEM_EXTENDED_PARAMETER {
42 DWORD64 Type : MEM_EXTENDED_PARAMETER_TYPE_BITS;
43 DWORD64 Reserved : 64 - MEM_EXTENDED_PARAMETER_TYPE_BITS;
/* NOTE(review): the anonymous union of value members (ULong64, Pointer,
 * Size, Handle, ULong) appears to be elided here — eal_mem_commit() below
 * uses param.ULong, so it must exist; confirm against the winnt.h definition.
 */
52 } MEM_EXTENDED_PARAMETER, *PMEM_EXTENDED_PARAMETER;

54 #endif /* defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) */

56 /* https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualalloc2 */
/* Function-pointer type matching VirtualAlloc2(), so the routine can be
 * resolved at run time (see eal_mem_win32api_init()).
 * NOTE(review): several parameters of the signature appear elided here.
 */
57 typedef PVOID (*VirtualAlloc2_type)(
63 MEM_EXTENDED_PARAMETER *ExtendedParameters,
67 /* MinGW-w64 distributions, even those that declare VirtualAlloc2(),
68 * lack it in import libraries, which results in a failure at link time.
69 * Link it dynamically in such case.
 */
/* Resolved address of VirtualAlloc2(); filled in by eal_mem_win32api_init()
 * and used by the reserve/commit/decommit helpers below.
 */
71 static VirtualAlloc2_type VirtualAlloc2_ptr;

73 #ifdef RTE_TOOLCHAIN_GCC

/* Placeholder-related allocation flags; may be absent from MinGW headers. */
75 #define MEM_COALESCE_PLACEHOLDERS 0x00000001
76 #define MEM_PRESERVE_PLACEHOLDER 0x00000002
77 #define MEM_REPLACE_PLACEHOLDER 0x00004000
78 #define MEM_RESERVE_PLACEHOLDER 0x00040000

/* Locate VirtualAlloc2() at run time and cache it in VirtualAlloc2_ptr.
 * Idempotent: returns early if the pointer is already resolved.
 */
81 eal_mem_win32api_init(void)
83 /* Contrary to the docs, VirtualAlloc2() is not in kernel32.dll,
84 * see https://github.com/MicrosoftDocs/feedback/issues/1129.
 */
86 static const char library_name[] = "kernelbase.dll";
87 static const char function[] = "VirtualAlloc2";

89 HMODULE library = NULL;

/* Already resolved by an earlier call. */
93 if (VirtualAlloc2_ptr != NULL)

96 library = LoadLibraryA(library_name);
97 if (library == NULL) {
98 RTE_LOG_WIN32_ERR("LoadLibraryA(\"%s\")", library_name);

102 VirtualAlloc2_ptr = (VirtualAlloc2_type)(
103 (void *)GetProcAddress(library, function));
104 if (VirtualAlloc2_ptr == NULL) {
105 RTE_LOG_WIN32_ERR("GetProcAddress(\"%s\", \"%s\")\n",
106 library_name, function);
108 /* Contrary to the docs, Server 2016 is not supported. */
/* Fix: the split literal previously read "2019 " " is", producing a
 * double space in the logged message.
 */
109 RTE_LOG(ERR, EAL, "Windows 10 or Windows Server 2019 "
110 "is required for memory management\n");

114 FreeLibrary(library);

121 /* Stub in case VirtualAlloc2() is provided by the toolchain. */
123 eal_mem_win32api_init(void)
125 VirtualAlloc2_ptr = VirtualAlloc2;

129 #endif /* defined(RTE_TOOLCHAIN_GCC) */
/* Handle to the virt2phys kernel driver; INVALID_HANDLE_VALUE until
 * eal_mem_virt2iova_init() succeeds.
 */
131 static HANDLE virt2phys_device = INVALID_HANDLE_VALUE;

/* Open the virt2phys driver device used by rte_mem_virt2phy():
 * enumerate its device interface via SetupAPI, query the interface
 * detail (device path), and open a handle stored in virt2phys_device.
 */
134 eal_mem_virt2iova_init(void)
136 HDEVINFO list = INVALID_HANDLE_VALUE;
137 SP_DEVICE_INTERFACE_DATA ifdata;
138 SP_DEVICE_INTERFACE_DETAIL_DATA *detail = NULL;

142 list = SetupDiGetClassDevs(
143 &GUID_DEVINTERFACE_VIRT2PHYS, NULL, NULL,
144 DIGCF_DEVICEINTERFACE | DIGCF_PRESENT);
145 if (list == INVALID_HANDLE_VALUE) {
146 RTE_LOG_WIN32_ERR("SetupDiGetClassDevs()");

150 ifdata.cbSize = sizeof(ifdata);
151 if (!SetupDiEnumDeviceInterfaces(
152 list, NULL, &GUID_DEVINTERFACE_VIRT2PHYS, 0, &ifdata)) {
153 RTE_LOG_WIN32_ERR("SetupDiEnumDeviceInterfaces()");

/* First call with a NULL buffer is a size probe: it is expected to fail
 * with ERROR_INSUFFICIENT_BUFFER while reporting the required size.
 */
157 if (!SetupDiGetDeviceInterfaceDetail(
158 list, &ifdata, NULL, 0, &detail_size, NULL)) {
159 if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
161 "SetupDiGetDeviceInterfaceDetail(probe)");

166 detail = malloc(detail_size);
167 if (detail == NULL) {
168 RTE_LOG(ERR, EAL, "Cannot allocate virt2phys "
169 "device interface detail data\n");

173 detail->cbSize = sizeof(*detail);
174 if (!SetupDiGetDeviceInterfaceDetail(
175 list, &ifdata, detail, detail_size, NULL, NULL)) {
176 RTE_LOG_WIN32_ERR("SetupDiGetDeviceInterfaceDetail(read)");

180 RTE_LOG(DEBUG, EAL, "Found virt2phys device: %s\n", detail->DevicePath);

182 virt2phys_device = CreateFile(
183 detail->DevicePath, 0, 0, NULL, OPEN_EXISTING, 0, NULL);
184 if (virt2phys_device == INVALID_HANDLE_VALUE) {
185 RTE_LOG_WIN32_ERR("CreateFile()");

189 /* Indicate success. */

/* Common exit path: destroy the device-info list in all cases.
 * NOTE(review): free(detail) is not visible here — presumably in an
 * elided exit line; confirm there is no leak.
 */
195 if (list != INVALID_HANDLE_VALUE)
196 SetupDiDestroyDeviceInfoList(list);
/* Close the virt2phys driver handle opened by eal_mem_virt2iova_init(). */
202 eal_mem_virt2iova_cleanup(void)
204 if (virt2phys_device != INVALID_HANDLE_VALUE)
205 CloseHandle(virt2phys_device);
/* Translate a virtual address to a physical address via an IOCTL to the
 * virt2phys driver. Returns RTE_BAD_PHYS_ADDR if the driver is not
 * available or the translation fails.
 */
209 rte_mem_virt2phy(const void *virt)
212 DWORD bytes_returned;

214 if (virt2phys_device == INVALID_HANDLE_VALUE)
215 return RTE_BAD_PHYS_ADDR;

217 if (!DeviceIoControl(
218 virt2phys_device, IOCTL_VIRT2PHYS_TRANSLATE,
219 &virt, sizeof(virt), &phys, sizeof(phys),
220 &bytes_returned, NULL)) {
221 RTE_LOG_WIN32_ERR("DeviceIoControl(IOCTL_VIRT2PHYS_TRANSLATE)");
222 return RTE_BAD_PHYS_ADDR;

225 return phys.QuadPart;
/* Translate a virtual address to an IOVA. In RTE_IOVA_VA mode the IOVA
 * is the virtual address itself; otherwise fall back to the physical
 * address reported by the virt2phys driver.
 */
229 rte_mem_virt2iova(const void *virt)
233 if (rte_eal_iova_mode() == RTE_IOVA_VA)
234 return (rte_iova_t)virt;

236 phys = rte_mem_virt2phy(virt);
237 if (phys == RTE_BAD_PHYS_ADDR)

239 return (rte_iova_t)phys;
242 /* Always using physical addresses under Windows if they can be obtained. */
/* True iff the virt2phys driver handle was opened successfully. */
244 rte_eal_using_phys_addrs(void)
246 return virt2phys_device != INVALID_HANDLE_VALUE;
249 /* Approximate error mapping from VirtualAlloc2() to POSIX mmap(3). */
/* Presumably assigns rte_errno based on the Win32 error code; the
 * switch skeleton and assignments are not fully visible here — confirm.
 */
251 set_errno_from_win32_alloc_error(DWORD code)
258 case ERROR_INVALID_ADDRESS:
259 /* A valid requested address is not available. */
260 case ERROR_COMMITMENT_LIMIT:
261 /* May occur when committing regular memory. */
262 case ERROR_NO_SYSTEM_RESOURCES:
263 /* Occurs when the system runs out of hugepages. */

267 case ERROR_INVALID_PARAMETER:
/* Reserve a region of virtual address space with no physical backing,
 * using a Windows placeholder reservation so it can later be replaced
 * by a committed mapping (see eal_mem_commit()/rte_mem_map()).
 */
275 eal_mem_reserve(void *requested_addr, size_t size, int flags)

280 /* Windows requires hugepages to be committed. */
281 if (flags & EAL_RESERVE_HUGEPAGES) {

286 process = GetCurrentProcess();

288 virt = VirtualAlloc2_ptr(process, requested_addr, size,
289 MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS,
/* Save the error before logging, which may clobber GetLastError(). */
292 DWORD err = GetLastError();
293 RTE_LOG_WIN32_ERR("VirtualAlloc2()");
294 set_errno_from_win32_alloc_error(err);

/* Caller demanded the exact address but got a different one: undo. */
298 if ((flags & EAL_RESERVE_FORCE_ADDRESS) && (virt != requested_addr)) {
299 if (!VirtualFreeEx(process, virt, 0, MEM_RELEASE))
300 RTE_LOG_WIN32_ERR("VirtualFreeEx()");
/* Allocate committed hugepage memory on a specific NUMA socket using
 * VirtualAllocExNuma() with MEM_LARGE_PAGES.
 */
309 eal_mem_alloc_socket(size_t size, int socket_id)
311 DWORD flags = MEM_RESERVE | MEM_COMMIT;

314 flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
315 addr = VirtualAllocExNuma(GetCurrentProcess(), NULL, size, flags,
316 PAGE_READWRITE, eal_socket_numa_node(socket_id));
/* Commit large-page-backed memory, optionally at requested_addr and/or
 * bound to a NUMA node. A non-NULL requested_addr must lie inside a
 * region previously reserved via eal_mem_reserve(); the reservation is
 * split and released before re-allocating committed memory in its place.
 */
323 eal_mem_commit(void *requested_addr, size_t size, int socket_id)
326 MEM_EXTENDED_PARAMETER param;
327 DWORD param_count = 0;

331 process = GetCurrentProcess();

333 if (requested_addr != NULL) {
334 MEMORY_BASIC_INFORMATION info;

336 if (VirtualQueryEx(process, requested_addr, &info,
337 sizeof(info)) != sizeof(info)) {
338 RTE_LOG_WIN32_ERR("VirtualQuery(%p)", requested_addr);

342 /* Split reserved region if only a part is committed. */
343 flags = MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER;
344 if ((info.RegionSize > size) && !VirtualFreeEx(
345 process, requested_addr, size, flags)) {
347 "VirtualFreeEx(%p, %zu, preserve placeholder)",
348 requested_addr, size);

352 /* Temporarily release the region to be committed.
354 * There is an inherent race for this memory range
355 * if another thread allocates memory via OS API.
356 * However, VirtualAlloc2(MEM_REPLACE_PLACEHOLDER)
357 * doesn't work with MEM_LARGE_PAGES on Windows Server.
 */
359 if (!VirtualFreeEx(process, requested_addr, 0, MEM_RELEASE)) {
360 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)",

/* Request allocation from a specific NUMA node via extended parameter. */
366 if (socket_id != SOCKET_ID_ANY) {
/* Fix: "&param" was mis-encoded as "¶m" (HTML-entity mojibake)
 * here and in the VirtualAlloc2 call below.
 */
368 memset(&param, 0, sizeof(param));
369 param.Type = MemExtendedParameterNumaNode;
370 param.ULong = eal_socket_numa_node(socket_id);

373 flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
374 addr = VirtualAlloc2_ptr(process, requested_addr, size,
375 flags, PAGE_READWRITE, &param, param_count);
377 /* Logging may overwrite GetLastError() result. */
378 DWORD err = GetLastError();
379 RTE_LOG_WIN32_ERR("VirtualAlloc2(%p, %zu, commit large pages)",
380 requested_addr, size);
381 set_errno_from_win32_alloc_error(err);

385 if ((requested_addr != NULL) && (addr != requested_addr)) {
386 /* We lost the race for the requested_addr. */
387 if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE))
388 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, release)", addr);
390 rte_errno = EADDRNOTAVAIL;
/* Decommit a committed memory block, leaving a reserved placeholder
 * behind so that EAL keeps ownership of the VA range.
 */
398 eal_mem_decommit(void *addr, size_t size)

404 process = GetCurrentProcess();

406 /* Hugepages cannot be decommitted on Windows,
407 * so free them and replace the block with a placeholder.
408 * There is a race for VA in this block until VirtualAlloc2 call.
 */
410 if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE)) {
411 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)", addr);

415 flags = MEM_RESERVE | MEM_RESERVE_PLACEHOLDER;
416 stub = VirtualAlloc2_ptr(
417 process, addr, size, flags, PAGE_NOACCESS, NULL, 0);
419 /* We lost the race for the VA. */
420 if (!VirtualFreeEx(process, stub, 0, MEM_RELEASE))
421 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, release)", stub);
422 rte_errno = EADDRNOTAVAIL;

426 /* No need to join reserved regions adjacent to the freed one:
427 * eal_mem_commit() will just pick up the page-size placeholder
 * when the range is used again. (Tail of original comment not
 * visible here — confirm against upstream.)
 */
/*
434 * Free a reserved memory region in full or in part.
 *
 * @param addr
437 * Starting address of the area to free.
 * @param size
439 * Number of bytes to free. Must be a multiple of page size.
 * @param reserved
441 * Fail if the region is not in reserved state.
 * @return
443 * * 0 on successful deallocation;
444 * * 1 if region must be in reserved state but it is not;
445 * * (-1) on system API failures.
 */
448 mem_free(void *addr, size_t size, bool reserved)
450 MEMORY_BASIC_INFORMATION info;

453 process = GetCurrentProcess();
456 process, addr, &info, sizeof(info)) != sizeof(info)) {
457 RTE_LOG_WIN32_ERR("VirtualQueryEx(%p)", addr);

/* Caller requires the region to still be a plain reservation. */
461 if (reserved && (info.State != MEM_RESERVE))

464 /* Free complete region. */
465 if ((addr == info.AllocationBase) && (size == info.RegionSize)) {
466 if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE)) {
467 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)",

473 /* Split the part to be freed and the remaining reservation. */
474 if (!VirtualFreeEx(process, addr, size,
475 MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) {
477 "VirtualFreeEx(%p, %zu, preserve placeholder)",

482 /* Actually free reservation part. */
483 if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE)) {
484 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)", addr);
/* Free a region without requiring it to be in reserved state;
 * best-effort — the mem_free() return value is ignored here.
 */
492 eal_mem_free(void *virt, size_t size)
494 mem_free(virt, size, false);
/* Include/exclude a region from core dumps; intentionally a no-op on
 * Windows (see comment below).
 */
498 eal_mem_set_dump(void *virt, size_t size, bool dump)

504 /* Windows does not dump reserved memory by default.
506 * There is <werapi.h> to include or exclude regions from the dump,
507 * but this is not currently required by EAL.
 */
515 rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
516 int fd, uint64_t offset)
518 HANDLE file_handle = INVALID_HANDLE_VALUE;
519 HANDLE mapping_handle = INVALID_HANDLE_VALUE;
521 DWORD sys_access = 0;
522 DWORD size_high = (DWORD)(size >> 32);
523 DWORD size_low = (DWORD)size;
524 DWORD offset_high = (DWORD)(offset >> 32);
525 DWORD offset_low = (DWORD)offset;
528 if (prot & RTE_PROT_EXECUTE) {
529 if (prot & RTE_PROT_READ) {
530 sys_prot = PAGE_EXECUTE_READ;
531 sys_access = FILE_MAP_READ | FILE_MAP_EXECUTE;
533 if (prot & RTE_PROT_WRITE) {
534 sys_prot = PAGE_EXECUTE_READWRITE;
535 sys_access = FILE_MAP_WRITE | FILE_MAP_EXECUTE;
538 if (prot & RTE_PROT_READ) {
539 sys_prot = PAGE_READONLY;
540 sys_access = FILE_MAP_READ;
542 if (prot & RTE_PROT_WRITE) {
543 sys_prot = PAGE_READWRITE;
544 sys_access = FILE_MAP_WRITE;
548 if (flags & RTE_MAP_PRIVATE)
549 sys_access |= FILE_MAP_COPY;
551 if ((flags & RTE_MAP_ANONYMOUS) == 0)
552 file_handle = (HANDLE)_get_osfhandle(fd);
554 mapping_handle = CreateFileMapping(
555 file_handle, NULL, sys_prot, size_high, size_low, NULL);
556 if (mapping_handle == INVALID_HANDLE_VALUE) {
557 RTE_LOG_WIN32_ERR("CreateFileMapping()");
561 /* There is a race for the requested_addr between mem_free()
562 * and MapViewOfFileEx(). MapViewOfFile3() that can replace a reserved
563 * region with a mapping in a single operation, but it does not support
566 if (requested_addr != NULL) {
567 int ret = mem_free(requested_addr, size, true);
570 RTE_LOG(ERR, EAL, "Cannot map memory "
571 "to a region not reserved\n");
572 rte_errno = EADDRNOTAVAIL;
578 virt = MapViewOfFileEx(mapping_handle, sys_access,
579 offset_high, offset_low, size, requested_addr);
581 RTE_LOG_WIN32_ERR("MapViewOfFileEx()");
585 if ((flags & RTE_MAP_FORCE_ADDRESS) && (virt != requested_addr)) {
586 if (!UnmapViewOfFile(virt))
587 RTE_LOG_WIN32_ERR("UnmapViewOfFile()");
591 if (!CloseHandle(mapping_handle))
592 RTE_LOG_WIN32_ERR("CloseHandle()");
/* Unmap a view created by rte_mem_map(); emulates POSIX munmap(3). */
598 rte_mem_unmap(void *virt, size_t size)

602 if (!UnmapViewOfFile(virt)) {
603 RTE_LOG_WIN32_ERR("UnmapViewOfFile()");
/* Base virtual address hint used for EAL memory layout. */
611 eal_get_baseaddr(void)
613 /* Windows strategy for memory allocation is undocumented.
614 * Returning 0 here effectively disables address guessing
615 * unless user provides an address hint.
 */
/* Return the system page size, cached in a static SYSTEM_INFO after
 * the first call (dwPageSize == 0 means "not yet queried").
 * NOTE(review): the lazy init is not thread-safe on first use — likely
 * benign since GetSystemInfo() is side-effect-free here, but confirm
 * against EAL init ordering.
 */
621 rte_mem_page_size(void)
623 static SYSTEM_INFO info;

625 if (info.dwPageSize == 0)
626 GetSystemInfo(&info);

628 return info.dwPageSize;
/* Pin pages in physical memory via VirtualLock(), like POSIX mlock(). */
632 rte_mem_lock(const void *virt, size_t size)
634 /* VirtualLock() takes `void*`, work around compiler warning. */
635 void *addr = (void *)((uintptr_t)virt);

637 if (!VirtualLock(addr, size)) {
638 RTE_LOG_WIN32_ERR("VirtualLock(%p %#zx)", virt, size);
/* Initialize memseg lists; only the primary process is implemented on
 * Windows — secondary processes log "not implemented".
 */
646 rte_eal_memseg_init(void)
648 if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
649 EAL_LOG_NOT_IMPLEMENTED();

653 return eal_dynmem_memseg_lists_init();
/* Set up memory for --no-huge mode: one memseg list of 4K pages backed
 * by a single committed VirtualAlloc() region, then validate against
 * the configured DMA mask.
 */
657 eal_nohuge_init(void)
659 struct rte_mem_config *mcfg;
660 struct rte_memseg_list *msl;
662 uint64_t mem_sz, page_sz;

665 mcfg = rte_eal_get_configuration()->mem_config;
666 struct internal_config *internal_conf =
667 eal_get_internal_configuration();

669 /* nohuge mode is legacy mode */
670 internal_conf->legacy_mem = 1;

/* A single memseg list covers all of the requested memory. */
672 msl = &mcfg->memsegs[0];

674 mem_sz = internal_conf->memory;
675 page_sz = RTE_PGSIZE_4K;
676 n_segs = mem_sz / page_sz;

678 if (eal_memseg_list_init_named(
679 msl, "nohugemem", page_sz, n_segs, 0, true)) {

/* Regular (non-huge) committed pages back the whole list. */
684 NULL, mem_sz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
686 RTE_LOG_WIN32_ERR("VirtualAlloc(size=%#zx)", mem_sz);
687 RTE_LOG(ERR, EAL, "Cannot allocate memory\n");

694 eal_memseg_list_populate(msl, addr, n_segs);

/* Reject the allocation if its IOVAs exceed the DMA mask. */
696 if (mcfg->dma_maskbits &&
697 rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
699 "%s(): couldn't allocate memory due to IOVA "
700 "exceeding limits of current DMA mask.\n", __func__);
/* Initialize hugepage memory, or plain 4K-page memory in --no-huge mode. */
708 rte_eal_hugepage_init(void)
710 const struct internal_config *internal_conf =
711 eal_get_internal_configuration();

713 return internal_conf->no_hugetlbfs ?
714 eal_nohuge_init() : eal_dynmem_hugepage_init();
718 rte_eal_hugepage_attach(void)
720 EAL_LOG_NOT_IMPLEMENTED();