1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright (c) 2020 Dmitry Kozlyuk
8 #include <rte_eal_paging.h>
11 #include "eal_internal_cfg.h"
12 #include "eal_memalloc.h"
13 #include "eal_memcfg.h"
14 #include "eal_options.h"
15 #include "eal_private.h"
16 #include "eal_windows.h"
18 #include <rte_virt2phys.h>
20 /* MinGW-w64 headers lack VirtualAlloc2() in some distributions.
21 * Provide a copy of definitions and code to load it dynamically.
22 * Note: definitions are copied verbatim from Microsoft documentation
23 * and don't follow DPDK code style.
25 * MEM_PRESERVE_PLACEHOLDER being defined means VirtualAlloc2() is present too.
27 #ifndef MEM_PRESERVE_PLACEHOLDER
29 /* https://docs.microsoft.com/en-us/windows/win32/api/winnt/ne-winnt-mem_extended_parameter_type */
/* Kinds of extended parameter that can accompany a VirtualAlloc2() request.
 * Within this file only MemExtendedParameterNumaNode is used (by
 * eal_mem_commit()). The declaration is kept as copied from the Microsoft
 * documentation, hence the non-DPDK naming.
 */
30 typedef enum MEM_EXTENDED_PARAMETER_TYPE {
31 MemExtendedParameterInvalidType,
32 MemExtendedParameterAddressRequirements,
33 MemExtendedParameterNumaNode,
34 MemExtendedParameterPartitionHandle,
35 MemExtendedParameterUserPhysicalHandle,
36 MemExtendedParameterAttributeFlags,
37 MemExtendedParameterMax
38 } *PMEM_EXTENDED_PARAMETER_TYPE;
/* Width in bits of the Type bit-field of MEM_EXTENDED_PARAMETER; the rest
 * of the 64-bit word is the Reserved bit-field.
 */
40 #define MEM_EXTENDED_PARAMETER_TYPE_BITS 4
42 /* https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-mem_extended_parameter */
/* Extended parameter passed to VirtualAlloc2(): a 64-bit word split into
 * a Type bit-field and reserved bits, followed by the parameter value.
 *
 * NOTE(review): the value members are elided from this listing;
 * eal_mem_commit() in this file writes one named `ULong`.
 */
43 typedef struct MEM_EXTENDED_PARAMETER {
45 DWORD64 Type : MEM_EXTENDED_PARAMETER_TYPE_BITS;
46 DWORD64 Reserved : 64 - MEM_EXTENDED_PARAMETER_TYPE_BITS;
55 } MEM_EXTENDED_PARAMETER, *PMEM_EXTENDED_PARAMETER;
57 /* https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualalloc2 */
/* Pointer-to-function type used to call VirtualAlloc2() through a pointer
 * obtained at run time.
 *
 * NOTE(review): only the ExtendedParameters parameter is visible in this
 * listing. Calls in this file pass seven arguments: process handle, base
 * address, size, allocation flags, page protection, extended parameters
 * and their count.
 */
58 typedef PVOID (*VirtualAlloc2_type)(
64 MEM_EXTENDED_PARAMETER *ExtendedParameters,
68 /* VirtualAlloc2() flags. */
/* Placeholder-related flag values, supplied here only when the toolchain
 * headers do not define MEM_PRESERVE_PLACEHOLDER. In this file
 * MEM_RESERVE_PLACEHOLDER is passed to VirtualAlloc2() and
 * MEM_PRESERVE_PLACEHOLDER to VirtualFreeEx().
 */
69 #define MEM_COALESCE_PLACEHOLDERS 0x00000001
70 #define MEM_PRESERVE_PLACEHOLDER 0x00000002
71 #define MEM_REPLACE_PLACEHOLDER 0x00004000
72 #define MEM_RESERVE_PLACEHOLDER 0x00040000
74 /* Named exactly as the function, so that user code does not depend
75 * on it being found at compile time or dynamically.
/* Run-time resolved VirtualAlloc2() entry point. It has static storage, so
 * it is NULL until eal_mem_win32api_init() stores the address returned by
 * GetProcAddress().
 */
77 static VirtualAlloc2_type VirtualAlloc2;
/* Resolve VirtualAlloc2() at run time.
 *
 * The function first tests whether the file-scope VirtualAlloc2 pointer is
 * already set (the branch body is elided here; presumably an early return).
 * Otherwise it loads "kernelbase.dll" with LoadLibraryA(), looks up
 * "VirtualAlloc2" with GetProcAddress() and stores the result in the
 * pointer. A missing library or symbol is logged; a missing symbol
 * additionally logs the minimum supported Windows version. FreeLibrary()
 * is then called on the library handle.
 *
 * NOTE(review): return statements are elided from this listing, so result
 * codes are not documented here. The GetProcAddress() log format ends with
 * a newline escape while the other RTE_LOG_WIN32_ERR() calls in this file
 * do not, and the two concatenated literals of the version message yield a
 * double space; confirm whether either is intended.
 */
80 eal_mem_win32api_init(void)
82 /* Contrary to the docs, VirtualAlloc2() is not in kernel32.dll,
83 * see https://github.com/MicrosoftDocs/feedback/issues/1129.
85 static const char library_name[] = "kernelbase.dll";
86 static const char function[] = "VirtualAlloc2";
88 HMODULE library = NULL;
92 if (VirtualAlloc2 != NULL)
95 library = LoadLibraryA(library_name);
96 if (library == NULL) {
97 RTE_LOG_WIN32_ERR("LoadLibraryA(\"%s\")", library_name);
101 VirtualAlloc2 = (VirtualAlloc2_type)(
102 (void *)GetProcAddress(library, function));
103 if (VirtualAlloc2 == NULL) {
104 RTE_LOG_WIN32_ERR("GetProcAddress(\"%s\", \"%s\")\n",
105 library_name, function);
107 /* Contrary to the docs, Server 2016 is not supported. */
108 RTE_LOG(ERR, EAL, "Windows 10 or Windows Server 2019 "
109 " is required for memory management\n");
113 FreeLibrary(library);
/* Alternative definition of eal_mem_win32api_init() for toolchains whose
 * headers already provide VirtualAlloc2().
 * NOTE(review): the body is elided from this listing.
 */
120 /* Stub in case VirtualAlloc2() is provided by the compiler. */
122 eal_mem_win32api_init(void)
127 #endif /* defined(MEM_PRESERVE_PLACEHOLDER) */
/* Handle to the virt2phys device, assigned by eal_mem_virt2iova_init().
 * While it is INVALID_HANDLE_VALUE, rte_mem_virt2phy() returns
 * RTE_BAD_PHYS_ADDR and rte_eal_using_phys_addrs() reports false.
 */
129 static HANDLE virt2phys_device = INVALID_HANDLE_VALUE;
/* Locate the virt2phys device and open a handle to it.
 *
 * Steps visible here:
 *  - SetupDiGetClassDevs() lists present device interfaces of class
 *    GUID_DEVINTERFACE_VIRT2PHYS;
 *  - SetupDiEnumDeviceInterfaces() takes the interface at index 0;
 *  - SetupDiGetDeviceInterfaceDetail() is called twice: first without a
 *    buffer to learn the required size (ERROR_INSUFFICIENT_BUFFER is the
 *    expected outcome of that probe), then with a malloc()'ed buffer of
 *    that size to read the device path;
 *  - CreateFile() opens that path and the handle is stored in
 *    virt2phys_device.
 * Each failing step is logged. At the end the device information list is
 * destroyed if it was obtained.
 *
 * NOTE(review): return statements and any release of `detail` are elided
 * from this listing; confirm them against the complete source.
 */
132 eal_mem_virt2iova_init(void)
134 HDEVINFO list = INVALID_HANDLE_VALUE;
135 SP_DEVICE_INTERFACE_DATA ifdata;
136 SP_DEVICE_INTERFACE_DETAIL_DATA *detail = NULL;
140 list = SetupDiGetClassDevs(
141 &GUID_DEVINTERFACE_VIRT2PHYS, NULL, NULL,
142 DIGCF_DEVICEINTERFACE | DIGCF_PRESENT);
143 if (list == INVALID_HANDLE_VALUE) {
144 RTE_LOG_WIN32_ERR("SetupDiGetClassDevs()");
148 ifdata.cbSize = sizeof(ifdata);
149 if (!SetupDiEnumDeviceInterfaces(
150 list, NULL, &GUID_DEVINTERFACE_VIRT2PHYS, 0, &ifdata)) {
151 RTE_LOG_WIN32_ERR("SetupDiEnumDeviceInterfaces()");
155 if (!SetupDiGetDeviceInterfaceDetail(
156 list, &ifdata, NULL, 0, &detail_size, NULL)) {
157 if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
159 "SetupDiGetDeviceInterfaceDetail(probe)");
164 detail = malloc(detail_size);
165 if (detail == NULL) {
166 RTE_LOG(ERR, EAL, "Cannot allocate virt2phys "
167 "device interface detail data\n");
171 detail->cbSize = sizeof(*detail);
172 if (!SetupDiGetDeviceInterfaceDetail(
173 list, &ifdata, detail, detail_size, NULL, NULL)) {
174 RTE_LOG_WIN32_ERR("SetupDiGetDeviceInterfaceDetail(read)");
178 RTE_LOG(DEBUG, EAL, "Found virt2phys device: %s\n", detail->DevicePath);
180 virt2phys_device = CreateFile(
181 detail->DevicePath, 0, 0, NULL, OPEN_EXISTING, 0, NULL);
182 if (virt2phys_device == INVALID_HANDLE_VALUE) {
183 RTE_LOG_WIN32_ERR("CreateFile()");
187 /* Indicate success. */
193 if (list != INVALID_HANDLE_VALUE)
194 SetupDiDestroyDeviceInfoList(list);
/* Translate a virtual address to a physical one through the virt2phys
 * device.
 *
 * Returns RTE_BAD_PHYS_ADDR when the device handle is
 * INVALID_HANDLE_VALUE or when the IOCTL_VIRT2PHYS_TRANSLATE request fails
 * (the failure is logged); otherwise returns phys.QuadPart from the
 * request's output buffer. The pointer value itself is the input buffer
 * of the request.
 */
200 rte_mem_virt2phy(const void *virt)
203 DWORD bytes_returned;
205 if (virt2phys_device == INVALID_HANDLE_VALUE)
206 return RTE_BAD_PHYS_ADDR;
208 if (!DeviceIoControl(
209 virt2phys_device, IOCTL_VIRT2PHYS_TRANSLATE,
210 &virt, sizeof(virt), &phys, sizeof(phys),
211 &bytes_returned, NULL)) {
212 RTE_LOG_WIN32_ERR("DeviceIoControl(IOCTL_VIRT2PHYS_TRANSLATE)");
213 return RTE_BAD_PHYS_ADDR;
216 return phys.QuadPart;
219 /* Windows currently only supports IOVA as PA. */
/* Obtain the IOVA of a virtual address by translating it with
 * rte_mem_virt2phy() and casting the result to rte_iova_t.
 *
 * NOTE(review): the statements executed when no virt2phys device is open,
 * or when translation yields RTE_BAD_PHYS_ADDR, are elided from this
 * listing; presumably an invalid-IOVA value is returned -- confirm.
 */
221 rte_mem_virt2iova(const void *virt)
225 if (virt2phys_device == INVALID_HANDLE_VALUE)
228 phys = rte_mem_virt2phy(virt);
229 if (phys == RTE_BAD_PHYS_ADDR)
232 return (rte_iova_t)phys;
235 /* Always using physical addresses under Windows if they can be obtained. */
/* Report whether physical addresses can be obtained: the result is
 * non-zero exactly when virt2phys_device is not INVALID_HANDLE_VALUE.
 */
237 rte_eal_using_phys_addrs(void)
239 return virt2phys_device != INVALID_HANDLE_VALUE;
242 /* Approximate error mapping from VirtualAlloc2() to POSIX mmap(3). */
/* Convert the Win32 error `code` of a failed allocation into an
 * errno-style error. The cases visible here distinguish
 * ERROR_INVALID_ADDRESS, ERROR_COMMITMENT_LIMIT, ERROR_NO_SYSTEM_RESOURCES
 * and ERROR_INVALID_PARAMETER.
 *
 * NOTE(review): the assignments made in each case are elided from this
 * listing, so the resulting error values are not documented here.
 */
244 set_errno_from_win32_alloc_error(DWORD code)
251 case ERROR_INVALID_ADDRESS:
252 /* A valid requested address is not available. */
253 case ERROR_COMMITMENT_LIMIT:
254 /* May occur when committing regular memory. */
255 case ERROR_NO_SYSTEM_RESOURCES:
256 /* Occurs when the system runs out of hugepages. */
260 case ERROR_INVALID_PARAMETER:
/* Reserve `size` bytes of address space as a placeholder
 * (MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS), passing
 * `requested_addr` as the base address to VirtualAlloc2().
 *
 * On failure the Win32 error is captured before logging and converted
 * with set_errno_from_win32_alloc_error(). With EAL_RESERVE_FORCE_ADDRESS,
 * a reservation that landed at a different address is released again.
 *
 * NOTE(review): the handling of EAL_RESERVE_HUGEPAGES, the tail of the
 * VirtualAlloc2() call and all return statements are elided from this
 * listing -- confirm them against the complete source.
 */
268 eal_mem_reserve(void *requested_addr, size_t size, int flags)
273 /* Windows requires hugepages to be committed. */
274 if (flags & EAL_RESERVE_HUGEPAGES) {
279 process = GetCurrentProcess();
281 virt = VirtualAlloc2(process, requested_addr, size,
282 MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS,
285 DWORD err = GetLastError();
286 RTE_LOG_WIN32_ERR("VirtualAlloc2()");
287 set_errno_from_win32_alloc_error(err);
291 if ((flags & EAL_RESERVE_FORCE_ADDRESS) && (virt != requested_addr)) {
292 if (!VirtualFreeEx(process, virt, 0, MEM_RELEASE))
293 RTE_LOG_WIN32_ERR("VirtualFreeEx()");
/* Allocate `size` bytes of committed large-page memory
 * (MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE) with
 * VirtualAllocExNuma(), with no preferred address (NULL), on the NUMA
 * node that eal_socket_numa_node() gives for `socket_id`.
 *
 * NOTE(review): `flags` is initialised and then reassigned before use in
 * the visible lines; error handling and the return statement are elided
 * from this listing.
 */
302 eal_mem_alloc_socket(size_t size, int socket_id)
304 DWORD flags = MEM_RESERVE | MEM_COMMIT;
307 flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
308 addr = VirtualAllocExNuma(GetCurrentProcess(), NULL, size, flags,
309 PAGE_READWRITE, eal_socket_numa_node(socket_id));
/* Commit `size` bytes of large-page memory, at `requested_addr` when that
 * is non-NULL.
 *
 * For a non-NULL address the existing reservation is prepared first: the
 * region is queried with VirtualQueryEx(), split off with
 * MEM_PRESERVE_PLACEHOLDER if it is larger than `size`, and then released
 * so that it can be allocated again (see the race note in the body).
 * When `socket_id` is not SOCKET_ID_ANY, `param` is filled in as a
 * MemExtendedParameterNumaNode parameter carrying
 * eal_socket_numa_node(socket_id). The address of `param` and
 * `param_count` are handed to VirtualAlloc2().
 *
 * On allocation failure the Win32 error is captured before logging and
 * converted with set_errno_from_win32_alloc_error(). If the allocation
 * lands at an address other than the requested one, it is released and
 * rte_errno is set to EADDRNOTAVAIL.
 *
 * NOTE(review): return statements and a few short lines are elided from
 * this listing.
 */
316 eal_mem_commit(void *requested_addr, size_t size, int socket_id)
319 MEM_EXTENDED_PARAMETER param;
320 DWORD param_count = 0;
324 process = GetCurrentProcess();
326 if (requested_addr != NULL) {
327 MEMORY_BASIC_INFORMATION info;
329 if (VirtualQueryEx(process, requested_addr, &info,
330 sizeof(info)) != sizeof(info)) {
331 RTE_LOG_WIN32_ERR("VirtualQueryEx(%p)", requested_addr);
335 /* Split reserved region if only a part is committed. */
336 flags = MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER;
337 if ((info.RegionSize > size) && !VirtualFreeEx(
338 process, requested_addr, size, flags)) {
340 "VirtualFreeEx(%p, %zu, preserve placeholder)",
341 requested_addr, size);
345 /* Temporarily release the region to be committed.
347 * There is an inherent race for this memory range
348 * if another thread allocates memory via OS API.
349 * However, VirtualAlloc2(MEM_REPLACE_PLACEHOLDER)
350 * doesn't work with MEM_LARGE_PAGES on Windows Server.
352 if (!VirtualFreeEx(process, requested_addr, 0, MEM_RELEASE)) {
353 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)",
359 if (socket_id != SOCKET_ID_ANY) {
361 memset(&param, 0, sizeof(param));
362 param.Type = MemExtendedParameterNumaNode;
363 param.ULong = eal_socket_numa_node(socket_id);
366 flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
367 addr = VirtualAlloc2(process, requested_addr, size,
368 flags, PAGE_READWRITE, &param, param_count);
370 /* Logging may overwrite GetLastError() result. */
371 DWORD err = GetLastError();
372 RTE_LOG_WIN32_ERR("VirtualAlloc2(%p, %zu, commit large pages)",
373 requested_addr, size);
374 set_errno_from_win32_alloc_error(err);
378 if ((requested_addr != NULL) && (addr != requested_addr)) {
379 /* We lost the race for the requested_addr. */
380 if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE))
381 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, release)", addr);
383 rte_errno = EADDRNOTAVAIL;
/* Give back committed hugepage memory at `addr` while keeping the address
 * range reserved: the region is released with VirtualFreeEx(MEM_RELEASE)
 * and then reserved again at the same address as a placeholder of `size`
 * bytes through VirtualAlloc2().
 *
 * In the branch commented as losing the race for the address,
 * VirtualFreeEx() is called on the new reservation and rte_errno is set to
 * EADDRNOTAVAIL.
 *
 * NOTE(review): the condition guarding that branch and all return
 * statements are elided from this listing.
 */
391 eal_mem_decommit(void *addr, size_t size)
397 process = GetCurrentProcess();
399 /* Hugepages cannot be decommitted on Windows,
400 * so free them and replace the block with a placeholder.
401 * There is a race for VA in this block until VirtualAlloc2 call.
403 if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE)) {
404 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)", addr);
408 flags = MEM_RESERVE | MEM_RESERVE_PLACEHOLDER;
409 stub = VirtualAlloc2(
410 process, addr, size, flags, PAGE_NOACCESS, NULL, 0);
412 /* We lost the race for the VA. */
413 if (!VirtualFreeEx(process, stub, 0, MEM_RELEASE))
414 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, release)", stub);
415 rte_errno = EADDRNOTAVAIL;
419 /* No need to join reserved regions adjacent to the freed one:
420 * eal_mem_commit() will just pick up the page-size placeholder
427 * Free a reserved memory region in full or in part.
430 * Starting address of the area to free.
432 * Number of bytes to free. Must be a multiple of page size.
434 * Fail if the region is not in reserved state.
436 * * 0 on successful deallocation;
437 * * 1 if region must be in reserved state but it is not;
438 * * (-1) on system API failures.
/* Implementation outline: the region is queried with VirtualQueryEx();
 * when `reserved` is set, a region whose state is not MEM_RESERVE is
 * rejected. If `addr` equals the allocation base and `size` equals the
 * region size, the whole region is released with one
 * VirtualFreeEx(MEM_RELEASE) call. Otherwise the part is first split off
 * as its own placeholder (MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER) and then
 * released. Failing API calls are logged.
 *
 * NOTE(review): return statements are elided from this listing.
 */
441 mem_free(void *addr, size_t size, bool reserved)
443 MEMORY_BASIC_INFORMATION info;
446 process = GetCurrentProcess();
449 process, addr, &info, sizeof(info)) != sizeof(info)) {
450 RTE_LOG_WIN32_ERR("VirtualQueryEx(%p)", addr);
454 if (reserved && (info.State != MEM_RESERVE))
457 /* Free complete region. */
458 if ((addr == info.AllocationBase) && (size == info.RegionSize)) {
459 if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE)) {
460 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)",
466 /* Split the part to be freed and the remaining reservation. */
467 if (!VirtualFreeEx(process, addr, size,
468 MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) {
470 "VirtualFreeEx(%p, %zu, preserve placeholder)",
475 /* Actually free reservation part. */
476 if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE)) {
477 RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)", addr);
/* Free a memory region without requiring it to be in reserved state:
 * forwards to mem_free() with `reserved` set to false and does not use
 * its result.
 */
485 eal_mem_free(void *virt, size_t size)
487 mem_free(virt, size, false);
/* Presumably selects whether a memory region is included in crash dumps
 * (going by the name and the `dump` parameter).
 *
 * NOTE(review): no executable statement of this function is visible in
 * this listing; the comment inside suggests nothing is currently needed
 * on Windows -- confirm against the complete source.
 */
491 eal_mem_set_dump(void *virt, size_t size, bool dump)
497 /* Windows does not dump reserved memory by default.
499 * There is <werapi.h> to include or exclude regions from the dump,
500 * but this is not currently required by EAL.
/* Map a file, or anonymous memory, into the address space.
 *
 * `prot` (RTE_PROT_READ / RTE_PROT_WRITE / RTE_PROT_EXECUTE) is translated
 * into a page protection for CreateFileMapping() and an access mask for
 * MapViewOfFileEx(); RTE_MAP_PRIVATE adds FILE_MAP_COPY. Unless
 * RTE_MAP_ANONYMOUS is given, the OS handle behind `fd` backs the mapping;
 * otherwise the file handle stays INVALID_HANDLE_VALUE.
 *
 * A non-NULL `requested_addr` is first freed with mem_free() in
 * "must be reserved" mode; a region that is not reserved is reported with
 * rte_errno set to EADDRNOTAVAIL. With RTE_MAP_FORCE_ADDRESS, a view that
 * ended up at another address is unmapped. The mapping object handle is
 * closed at the end of the visible code.
 *
 * CreateFileMapping() signals failure by returning NULL, not
 * INVALID_HANDLE_VALUE, so NULL is the value tested after the call.
 *
 * NOTE(review): return statements and a few short lines are elided from
 * this listing.
 */
508 rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
509 int fd, size_t offset)
511 HANDLE file_handle = INVALID_HANDLE_VALUE;
512 HANDLE mapping_handle = INVALID_HANDLE_VALUE;
514 DWORD sys_access = 0;
515 DWORD size_high = (DWORD)(size >> 32);
516 DWORD size_low = (DWORD)size;
517 DWORD offset_high = (DWORD)(offset >> 32);
518 DWORD offset_low = (DWORD)offset;
521 if (prot & RTE_PROT_EXECUTE) {
522 if (prot & RTE_PROT_READ) {
523 sys_prot = PAGE_EXECUTE_READ;
524 sys_access = FILE_MAP_READ | FILE_MAP_EXECUTE;
526 if (prot & RTE_PROT_WRITE) {
527 sys_prot = PAGE_EXECUTE_READWRITE;
528 sys_access = FILE_MAP_WRITE | FILE_MAP_EXECUTE;
531 if (prot & RTE_PROT_READ) {
532 sys_prot = PAGE_READONLY;
533 sys_access = FILE_MAP_READ;
535 if (prot & RTE_PROT_WRITE) {
536 sys_prot = PAGE_READWRITE;
537 sys_access = FILE_MAP_WRITE;
541 if (flags & RTE_MAP_PRIVATE)
542 sys_access |= FILE_MAP_COPY;
544 if ((flags & RTE_MAP_ANONYMOUS) == 0)
545 file_handle = (HANDLE)_get_osfhandle(fd);
547 mapping_handle = CreateFileMapping(
548 file_handle, NULL, sys_prot, size_high, size_low, NULL);
549 if (mapping_handle == NULL) {
550 RTE_LOG_WIN32_ERR("CreateFileMapping()");
554 /* There is a race for the requested_addr between mem_free()
555 * and MapViewOfFileEx(). MapViewOfFile3() can replace a reserved
556 * region with a mapping in a single operation, but it does not support
559 if (requested_addr != NULL) {
560 int ret = mem_free(requested_addr, size, true);
563 RTE_LOG(ERR, EAL, "Cannot map memory "
564 "to a region not reserved\n");
565 rte_errno = EADDRNOTAVAIL;
571 virt = MapViewOfFileEx(mapping_handle, sys_access,
572 offset_high, offset_low, size, requested_addr);
574 RTE_LOG_WIN32_ERR("MapViewOfFileEx()");
578 if ((flags & RTE_MAP_FORCE_ADDRESS) && (virt != requested_addr)) {
579 if (!UnmapViewOfFile(virt))
580 RTE_LOG_WIN32_ERR("UnmapViewOfFile()");
584 if (!CloseHandle(mapping_handle))
585 RTE_LOG_WIN32_ERR("CloseHandle()");
/* Remove the mapped view at `virt` with UnmapViewOfFile(); a failure is
 * logged. `size` is not used in the visible lines.
 *
 * NOTE(review): return statements are elided from this listing.
 */
591 rte_mem_unmap(void *virt, size_t size)
595 if (!UnmapViewOfFile(virt)) {
596 RTE_LOG_WIN32_ERR("UnmapViewOfFile()");
/* Base address used as a starting point for EAL memory reservations.
 *
 * NOTE(review): the return statement is elided from this listing; the
 * comment inside states that 0 is returned -- confirm.
 */
604 eal_get_baseaddr(void)
606 /* Windows strategy for memory allocation is undocumented.
607 * Returning 0 here effectively disables address guessing
608 * unless user provides an address hint.
/* Return the system page size (SYSTEM_INFO.dwPageSize).
 *
 * GetSystemInfo() is called only while the function-local static
 * structure still holds a zero page size, so its result is reused on
 * later calls. No locking is visible around this cache.
 */
614 rte_mem_page_size(void)
616 static SYSTEM_INFO info;
618 if (info.dwPageSize == 0)
619 GetSystemInfo(&info);
621 return info.dwPageSize;
/* Lock `size` bytes starting at `virt` with VirtualLock(); a failure is
 * logged. The const qualifier is dropped through a uintptr_t round-trip
 * because VirtualLock() takes a non-const pointer.
 *
 * NOTE(review): return statements are elided from this listing.
 */
625 rte_mem_lock(const void *virt, size_t size)
627 /* VirtualLock() takes `void*`, work around compiler warning. */
628 void *addr = (void *)((uintptr_t)virt);
630 if (!VirtualLock(addr, size)) {
631 RTE_LOG_WIN32_ERR("VirtualLock(%p %#zx)", virt, size);
/* Initialise the memory segment lists. Only primary processes are
 * handled: for any other process type EAL_LOG_NOT_IMPLEMENTED() is
 * invoked; otherwise the result of eal_dynmem_memseg_lists_init() is
 * returned.
 *
 * NOTE(review): the statement following EAL_LOG_NOT_IMPLEMENTED() is
 * elided from this listing.
 */
639 rte_eal_memseg_init(void)
641 if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
642 EAL_LOG_NOT_IMPLEMENTED();
646 return eal_dynmem_memseg_lists_init();
/* Set up memory for no-hugepage mode.
 *
 * Forces legacy memory mode, describes internal_conf->memory bytes as
 * RTE_PGSIZE_4K segments in the first memseg list (named "nohugemem"),
 * backs them with a committed read-write allocation and populates the
 * list with it. Afterwards, if a DMA mask is configured, it is checked
 * with rte_mem_check_dma_mask_thread_unsafe() and a violation is logged.
 *
 * NOTE(review): the allocation call is only partly visible (its arguments
 * and the "VirtualAlloc" log text), and return statements are elided from
 * this listing.
 */
650 eal_nohuge_init(void)
652 struct rte_mem_config *mcfg;
653 struct rte_memseg_list *msl;
655 uint64_t mem_sz, page_sz;
658 mcfg = rte_eal_get_configuration()->mem_config;
659 struct internal_config *internal_conf =
660 eal_get_internal_configuration();
662 /* nohuge mode is legacy mode */
663 internal_conf->legacy_mem = 1;
665 msl = &mcfg->memsegs[0];
667 mem_sz = internal_conf->memory;
668 page_sz = RTE_PGSIZE_4K;
669 n_segs = mem_sz / page_sz;
671 if (eal_memseg_list_init_named(
672 msl, "nohugemem", page_sz, n_segs, 0, true)) {
677 NULL, mem_sz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
679 RTE_LOG_WIN32_ERR("VirtualAlloc(size=%#zx)", mem_sz);
680 RTE_LOG(ERR, EAL, "Cannot allocate memory\n");
687 eal_memseg_list_populate(msl, addr, n_segs);
689 if (mcfg->dma_maskbits &&
690 rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
692 "%s(): couldn't allocate memory due to IOVA "
693 "exceeding limits of current DMA mask.\n", __func__);
/* Initialise EAL memory: calls eal_nohuge_init() when no_hugetlbfs is set
 * in the internal configuration and eal_dynmem_hugepage_init() otherwise,
 * returning the chosen function's result.
 */
701 rte_eal_hugepage_init(void)
703 const struct internal_config *internal_conf =
704 eal_get_internal_configuration();
706 return internal_conf->no_hugetlbfs ?
707 eal_nohuge_init() : eal_dynmem_hugepage_init();
/* Attaching to hugepage memory is not implemented here: the visible body
 * only invokes EAL_LOG_NOT_IMPLEMENTED().
 *
 * NOTE(review): the return statement is elided from this listing.
 */
711 rte_eal_hugepage_attach(void)
713 EAL_LOG_NOT_IMPLEMENTED();