1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
11 * Memory-related RTE API.
22 #include <rte_common.h>
23 #include <rte_compat.h>
24 #include <rte_config.h>
25 #include <rte_fbarray.h>
27 #define RTE_PGSIZE_4K (1ULL << 12) /**< 4 KiB page size. */
28 #define RTE_PGSIZE_64K (1ULL << 16) /**< 64 KiB page size. */
29 #define RTE_PGSIZE_256K (1ULL << 18) /**< 256 KiB page size. */
30 #define RTE_PGSIZE_2M (1ULL << 21) /**< 2 MiB page size. */
31 #define RTE_PGSIZE_16M (1ULL << 24) /**< 16 MiB page size. */
32 #define RTE_PGSIZE_256M (1ULL << 28) /**< 256 MiB page size. */
33 #define RTE_PGSIZE_512M (1ULL << 29) /**< 512 MiB page size. */
34 #define RTE_PGSIZE_1G (1ULL << 30) /**< 1 GiB page size. */
35 #define RTE_PGSIZE_4G (1ULL << 32) /**< 4 GiB page size. */
36 #define RTE_PGSIZE_16G (1ULL << 34) /**< 16 GiB page size. */
38 #define SOCKET_ID_ANY -1 /**< Any NUMA socket. */ /* NOTE(review): expands to an unparenthesized -1; consider (-1). */
41 * Physical memory segment descriptor.
43 #define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
44 /**< Memseg flag (bit 0): prevent this segment from being freed back to the OS. */
48 phys_addr_t phys_addr; /**< deprecated - Start physical address. */
49 rte_iova_t iova; /**< Start IO address. */
53 void *addr; /**< Start virtual address. */
54 uint64_t addr_64; /**< Makes sure addr is always 64 bits */
56 size_t len; /**< Length of the segment. */
57 uint64_t hugepage_sz; /**< The pagesize of underlying memory */
58 int32_t socket_id; /**< NUMA socket ID. */
59 uint32_t nchannel; /**< Number of channels. */
60 uint32_t nrank; /**< Number of ranks. */
61 uint32_t flags; /**< Memseg-specific flags */
65 * memseg list is a special case as we need to store a bunch of other data
66 * together with the array itself.
68 struct rte_memseg_list {
72 /**< Base virtual address for this memseg list. */
74 /**< Makes sure addr is always 64-bits */
76 uint64_t page_sz; /**< Page size for all memsegs in this list. */
77 int socket_id; /**< Socket ID for all memsegs in this list. */
78 volatile uint32_t version; /**< version number for multiprocess sync. */
79 size_t len; /**< Length of memory area covered by this memseg list. */
80 unsigned int external; /**< 1 if this list points to external memory */
81 unsigned int heap; /**< 1 if this list points to a heap */
82 struct rte_fbarray memseg_arr;
86 * Lock a page in physical memory and prevent it from being swapped out.
89 * The virtual address.
91 * 0 on success, negative on error.
93 int rte_mem_lock_page(const void *virt); /* Returns 0 on success, negative on error. */
96 * Get physical address of any mapped virtual address in the current process.
97 * It is found by browsing the /proc/self/pagemap special file.
98 * The page must be locked.
101 * The virtual address.
103 * The physical address or RTE_BAD_IOVA on error.
105 phys_addr_t rte_mem_virt2phy(const void *virt); /* The page must be locked; returns RTE_BAD_IOVA on error. */
108 * Get IO virtual address of any mapped virtual address in the current process.
110 * @note This function will not check the internal page table. Instead, in IOVA as
111 * PA mode, it will fall back to getting real physical address (which may
112 * not match the expected IOVA, such as what was specified for external
116 * The virtual address.
118 * The IO address or RTE_BAD_IOVA on error.
120 rte_iova_t rte_mem_virt2iova(const void *virt); /* Returns RTE_BAD_IOVA on error. */
123 * Get virtual memory address corresponding to iova address.
125 * @note This function read-locks the memory hotplug subsystem, and thus cannot
126 * be used within memory-related callback functions.
131 * Virtual address corresponding to iova address (or NULL if address does not
132 * exist within DPDK memory map).
136 rte_mem_iova2virt(rte_iova_t iova);
139 * Get memseg to which a particular virtual address belongs.
142 * The virtual address.
144 * The memseg list in which to look up based on ``virt`` address
147 * Memseg pointer on success, or NULL on error.
151 rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);
154 * Get memseg list corresponding to virtual memory address.
157 * The virtual address.
159 * Memseg list to which this virtual address belongs.
162 struct rte_memseg_list *
163 rte_mem_virt2memseg_list(const void *virt); /* Returns the memseg list to which virt belongs. */
166 * Memseg walk function prototype.
168 * Returning 0 will continue walk
169 * Returning 1 will stop the walk
170 * Returning -1 will stop the walk and report error
172 typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
173 const struct rte_memseg *ms, void *arg); /* Return 0 to continue, 1 to stop, -1 to stop and report an error. */
176 * Memseg contig walk function prototype. This will trigger a callback on every
177 * VA-contiguous area starting at memseg ``ms``, so total valid VA space at each
178 * callback call will be [``ms->addr``, ``ms->addr + len``).
180 * Returning 0 will continue walk
181 * Returning 1 will stop the walk
182 * Returning -1 will stop the walk and report error
184 typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
185 const struct rte_memseg *ms, size_t len, void *arg); /* len is the size of the VA-contiguous area starting at ms; return 0 to continue, 1 to stop, -1 to stop and report an error. */
188 * Memseg list walk function prototype. This will trigger a callback on every
189 * allocated memseg list.
191 * Returning 0 will continue walk
192 * Returning 1 will stop the walk
193 * Returning -1 will stop the walk and report error
195 typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
199 * Walk list of all memsegs.
201 * @note This function read-locks the memory hotplug subsystem, and thus cannot
202 * be used within memory-related callback functions.
204 * @note This function will also walk through externally allocated segments. It
205 * is up to the user to decide whether to skip through these segments.
210 * Argument passed to iterator
212 * 0 if walked over the entire list
213 * 1 if stopped by the user
214 * -1 if user function reported error
218 rte_memseg_walk(rte_memseg_walk_t func, void *arg);
221 * Walk each VA-contiguous area.
223 * @note This function read-locks the memory hotplug subsystem, and thus cannot
224 * be used within memory-related callback functions.
226 * @note This function will also walk through externally allocated segments. It
227 * is up to the user to decide whether to skip through these segments.
232 * Argument passed to iterator
234 * 0 if walked over the entire list
235 * 1 if stopped by the user
236 * -1 if user function reported error
240 rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
243 * Walk each allocated memseg list.
245 * @note This function read-locks the memory hotplug subsystem, and thus cannot
246 * be used within memory-related callback functions.
248 * @note This function will also walk through externally allocated segments. It
249 * is up to the user to decide whether to skip through these segments.
254 * Argument passed to iterator
256 * 0 if walked over the entire list
257 * 1 if stopped by the user
258 * -1 if user function reported error
262 rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
265 * Walk list of all memsegs without performing any locking.
267 * @note This function does not perform any locking, and is only safe to call
268 * from within memory-related callback functions.
273 * Argument passed to iterator
275 * 0 if walked over the entire list
276 * 1 if stopped by the user
277 * -1 if user function reported error
281 rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
284 * Walk each VA-contiguous area without performing any locking.
286 * @note This function does not perform any locking, and is only safe to call
287 * from within memory-related callback functions.
292 * Argument passed to iterator
294 * 0 if walked over the entire list
295 * 1 if stopped by the user
296 * -1 if user function reported error
300 rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
303 * Walk each allocated memseg list without performing any locking.
305 * @note This function does not perform any locking, and is only safe to call
306 * from within memory-related callback functions.
311 * Argument passed to iterator
313 * 0 if walked over the entire list
314 * 1 if stopped by the user
315 * -1 if user function reported error
319 rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
322 * Return file descriptor associated with a particular memseg (if available).
324 * @note This function read-locks the memory hotplug subsystem, and thus cannot
325 * be used within memory-related callback functions.
327 * @note This returns an internal file descriptor. Performing any operations on
328 * this file descriptor is inherently dangerous, so it should be treated
329 * as read-only for all intents and purposes.
332 * A pointer to memseg for which to get file descriptor.
335 * Valid file descriptor in case of success.
336 * -1 in case of error, with ``rte_errno`` set to the following values:
337 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
338 * - ENODEV - ``ms`` fd is not available
339 * - ENOENT - ``ms`` is an unused segment
340 * - ENOTSUP - segment fd's are not supported
344 rte_memseg_get_fd(const struct rte_memseg *ms);
347 * Return file descriptor associated with a particular memseg (if available).
349 * @note This function does not perform any locking, and is only safe to call
350 * from within memory-related callback functions.
352 * @note This returns an internal file descriptor. Performing any operations on
353 * this file descriptor is inherently dangerous, so it should be treated
354 * as read-only for all intents and purposes.
357 * A pointer to memseg for which to get file descriptor.
360 * Valid file descriptor in case of success.
361 * -1 in case of error, with ``rte_errno`` set to the following values:
362 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
363 * - ENODEV - ``ms`` fd is not available
364 * - ENOENT - ``ms`` is an unused segment
365 * - ENOTSUP - segment fd's are not supported
369 rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);
372 * Get offset into segment file descriptor associated with a particular memseg
375 * @note This function read-locks the memory hotplug subsystem, and thus cannot
376 * be used within memory-related callback functions.
379 * A pointer to memseg for which to get the file descriptor offset.
381 * A pointer to offset value where the result will be stored.
384 * 0 in case of success, with the offset stored in ``offset``.
385 * -1 in case of error, with ``rte_errno`` set to the following values:
386 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
387 * - EINVAL - ``offset`` pointer was NULL
388 * - ENODEV - ``ms`` fd is not available
389 * - ENOENT - ``ms`` is an unused segment
390 * - ENOTSUP - segment fd's are not supported
394 rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);
397 * Get offset into segment file descriptor associated with a particular memseg
400 * @note This function does not perform any locking, and is only safe to call
401 * from within memory-related callback functions.
404 * A pointer to memseg for which to get the file descriptor offset.
406 * A pointer to offset value where the result will be stored.
409 * 0 in case of success, with the offset stored in ``offset``.
410 * -1 in case of error, with ``rte_errno`` set to the following values:
411 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
412 * - EINVAL - ``offset`` pointer was NULL
413 * - ENODEV - ``ms`` fd is not available
414 * - ENOENT - ``ms`` is an unused segment
415 * - ENOTSUP - segment fd's are not supported
419 rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
424 * @b EXPERIMENTAL: this API may change without prior notice
426 * Register external memory chunk with DPDK.
428 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
431 * @note This API will not perform any DMA mapping. It is expected that user
432 * will do that themselves.
434 * @note Before accessing this memory in other processes, it needs to be
435 * attached in each of those processes by calling ``rte_extmem_attach`` in
436 * each other process.
439 * Start of virtual area to register. Must be aligned by ``page_sz``.
441 * Length of virtual area to register. Must be aligned by ``page_sz``.
443 * Array of page IOVA addresses corresponding to each page in this memory
444 * area. Can be NULL, in which case page IOVA addresses will be set to
447 * Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
450 * Page size of the underlying memory
454 * - -1 in case of error, with rte_errno set to one of the following:
455 * EINVAL - one of the parameters was invalid
456 * EEXIST - memory chunk is already registered
457 * ENOSPC - no more space in internal config to store a new memory chunk
461 rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
462 unsigned int n_pages, size_t page_sz);
466 * @b EXPERIMENTAL: this API may change without prior notice
468 * Unregister external memory chunk from DPDK.
470 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
473 * @note This API will not perform any DMA unmapping. It is expected that user
474 * will do that themselves.
476 * @note Before calling this function, all other processes must call
477 * ``rte_extmem_detach`` to detach from the memory area.
480 * Start of virtual area to unregister
482 * Length of virtual area to unregister
486 * - -1 in case of error, with rte_errno set to one of the following:
487 * EINVAL - one of the parameters was invalid
488 * ENOENT - memory chunk was not found
492 rte_extmem_unregister(void *va_addr, size_t len);
496 * @b EXPERIMENTAL: this API may change without prior notice
498 * Attach to external memory chunk registered in another process.
500 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
503 * @note This API will not perform any DMA mapping. It is expected that user
504 * will do that themselves.
507 * Start of virtual area to attach to
509 * Length of virtual area to attach to
513 * - -1 in case of error, with rte_errno set to one of the following:
514 * EINVAL - one of the parameters was invalid
515 * ENOENT - memory chunk was not found
519 rte_extmem_attach(void *va_addr, size_t len);
523 * @b EXPERIMENTAL: this API may change without prior notice
525 * Detach from external memory chunk registered in another process.
527 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
530 * @note This API will not perform any DMA unmapping. It is expected that user
531 * will do that themselves.
534 * Start of virtual area to detach from
536 * Length of virtual area to detach from
540 * - -1 in case of error, with rte_errno set to one of the following:
541 * EINVAL - one of the parameters was invalid
542 * ENOENT - memory chunk was not found
546 rte_extmem_detach(void *va_addr, size_t len);
549 * Dump the physical memory layout to a file.
551 * @note This function read-locks the memory hotplug subsystem, and thus cannot
552 * be used within memory-related callback functions.
555 * A pointer to a file for output
557 void rte_dump_physmem_layout(FILE *f); /* Read-locks the memory hotplug subsystem; do not call from memory-related callbacks. */
560 * Get the total amount of available physical memory.
562 * @note This function read-locks the memory hotplug subsystem, and thus cannot
563 * be used within memory-related callback functions.
566 * The total amount of available physical memory in bytes.
568 uint64_t rte_eal_get_physmem_size(void); /* Result is in bytes; read-locks the memory hotplug subsystem. */
571 * Get the number of memory channels.
574 * The number of memory channels on the system. The value is 0 if unknown
575 * or not the same on all devices.
577 unsigned rte_memory_get_nchannel(void); /* 0 means unknown or not the same on all devices. */
580 * Get the number of memory ranks.
583 * The number of memory ranks on the system. The value is 0 if unknown or
584 * not the same on all devices.
586 unsigned rte_memory_get_nrank(void); /* 0 means unknown or not the same on all devices. */
590 * @b EXPERIMENTAL: this API may change without prior notice
592 * Check if all currently allocated memory segments are compliant with
593 * supplied DMA address width.
596 * Address width to check against.
599 int rte_mem_check_dma_mask(uint8_t maskbits); /* NOTE(review): return convention is not documented here — presumably 0 when all segments comply; confirm. */
603 * @b EXPERIMENTAL: this API may change without prior notice
605 * Check if all currently allocated memory segments are compliant with
606 * supplied DMA address width. This function will use
607 * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk, so
608 * memory_hotplug_lock will not be acquired, avoiding deadlock during
609 * memory initialization.
611 * This function is just for EAL core memory internal use. Drivers should
612 * use the previous rte_mem_check_dma_mask.
615 * Address width to check against.
618 int rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits); /* For EAL core memory internal use; does not acquire memory_hotplug_lock. */
622 * @b EXPERIMENTAL: this API may change without prior notice
624 * Set dma mask to use once memory initialization is done. Previous functions
625 * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe cannot be
626 * used safely until memory has been initialized.
629 void rte_mem_set_dma_mask(uint8_t maskbits); /* Sets the DMA mask to use once memory initialization is done. */
632 * Drivers based on uio will not load unless physical
633 * addresses are obtainable. It is only possible to get
634 * physical addresses when running as a privileged user.
637 * 1 if the system is able to obtain physical addresses.
638 * 0 if using DMA addresses through an IOMMU.
640 int rte_eal_using_phys_addrs(void); /* 1: physical addresses are obtainable; 0: DMA addresses go through an IOMMU. */
644 * Enum indicating which kind of memory event has happened. Used by callbacks to
645 * distinguish between memory allocations and deallocations.
648 RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
649 RTE_MEM_EVENT_FREE, /**< Deallocation event. */
651 #define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
652 /**< Maximum length of a memory event callback name. NOTE(review): unclear whether this counts the terminating NUL — confirm. */
655 * Function typedef used to register callbacks for memory events.
657 typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
658 const void *addr, size_t len, void *arg); /* event_type distinguishes allocation from deallocation; addr/len presumably describe the affected area — confirm. */
661 * Function used to register callbacks for memory events.
663 * @note callbacks will happen while memory hotplug subsystem is write-locked,
664 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
665 * deadlock when called from within such callbacks.
667 * @note mem event callbacks not being supported is an expected error condition,
668 * so user code needs to handle this situation. In these cases, return
669 * value will be -1, and rte_errno will be set to ENOTSUP.
672 * Name associated with specified callback to be added to the list.
675 * Callback function pointer.
678 * Argument to pass to the callback.
681 * 0 on successful callback register
682 * -1 on unsuccessful callback register, with rte_errno value indicating
683 * reason for failure.
687 rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
691 * Function used to unregister callbacks for memory events.
694 * Name associated with specified callback to be removed from the list.
697 * Argument to look for among callbacks with specified callback name.
700 * 0 on successful callback unregister
701 * -1 on unsuccessful callback unregister, with rte_errno value indicating
702 * reason for failure.
706 rte_mem_event_callback_unregister(const char *name, void *arg);
709 #define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
710 /**< Maximum length of an allocation validator name. NOTE(review): unclear whether this counts the terminating NUL — confirm. */
712 * Function typedef used to register memory allocation validation callbacks.
714 * Returning 0 will allow allocation attempt to continue. Returning -1 will
715 * prevent allocation from succeeding.
717 typedef int (*rte_mem_alloc_validator_t)(int socket_id,
718 size_t cur_limit, size_t new_len); /* Return 0 to let the allocation attempt continue, -1 to prevent it. */
721 * @brief Register validator callback for memory allocations.
723 * Callbacks registered by this function will be called right before memory
724 * allocator is about to trigger allocation of more pages from the system if
725 * said allocation will bring total memory usage above specified limit on
726 * specified socket. User will be able to cancel pending allocation if callback
729 * @note callbacks will happen while memory hotplug subsystem is write-locked,
730 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
731 * deadlock when called from within such callbacks.
733 * @note validator callbacks not being supported is an expected error condition,
734 * so user code needs to handle this situation. In these cases, return
735 * value will be -1, and rte_errno will be set to ENOTSUP.
738 * Name associated with specified callback to be added to the list.
741 * Callback function pointer.
744 * Socket ID on which to watch for allocations.
747 * Limit above which to trigger callbacks.
750 * 0 on successful callback register
751 * -1 on unsuccessful callback register, with rte_errno value indicating
752 * reason for failure.
756 rte_mem_alloc_validator_register(const char *name,
757 rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
760 * @brief Unregister validator callback for memory allocations.
763 * Name associated with specified callback to be removed from the list.
766 * Socket ID on which to watch for allocations.
769 * 0 on successful callback unregister
770 * -1 on unsuccessful callback unregister, with rte_errno value indicating
771 * reason for failure.
775 rte_mem_alloc_validator_unregister(const char *name, int socket_id);
781 #endif /* _RTE_MEMORY_H_ */