1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
11 * Memory-related RTE API.
22 #include <rte_common.h>
23 #include <rte_compat.h>
24 #include <rte_config.h>
25 #include <rte_fbarray.h>
/* Page-size constants: each value is the page size in bytes (a power of two).
 * The ULL suffix keeps the shifts by 32 and 34 inside an unsigned long long. */
29 RTE_PGSIZE_4K = 1ULL << 12, /* 4 KiB */
30 RTE_PGSIZE_64K = 1ULL << 16, /* 64 KiB */
31 RTE_PGSIZE_256K = 1ULL << 18, /* 256 KiB */
32 RTE_PGSIZE_2M = 1ULL << 21, /* 2 MiB */
33 RTE_PGSIZE_16M = 1ULL << 24, /* 16 MiB */
34 RTE_PGSIZE_256M = 1ULL << 28, /* 256 MiB */
35 RTE_PGSIZE_512M = 1ULL << 29, /* 512 MiB */
36 RTE_PGSIZE_1G = 1ULL << 30, /* 1 GiB */
37 RTE_PGSIZE_4G = 1ULL << 32, /* 4 GiB */
38 RTE_PGSIZE_16G = 1ULL << 34, /* 16 GiB */
/* Parenthesized so the negative literal always expands as a single operand,
 * whatever expression the macro is used in. */
#define SOCKET_ID_ANY (-1) /**< Any NUMA socket. */
44 * Physical memory segment descriptor.
/* Single-bit flag (bit 0). Presumably meant for the 32-bit ``flags`` field that
 * is documented as "Memseg-specific flags" - confirm. */
46 #define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
47 /**< Prevent this segment from being freed back to the OS. */
/* Fields of the memory segment descriptor.
 * NOTE(review): the enclosing declaration and any union braces are not visible
 * in this view. The "deprecated" remark on phys_addr and the "always 64 bits"
 * remark on addr_64 suggest that phys_addr/iova and addr/addr_64 each share
 * storage - confirm against the full header. */
51 phys_addr_t phys_addr; /**< deprecated - Start physical address. */
52 rte_iova_t iova; /**< Start IO address. */
56 void *addr; /**< Start virtual address. */
57 uint64_t addr_64; /**< Makes sure addr is always 64 bits */
59 size_t len; /**< Length of the segment. */
60 uint64_t hugepage_sz; /**< The pagesize of underlying memory */
61 int32_t socket_id; /**< NUMA socket ID. */
62 uint32_t nchannel; /**< Number of channels. */
63 uint32_t nrank; /**< Number of ranks. */
64 uint32_t flags; /**< Memseg-specific flags */
68 * memseg list is a special case as we need to store a bunch of other data
69 * together with the array itself.
/* Per-list bookkeeping (page size, socket, length, version, kind flags) kept
 * together with the segment array. */
71 struct rte_memseg_list {
/* NOTE(review): the member declarations described by the next two comments are
 * not visible in this view. */
75 /**< Base virtual address for this memseg list. */
77 /**< Makes sure addr is always 64 bits */
79 uint64_t page_sz; /**< Page size for all memsegs in this list. */
80 int socket_id; /**< Socket ID for all memsegs in this list. */
81 volatile uint32_t version; /**< version number for multiprocess sync. */
82 size_t len; /**< Length of memory area covered by this memseg list. */
83 unsigned int external; /**< 1 if this list points to external memory */
84 unsigned int heap; /**< 1 if this list points to a heap */
85 struct rte_fbarray memseg_arr; /**< Presumably the memseg array itself - confirm. */
89 * Lock a page in physical memory and prevent it from being swapped out.
92 * The virtual address.
94 * 0 on success, negative on error.
/* Declaration only. rte_mem_virt2phy() states "The page must be locked";
 * presumably this is the call that satisfies it - confirm. */
96 int rte_mem_lock_page(const void *virt);
99 * Get physical address of any mapped virtual address in the current process.
100 * It is found by browsing the /proc/self/pagemap special file.
101 * The page must be locked.
104 * The virtual address.
106 * The physical address or RTE_BAD_IOVA on error.
/* The error result is RTE_BAD_IOVA even though the return type is phys_addr_t.
 * NOTE(review): "the page must be locked" presumably means locked with
 * rte_mem_lock_page() - confirm. */
108 phys_addr_t rte_mem_virt2phy(const void *virt);
111 * Get IO virtual address of any mapped virtual address in the current process.
113 * @note This function will not check internal page table. Instead, in IOVA as
114 * PA mode, it will fall back to getting real physical address (which may
115 * not match the expected IOVA, such as what was specified for external
119 * The virtual address.
121 * The IO address or RTE_BAD_IOVA on error.
/* Result is RTE_BAD_IOVA on failure. Per the note above, no internal page table
 * is consulted; in IOVA-as-PA mode the real physical address is returned. */
123 rte_iova_t rte_mem_virt2iova(const void *virt);
126 * Get virtual memory address corresponding to iova address.
128 * @note This function read-locks the memory hotplug subsystem, and thus cannot
129 * be used within memory-related callback functions.
134 * Virtual address corresponding to iova address (or NULL if address does not
135 * exist within DPDK memory map).
139 rte_mem_iova2virt(rte_iova_t iova);
142 * Get memseg to which a particular virtual address belongs.
145 * The virtual address.
147 * The memseg list in which to look up based on ``virt`` address
150 * Memseg pointer on success, or NULL on error.
154 rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);
157 * Get memseg list corresponding to virtual memory address.
160 * The virtual address.
162 * Memseg list to which this virtual address belongs.
/* NOTE(review): the result for an address that belongs to no memseg list is not
 * documented in the visible text (NULL?) - confirm. */
165 struct rte_memseg_list *
166 rte_mem_virt2memseg_list(const void *virt);
169 * Memseg walk function prototype.
171 * Returning 0 will continue walk
172 * Returning 1 will stop the walk
173 * Returning -1 will stop the walk and report error
/*
 * Callback arguments:
 *   msl - memseg list handed over with the segment (presumably the list that
 *         contains ms - confirm)
 *   ms  - memseg being visited
 *   arg - opaque pointer; the walk functions describe theirs as
 *         "Argument passed to iterator"
 * Return 0 to continue, 1 to stop, -1 to stop and report an error.
 */
175 typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
176 const struct rte_memseg *ms, void *arg);
179 * Memseg contig walk function prototype. This will trigger a callback on every
180 * VA-contiguous area starting at memseg ``ms``, so total valid VA space at each
181 * callback call will be [``ms->addr``, ``ms->addr + len``).
183 * Returning 0 will continue walk
184 * Returning 1 will stop the walk
185 * Returning -1 will stop the walk and report error
/*
 * Callback arguments:
 *   msl - memseg list handed over with the segment (presumably the list that
 *         contains ms - confirm)
 *   ms  - first memseg of the VA-contiguous area
 *   len - length of that area, which spans [ms->addr, ms->addr + len)
 *   arg - opaque pointer; the walk functions describe theirs as
 *         "Argument passed to iterator"
 * Return 0 to continue, 1 to stop, -1 to stop and report an error.
 */
187 typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
188 const struct rte_memseg *ms, size_t len, void *arg);
191 * Memseg list walk function prototype. This will trigger a callback on every
192 * allocated memseg list.
194 * Returning 0 will continue walk
195 * Returning 1 will stop the walk
196 * Returning -1 will stop the walk and report error
198 typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
202 * Walk list of all memsegs.
204 * @note This function read-locks the memory hotplug subsystem, and thus cannot
205 * be used within memory-related callback functions.
207 * @note This function will also walk through externally allocated segments. It
208 * is up to the user to decide whether to skip through these segments.
213 * Argument passed to iterator
215 * 0 if walked over the entire list
216 * 1 if stopped by the user
217 * -1 if user function reported error
221 rte_memseg_walk(rte_memseg_walk_t func, void *arg);
224 * Walk each VA-contiguous area.
226 * @note This function read-locks the memory hotplug subsystem, and thus cannot
227 * be used within memory-related callback functions.
229 * @note This function will also walk through externally allocated segments. It
230 * is up to the user to decide whether to skip through these segments.
235 * Argument passed to iterator
237 * 0 if walked over the entire list
238 * 1 if stopped by the user
239 * -1 if user function reported error
243 rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
246 * Walk each allocated memseg list.
248 * @note This function read-locks the memory hotplug subsystem, and thus cannot
249 * be used within memory-related callback functions.
251 * @note This function will also walk through externally allocated segments. It
252 * is up to the user to decide whether to skip through these segments.
257 * Argument passed to iterator
259 * 0 if walked over the entire list
260 * 1 if stopped by the user
261 * -1 if user function reported error
265 rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
268 * Walk list of all memsegs without performing any locking.
270 * @note This function does not perform any locking, and is only safe to call
271 * from within memory-related callback functions.
276 * Argument passed to iterator
278 * 0 if walked over the entire list
279 * 1 if stopped by the user
280 * -1 if user function reported error
284 rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
287 * Walk each VA-contiguous area without performing any locking.
289 * @note This function does not perform any locking, and is only safe to call
290 * from within memory-related callback functions.
295 * Argument passed to iterator
297 * 0 if walked over the entire list
298 * 1 if stopped by the user
299 * -1 if user function reported error
303 rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
306 * Walk each allocated memseg list without performing any locking.
308 * @note This function does not perform any locking, and is only safe to call
309 * from within memory-related callback functions.
314 * Argument passed to iterator
316 * 0 if walked over the entire list
317 * 1 if stopped by the user
318 * -1 if user function reported error
322 rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
325 * Return file descriptor associated with a particular memseg (if available).
327 * @note This function read-locks the memory hotplug subsystem, and thus cannot
328 * be used within memory-related callback functions.
330 * @note This returns an internal file descriptor. Performing any operations on
331 * this file descriptor is inherently dangerous, so it should be treated
332 * as read-only for all intents and purposes.
335 * A pointer to memseg for which to get file descriptor.
338 * Valid file descriptor in case of success.
339 * -1 in case of error, with ``rte_errno`` set to the following values:
340 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
341 * - ENODEV - ``ms`` fd is not available
342 * - ENOENT - ``ms`` is an unused segment
343 * - ENOTSUP - segment fd's are not supported
347 rte_memseg_get_fd(const struct rte_memseg *ms);
350 * Return file descriptor associated with a particular memseg (if available).
352 * @note This function does not perform any locking, and is only safe to call
353 * from within memory-related callback functions.
355 * @note This returns an internal file descriptor. Performing any operations on
356 * this file descriptor is inherently dangerous, so it should be treated
357 * as read-only for all intents and purposes.
360 * A pointer to memseg for which to get file descriptor.
363 * Valid file descriptor in case of success.
364 * -1 in case of error, with ``rte_errno`` set to the following values:
365 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
366 * - ENODEV - ``ms`` fd is not available
367 * - ENOENT - ``ms`` is an unused segment
368 * - ENOTSUP - segment fd's are not supported
372 rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);
375 * Get offset into segment file descriptor associated with a particular memseg
378 * @note This function read-locks the memory hotplug subsystem, and thus cannot
379 * be used within memory-related callback functions.
382 * A pointer to memseg for which to get file descriptor offset.
384 * A pointer to offset value where the result will be stored.
387 * 0 in case of success, with the offset stored in ``offset``.
388 * -1 in case of error, with ``rte_errno`` set to the following values:
389 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
390 * - EINVAL - ``offset`` pointer was NULL
391 * - ENODEV - ``ms`` fd is not available
392 * - ENOENT - ``ms`` is an unused segment
393 * - ENOTSUP - segment fd's are not supported
397 rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);
400 * Get offset into segment file descriptor associated with a particular memseg
403 * @note This function does not perform any locking, and is only safe to call
404 * from within memory-related callback functions.
407 * A pointer to memseg for which to get file descriptor offset.
409 * A pointer to offset value where the result will be stored.
412 * 0 in case of success, with the offset stored in ``offset``.
413 * -1 in case of error, with ``rte_errno`` set to the following values:
414 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
415 * - EINVAL - ``offset`` pointer was NULL
416 * - ENODEV - ``ms`` fd is not available
417 * - ENOENT - ``ms`` is an unused segment
418 * - ENOTSUP - segment fd's are not supported
422 rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
427 * @b EXPERIMENTAL: this API may change without prior notice
429 * Register external memory chunk with DPDK.
431 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
434 * @note This API will not perform any DMA mapping. It is expected that user
435 * will do that themselves.
437 * @note Before accessing this memory in other processes, it needs to be
438 * attached in each of those processes by calling ``rte_extmem_attach`` in
439 * each other process.
442 * Start of virtual area to register. Must be aligned by ``page_sz``.
444 * Length of virtual area to register. Must be aligned by ``page_sz``.
446 * Array of page IOVA addresses corresponding to each page in this memory
447 * area. Can be NULL, in which case page IOVA addresses will be set to
450 * Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
453 * Page size of the underlying memory
457 * - -1 in case of error, with rte_errno set to one of the following:
458 * EINVAL - one of the parameters was invalid
459 * EEXIST - memory chunk is already registered
460 * ENOSPC - no more space in internal config to store a new memory chunk
464 rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
465 unsigned int n_pages, size_t page_sz);
469 * @b EXPERIMENTAL: this API may change without prior notice
471 * Unregister external memory chunk with DPDK.
473 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
476 * @note This API will not perform any DMA unmapping. It is expected that user
477 * will do that themselves.
479 * @note Before calling this function, all other processes must call
480 * ``rte_extmem_detach`` to detach from the memory area.
483 * Start of virtual area to unregister
485 * Length of virtual area to unregister
489 * - -1 in case of error, with rte_errno set to one of the following:
490 * EINVAL - one of the parameters was invalid
491 * ENOENT - memory chunk was not found
495 rte_extmem_unregister(void *va_addr, size_t len);
499 * @b EXPERIMENTAL: this API may change without prior notice
501 * Attach to external memory chunk registered in another process.
503 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
506 * @note This API will not perform any DMA mapping. It is expected that user
507 * will do that themselves.
510 * Start of virtual area to attach to
512 * Length of virtual area to attach to
516 * - -1 in case of error, with rte_errno set to one of the following:
517 * EINVAL - one of the parameters was invalid
518 * ENOENT - memory chunk was not found
522 rte_extmem_attach(void *va_addr, size_t len);
526 * @b EXPERIMENTAL: this API may change without prior notice
528 * Detach from external memory chunk registered in another process.
530 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
533 * @note This API will not perform any DMA unmapping. It is expected that user
534 * will do that themselves.
537 * Start of virtual area to detach from
539 * Length of virtual area to detach from
543 * - -1 in case of error, with rte_errno set to one of the following:
544 * EINVAL - one of the parameters was invalid
545 * ENOENT - memory chunk was not found
549 rte_extmem_detach(void *va_addr, size_t len);
552 * Dump the physical memory layout to a file.
554 * @note This function read-locks the memory hotplug subsystem, and thus cannot
555 * be used within memory-related callback functions.
558 * A pointer to a file for output
/* f is the output stream. Per the note above, this takes the memory hotplug
 * read lock, so it must not be called from memory-related callbacks. */
560 void rte_dump_physmem_layout(FILE *f);
563 * Get the total amount of available physical memory.
565 * @note This function read-locks the memory hotplug subsystem, and thus cannot
566 * be used within memory-related callback functions.
569 * The total amount of available physical memory in bytes.
/* Result is a byte count. Per the note above, this takes the memory hotplug
 * read lock, so it must not be called from memory-related callbacks. */
571 uint64_t rte_eal_get_physmem_size(void);
574 * Get the number of memory channels.
577 * The number of memory channels on the system. The value is 0 if unknown
578 * or not the same on all devices.
/* A result of 0 means the channel count is unknown or not uniform across devices. */
580 unsigned rte_memory_get_nchannel(void);
583 * Get the number of memory ranks.
586 * The number of memory ranks on the system. The value is 0 if unknown or
587 * not the same on all devices.
/* A result of 0 means the rank count is unknown or not uniform across devices. */
589 unsigned rte_memory_get_nrank(void);
593 * @b EXPERIMENTAL: this API may change without prior notice
595 * Check if all currently allocated memory segments are compliant with
596 * supplied DMA address width.
599 * Address width to check against.
/* maskbits is the address width to check against (presumably in bits - confirm).
 * NOTE(review): the meaning of the return value is not documented in the
 * visible text - confirm which value signals "all segments comply". */
602 int rte_mem_check_dma_mask(uint8_t maskbits);
606 * @b EXPERIMENTAL: this API may change without prior notice
608 * Check if all currently allocated memory segments are compliant with
609 * supplied DMA address width. This function will use
610 * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk, which means
611 * memory_hotplug_lock will not be acquired, avoiding deadlock during
612 * memory initialization.
614 * This function is just for EAL core memory internal use. Drivers should
615 * use the previous rte_mem_check_dma_mask.
618 * Address width to check against.
/* Non-locking variant of rte_mem_check_dma_mask(), described above as being for
 * EAL-internal use only.
 * NOTE(review): the meaning of the return value is not documented in the
 * visible text - confirm. */
621 int rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits);
625 * @b EXPERIMENTAL: this API may change without prior notice
627 * Set dma mask to use once memory initialization is done. Previous functions
628 * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe can not be
629 * used safely until memory has been initialized.
/* NOTE(review): maskbits is not documented in the visible text; presumably the
 * same address width that rte_mem_check_dma_mask() takes - confirm. */
632 void rte_mem_set_dma_mask(uint8_t maskbits);
635 * Drivers based on uio will not load unless physical
636 * addresses are obtainable. It is only possible to get
637 * physical addresses when running as a privileged user.
640 * 1 if the system is able to obtain physical addresses.
641 * 0 if using DMA addresses through an IOMMU.
/* Returns 1 when physical addresses can be obtained, 0 when DMA addresses are
 * used through an IOMMU. */
643 int rte_eal_using_phys_addrs(void);
647 * Enum indicating which kind of memory event has happened. Used by callbacks to
648 * distinguish between memory allocations and deallocations.
651 RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
652 RTE_MEM_EVENT_FREE, /**< Deallocation event (implicitly 1, the value after ALLOC). */
654 #define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
655 /**< maximum length of callback name */
/* NOTE(review): whether the 64 includes the terminating NUL is not stated - confirm. */
658 * Function typedef used to register callbacks for memory events.
/*
 * Callback arguments:
 *   event_type - RTE_MEM_EVENT_ALLOC or RTE_MEM_EVENT_FREE
 *   addr, len  - presumably the start and length of the affected memory area
 *                (confirm)
 *   arg        - the "Argument to pass to the callback" supplied at registration
 * The callback returns nothing.
 */
660 typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
661 const void *addr, size_t len, void *arg);
664 * Function used to register callbacks for memory events.
666 * @note callbacks will happen while memory hotplug subsystem is write-locked,
667 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
668 * deadlock when called from within such callbacks.
670 * @note mem event callbacks not being supported is an expected error condition,
671 * so user code needs to handle this situation. In these cases, return
672 * value will be -1, and rte_errno will be set to ENOTSUP.
675 * Name associated with specified callback to be added to the list.
678 * Callback function pointer.
681 * Argument to pass to the callback.
684 * 0 on successful callback register
685 * -1 on unsuccessful callback register, with rte_errno value indicating
686 * reason for failure.
690 rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
694 * Function used to unregister callbacks for memory events.
697 * Name associated with specified callback to be removed from the list.
700 * Argument to look for among callbacks with specified callback name.
703 * 0 on successful callback unregister
704 * -1 on unsuccessful callback unregister, with rte_errno value indicating
705 * reason for failure.
709 rte_mem_event_callback_unregister(const char *name, void *arg);
712 #define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
713 /**< maximum length of alloc validator name */
/* NOTE(review): whether the 64 includes the terminating NUL is not stated - confirm. */
715 * Function typedef used to register memory allocation validation callbacks.
717 * Returning 0 will allow allocation attempt to continue. Returning -1 will
718 * prevent allocation from succeeding.
/*
 * Callback arguments:
 *   socket_id - socket concerned by the pending allocation (validators are
 *               registered per socket)
 *   cur_limit - presumably the limit supplied at registration (confirm)
 *   new_len   - presumably the total memory usage if the allocation proceeds
 *               (confirm)
 * Return 0 to let the allocation attempt continue, -1 to make it fail.
 */
720 typedef int (*rte_mem_alloc_validator_t)(int socket_id,
721 size_t cur_limit, size_t new_len);
724 * @brief Register validator callback for memory allocations.
726 * Callbacks registered by this function will be called right before memory
727 * allocator is about to trigger allocation of more pages from the system if
728 * said allocation will bring total memory usage above specified limit on
729 * specified socket. User will be able to cancel pending allocation if callback
732 * @note callbacks will happen while memory hotplug subsystem is write-locked,
733 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
734 * deadlock when called from within such callbacks.
736 * @note validator callbacks not being supported is an expected error condition,
737 * so user code needs to handle this situation. In these cases, return
738 * value will be -1, and rte_errno will be set to ENOTSUP.
741 * Name associated with specified callback to be added to the list.
744 * Callback function pointer.
747 * Socket ID on which to watch for allocations.
750 * Limit above which to trigger callbacks.
753 * 0 on successful callback register
754 * -1 on unsuccessful callback register, with rte_errno value indicating
755 * reason for failure.
759 rte_mem_alloc_validator_register(const char *name,
760 rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
763 * @brief Unregister validator callback for memory allocations.
766 * Name associated with specified callback to be removed from the list.
769 * Socket ID on which to watch for allocations.
772 * 0 on successful callback unregister
773 * -1 on unsuccessful callback unregister, with rte_errno value indicating
774 * reason for failure.
778 rte_mem_alloc_validator_unregister(const char *name, int socket_id);
784 #endif /* _RTE_MEMORY_H_ */