1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
11 * Memory-related RTE API.
22 #include <rte_common.h>
23 #include <rte_compat.h>
24 #include <rte_config.h>
25 #include <rte_fbarray.h>
/**
 * Page-size constants, in bytes, each written as a power of two.
 * The shifts are applied to 1ULL (unsigned long long, at least 64 bits wide),
 * so the 4G (1 << 32) and 16G (1 << 34) values do not overflow a 32-bit int.
 */
27 #define RTE_PGSIZE_4K (1ULL << 12)
28 #define RTE_PGSIZE_64K (1ULL << 16)
29 #define RTE_PGSIZE_256K (1ULL << 18)
30 #define RTE_PGSIZE_2M (1ULL << 21)
31 #define RTE_PGSIZE_16M (1ULL << 24)
32 #define RTE_PGSIZE_256M (1ULL << 28)
33 #define RTE_PGSIZE_512M (1ULL << 29)
34 #define RTE_PGSIZE_1G (1ULL << 30)
35 #define RTE_PGSIZE_4G (1ULL << 32)
36 #define RTE_PGSIZE_16G (1ULL << 34)
/**
 * Wildcard socket ID value (-1) standing for "any NUMA socket".
 * NOTE(review): the expansion is a bare -1 without parentheses; confirm that
 * no user places it in an expression where the missing parentheses matter.
 */
38 #define SOCKET_ID_ANY -1 /**< Any NUMA socket. */
41 * Physical memory segment descriptor.
/**
 * Memseg flag occupying bit 0.
 * NOTE(review): presumably stored in the ``flags`` member of a memseg
 * descriptor -- confirm against the code that sets and tests it.
 */
43 #define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
44 /**< Prevent this segment from being freed back to the OS. */
46 rte_iova_t iova; /**< Start IO address. */
49 void *addr; /**< Start virtual address. */
50 uint64_t addr_64; /**< Makes sure addr is always 64 bits */
52 size_t len; /**< Length of the segment. */
53 uint64_t hugepage_sz; /**< The pagesize of underlying memory */
54 int32_t socket_id; /**< NUMA socket ID. */
55 uint32_t nchannel; /**< Number of channels. */
56 uint32_t nrank; /**< Number of ranks. */
57 uint32_t flags; /**< Memseg-specific flags */
61 * memseg list is a special case as we need to store a bunch of other data
62 * together with the array itself.
64 struct rte_memseg_list {
68 /**< Base virtual address for this memseg list. */
70 /**< Makes sure addr is always 64-bits */
72 uint64_t page_sz; /**< Page size for all memsegs in this list. */
73 int socket_id; /**< Socket ID for all memsegs in this list. */
74 volatile uint32_t version; /**< version number for multiprocess sync. */
75 size_t len; /**< Length of memory area covered by this memseg list. */
76 unsigned int external; /**< 1 if this list points to external memory */
77 unsigned int heap; /**< 1 if this list points to a heap */
78 struct rte_fbarray memseg_arr;
82 * Lock a page in physical memory and prevent it from being swapped out.
85 * The virtual address.
87 * 0 on success, negative on error.
/**
 * Lock the page at a virtual address in physical memory so that it is not
 * swapped.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   0 on success, negative on error.
 */
89 int rte_mem_lock_page(const void *virt);
92 * Get physical address of any mapped virtual address in the current process.
93 * It is found by browsing the /proc/self/pagemap special file.
94 * The page must be locked.
97 * The virtual address.
99 * The physical address or RTE_BAD_IOVA on error.
/**
 * Translate a mapped virtual address of the current process into a physical
 * address. The lookup goes through the /proc/self/pagemap special file, and
 * the page must be locked beforehand.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   The physical address, or RTE_BAD_IOVA on error.
 */
101 phys_addr_t rte_mem_virt2phy(const void *virt);
104 * Get IO virtual address of any mapped virtual address in the current process.
106 * @note This function will not check internal page table. Instead, in IOVA as
107 * PA mode, it will fall back to getting real physical address (which may
108 * not match the expected IOVA, such as what was specified for external
112 * The virtual address.
114 * The IO address or RTE_BAD_IOVA on error.
/**
 * Translate a mapped virtual address of the current process into an IO
 * virtual address.
 *
 * @note The internal page table is not consulted. In IOVA-as-PA mode the
 *   function falls back to the real physical address, which may differ from
 *   the IOVA the caller expects.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   The IO address, or RTE_BAD_IOVA on error.
 */
116 rte_iova_t rte_mem_virt2iova(const void *virt);
119 * Get virtual memory address corresponding to iova address.
121 * @note This function read-locks the memory hotplug subsystem, and thus cannot
122 * be used within memory-related callback functions.
127 * Virtual address corresponding to iova address (or NULL if address does not
128 * exist within DPDK memory map).
131 rte_mem_iova2virt(rte_iova_t iova);
134 * Get memseg to which a particular virtual address belongs.
137 * The virtual address.
139 * The memseg list in which to look up based on ``virt`` address
142 * Memseg pointer on success, or NULL on error.
145 rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);
148 * Get memseg list corresponding to virtual memory address.
151 * The virtual address.
153 * Memseg list to which this virtual address belongs.
/**
 * Look up the memseg list that a virtual address belongs to.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   The memseg list containing ``virt``.
 *   NOTE(review): the result for an address outside every list is not
 *   documented in the visible comments -- confirm before relying on it.
 */
155 struct rte_memseg_list *
156 rte_mem_virt2memseg_list(const void *virt);
159 * Memseg walk function prototype.
161 * Returning 0 will continue walk
162 * Returning 1 will stop the walk
163 * Returning -1 will stop the walk and report error
/**
 * Callback type for walking memsegs.
 *
 * The callback's return value steers the walk: 0 continues, 1 stops the
 * walk, -1 stops the walk and reports an error.
 *
 * NOTE(review): ``msl`` is presumably the list that owns ``ms``, and ``arg``
 * the opaque pointer handed to the walk function -- confirm against the
 * walk implementation.
 */
165 typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
166 const struct rte_memseg *ms, void *arg);
169 * Memseg contig walk function prototype. This will trigger a callback on every
170 * VA-contiguous area starting at memseg ``ms``, so total valid VA space at each
171 * callback call will be [``ms->addr``, ``ms->addr + len``).
173 * Returning 0 will continue walk
174 * Returning 1 will stop the walk
175 * Returning -1 will stop the walk and report error
/**
 * Callback type for walking VA-contiguous areas.
 *
 * The callback is triggered once per VA-contiguous area starting at memseg
 * ``ms``; the valid VA range for that call is
 * [``ms->addr``, ``ms->addr + len``).
 *
 * The callback's return value steers the walk: 0 continues, 1 stops the
 * walk, -1 stops the walk and reports an error.
 *
 * NOTE(review): ``msl`` is presumably the list that owns ``ms``, and ``arg``
 * the opaque pointer handed to the walk function -- confirm against the
 * walk implementation.
 */
177 typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
178 const struct rte_memseg *ms, size_t len, void *arg);
181 * Memseg list walk function prototype. This will trigger a callback on every
182 * allocated memseg list.
184 * Returning 0 will continue walk
185 * Returning 1 will stop the walk
186 * Returning -1 will stop the walk and report error
188 typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
192 * Walk list of all memsegs.
194 * @note This function read-locks the memory hotplug subsystem, and thus cannot
195 * be used within memory-related callback functions.
197 * @note This function will also walk through externally allocated segments. It
198 * is up to the user to decide whether to skip through these segments.
203 * Argument passed to iterator
205 * 0 if walked over the entire list
206 * 1 if stopped by the user
207 * -1 if user function reported error
210 rte_memseg_walk(rte_memseg_walk_t func, void *arg);
213 * Walk each VA-contiguous area.
215 * @note This function read-locks the memory hotplug subsystem, and thus cannot
216 * be used within memory-related callback functions.
218 * @note This function will also walk through externally allocated segments. It
219 * is up to the user to decide whether to skip through these segments.
224 * Argument passed to iterator
226 * 0 if walked over the entire list
227 * 1 if stopped by the user
228 * -1 if user function reported error
231 rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
234 * Walk each allocated memseg list.
236 * @note This function read-locks the memory hotplug subsystem, and thus cannot
237 * be used within memory-related callback functions.
239 * @note This function will also walk through externally allocated segments. It
240 * is up to the user to decide whether to skip through these segments.
245 * Argument passed to iterator
247 * 0 if walked over the entire list
248 * 1 if stopped by the user
249 * -1 if user function reported error
252 rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
255 * Walk list of all memsegs without performing any locking.
257 * @note This function does not perform any locking, and is only safe to call
258 * from within memory-related callback functions.
263 * Argument passed to iterator
265 * 0 if walked over the entire list
266 * 1 if stopped by the user
267 * -1 if user function reported error
270 rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
273 * Walk each VA-contiguous area without performing any locking.
275 * @note This function does not perform any locking, and is only safe to call
276 * from within memory-related callback functions.
281 * Argument passed to iterator
283 * 0 if walked over the entire list
284 * 1 if stopped by the user
285 * -1 if user function reported error
288 rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
291 * Walk each allocated memseg list without performing any locking.
293 * @note This function does not perform any locking, and is only safe to call
294 * from within memory-related callback functions.
299 * Argument passed to iterator
301 * 0 if walked over the entire list
302 * 1 if stopped by the user
303 * -1 if user function reported error
306 rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
309 * Return file descriptor associated with a particular memseg (if available).
311 * @note This function read-locks the memory hotplug subsystem, and thus cannot
312 * be used within memory-related callback functions.
314 * @note This returns an internal file descriptor. Performing any operations on
315 * this file descriptor is inherently dangerous, so it should be treated
316 * as read-only for all intents and purposes.
319 * A pointer to memseg for which to get file descriptor.
322 * Valid file descriptor in case of success.
323 * -1 in case of error, with ``rte_errno`` set to the following values:
324 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
325 * - ENODEV - ``ms`` fd is not available
326 * - ENOENT - ``ms`` is an unused segment
327 * - ENOTSUP - segment fd's are not supported
330 rte_memseg_get_fd(const struct rte_memseg *ms);
333 * Return file descriptor associated with a particular memseg (if available).
335 * @note This function does not perform any locking, and is only safe to call
336 * from within memory-related callback functions.
338 * @note This returns an internal file descriptor. Performing any operations on
339 * this file descriptor is inherently dangerous, so it should be treated
340 * as read-only for all intents and purposes.
343 * A pointer to memseg for which to get file descriptor.
346 * Valid file descriptor in case of success.
347 * -1 in case of error, with ``rte_errno`` set to the following values:
348 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
349 * - ENODEV - ``ms`` fd is not available
350 * - ENOENT - ``ms`` is an unused segment
351 * - ENOTSUP - segment fd's are not supported
354 rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);
357 * Get offset into segment file descriptor associated with a particular memseg
360 * @note This function read-locks the memory hotplug subsystem, and thus cannot
361 * be used within memory-related callback functions.
364 * A pointer to memseg for which to get the file descriptor offset.
366 * A pointer to offset value where the result will be stored.
369 * 0 in case of success, with the result stored in ``offset``.
370 * -1 in case of error, with ``rte_errno`` set to the following values:
371 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
372 * - EINVAL - ``offset`` pointer was NULL
373 * - ENODEV - ``ms`` fd is not available
374 * - ENOENT - ``ms`` is an unused segment
375 * - ENOTSUP - segment fd's are not supported
378 rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);
381 * Get offset into segment file descriptor associated with a particular memseg
384 * @note This function does not perform any locking, and is only safe to call
385 * from within memory-related callback functions.
388 * A pointer to memseg for which to get the file descriptor offset.
390 * A pointer to offset value where the result will be stored.
393 * 0 in case of success, with the result stored in ``offset``.
394 * -1 in case of error, with ``rte_errno`` set to the following values:
395 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
396 * - EINVAL - ``offset`` pointer was NULL
397 * - ENODEV - ``ms`` fd is not available
398 * - ENOENT - ``ms`` is an unused segment
399 * - ENOTSUP - segment fd's are not supported
402 rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
407 * @b EXPERIMENTAL: this API may change without prior notice
409 * Register external memory chunk with DPDK.
411 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
414 * @note This API will not perform any DMA mapping. It is expected that user
415 * will do that themselves.
417 * @note Before accessing this memory in other processes, it needs to be
418 * attached in each of those processes by calling ``rte_extmem_attach`` in
419 * each other process.
422 * Start of virtual area to register. Must be aligned by ``page_sz``.
424 * Length of virtual area to register. Must be aligned by ``page_sz``.
426 * Array of page IOVA addresses corresponding to each page in this memory
427 * area. Can be NULL, in which case page IOVA addresses will be set to
430 * Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
433 * Page size of the underlying memory
437 * - -1 in case of error, with rte_errno set to one of the following:
438 * EINVAL - one of the parameters was invalid
439 * EEXIST - memory chunk is already registered
440 * ENOSPC - no more space in internal config to store a new memory chunk
444 rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
445 unsigned int n_pages, size_t page_sz);
449 * @b EXPERIMENTAL: this API may change without prior notice
451 * Unregister external memory chunk with DPDK.
453 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
456 * @note This API will not perform any DMA unmapping. It is expected that user
457 * will do that themselves.
459 * @note Before calling this function, all other processes must call
460 * ``rte_extmem_detach`` to detach from the memory area.
463 * Start of virtual area to unregister
465 * Length of virtual area to unregister
469 * - -1 in case of error, with rte_errno set to one of the following:
470 * EINVAL - one of the parameters was invalid
471 * ENOENT - memory chunk was not found
475 rte_extmem_unregister(void *va_addr, size_t len);
479 * @b EXPERIMENTAL: this API may change without prior notice
481 * Attach to external memory chunk registered in another process.
483 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
486 * @note This API will not perform any DMA mapping. It is expected that user
487 * will do that themselves.
490 * Start of virtual area to register
492 * Length of virtual area to register
496 * - -1 in case of error, with rte_errno set to one of the following:
497 * EINVAL - one of the parameters was invalid
498 * ENOENT - memory chunk was not found
502 rte_extmem_attach(void *va_addr, size_t len);
506 * @b EXPERIMENTAL: this API may change without prior notice
508 * Detach from external memory chunk registered in another process.
510 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
513 * @note This API will not perform any DMA unmapping. It is expected that user
514 * will do that themselves.
517 * Start of virtual area to unregister
519 * Length of virtual area to unregister
523 * - -1 in case of error, with rte_errno set to one of the following:
524 * EINVAL - one of the parameters was invalid
525 * ENOENT - memory chunk was not found
529 rte_extmem_detach(void *va_addr, size_t len);
532 * Dump the physical memory layout to a file.
534 * @note This function read-locks the memory hotplug subsystem, and thus cannot
535 * be used within memory-related callback functions.
538 * A pointer to a file for output
/**
 * Dump the physical memory layout to a file.
 *
 * @note Read-locks the memory hotplug subsystem, so it cannot be used
 *   within memory-related callback functions.
 *
 * @param f
 *   A pointer to a file for output.
 */
540 void rte_dump_physmem_layout(FILE *f);
543 * Get the total amount of available physical memory.
545 * @note This function read-locks the memory hotplug subsystem, and thus cannot
546 * be used within memory-related callback functions.
549 * The total amount of available physical memory in bytes.
/**
 * Get the total amount of available physical memory.
 *
 * @note Read-locks the memory hotplug subsystem, so it cannot be used
 *   within memory-related callback functions.
 *
 * @return
 *   The total amount of available physical memory, in bytes.
 */
551 uint64_t rte_eal_get_physmem_size(void);
554 * Get the number of memory channels.
557 * The number of memory channels on the system. The value is 0 if unknown
558 * or not the same on all devices.
/**
 * Get the number of memory channels.
 *
 * @return
 *   The number of memory channels on the system, or 0 if it is unknown or
 *   not the same on all devices.
 */
560 unsigned rte_memory_get_nchannel(void);
563 * Get the number of memory ranks.
566 * The number of memory ranks on the system. The value is 0 if unknown or
567 * not the same on all devices.
/**
 * Get the number of memory ranks.
 *
 * @return
 *   The number of memory ranks on the system, or 0 if it is unknown or not
 *   the same on all devices.
 */
569 unsigned rte_memory_get_nrank(void);
573 * @b EXPERIMENTAL: this API may change without prior notice
575 * Check if all currently allocated memory segments are compliant with
576 * supplied DMA address width.
579 * Address width to check against.
/**
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Check whether all currently allocated memory segments comply with the
 * supplied DMA address width.
 *
 * @param maskbits
 *   Address width to check against.
 *
 * NOTE(review): the meaning of the return value is not documented in the
 * visible comments -- confirm against the implementation.
 */
582 int rte_mem_check_dma_mask(uint8_t maskbits);
586 * @b EXPERIMENTAL: this API may change without prior notice
588 * Check if all currently allocated memory segments are compliant with
589 * supplied DMA address width. This function uses
590 * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk, so
591 * memory_hotplug_lock is not acquired, which avoids a deadlock during
592 * memory initialization.
594 * This function is just for EAL core memory internal use. Drivers should
595 * use the previous rte_mem_check_dma_mask.
598 * Address width to check against.
/**
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Lock-free variant of the DMA address width check: it walks memsegs with
 * rte_memseg_walk_thread_unsafe, so memory_hotplug_lock is not taken and the
 * check can run during memory initialization without deadlocking.
 *
 * Intended for EAL core memory internal use only; drivers should call
 * rte_mem_check_dma_mask instead.
 *
 * @param maskbits
 *   Address width to check against.
 *
 * NOTE(review): the meaning of the return value is not documented in the
 * visible comments -- confirm against the implementation.
 */
601 int rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits);
605 * @b EXPERIMENTAL: this API may change without prior notice
607 * Set dma mask to use once memory initialization is done. Previous functions
608 * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe can not be
609 * used safely until memory has been initialized.
/**
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Set the DMA mask to use once memory initialization is done. This exists
 * because rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe
 * cannot be used safely until memory has been initialized.
 *
 * @param maskbits
 *   NOTE(review): presumably the DMA address width in bits, matching the
 *   parameter of the check functions -- confirm.
 */
612 void rte_mem_set_dma_mask(uint8_t maskbits);
615 * Drivers based on uio will not load unless physical
616 * addresses are obtainable. It is only possible to get
617 * physical addresses when running as a privileged user.
620 * 1 if the system is able to obtain physical addresses.
621 * 0 if using DMA addresses through an IOMMU.
/**
 * Report whether physical addresses are obtainable. Drivers based on uio
 * will not load without them, and they are only obtainable when running as
 * a privileged user.
 *
 * @return
 *   1 if the system is able to obtain physical addresses.
 *   0 if using DMA addresses through an IOMMU.
 */
623 int rte_eal_using_phys_addrs(void);
627 * Enum indicating which kind of memory event has happened. Used by callbacks to
628 * distinguish between memory allocations and deallocations.
631 RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
632 RTE_MEM_EVENT_FREE, /**< Deallocation event. */
/**
 * Length limit (64) for the name attached to a memory event callback.
 * NOTE(review): the visible code does not show whether this limit counts the
 * terminating NUL -- confirm against the registration code.
 */
634 #define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
635 /**< maximum length of callback name */
638 * Function typedef used to register callbacks for memory events.
/**
 * Callback type for memory events.
 *
 * ``event_type`` tells the callback whether the event is an allocation
 * (RTE_MEM_EVENT_ALLOC) or a deallocation (RTE_MEM_EVENT_FREE).
 *
 * NOTE(review): ``addr`` and ``len`` presumably describe the memory area the
 * event applies to, and ``arg`` is presumably the pointer supplied at
 * registration -- confirm against the code that invokes the callbacks.
 */
640 typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
641 const void *addr, size_t len, void *arg);
644 * Function used to register callbacks for memory events.
646 * @note callbacks will happen while memory hotplug subsystem is write-locked,
647 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
648 * deadlock when called from within such callbacks.
650 * @note mem event callbacks not being supported is an expected error condition,
651 * so user code needs to handle this situation. In these cases, return
652 * value will be -1, and rte_errno will be set to ENOTSUP.
655 * Name associated with specified callback to be added to the list.
658 * Callback function pointer.
661 * Argument to pass to the callback.
664 * 0 on successful callback register
665 * -1 on unsuccessful callback register, with rte_errno value indicating
666 * reason for failure.
669 rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
673 * Function used to unregister callbacks for memory events.
676 * Name associated with specified callback to be removed from the list.
679 * Argument to look for among callbacks with specified callback name.
682 * 0 on successful callback unregister
683 * -1 on unsuccessful callback unregister, with rte_errno value indicating
684 * reason for failure.
687 rte_mem_event_callback_unregister(const char *name, void *arg);
/**
 * Length limit (64) for the name attached to an allocation validator.
 * NOTE(review): the visible code does not show whether this limit counts the
 * terminating NUL -- confirm against the registration code.
 */
690 #define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
691 /**< maximum length of alloc validator name */
693 * Function typedef used to register memory allocation validation callbacks.
695 * Returning 0 will allow allocation attempt to continue. Returning -1 will
696 * prevent allocation from succeeding.
/**
 * Callback type for memory allocation validators.
 *
 * Returning 0 lets the allocation attempt continue; returning -1 prevents
 * the allocation from succeeding.
 *
 * NOTE(review): ``socket_id`` is presumably the socket being allocated on,
 * ``cur_limit`` the limit given at registration, and ``new_len`` the total
 * that the pending allocation would reach -- confirm against the allocator.
 */
698 typedef int (*rte_mem_alloc_validator_t)(int socket_id,
699 size_t cur_limit, size_t new_len);
702 * @brief Register validator callback for memory allocations.
704 * Callbacks registered by this function will be called right before memory
705 * allocator is about to trigger allocation of more pages from the system if
706 * said allocation will bring total memory usage above specified limit on
707 * specified socket. User will be able to cancel pending allocation if callback
710 * @note callbacks will happen while memory hotplug subsystem is write-locked,
711 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
712 * deadlock when called from within such callbacks.
714 * @note validator callbacks not being supported is an expected error condition,
715 * so user code needs to handle this situation. In these cases, return
716 * value will be -1, and rte_errno will be set to ENOTSUP.
719 * Name associated with specified callback to be added to the list.
722 * Callback function pointer.
725 * Socket ID on which to watch for allocations.
728 * Limit above which to trigger callbacks.
731 * 0 on successful callback register
732 * -1 on unsuccessful callback register, with rte_errno value indicating
733 * reason for failure.
736 rte_mem_alloc_validator_register(const char *name,
737 rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
740 * @brief Unregister validator callback for memory allocations.
743 * Name associated with specified callback to be removed from the list.
746 * Socket ID on which to watch for allocations.
749 * 0 on successful callback unregister
750 * -1 on unsuccessful callback unregister, with rte_errno value indicating
751 * reason for failure.
754 rte_mem_alloc_validator_unregister(const char *name, int socket_id);
760 #endif /* _RTE_MEMORY_H_ */