/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef _RTE_MEMORY_H_
#define _RTE_MEMORY_H_

/**
 * @file
 *
 * Memory-related RTE API.
 */

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

#include <rte_common.h>
#include <rte_compat.h>
#include <rte_config.h>

/* forward declaration for pointers */
struct rte_memseg_list;
__extension__
enum rte_page_sizes {
	RTE_PGSIZE_4K   = 1ULL << 12,
	RTE_PGSIZE_64K  = 1ULL << 16,
	RTE_PGSIZE_256K = 1ULL << 18,
	RTE_PGSIZE_2M   = 1ULL << 21,
	RTE_PGSIZE_16M  = 1ULL << 24,
	RTE_PGSIZE_256M = 1ULL << 28,
	RTE_PGSIZE_512M = 1ULL << 29,
	RTE_PGSIZE_1G   = 1ULL << 30,
	RTE_PGSIZE_4G   = 1ULL << 32,
	RTE_PGSIZE_16G  = 1ULL << 34
};
#define SOCKET_ID_ANY -1 /**< Any NUMA socket. */
#define RTE_CACHE_LINE_MASK (RTE_CACHE_LINE_SIZE-1) /**< Cache line mask. */

#define RTE_CACHE_LINE_ROUNDUP(size) \
	(RTE_CACHE_LINE_SIZE * (((size) + RTE_CACHE_LINE_SIZE - 1) / \
	RTE_CACHE_LINE_SIZE))
/**< Return the first cache-aligned value greater than or equal to size. */

/** Cache line size in terms of log2. */
#if RTE_CACHE_LINE_SIZE == 64
#define RTE_CACHE_LINE_SIZE_LOG2 6
#elif RTE_CACHE_LINE_SIZE == 128
#define RTE_CACHE_LINE_SIZE_LOG2 7
#else
#error "Unsupported cache line size"
#endif

#define RTE_CACHE_LINE_MIN_SIZE 64 /**< Minimum cache line size. */
/** Force alignment to cache line. */
#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)

/** Force minimum cache line alignment. */
#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
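
/*
 * Usage sketch (illustrative, not part of this API): aligning a per-core
 * counter structure to a full cache line avoids false sharing between cores
 * that each write their own instance. The struct name is hypothetical.
 *
 *	struct per_core_stats {
 *		uint64_t rx_pkts;
 *		uint64_t tx_pkts;
 *	} __rte_cache_aligned;
 */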
typedef uint64_t phys_addr_t; /**< Physical address. */
#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)

/**
 * IO virtual address type.
 * When the physical addressing mode (IOVA as PA) is in use,
 * the translation from an IO virtual address (IOVA) to a physical address
 * is a direct mapping, i.e. the same value.
 * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation.
 */
typedef uint64_t rte_iova_t;
#define RTE_BAD_IOVA ((rte_iova_t)-1)
/**
 * Physical memory segment descriptor.
 */
#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
/**< Prevent this segment from being freed back to the OS. */
struct rte_memseg {
	RTE_STD_C11
	union {
		phys_addr_t phys_addr; /**< Deprecated - Start physical address. */
		rte_iova_t iova;       /**< Start IO address. */
	};
	RTE_STD_C11
	union {
		void *addr;        /**< Start virtual address. */
		uint64_t addr_64;  /**< Makes sure addr is always 64 bits. */
	};
	size_t len;            /**< Length of the segment. */
	uint64_t hugepage_sz;  /**< Page size of the underlying memory. */
	int32_t socket_id;     /**< NUMA socket ID. */
	uint32_t nchannel;     /**< Number of channels. */
	uint32_t nrank;        /**< Number of ranks. */
	uint32_t flags;        /**< Memseg-specific flags. */
} __rte_packed;
/**
 * Lock page in physical memory and prevent it from being swapped out.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   0 on success, negative on error.
 */
int rte_mem_lock_page(const void *virt);

/**
 * Get physical address of any mapped virtual address in the current process.
 * It is found by browsing the /proc/self/pagemap special file.
 * The page must be locked.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   The physical address or RTE_BAD_IOVA on error.
 */
phys_addr_t rte_mem_virt2phy(const void *virt);

/**
 * Get IO virtual address of any mapped virtual address in the current process.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   The IO address or RTE_BAD_IOVA on error.
 */
rte_iova_t rte_mem_virt2iova(const void *virt);
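
/*
 * Usage sketch (illustrative; buffer name and size are hypothetical): lock a
 * page so its translation stays stable, then obtain its IO address for DMA.
 *
 *	static char buf[4096] __rte_cache_aligned;
 *
 *	if (rte_mem_lock_page(buf) == 0) {
 *		rte_iova_t io = rte_mem_virt2iova(buf);
 *
 *		if (io == RTE_BAD_IOVA)
 *			; // no translation available (e.g. unprivileged user)
 *	}
 */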
/**
 * Get virtual memory address corresponding to iova address.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *   be used within memory-related callback functions.
 *
 * @param iova
 *   The iova address.
 * @return
 *   Virtual address corresponding to iova address (or NULL if address does not
 *   exist within DPDK memory map).
 */
void *
rte_mem_iova2virt(rte_iova_t iova);
/**
 * Get memseg to which a particular virtual address belongs.
 *
 * @param virt
 *   The virtual address.
 * @param msl
 *   The memseg list in which to look up based on ``virt`` address
 *   (can be NULL).
 * @return
 *   Memseg pointer on success, or NULL on error.
 */
struct rte_memseg *
rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);

/**
 * Get memseg list corresponding to virtual memory address.
 *
 * @param virt
 *   The virtual address.
 * @return
 *   Memseg list to which this virtual address belongs.
 */
struct rte_memseg_list *
rte_mem_virt2memseg_list(const void *virt);
/**
 * Memseg walk function prototype.
 *
 * Returning 0 will continue the walk.
 * Returning 1 will stop the walk.
 * Returning -1 will stop the walk and report an error.
 */
typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, void *arg);

/**
 * Memseg contig walk function prototype. This will trigger a callback on every
 * VA-contiguous area starting at memseg ``ms``, so the total valid VA space at
 * each callback call will be [``ms->addr``, ``ms->addr + len``).
 *
 * Returning 0 will continue the walk.
 * Returning 1 will stop the walk.
 * Returning -1 will stop the walk and report an error.
 */
typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, size_t len, void *arg);

/**
 * Memseg list walk function prototype. This will trigger a callback on every
 * allocated memseg list.
 *
 * Returning 0 will continue the walk.
 * Returning 1 will stop the walk.
 * Returning -1 will stop the walk and report an error.
 */
typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
		void *arg);
/**
 * Walk list of all memsegs.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *   be used within memory-related callback functions.
 *
 * @note This function will also walk through externally allocated segments. It
 *   is up to the user to decide whether to skip through these segments.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_walk(rte_memseg_walk_t func, void *arg);
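
/*
 * Usage sketch (illustrative; callback name is hypothetical): total up the
 * length of all segments via a walk callback.
 *
 *	static int
 *	count_cb(const struct rte_memseg_list *msl __rte_unused,
 *			const struct rte_memseg *ms, void *arg)
 *	{
 *		size_t *total = arg;
 *
 *		*total += ms->len;
 *		return 0; // continue the walk
 *	}
 *
 *	size_t total = 0;
 *
 *	if (rte_memseg_walk(count_cb, &total) < 0)
 *		; // a callback reported an error
 */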
/**
 * Walk each VA-contiguous area.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *   be used within memory-related callback functions.
 *
 * @note This function will also walk through externally allocated segments. It
 *   is up to the user to decide whether to skip through these segments.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);

/**
 * Walk each allocated memseg list.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *   be used within memory-related callback functions.
 *
 * @note This function will also walk through externally allocated segments. It
 *   is up to the user to decide whether to skip through these segments.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
/**
 * Walk list of all memsegs without performing any locking.
 *
 * @note This function does not perform any locking, and is only safe to call
 *   from within memory-related callback functions.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);

/**
 * Walk each VA-contiguous area without performing any locking.
 *
 * @note This function does not perform any locking, and is only safe to call
 *   from within memory-related callback functions.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);

/**
 * Walk each allocated memseg list without performing any locking.
 *
 * @note This function does not perform any locking, and is only safe to call
 *   from within memory-related callback functions.
 *
 * @param func
 *   Iterator function.
 * @param arg
 *   Argument passed to iterator.
 * @return
 *   0 if walked over the entire list
 *   1 if stopped by the user
 *   -1 if user function reported error
 */
int
rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
/**
 * Return file descriptor associated with a particular memseg (if available).
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *   be used within memory-related callback functions.
 *
 * @note This returns an internal file descriptor. Performing any operations on
 *   this file descriptor is inherently dangerous, so it should be treated
 *   as read-only for all intents and purposes.
 *
 * @param ms
 *   A pointer to memseg for which to get file descriptor.
 * @return
 *   Valid file descriptor in case of success.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *     - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
 *     - ENODEV - ``ms`` fd is not available
 *     - ENOENT - ``ms`` is an unused segment
 *     - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd(const struct rte_memseg *ms);

/**
 * Return file descriptor associated with a particular memseg (if available).
 *
 * @note This function does not perform any locking, and is only safe to call
 *   from within memory-related callback functions.
 *
 * @note This returns an internal file descriptor. Performing any operations on
 *   this file descriptor is inherently dangerous, so it should be treated
 *   as read-only for all intents and purposes.
 *
 * @param ms
 *   A pointer to memseg for which to get file descriptor.
 * @return
 *   Valid file descriptor in case of success.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *     - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
 *     - ENODEV - ``ms`` fd is not available
 *     - ENOENT - ``ms`` is an unused segment
 *     - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);
/**
 * Get offset into segment file descriptor associated with a particular memseg
 * (if available).
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *   be used within memory-related callback functions.
 *
 * @param ms
 *   A pointer to memseg for which to get file descriptor.
 * @param offset
 *   A pointer to offset value where the result will be stored.
 * @return
 *   0 in case of success, with the offset stored in ``offset``.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *     - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
 *     - EINVAL - ``offset`` pointer was NULL
 *     - ENODEV - ``ms`` fd is not available
 *     - ENOENT - ``ms`` is an unused segment
 *     - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);
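
/*
 * Usage sketch (illustrative; ``ms`` is assumed to come from a prior lookup or
 * walk): obtain a segment's backing fd and intra-file offset, e.g. to hand
 * both to another process for mapping. Treat the fd strictly as read-only;
 * do not close it or write through it.
 *
 *	size_t off;
 *	int fd = rte_memseg_get_fd(ms);
 *
 *	if (fd >= 0 && rte_memseg_get_fd_offset(ms, &off) == 0)
 *		; // segment contents start 'off' bytes into 'fd'
 *	else if (rte_errno == ENOTSUP)
 *		; // segment fds unsupported in this mode - expected condition
 */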
/**
 * Get offset into segment file descriptor associated with a particular memseg
 * (if available).
 *
 * @note This function does not perform any locking, and is only safe to call
 *   from within memory-related callback functions.
 *
 * @param ms
 *   A pointer to memseg for which to get file descriptor.
 * @param offset
 *   A pointer to offset value where the result will be stored.
 * @return
 *   0 in case of success, with the offset stored in ``offset``.
 *   -1 in case of error, with ``rte_errno`` set to the following values:
 *     - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
 *     - EINVAL - ``offset`` pointer was NULL
 *     - ENODEV - ``ms`` fd is not available
 *     - ENOENT - ``ms`` is an unused segment
 *     - ENOTSUP - segment fds are not supported
 */
int
rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
		size_t *offset);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Register external memory chunk with DPDK.
 *
 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
 *   API's.
 *
 * @note This API will not perform any DMA mapping. It is expected that user
 *   will do that themselves.
 *
 * @note Before accessing this memory in other processes, it needs to be
 *   attached in each of those processes by calling ``rte_extmem_attach`` in
 *   each other process.
 *
 * @param va_addr
 *   Start of virtual area to register. Must be aligned by ``page_sz``.
 * @param len
 *   Length of virtual area to register. Must be aligned by ``page_sz``.
 * @param iova_addrs
 *   Array of page IOVA addresses corresponding to each page in this memory
 *   area. Can be NULL, in which case page IOVA addresses will be set to
 *   RTE_BAD_IOVA.
 * @param n_pages
 *   Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
 *   is NULL.
 * @param page_sz
 *   Page size of the underlying memory.
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     EEXIST - memory chunk is already registered
 *     ENOSPC - no more space in internal config to store a new memory chunk
 */
__rte_experimental
int
rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
		unsigned int n_pages, size_t page_sz);
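
/*
 * Usage sketch (illustrative; assumes a Linux-style anonymous hugepage
 * mapping, and all names are hypothetical). IOVAs are left unset here, so the
 * area is only directly usable for DMA in IOVA-as-VA mode or after the user
 * performs mapping themselves.
 *
 *	size_t page_sz = RTE_PGSIZE_2M;
 *	size_t len = 16 * page_sz;
 *	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
 *
 *	if (va != MAP_FAILED &&
 *			rte_extmem_register(va, len, NULL, 0, page_sz) < 0)
 *		; // inspect rte_errno (EINVAL, EEXIST or ENOSPC)
 */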
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Unregister external memory chunk with DPDK.
 *
 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
 *   API's.
 *
 * @note This API will not perform any DMA unmapping. It is expected that user
 *   will do that themselves.
 *
 * @note Before calling this function, all other processes must call
 *   ``rte_extmem_detach`` to detach from the memory area.
 *
 * @param va_addr
 *   Start of virtual area to unregister.
 * @param len
 *   Length of virtual area to unregister.
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     ENOENT - memory chunk was not found
 */
__rte_experimental
int
rte_extmem_unregister(void *va_addr, size_t len);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Attach to external memory chunk registered in another process.
 *
 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
 *   API's.
 *
 * @note This API will not perform any DMA mapping. It is expected that user
 *   will do that themselves.
 *
 * @param va_addr
 *   Start of virtual area to register.
 * @param len
 *   Length of virtual area to register.
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     ENOENT - memory chunk was not found
 */
__rte_experimental
int
rte_extmem_attach(void *va_addr, size_t len);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Detach from external memory chunk registered in another process.
 *
 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
 *   API's.
 *
 * @note This API will not perform any DMA unmapping. It is expected that user
 *   will do that themselves.
 *
 * @param va_addr
 *   Start of virtual area to unregister.
 * @param len
 *   Length of virtual area to unregister.
 * @return
 *   - 0 on success
 *   - -1 in case of error, with rte_errno set to one of the following:
 *     EINVAL - one of the parameters was invalid
 *     ENOENT - memory chunk was not found
 */
__rte_experimental
int
rte_extmem_detach(void *va_addr, size_t len);
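
/*
 * Multi-process sketch (illustrative): the process that called
 * rte_extmem_register() shares ``va`` and ``len`` with its peers out of band;
 * each peer attaches before use and detaches before the area is unregistered.
 *
 *	// in each other process:
 *	if (rte_extmem_attach(va, len) == 0) {
 *		// ... use the memory ...
 *		rte_extmem_detach(va, len);
 *	}
 */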
/**
 * Dump the physical memory layout to a file.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *   be used within memory-related callback functions.
 *
 * @param f
 *   A pointer to a file for output.
 */
void rte_dump_physmem_layout(FILE *f);

/**
 * Get the total amount of available physical memory.
 *
 * @note This function read-locks the memory hotplug subsystem, and thus cannot
 *   be used within memory-related callback functions.
 *
 * @return
 *   The total amount of available physical memory in bytes.
 */
uint64_t rte_eal_get_physmem_size(void);

/**
 * Get the number of memory channels.
 *
 * @return
 *   The number of memory channels on the system. The value is 0 if unknown
 *   or not the same on all devices.
 */
unsigned rte_memory_get_nchannel(void);

/**
 * Get the number of memory ranks.
 *
 * @return
 *   The number of memory ranks on the system. The value is 0 if unknown or
 *   not the same on all devices.
 */
unsigned rte_memory_get_nrank(void);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Check if all currently allocated memory segments are compliant with
 * supplied DMA address width.
 *
 * @param maskbits
 *   Address width to check against.
 */
__rte_experimental
int rte_mem_check_dma_mask(uint8_t maskbits);
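
/*
 * Usage sketch (illustrative; return semantics assumed: 0 if all segments fit
 * within the mask, -1 otherwise): a device limited to 48-bit DMA addressing
 * can verify that all currently allocated memory is reachable before
 * completing initialization.
 *
 *	if (rte_mem_check_dma_mask(48) != 0)
 *		; // some memory lies outside the 48-bit range; abort init
 */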
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Check if all currently allocated memory segments are compliant with
 * supplied DMA address width. This function uses
 * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk, so
 * memory_hotplug_lock will not be acquired, avoiding deadlock during
 * memory initialization.
 *
 * This function is just for EAL core memory internal use. Drivers should
 * use rte_mem_check_dma_mask above instead.
 *
 * @param maskbits
 *   Address width to check against.
 */
__rte_experimental
int rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice
 *
 * Set DMA mask to use once memory initialization is done. The functions
 * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe above cannot
 * be used safely until memory has been initialized.
 *
 * @param maskbits
 *   Address width of the DMA mask to set.
 */
__rte_experimental
void rte_mem_set_dma_mask(uint8_t maskbits);
/**
 * Drivers based on uio will not load unless physical
 * addresses are obtainable. It is only possible to get
 * physical addresses when running as a privileged user.
 *
 * @return
 *   1 if the system is able to obtain physical addresses.
 *   0 if using DMA addresses through an IOMMU.
 */
int rte_eal_using_phys_addrs(void);
/**
 * Enum indicating which kind of memory event has happened. Used by callbacks
 * to distinguish between memory allocations and deallocations.
 */
enum rte_mem_event {
	RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
	RTE_MEM_EVENT_FREE,      /**< Deallocation event. */
};

#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
/**< maximum length of callback name */

/**
 * Function typedef used to register callbacks for memory events.
 */
typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
		const void *addr, size_t len, void *arg);
/**
 * Function used to register callbacks for memory events.
 *
 * @note callbacks will happen while memory hotplug subsystem is write-locked,
 *   therefore some functions (e.g. `rte_memseg_walk()`) will cause a
 *   deadlock when called from within such callbacks.
 *
 * @note mem event callbacks not being supported is an expected error condition,
 *   so user code needs to handle this situation. In these cases, return
 *   value will be -1, and rte_errno will be set to ENOTSUP.
 *
 * @param name
 *   Name associated with specified callback to be added to the list.
 * @param clb
 *   Callback function pointer.
 * @param arg
 *   Argument to pass to the callback.
 * @return
 *   0 on successful callback register
 *   -1 on unsuccessful callback register, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
		void *arg);
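
/*
 * Usage sketch (illustrative; callback and registration names are
 * hypothetical). Because hotplug is write-locked during the callback, only
 * the *_thread_unsafe walk variants may be used inside it.
 *
 *	static void
 *	ev_cb(enum rte_mem_event type, const void *addr, size_t len,
 *			void *arg __rte_unused)
 *	{
 *		// e.g. update an address-range tracker on ALLOC/FREE;
 *		// do not call rte_memseg_walk() and friends from here
 *	}
 *
 *	if (rte_mem_event_callback_register("my-tracker", ev_cb, NULL) < 0 &&
 *			rte_errno == ENOTSUP)
 *		; // callbacks unsupported (expected in some memory modes)
 */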
/**
 * Function used to unregister callbacks for memory events.
 *
 * @param name
 *   Name associated with specified callback to be removed from the list.
 * @param arg
 *   Argument to look for among callbacks with specified callback name.
 * @return
 *   0 on successful callback unregister
 *   -1 on unsuccessful callback unregister, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_event_callback_unregister(const char *name, void *arg);
#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
/**< maximum length of alloc validator name */

/**
 * Function typedef used to register memory allocation validation callbacks.
 *
 * Returning 0 will allow the allocation attempt to continue. Returning -1 will
 * prevent the allocation from succeeding.
 */
typedef int (*rte_mem_alloc_validator_t)(int socket_id,
		size_t cur_limit, size_t new_len);
/**
 * @brief Register validator callback for memory allocations.
 *
 * Callbacks registered by this function will be called right before memory
 * allocator is about to trigger allocation of more pages from the system if
 * said allocation will bring total memory usage above specified limit on
 * specified socket. User will be able to cancel pending allocation if callback
 * returns -1.
 *
 * @note callbacks will happen while memory hotplug subsystem is write-locked,
 *   therefore some functions (e.g. `rte_memseg_walk()`) will cause a
 *   deadlock when called from within such callbacks.
 *
 * @note validator callbacks not being supported is an expected error condition,
 *   so user code needs to handle this situation. In these cases, return
 *   value will be -1, and rte_errno will be set to ENOTSUP.
 *
 * @param name
 *   Name associated with specified callback to be added to the list.
 * @param clb
 *   Callback function pointer.
 * @param socket_id
 *   Socket ID on which to watch for allocations.
 * @param limit
 *   Limit above which to trigger callbacks.
 * @return
 *   0 on successful callback register
 *   -1 on unsuccessful callback register, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_alloc_validator_register(const char *name,
		rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
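
/*
 * Usage sketch (illustrative; names are hypothetical): cap DPDK memory usage
 * on socket 0 at 1 GiB. The validator only runs for allocations that would
 * exceed the registered limit, so refusing unconditionally enforces the cap.
 *
 *	static int
 *	limit_cb(int socket_id __rte_unused, size_t cur_limit __rte_unused,
 *			size_t new_len __rte_unused)
 *	{
 *		return -1; // cancel any growth beyond the registered limit
 *	}
 *
 *	rte_mem_alloc_validator_register("cap-socket0", limit_cb, 0,
 *			(size_t)1 << 30);
 */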
/**
 * @brief Unregister validator callback for memory allocations.
 *
 * @param name
 *   Name associated with specified callback to be removed from the list.
 * @param socket_id
 *   Socket ID for which the callback was registered.
 * @return
 *   0 on successful callback unregister
 *   -1 on unsuccessful callback unregister, with rte_errno value indicating
 *   reason for failure.
 */
int
rte_mem_alloc_validator_unregister(const char *name, int socket_id);

#ifdef __cplusplus
}
#endif

#endif /* _RTE_MEMORY_H_ */