1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
11 * Memory-related RTE API.
22 #include <rte_common.h>
23 #include <rte_compat.h>
24 #include <rte_config.h>
25 #include <rte_fbarray.h>
29 RTE_PGSIZE_4K = 1ULL << 12,
30 RTE_PGSIZE_64K = 1ULL << 16,
31 RTE_PGSIZE_256K = 1ULL << 18,
32 RTE_PGSIZE_2M = 1ULL << 21,
33 RTE_PGSIZE_16M = 1ULL << 24,
34 RTE_PGSIZE_256M = 1ULL << 28,
35 RTE_PGSIZE_512M = 1ULL << 29,
36 RTE_PGSIZE_1G = 1ULL << 30,
37 RTE_PGSIZE_4G = 1ULL << 32,
38 RTE_PGSIZE_16G = 1ULL << 34,
/*
 * The negative literal is parenthesized so the macro always expands to a
 * single operand, whatever operators surround its use.
 */
#define SOCKET_ID_ANY (-1) /**< Any NUMA socket. */
#define RTE_CACHE_LINE_MASK (RTE_CACHE_LINE_SIZE - 1) /**< Cache line mask. */
/**
 * Round a size up to a multiple of RTE_CACHE_LINE_SIZE.
 *
 * The argument is parenthesized so that an expression built from operators
 * binding more loosely than '+' (shifts, bitwise operators, ?:) is rounded
 * as a whole. It is evaluated exactly once.
 *
 * @param size
 *   Value to round up.
 * @return
 *   The first cache-aligned value greater or equal to size.
 */
#define RTE_CACHE_LINE_ROUNDUP(size) \
	(RTE_CACHE_LINE_SIZE * \
	(((size) + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE))
/** Cache line size in terms of log2. */
49 #if RTE_CACHE_LINE_SIZE == 64
50 #define RTE_CACHE_LINE_SIZE_LOG2 6
51 #elif RTE_CACHE_LINE_SIZE == 128
52 #define RTE_CACHE_LINE_SIZE_LOG2 7
54 #error "Unsupported cache line size"
57 #define RTE_CACHE_LINE_MIN_SIZE 64 /**< Minimum Cache line size. */
60 * Force alignment to cache line.
62 #define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
65 * Force minimum cache line alignment.
67 #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
69 typedef uint64_t phys_addr_t; /**< Physical address. */
70 #define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)
72 * IO virtual address type.
73 * When the physical addressing mode (IOVA as PA) is in use,
74 * the translation from an IO virtual address (IOVA) to a physical address
75 * is a direct mapping, i.e. the same value.
76 * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation.
78 typedef uint64_t rte_iova_t;
79 #define RTE_BAD_IOVA ((rte_iova_t)-1)
82 * Physical memory segment descriptor.
84 #define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
85 /**< Prevent this segment from being freed back to the OS. */
89 phys_addr_t phys_addr; /**< deprecated - Start physical address. */
90 rte_iova_t iova; /**< Start IO address. */
94 void *addr; /**< Start virtual address. */
95 uint64_t addr_64; /**< Makes sure addr is always 64 bits */
97 size_t len; /**< Length of the segment. */
98 uint64_t hugepage_sz; /**< The pagesize of underlying memory */
99 int32_t socket_id; /**< NUMA socket ID. */
100 uint32_t nchannel; /**< Number of channels. */
101 uint32_t nrank; /**< Number of ranks. */
102 uint32_t flags; /**< Memseg-specific flags */
106 * memseg list is a special case as we need to store a bunch of other data
107 * together with the array itself.
109 struct rte_memseg_list {
113 /**< Base virtual address for this memseg list. */
115 /**< Makes sure addr is always 64-bits */
117 uint64_t page_sz; /**< Page size for all memsegs in this list. */
118 int socket_id; /**< Socket ID for all memsegs in this list. */
119 volatile uint32_t version; /**< version number for multiprocess sync. */
120 size_t len; /**< Length of memory area covered by this memseg list. */
121 unsigned int external; /**< 1 if this list points to external memory */
122 struct rte_fbarray memseg_arr;
126 * Lock page in physical memory and prevent from swapping.
129 * The virtual address.
131 * 0 on success, negative on error.
133 int rte_mem_lock_page(const void *virt);
136 * Get physical address of any mapped virtual address in the current process.
137 * It is found by browsing the /proc/self/pagemap special file.
138 * The page must be locked.
141 * The virtual address.
 *   The physical address, or RTE_BAD_PHYS_ADDR (same value as RTE_BAD_IOVA)
 *   on error.
145 phys_addr_t rte_mem_virt2phy(const void *virt);
148 * Get IO virtual address of any mapped virtual address in the current process.
151 * The virtual address.
153 * The IO address or RTE_BAD_IOVA on error.
155 rte_iova_t rte_mem_virt2iova(const void *virt);
158 * Get virtual memory address corresponding to iova address.
160 * @note This function read-locks the memory hotplug subsystem, and thus cannot
161 * be used within memory-related callback functions.
166 * Virtual address corresponding to iova address (or NULL if address does not
167 * exist within DPDK memory map).
171 rte_mem_iova2virt(rte_iova_t iova);
174 * Get memseg to which a particular virtual address belongs.
177 * The virtual address.
179 * The memseg list in which to look up based on ``virt`` address
182 * Memseg pointer on success, or NULL on error.
186 rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);
189 * Get memseg list corresponding to virtual memory address.
192 * The virtual address.
 *   Memseg list to which this virtual address belongs.
197 struct rte_memseg_list *
198 rte_mem_virt2memseg_list(const void *virt);
201 * Memseg walk function prototype.
203 * Returning 0 will continue walk
204 * Returning 1 will stop the walk
205 * Returning -1 will stop the walk and report error
207 typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
208 const struct rte_memseg *ms, void *arg);
211 * Memseg contig walk function prototype. This will trigger a callback on every
212 * VA-contiguous area starting at memseg ``ms``, so total valid VA space at each
213 * callback call will be [``ms->addr``, ``ms->addr + len``).
215 * Returning 0 will continue walk
216 * Returning 1 will stop the walk
217 * Returning -1 will stop the walk and report error
219 typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
220 const struct rte_memseg *ms, size_t len, void *arg);
223 * Memseg list walk function prototype. This will trigger a callback on every
224 * allocated memseg list.
226 * Returning 0 will continue walk
227 * Returning 1 will stop the walk
228 * Returning -1 will stop the walk and report error
230 typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
234 * Walk list of all memsegs.
236 * @note This function read-locks the memory hotplug subsystem, and thus cannot
237 * be used within memory-related callback functions.
239 * @note This function will also walk through externally allocated segments. It
240 * is up to the user to decide whether to skip through these segments.
245 * Argument passed to iterator
247 * 0 if walked over the entire list
248 * 1 if stopped by the user
249 * -1 if user function reported error
253 rte_memseg_walk(rte_memseg_walk_t func, void *arg);
256 * Walk each VA-contiguous area.
258 * @note This function read-locks the memory hotplug subsystem, and thus cannot
259 * be used within memory-related callback functions.
261 * @note This function will also walk through externally allocated segments. It
262 * is up to the user to decide whether to skip through these segments.
267 * Argument passed to iterator
269 * 0 if walked over the entire list
270 * 1 if stopped by the user
271 * -1 if user function reported error
275 rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
278 * Walk each allocated memseg list.
280 * @note This function read-locks the memory hotplug subsystem, and thus cannot
281 * be used within memory-related callback functions.
283 * @note This function will also walk through externally allocated segments. It
284 * is up to the user to decide whether to skip through these segments.
289 * Argument passed to iterator
291 * 0 if walked over the entire list
292 * 1 if stopped by the user
293 * -1 if user function reported error
297 rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
300 * Walk list of all memsegs without performing any locking.
302 * @note This function does not perform any locking, and is only safe to call
303 * from within memory-related callback functions.
308 * Argument passed to iterator
310 * 0 if walked over the entire list
311 * 1 if stopped by the user
312 * -1 if user function reported error
316 rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
319 * Walk each VA-contiguous area without performing any locking.
321 * @note This function does not perform any locking, and is only safe to call
322 * from within memory-related callback functions.
327 * Argument passed to iterator
329 * 0 if walked over the entire list
330 * 1 if stopped by the user
331 * -1 if user function reported error
335 rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
338 * Walk each allocated memseg list without performing any locking.
340 * @note This function does not perform any locking, and is only safe to call
341 * from within memory-related callback functions.
346 * Argument passed to iterator
348 * 0 if walked over the entire list
349 * 1 if stopped by the user
350 * -1 if user function reported error
354 rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
357 * Return file descriptor associated with a particular memseg (if available).
359 * @note This function read-locks the memory hotplug subsystem, and thus cannot
360 * be used within memory-related callback functions.
362 * @note This returns an internal file descriptor. Performing any operations on
363 * this file descriptor is inherently dangerous, so it should be treated
364 * as read-only for all intents and purposes.
367 * A pointer to memseg for which to get file descriptor.
370 * Valid file descriptor in case of success.
371 * -1 in case of error, with ``rte_errno`` set to the following values:
372 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
373 * - ENODEV - ``ms`` fd is not available
374 * - ENOENT - ``ms`` is an unused segment
375 * - ENOTSUP - segment fd's are not supported
379 rte_memseg_get_fd(const struct rte_memseg *ms);
382 * Return file descriptor associated with a particular memseg (if available).
384 * @note This function does not perform any locking, and is only safe to call
385 * from within memory-related callback functions.
387 * @note This returns an internal file descriptor. Performing any operations on
388 * this file descriptor is inherently dangerous, so it should be treated
389 * as read-only for all intents and purposes.
392 * A pointer to memseg for which to get file descriptor.
395 * Valid file descriptor in case of success.
396 * -1 in case of error, with ``rte_errno`` set to the following values:
397 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
398 * - ENODEV - ``ms`` fd is not available
399 * - ENOENT - ``ms`` is an unused segment
400 * - ENOTSUP - segment fd's are not supported
404 rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);
407 * Get offset into segment file descriptor associated with a particular memseg
410 * @note This function read-locks the memory hotplug subsystem, and thus cannot
411 * be used within memory-related callback functions.
 *   A pointer to memseg for which to get the file descriptor offset.
 *   A pointer to offset value where the result will be stored.
 *   0 in case of success, with the result stored in ``offset``.
420 * -1 in case of error, with ``rte_errno`` set to the following values:
421 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
422 * - EINVAL - ``offset`` pointer was NULL
423 * - ENODEV - ``ms`` fd is not available
424 * - ENOENT - ``ms`` is an unused segment
425 * - ENOTSUP - segment fd's are not supported
429 rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);
432 * Get offset into segment file descriptor associated with a particular memseg
435 * @note This function does not perform any locking, and is only safe to call
436 * from within memory-related callback functions.
 *   A pointer to memseg for which to get the file descriptor offset.
 *   A pointer to offset value where the result will be stored.
 *   0 in case of success, with the result stored in ``offset``.
445 * -1 in case of error, with ``rte_errno`` set to the following values:
446 * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
447 * - EINVAL - ``offset`` pointer was NULL
448 * - ENODEV - ``ms`` fd is not available
449 * - ENOENT - ``ms`` is an unused segment
450 * - ENOTSUP - segment fd's are not supported
454 rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
459 * @b EXPERIMENTAL: this API may change without prior notice
461 * Register external memory chunk with DPDK.
463 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
466 * @note This API will not perform any DMA mapping. It is expected that user
467 * will do that themselves.
469 * @note Before accessing this memory in other processes, it needs to be
470 * attached in each of those processes by calling ``rte_extmem_attach`` in
471 * each other process.
474 * Start of virtual area to register. Must be aligned by ``page_sz``.
476 * Length of virtual area to register. Must be aligned by ``page_sz``.
478 * Array of page IOVA addresses corresponding to each page in this memory
479 * area. Can be NULL, in which case page IOVA addresses will be set to
482 * Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
485 * Page size of the underlying memory
489 * - -1 in case of error, with rte_errno set to one of the following:
490 * EINVAL - one of the parameters was invalid
491 * EEXIST - memory chunk is already registered
492 * ENOSPC - no more space in internal config to store a new memory chunk
496 rte_extmem_register(void *va_addr, size_t len, rte_iova_t iova_addrs[],
497 unsigned int n_pages, size_t page_sz);
501 * @b EXPERIMENTAL: this API may change without prior notice
503 * Unregister external memory chunk with DPDK.
505 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
508 * @note This API will not perform any DMA unmapping. It is expected that user
509 * will do that themselves.
511 * @note Before calling this function, all other processes must call
512 * ``rte_extmem_detach`` to detach from the memory area.
515 * Start of virtual area to unregister
517 * Length of virtual area to unregister
521 * - -1 in case of error, with rte_errno set to one of the following:
522 * EINVAL - one of the parameters was invalid
523 * ENOENT - memory chunk was not found
527 rte_extmem_unregister(void *va_addr, size_t len);
531 * @b EXPERIMENTAL: this API may change without prior notice
533 * Attach to external memory chunk registered in another process.
535 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
538 * @note This API will not perform any DMA mapping. It is expected that user
539 * will do that themselves.
 *   Start of virtual area to attach to
 *   Length of virtual area to attach to
548 * - -1 in case of error, with rte_errno set to one of the following:
549 * EINVAL - one of the parameters was invalid
550 * ENOENT - memory chunk was not found
554 rte_extmem_attach(void *va_addr, size_t len);
558 * @b EXPERIMENTAL: this API may change without prior notice
560 * Detach from external memory chunk registered in another process.
562 * @note Using this API is mutually exclusive with ``rte_malloc`` family of
565 * @note This API will not perform any DMA unmapping. It is expected that user
566 * will do that themselves.
 *   Start of virtual area to detach from
 *   Length of virtual area to detach from
575 * - -1 in case of error, with rte_errno set to one of the following:
576 * EINVAL - one of the parameters was invalid
577 * ENOENT - memory chunk was not found
581 rte_extmem_detach(void *va_addr, size_t len);
584 * Dump the physical memory layout to a file.
586 * @note This function read-locks the memory hotplug subsystem, and thus cannot
587 * be used within memory-related callback functions.
590 * A pointer to a file for output
592 void rte_dump_physmem_layout(FILE *f);
595 * Get the total amount of available physical memory.
597 * @note This function read-locks the memory hotplug subsystem, and thus cannot
598 * be used within memory-related callback functions.
601 * The total amount of available physical memory in bytes.
603 uint64_t rte_eal_get_physmem_size(void);
606 * Get the number of memory channels.
609 * The number of memory channels on the system. The value is 0 if unknown
610 * or not the same on all devices.
612 unsigned rte_memory_get_nchannel(void);
615 * Get the number of memory ranks.
618 * The number of memory ranks on the system. The value is 0 if unknown or
619 * not the same on all devices.
621 unsigned rte_memory_get_nrank(void);
625 * @b EXPERIMENTAL: this API may change without prior notice
627 * Check if all currently allocated memory segments are compliant with
628 * supplied DMA address width.
631 * Address width to check against.
634 int rte_mem_check_dma_mask(uint8_t maskbits);
638 * @b EXPERIMENTAL: this API may change without prior notice
640 * Check if all currently allocated memory segments are compliant with
641 * supplied DMA address width. This function will use
642 * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk implying
643 * memory_hotplug_lock will not be acquired avoiding deadlock during
644 * memory initialization.
646 * This function is just for EAL core memory internal use. Drivers should
647 * use the previous rte_mem_check_dma_mask.
650 * Address width to check against.
653 int rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits);
657 * @b EXPERIMENTAL: this API may change without prior notice
659 * Set dma mask to use once memory initialization is done. Previous functions
 * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe cannot be
661 * used safely until memory has been initialized.
664 void rte_mem_set_dma_mask(uint8_t maskbits);
667 * Drivers based on uio will not load unless physical
668 * addresses are obtainable. It is only possible to get
669 * physical addresses when running as a privileged user.
672 * 1 if the system is able to obtain physical addresses.
673 * 0 if using DMA addresses through an IOMMU.
675 int rte_eal_using_phys_addrs(void);
679 * Enum indicating which kind of memory event has happened. Used by callbacks to
680 * distinguish between memory allocations and deallocations.
683 RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
684 RTE_MEM_EVENT_FREE, /**< Deallocation event. */
686 #define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
687 /**< maximum length of callback name */
690 * Function typedef used to register callbacks for memory events.
692 typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
693 const void *addr, size_t len, void *arg);
696 * Function used to register callbacks for memory events.
698 * @note callbacks will happen while memory hotplug subsystem is write-locked,
699 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
700 * deadlock when called from within such callbacks.
702 * @note mem event callbacks not being supported is an expected error condition,
703 * so user code needs to handle this situation. In these cases, return
704 * value will be -1, and rte_errno will be set to ENOTSUP.
707 * Name associated with specified callback to be added to the list.
710 * Callback function pointer.
713 * Argument to pass to the callback.
716 * 0 on successful callback register
717 * -1 on unsuccessful callback register, with rte_errno value indicating
718 * reason for failure.
722 rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
726 * Function used to unregister callbacks for memory events.
729 * Name associated with specified callback to be removed from the list.
732 * Argument to look for among callbacks with specified callback name.
735 * 0 on successful callback unregister
736 * -1 on unsuccessful callback unregister, with rte_errno value indicating
737 * reason for failure.
741 rte_mem_event_callback_unregister(const char *name, void *arg);
744 #define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
745 /**< maximum length of alloc validator name */
747 * Function typedef used to register memory allocation validation callbacks.
749 * Returning 0 will allow allocation attempt to continue. Returning -1 will
750 * prevent allocation from succeeding.
752 typedef int (*rte_mem_alloc_validator_t)(int socket_id,
753 size_t cur_limit, size_t new_len);
756 * @brief Register validator callback for memory allocations.
758 * Callbacks registered by this function will be called right before memory
759 * allocator is about to trigger allocation of more pages from the system if
760 * said allocation will bring total memory usage above specified limit on
761 * specified socket. User will be able to cancel pending allocation if callback
764 * @note callbacks will happen while memory hotplug subsystem is write-locked,
765 * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
766 * deadlock when called from within such callbacks.
768 * @note validator callbacks not being supported is an expected error condition,
769 * so user code needs to handle this situation. In these cases, return
770 * value will be -1, and rte_errno will be set to ENOTSUP.
773 * Name associated with specified callback to be added to the list.
776 * Callback function pointer.
779 * Socket ID on which to watch for allocations.
782 * Limit above which to trigger callbacks.
785 * 0 on successful callback register
786 * -1 on unsuccessful callback register, with rte_errno value indicating
787 * reason for failure.
791 rte_mem_alloc_validator_register(const char *name,
792 rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
795 * @brief Unregister validator callback for memory allocations.
798 * Name associated with specified callback to be removed from the list.
 *   Socket ID for which the validator callback was registered.
804 * 0 on successful callback unregister
805 * -1 on unsuccessful callback unregister, with rte_errno value indicating
806 * reason for failure.
810 rte_mem_alloc_validator_unregister(const char *name, int socket_id);
816 #endif /* _RTE_MEMORY_H_ */