-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
*/
#ifndef _RTE_MEMORY_H_
#include <stddef.h>
#include <stdio.h>
-#ifdef RTE_EXEC_ENV_LINUXAPP
-#include <exec-env/rte_dom0_common.h>
-#endif
-
#ifdef __cplusplus
extern "C" {
#endif
#include <rte_common.h>
+#include <rte_compat.h>
+#include <rte_config.h>
+
+/* forward declaration for pointers */
+struct rte_memseg_list;
+__extension__
enum rte_page_sizes {
RTE_PGSIZE_4K = 1ULL << 12,
RTE_PGSIZE_64K = 1ULL << 16,
*/
#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
-typedef uint64_t phys_addr_t; /**< Physical address definition. */
+typedef uint64_t phys_addr_t; /**< Physical address. */
#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1)
+/**
+ * IO virtual address type.
+ * When the physical addressing mode (IOVA as PA) is in use,
+ * the translation from an IO virtual address (IOVA) to a physical address
+ * is a direct mapping, i.e. the same value.
+ * Otherwise, in virtual mode (IOVA as VA), an IOMMU may do the translation.
+ */
+typedef uint64_t rte_iova_t;
+#define RTE_BAD_IOVA ((rte_iova_t)-1)
/**
* Physical memory segment descriptor.
*/
+#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
+/**< Prevent this segment from being freed back to the OS. */
struct rte_memseg {
- phys_addr_t phys_addr; /**< Start physical address. */
+ RTE_STD_C11
+ union {
+ phys_addr_t phys_addr; /**< deprecated - Start physical address. */
+ rte_iova_t iova; /**< Start IO address. */
+ };
+ RTE_STD_C11
union {
void *addr; /**< Start virtual address. */
uint64_t addr_64; /**< Makes sure addr is always 64 bits */
};
-#ifdef RTE_LIBRTE_IVSHMEM
- phys_addr_t ioremap_addr; /**< Real physical address inside the VM */
-#endif
size_t len; /**< Length of the segment. */
uint64_t hugepage_sz; /**< The pagesize of underlying memory */
int32_t socket_id; /**< NUMA socket ID. */
uint32_t nchannel; /**< Number of channels. */
uint32_t nrank; /**< Number of ranks. */
-#ifdef RTE_LIBRTE_XEN_DOM0
- /**< store segment MFNs */
- uint64_t mfn[DOM0_NUM_MEMBLOCK];
-#endif
+ uint32_t flags; /**< Memseg-specific flags */
} __rte_packed;
/**
* @param virt
* The virtual address.
* @return
- * The physical address or RTE_BAD_PHYS_ADDR on error.
+ * The physical address or RTE_BAD_IOVA on error.
*/
phys_addr_t rte_mem_virt2phy(const void *virt);
/**
- * Get the layout of the available physical memory.
+ * Get IO virtual address of any mapped virtual address in the current process.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * The IO address or RTE_BAD_IOVA on error.
+ */
+rte_iova_t rte_mem_virt2iova(const void *virt);
+
+/**
+ * Get virtual memory address corresponding to iova address.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param iova
+ * The iova address.
+ * @return
+ * Virtual address corresponding to iova address (or NULL if address does not
+ * exist within DPDK memory map).
+ */
+__rte_experimental void *
+rte_mem_iova2virt(rte_iova_t iova);
+
+/**
+ * Get memseg to which a particular virtual address belongs.
+ *
+ * @param virt
+ * The virtual address.
+ * @param msl
+ * The memseg list in which to look up based on ``virt`` address
+ * (can be NULL).
+ * @return
+ * Memseg pointer on success, or NULL on error.
+ */
+__rte_experimental struct rte_memseg *
+rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);
+
+/**
+ * Get memseg list corresponding to virtual memory address.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * Memseg list to which this virtual address belongs.
+ */
+__rte_experimental struct rte_memseg_list *
+rte_mem_virt2memseg_list(const void *virt);
+
+/**
+ * Memseg walk function prototype.
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg);
+
+/**
+ * Memseg contig walk function prototype. This will trigger a callback on every
+ * VA-contiguous area starting at memseg ``ms``, so total valid VA space at each
+ * callback call will be [``ms->addr``, ``ms->addr + len``).
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, size_t len, void *arg);
+
+/**
+ * Memseg list walk function prototype. This will trigger a callback on every
+ * allocated memseg list.
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
+ void *arg);
+
+/**
+ * Walk list of all memsegs.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk(rte_memseg_walk_t func, void *arg);
+
+/**
+ * Walk each VA-contiguous area.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
+
+/**
+ * Walk list of all memsegs without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
+
+/**
+ * Walk each VA-contiguous area without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
+
+/**
+ * Return file descriptor associated with a particular memseg (if available).
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @note This returns an internal file descriptor. Performing any operations on
+ * this file descriptor is inherently dangerous, so it should be treated
+ * as read-only for all intents and purposes.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd(const struct rte_memseg *ms);
+
+/**
+ * Return file descriptor associated with a particular memseg (if available).
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @note This returns an internal file descriptor. Performing any operations on
+ * this file descriptor is inherently dangerous, so it should be treated
+ * as read-only for all intents and purposes.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);
+
+/**
+ * Get offset into segment file descriptor associated with a particular memseg
+ * (if available).
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param ms
+ * A pointer to memseg for which to get the file descriptor offset.
+ * @param offset
+ * A pointer to offset value where the result will be stored.
+ *
+ * @return
+ * 0 in case of success, with the offset stored in ``offset``.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - EINVAL - ``offset`` pointer was NULL
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);
+
+/**
+ * Get offset into segment file descriptor associated with a particular memseg
+ * (if available).
*
- * It can be useful for an application to have the full physical
- * memory layout to decide the size of a memory zone to reserve. This
- * table is stored in rte_config (see rte_eal_get_configuration()).
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param ms
+ * A pointer to memseg for which to get the file descriptor offset.
+ * @param offset
+ * A pointer to offset value where the result will be stored.
*
* @return
- * - On success, return a pointer to a read-only table of struct
- * rte_physmem_desc elements, containing the layout of all
- * addressable physical memory. The last element of the table
- * contains a NULL address.
- * - On error, return NULL. This should not happen since it is a fatal
- * error that will probably cause the entire system to panic.
+ * 0 in case of success, with the offset stored in ``offset``.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - EINVAL - ``offset`` pointer was NULL
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
*/
-const struct rte_memseg *rte_eal_get_physmem_layout(void);
+int __rte_experimental
+rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
+ size_t *offset);
/**
- * Dump the physical memory layout to the console.
+ * Dump the physical memory layout to a file.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
*
* @param f
* A pointer to a file for output
/**
* Get the total amount of available physical memory.
*
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
* @return
* The total amount of available physical memory in bytes.
*/
*/
unsigned rte_memory_get_nrank(void);
-#ifdef RTE_LIBRTE_XEN_DOM0
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Check if all currently allocated memory segments are compliant with
+ * supplied DMA address width.
+ *
+ * @param maskbits
+ * Address width to check against.
+ */
+int __rte_experimental rte_mem_check_dma_mask(uint8_t maskbits);
-/**< Internal use only - should DOM0 memory mapping be used */
-int rte_xen_dom0_supported(void);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Check if all currently allocated memory segments are compliant with
+ * supplied DMA address width. This function uses
+ * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk, so
+ * memory_hotplug_lock is not acquired, which avoids a deadlock during
+ * memory initialization.
+ *
+ * This function is just for EAL core memory internal use. Drivers should
+ * use the previous rte_mem_check_dma_mask.
+ *
+ * @param maskbits
+ * Address width to check against.
+ */
+int __rte_experimental rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits);
-/**< Internal use only - phys to virt mapping for xen */
-phys_addr_t rte_xen_mem_phy2mch(uint32_t, const phys_addr_t);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set DMA mask to use once memory initialization is done. The previous
+ * functions rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe
+ * cannot be used safely until memory has been initialized.
+ */
+void __rte_experimental rte_mem_set_dma_mask(uint8_t maskbits);
/**
- * Return the physical address of elt, which is an element of the pool mp.
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
*
- * @param memseg_id
- * The mempool is from which memory segment.
- * @param phy_addr
- * physical address of elt.
+ * @return
+ * 1 if the system is able to obtain physical addresses.
+ * 0 if using DMA addresses through an IOMMU.
+ */
+int rte_eal_using_phys_addrs(void);
+
+
+/**
+ * Enum indicating which kind of memory event has happened. Used by callbacks to
+ * distinguish between memory allocations and deallocations.
+ */
+enum rte_mem_event {
+ RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
+ RTE_MEM_EVENT_FREE, /**< Deallocation event. */
+};
+#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
+/**< maximum length of callback name */
+
+/**
+ * Function typedef used to register callbacks for memory events.
+ */
+typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
+ const void *addr, size_t len, void *arg);
+
+/**
+ * Function used to register callbacks for memory events.
+ *
+ * @note callbacks will happen while memory hotplug subsystem is write-locked,
+ * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
+ * deadlock when called from within such callbacks.
+ *
+ * @note mem event callbacks not being supported is an expected error condition,
+ * so user code needs to handle this situation. In these cases, return
+ * value will be -1, and rte_errno will be set to ENOTSUP.
+ *
+ * @param name
+ * Name associated with specified callback to be added to the list.
+ *
+ * @param clb
+ * Callback function pointer.
+ *
+ * @param arg
+ * Argument to pass to the callback.
*
* @return
- * The physical address or error.
- */
-static inline phys_addr_t
-rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr)
-{
- if (rte_xen_dom0_supported())
- return rte_xen_mem_phy2mch(memseg_id, phy_addr);
- else
- return phy_addr;
-}
+ * 0 on successful callback register
+ * -1 on unsuccessful callback register, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
+ void *arg);
/**
- * Memory init for supporting application running on Xen domain0.
+ * Function used to unregister callbacks for memory events.
*
- * @param void
+ * @param name
+ * Name associated with specified callback to be removed from the list.
+ *
+ * @param arg
+ * Argument to look for among callbacks with specified callback name.
*
* @return
- * 0: successfully
- * negative: error
+ * 0 on successful callback unregister
+ * -1 on unsuccessful callback unregister, with rte_errno value indicating
+ * reason for failure.
*/
-int rte_xen_dom0_memory_init(void);
+int __rte_experimental
+rte_mem_event_callback_unregister(const char *name, void *arg);
+
+#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
+/**< maximum length of alloc validator name */
/**
- * Attach to memory setments of primary process on Xen domain0.
+ * Function typedef used to register memory allocation validation callbacks.
*
- * @param void
+ * Returning 0 will allow allocation attempt to continue. Returning -1 will
+ * prevent allocation from succeeding.
+ */
+typedef int (*rte_mem_alloc_validator_t)(int socket_id,
+ size_t cur_limit, size_t new_len);
+
+/**
+ * @brief Register validator callback for memory allocations.
+ *
+ * Callbacks registered by this function will be called right before memory
+ * allocator is about to trigger allocation of more pages from the system if
+ * said allocation will bring total memory usage above specified limit on
+ * specified socket. User will be able to cancel pending allocation if callback
+ * returns -1.
+ *
+ * @note callbacks will happen while memory hotplug subsystem is write-locked,
+ * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
+ * deadlock when called from within such callbacks.
+ *
+ * @note validator callbacks not being supported is an expected error condition,
+ * so user code needs to handle this situation. In these cases, return
+ * value will be -1, and rte_errno will be set to ENOTSUP.
+ *
+ * @param name
+ * Name associated with specified callback to be added to the list.
+ *
+ * @param clb
+ * Callback function pointer.
+ *
+ * @param socket_id
+ * Socket ID on which to watch for allocations.
+ *
+ * @param limit
+ * Limit above which to trigger callbacks.
*
* @return
- * 0: successfully
- * negative: error
+ * 0 on successful callback register
+ * -1 on unsuccessful callback register, with rte_errno value indicating
+ * reason for failure.
*/
-int rte_xen_dom0_memory_attach(void);
-#else
-static inline int rte_xen_dom0_supported(void)
-{
- return 0;
-}
+int __rte_experimental
+rte_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
-static inline phys_addr_t
-rte_mem_phy2mch(uint32_t memseg_id __rte_unused, const phys_addr_t phy_addr)
-{
- return phy_addr;
-}
-#endif
+/**
+ * @brief Unregister validator callback for memory allocations.
+ *
+ * @param name
+ * Name associated with specified callback to be removed from the list.
+ *
+ * @param socket_id
+ * Socket ID on which to watch for allocations.
+ *
+ * @return
+ * 0 on successful callback unregister
+ * -1 on unsuccessful callback unregister, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_alloc_validator_unregister(const char *name, int socket_id);
#ifdef __cplusplus
}