#include <rte_branch_prediction.h>
#include <rte_common.h>
+#include <rte_eal_memconfig.h>
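+/* Declares rte_mcfg_mem_read_lock() and rte_mcfg_mem_read_unlock(). */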
#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_memory.h>
}
/**
- * Releass resources of detached MR having no online entry.
+ * Release resources of detached MR having no online entry.
*
* @param dev
* Pointer to Ethernet device.
}
/**
- * Create a new global Memroy Region (MR) for a missing virtual address.
- * Register entire virtually contiguous memory chunk around the address.
+ * Create a new global Memory Region (MR) for a missing virtual address.
+ * This API should be called from a secondary process; a request is then sent
+ * to the primary process in order to create a MR for the address. As the
+ * global MR list is in shared memory, the following LKey lookup should
+ * succeed unless the request fails.
*
* @param dev
* Pointer to Ethernet device.
* Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
*/
static uint32_t
-mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
- uintptr_t addr)
+mlx4_mr_create_secondary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
+ uintptr_t addr)
+{
+ struct mlx4_priv *priv = dev->data->dev_private;
+ int ret;
+
+ DEBUG("port %u requesting MR creation for address (%p)",
+ dev->data->port_id, (void *)addr);
+ ret = mlx4_mp_req_mr_create(dev, addr);
+ if (ret) {
+ DEBUG("port %u fail to request MR creation for address (%p)",
+ dev->data->port_id, (void *)addr);
+ return UINT32_MAX;
+ }
+ rte_rwlock_read_lock(&priv->mr.rwlock);
+ /* Fill in output data. */
+ mr_lookup_dev(dev, entry, addr);
+ /* Lookup can't fail. */
+ assert(entry->lkey != UINT32_MAX);
+ rte_rwlock_read_unlock(&priv->mr.rwlock);
+ DEBUG("port %u MR CREATED by primary process for %p:\n"
+ " [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x",
+ dev->data->port_id, (void *)addr,
+ entry->start, entry->end, entry->lkey);
+ return entry->lkey;
+}
+
+/**
+ * Create a new global Memory Region (MR) for a missing virtual address.
+ * Register entire virtually contiguous memory chunk around the address.
+ * This must be called from the primary process.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[out] entry
+ * Pointer to the returned MR cache entry, found in the global cache or newly
+ * created. If the MR cannot be created, this is not updated.
+ * @param addr
+ * Target virtual address to register.
+ *
+ * @return
+ * Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
+ */
+uint32_t
+mlx4_mr_create_primary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
+ uintptr_t addr)
{
struct mlx4_priv *priv = dev->data->dev_private;
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
const struct rte_memseg_list *msl;
const struct rte_memseg *ms;
struct mlx4_mr *mr = NULL;
DEBUG("port %u creating a MR using address (%p)",
dev->data->port_id, (void *)addr);
- if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
- WARN("port %u using address (%p) of unregistered mempool"
- " in secondary process, please create mempool"
- " before rte_eth_dev_start()",
- dev->data->port_id, (void *)addr);
- rte_errno = EPERM;
- goto err_nolock;
- }
/*
* Release detached MRs if any. This can't be called while holding either
* memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have
*/
mlx4_mr_garbage_collect(dev);
/*
- * Find out a contiguous virtual address chunk in use, to which the
- * given address belongs, in order to register maximum range. In the
- * best case where mempools are not dynamically recreated and
+ * If enabled, find out a contiguous virtual address chunk in use, to
+ * which the given address belongs, in order to register maximum range.
+ * In the best case where mempools are not dynamically recreated and
* '--socket-mem' is specified as an EAL option, it is very likely to
* have only one MR (LKey) per socket and per hugepage size even
- * though the system memory is highly fragmented.
+ * though the system memory is highly fragmented. As the whole memory
+ * chunk will be pinned by the kernel, it can't be reused unless the
+ * entire chunk is freed from EAL.
+ *
+ * If disabled, just register one memseg (page). Memory consumption is
+ * then minimized, but performance may drop if many MRs have to be
+ * looked up on the datapath.
*/
- if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
+ if (!priv->mr_ext_memseg_en) {
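+ /* Register only the single page that contains the address. */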
+ data.msl = rte_mem_virt2memseg_list((void *)addr);
+ data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz);
+ data.end = data.start + data.msl->page_sz;
+ } else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) {
WARN("port %u unable to find virtually contiguous"
" chunk for address (%p)."
" rte_memseg_contig_walk() failed.",
bmp_mem = RTE_PTR_ALIGN_CEIL(mr + 1, RTE_CACHE_LINE_SIZE);
mr->ms_bmp = rte_bitmap_init(ms_n, bmp_mem, bmp_size);
if (mr->ms_bmp == NULL) {
- WARN("port %u unable to initialize bitamp for a new MR of"
+ WARN("port %u unable to initialize bitmap for a new MR of"
" address (%p).",
dev->data->port_id, (void *)addr);
rte_errno = EINVAL;
* just single page. If not, go on with the big chunk atomically from
* here.
*/
- rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ rte_mcfg_mem_read_lock();
data_re = data;
if (len > msl->page_sz &&
!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data_re)) {
*/
data.start = RTE_ALIGN_FLOOR(addr, msl->page_sz);
data.end = data.start + msl->page_sz;
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+ rte_mcfg_mem_read_unlock();
mr_free(mr);
goto alloc_resources;
}
DEBUG("port %u found MR for %p on final lookup, abort",
dev->data->port_id, (void *)addr);
rte_rwlock_write_unlock(&priv->mr.rwlock);
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+ rte_mcfg_mem_read_unlock();
/*
* Must be unlocked before calling rte_free() because
* mlx4_mr_mem_event_free_cb() can be called inside.
/* Lookup can't fail. */
assert(entry->lkey != UINT32_MAX);
rte_rwlock_write_unlock(&priv->mr.rwlock);
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+ rte_mcfg_mem_read_unlock();
return entry->lkey;
err_mrlock:
rte_rwlock_write_unlock(&priv->mr.rwlock);
err_memlock:
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+ rte_mcfg_mem_read_unlock();
err_nolock:
/*
* In case of error, as this can be called in a datapath, a warning
return UINT32_MAX;
}
+/**
+ * Create a new global Memory Region (MR) for a missing virtual address.
+ * This can be called from either the primary or a secondary process.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[out] entry
+ * Pointer to the returned MR cache entry, found in the global cache or newly
+ * created. If the MR cannot be created, this is not updated.
+ * @param addr
+ * Target virtual address to register.
+ *
+ * @return
+ * Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
+ */
+static uint32_t
+mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry,
+ uintptr_t addr)
+{
+ uint32_t ret = 0;
+
+ switch (rte_eal_process_type()) {
+ case RTE_PROC_PRIMARY:
+ ret = mlx4_mr_create_primary(dev, entry, addr);
+ break;
+ case RTE_PROC_SECONDARY:
+ ret = mlx4_mr_create_secondary(dev, entry, addr);
+ break;
+ default:
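+ /* No MR is created for any other process type. */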
+ break;
+ }
+ return ret;
+}
+
/**
* Rebuild the global B-tree cache of device from the original MR list.
*
mlx4_mr_release(struct rte_eth_dev *dev)
{
struct mlx4_priv *priv = dev->data->dev_private;
- struct mlx4_mr *mr_next = LIST_FIRST(&priv->mr.mr_list);
+ struct mlx4_mr *mr_next;
/* Remove from memory callback device list. */
rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock);
#endif
rte_rwlock_write_lock(&priv->mr.rwlock);
/* Detach from MR list and move to free list. */
+ mr_next = LIST_FIRST(&priv->mr.mr_list);
while (mr_next != NULL) {
struct mlx4_mr *mr = mr_next;