X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx4%2Fmlx4_mr.c;h=6b2f0cf1877ea571746066cc58d65bfadfc35879;hb=dd8384a4f5dd7e04cd3bc7ded0f0a6d9c79d25b4;hp=01894faecf5d833dab952364cd883c9a80b00d93;hpb=0203d33a105982da3eeff5a890f4d60f23234304;p=dpdk.git diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c index 01894faecf..6b2f0cf187 100644 --- a/drivers/net/mlx4/mlx4_mr.c +++ b/drivers/net/mlx4/mlx4_mr.c @@ -8,7 +8,6 @@ * Memory management functions for mlx4 driver. */ -#include #include #include #include @@ -26,6 +25,7 @@ #include #include +#include #include #include #include @@ -112,12 +112,12 @@ mr_btree_lookup(struct mlx4_mr_btree *bt, uint16_t *idx, uintptr_t addr) uint16_t n; uint16_t base = 0; - assert(bt != NULL); + MLX4_ASSERT(bt != NULL); lkp_tbl = *bt->table; n = bt->len; /* First entry must be NULL for comparison. */ - assert(bt->len > 0 || (lkp_tbl[0].start == 0 && - lkp_tbl[0].lkey == UINT32_MAX)); + MLX4_ASSERT(bt->len > 0 || (lkp_tbl[0].start == 0 && + lkp_tbl[0].lkey == UINT32_MAX)); /* Binary search. */ do { register uint16_t delta = n >> 1; @@ -129,7 +129,7 @@ mr_btree_lookup(struct mlx4_mr_btree *bt, uint16_t *idx, uintptr_t addr) n -= delta; } } while (n > 1); - assert(addr >= lkp_tbl[base].start); + MLX4_ASSERT(addr >= lkp_tbl[base].start); *idx = base; if (addr < lkp_tbl[base].end) return lkp_tbl[base].lkey; @@ -155,9 +155,9 @@ mr_btree_insert(struct mlx4_mr_btree *bt, struct mlx4_mr_cache *entry) uint16_t idx = 0; size_t shift; - assert(bt != NULL); - assert(bt->len <= bt->size); - assert(bt->len > 0); + MLX4_ASSERT(bt != NULL); + MLX4_ASSERT(bt->len <= bt->size); + MLX4_ASSERT(bt->len > 0); lkp_tbl = *bt->table; /* Find out the slot for insertion. 
*/ if (mr_btree_lookup(bt, &idx, entry->start) != UINT32_MAX) { @@ -241,7 +241,7 @@ mlx4_mr_btree_free(struct mlx4_mr_btree *bt) memset(bt, 0, sizeof(*bt)); } -#ifndef NDEBUG +#ifdef RTE_LIBRTE_MLX4_DEBUG /** * Dump all the entries in a B-tree * @@ -293,9 +293,9 @@ mr_find_next_chunk(struct mlx4_mr *mr, struct mlx4_mr_cache *entry, if (mr->msl == NULL) { struct ibv_mr *ibv_mr = mr->ibv_mr; - assert(mr->ms_bmp_n == 1); - assert(mr->ms_n == 1); - assert(base_idx == 0); + MLX4_ASSERT(mr->ms_bmp_n == 1); + MLX4_ASSERT(mr->ms_n == 1); + MLX4_ASSERT(base_idx == 0); /* * Can't search it from memseg list but get it directly from * verbs MR as there's only one chunk. @@ -314,7 +314,7 @@ mr_find_next_chunk(struct mlx4_mr *mr, struct mlx4_mr_cache *entry, msl = mr->msl; ms = rte_fbarray_get(&msl->memseg_arr, mr->ms_base_idx + idx); - assert(msl->page_sz == ms->hugepage_sz); + MLX4_ASSERT(msl->page_sz == ms->hugepage_sz); if (!start) start = ms->addr_64; end = ms->addr_64 + ms->hugepage_sz; @@ -451,8 +451,8 @@ mr_lookup_dev(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, if (mr != NULL) lkey = entry->lkey; } - assert(lkey == UINT32_MAX || (addr >= entry->start && - addr < entry->end)); + MLX4_ASSERT(lkey == UINT32_MAX || (addr >= entry->start && + addr < entry->end)); return lkey; } @@ -477,7 +477,7 @@ mr_free(struct mlx4_mr *mr) } /** - * Releass resources of detached MR having no online entry. + * Release resources of detached MR having no online entry. * * @param dev * Pointer to Ethernet device. @@ -490,7 +490,7 @@ mlx4_mr_garbage_collect(struct rte_eth_dev *dev) struct mlx4_mr_list free_list = LIST_HEAD_INITIALIZER(free_list); /* Must be called from the primary process. */ - assert(rte_eal_process_type() == RTE_PROC_PRIMARY); + MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); /* * MR can't be freed with holding the lock because rte_free() could call * memory free callback function. This will be a deadlock situation. 
@@ -527,8 +527,11 @@ mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl, } /** - * Create a new global Memroy Region (MR) for a missing virtual address. - * Register entire virtually contiguous memory chunk around the address. + * Create a new global Memory Region (MR) for a missing virtual address. + * This API should be called on a secondary process, then a request is sent to + * the primary process in order to create a MR for the address. As the global MR + * list is on the shared memory, following LKey lookup should succeed unless the + * request fails. * * @param dev * Pointer to Ethernet device. @@ -542,11 +545,54 @@ mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl, * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. */ static uint32_t -mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, - uintptr_t addr) +mlx4_mr_create_secondary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, + uintptr_t addr) +{ + struct mlx4_priv *priv = dev->data->dev_private; + uint32_t lkey; + + DEBUG("port %u requesting MR creation for address (%p)", + dev->data->port_id, (void *)addr); + if (mlx4_mp_req_mr_create(dev, addr)) { + DEBUG("port %u fail to request MR creation for address (%p)", + dev->data->port_id, (void *)addr); + return UINT32_MAX; + } + rte_rwlock_read_lock(&priv->mr.rwlock); + /* Fill in output data and keep the lookup result itself. */ + lkey = mr_lookup_dev(dev, entry, addr); + /* Lookup can't fail. */ + MLX4_ASSERT(lkey != UINT32_MAX); + rte_rwlock_read_unlock(&priv->mr.rwlock); + DEBUG("port %u MR CREATED by primary process for %p:\n" + " [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x", + dev->data->port_id, (void *)addr, + entry->start, entry->end, lkey); + /* Return the lookup result, not entry->lkey, so a miss is reported. */ + return lkey; +} + +/** + * Create a new global Memory Region (MR) for a missing virtual address. + * Register entire virtually contiguous memory chunk around the address. + * This must be called from the primary process. 
+ * + * @param dev + * Pointer to Ethernet device. + * @param[out] entry + * Pointer to returning MR cache entry, found in the global cache or newly + * created. If failed to create one, this will not be updated. + * @param addr + * Target virtual address to register. + * + * @return + * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. + */ +uint32_t +mlx4_mr_create_primary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, + uintptr_t addr) { struct mlx4_priv *priv = dev->data->dev_private; - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; const struct rte_memseg_list *msl; const struct rte_memseg *ms; struct mlx4_mr *mr = NULL; @@ -563,14 +609,6 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, DEBUG("port %u creating a MR using address (%p)", dev->data->port_id, (void *)addr); - if (rte_eal_process_type() != RTE_PROC_PRIMARY) { - WARN("port %u using address (%p) of unregistered mempool" - " in secondary process, please create mempool" - " before rte_eth_dev_start()", - dev->data->port_id, (void *)addr); - rte_errno = EPERM; - goto err_nolock; - } /* * Release detached MRs if any. This can't be called with holding either * memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have @@ -580,14 +618,24 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, */ mlx4_mr_garbage_collect(dev); /* - * Find out a contiguous virtual address chunk in use, to which the - * given address belongs, in order to register maximum range. In the - * best case where mempools are not dynamically recreated and + * If enabled, find out a contiguous virtual address chunk in use, to + * which the given address belongs, in order to register maximum range. 
+ * In the best case where mempools are not dynamically recreated and * '--socket-mem' is specified as an EAL option, it is very likely to * have only one MR(LKey) per a socket and per a hugepage-size even - * though the system memory is highly fragmented. + * though the system memory is highly fragmented. As the whole memory + * chunk will be pinned by kernel, it can't be reused unless entire + * chunk is freed from EAL. + * + * If disabled, just register one memseg (page). Then, memory + * consumption will be minimized but it may drop performance if there + * are many MRs to lookup on the datapath. */ - if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) { + if (!priv->mr_ext_memseg_en) { + data.msl = rte_mem_virt2memseg_list((void *)addr); + data.start = RTE_ALIGN_FLOOR(addr, data.msl->page_sz); + data.end = data.start + data.msl->page_sz; + } else if (!rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data)) { WARN("port %u unable to find virtually contiguous" " chunk for address (%p)." " rte_memseg_contig_walk() failed.", @@ -597,12 +645,12 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, } alloc_resources: /* Addresses must be page-aligned. */ - assert(rte_is_aligned((void *)data.start, data.msl->page_sz)); - assert(rte_is_aligned((void *)data.end, data.msl->page_sz)); + MLX4_ASSERT(rte_is_aligned((void *)data.start, data.msl->page_sz)); + MLX4_ASSERT(rte_is_aligned((void *)data.end, data.msl->page_sz)); msl = data.msl; ms = rte_mem_virt2memseg((void *)data.start, msl); len = data.end - data.start; - assert(msl->page_sz == ms->hugepage_sz); + MLX4_ASSERT(msl->page_sz == ms->hugepage_sz); /* Number of memsegs in the range. 
*/ ms_n = len / msl->page_sz; DEBUG("port %u extending %p to [0x%" PRIxPTR ", 0x%" PRIxPTR ")," @@ -633,7 +681,7 @@ alloc_resources: bmp_mem = RTE_PTR_ALIGN_CEIL(mr + 1, RTE_CACHE_LINE_SIZE); mr->ms_bmp = rte_bitmap_init(ms_n, bmp_mem, bmp_size); if (mr->ms_bmp == NULL) { - WARN("port %u unable to initialize bitamp for a new MR of" + WARN("port %u unable to initialize bitmap for a new MR of" " address (%p).", dev->data->port_id, (void *)addr); rte_errno = EINVAL; @@ -647,7 +695,7 @@ alloc_resources: * just single page. If not, go on with the big chunk atomically from * here. */ - rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + rte_mcfg_mem_read_lock(); data_re = data; if (len > msl->page_sz && !rte_memseg_contig_walk(mr_find_contig_memsegs_cb, &data_re)) { @@ -665,11 +713,11 @@ alloc_resources: */ data.start = RTE_ALIGN_FLOOR(addr, msl->page_sz); data.end = data.start + msl->page_sz; - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + rte_mcfg_mem_read_unlock(); mr_free(mr); goto alloc_resources; } - assert(data.msl == data_re.msl); + MLX4_ASSERT(data.msl == data_re.msl); rte_rwlock_write_lock(&priv->mr.rwlock); /* * Check the address is really missing. If other thread already created @@ -685,7 +733,7 @@ alloc_resources: DEBUG("port %u found MR for %p on final lookup, abort", dev->data->port_id, (void *)addr); rte_rwlock_write_unlock(&priv->mr.rwlock); - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + rte_mcfg_mem_read_unlock(); /* * Must be unlocked before calling rte_free() because * mlx4_mr_mem_event_free_cb() can be called inside. @@ -722,7 +770,7 @@ alloc_resources: } len = data.end - data.start; mr->ms_bmp_n = len / msl->page_sz; - assert(ms_idx_shift + mr->ms_bmp_n <= ms_n); + MLX4_ASSERT(ms_idx_shift + mr->ms_bmp_n <= ms_n); /* * Finally create a verbs MR for the memory chunk. 
ibv_reg_mr() can be * called with holding the memory lock because it doesn't use @@ -737,8 +785,8 @@ alloc_resources: rte_errno = EINVAL; goto err_mrlock; } - assert((uintptr_t)mr->ibv_mr->addr == data.start); - assert(mr->ibv_mr->length == len); + MLX4_ASSERT((uintptr_t)mr->ibv_mr->addr == data.start); + MLX4_ASSERT(mr->ibv_mr->length == len); LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr); DEBUG("port %u MR CREATED (%p) for %p:\n" " [0x%" PRIxPTR ", 0x%" PRIxPTR ")," @@ -751,14 +799,14 @@ alloc_resources: /* Fill in output data. */ mr_lookup_dev(dev, entry, addr); /* Lookup can't fail. */ - assert(entry->lkey != UINT32_MAX); + MLX4_ASSERT(entry->lkey != UINT32_MAX); rte_rwlock_write_unlock(&priv->mr.rwlock); - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + rte_mcfg_mem_read_unlock(); return entry->lkey; err_mrlock: rte_rwlock_write_unlock(&priv->mr.rwlock); err_memlock: - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + rte_mcfg_mem_read_unlock(); err_nolock: /* * In case of error, as this can be called in a datapath, a warning @@ -770,6 +818,40 @@ err_nolock: return UINT32_MAX; } +/** + * Create a new global Memory Region (MR) for a missing virtual address. + * This can be called from primary and secondary process. + * + * @param dev + * Pointer to Ethernet device. + * @param[out] entry + * Pointer to returning MR cache entry, found in the global cache or newly + * created. If failed to create one, this will not be updated. + * @param addr + * Target virtual address to register. + * + * @return + * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. 
+ */ +static uint32_t +mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, + uintptr_t addr) +{ + uint32_t ret = UINT32_MAX; /* Failure unless a branch below succeeds. */ + + switch (rte_eal_process_type()) { + case RTE_PROC_PRIMARY: + ret = mlx4_mr_create_primary(dev, entry, addr); + break; + case RTE_PROC_SECONDARY: + ret = mlx4_mr_create_secondary(dev, entry, addr); + break; + default: + break; + } + return ret; +} + /** * Rebuild the global B-tree cache of device from the original MR list. * @@ -822,8 +904,9 @@ mlx4_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len) dev->data->port_id, addr, len); msl = rte_mem_virt2memseg_list(addr); /* addr and len must be page-aligned. */ - assert((uintptr_t)addr == RTE_ALIGN((uintptr_t)addr, msl->page_sz)); - assert(len == RTE_ALIGN(len, msl->page_sz)); + MLX4_ASSERT((uintptr_t)addr == + RTE_ALIGN((uintptr_t)addr, msl->page_sz)); + MLX4_ASSERT(len == RTE_ALIGN(len, msl->page_sz)); ms_n = len / msl->page_sz; rte_rwlock_write_lock(&priv->mr.rwlock); /* Clear bits of freed memsegs from MR. */ @@ -839,14 +922,14 @@ mlx4_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len) mr = mr_lookup_dev_list(dev, &entry, start); if (mr == NULL) continue; - assert(mr->msl); /* Can't be external memory. 
*/ ms = rte_mem_virt2memseg((void *)start, msl); - assert(ms != NULL); - assert(msl->page_sz == ms->hugepage_sz); + MLX4_ASSERT(ms != NULL); + MLX4_ASSERT(msl->page_sz == ms->hugepage_sz); ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); pos = ms_idx - mr->ms_base_idx; - assert(rte_bitmap_get(mr->ms_bmp, pos)); - assert(pos < mr->ms_bmp_n); + MLX4_ASSERT(rte_bitmap_get(mr->ms_bmp, pos)); + MLX4_ASSERT(pos < mr->ms_bmp_n); DEBUG("port %u MR(%p): clear bitmap[%u] for addr %p", dev->data->port_id, (void *)mr, pos, (void *)start); rte_bitmap_clear(mr->ms_bmp, pos); @@ -879,7 +962,7 @@ mlx4_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len) rte_smp_wmb(); } rte_rwlock_write_unlock(&priv->mr.rwlock); -#ifndef NDEBUG +#ifdef RTE_LIBRTE_MLX4_DEBUG if (rebuild) mlx4_mr_dump_dev(dev); #endif @@ -903,7 +986,7 @@ mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr, struct mlx4_dev_list *dev_list = &mlx4_shared_data->mem_event_cb_list; /* Must be called from the primary process. 
*/ - assert(rte_eal_process_type() == RTE_PROC_PRIMARY); + MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); switch (event_type) { case RTE_MEM_EVENT_FREE: rte_rwlock_read_lock(&mlx4_shared_data->mem_event_rwlock); @@ -1039,8 +1122,6 @@ mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr) struct mlx4_mr_ctrl *mr_ctrl = &rxq->mr_ctrl; struct mlx4_priv *priv = rxq->priv; - DEBUG("Rx queue %u: miss on top-half, mru=%u, head=%u, addr=%p", - rxq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr); return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr); } @@ -1061,8 +1142,6 @@ mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t addr) struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl; struct mlx4_priv *priv = txq->priv; - DEBUG("Tx queue %u: miss on top-half, mru=%u, head=%u, addr=%p", - txq->stats.idx, mr_ctrl->mru, mr_ctrl->head, (void *)addr); return mlx4_mr_addr2mr_bh(ETH_DEV(priv), mr_ctrl, addr); } @@ -1143,7 +1222,7 @@ mlx4_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque, struct mlx4_mr_cache entry; uint32_t lkey; - assert(rte_eal_process_type() == RTE_PROC_PRIMARY); + MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); /* If already registered, it should return. */ rte_rwlock_read_lock(&priv->mr.rwlock); lkey = mr_lookup_dev(dev, &entry, addr); @@ -1301,7 +1380,7 @@ mlx4_mr_update_mp(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl, return data.ret; } -#ifndef NDEBUG +#ifdef RTE_LIBRTE_MLX4_DEBUG /** * Dump all the created MRs and the global cache entries. * @@ -1355,17 +1434,18 @@ void mlx4_mr_release(struct rte_eth_dev *dev) { struct mlx4_priv *priv = dev->data->dev_private; - struct mlx4_mr *mr_next = LIST_FIRST(&priv->mr.mr_list); + struct mlx4_mr *mr_next; /* Remove from memory callback device list. 
*/ rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock); LIST_REMOVE(priv, mem_event_cb); rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock); -#ifndef NDEBUG +#ifdef RTE_LIBRTE_MLX4_DEBUG mlx4_mr_dump_dev(dev); #endif rte_rwlock_write_lock(&priv->mr.rwlock); /* Detach from MR list and move to free list. */ + mr_next = LIST_FIRST(&priv->mr.mr_list); while (mr_next != NULL) { struct mlx4_mr *mr = mr_next;