From 2f6c2adbe550ea95a0f73c4f9a9cc5da890b9bf2 Mon Sep 17 00:00:00 2001 From: Michael Baum Date: Mon, 28 Jun 2021 18:06:14 +0300 Subject: [PATCH] common/mlx5: fix memory region leak All the mlx5 drivers using MRs for data-path must unregister the mapped memory when it is freed by the dpdk process. Currently, only the net/eth driver unregisters MRs in free event. Move the net callback handler from net driver to common. Cc: stable@dpdk.org Signed-off-by: Michael Baum Acked-by: Matan Azrad --- drivers/common/mlx5/mlx5_common_mr.c | 89 +++++++++++++++++++++++++++ drivers/common/mlx5/mlx5_common_mr.h | 3 + drivers/common/mlx5/version.map | 1 + drivers/net/mlx5/mlx5_mr.c | 90 +--------------------------- 4 files changed, 95 insertions(+), 88 deletions(-) diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c index afb5b3d0a7..98fe8698e2 100644 --- a/drivers/common/mlx5/mlx5_common_mr.c +++ b/drivers/common/mlx5/mlx5_common_mr.c @@ -1062,6 +1062,95 @@ mlx5_create_mr_ext(void *pd, uintptr_t addr, size_t len, int socket_id, return mr; } +/** + * Callback for memory free event. Iterate freed memsegs and check whether it + * belongs to an existing MR. If found, clear the bit from bitmap of MR. As a + * result, the MR would be fragmented. If it becomes empty, the MR will be freed + * later by mlx5_mr_garbage_collect(). Even if this callback is called from a + * secondary process, the garbage collector will be called in primary process + * as the secondary process can't call mlx5_mr_create(). + * + * The global cache must be rebuilt if there's any change and this event has to + * be propagated to dataplane threads to flush the local caches. + * + * @param share_cache + * Pointer to a global shared MR cache. + * @param ibdev_name + * Name of ibv device. + * @param addr + * Address of freed memory. + * @param len + * Size of freed memory. + */ +void +mlx5_free_mr_by_addr(struct mlx5_mr_share_cache *share_cache, + const char *ibdev_name, const void *addr, size_t len) +{ + const struct rte_memseg_list *msl; + struct mlx5_mr *mr; + int ms_n; + int i; + int rebuild = 0; + + DRV_LOG(DEBUG, "device %s free callback: addr=%p, len=%zu", + ibdev_name, addr, len); + msl = rte_mem_virt2memseg_list(addr); + /* addr and len must be page-aligned. */ + MLX5_ASSERT((uintptr_t)addr == + RTE_ALIGN((uintptr_t)addr, msl->page_sz)); + MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz)); + ms_n = len / msl->page_sz; + rte_rwlock_write_lock(&share_cache->rwlock); + /* Clear bits of freed memsegs from MR. */ + for (i = 0; i < ms_n; ++i) { + const struct rte_memseg *ms; + struct mr_cache_entry entry; + uintptr_t start; + int ms_idx; + uint32_t pos; + + /* Find MR having this memseg. */ + start = (uintptr_t)addr + i * msl->page_sz; + mr = mlx5_mr_lookup_list(share_cache, &entry, start); + if (mr == NULL) + continue; + MLX5_ASSERT(mr->msl); /* Can't be external memory. */ + ms = rte_mem_virt2memseg((void *)start, msl); + MLX5_ASSERT(ms != NULL); + MLX5_ASSERT(msl->page_sz == ms->hugepage_sz); + ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); + pos = ms_idx - mr->ms_base_idx; + MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos)); + MLX5_ASSERT(pos < mr->ms_bmp_n); + DRV_LOG(DEBUG, "device %s MR(%p): clear bitmap[%u] for addr %p", + ibdev_name, (void *)mr, pos, (void *)start); + rte_bitmap_clear(mr->ms_bmp, pos); + if (--mr->ms_n == 0) { + LIST_REMOVE(mr, mr); + LIST_INSERT_HEAD(&share_cache->mr_free_list, mr, mr); + DRV_LOG(DEBUG, "device %s remove MR(%p) from list", + ibdev_name, (void *)mr); + } + /* + * MR is fragmented or will be freed. the global cache must be + * rebuilt. + */ + rebuild = 1; + } + if (rebuild) { + mlx5_mr_rebuild_cache(share_cache); + /* + * No explicit wmb is needed after updating dev_gen due to + * store-release ordering in unlock that provides the + * implicit barrier at the software visible level. + */ + ++share_cache->dev_gen; + DRV_LOG(DEBUG, "broadcasting local cache flush, gen=%d", + share_cache->dev_gen); + } + rte_rwlock_write_unlock(&share_cache->rwlock); +} + /** * Dump all the created MRs and the global cache entries. * diff --git a/drivers/common/mlx5/mlx5_common_mr.h b/drivers/common/mlx5/mlx5_common_mr.h index 5cc3f097c2..6e465a05e9 100644 --- a/drivers/common/mlx5/mlx5_common_mr.h +++ b/drivers/common/mlx5/mlx5_common_mr.h @@ -144,6 +144,9 @@ void mlx5_mr_rebuild_cache(struct mlx5_mr_share_cache *share_cache); __rte_internal void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl); __rte_internal +void mlx5_free_mr_by_addr(struct mlx5_mr_share_cache *share_cache, + const char *ibdev_name, const void *addr, size_t len); +__rte_internal int mlx5_mr_insert_cache(struct mlx5_mr_share_cache *share_cache, struct mlx5_mr *mr); diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map index db4f13f1f7..b8be73a77b 100644 --- a/drivers/common/mlx5/version.map +++ b/drivers/common/mlx5/version.map @@ -103,6 +103,7 @@ INTERNAL { mlx5_mr_insert_cache; mlx5_mr_lookup_cache; mlx5_mr_lookup_list; + mlx5_free_mr_by_addr; mlx5_mr_rebuild_cache; mlx5_mr_release_cache; diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c index 0c5403e493..0b6cfc8cb9 100644 --- a/drivers/net/mlx5/mlx5_mr.c +++ b/drivers/net/mlx5/mlx5_mr.c @@ -31,93 +31,6 @@ struct mr_update_mp_data { int ret; }; -/** - * Callback for memory free event. Iterate freed memsegs and check whether it - * belongs to an existing MR. If found, clear the bit from bitmap of MR. As a - * result, the MR would be fragmented. If it becomes empty, the MR will be freed - * later by mlx5_mr_garbage_collect(). Even if this callback is called from a - * secondary process, the garbage collector will be called in primary process - * as the secondary process can't call mlx5_mr_create(). - * - * The global cache must be rebuilt if there's any change and this event has to - * be propagated to dataplane threads to flush the local caches. - * - * @param sh - * Pointer to the Ethernet device shared context. - * @param addr - * Address of freed memory. - * @param len - * Size of freed memory. - */ -static void -mlx5_mr_mem_event_free_cb(struct mlx5_dev_ctx_shared *sh, - const void *addr, size_t len) -{ - const struct rte_memseg_list *msl; - struct mlx5_mr *mr; - int ms_n; - int i; - int rebuild = 0; - - DRV_LOG(DEBUG, "device %s free callback: addr=%p, len=%zu", - sh->ibdev_name, addr, len); - msl = rte_mem_virt2memseg_list(addr); - /* addr and len must be page-aligned. */ - MLX5_ASSERT((uintptr_t)addr == - RTE_ALIGN((uintptr_t)addr, msl->page_sz)); - MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz)); - ms_n = len / msl->page_sz; - rte_rwlock_write_lock(&sh->share_cache.rwlock); - /* Clear bits of freed memsegs from MR. */ - for (i = 0; i < ms_n; ++i) { - const struct rte_memseg *ms; - struct mr_cache_entry entry; - uintptr_t start; - int ms_idx; - uint32_t pos; - - /* Find MR having this memseg. */ - start = (uintptr_t)addr + i * msl->page_sz; - mr = mlx5_mr_lookup_list(&sh->share_cache, &entry, start); - if (mr == NULL) - continue; - MLX5_ASSERT(mr->msl); /* Can't be external memory. */ - ms = rte_mem_virt2memseg((void *)start, msl); - MLX5_ASSERT(ms != NULL); - MLX5_ASSERT(msl->page_sz == ms->hugepage_sz); - ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms); - pos = ms_idx - mr->ms_base_idx; - MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos)); - MLX5_ASSERT(pos < mr->ms_bmp_n); - DRV_LOG(DEBUG, "device %s MR(%p): clear bitmap[%u] for addr %p", - sh->ibdev_name, (void *)mr, pos, (void *)start); - rte_bitmap_clear(mr->ms_bmp, pos); - if (--mr->ms_n == 0) { - LIST_REMOVE(mr, mr); - LIST_INSERT_HEAD(&sh->share_cache.mr_free_list, mr, mr); - DRV_LOG(DEBUG, "device %s remove MR(%p) from list", - sh->ibdev_name, (void *)mr); - } - /* - * MR is fragmented or will be freed. the global cache must be - * rebuilt. - */ - rebuild = 1; - } - if (rebuild) { - mlx5_mr_rebuild_cache(&sh->share_cache); - /* - * No explicit wmb is needed after updating dev_gen due to - * store-release ordering in unlock that provides the - * implicit barrier at the software visible level. - */ - ++sh->share_cache.dev_gen; - DRV_LOG(DEBUG, "broadcasting local cache flush, gen=%d", - sh->share_cache.dev_gen); - } - rte_rwlock_write_unlock(&sh->share_cache.rwlock); -} - /** * Callback for memory event. This can be called from both primary and secondary * process. @@ -143,7 +56,8 @@ mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr, rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock); /* Iterate all the existing mlx5 devices. */ LIST_FOREACH(sh, dev_list, mem_event_cb) - mlx5_mr_mem_event_free_cb(sh, addr, len); + mlx5_free_mr_by_addr(&sh->share_cache, + sh->ibdev_name, addr, len); rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock); break; case RTE_MEM_EVENT_ALLOC: -- 2.20.1