From 29ca3215f391c8b1af866341e95c3249631f9679 Mon Sep 17 00:00:00 2001 From: Michael Baum Date: Mon, 12 Jul 2021 10:06:42 +0300 Subject: [PATCH] regex/mlx5: fix memory region unregistration The issue can cause illegal physical address access when huge-page A is released and huge-page B is allocated at the same virtual address. The old MR can be matched using the virtual address of huge-page B, but the HW will access the physical address of huge-page A, which is no longer part of the DPDK process. Register a driver callback for memory events in order to free all the MRs of memory that is going to be freed from the DPDK process. Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to datapath") Cc: stable@dpdk.org Signed-off-by: Michael Baum Acked-by: Ori Kam --- drivers/regex/mlx5/mlx5_regex.c | 55 ++++++++++++++++++++++++ drivers/regex/mlx5/mlx5_regex.h | 2 + drivers/regex/mlx5/mlx5_regex_control.c | 2 + drivers/regex/mlx5/mlx5_regex_fastpath.c | 50 +++++++++++++++------ 4 files changed, 97 insertions(+), 12 deletions(-) diff --git a/drivers/regex/mlx5/mlx5_regex.c b/drivers/regex/mlx5/mlx5_regex.c index 9d93eaa934..84ba2e5224 100644 --- a/drivers/regex/mlx5/mlx5_regex.c +++ b/drivers/regex/mlx5/mlx5_regex.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,10 @@ int mlx5_regex_logtype; +TAILQ_HEAD(regex_mem_event, mlx5_regex_priv) mlx5_mem_event_list = + TAILQ_HEAD_INITIALIZER(mlx5_mem_event_list); +static pthread_mutex_t mem_event_list_lock = PTHREAD_MUTEX_INITIALIZER; + const struct rte_regexdev_ops mlx5_regexdev_ops = { .dev_info_get = mlx5_regex_info_get, .dev_configure = mlx5_regex_configure, @@ -81,6 +86,40 @@ mlx5_regex_get_name(char *name, struct rte_device *dev) sprintf(name, "mlx5_regex_%s", dev->name); } +/** + * Callback for memory event. + * + * @param event_type + * Memory event type. + * @param addr + * Address of memory. + * @param len + * Size of memory. 
+ */ +static void +mlx5_regex_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr, + size_t len, void *arg __rte_unused) +{ + struct mlx5_regex_priv *priv; + + /* Must be called from the primary process. */ + MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); + switch (event_type) { + case RTE_MEM_EVENT_FREE: + pthread_mutex_lock(&mem_event_list_lock); + /* Iterate all the existing mlx5 devices. */ + TAILQ_FOREACH(priv, &mlx5_mem_event_list, mem_event_cb) + mlx5_free_mr_by_addr(&priv->mr_scache, + priv->ctx->device->name, + addr, len); + pthread_mutex_unlock(&mem_event_list_lock); + break; + case RTE_MEM_EVENT_ALLOC: + default: + break; + } +} + static int mlx5_regex_dev_probe(struct rte_device *rte_dev) { @@ -185,6 +224,15 @@ mlx5_regex_dev_probe(struct rte_device *rte_dev) rte_errno = ENOMEM; goto error; } + /* Register callback function for global shared MR cache management. */ + if (TAILQ_EMPTY(&mlx5_mem_event_list)) + rte_mem_event_callback_register("MLX5_MEM_EVENT_CB", + mlx5_regex_mr_mem_event_cb, + NULL); + /* Add device to memory callback list. */ + pthread_mutex_lock(&mem_event_list_lock); + TAILQ_INSERT_TAIL(&mlx5_mem_event_list, priv, mem_event_cb); + pthread_mutex_unlock(&mem_event_list_lock); DRV_LOG(INFO, "RegEx GGA is %s.", priv->has_umr ? "supported" : "unsupported"); return 0; @@ -217,6 +265,13 @@ mlx5_regex_dev_remove(struct rte_device *rte_dev) return 0; priv = dev->data->dev_private; if (priv) { + /* Remove from memory callback device list. 
*/ + pthread_mutex_lock(&mem_event_list_lock); + TAILQ_REMOVE(&mlx5_mem_event_list, priv, mem_event_cb); + pthread_mutex_unlock(&mem_event_list_lock); + if (TAILQ_EMPTY(&mlx5_mem_event_list)) + rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB", + NULL); if (priv->pd) mlx5_glue->dealloc_pd(priv->pd); if (priv->uar) diff --git a/drivers/regex/mlx5/mlx5_regex.h b/drivers/regex/mlx5/mlx5_regex.h index 45200bf937..514f3408f9 100644 --- a/drivers/regex/mlx5/mlx5_regex.h +++ b/drivers/regex/mlx5/mlx5_regex.h @@ -69,6 +69,8 @@ struct mlx5_regex_priv { uint32_t nb_engines; /* Number of RegEx engines. */ struct mlx5dv_devx_uar *uar; /* UAR object. */ struct ibv_pd *pd; + TAILQ_ENTRY(mlx5_regex_priv) mem_event_cb; + /**< Called by memory event callback. */ struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */ uint8_t is_bf2; /* The device is BF2 device. */ uint8_t sq_ts_format; /* Whether SQ supports timestamp formats. */ diff --git a/drivers/regex/mlx5/mlx5_regex_control.c b/drivers/regex/mlx5/mlx5_regex_control.c index eef0fe579d..8ce2dabb55 100644 --- a/drivers/regex/mlx5/mlx5_regex_control.c +++ b/drivers/regex/mlx5/mlx5_regex_control.c @@ -246,6 +246,8 @@ mlx5_regex_qp_setup(struct rte_regexdev *dev, uint16_t qp_ind, nb_sq_config++; } + /* Save pointer of global generation number to check memory event. */ + qp->mr_ctrl.dev_gen_ptr = &priv->mr_scache.dev_gen; ret = mlx5_mr_btree_init(&qp->mr_ctrl.cache_bh, MLX5_MR_BTREE_CACHE_N, rte_socket_id()); if (ret) { diff --git a/drivers/regex/mlx5/mlx5_regex_fastpath.c b/drivers/regex/mlx5/mlx5_regex_fastpath.c index 910bc845f3..786718af53 100644 --- a/drivers/regex/mlx5/mlx5_regex_fastpath.c +++ b/drivers/regex/mlx5/mlx5_regex_fastpath.c @@ -109,6 +109,40 @@ set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode, seg->imm = imm; } +/** + * Query LKey from a packet buffer for QP. If not found, add the mempool. + * + * @param priv + * Pointer to the priv object. 
+ * @param mr_ctrl + * Pointer to per-queue MR control structure. + * @param mbuf + * Pointer to source mbuf, to search in. + * + * @return + * Searched LKey on success, UINT32_MAX on no match. + */ +static inline uint32_t +mlx5_regex_addr2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl *mr_ctrl, + struct rte_mbuf *mbuf) +{ + uintptr_t addr = rte_pktmbuf_mtod(mbuf, uintptr_t); + uint32_t lkey; + + /* Check generation bit to see if there's any change on existing MRs. */ + if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen)) + mlx5_mr_flush_local_cache(mr_ctrl); + /* Linear search on MR cache array. */ + lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru, + MLX5_MR_CACHE_N, addr); + if (likely(lkey != UINT32_MAX)) + return lkey; + /* Take slower bottom-half on miss. */ + return mlx5_mr_addr2mr_bh(priv->pd, 0, &priv->mr_scache, mr_ctrl, addr, + !!(mbuf->ol_flags & EXT_ATTACHED_MBUF)); +} + + static inline void __prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_sq *sq, struct rte_regex_ops *op, struct mlx5_regex_job *job, @@ -160,10 +194,7 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp, struct mlx5_klm klm; klm.byte_count = rte_pktmbuf_data_len(op->mbuf); - klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, 0, - &priv->mr_scache, &qp->mr_ctrl, - rte_pktmbuf_mtod(op->mbuf, uintptr_t), - !!(op->mbuf->ol_flags & EXT_ATTACHED_MBUF)); + klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, op->mbuf); klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t); __prep_one(priv, sq, op, job, sq->pi, &klm); sq->db_pi = sq->pi; @@ -329,10 +360,8 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp, (qp->jobs[mkey_job_id].imkey->id); while (mbuf) { /* Build indirect mkey seg's KLM. 
*/ - mkey_klm->mkey = mlx5_mr_addr2mr_bh(priv->pd, - NULL, &priv->mr_scache, &qp->mr_ctrl, - rte_pktmbuf_mtod(mbuf, uintptr_t), - !!(mbuf->ol_flags & EXT_ATTACHED_MBUF)); + mkey_klm->mkey = mlx5_regex_addr2mr + (priv, &qp->mr_ctrl, mbuf); mkey_klm->address = rte_cpu_to_be_64 (rte_pktmbuf_mtod(mbuf, uintptr_t)); mkey_klm->byte_count = rte_cpu_to_be_32 @@ -350,10 +379,7 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp, klm.byte_count = scatter_size; } else { /* The single mubf case. Build the KLM directly. */ - klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, NULL, - &priv->mr_scache, &qp->mr_ctrl, - rte_pktmbuf_mtod(mbuf, uintptr_t), - !!(mbuf->ol_flags & EXT_ATTACHED_MBUF)); + klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, mbuf); klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t); klm.byte_count = rte_pktmbuf_data_len(mbuf); } -- 2.20.1