regex/mlx5: fix memory region unregistration
author Michael Baum <michaelba@nvidia.com>
Mon, 12 Jul 2021 07:06:42 +0000 (10:06 +0300)
committer Thomas Monjalon <thomas@monjalon.net>
Thu, 22 Jul 2021 13:19:30 +0000 (15:19 +0200)
The issue can cause an illegal physical address access when huge-page A
is released and huge-page B is allocated at the same virtual address.
The old MR can still be matched using the virtual address of huge-page B,
but the HW will access the physical address of huge-page A, which is no
longer part of the DPDK process.

Register a driver callback for memory events in order to free all the
MRs covering memory that is about to be freed from the DPDK process.
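
For context, a minimal sketch of the memory-event pattern under stated
assumptions: struct dummy_priv, dev_list and drop_mrs_in_range() are
illustrative stand-ins only; the driver's real per-device object is
struct mlx5_regex_priv and the real cleanup helper is
mlx5_free_mr_by_addr().

    #include <pthread.h>
    #include <sys/queue.h>
    #include <rte_common.h>
    #include <rte_memory.h>

    /* Illustrative per-device state; stands in for mlx5_regex_priv. */
    struct dummy_priv {
            TAILQ_ENTRY(dummy_priv) mem_event_cb;
    };

    static TAILQ_HEAD(, dummy_priv) dev_list =
                                    TAILQ_HEAD_INITIALIZER(dev_list);
    static pthread_mutex_t dev_list_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Hypothetical stand-in for mlx5_free_mr_by_addr(). */
    static void
    drop_mrs_in_range(struct dummy_priv *priv, const void *addr, size_t len)
    {
            (void)priv; (void)addr; (void)len;
    }

    static void
    mem_event_cb(enum rte_mem_event event_type, const void *addr,
                 size_t len, void *arg __rte_unused)
    {
            struct dummy_priv *priv;

            if (event_type != RTE_MEM_EVENT_FREE)
                    return;
            /* The range is about to leave the process: invalidate any MR
             * overlapping [addr, addr + len) on every device before the
             * pages can be remapped at the same virtual address.
             */
            pthread_mutex_lock(&dev_list_lock);
            TAILQ_FOREACH(priv, &dev_list, mem_event_cb)
                    drop_mrs_in_range(priv, addr, len);
            pthread_mutex_unlock(&dev_list_lock);
    }

The callback is registered once, from the primary process, when the first
device is probed:

    rte_mem_event_callback_register("MY_MEM_EVENT_CB", mem_event_cb, NULL);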

Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to datapath")
Cc: stable@dpdk.org
Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Ori Kam <orika@nvidia.com>
drivers/regex/mlx5/mlx5_regex.c
drivers/regex/mlx5/mlx5_regex.h
drivers/regex/mlx5/mlx5_regex_control.c
drivers/regex/mlx5/mlx5_regex_fastpath.c

index 9d93eaa..84ba2e5 100644
@@ -12,6 +12,7 @@
 #include <rte_bus_pci.h>
 
 #include <mlx5_common.h>
+#include <mlx5_common_mr.h>
 #include <mlx5_glue.h>
 #include <mlx5_devx_cmds.h>
 #include <mlx5_prm.h>
 
 int mlx5_regex_logtype;
 
+TAILQ_HEAD(regex_mem_event, mlx5_regex_priv) mlx5_mem_event_list =
+                               TAILQ_HEAD_INITIALIZER(mlx5_mem_event_list);
+static pthread_mutex_t mem_event_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
 const struct rte_regexdev_ops mlx5_regexdev_ops = {
        .dev_info_get = mlx5_regex_info_get,
        .dev_configure = mlx5_regex_configure,
@@ -81,6 +86,40 @@ mlx5_regex_get_name(char *name, struct rte_device *dev)
        sprintf(name, "mlx5_regex_%s", dev->name);
 }
 
+/**
+ * Callback for memory event.
+ *
+ * @param event_type
+ *   Memory event type.
+ * @param addr
+ *   Address of memory.
+ * @param len
+ *   Size of memory.
+ */
+static void
+mlx5_regex_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
+                          size_t len, void *arg __rte_unused)
+{
+       struct mlx5_regex_priv *priv;
+
+       /* Must be called from the primary process. */
+       MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+       switch (event_type) {
+       case RTE_MEM_EVENT_FREE:
+               pthread_mutex_lock(&mem_event_list_lock);
+               /* Iterate all the existing mlx5 devices. */
+               TAILQ_FOREACH(priv, &mlx5_mem_event_list, mem_event_cb)
+                       mlx5_free_mr_by_addr(&priv->mr_scache,
+                                            priv->ctx->device->name,
+                                            addr, len);
+               pthread_mutex_unlock(&mem_event_list_lock);
+               break;
+       case RTE_MEM_EVENT_ALLOC:
+       default:
+               break;
+       }
+}
+
 static int
 mlx5_regex_dev_probe(struct rte_device *rte_dev)
 {
@@ -185,6 +224,15 @@ mlx5_regex_dev_probe(struct rte_device *rte_dev)
            rte_errno = ENOMEM;
                goto error;
        }
+       /* Register callback function for global shared MR cache management. */
+       if (TAILQ_EMPTY(&mlx5_mem_event_list))
+               rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
+                                               mlx5_regex_mr_mem_event_cb,
+                                               NULL);
+       /* Add device to memory callback list. */
+       pthread_mutex_lock(&mem_event_list_lock);
+       TAILQ_INSERT_TAIL(&mlx5_mem_event_list, priv, mem_event_cb);
+       pthread_mutex_unlock(&mem_event_list_lock);
        DRV_LOG(INFO, "RegEx GGA is %s.",
                priv->has_umr ? "supported" : "unsupported");
        return 0;
@@ -217,6 +265,13 @@ mlx5_regex_dev_remove(struct rte_device *rte_dev)
                return 0;
        priv = dev->data->dev_private;
        if (priv) {
+               /* Remove from memory callback device list. */
+               pthread_mutex_lock(&mem_event_list_lock);
+               TAILQ_REMOVE(&mlx5_mem_event_list, priv, mem_event_cb);
+               pthread_mutex_unlock(&mem_event_list_lock);
+               if (TAILQ_EMPTY(&mlx5_mem_event_list))
+                       rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
+                                                         NULL);
                if (priv->pd)
                        mlx5_glue->dealloc_pd(priv->pd);
                if (priv->uar)
index 45200bf..514f340 100644
@@ -69,6 +69,8 @@ struct mlx5_regex_priv {
        uint32_t nb_engines; /* Number of RegEx engines. */
        struct mlx5dv_devx_uar *uar; /* UAR object. */
        struct ibv_pd *pd;
+       TAILQ_ENTRY(mlx5_regex_priv) mem_event_cb;
+       /**< Called by memory event callback. */
        struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */
        uint8_t is_bf2; /* The device is BF2 device. */
        uint8_t sq_ts_format; /* Whether SQ supports timestamp formats. */
index eef0fe5..8ce2dab 100644
@@ -246,6 +246,8 @@ mlx5_regex_qp_setup(struct rte_regexdev *dev, uint16_t qp_ind,
                nb_sq_config++;
        }
 
+       /* Save pointer of global generation number to check memory event. */
+       qp->mr_ctrl.dev_gen_ptr = &priv->mr_scache.dev_gen;
        ret = mlx5_mr_btree_init(&qp->mr_ctrl.cache_bh, MLX5_MR_BTREE_CACHE_N,
                                 rte_socket_id());
        if (ret) {
index 910bc84..786718a 100644
@@ -109,6 +109,40 @@ set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode,
        seg->imm = imm;
 }
 
+/**
+ * Query LKey from a packet buffer for QP. If not found, add the mempool.
+ *
+ * @param priv
+ *   Pointer to the priv object.
+ * @param mr_ctrl
+ *   Pointer to per-queue MR control structure.
+ * @param mbuf
+ *   Pointer to source mbuf, to search in.
+ *
+ * @return
+ *   Searched LKey on success, UINT32_MAX on no match.
+ */
+static inline uint32_t
+mlx5_regex_addr2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl *mr_ctrl,
+                  struct rte_mbuf *mbuf)
+{
+       uintptr_t addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
+       uint32_t lkey;
+
+       /* Check generation bit to see if there's any change on existing MRs. */
+       if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
+               mlx5_mr_flush_local_cache(mr_ctrl);
+       /* Linear search on MR cache array. */
+       lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
+                                  MLX5_MR_CACHE_N, addr);
+       if (likely(lkey != UINT32_MAX))
+               return lkey;
+       /* Take slower bottom-half on miss. */
+       return mlx5_mr_addr2mr_bh(priv->pd, 0, &priv->mr_scache, mr_ctrl, addr,
+                                 !!(mbuf->ol_flags & EXT_ATTACHED_MBUF));
+}
+
+
 static inline void
 __prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_sq *sq,
           struct rte_regex_ops *op, struct mlx5_regex_job *job,
@@ -160,10 +194,7 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
        struct mlx5_klm klm;
 
        klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
-       klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, 0,
-                                 &priv->mr_scache, &qp->mr_ctrl,
-                                 rte_pktmbuf_mtod(op->mbuf, uintptr_t),
-                                 !!(op->mbuf->ol_flags & EXT_ATTACHED_MBUF));
+       klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, op->mbuf);
        klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
        __prep_one(priv, sq, op, job, sq->pi, &klm);
        sq->db_pi = sq->pi;
@@ -329,10 +360,8 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
                                        (qp->jobs[mkey_job_id].imkey->id);
                        while (mbuf) {
                                /* Build indirect mkey seg's KLM. */
-                               mkey_klm->mkey = mlx5_mr_addr2mr_bh(priv->pd,
-                                       NULL, &priv->mr_scache, &qp->mr_ctrl,
-                                       rte_pktmbuf_mtod(mbuf, uintptr_t),
-                                       !!(mbuf->ol_flags & EXT_ATTACHED_MBUF));
+                               mkey_klm->mkey = mlx5_regex_addr2mr
+                                               (priv, &qp->mr_ctrl, mbuf);
                                mkey_klm->address = rte_cpu_to_be_64
                                        (rte_pktmbuf_mtod(mbuf, uintptr_t));
                                mkey_klm->byte_count = rte_cpu_to_be_32
@@ -350,10 +379,7 @@ prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
                        klm.byte_count = scatter_size;
                } else {
                       /* The single mbuf case. Build the KLM directly. */
-                       klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, NULL,
-                                       &priv->mr_scache, &qp->mr_ctrl,
-                                       rte_pktmbuf_mtod(mbuf, uintptr_t),
-                                       !!(mbuf->ol_flags & EXT_ATTACHED_MBUF));
+                       klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, mbuf);
                        klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);
                        klm.byte_count = rte_pktmbuf_data_len(mbuf);
                }