From: Yongseok Koh Date: Mon, 1 Apr 2019 21:17:57 +0000 (-0700) Subject: net/mlx4: enable secondary process to register DMA memory X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=0b259b8e965575cdbd622c48b6ae5b04bed7aa73;p=dpdk.git net/mlx4: enable secondary process to register DMA memory The Memory Region (MR) for DMA memory can't be created from a secondary process due to a lib/driver limitation. Whenever one is needed, the secondary process can make a request to the primary process through the EAL IPC channel (rte_mp_msg), which is established on initialization. Once a MR is created by the primary process, it is immediately visible to the secondary process because the MR list is global per device. Thus, the secondary process can look up the list after the request has successfully returned. Signed-off-by: Yongseok Koh Acked-by: Shahaf Shuler --- diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst index c8a02be4dd..aaf1907532 100644 --- a/doc/guides/nics/mlx4.rst +++ b/doc/guides/nics/mlx4.rst @@ -159,7 +159,6 @@ Limitations - For secondary process: - Forked secondary process not supported. - - All mempools must be initialized before rte_eth_dev_start(). - External memory unregistered in EAL memseg list cannot be used for DMA unless such memory has been registered by ``mlx4_mr_update_ext_mp()`` in primary process and remapped to the same virtual address in secondary diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 4ff98d772b..1db23d6cc9 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -79,6 +79,7 @@ enum { /* Request types for IPC. */ enum mlx4_mp_req_type { MLX4_MP_REQ_VERBS_CMD_FD = 1, + MLX4_MP_REQ_CREATE_MR, MLX4_MP_REQ_START_RXTX, MLX4_MP_REQ_STOP_RXTX, }; @@ -88,6 +89,10 @@ struct mlx4_mp_param { enum mlx4_mp_req_type type; int port_id; int result; + RTE_STD_C11 + union { + uintptr_t addr; /* MLX4_MP_REQ_CREATE_MR */ + } args; }; /** Request timeout for IPC. 
*/ @@ -235,6 +240,7 @@ int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx); /* mlx4_mp.c */ void mlx4_mp_req_start_rxtx(struct rte_eth_dev *dev); void mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev); +int mlx4_mp_req_mr_create(struct rte_eth_dev *dev, uintptr_t addr); int mlx4_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev); void mlx4_mp_init_primary(void); void mlx4_mp_uninit_primary(void); diff --git a/drivers/net/mlx4/mlx4_mp.c b/drivers/net/mlx4/mlx4_mp.c index eaeb257348..183622453c 100644 --- a/drivers/net/mlx4/mlx4_mp.c +++ b/drivers/net/mlx4/mlx4_mp.c @@ -58,6 +58,8 @@ mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer) (const struct mlx4_mp_param *)mp_msg->param; struct rte_eth_dev *dev; struct mlx4_priv *priv; + struct mlx4_mr_cache entry; + uint32_t lkey; int ret; assert(rte_eal_process_type() == RTE_PROC_PRIMARY); @@ -69,6 +71,13 @@ mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer) dev = &rte_eth_devices[param->port_id]; priv = dev->data->dev_private; switch (param->type) { + case MLX4_MP_REQ_CREATE_MR: + mp_init_msg(dev, &mp_res, param->type); + lkey = mlx4_mr_create_primary(dev, &entry, param->args.addr); + if (lkey == UINT32_MAX) + res->result = -rte_errno; + ret = rte_mp_reply(&mp_res, peer); + break; case MLX4_MP_REQ_VERBS_CMD_FD: mp_init_msg(dev, &mp_res, param->type); mp_res.num_fds = 1; @@ -217,6 +226,47 @@ mlx4_mp_req_stop_rxtx(struct rte_eth_dev *dev) mp_req_on_rxtx(dev, MLX4_MP_REQ_STOP_RXTX); } +/** + * Request Memory Region creation to the primary process. + * + * @param[in] dev + * Pointer to Ethernet structure. + * @param addr + * Target virtual address to register. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +int +mlx4_mp_req_mr_create(struct rte_eth_dev *dev, uintptr_t addr) +{ + struct rte_mp_msg mp_req; + struct rte_mp_msg *mp_res; + struct rte_mp_reply mp_rep; + struct mlx4_mp_param *req = (struct mlx4_mp_param *)mp_req.param; + struct mlx4_mp_param *res; + struct timespec ts = {.tv_sec = MLX4_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0}; + int ret; + + assert(rte_eal_process_type() == RTE_PROC_SECONDARY); + mp_init_msg(dev, &mp_req, MLX4_MP_REQ_CREATE_MR); + req->args.addr = addr; + ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts); + if (ret) { + ERROR("port %u request to primary process failed", + dev->data->port_id); + return -rte_errno; + } + assert(mp_rep.nb_received == 1); + mp_res = &mp_rep.msgs[0]; + res = (struct mlx4_mp_param *)mp_res->param; + ret = res->result; + if (ret) + rte_errno = -ret; + free(mp_rep.msgs); + return ret; +} + /** * IPC message handler of primary process. * diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c index 6db917a092..ad7d4832f2 100644 --- a/drivers/net/mlx4/mlx4_mr.c +++ b/drivers/net/mlx4/mlx4_mr.c @@ -528,7 +528,10 @@ mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl, /** * Create a new global Memroy Region (MR) for a missing virtual address. - * Register entire virtually contiguous memory chunk around the address. + * This API should be called on a secondary process, then a request is sent to + * the primary process in order to create a MR for the address. As the global MR + * list is on the shared memory, following LKey lookup should succeed unless the + * request fails. * * @param dev * Pointer to Ethernet device. @@ -542,8 +545,52 @@ mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl, * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. 
*/ static uint32_t -mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, - uintptr_t addr) +mlx4_mr_create_secondary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, + uintptr_t addr) +{ + struct mlx4_priv *priv = dev->data->dev_private; + int ret; + + DEBUG("port %u requesting MR creation for address (%p)", + dev->data->port_id, (void *)addr); + ret = mlx4_mp_req_mr_create(dev, addr); + if (ret) { + DEBUG("port %u fail to request MR creation for address (%p)", + dev->data->port_id, (void *)addr); + return UINT32_MAX; + } + rte_rwlock_read_lock(&priv->mr.rwlock); + /* Fill in output data. */ + mr_lookup_dev(dev, entry, addr); + /* Lookup can't fail. */ + assert(entry->lkey != UINT32_MAX); + rte_rwlock_read_unlock(&priv->mr.rwlock); + DEBUG("port %u MR CREATED by primary process for %p:\n" + " [0x%" PRIxPTR ", 0x%" PRIxPTR "), lkey=0x%x", + dev->data->port_id, (void *)addr, + entry->start, entry->end, entry->lkey); + return entry->lkey; +} + +/** + * Create a new global Memory Region (MR) for a missing virtual address. + * Register entire virtually contiguous memory chunk around the address. + * This must be called from the primary process. + * + * @param dev + * Pointer to Ethernet device. + * @param[out] entry + * Pointer to returning MR cache entry, found in the global cache or newly + * created. If failed to create one, this will not be updated. + * @param addr + * Target virtual address to register. + * + * @return + * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. 
+ */ +uint32_t +mlx4_mr_create_primary(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, + uintptr_t addr) { struct mlx4_priv *priv = dev->data->dev_private; struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; @@ -563,14 +610,6 @@ mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, DEBUG("port %u creating a MR using address (%p)", dev->data->port_id, (void *)addr); - if (rte_eal_process_type() != RTE_PROC_PRIMARY) { - WARN("port %u using address (%p) of unregistered mempool" - " in secondary process, please create mempool" - " before rte_eth_dev_start()", - dev->data->port_id, (void *)addr); - rte_errno = EPERM; - goto err_nolock; - } /* * Release detached MRs if any. This can't be called with holding either * memory_hotplug_lock or priv->mr.rwlock. MRs on the free list have @@ -780,6 +819,40 @@ err_nolock: return UINT32_MAX; } +/** + * Create a new global Memory Region (MR) for a missing virtual address. + * This can be called from primary and secondary process. + * + * @param dev + * Pointer to Ethernet device. + * @param[out] entry + * Pointer to returning MR cache entry, found in the global cache or newly + * created. If failed to create one, this will not be updated. + * @param addr + * Target virtual address to register. + * + * @return + * Searched LKey on success, UINT32_MAX on failure and rte_errno is set. + */ +static uint32_t +mlx4_mr_create(struct rte_eth_dev *dev, struct mlx4_mr_cache *entry, + uintptr_t addr) +{ + uint32_t ret = 0; + + switch (rte_eal_process_type()) { + case RTE_PROC_PRIMARY: + ret = mlx4_mr_create_primary(dev, entry, addr); + break; + case RTE_PROC_SECONDARY: + ret = mlx4_mr_create_secondary(dev, entry, addr); + break; + default: + break; + } + return ret; +} + /** * Rebuild the global B-tree cache of device from the original MR list. 
* diff --git a/drivers/net/mlx4/mlx4_mr.h b/drivers/net/mlx4/mlx4_mr.h index 37a365a8b5..9d125e239d 100644 --- a/drivers/net/mlx4/mlx4_mr.h +++ b/drivers/net/mlx4/mlx4_mr.h @@ -75,6 +75,8 @@ extern rte_rwlock_t mlx4_mem_event_rwlock; int mlx4_mr_btree_init(struct mlx4_mr_btree *bt, int n, int socket); void mlx4_mr_btree_free(struct mlx4_mr_btree *bt); void mlx4_mr_btree_dump(struct mlx4_mr_btree *bt); +uint32_t mlx4_mr_create_primary(struct rte_eth_dev *dev, + struct mlx4_mr_cache *entry, uintptr_t addr); void mlx4_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr, size_t len, void *arg); int mlx4_mr_update_mp(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,