From: Nélio Laranjeiro Date: Mon, 9 Oct 2017 14:44:45 +0000 (+0200) Subject: net/mlx5: add reference counter on memory region X-Git-Tag: spdx-start~1525 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=f8fb87d51f0f69a9bb7eee436a331aa93336c9fa;p=dpdk.git net/mlx5: add reference counter on memory region This patch introduces the Memory Region as a shared object: users get a reference to it by calling priv_mr_get(), or create it by calling priv_mr_new(). The latter registers the memory pool in the kernel driver and retrieves the associated memory region. This should help to reduce the memory consumption caused by registering the same memory pool multiple times. Signed-off-by: Nelio Laranjeiro Acked-by: Yongseok Koh --- diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 1397016107..b658b2ba64 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -256,6 +256,9 @@ mlx5_dev_close(struct rte_eth_dev *dev) ret = priv_flow_verify(priv); if (ret) WARN("%p: some flows still remain", (void *)priv); + ret = priv_mr_verify(priv); + if (ret) + WARN("%p: some Memory Region still remain", (void *)priv); priv_unlock(priv); memset(priv, 0, sizeof(*priv)); } diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index c6563bd87a..f5637229d9 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -146,6 +146,7 @@ struct priv { unsigned int reta_idx_n; /* RETA index size. */ struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */ TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */ + LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */ uint32_t link_speed_capa; /* Link speed capabilities. */ struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */ rte_spinlock_t lock; /* Lock for control functions. 
*/ @@ -299,4 +300,11 @@ int priv_socket_uninit(struct priv *priv); void priv_socket_handle(struct priv *priv); int priv_socket_connect(struct priv *priv); +/* mlx5_mr.c */ + +struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *); +struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *); +int priv_mr_release(struct priv *, struct mlx5_mr *); +int priv_mr_verify(struct priv *); + #endif /* RTE_PMD_MLX5_H_ */ diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c index 6199746d7a..54fdc16480 100644 --- a/drivers/net/mlx5/mlx5_mr.c +++ b/drivers/net/mlx5/mlx5_mr.c @@ -42,6 +42,7 @@ #endif #include +#include #include "mlx5.h" #include "mlx5_rxtx.h" @@ -110,54 +111,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start, return data.ret; } -/** - * Register mempool as a memory region. - * - * @param pd - * Pointer to protection domain. - * @param mp - * Pointer to memory pool. - * - * @return - * Memory region pointer, NULL in case of error. - */ -struct ibv_mr * -mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp) -{ - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - uintptr_t start; - uintptr_t end; - unsigned int i; - - if (mlx5_check_mempool(mp, &start, &end) != 0) { - ERROR("mempool %p: not virtually contiguous", - (void *)mp); - return NULL; - } - - DEBUG("mempool %p area start=%p end=%p size=%zu", - (void *)mp, (void *)start, (void *)end, - (size_t)(end - start)); - /* Round start and end to page boundary if found in memory segments. 
*/ - for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) { - uintptr_t addr = (uintptr_t)ms[i].addr; - size_t len = ms[i].len; - unsigned int align = ms[i].hugepage_sz; - - if ((start > addr) && (start < addr + len)) - start = RTE_ALIGN_FLOOR(start, align); - if ((end > addr) && (end < addr + len)) - end = RTE_ALIGN_CEIL(end, align); - } - DEBUG("mempool %p using start=%p end=%p size=%zu for MR", - (void *)mp, (void *)start, (void *)end, - (size_t)(end - start)); - return ibv_reg_mr(pd, - (void *)start, - end - start, - IBV_ACCESS_LOCAL_WRITE); -} - /** * Register a Memory Region (MR) <-> Memory Pool (MP) association in * txq->mp2mr[]. If mp2mr[] is full, remove an entry first. @@ -172,44 +125,42 @@ mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp) * Index of the next available entry. * * @return - * mr->lkey on success, (uint32_t)-1 on failure. + * mr on success, NULL on failure. */ -uint32_t +struct mlx5_mr* mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp, unsigned int idx) { struct mlx5_txq_ctrl *txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); - struct ibv_mr *mr; + struct mlx5_mr *mr; /* Add a new entry, register MR first. */ DEBUG("%p: discovered new memory pool \"%s\" (%p)", (void *)txq_ctrl, mp->name, (void *)mp); - mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp); + mr = priv_mr_get(txq_ctrl->priv, mp); + if (mr == NULL) + mr = priv_mr_new(txq_ctrl->priv, mp); if (unlikely(mr == NULL)) { DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.", (void *)txq_ctrl); - return (uint32_t)-1; + return NULL; } - if (unlikely(idx == RTE_DIM(txq_ctrl->txq.mp2mr))) { + if (unlikely(idx == RTE_DIM(txq->mp2mr))) { /* Table is full, remove oldest entry. 
*/ DEBUG("%p: MR <-> MP table full, dropping oldest entry.", (void *)txq_ctrl); --idx; - claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr)); - memmove(&txq_ctrl->txq.mp2mr[0], &txq_ctrl->txq.mp2mr[1], - (sizeof(txq_ctrl->txq.mp2mr) - - sizeof(txq_ctrl->txq.mp2mr[0]))); + priv_mr_release(txq_ctrl->priv, txq->mp2mr[0]); + memmove(&txq->mp2mr[0], &txq->mp2mr[1], + (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0]))); } /* Store the new entry. */ - txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr; - txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length; - txq_ctrl->txq.mp2mr[idx].mr = mr; - txq_ctrl->txq.mp2mr[idx].lkey = rte_cpu_to_be_32(mr->lkey); + txq_ctrl->txq.mp2mr[idx] = mr; DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32, (void *)txq_ctrl, mp->name, (void *)mp, - txq_ctrl->txq.mp2mr[idx].lkey); - return txq_ctrl->txq.mp2mr[idx].lkey; + txq_ctrl->txq.mp2mr[idx]->lkey); + return mr; } struct txq_mp2mr_mbuf_check_data { @@ -275,15 +226,149 @@ mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg) return; } for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) { - struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr; - - if (unlikely(mr == NULL)) { + if (unlikely(txq_ctrl->txq.mp2mr[i] == NULL)) { /* Unknown MP, add a new MR for it. */ break; } - if (start >= (uintptr_t)mr->addr && - end <= (uintptr_t)mr->addr + mr->length) + if (start >= (uintptr_t)txq_ctrl->txq.mp2mr[i]->start && + end <= (uintptr_t)txq_ctrl->txq.mp2mr[i]->end) return; } mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i); } + +/** + * Register a new memory region from the mempool and store it in the memory + * region list. + * + * @param priv + * Pointer to private structure. + * @param mp + * Pointer to the memory pool to register. + * @return + * The memory region on success. 
+ */ +struct mlx5_mr* +priv_mr_new(struct priv *priv, struct rte_mempool *mp) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + uintptr_t start; + uintptr_t end; + unsigned int i; + struct mlx5_mr *mr; + + mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id); + if (!mr) { + DEBUG("unable to configure MR, ibv_reg_mr() failed."); + return NULL; + } + if (mlx5_check_mempool(mp, &start, &end) != 0) { + ERROR("mempool %p: not virtually contiguous", + (void *)mp); + return NULL; + } + DEBUG("mempool %p area start=%p end=%p size=%zu", + (void *)mp, (void *)start, (void *)end, + (size_t)(end - start)); + /* Round start and end to page boundary if found in memory segments. */ + for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) { + uintptr_t addr = (uintptr_t)ms[i].addr; + size_t len = ms[i].len; + unsigned int align = ms[i].hugepage_sz; + + if ((start > addr) && (start < addr + len)) + start = RTE_ALIGN_FLOOR(start, align); + if ((end > addr) && (end < addr + len)) + end = RTE_ALIGN_CEIL(end, align); + } + DEBUG("mempool %p using start=%p end=%p size=%zu for MR", + (void *)mp, (void *)start, (void *)end, + (size_t)(end - start)); + mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start, + IBV_ACCESS_LOCAL_WRITE); + mr->mp = mp; + mr->lkey = rte_cpu_to_be_32(mr->mr->lkey); + mr->start = start; + mr->end = (uintptr_t)mr->mr->addr + mr->mr->length; + rte_atomic32_inc(&mr->refcnt); + DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv, + (void *)mr, rte_atomic32_read(&mr->refcnt)); + LIST_INSERT_HEAD(&priv->mr, mr, next); + return mr; +} + +/** + * Search the memory region object in the memory region list. + * + * @param priv + * Pointer to private structure. + * @param mp + * Pointer to the memory pool to register. + * @return + * The memory region on success. 
+ */ +struct mlx5_mr* +priv_mr_get(struct priv *priv, struct rte_mempool *mp) +{ + struct mlx5_mr *mr; + + assert(mp); + if (LIST_EMPTY(&priv->mr)) + return NULL; + LIST_FOREACH(mr, &priv->mr, next) { + if (mr->mp == mp) { + rte_atomic32_inc(&mr->refcnt); + DEBUG("Memory Region %p refcnt: %d", + (void *)mr, rte_atomic32_read(&mr->refcnt)); + return mr; + } + } + return NULL; +} + +/** + * Release the memory region object. + * + * @param mr + * Pointer to memory region to release. + * + * @return + * 0 on success, errno on failure. + */ +int +priv_mr_release(struct priv *priv, struct mlx5_mr *mr) +{ + (void)priv; + assert(mr); + DEBUG("Memory Region %p refcnt: %d", + (void *)mr, rte_atomic32_read(&mr->refcnt)); + if (rte_atomic32_dec_and_test(&mr->refcnt)) { + claim_zero(ibv_dereg_mr(mr->mr)); + LIST_REMOVE(mr, next); + rte_free(mr); + return 0; + } + return EBUSY; +} + +/** + * Verify the flow list is empty + * + * @param priv + * Pointer to private structure. + * + * @return the number of object not released. 
+ */ +int +priv_mr_verify(struct priv *priv) +{ + int ret = 0; + struct mlx5_mr *mr; + + LIST_FOREACH(mr, &priv->mr, next) { + DEBUG("%p: mr %p still referenced", (void *)priv, + (void *)mr); + ++ret; + } + return ret; +} diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 683a4a75a5..0d645ec6f3 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -673,7 +673,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n) .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)), .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)), - .lkey = rte_cpu_to_be_32(rxq_ctrl->mr->lkey), + .lkey = rxq_ctrl->mr->lkey, }; (*rxq_ctrl->rxq.elts)[i] = buf; } @@ -767,7 +767,7 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl) if (rxq_ctrl->channel != NULL) claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel)); if (rxq_ctrl->mr != NULL) - claim_zero(ibv_dereg_mr(rxq_ctrl->mr)); + priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr); memset(rxq_ctrl, 0, sizeof(*rxq_ctrl)); } @@ -929,12 +929,15 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl, tmpl.rxq.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum; /* Use the entire RX mempool as the memory region. */ - tmpl.mr = mlx5_mp2mr(priv->pd, mp); + tmpl.mr = priv_mr_get(priv, mp); if (tmpl.mr == NULL) { - ret = EINVAL; - ERROR("%p: MR creation failure: %s", - (void *)dev, strerror(ret)); - goto error; + tmpl.mr = priv_mr_new(priv, mp); + if (tmpl.mr == NULL) { + ret = EINVAL; + ERROR("%p: MR creation failure: %s", + (void *)dev, strerror(ret)); + goto error; + } } if (dev->data->dev_conf.intr_conf.rxq) { tmpl.channel = ibv_create_comp_channel(priv->ctx); diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 6ffcfb792f..89e60ea361 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -36,6 +36,7 @@ #include #include +#include /* Verbs header. 
*/ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ @@ -52,6 +53,7 @@ #include #include #include +#include #include "mlx5_utils.h" #include "mlx5.h" @@ -80,6 +82,17 @@ struct mlx5_txq_stats { struct priv; +/* Memory region queue object. */ +struct mlx5_mr { + LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */ + rte_atomic32_t refcnt; /*<< Reference counter. */ + uint32_t lkey; /*<< rte_cpu_to_be_32(mr->lkey) */ + uintptr_t start; /* Start address of MR */ + uintptr_t end; /* End address of MR */ + struct ibv_mr *mr; /*<< Memory Region. */ + struct rte_mempool *mp; /*<< Memory Pool. */ +}; + /* Compressed CQE context. */ struct rxq_zip { uint16_t ai; /* Array index. */ @@ -126,7 +139,7 @@ struct mlx5_rxq_ctrl { struct priv *priv; /* Back pointer to private data. */ struct ibv_cq *cq; /* Completion Queue. */ struct ibv_wq *wq; /* Work Queue. */ - struct ibv_mr *mr; /* Memory Region (for mp). */ + struct mlx5_mr *mr; /* Memory Region (for mp). */ struct ibv_comp_channel *channel; unsigned int socket; /* CPU socket ID for allocations. */ struct mlx5_rxq_data rxq; /* Data path structure. */ @@ -252,6 +265,7 @@ struct mlx5_txq_data { uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */ uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */ uint16_t inline_max_packet_sz; /* Max packet size for inlining. */ + uint16_t mr_cache_idx; /* Index of last hit entry. */ uint32_t qp_num_8s; /* QP number shifted by 8. */ uint32_t flags; /* Flags for Tx Queue. */ volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */ @@ -259,13 +273,7 @@ struct mlx5_txq_data { volatile uint32_t *qp_db; /* Work queue doorbell. */ volatile uint32_t *cq_db; /* Completion queue doorbell. */ volatile void *bf_reg; /* Blueflame register. */ - struct { - uintptr_t start; /* Start address of MR */ - uintptr_t end; /* End address of MR */ - struct ibv_mr *mr; /* Memory Region (for mp). 
*/ - uint32_t lkey; /* rte_cpu_to_be_32(mr->lkey) */ - } mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */ - uint16_t mr_cache_idx; /* Index of last hit entry. */ + struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */ struct rte_mbuf *(*elts)[]; /* TX elements. */ struct mlx5_txq_stats stats; /* TX queue counters. */ } __rte_cache_aligned; @@ -341,8 +349,8 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t); struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *); void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *); -uint32_t mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *, - unsigned int); +struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *, + unsigned int); #ifndef NDEBUG /** @@ -564,26 +572,36 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) { uint16_t i = txq->mr_cache_idx; uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t); + struct mlx5_mr *mr; assert(i < RTE_DIM(txq->mp2mr)); - if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr)) - return txq->mp2mr[i].lkey; + if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr)) + return txq->mp2mr[i]->lkey; for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) { - if (unlikely(txq->mp2mr[i].mr == NULL)) { + if (unlikely(txq->mp2mr[i]->mr == NULL)) { /* Unknown MP, add a new MR for it. 
*/ break; } - if (txq->mp2mr[i].start <= addr && - txq->mp2mr[i].end >= addr) { - assert(txq->mp2mr[i].lkey != (uint32_t)-1); - assert(rte_cpu_to_be_32(txq->mp2mr[i].mr->lkey) == - txq->mp2mr[i].lkey); + if (txq->mp2mr[i]->start <= addr && + txq->mp2mr[i]->end >= addr) { + assert(txq->mp2mr[i]->lkey != (uint32_t)-1); + assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) == + txq->mp2mr[i]->lkey); txq->mr_cache_idx = i; - return txq->mp2mr[i].lkey; + return txq->mp2mr[i]->lkey; } } txq->mr_cache_idx = 0; - return mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i); + mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i); + /* + * Request the reference to use in this queue, the original one is + * kept by the control plane. + */ + if (mr) { + rte_atomic32_inc(&mr->refcnt); + return mr->lkey; + } + return (uint32_t)-1; } /** diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index f551f87fa6..1899850db4 100644 --- a/drivers/net/mlx5/mlx5_txq.c +++ b/drivers/net/mlx5/mlx5_txq.c @@ -142,11 +142,9 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl) claim_zero(ibv_destroy_qp(txq_ctrl->qp)); if (txq_ctrl->cq != NULL) claim_zero(ibv_destroy_cq(txq_ctrl->cq)); - for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) { - if (txq_ctrl->txq.mp2mr[i].mr == NULL) - break; - claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr)); - } + for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) + if (txq_ctrl->txq.mp2mr[i]) + priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]); memset(txq_ctrl, 0, sizeof(*txq_ctrl)); }