X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_mr.c;h=fdbe7986fdddd5c1075362f2c210c362a5103590;hb=c2450e933f01d4d31448240f7304730292db7ee8;hp=c91d6a410a046b94de36615b6a03684ac82adf61;hpb=6e88bc42c7a8c1e0fe60fd0d36f686cc1ca08507;p=dpdk.git

diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index c91d6a410a..fdbe7986fd 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -3,27 +3,19 @@
  * Copyright 2016 Mellanox Technologies, Ltd
  */
 
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include 
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
 #include 
 #include 
 #include 
 #include 
 
-#include 
-#include 
 #include 
 #include 
 
 #include "mlx5.h"
 #include "mlx5_mr.h"
 #include "mlx5_rxtx.h"
+#include "mlx5_rx.h"
+#include "mlx5_tx.h"
 
 struct mr_find_contig_memsegs_data {
 	uintptr_t addr;
@@ -38,98 +30,6 @@ struct mr_update_mp_data {
 	int ret;
 };
 
-/**
- * Callback for memory free event. Iterate freed memsegs and check whether it
- * belongs to an existing MR. If found, clear the bit from bitmap of MR. As a
- * result, the MR would be fragmented. If it becomes empty, the MR will be freed
- * later by mlx5_mr_garbage_collect(). Even if this callback is called from a
- * secondary process, the garbage collector will be called in primary process
- * as the secondary process can't call mlx5_mr_create().
- *
- * The global cache must be rebuilt if there's any change and this event has to
- * be propagated to dataplane threads to flush the local caches.
- *
- * @param sh
- *   Pointer to the Ethernet device shared context.
- * @param addr
- *   Address of freed memory.
- * @param len
- *   Size of freed memory.
- */
-static void
-mlx5_mr_mem_event_free_cb(struct mlx5_dev_ctx_shared *sh,
-			  const void *addr, size_t len)
-{
-	const struct rte_memseg_list *msl;
-	struct mlx5_mr *mr;
-	int ms_n;
-	int i;
-	int rebuild = 0;
-
-	DEBUG("device %s free callback: addr=%p, len=%zu",
-	      sh->ibdev_name, addr, len);
-	msl = rte_mem_virt2memseg_list(addr);
-	/* addr and len must be page-aligned. */
-	MLX5_ASSERT((uintptr_t)addr ==
-		    RTE_ALIGN((uintptr_t)addr, msl->page_sz));
-	MLX5_ASSERT(len == RTE_ALIGN(len, msl->page_sz));
-	ms_n = len / msl->page_sz;
-	rte_rwlock_write_lock(&sh->share_cache.rwlock);
-	/* Clear bits of freed memsegs from MR. */
-	for (i = 0; i < ms_n; ++i) {
-		const struct rte_memseg *ms;
-		struct mr_cache_entry entry;
-		uintptr_t start;
-		int ms_idx;
-		uint32_t pos;
-
-		/* Find MR having this memseg. */
-		start = (uintptr_t)addr + i * msl->page_sz;
-		mr = mlx5_mr_lookup_list(&sh->share_cache, &entry, start);
-		if (mr == NULL)
-			continue;
-		MLX5_ASSERT(mr->msl); /* Can't be external memory. */
-		ms = rte_mem_virt2memseg((void *)start, msl);
-		MLX5_ASSERT(ms != NULL);
-		MLX5_ASSERT(msl->page_sz == ms->hugepage_sz);
-		ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
-		pos = ms_idx - mr->ms_base_idx;
-		MLX5_ASSERT(rte_bitmap_get(mr->ms_bmp, pos));
-		MLX5_ASSERT(pos < mr->ms_bmp_n);
-		DEBUG("device %s MR(%p): clear bitmap[%u] for addr %p",
-		      sh->ibdev_name, (void *)mr, pos, (void *)start);
-		rte_bitmap_clear(mr->ms_bmp, pos);
-		if (--mr->ms_n == 0) {
-			LIST_REMOVE(mr, mr);
-			LIST_INSERT_HEAD(&sh->share_cache.mr_free_list, mr, mr);
-			DEBUG("device %s remove MR(%p) from list",
-			      sh->ibdev_name, (void *)mr);
-		}
-		/*
-		 * MR is fragmented or will be freed. the global cache must be
-		 * rebuilt.
-		 */
-		rebuild = 1;
-	}
-	if (rebuild) {
-		mlx5_mr_rebuild_cache(&sh->share_cache);
-		/*
-		 * Flush local caches by propagating invalidation across cores.
-		 * rte_smp_wmb() is enough to synchronize this event. If one of
-		 * freed memsegs is seen by other core, that means the memseg
-		 * has been allocated by allocator, which will come after this
-		 * free call. Therefore, this store instruction (incrementing
-		 * generation below) will be guaranteed to be seen by other core
-		 * before the core sees the newly allocated memory.
-		 */
-		++sh->share_cache.dev_gen;
-		DEBUG("broadcasting local cache flush, gen=%d",
-		      sh->share_cache.dev_gen);
-		rte_smp_wmb();
-	}
-	rte_rwlock_write_unlock(&sh->share_cache.rwlock);
-}
-
 /**
  * Callback for memory event. This can be called from both primary and secondary
  * process.
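For context on the hunk above: the MR design the removed callback relies on is a
global cache plus per-queue caches that are invalidated through a generation
counter. The control path bumps dev_gen after mutating the global cache, and
each dataplane thread rebuilds its local table once the generations diverge.
A minimal self-contained sketch of that scheme (names are illustrative, not the
driver's exact structures):

    #include <stdint.h>

    /* Illustrative stand-ins for the shared cache and per-queue control. */
    struct share_cache {
            uint32_t dev_gen;     /* bumped on every global cache change */
    };

    struct queue_mr_ctrl {
            uint32_t cur_gen;     /* generation the local cache was built on */
            uint32_t cached_lkey; /* stand-in for the per-queue lookup table */
    };

    /* Dataplane-side check: trust the local cache only while in sync. */
    static uint32_t
    lookup_lkey(const struct share_cache *sc, struct queue_mr_ctrl *qc)
    {
            if (qc->cur_gen != sc->dev_gen) {
                    qc->cached_lkey = UINT32_MAX; /* flush stale entries */
                    qc->cur_gen = sc->dev_gen;    /* resync with global */
            }
            return qc->cached_lkey; /* UINT32_MAX forces the slow path */
    }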
@@ -155,7 +55,8 @@ mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
 		rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
 		/* Iterate all the existing mlx5 devices. */
 		LIST_FOREACH(sh, dev_list, mem_event_cb)
-			mlx5_mr_mem_event_free_cb(sh, addr, len);
+			mlx5_free_mr_by_addr(&sh->share_cache,
+					     sh->ibdev_name, addr, len);
 		rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
 		break;
 	case RTE_MEM_EVENT_ALLOC:
@@ -164,30 +65,6 @@ mlx5_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
 	}
 }
 
-/**
- * Bottom-half of LKey search on Rx.
- *
- * @param rxq
- *   Pointer to Rx queue structure.
- * @param addr
- *   Search key.
- *
- * @return
- *   Searched LKey on success, UINT32_MAX on no match.
- */
-uint32_t
-mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr)
-{
-	struct mlx5_rxq_ctrl *rxq_ctrl =
-		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-	struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl;
-	struct mlx5_priv *priv = rxq_ctrl->priv;
-
-	return mlx5_mr_addr2mr_bh(priv->sh->pd, &priv->mp_id,
-				  &priv->sh->share_cache, mr_ctrl, addr,
-				  priv->config.mr_ext_memseg_en);
-}
-
 /**
  * Bottom-half of LKey search on Tx.
  *
@@ -227,9 +104,36 @@ mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr)
 uint32_t
 mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 {
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
+	struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
+	struct mlx5_priv *priv = txq_ctrl->priv;
 	uintptr_t addr = (uintptr_t)mb->buf_addr;
 	uint32_t lkey;
 
+	if (priv->config.mr_mempool_reg_en) {
+		struct rte_mempool *mp = NULL;
+		struct mlx5_mprq_buf *buf;
+
+		if (!RTE_MBUF_HAS_EXTBUF(mb)) {
+			mp = mlx5_mb2mp(mb);
+		} else if (mb->shinfo->free_cb == mlx5_mprq_buf_free_cb) {
+			/* Recover MPRQ mempool. */
+			buf = mb->shinfo->fcb_opaque;
+			mp = buf->mp;
+		}
+		if (mp != NULL) {
+			lkey = mlx5_mr_mempool2mr_bh(&priv->sh->share_cache,
+						     mr_ctrl, mp, addr);
+			/*
+			 * Lookup can only fail on invalid input, e.g. "addr"
+			 * is not from "mp" or "mp" has RTE_MEMPOOL_F_NON_IO set.
+			 */
+			if (lkey != UINT32_MAX)
+				return lkey;
+		}
+		/* Fallback for generic mechanism in corner cases. */
+	}
 	lkey = mlx5_tx_addr2mr_bh(txq, addr);
 	if (lkey == UINT32_MAX && rte_errno == ENXIO) {
 		/* Mempool may have externally allocated memory. */
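The rewritten Tx bottom-half above tries a mempool-based lookup first: a direct
mbuf names its mempool explicitly, while an MPRQ mbuf is an external buffer
whose owning mempool has to be recovered through the shared-info free callback.
Reduced to its essentials, the dispatch looks like this (a sketch assuming
DPDK's mbuf API; ext_buf_ctx and known_cb are illustrative placeholders for the
driver's MPRQ buffer and callback):

    #include <rte_mbuf.h>

    /* Illustrative attach-time context; the driver uses mlx5_mprq_buf. */
    struct ext_buf_ctx {
            struct rte_mempool *mp; /* owning mempool recorded at attach */
    };

    static struct rte_mempool *
    mbuf_to_mempool(struct rte_mbuf *mb,
                    rte_mbuf_extbuf_free_callback_t known_cb)
    {
            if (!RTE_MBUF_HAS_EXTBUF(mb))
                    return mb->pool; /* direct mbuf: pool is explicit */
            if (mb->shinfo->free_cb == known_cb) {
                    /* Our own attachment: opaque points to our context. */
                    struct ext_buf_ctx *ctx = mb->shinfo->fcb_opaque;
                    return ctx->mp;
            }
            return NULL; /* foreign external memory: fall back */
    }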
@@ -276,7 +180,8 @@ mlx5_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
 		return;
 	DRV_LOG(DEBUG, "port %u register MR for chunk #%d of mempool (%s)",
 		dev->data->port_id, mem_idx, mp->name);
-	mr = mlx5_create_mr_ext(sh->pd, addr, len, mp->socket_id);
+	mr = mlx5_create_mr_ext(sh->pd, addr, len, mp->socket_id,
+				sh->share_cache.reg_mr_cb);
 	if (!mr) {
 		DRV_LOG(WARNING,
 			"port %u unable to allocate a new MR of"
@@ -296,32 +201,33 @@ mlx5_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
 }
 
 /**
- * Finds the first ethdev that match the pci device.
+ * Finds the first ethdev that match the device.
  * The existence of multiple ethdev per pci device is only with representors.
  * On such case, it is enough to get only one of the ports as they all share
  * the same ibv context.
  *
- * @param pdev
- *   Pointer to the PCI device.
+ * @param dev
+ *   Pointer to the device.
  *
  * @return
  *   Pointer to the ethdev if found, NULL otherwise.
  */
 static struct rte_eth_dev *
-pci_dev_to_eth_dev(struct rte_pci_device *pdev)
+dev_to_eth_dev(struct rte_device *dev)
 {
 	uint16_t port_id;
 
-	RTE_ETH_FOREACH_DEV_OF(port_id, &pdev->device)
-		return &rte_eth_devices[port_id];
-	return NULL;
+	port_id = rte_eth_find_next_of(0, dev);
+	if (port_id == RTE_MAX_ETHPORTS)
+		return NULL;
+	return &rte_eth_devices[port_id];
 }
 
 /**
- * DPDK callback to DMA map external memory to a PCI device.
+ * Callback to DMA map external memory to a device.
  *
- * @param pdev
- *   Pointer to the PCI device.
+ * @param rte_dev
+ *   Pointer to the generic device.
  * @param addr
  *   Starting virtual address of memory to be mapped.
  * @param iova
@@ -333,24 +239,25 @@ pci_dev_to_eth_dev(struct rte_pci_device *pdev)
  *   0 on success, negative value on error.
  */
 int
-mlx5_dma_map(struct rte_pci_device *pdev, void *addr,
-	     uint64_t iova __rte_unused, size_t len)
+mlx5_net_dma_map(struct rte_device *rte_dev, void *addr,
+		 uint64_t iova __rte_unused, size_t len)
 {
 	struct rte_eth_dev *dev;
 	struct mlx5_mr *mr;
 	struct mlx5_priv *priv;
 	struct mlx5_dev_ctx_shared *sh;
 
-	dev = pci_dev_to_eth_dev(pdev);
+	dev = dev_to_eth_dev(rte_dev);
 	if (!dev) {
 		DRV_LOG(WARNING, "unable to find matching ethdev "
-			"to PCI device %p", (void *)pdev);
+			"to device %s", rte_dev->name);
 		rte_errno = ENODEV;
 		return -1;
 	}
 	priv = dev->data->dev_private;
 	sh = priv->sh;
-	mr = mlx5_create_mr_ext(sh->pd, (uintptr_t)addr, len, SOCKET_ID_ANY);
+	mr = mlx5_create_mr_ext(sh->pd, (uintptr_t)addr, len, SOCKET_ID_ANY,
+				sh->share_cache.reg_mr_cb);
 	if (!mr) {
 		DRV_LOG(WARNING,
 			"port %u unable to dma map", dev->data->port_id);
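These hunks move the entry points from the PCI bus to the generic device model
(rte_eth_find_next_of() replaces the PCI-only lookup). From an application's
point of view, the path that ends in mlx5_net_dma_map() looks roughly like the
sketch below (hedged: error paths trimmed, and the NULL IOVA table assumes
IOVA-as-VA mode; pgsz must describe the actual page size backing buf):

    #include <rte_dev.h>
    #include <rte_ethdev.h>
    #include <rte_memory.h>

    /* Register external memory with DPDK, then DMA-map it to the device
     * backing a port; the bus layer forwards to the PMD's dma_map op. */
    static int
    map_external_buffer(uint16_t port_id, void *buf, size_t len, size_t pgsz)
    {
            struct rte_eth_dev_info info;
            int ret;

            ret = rte_eth_dev_info_get(port_id, &info);
            if (ret != 0)
                    return ret;
            ret = rte_extmem_register(buf, len, NULL, 0, pgsz);
            if (ret != 0)
                    return ret;
            return rte_dev_dma_map(info.device, buf, (uintptr_t)buf, len);
    }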
@@ -366,10 +273,10 @@ mlx5_dma_map(struct rte_pci_device *pdev, void *addr,
 }
 
 /**
- * DPDK callback to DMA unmap external memory to a PCI device.
+ * Callback to DMA unmap external memory to a device.
  *
- * @param pdev
- *   Pointer to the PCI device.
+ * @param rte_dev
+ *   Pointer to the generic device.
  * @param addr
  *   Starting virtual address of memory to be unmapped.
  * @param iova
@@ -381,8 +288,8 @@ mlx5_dma_map(struct rte_pci_device *pdev, void *addr,
 *   0 on success, negative value on error.
 */
 int
-mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr,
-	       uint64_t iova __rte_unused, size_t len __rte_unused)
+mlx5_net_dma_unmap(struct rte_device *rte_dev, void *addr,
+		   uint64_t iova __rte_unused, size_t len __rte_unused)
 {
 	struct rte_eth_dev *dev;
 	struct mlx5_priv *priv;
@@ -390,44 +297,38 @@ mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr,
 	struct mlx5_mr *mr;
 	struct mr_cache_entry entry;
 
-	dev = pci_dev_to_eth_dev(pdev);
+	dev = dev_to_eth_dev(rte_dev);
 	if (!dev) {
-		DRV_LOG(WARNING, "unable to find matching ethdev "
-			"to PCI device %p", (void *)pdev);
+		DRV_LOG(WARNING, "unable to find matching ethdev to device %s",
+			rte_dev->name);
 		rte_errno = ENODEV;
 		return -1;
 	}
 	priv = dev->data->dev_private;
 	sh = priv->sh;
-	rte_rwlock_read_lock(&sh->share_cache.rwlock);
+	rte_rwlock_write_lock(&sh->share_cache.rwlock);
 	mr = mlx5_mr_lookup_list(&sh->share_cache, &entry, (uintptr_t)addr);
 	if (!mr) {
-		rte_rwlock_read_unlock(&sh->share_cache.rwlock);
-		DRV_LOG(WARNING, "address 0x%" PRIxPTR " wasn't registered "
-			"to PCI device %p", (uintptr_t)addr,
-			(void *)pdev);
+		rte_rwlock_write_unlock(&sh->share_cache.rwlock);
+		DRV_LOG(WARNING, "address 0x%" PRIxPTR " wasn't registered to device %s",
			(uintptr_t)addr, rte_dev->name);
 		rte_errno = EINVAL;
 		return -1;
 	}
 	LIST_REMOVE(mr, mr);
-	LIST_INSERT_HEAD(&sh->share_cache.mr_free_list, mr, mr);
-	DEBUG("port %u remove MR(%p) from list", dev->data->port_id,
+	DRV_LOG(DEBUG, "port %u remove MR(%p) from list", dev->data->port_id,
 	      (void *)mr);
+	mlx5_mr_free(mr, sh->share_cache.dereg_mr_cb);
 	mlx5_mr_rebuild_cache(&sh->share_cache);
 	/*
-	 * Flush local caches by propagating invalidation across cores.
-	 * rte_smp_wmb() is enough to synchronize this event. If one of
-	 * freed memsegs is seen by other core, that means the memseg
-	 * has been allocated by allocator, which will come after this
-	 * free call. Therefore, this store instruction (incrementing
-	 * generation below) will be guaranteed to be seen by other core
-	 * before the core sees the newly allocated memory.
+	 * No explicit wmb is needed after updating dev_gen due to
+	 * store-release ordering in unlock that provides the
+	 * implicit barrier at the software visible level.
 	 */
 	++sh->share_cache.dev_gen;
-	DEBUG("broadcasting local cache flush, gen=%d",
+	DRV_LOG(DEBUG, "broadcasting local cache flush, gen=%d",
 	       sh->share_cache.dev_gen);
-	rte_smp_wmb();
-	rte_rwlock_read_unlock(&sh->share_cache.rwlock);
+	rte_rwlock_write_unlock(&sh->share_cache.rwlock);
 	return 0;
 }
 
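On the ordering change in the hunk above: the explicit rte_smp_wmb() after the
generation bump is dropped because releasing the rwlock is itself a
store-release, so the dev_gen increment becomes globally visible before the
lock can be observed as free. The same guarantee expressed in plain C11 atomics
(a sketch of the reasoning, not driver code):

    #include <stdatomic.h>
    #include <stdint.h>

    static uint32_t dev_gen;      /* written only under the writer lock */
    static atomic_uint lock_word; /* 0 = unlocked; stand-in for the rwlock */

    static void
    writer_update(void)
    {
            dev_gen++; /* plain store, protected by the lock */
            /* Release store: the dev_gen update above is ordered before
             * any observer seeing the lock as free (the implicit barrier
             * the new comment refers to). */
            atomic_store_explicit(&lock_word, 0, memory_order_release);
    }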
@@ -494,58 +395,3 @@ mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
 	mlx5_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
 	return mlx5_tx_addr2mr_bh(txq, addr);
 }
-
-/* Called during rte_mempool_mem_iter() by mlx5_mr_update_mp(). */
-static void
-mlx5_mr_update_mp_cb(struct rte_mempool *mp __rte_unused, void *opaque,
-		     struct rte_mempool_memhdr *memhdr,
-		     unsigned mem_idx __rte_unused)
-{
-	struct mr_update_mp_data *data = opaque;
-	struct rte_eth_dev *dev = data->dev;
-	struct mlx5_priv *priv = dev->data->dev_private;
-
-	uint32_t lkey;
-
-	/* Stop iteration if failed in the previous walk. */
-	if (data->ret < 0)
-		return;
-	/* Register address of the chunk and update local caches. */
-	lkey = mlx5_mr_addr2mr_bh(priv->sh->pd, &priv->mp_id,
-				  &priv->sh->share_cache, data->mr_ctrl,
-				  (uintptr_t)memhdr->addr,
-				  priv->config.mr_ext_memseg_en);
-	if (lkey == UINT32_MAX)
-		data->ret = -1;
-}
-
-/**
- * Register entire memory chunks in a Mempool.
- *
- * @param dev
- *   Pointer to Ethernet device.
- * @param mr_ctrl
- *   Pointer to per-queue MR control structure.
- * @param mp
- *   Pointer to registering Mempool.
- *
- * @return
- *   0 on success, -1 on failure.
- */
-int
-mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
-		  struct rte_mempool *mp)
-{
-	struct mr_update_mp_data data = {
-		.dev = dev,
-		.mr_ctrl = mr_ctrl,
-		.ret = 0,
-	};
-
-	rte_mempool_mem_iter(mp, mlx5_mr_update_mp_cb, &data);
-	if (data.ret < 0 && rte_errno == ENXIO) {
-		/* Mempool may have externally allocated memory. */
-		return mlx5_mr_update_ext_mp(dev, mr_ctrl, mp);
-	}
-	return data.ret;
-}
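The last hunk removes the per-queue mempool walk: with mr_mempool_reg_en set,
mempools are registered centrally rather than chunk by chunk for each queue.
For reference, the deleted mechanism is an ordinary rte_mempool_mem_iter()
walk; its skeleton, with the MR registration reduced to a stub (illustrative):

    #include <rte_mempool.h>

    struct walk_ctx {
            int ret; /* sticky error across chunks, as in the removed code */
    };

    /* Invoked once per memory chunk backing the mempool. */
    static void
    register_chunk_cb(struct rte_mempool *mp, void *opaque,
                      struct rte_mempool_memhdr *memhdr, unsigned int mem_idx)
    {
            struct walk_ctx *ctx = opaque;

            (void)mp;
            (void)mem_idx;
            if (ctx->ret < 0)
                    return; /* a previous chunk already failed */
            /* The driver registered memhdr->addr / memhdr->len as an MR
             * here; stubbed out in this sketch. */
            if (memhdr->addr == NULL)
                    ctx->ret = -1;
    }

    static int
    register_mempool(struct rte_mempool *mp)
    {
            struct walk_ctx ctx = { .ret = 0 };

            rte_mempool_mem_iter(mp, register_chunk_cb, &ctx);
            return ctx.ret;
    }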