From: Xueming Li
Date: Thu, 25 Jan 2018 15:00:24 +0000 (+0800)
Subject: net/mlx5: map UAR address around huge pages
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=4a984153430cbdafe2b971e999bba8d96610b299;p=dpdk.git

net/mlx5: map UAR address around huge pages

Reserving the memory space for the UAR near the huge pages helps to
**reduce** the cases where the secondary process cannot start. Since these
are physical pages, they must be mapped at the same virtual address as in
the primary process for the secondary process to work.

As this remap is almost the last one done by the process (libraries, heaps
and stacks are already loaded), there is, as with huge pages, **no
guarantee** that this mechanism will always work.

Signed-off-by: Xueming Li
Acked-by: Nelio Laranjeiro
---
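For illustration only: the reservation below boils down to finding the
lowest hugepage virtual address, backing off by
MLX5_UAR_OFFSET + MLX5_UAR_SIZE, and claiming that window with an anonymous
PROT_NONE mapping, which consumes address space but no physical memory. A
minimal standalone sketch of the idea, with a made-up hugepage floor and the
hypothetical helper reserve_uar_space(); none of this is driver code:

#include <stdio.h>
#include <stdint.h>
#include <sys/mman.h>

#define UAR_SIZE   (1ULL << 32) /* Mirrors MLX5_UAR_SIZE. */
#define UAR_OFFSET (1ULL << 32) /* Mirrors MLX5_UAR_OFFSET. */

/* Reserve address space below a floor address (e.g. the lowest hugepage
 * segment). PROT_NONE plus MAP_ANONYMOUS consumes no physical memory,
 * only virtual address space.
 */
static void *
reserve_uar_space(void *hugepage_floor)
{
        void *hint = (void *)((uintptr_t)hugepage_floor -
                              (UAR_OFFSET + UAR_SIZE));
        void *addr = mmap(hint, UAR_SIZE, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (addr == MAP_FAILED)
                return NULL;
        /* Without MAP_FIXED the kernel may pick another address; the patch
         * accepts that in the primary process and records the result, while
         * the secondary process must land on exactly the recorded address.
         */
        return addr;
}

int
main(void)
{
        void *floor = (void *)(uintptr_t)(1ULL << 40); /* Demo value. */
        void *base = reserve_uar_space(floor);

        if (base == NULL)
                return 1;
        printf("Reserved UAR window at %p\n", base);
        munmap(base, UAR_SIZE);
        return 0;
}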
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 8a549edd7e..5a959f7c52 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include <sys/mman.h>
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -56,6 +57,7 @@
 #include
 #include
 #include
+#include <rte_eal_memconfig.h>
 #include
 
 #include "mlx5.h"
@@ -478,6 +480,106 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
 
 static struct rte_pci_driver mlx5_driver;
 
+/*
+ * Reserved UAR address space for TXQ UAR (hw doorbell) mapping; a process
+ * local resource used by both the primary and secondary process to avoid
+ * a duplicate reservation.
+ * The space has to be available in both the primary and secondary process;
+ * the TXQ UAR maps to this area using a fixed mmap w/o a double check.
+ */
+static void *uar_base;
+
+/**
+ * Reserve UAR address space for the primary process.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_uar_init_primary(struct priv *priv)
+{
+        void *addr = (void *)0;
+        int i;
+        const struct rte_mem_config *mcfg;
+        int ret;
+
+        if (uar_base) { /* UAR address space already mapped. */
+                priv->uar_base = uar_base;
+                return 0;
+        }
+        /* Find out the lower bound of the hugepage segments. */
+        mcfg = rte_eal_get_configuration()->mem_config;
+        for (i = 0; i < RTE_MAX_MEMSEG && mcfg->memseg[i].addr; i++) {
+                if (addr)
+                        addr = RTE_MIN(addr, mcfg->memseg[i].addr);
+                else
+                        addr = mcfg->memseg[i].addr;
+        }
+        /* Keep a distance to the hugepages to minimize potential conflicts. */
+        addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE);
+        /* Anonymous mmap, no real memory consumption. */
+        addr = mmap(addr, MLX5_UAR_SIZE,
+                    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (addr == MAP_FAILED) {
+                ERROR("Failed to reserve UAR address space, please adjust "
+                      "MLX5_UAR_SIZE or try --base-virtaddr");
+                ret = ENOMEM;
+                return ret;
+        }
+        /* Accept either the same addr or a new addr returned from mmap if
+         * the target range is occupied.
+         */
+        INFO("Reserved UAR address space: %p", addr);
+        priv->uar_base = addr; /* For primary and secondary UAR re-mmap. */
+        uar_base = addr; /* Process local, don't reserve again. */
+        return 0;
+}
+
+/**
+ * Reserve UAR address space for the secondary process, aligned with the
+ * primary process.
+ *
+ * @param[in] priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_uar_init_secondary(struct priv *priv)
+{
+        void *addr;
+        int ret;
+
+        assert(priv->uar_base);
+        if (uar_base) { /* Already reserved. */
+                assert(uar_base == priv->uar_base);
+                return 0;
+        }
+        /* Anonymous mmap, no real memory consumption. */
+        addr = mmap(priv->uar_base, MLX5_UAR_SIZE,
+                    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (addr == MAP_FAILED) {
+                ERROR("UAR mmap failed: %p size: %llu",
+                      priv->uar_base, MLX5_UAR_SIZE);
+                ret = ENXIO;
+                return ret;
+        }
+        if (priv->uar_base != addr) {
+                ERROR("UAR address %p size %llu occupied, please adjust "
+                      "MLX5_UAR_OFFSET or try EAL parameter --base-virtaddr",
+                      priv->uar_base, MLX5_UAR_SIZE);
+                ret = ENXIO;
+                return ret;
+        }
+        uar_base = addr; /* Process local, don't reserve again. */
+        INFO("Reserved UAR address space: %p", addr);
+        return 0;
+}
+
 /**
  * DPDK callback to register a PCI device.
  *
@@ -663,6 +765,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			eth_dev->device = &pci_dev->device;
 			eth_dev->dev_ops = &mlx5_dev_sec_ops;
 			priv = eth_dev->data->dev_private;
+			err = priv_uar_init_secondary(priv);
+			if (err)
+				goto error;
 			/* Receive command fd from primary process */
 			err = priv_socket_connect(priv);
 			if (err < 0) {
@@ -671,10 +776,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 			}
 			/* Remap UAR for Tx queues. */
 			err = priv_tx_uar_remap(priv, err);
-			if (err < 0) {
-				err = -err;
+			if (err)
 				goto error;
-			}
 			/*
 			 * Ethdev pointer is still required as input since
 			 * the primary device is not accessible from the
@@ -820,6 +923,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		WARN("Rx CQE compression isn't supported");
 		config.cqe_comp = 0;
 	}
+	err = priv_uar_init_primary(priv);
+	if (err)
+		goto port_error;
 	/* Configure the first MAC address by default. */
 	if (priv_get_mac(priv, &mac.addr_bytes)) {
 		ERROR("cannot get MAC address, is mlx5_en loaded?"
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index f9e18c2944..7851cac968 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -179,6 +179,7 @@ struct priv {
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
 	int primary_socket; /* Unix socket for primary process. */
+	void *uar_base; /* Reserved address space for UAR mapping. */
 	struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
 	struct mlx5_dev_config config; /* Device configuration. */
 	struct mlx5_verbs_alloc_ctx verbs_alloc_ctx;
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 57f295c586..a925921eb9 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -113,4 +113,14 @@
 /* Maximum number of attempts to query link status before giving up. */
 #define MLX5_MAX_LINK_QUERY_ATTEMPTS 5
 
+/* Reserved address space for UAR mapping. */
+#define MLX5_UAR_SIZE (1ULL << 32)
+
+/* Offset of the reserved UAR address space from the hugepage memory. The
+ * offset is used here to minimize the chance of an address next to the
+ * hugepages being used by other code in either the primary or the secondary
+ * process; failing to map the TX UAR would make TX packets invisible to HW.
+ */
+#define MLX5_UAR_OFFSET (1ULL << 32)
+
 #endif /* RTE_PMD_MLX5_DEFS_H_ */
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 6d1bc85e6e..30dfaf3590 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -205,7 +205,7 @@ struct mlx5_txq_data {
 	volatile void *wqes; /* Work queue (use volatile to write into). */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
-	volatile void *bf_reg; /* Blueflame register. */
+	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
@@ -230,6 +230,7 @@ struct mlx5_txq_ctrl {
 	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
 	struct mlx5_txq_data txq; /* Data path structure. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
+	volatile void *bf_reg_orig; /* Blueflame register from Verbs. */
 };
 
 /* mlx5_rxq.c */
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index c5429e182e..c397b9b267 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -76,10 +76,13 @@ priv_txq_start(struct priv *priv)
 			goto error;
 		}
 	}
-	return -ret;
+	ret = priv_tx_uar_remap(priv, priv->ctx->cmd_fd);
+	if (ret)
+		goto error;
+	return ret;
 error:
 	priv_txq_stop(priv);
-	return -ret;
+	return ret;
 }
 
 static void
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 18321c3e3a..65086d36fd 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -288,7 +288,9 @@ mlx5_tx_queue_release(void *dpdk_txq)
 
 /**
- * Map locally UAR used in Tx queues for BlueFlame doorbell.
+ * Mmap TX UAR (HW doorbell) pages into the reserved UAR address space.
+ * Both the primary and secondary process mmap them so that the UAR
+ * addresses match across processes.
  *
  * @param[in] priv
  *   Pointer to private structure.
@@ -305,11 +307,14 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	uintptr_t pages[priv->txqs_n];
 	unsigned int pages_n = 0;
 	uintptr_t uar_va;
+	uintptr_t off;
 	void *addr;
+	void *ret;
 	struct mlx5_txq_data *txq;
 	struct mlx5_txq_ctrl *txq_ctrl;
 	int already_mapped;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+	int r;
 
 	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
 	/*
@@ -320,8 +325,10 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 	for (i = 0; i != priv->txqs_n; ++i) {
 		txq = (*priv->txqs)[i];
 		txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
-		uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
-		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
+		/* UAR addr from Verbs, used to find dup and offset in page. */
+		uar_va = (uintptr_t)txq_ctrl->bf_reg_orig;
+		off = uar_va & (page_size - 1); /* Offset in page. */
+		uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* Page addr. */
 		already_mapped = 0;
 		for (j = 0; j != pages_n; ++j) {
 			if (pages[j] == uar_va) {
@@ -329,16 +336,30 @@ priv_tx_uar_remap(struct priv *priv, int fd)
 				break;
 			}
 		}
-		if (already_mapped)
-			continue;
-		pages[pages_n++] = uar_va;
-		addr = mmap((void *)uar_va, page_size,
-			    PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
-			    txq_ctrl->uar_mmap_offset);
-		if (addr != (void *)uar_va) {
-			ERROR("call to mmap failed on UAR for txq %d\n", i);
-			return -1;
+		/* New address in the reserved UAR address space. */
+		addr = RTE_PTR_ADD(priv->uar_base,
+				   uar_va & (MLX5_UAR_SIZE - 1));
+		if (!already_mapped) {
+			pages[pages_n++] = uar_va;
+			/* Fixed mmap to the specified address in the
+			 * reserved address space.
+			 */
+ */ + ret = mmap(addr, page_size, + PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, + txq_ctrl->uar_mmap_offset); + if (ret != addr) { + /* fixed mmap have to return same address */ + ERROR("call to mmap failed on UAR for txq %d\n", + i); + r = ENXIO; + return r; + } } + if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */ + txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off); + else + assert(txq_ctrl->txq.bf_reg == + RTE_PTR_ADD((void *)addr, off)); } return 0; } @@ -505,7 +526,7 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) txq_data->wqes = qp.sq.buf; txq_data->wqe_n = log2above(qp.sq.wqe_cnt); txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR]; - txq_data->bf_reg = qp.bf.reg; + txq_ctrl->bf_reg_orig = qp.bf.reg; txq_data->cq_db = cq_info.dbrec; txq_data->cqes = (volatile struct mlx5_cqe (*)[]) @@ -840,6 +861,7 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx) { unsigned int i; struct mlx5_txq_ctrl *txq; + size_t page_size = sysconf(_SC_PAGESIZE); if (!(*priv->txqs)[idx]) return 0; @@ -859,6 +881,9 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx) txq->txq.mp2mr[i] = NULL; } } + if (priv->uar_base) + munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, + page_size), page_size); if (rte_atomic32_dec_and_test(&txq->refcnt)) { txq_free_elts(txq); LIST_REMOVE(txq, next);