From: Yongseok Koh
Date: Thu, 6 Jul 2017 18:41:08 +0000 (-0700)
Subject: net/mlx5: use buffer address for LKEY search
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=b0b09384579357265db6cb01a0d1ee24b19b3163;p=dpdk.git

net/mlx5: use buffer address for LKEY search

When searching LKEY, if search key is mempool pointer, the 2nd cacheline
has to be accessed and it even requires to check whether a buffer is
indirect per every search. Instead, using address for search key can reduce
cycles taken. And caching the last hit entry is beneficial as well.

Signed-off-by: Yongseok Koh
Acked-by: Nelio Laranjeiro
---
Review note: mp2mr[].end is stored as mr->addr + mr->length, i.e. one past
the last byte of the MR, so txq_mb2mr() must match with "end > addr" and not
"end >= addr"; otherwise an address lying exactly at the end of a region is
resolved to the LKEY of a region that does not contain it.

diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 0a3638460b..2873351799 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -207,7 +207,8 @@ txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, unsigned int idx)
 			 sizeof(txq_ctrl->txq.mp2mr[0])));
 	}
 	/* Store the new entry. */
-	txq_ctrl->txq.mp2mr[idx].mp = mp;
+	txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
+	txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
 	txq_ctrl->txq.mp2mr[idx].mr = mr;
 	txq_ctrl->txq.mp2mr[idx].lkey = htonl(mr->lkey);
 	DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
@@ -265,18 +266,28 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 	struct txq_mp2mr_mbuf_check_data data = {
 		.ret = 0,
 	};
+	uintptr_t start;
+	uintptr_t end;
 	unsigned int i;
 
 	/* Register mempool only if the first element looks like a mbuf. */
 	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
 			data.ret == -1)
 		return;
+	if (mlx5_check_mempool(mp, &start, &end) != 0) {
+		ERROR("mempool %p: not virtually contiguous",
+		      (void *)mp);
+		return;
+	}
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (unlikely(txq_ctrl->txq.mp2mr[i].mp == NULL)) {
+		struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
+
+		if (unlikely(mr == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (txq_ctrl->txq.mp2mr[i].mp == mp)
+		if (start >= (uintptr_t)mr->addr &&
+		    end <= (uintptr_t)mr->addr + mr->length)
 			return;
 	}
 	txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 66593679f4..688ee9028a 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -77,7 +77,7 @@ static __rte_always_inline void
 txq_complete(struct txq *txq);
 
 static __rte_always_inline uint32_t
-txq_mp2mr(struct txq *txq, struct rte_mempool *mp);
+txq_mb2mr(struct txq *txq, struct rte_mbuf *mb);
 
 static __rte_always_inline void
 mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe);
@@ -352,7 +352,7 @@ txq_mb2mp(struct rte_mbuf *buf)
 }
 
 /**
- * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].
+ * Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
  * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
  * remove an entry first.
  *
@@ -365,27 +365,30 @@ txq_mb2mp(struct rte_mbuf *buf)
  *   mr->lkey on success, (uint32_t)-1 on failure.
  */
 static inline uint32_t
-txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
+txq_mb2mr(struct txq *txq, struct rte_mbuf *mb)
 {
-	unsigned int i;
-	uint32_t lkey = (uint32_t)-1;
+	uint16_t i = txq->mr_cache_idx;
+	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
 
+	assert(i < RTE_DIM(txq->mp2mr));
+	if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end > addr))
+		return txq->mp2mr[i].lkey;
 	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-		if (unlikely(txq->mp2mr[i].mp == NULL)) {
+		if (unlikely(txq->mp2mr[i].mr == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (txq->mp2mr[i].mp == mp) {
+		if (txq->mp2mr[i].start <= addr &&
+		    txq->mp2mr[i].end > addr) {
 			assert(txq->mp2mr[i].lkey != (uint32_t)-1);
 			assert(htonl(txq->mp2mr[i].mr->lkey) ==
 			       txq->mp2mr[i].lkey);
-			lkey = txq->mp2mr[i].lkey;
-			break;
+			txq->mr_cache_idx = i;
+			return txq->mp2mr[i].lkey;
 		}
 	}
-	if (unlikely(lkey == (uint32_t)-1))
-		lkey = txq_mp2mr_reg(txq, mp, i);
-	return lkey;
+	txq->mr_cache_idx = 0;
+	return txq_mp2mr_reg(txq, txq_mb2mp(mb), i);
 }
 
 /**
@@ -770,7 +773,7 @@ use_dseg:
 			naddr = htonll(addr);
 			*dseg = (rte_v128u32_t){
 				htonl(length),
-				txq_mp2mr(txq, txq_mb2mp(buf)),
+				txq_mb2mr(txq, buf),
 				naddr,
 				naddr >> 32,
 			};
@@ -809,7 +812,7 @@ next_seg:
 		naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
 		*dseg = (rte_v128u32_t){
 			htonl(length),
-			txq_mp2mr(txq, txq_mb2mp(buf)),
+			txq_mb2mr(txq, buf),
 			naddr,
 			naddr >> 32,
 		};
@@ -1051,7 +1054,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			addr = rte_pktmbuf_mtod(buf, uintptr_t);
 			*dseg = (struct mlx5_wqe_data_seg){
 				.byte_count = htonl(DATA_LEN(buf)),
-				.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+				.lkey = txq_mb2mr(txq, buf),
 				.addr = htonll(addr),
 			};
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@@ -1297,7 +1300,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 				addr = rte_pktmbuf_mtod(buf, uintptr_t);
 				*dseg = (struct mlx5_wqe_data_seg){
 					.byte_count = htonl(DATA_LEN(buf)),
-					.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+					.lkey = txq_mb2mr(txq, buf),
 					.addr = htonll(addr),
 				};
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@@ -1604,7 +1607,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				addr = rte_pktmbuf_mtod(buf, uintptr_t);
 				*dseg = (struct mlx5_wqe_data_seg){
 					.byte_count = htonl(DATA_LEN(buf)),
-					.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+					.lkey = txq_mb2mr(txq, buf),
 					.addr = htonll(addr),
 				};
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@@ -1687,7 +1690,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				naddr = htonll(addr);
 				*dseg = (rte_v128u32_t) {
 					htonl(length),
-					txq_mp2mr(txq, txq_mb2mp(buf)),
+					txq_mb2mr(txq, buf),
 					naddr,
 					naddr >> 32,
 				};
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index a219950305..d0f508e903 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -267,10 +267,12 @@ struct txq {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register. */
 	struct {
-		const struct rte_mempool *mp; /* Cached Memory Pool. */
+		uintptr_t start; /* Start address of MR (inclusive). */
+		uintptr_t end; /* End address of MR (exclusive). */
 		struct ibv_mr *mr; /* Memory Region (for mp). */
 		uint32_t lkey; /* htonl(mr->lkey) */
 	} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
+	uint16_t mr_cache_idx; /* Index of last hit entry. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index f0729a2a85..ac9dfc5f0c 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -149,9 +149,8 @@ txq_cleanup(struct txq_ctrl *txq_ctrl)
 	if (txq_ctrl->cq != NULL)
 		claim_zero(ibv_destroy_cq(txq_ctrl->cq));
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (txq_ctrl->txq.mp2mr[i].mp == NULL)
+		if (txq_ctrl->txq.mp2mr[i].mr == NULL)
 			break;
-		assert(txq_ctrl->txq.mp2mr[i].mr != NULL);
 		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
 	}
 	memset(txq_ctrl, 0, sizeof(*txq_ctrl));