From: NĂ©lio Laranjeiro Date: Tue, 20 Sep 2016 08:53:47 +0000 (+0200) Subject: net/mlx5: reduce memory overhead of Rx/Tx descriptors X-Git-Tag: spdx-start~5844 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=b4b12e55a041168a1fb12d68ebdaa5fd37f4b135;p=dpdk.git net/mlx5: reduce memory overhead of Rx/Tx descriptors PMD uses only power of two number of descriptors, storing the number of elements in log2 helps to reduce the size of the container to store it. Signed-off-by: Nelio Laranjeiro Signed-off-by: Adrien Mazarguil --- diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 205ce9c6b1..bf4232a98f 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -808,7 +808,7 @@ recover: if (rehash) ret = rxq_rehash(dev, rxq_ctrl); else - ret = rxq_ctrl_setup(dev, rxq_ctrl, rxq->elts_n, + ret = rxq_ctrl_setup(dev, rxq_ctrl, 1 << rxq->elts_n, rxq_ctrl->socket, NULL, rxq->mp); if (!ret) continue; @@ -1314,7 +1314,7 @@ mlx5_secondary_data_setup(struct priv *priv) if (txq_ctrl != NULL) { if (txq_ctrl_setup(priv->dev, primary_txq_ctrl, - primary_txq->elts_n, + 1 << primary_txq->elts_n, primary_txq_ctrl->socket, NULL) == 0) { txq_ctrl->txq.stats.idx = diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 8e02440f15..a784c8e68c 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -723,7 +723,7 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl) if (rxq_ctrl->rxq.elts == NULL) return; - for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) { + for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) { if ((*rxq_ctrl->rxq.elts)[i] != NULL) rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); (*rxq_ctrl->rxq.elts)[i] = NULL; @@ -807,7 +807,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl) int rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl) { - unsigned int elts_n = rxq_ctrl->rxq.elts_n; + unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n; unsigned int i; struct ibv_exp_wq_attr mod; int err; @@ -870,7 +870,7 @@ rxq_setup(struct rxq_ctrl *tmpl) struct ibv_cq *ibcq = tmpl->cq; struct mlx5_cq *cq = to_mxxx(cq, cq); struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq); - struct rte_mbuf *(*elts)[tmpl->rxq.elts_n] = + struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] = rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket); if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) { @@ -924,7 +924,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, .priv = priv, .socket = socket, .rxq = { - .elts_n = desc, + .elts_n = log2above(desc), .mp = mp, }, }; @@ -1148,7 +1148,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, } /* Reuse buffers from original queue if possible. */ if (rxq_ctrl->rxq.elts_n) { - assert(rxq_ctrl->rxq.elts_n == desc); + assert(1 << rxq_ctrl->rxq.elts_n == desc); assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts); ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts); } else diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 7efa76ab1b..ebc094e811 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -166,8 +166,8 @@ txq_complete(struct txq *txq) __attribute__((always_inline)); static inline void txq_complete(struct txq *txq) { - const unsigned int elts_n = txq->elts_n; const unsigned int cqe_n = txq->cqe_n; + const unsigned int elts_n = 1 << txq->elts_n; const unsigned int cqe_cnt = cqe_n - 1; uint16_t elts_free = txq->elts_tail; uint16_t elts_tail; @@ -468,7 +468,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; + const unsigned int elts_n = 1 << txq->elts_n; unsigned int i = 0; unsigned int j = 0; unsigned int max; @@ -680,7 +680,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; + const unsigned int elts_n = 1 << txq->elts_n; unsigned int i = 0; unsigned int j = 0; unsigned int max; @@ -884,7 +884,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; + const unsigned int elts_n = 1 << txq->elts_n; unsigned int i = 0; unsigned int j = 0; unsigned int max; @@ -1272,8 +1272,8 @@ uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct rxq *rxq = dpdk_rxq; - const unsigned int wqe_cnt = rxq->elts_n - 1; const unsigned int cqe_cnt = rxq->cqe_n - 1; + const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; const unsigned int sges_n = rxq->sges_n; struct rte_mbuf *pkt = NULL; struct rte_mbuf *seg = NULL; diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index b3cc8ee417..71a7d58e30 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -54,6 +54,7 @@ #endif #include #include +#include #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -109,16 +110,16 @@ struct rxq { unsigned int vlan_strip:1; /* Enable VLAN stripping. */ unsigned int crc_present:1; /* CRC must be subtracted. */ unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */ + unsigned int elts_n:4; /* Log 2 of Mbufs. */ + unsigned int port_id:8; + volatile uint32_t *rq_db; + volatile uint32_t *cq_db; uint16_t rq_ci; uint16_t cq_ci; - uint16_t elts_n; uint16_t cqe_n; /* Number of CQ elements. */ - uint16_t port_id; volatile struct mlx5_wqe_data_seg(*wqes)[]; volatile struct mlx5_cqe(*cqes)[]; struct rxq_zip zip; /* Compressed context. */ - volatile uint32_t *rq_db; - volatile uint32_t *cq_db; struct rte_mbuf *(*elts)[]; struct rte_mempool *mp; struct mlx5_rxq_stats stats; @@ -238,15 +239,16 @@ struct hash_rxq { }; /* TX queue descriptor. */ +RTE_STD_C11 struct txq { uint16_t elts_head; /* Current index in (*elts)[]. */ uint16_t elts_tail; /* First element awaiting completion. */ uint16_t elts_comp; /* Counter since last completion request. */ - uint16_t elts_n; /* (*elts)[] length. */ uint16_t cq_ci; /* Consumer index for completion queue. */ uint16_t cqe_n; /* Number of CQ elements. */ uint16_t wqe_ci; /* Consumer index for work queue. */ uint16_t wqe_n; /* Number of WQ elements. */ + uint16_t elts_n:4; /* (*elts)[] length (in log2). */ uint16_t bf_offset; /* Blueflame offset. */ uint16_t bf_buf_size; /* Blueflame size. */ uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */ diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index 269ffe64f6..d9e61ed9a0 100644 --- a/drivers/net/mlx5/mlx5_txq.c +++ b/drivers/net/mlx5/mlx5_txq.c @@ -101,7 +101,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n) static void txq_free_elts(struct txq_ctrl *txq_ctrl) { - unsigned int elts_n = txq_ctrl->txq.elts_n; + unsigned int elts_n = 1 << txq_ctrl->txq.elts_n; unsigned int elts_head = txq_ctrl->txq.elts_head; unsigned int elts_tail = txq_ctrl->txq.elts_tail; struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts; @@ -227,7 +227,7 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl) (volatile struct mlx5_cqe (*)[]) (uintptr_t)cq->active_buf->buf; tmpl->txq.elts = - (struct rte_mbuf *(*)[tmpl->txq.elts_n]) + (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n]) ((uintptr_t)txq_ctrl + sizeof(*txq_ctrl)); return 0; } @@ -277,7 +277,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl, } (void)conf; /* Thresholds configuration (ignored). */ assert(desc > MLX5_TX_COMP_THRESH); - tmpl.txq.elts_n = desc; + tmpl.txq.elts_n = log2above(desc); /* MRs will be registered in mp2mr[] later. */ attr.rd = (struct ibv_exp_res_domain_init_attr){ .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |