From: Adrien Mazarguil Date: Fri, 30 Oct 2015 18:55:05 +0000 (+0100) Subject: mlx5: get rid of the WR structure in Rx queue elements X-Git-Tag: spdx-start~8254 X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=aa7f63ab35ccac3ef696eb289a98fa851eca21fa;p=dpdk.git mlx5: get rid of the WR structure in Rx queue elements Removing this structure reduces the size of SG and non-SG RX queue elements significantly to improve performance. An nice side effect is that the mbuf pointer is now fully stored in struct rxq_elt instead of relying on the WR ID data offset hack. Signed-off-by: Adrien Mazarguil Signed-off-by: Olga Shern Signed-off-by: Or Ami Signed-off-by: Nelio Laranjeiro --- diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 3a1e7a631d..c8a517cb36 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -115,24 +115,6 @@ struct priv { rte_spinlock_t lock; /* Lock for control functions. */ }; -/* Work Request ID data type (64 bit). */ -typedef union { - struct { - uint32_t id; - uint16_t offset; - } data; - uint64_t raw; -} wr_id_t; - -/* Compile-time check. */ -static inline void wr_id_t_check(void) -{ - wr_id_t check[1 + (2 * -!(sizeof(wr_id_t) == sizeof(uint64_t)))]; - - (void)check; - (void)wr_id_t_check; -} - /** * Lock private structure to protect it from concurrent access in the * control path. diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 5a55886886..f2f773e744 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -60,6 +60,7 @@ #endif #include "mlx5.h" +#include "mlx5_autoconf.h" #include "mlx5_rxtx.h" #include "mlx5_utils.h" #include "mlx5_defs.h" @@ -97,16 +98,10 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n, for (i = 0; (i != elts_n); ++i) { unsigned int j; struct rxq_elt_sp *elt = &(*elts)[i]; - struct ibv_recv_wr *wr = &elt->wr; struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges; /* These two arrays must have the same size. */ assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs)); - /* Configure WR. */ - wr->wr_id = i; - wr->next = &(*elts)[(i + 1)].wr; - wr->sg_list = &(*sges)[0]; - wr->num_sge = RTE_DIM(*sges); /* For each SGE (segment). */ for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) { struct ibv_sge *sge = &(*sges)[j]; @@ -149,8 +144,6 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n, assert(sge->length == rte_pktmbuf_tailroom(buf)); } } - /* The last WR pointer must be NULL. */ - (*elts)[(i - 1)].wr.next = NULL; DEBUG("%p: allocated and configured %u WRs (%zu segments)", (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges))); rxq->elts_n = elts_n; @@ -242,7 +235,6 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool) /* For each WR (packet). */ for (i = 0; (i != elts_n); ++i) { struct rxq_elt *elt = &(*elts)[i]; - struct ibv_recv_wr *wr = &elt->wr; struct ibv_sge *sge = &(*elts)[i].sge; struct rte_mbuf *buf; @@ -258,16 +250,7 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool) ret = ENOMEM; goto error; } - /* Configure WR. Work request ID contains its own index in - * the elts array and the offset between SGE buffer header and - * its data. */ - WR_ID(wr->wr_id).id = i; - WR_ID(wr->wr_id).offset = - (((uintptr_t)buf->buf_addr + RTE_PKTMBUF_HEADROOM) - - (uintptr_t)buf); - wr->next = &(*elts)[(i + 1)].wr; - wr->sg_list = sge; - wr->num_sge = 1; + elt->buf = buf; /* Headroom is reserved by rte_pktmbuf_alloc(). */ assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); /* Buffer is supposed to be empty. */ @@ -282,21 +265,7 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool) sge->lkey = rxq->mr->lkey; /* Redundant check for tailroom. */ assert(sge->length == rte_pktmbuf_tailroom(buf)); - /* Make sure elts index and SGE mbuf pointer can be deduced - * from WR ID. */ - if ((WR_ID(wr->wr_id).id != i) || - ((void *)((uintptr_t)sge->addr - - WR_ID(wr->wr_id).offset) != buf)) { - ERROR("%p: cannot store index and offset in WR ID", - (void *)rxq); - sge->addr = 0; - rte_pktmbuf_free(buf); - ret = EOVERFLOW; - goto error; - } } - /* The last WR pointer must be NULL. */ - (*elts)[(i - 1)].wr.next = NULL; DEBUG("%p: allocated and configured %u single-segment WRs", (void *)rxq, elts_n); rxq->elts_n = elts_n; @@ -309,14 +278,10 @@ error: assert(pool == NULL); for (i = 0; (i != RTE_DIM(*elts)); ++i) { struct rxq_elt *elt = &(*elts)[i]; - struct rte_mbuf *buf; + struct rte_mbuf *buf = elt->buf; - if (elt->sge.addr == 0) - continue; - assert(WR_ID(elt->wr.wr_id).id == i); - buf = (void *)((uintptr_t)elt->sge.addr - - WR_ID(elt->wr.wr_id).offset); - rte_pktmbuf_free_seg(buf); + if (buf != NULL) + rte_pktmbuf_free_seg(buf); } rte_free(elts); } @@ -345,14 +310,10 @@ rxq_free_elts(struct rxq *rxq) return; for (i = 0; (i != RTE_DIM(*elts)); ++i) { struct rxq_elt *elt = &(*elts)[i]; - struct rte_mbuf *buf; + struct rte_mbuf *buf = elt->buf; - if (elt->sge.addr == 0) - continue; - assert(WR_ID(elt->wr.wr_id).id == i); - buf = (void *)((uintptr_t)elt->sge.addr - - WR_ID(elt->wr.wr_id).offset); - rte_pktmbuf_free_seg(buf); + if (buf != NULL) + rte_pktmbuf_free_seg(buf); } rte_free(elts); } @@ -552,7 +513,6 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq) struct rte_mbuf **pool; unsigned int i, k; struct ibv_exp_qp_attr mod; - struct ibv_recv_wr *bad_wr; int err; int parent = (rxq == &priv->rxq_parent); @@ -670,11 +630,8 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq) for (i = 0; (i != RTE_DIM(*elts)); ++i) { struct rxq_elt *elt = &(*elts)[i]; - struct rte_mbuf *buf = (void *) - ((uintptr_t)elt->sge.addr - - WR_ID(elt->wr.wr_id).offset); + struct rte_mbuf *buf = elt->buf; - assert(WR_ID(elt->wr.wr_id).id == i); pool[k++] = buf; } } @@ -698,17 +655,41 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq) rxq->elts_n = 0; rte_free(rxq->elts.sp); rxq->elts.sp = NULL; - /* Post WRs. */ - err = ibv_post_recv(tmpl.qp, - (tmpl.sp ? - &(*tmpl.elts.sp)[0].wr : - &(*tmpl.elts.no_sp)[0].wr), - &bad_wr); + /* Post SGEs. */ + assert(tmpl.if_qp != NULL); + if (tmpl.sp) { + struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp; + + for (i = 0; (i != RTE_DIM(*elts)); ++i) { +#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST + err = tmpl.if_qp->recv_sg_list + (tmpl.qp, + (*elts)[i].sges, + RTE_DIM((*elts)[i].sges)); +#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */ + errno = ENOSYS; + err = -1; +#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */ + if (err) + break; + } + } else { + struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp; + + for (i = 0; (i != RTE_DIM(*elts)); ++i) { + err = tmpl.if_qp->recv_burst( + tmpl.qp, + &(*elts)[i].sge, + 1); + if (err) + break; + } + } if (err) { - ERROR("%p: ibv_post_recv() failed for WR %p: %s", - (void *)dev, - (void *)bad_wr, - strerror(err)); + ERROR("%p: failed to post SGEs with error %d", + (void *)dev, err); + /* Set err because it does not contain a valid errno value. */ + err = EIO; goto skip_rtr; } mod = (struct ibv_exp_qp_attr){ @@ -761,10 +742,10 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, struct ibv_exp_res_domain_init_attr rd; } attr; enum ibv_exp_query_intf_status status; - struct ibv_recv_wr *bad_wr; struct rte_mbuf *buf; int ret = 0; int parent = (rxq == &priv->rxq_parent); + unsigned int i; (void)conf; /* Thresholds configuration (ignored). */ /* @@ -900,28 +881,7 @@ skip_mr: (void *)dev, strerror(ret)); goto error; } - ret = ibv_post_recv(tmpl.qp, - (tmpl.sp ? - &(*tmpl.elts.sp)[0].wr : - &(*tmpl.elts.no_sp)[0].wr), - &bad_wr); - if (ret) { - ERROR("%p: ibv_post_recv() failed for WR %p: %s", - (void *)dev, - (void *)bad_wr, - strerror(ret)); - goto error; - } skip_alloc: - mod = (struct ibv_exp_qp_attr){ - .qp_state = IBV_QPS_RTR - }; - ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE); - if (ret) { - ERROR("%p: QP state to IBV_QPS_RTR failed: %s", - (void *)dev, strerror(ret)); - goto error; - } /* Save port ID. */ tmpl.port_id = dev->data->port_id; DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id); @@ -947,6 +907,51 @@ skip_alloc: (void *)dev, status); goto error; } + /* Post SGEs. */ + if (!parent && tmpl.sp) { + struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp; + + for (i = 0; (i != RTE_DIM(*elts)); ++i) { +#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST + ret = tmpl.if_qp->recv_sg_list + (tmpl.qp, + (*elts)[i].sges, + RTE_DIM((*elts)[i].sges)); +#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */ + errno = ENOSYS; + ret = -1; +#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */ + if (ret) + break; + } + } else if (!parent) { + struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp; + + for (i = 0; (i != RTE_DIM(*elts)); ++i) { + ret = tmpl.if_qp->recv_burst( + tmpl.qp, + &(*elts)[i].sge, + 1); + if (ret) + break; + } + } + if (ret) { + ERROR("%p: failed to post SGEs with error %d", + (void *)dev, ret); + /* Set ret because it does not contain a valid errno value. */ + ret = EIO; + goto error; + } + mod = (struct ibv_exp_qp_attr){ + .qp_state = IBV_QPS_RTR + }; + ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE); + if (ret) { + ERROR("%p: QP state to IBV_QPS_RTR failed: %s", + (void *)dev, strerror(ret)); + goto error; + } /* Clean up rxq in case we're reinitializing it. */ DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq); rxq_cleanup(rxq); diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 8872f193bc..f48fec1b79 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -612,8 +612,6 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) return 0; for (i = 0; (i != pkts_n); ++i) { struct rxq_elt_sp *elt = &(*elts)[elts_head]; - struct ibv_recv_wr *wr = &elt->wr; - uint64_t wr_id = wr->wr_id; unsigned int len; unsigned int pkt_buf_len; struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */ @@ -623,12 +621,6 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) uint32_t flags; /* Sanity checks. */ -#ifdef NDEBUG - (void)wr_id; -#endif - assert(wr_id < rxq->elts_n); - assert(wr->sg_list == elt->sges); - assert(wr->num_sge == RTE_DIM(elt->sges)); assert(elts_head < rxq->elts_n); assert(rxq->elts_head < rxq->elts_n); ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL, @@ -677,6 +669,7 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) struct rte_mbuf *rep; unsigned int seg_tailroom; + assert(seg != NULL); /* * Fetch initial bytes of packet descriptor into a * cacheline while allocating rep. @@ -688,9 +681,8 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) * Unable to allocate a replacement mbuf, * repost WR. */ - DEBUG("rxq=%p, wr_id=%" PRIu64 ":" - " can't allocate a new mbuf", - (void *)rxq, wr_id); + DEBUG("rxq=%p: can't allocate a new mbuf", + (void *)rxq); if (pkt_buf != NULL) { *pkt_buf_next = NULL; rte_pktmbuf_free(pkt_buf); @@ -825,18 +817,13 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) return mlx5_rx_burst_sp(dpdk_rxq, pkts, pkts_n); for (i = 0; (i != pkts_n); ++i) { struct rxq_elt *elt = &(*elts)[elts_head]; - struct ibv_recv_wr *wr = &elt->wr; - uint64_t wr_id = wr->wr_id; unsigned int len; - struct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr - - WR_ID(wr_id).offset); + struct rte_mbuf *seg = elt->buf; struct rte_mbuf *rep; uint32_t flags; /* Sanity checks. */ - assert(WR_ID(wr_id).id < rxq->elts_n); - assert(wr->sg_list == &elt->sge); - assert(wr->num_sge == 1); + assert(seg != NULL); assert(elts_head < rxq->elts_n); assert(rxq->elts_head < rxq->elts_n); /* @@ -888,9 +875,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) * Unable to allocate a replacement mbuf, * repost WR. */ - DEBUG("rxq=%p, wr_id=%" PRIu32 ":" - " can't allocate a new mbuf", - (void *)rxq, WR_ID(wr_id).id); + DEBUG("rxq=%p: can't allocate a new mbuf", + (void *)rxq); /* Increment out of memory counters. */ ++rxq->stats.rx_nombuf; ++rxq->priv->dev->data->rx_mbuf_alloc_failed; @@ -900,10 +886,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Reconfigure sge to use rep instead of seg. */ elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM; assert(elt->sge.lkey == rxq->mr->lkey); - WR_ID(wr->wr_id).offset = - (((uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM) - - (uintptr_t)rep); - assert(WR_ID(wr->wr_id).id == WR_ID(wr_id).id); + elt->buf = rep; /* Add SGE to array for repost. */ sges[i] = elt->sge; diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index d86d6239b7..90c99dc24a 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -81,16 +81,14 @@ struct mlx5_txq_stats { /* RX element (scattered packets). */ struct rxq_elt_sp { - struct ibv_recv_wr wr; /* Work Request. */ struct ibv_sge sges[MLX5_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */ struct rte_mbuf *bufs[MLX5_PMD_SGE_WR_N]; /* SGEs buffers. */ }; /* RX element. */ struct rxq_elt { - struct ibv_recv_wr wr; /* Work Request. */ struct ibv_sge sge; /* Scatter/Gather Element. */ - /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */ + struct rte_mbuf *buf; /* SGE buffer. */ }; struct priv; diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h index 8ff075b350..f1fad188bf 100644 --- a/drivers/net/mlx5/mlx5_utils.h +++ b/drivers/net/mlx5/mlx5_utils.h @@ -161,6 +161,4 @@ pmd_drv_log_basename(const char *s) \ snprintf(name, sizeof(name), __VA_ARGS__) -#define WR_ID(o) (((wr_id_t *)&(o))->data) - #endif /* RTE_PMD_MLX5_UTILS_H_ */