From: Adrien Mazarguil
Date: Tue, 30 Jun 2015 09:28:01 +0000 (+0200)
Subject: mlx4: use MOFED 3.0 fast verbs interface for Tx operations
X-Git-Tag: spdx-start~8892
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=9980f81dc2623291b89cf1c281a6a9f116fd2394;p=dpdk.git

mlx4: use MOFED 3.0 fast verbs interface for Tx operations

The "raw" post send interface was experimental and has been deprecated.
This commit replaces it with a new low-level interface that dissociates
post and flush (doorbell) operations for improved QP performance.
The CQ polling function is updated as well.

Signed-off-by: Alex Rosenbaum
Signed-off-by: Adrien Mazarguil
---
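Note (not part of the patch): the gist of the change is that the MOFED 3.0
"burst family" splits posting work from ringing the doorbell. Below is a
minimal sketch of the new TX pattern, assuming the MOFED 3.0 experimental
verbs headers and one scatter/gather entry per packet; tx_burst_sketch() is
a made-up name, while send_pending_sg_list() and send_flush() are the
interface members actually used by this patch:

static uint16_t
tx_burst_sketch(struct ibv_exp_qp_burst_family *if_qp, struct ibv_qp *qp,
                struct ibv_sge *sges, uint16_t n)
{
        uint16_t i;

        /* Queue each packet without ringing the doorbell. */
        for (i = 0; i != n; ++i)
                if (if_qp->send_pending_sg_list(qp, &sges[i], 1, 0))
                        break;
        /* One doorbell covers the whole burst; the old code instead made a
         * single ibv_post_send() call on a chained WR list. */
        if (i > 0)
                if_qp->send_flush(qp);
        return i;
}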
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
index ce1f2b0121..fd74dc8ec9 100644
--- a/drivers/net/mlx4/Makefile
+++ b/drivers/net/mlx4/Makefile
@@ -108,10 +108,6 @@ mlx4_autoconf.h: $(RTE_SDK)/scripts/auto-config-h.sh
                 INLINE_RECV \
                 infiniband/verbs.h \
                 enum IBV_EXP_DEVICE_ATTR_INLINE_RECV_SZ $(AUTOCONF_OUTPUT)
-        $Q sh -- '$<' '$@' \
-                SEND_RAW_WR_SUPPORT \
-                infiniband/verbs.h \
-                type 'struct ibv_send_wr_raw' $(AUTOCONF_OUTPUT)
         $Q sh -- '$<' '$@' \
                 HAVE_EXP_QUERY_DEVICE \
                 infiniband/verbs.h \
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index f76f415c12..3dff64d4f6 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -139,15 +139,6 @@ static inline void wr_id_t_check(void)
         (void)wr_id_t_check;
 }
 
-/* If raw send operations are available, use them since they are faster. */
-#ifdef SEND_RAW_WR_SUPPORT
-typedef struct ibv_send_wr_raw mlx4_send_wr_t;
-#define mlx4_post_send ibv_post_send_raw
-#else
-typedef struct ibv_send_wr mlx4_send_wr_t;
-#define mlx4_post_send ibv_post_send
-#endif
-
 struct mlx4_rxq_stats {
         unsigned int idx; /**< Mapping index. */
 #ifdef MLX4_PMD_SOFT_COUNTERS
@@ -212,7 +203,7 @@ struct rxq {
 
 /* TX element. */
 struct txq_elt {
-        mlx4_send_wr_t wr; /* Work Request. */
+        struct ibv_send_wr wr; /* Work Request. */
         struct ibv_sge sges[MLX4_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
         /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
 };
@@ -235,6 +226,8 @@ struct txq {
         } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
         struct ibv_cq *cq; /* Completion Queue. */
         struct ibv_qp *qp; /* Queue Pair. */
+        struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
+        struct ibv_exp_cq_family *if_cq; /* CQ interface. */
 #if MLX4_PMD_MAX_INLINE > 0
         uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
 #endif
@@ -797,7 +790,7 @@ txq_alloc_elts(struct txq *txq, unsigned int elts_n)
         }
         for (i = 0; (i != elts_n); ++i) {
                 struct txq_elt *elt = &(*elts)[i];
-                mlx4_send_wr_t *wr = &elt->wr;
+                struct ibv_send_wr *wr = &elt->wr;
 
                 /* Configure WR. */
                 WR_ID(wr->wr_id).id = i;
@@ -883,10 +876,33 @@ txq_free_elts(struct txq *txq)
 static void
 txq_cleanup(struct txq *txq)
 {
+        struct ibv_exp_release_intf_params params;
         size_t i;
 
         DEBUG("cleaning up %p", (void *)txq);
         txq_free_elts(txq);
+        if (txq->if_qp != NULL) {
+                assert(txq->priv != NULL);
+                assert(txq->priv->ctx != NULL);
+                assert(txq->qp != NULL);
+                params = (struct ibv_exp_release_intf_params){
+                        .comp_mask = 0,
+                };
+                claim_zero(ibv_exp_release_intf(txq->priv->ctx,
+                                                txq->if_qp,
+                                                &params));
+        }
+        if (txq->if_cq != NULL) {
+                assert(txq->priv != NULL);
+                assert(txq->priv->ctx != NULL);
+                assert(txq->cq != NULL);
+                params = (struct ibv_exp_release_intf_params){
+                        .comp_mask = 0,
+                };
+                claim_zero(ibv_exp_release_intf(txq->priv->ctx,
+                                                txq->if_cq,
+                                                &params));
+        }
         if (txq->qp != NULL)
                 claim_zero(ibv_destroy_qp(txq->qp));
         if (txq->cq != NULL)
@@ -920,7 +936,6 @@ txq_complete(struct txq *txq)
         unsigned int elts_comp = txq->elts_comp;
         unsigned int elts_tail = txq->elts_tail;
         const unsigned int elts_n = txq->elts_n;
-        struct ibv_wc wcs[elts_comp];
         int wcs_n;
 
         if (unlikely(elts_comp == 0))
@@ -929,7 +944,7 @@ txq_complete(struct txq *txq)
         DEBUG("%p: processing %u work requests completions",
               (void *)txq, elts_comp);
 #endif
-        wcs_n = ibv_poll_cq(txq->cq, elts_comp, wcs);
+        wcs_n = txq->if_cq->poll_cnt(txq->cq, elts_comp);
         if (unlikely(wcs_n == 0))
                 return 0;
         if (unlikely(wcs_n < 0)) {
@@ -1059,9 +1074,8 @@ static uint16_t
 mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
         struct txq *txq = (struct txq *)dpdk_txq;
-        mlx4_send_wr_t head;
-        mlx4_send_wr_t **wr_next = &head.next;
-        mlx4_send_wr_t *bad_wr;
+        struct ibv_send_wr head;
+        struct ibv_send_wr **wr_next = &head.next;
         unsigned int elts_head = txq->elts_head;
         const unsigned int elts_tail = txq->elts_tail;
         const unsigned int elts_n = txq->elts_n;
@@ -1087,13 +1101,14 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
         for (i = 0; (i != max); ++i) {
                 struct rte_mbuf *buf = pkts[i];
                 struct txq_elt *elt = &(*txq->elts)[elts_head];
-                mlx4_send_wr_t *wr = &elt->wr;
+                struct ibv_send_wr *wr = &elt->wr;
                 unsigned int segs = NB_SEGS(buf);
-#if (MLX4_PMD_MAX_INLINE > 0) || defined(MLX4_PMD_SOFT_COUNTERS)
+#ifdef MLX4_PMD_SOFT_COUNTERS
                 unsigned int sent_size = 0;
 #endif
                 unsigned int j;
                 int linearize = 0;
+                uint32_t send_flags = 0;
 
                 /* Clean up old buffer. */
                 if (likely(WR_ID(wr->wr_id).offset != 0)) {
@@ -1179,7 +1194,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                       (uintptr_t)sge->addr);
                         sge->length = DATA_LEN(buf);
                         sge->lkey = lkey;
-#if (MLX4_PMD_MAX_INLINE > 0) || defined(MLX4_PMD_SOFT_COUNTERS)
+#ifdef MLX4_PMD_SOFT_COUNTERS
                         sent_size += sge->length;
 #endif
                         buf = NEXT(buf);
@@ -1236,24 +1251,19 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                         sge->addr = (uintptr_t)&(*linear)[0];
                         sge->length = size;
                         sge->lkey = txq->mr_linear->lkey;
-#if (MLX4_PMD_MAX_INLINE > 0) || defined(MLX4_PMD_SOFT_COUNTERS)
+#ifdef MLX4_PMD_SOFT_COUNTERS
                         sent_size += size;
 #endif
                 }
                 /* Link WRs together for ibv_post_send(). */
                 *wr_next = wr;
                 wr_next = &wr->next;
-#if MLX4_PMD_MAX_INLINE > 0
-                if (sent_size <= txq->max_inline)
-                        wr->send_flags = IBV_SEND_INLINE;
-                else
-#endif
-                        wr->send_flags = 0;
+                assert(wr->send_flags == 0);
                 /* Request TX completion. */
                 if (unlikely(--elts_comp_cd == 0)) {
                         elts_comp_cd = txq->elts_comp_cd_init;
                         ++elts_comp;
-                        wr->send_flags |= IBV_SEND_SIGNALED;
+                        send_flags |= IBV_EXP_QP_BURST_SIGNALED;
                 }
                 if (++elts_head >= elts_n)
                         elts_head = 0;
@@ -1261,6 +1271,24 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                         /* Increment sent bytes counter. */
                         txq->stats.obytes += sent_size;
 #endif
+                /* Put SG list into send queue and ask for completion event. */
+#if MLX4_PMD_MAX_INLINE > 0
+                if ((segs == 1) &&
+                    (elt->sges[0].length <= txq->max_inline))
+                        err = txq->if_qp->send_pending_inline
+                                (txq->qp,
+                                 (void *)(uintptr_t)elt->sges[0].addr,
+                                 elt->sges[0].length,
+                                 send_flags);
+                else
+#endif
+                        err = txq->if_qp->send_pending_sg_list
+                                (txq->qp,
+                                 elt->sges,
+                                 segs,
+                                 send_flags);
+                if (unlikely(err))
+                        goto stop;
         }
 stop:
         /* Take a shortcut if nothing must be sent. */
@@ -1271,62 +1299,13 @@ stop:
         txq->stats.opackets += i;
 #endif
         *wr_next = NULL;
-        err = mlx4_post_send(txq->qp, head.next, &bad_wr);
+        /* Ring QP doorbell. */
+        err = txq->if_qp->send_flush(txq->qp);
         if (unlikely(err)) {
-                unsigned int unsent = 0;
-
-                /* An error occurred, fix counters. */
-                while (bad_wr != NULL) {
-                        struct txq_elt *elt =
-                                containerof(bad_wr, struct txq_elt, wr);
-                        mlx4_send_wr_t *wr = &elt->wr;
-                        mlx4_send_wr_t *next = wr->next;
-#if defined(MLX4_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
-                        unsigned int j;
-#endif
-
-                        assert(wr == bad_wr);
-                        /* Clean up TX element without freeing it, caller
-                         * should take care of this. */
-                        WR_ID(elt->wr.wr_id).offset = 0;
-#ifdef MLX4_PMD_SOFT_COUNTERS
-                        for (j = 0; ((int)j < wr->num_sge); ++j)
-                                txq->stats.obytes -= wr->sg_list[j].length;
-#endif
-                        ++unsent;
-                        if (wr->send_flags & IBV_SEND_SIGNALED) {
-                                assert(elts_comp != 0);
-                                --elts_comp;
-                        }
-                        if (elts_comp_cd == txq->elts_comp_cd_init)
-                                elts_comp_cd = 1;
-                        else
-                                ++elts_comp_cd;
-#ifndef NDEBUG
-                        /* For assert(). */
-                        for (j = 0; ((int)j < wr->num_sge); ++j) {
-                                elt->sges[j].addr = 0;
-                                elt->sges[j].length = 0;
-                                elt->sges[j].lkey = 0;
-                        }
-                        wr->next = NULL;
-                        wr->num_sge = 0;
-#endif
-                        bad_wr = next;
-                }
-#ifdef MLX4_PMD_SOFT_COUNTERS
-                txq->stats.opackets -= unsent;
-#endif
-                assert(i >= unsent);
-                i -= unsent;
-                /* "Unsend" remaining packets. */
-                elts_head -= unsent;
-                if (elts_head >= elts_n)
-                        elts_head += elts_n;
-                assert(elts_head < elts_n);
-                DEBUG("%p: mlx4_post_send() failed, %u unprocessed WRs: %s",
-                      (void *)txq, unsent,
-                      ((err <= -1) ? "Internal error" : strerror(err)));
+                /* A nonzero value is not supposed to be returned.
+                 * Nothing can be done about it. */
+                DEBUG("%p: send_flush() failed with error %d",
+                      (void *)txq, err);
         }
         txq->elts_head = elts_head;
         txq->elts_comp += elts_comp;
@@ -1361,9 +1340,11 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
                 .socket = socket
         };
         union {
+                struct ibv_exp_query_intf_params params;
                 struct ibv_qp_init_attr init;
                 struct ibv_exp_qp_attr mod;
         } attr;
+        enum ibv_exp_query_intf_status status;
         int ret = 0;
 
         (void)conf; /* Thresholds configuration (ignored). */
@@ -1455,6 +1436,28 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
                       (void *)dev, strerror(ret));
                 goto error;
         }
+        attr.params = (struct ibv_exp_query_intf_params){
+                .intf_scope = IBV_EXP_INTF_GLOBAL,
+                .intf = IBV_EXP_INTF_CQ,
+                .obj = tmpl.cq,
+        };
+        tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
+        if (tmpl.if_cq == NULL) {
+                ERROR("%p: CQ interface family query failed with status %d",
+                      (void *)dev, status);
+                goto error;
+        }
+        attr.params = (struct ibv_exp_query_intf_params){
+                .intf_scope = IBV_EXP_INTF_GLOBAL,
+                .intf = IBV_EXP_INTF_QP_BURST,
+                .obj = tmpl.qp,
+        };
+        tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
+        if (tmpl.if_qp == NULL) {
+                ERROR("%p: QP interface family query failed with status %d",
+                      (void *)dev, status);
+                goto error;
+        }
         /* Clean up txq in case we're reinitializing it. */
         DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
         txq_cleanup(txq);
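Note (not part of the patch): the CQ/QP interface families returned by
ibv_exp_query_intf() above are plain function tables and must be released
with ibv_exp_release_intf() before the CQ/QP they wrap are destroyed,
which is why txq_cleanup() releases if_qp/if_cq first. A condensed sketch
of that lifecycle, assuming the MOFED 3.0 experimental verbs headers;
burst_intf_sketch() is a made-up name, the verbs calls are the ones used
by this patch:

static int
burst_intf_sketch(struct ibv_context *ctx, struct ibv_cq *cq,
                  struct ibv_qp *qp)
{
        enum ibv_exp_query_intf_status status;
        struct ibv_exp_query_intf_params params = {
                .intf_scope = IBV_EXP_INTF_GLOBAL,
                .intf = IBV_EXP_INTF_CQ,
                .obj = cq,
        };
        struct ibv_exp_release_intf_params rel = { .comp_mask = 0 };
        struct ibv_exp_cq_family *if_cq;
        struct ibv_exp_qp_burst_family *if_qp;

        if_cq = ibv_exp_query_intf(ctx, &params, &status);
        if (if_cq == NULL)
                return -1;
        params.intf = IBV_EXP_INTF_QP_BURST;
        params.obj = qp;
        if_qp = ibv_exp_query_intf(ctx, &params, &status);
        if (if_qp == NULL) {
                ibv_exp_release_intf(ctx, if_cq, &rel);
                return -1;
        }
        /* poll_cnt() only reports how many completions occurred, which is
         * all txq_complete() needs to recycle TX elements, so the ibv_wc
         * array of the old ibv_poll_cq() path goes away. */
        (void)if_cq->poll_cnt(cq, 16);
        /* Release the interfaces before destroying the QP/CQ. */
        ibv_exp_release_intf(ctx, if_qp, &rel);
        ibv_exp_release_intf(ctx, if_cq, &rel);
        return 0;
}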