net/mlx4: revert fast verbs interface for Tx
authorAdrien Mazarguil <adrien.mazarguil@6wind.com>
Fri, 1 Sep 2017 08:06:39 +0000 (10:06 +0200)
committerFerruh Yigit <ferruh.yigit@intel.com>
Fri, 6 Oct 2017 00:49:48 +0000 (02:49 +0200)
This reverts commit 9980f81dc2623291b89cf1c281a6a9f116fd2394.

"Fast Verbs" is a nonstandard experimental interface that must be reverted
for compatibility reasons. Its replacement is slower but temporary,
performance will be restored by a subsequent commit through an enhanced
data path implementation. This one focuses on maintaining basic
functionality in the meantime.

Signed-off-by: Moti Haimovsky <motih@mellanox.com>
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
drivers/net/mlx4/mlx4.c
drivers/net/mlx4/mlx4.h

index 5abfb37..4432952 100644 (file)
@@ -666,33 +666,10 @@ txq_free_elts(struct txq *txq)
 static void
 txq_cleanup(struct txq *txq)
 {
-       struct ibv_exp_release_intf_params params;
        size_t i;
 
        DEBUG("cleaning up %p", (void *)txq);
        txq_free_elts(txq);
-       if (txq->if_qp != NULL) {
-               assert(txq->priv != NULL);
-               assert(txq->priv->ctx != NULL);
-               assert(txq->qp != NULL);
-               params = (struct ibv_exp_release_intf_params){
-                       .comp_mask = 0,
-               };
-               claim_zero(ibv_exp_release_intf(txq->priv->ctx,
-                                               txq->if_qp,
-                                               &params));
-       }
-       if (txq->if_cq != NULL) {
-               assert(txq->priv != NULL);
-               assert(txq->priv->ctx != NULL);
-               assert(txq->cq != NULL);
-               params = (struct ibv_exp_release_intf_params){
-                       .comp_mask = 0,
-               };
-               claim_zero(ibv_exp_release_intf(txq->priv->ctx,
-                                               txq->if_cq,
-                                               &params));
-       }
        if (txq->qp != NULL)
                claim_zero(ibv_destroy_qp(txq->qp));
        if (txq->cq != NULL)
@@ -726,11 +703,12 @@ txq_complete(struct txq *txq)
        unsigned int elts_comp = txq->elts_comp;
        unsigned int elts_tail = txq->elts_tail;
        const unsigned int elts_n = txq->elts_n;
+       struct ibv_wc wcs[elts_comp];
        int wcs_n;
 
        if (unlikely(elts_comp == 0))
                return 0;
-       wcs_n = txq->if_cq->poll_cnt(txq->cq, elts_comp);
+       wcs_n = ibv_poll_cq(txq->cq, elts_comp, wcs);
        if (unlikely(wcs_n == 0))
                return 0;
        if (unlikely(wcs_n < 0)) {
@@ -1014,6 +992,9 @@ static uint16_t
 mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
        struct txq *txq = (struct txq *)dpdk_txq;
+       struct ibv_send_wr *wr_head = NULL;
+       struct ibv_send_wr **wr_next = &wr_head;
+       struct ibv_send_wr *wr_bad = NULL;
        unsigned int elts_head = txq->elts_head;
        const unsigned int elts_n = txq->elts_n;
        unsigned int elts_comp_cd = txq->elts_comp_cd;
@@ -1041,6 +1022,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        (((elts_head + 1) == elts_n) ? 0 : elts_head + 1);
                struct txq_elt *elt_next = &(*txq->elts)[elts_head_next];
                struct txq_elt *elt = &(*txq->elts)[elts_head];
+               struct ibv_send_wr *wr = &elt->wr;
                unsigned int segs = NB_SEGS(buf);
                unsigned int sent_size = 0;
                uint32_t send_flags = 0;
@@ -1065,9 +1047,10 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                if (unlikely(--elts_comp_cd == 0)) {
                        elts_comp_cd = txq->elts_comp_cd_init;
                        ++elts_comp;
-                       send_flags |= IBV_EXP_QP_BURST_SIGNALED;
+                       send_flags |= IBV_SEND_SIGNALED;
                }
                if (likely(segs == 1)) {
+                       struct ibv_sge *sge = &elt->sge;
                        uintptr_t addr;
                        uint32_t length;
                        uint32_t lkey;
@@ -1091,30 +1074,26 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                rte_prefetch0((volatile void *)
                                              (uintptr_t)addr);
                        RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
-                       /* Put packet into send queue. */
-                       if (length <= txq->max_inline)
-                               err = txq->if_qp->send_pending_inline
-                                       (txq->qp,
-                                        (void *)addr,
-                                        length,
-                                        send_flags);
-                       else
-                               err = txq->if_qp->send_pending
-                                       (txq->qp,
-                                        addr,
-                                        length,
-                                        lkey,
-                                        send_flags);
-                       if (unlikely(err))
-                               goto stop;
+                       sge->addr = addr;
+                       sge->length = length;
+                       sge->lkey = lkey;
                        sent_size += length;
                } else {
                        err = -1;
                        goto stop;
                }
+               if (sent_size <= txq->max_inline)
+                       send_flags |= IBV_SEND_INLINE;
                elts_head = elts_head_next;
                /* Increment sent bytes counter. */
                txq->stats.obytes += sent_size;
+               /* Set up WR. */
+               wr->sg_list = &elt->sge;
+               wr->num_sge = segs;
+               wr->opcode = IBV_WR_SEND;
+               wr->send_flags = send_flags;
+               *wr_next = wr;
+               wr_next = &wr->next;
        }
 stop:
        /* Take a shortcut if nothing must be sent. */
@@ -1123,12 +1102,37 @@ stop:
        /* Increment sent packets counter. */
        txq->stats.opackets += i;
        /* Ring QP doorbell. */
-       err = txq->if_qp->send_flush(txq->qp);
+       *wr_next = NULL;
+       assert(wr_head);
+       err = ibv_post_send(txq->qp, wr_head, &wr_bad);
        if (unlikely(err)) {
-               /* A nonzero value is not supposed to be returned.
-                * Nothing can be done about it. */
-               DEBUG("%p: send_flush() failed with error %d",
-                     (void *)txq, err);
+               uint64_t obytes = 0;
+               uint64_t opackets = 0;
+
+               /* Rewind bad WRs. */
+               while (wr_bad != NULL) {
+                       int j;
+
+                       /* Force completion request if one was lost. */
+                       if (wr_bad->send_flags & IBV_SEND_SIGNALED) {
+                               elts_comp_cd = 1;
+                               --elts_comp;
+                       }
+                       ++opackets;
+                       for (j = 0; j < wr_bad->num_sge; ++j)
+                               obytes += wr_bad->sg_list[j].length;
+                       elts_head = (elts_head ? elts_head : elts_n) - 1;
+                       wr_bad = wr_bad->next;
+               }
+               txq->stats.opackets -= opackets;
+               txq->stats.obytes -= obytes;
+               i -= opackets;
+               DEBUG("%p: ibv_post_send() failed, %" PRIu64 " packets"
+                     " (%" PRIu64 " bytes) rejected: %s",
+                     (void *)txq,
+                     opackets,
+                     obytes,
+                     (err <= -1) ? "Internal error" : strerror(err));
        }
        txq->elts_head = elts_head;
        txq->elts_comp += elts_comp;
@@ -1163,11 +1167,9 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
                .socket = socket
        };
        union {
-               struct ibv_exp_query_intf_params params;
                struct ibv_qp_init_attr init;
                struct ibv_qp_attr mod;
        } attr;
-       enum ibv_exp_query_intf_status status;
        int ret = 0;
 
        (void)conf; /* Thresholds configuration (ignored). */
@@ -1251,28 +1253,6 @@ txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc,
                      (void *)dev, strerror(ret));
                goto error;
        }
-       attr.params = (struct ibv_exp_query_intf_params){
-               .intf_scope = IBV_EXP_INTF_GLOBAL,
-               .intf = IBV_EXP_INTF_CQ,
-               .obj = tmpl.cq,
-       };
-       tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-       if (tmpl.if_cq == NULL) {
-               ERROR("%p: CQ interface family query failed with status %d",
-                     (void *)dev, status);
-               goto error;
-       }
-       attr.params = (struct ibv_exp_query_intf_params){
-               .intf_scope = IBV_EXP_INTF_GLOBAL,
-               .intf = IBV_EXP_INTF_QP_BURST,
-               .obj = tmpl.qp,
-       };
-       tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
-       if (tmpl.if_qp == NULL) {
-               ERROR("%p: QP interface family query failed with status %d",
-                     (void *)dev, status);
-               goto error;
-       }
        /* Clean up txq in case we're reinitializing it. */
        DEBUG("%p: cleaning-up old txq just in case", (void *)txq);
        txq_cleanup(txq);
index edec40c..8a9a678 100644 (file)
@@ -171,6 +171,8 @@ struct rxq {
 
 /* TX element. */
 struct txq_elt {
+       struct ibv_send_wr wr; /* Work request. */
+       struct ibv_sge sge; /* Scatter/gather element. */
        struct rte_mbuf *buf;
 };
 
@@ -191,8 +193,6 @@ struct txq {
        } mp2mr[MLX4_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
        struct ibv_cq *cq; /* Completion Queue. */
        struct ibv_qp *qp; /* Queue Pair. */
-       struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */
-       struct ibv_exp_cq_family *if_cq; /* CQ interface. */
        uint32_t max_inline; /* Max inline send size <= MLX4_PMD_MAX_INLINE. */
        unsigned int elts_n; /* (*elts)[] length. */
        struct txq_elt (*elts)[]; /* TX elements. */