mlx5: get rid of the WR structure in Rx queue elements
authorAdrien Mazarguil <adrien.mazarguil@6wind.com>
Fri, 30 Oct 2015 18:55:05 +0000 (19:55 +0100)
committerThomas Monjalon <thomas.monjalon@6wind.com>
Fri, 30 Oct 2015 23:21:58 +0000 (00:21 +0100)
Removing this structure reduces the size of SG and non-SG RX queue elements
significantly to improve performance.

An nice side effect is that the mbuf pointer is now fully stored in
struct rxq_elt instead of relying on the WR ID data offset hack.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Olga Shern <olgas@mellanox.com>
Signed-off-by: Or Ami <ora@mellanox.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
drivers/net/mlx5/mlx5.h
drivers/net/mlx5/mlx5_rxq.c
drivers/net/mlx5/mlx5_rxtx.c
drivers/net/mlx5/mlx5_rxtx.h
drivers/net/mlx5/mlx5_utils.h

index 3a1e7a6..c8a517c 100644 (file)
@@ -115,24 +115,6 @@ struct priv {
        rte_spinlock_t lock; /* Lock for control functions. */
 };
 
-/* Work Request ID data type (64 bit). */
-typedef union {
-       struct {
-               uint32_t id;
-               uint16_t offset;
-       } data;
-       uint64_t raw;
-} wr_id_t;
-
-/* Compile-time check. */
-static inline void wr_id_t_check(void)
-{
-       wr_id_t check[1 + (2 * -!(sizeof(wr_id_t) == sizeof(uint64_t)))];
-
-       (void)check;
-       (void)wr_id_t_check;
-}
-
 /**
  * Lock private structure to protect it from concurrent access in the
  * control path.
index 5a55886..f2f773e 100644 (file)
@@ -60,6 +60,7 @@
 #endif
 
 #include "mlx5.h"
+#include "mlx5_autoconf.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_utils.h"
 #include "mlx5_defs.h"
@@ -97,16 +98,10 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
        for (i = 0; (i != elts_n); ++i) {
                unsigned int j;
                struct rxq_elt_sp *elt = &(*elts)[i];
-               struct ibv_recv_wr *wr = &elt->wr;
                struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;
 
                /* These two arrays must have the same size. */
                assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
-               /* Configure WR. */
-               wr->wr_id = i;
-               wr->next = &(*elts)[(i + 1)].wr;
-               wr->sg_list = &(*sges)[0];
-               wr->num_sge = RTE_DIM(*sges);
                /* For each SGE (segment). */
                for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
                        struct ibv_sge *sge = &(*sges)[j];
@@ -149,8 +144,6 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
                        assert(sge->length == rte_pktmbuf_tailroom(buf));
                }
        }
-       /* The last WR pointer must be NULL. */
-       (*elts)[(i - 1)].wr.next = NULL;
        DEBUG("%p: allocated and configured %u WRs (%zu segments)",
              (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges)));
        rxq->elts_n = elts_n;
@@ -242,7 +235,6 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
        /* For each WR (packet). */
        for (i = 0; (i != elts_n); ++i) {
                struct rxq_elt *elt = &(*elts)[i];
-               struct ibv_recv_wr *wr = &elt->wr;
                struct ibv_sge *sge = &(*elts)[i].sge;
                struct rte_mbuf *buf;
 
@@ -258,16 +250,7 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
                        ret = ENOMEM;
                        goto error;
                }
-               /* Configure WR. Work request ID contains its own index in
-                * the elts array and the offset between SGE buffer header and
-                * its data. */
-               WR_ID(wr->wr_id).id = i;
-               WR_ID(wr->wr_id).offset =
-                       (((uintptr_t)buf->buf_addr + RTE_PKTMBUF_HEADROOM) -
-                        (uintptr_t)buf);
-               wr->next = &(*elts)[(i + 1)].wr;
-               wr->sg_list = sge;
-               wr->num_sge = 1;
+               elt->buf = buf;
                /* Headroom is reserved by rte_pktmbuf_alloc(). */
                assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
                /* Buffer is supposed to be empty. */
@@ -282,21 +265,7 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
                sge->lkey = rxq->mr->lkey;
                /* Redundant check for tailroom. */
                assert(sge->length == rte_pktmbuf_tailroom(buf));
-               /* Make sure elts index and SGE mbuf pointer can be deduced
-                * from WR ID. */
-               if ((WR_ID(wr->wr_id).id != i) ||
-                   ((void *)((uintptr_t)sge->addr -
-                       WR_ID(wr->wr_id).offset) != buf)) {
-                       ERROR("%p: cannot store index and offset in WR ID",
-                             (void *)rxq);
-                       sge->addr = 0;
-                       rte_pktmbuf_free(buf);
-                       ret = EOVERFLOW;
-                       goto error;
-               }
        }
-       /* The last WR pointer must be NULL. */
-       (*elts)[(i - 1)].wr.next = NULL;
        DEBUG("%p: allocated and configured %u single-segment WRs",
              (void *)rxq, elts_n);
        rxq->elts_n = elts_n;
@@ -309,14 +278,10 @@ error:
                assert(pool == NULL);
                for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                        struct rxq_elt *elt = &(*elts)[i];
-                       struct rte_mbuf *buf;
+                       struct rte_mbuf *buf = elt->buf;
 
-                       if (elt->sge.addr == 0)
-                               continue;
-                       assert(WR_ID(elt->wr.wr_id).id == i);
-                       buf = (void *)((uintptr_t)elt->sge.addr -
-                               WR_ID(elt->wr.wr_id).offset);
-                       rte_pktmbuf_free_seg(buf);
+                       if (buf != NULL)
+                               rte_pktmbuf_free_seg(buf);
                }
                rte_free(elts);
        }
@@ -345,14 +310,10 @@ rxq_free_elts(struct rxq *rxq)
                return;
        for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                struct rxq_elt *elt = &(*elts)[i];
-               struct rte_mbuf *buf;
+               struct rte_mbuf *buf = elt->buf;
 
-               if (elt->sge.addr == 0)
-                       continue;
-               assert(WR_ID(elt->wr.wr_id).id == i);
-               buf = (void *)((uintptr_t)elt->sge.addr -
-                       WR_ID(elt->wr.wr_id).offset);
-               rte_pktmbuf_free_seg(buf);
+               if (buf != NULL)
+                       rte_pktmbuf_free_seg(buf);
        }
        rte_free(elts);
 }
@@ -552,7 +513,6 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
        struct rte_mbuf **pool;
        unsigned int i, k;
        struct ibv_exp_qp_attr mod;
-       struct ibv_recv_wr *bad_wr;
        int err;
        int parent = (rxq == &priv->rxq_parent);
 
@@ -670,11 +630,8 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
 
                for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                        struct rxq_elt *elt = &(*elts)[i];
-                       struct rte_mbuf *buf = (void *)
-                               ((uintptr_t)elt->sge.addr -
-                                WR_ID(elt->wr.wr_id).offset);
+                       struct rte_mbuf *buf = elt->buf;
 
-                       assert(WR_ID(elt->wr.wr_id).id == i);
                        pool[k++] = buf;
                }
        }
@@ -698,17 +655,41 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
        rxq->elts_n = 0;
        rte_free(rxq->elts.sp);
        rxq->elts.sp = NULL;
-       /* Post WRs. */
-       err = ibv_post_recv(tmpl.qp,
-                           (tmpl.sp ?
-                            &(*tmpl.elts.sp)[0].wr :
-                            &(*tmpl.elts.no_sp)[0].wr),
-                           &bad_wr);
+       /* Post SGEs. */
+       assert(tmpl.if_qp != NULL);
+       if (tmpl.sp) {
+               struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
+
+               for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
+                       err = tmpl.if_qp->recv_sg_list
+                               (tmpl.qp,
+                                (*elts)[i].sges,
+                                RTE_DIM((*elts)[i].sges));
+#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
+                       errno = ENOSYS;
+                       err = -1;
+#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
+                       if (err)
+                               break;
+               }
+       } else {
+               struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
+
+               for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+                       err = tmpl.if_qp->recv_burst(
+                               tmpl.qp,
+                               &(*elts)[i].sge,
+                               1);
+                       if (err)
+                               break;
+               }
+       }
        if (err) {
-               ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-                     (void *)dev,
-                     (void *)bad_wr,
-                     strerror(err));
+               ERROR("%p: failed to post SGEs with error %d",
+                     (void *)dev, err);
+               /* Set err because it does not contain a valid errno value. */
+               err = EIO;
                goto skip_rtr;
        }
        mod = (struct ibv_exp_qp_attr){
@@ -761,10 +742,10 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
                struct ibv_exp_res_domain_init_attr rd;
        } attr;
        enum ibv_exp_query_intf_status status;
-       struct ibv_recv_wr *bad_wr;
        struct rte_mbuf *buf;
        int ret = 0;
        int parent = (rxq == &priv->rxq_parent);
+       unsigned int i;
 
        (void)conf; /* Thresholds configuration (ignored). */
        /*
@@ -900,28 +881,7 @@ skip_mr:
                      (void *)dev, strerror(ret));
                goto error;
        }
-       ret = ibv_post_recv(tmpl.qp,
-                           (tmpl.sp ?
-                            &(*tmpl.elts.sp)[0].wr :
-                            &(*tmpl.elts.no_sp)[0].wr),
-                           &bad_wr);
-       if (ret) {
-               ERROR("%p: ibv_post_recv() failed for WR %p: %s",
-                     (void *)dev,
-                     (void *)bad_wr,
-                     strerror(ret));
-               goto error;
-       }
 skip_alloc:
-       mod = (struct ibv_exp_qp_attr){
-               .qp_state = IBV_QPS_RTR
-       };
-       ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
-       if (ret) {
-               ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
-                     (void *)dev, strerror(ret));
-               goto error;
-       }
        /* Save port ID. */
        tmpl.port_id = dev->data->port_id;
        DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
@@ -947,6 +907,51 @@ skip_alloc:
                      (void *)dev, status);
                goto error;
        }
+       /* Post SGEs. */
+       if (!parent && tmpl.sp) {
+               struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
+
+               for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
+                       ret = tmpl.if_qp->recv_sg_list
+                               (tmpl.qp,
+                                (*elts)[i].sges,
+                                RTE_DIM((*elts)[i].sges));
+#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
+                       errno = ENOSYS;
+                       ret = -1;
+#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
+                       if (ret)
+                               break;
+               }
+       } else if (!parent) {
+               struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
+
+               for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+                       ret = tmpl.if_qp->recv_burst(
+                               tmpl.qp,
+                               &(*elts)[i].sge,
+                               1);
+                       if (ret)
+                               break;
+               }
+       }
+       if (ret) {
+               ERROR("%p: failed to post SGEs with error %d",
+                     (void *)dev, ret);
+               /* Set ret because it does not contain a valid errno value. */
+               ret = EIO;
+               goto error;
+       }
+       mod = (struct ibv_exp_qp_attr){
+               .qp_state = IBV_QPS_RTR
+       };
+       ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
+       if (ret) {
+               ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
+                     (void *)dev, strerror(ret));
+               goto error;
+       }
        /* Clean up rxq in case we're reinitializing it. */
        DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
        rxq_cleanup(rxq);
index 8872f19..f48fec1 100644 (file)
@@ -612,8 +612,6 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                return 0;
        for (i = 0; (i != pkts_n); ++i) {
                struct rxq_elt_sp *elt = &(*elts)[elts_head];
-               struct ibv_recv_wr *wr = &elt->wr;
-               uint64_t wr_id = wr->wr_id;
                unsigned int len;
                unsigned int pkt_buf_len;
                struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */
@@ -623,12 +621,6 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                uint32_t flags;
 
                /* Sanity checks. */
-#ifdef NDEBUG
-               (void)wr_id;
-#endif
-               assert(wr_id < rxq->elts_n);
-               assert(wr->sg_list == elt->sges);
-               assert(wr->num_sge == RTE_DIM(elt->sges));
                assert(elts_head < rxq->elts_n);
                assert(rxq->elts_head < rxq->elts_n);
                ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
@@ -677,6 +669,7 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        struct rte_mbuf *rep;
                        unsigned int seg_tailroom;
 
+                       assert(seg != NULL);
                        /*
                         * Fetch initial bytes of packet descriptor into a
                         * cacheline while allocating rep.
@@ -688,9 +681,8 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                                 * Unable to allocate a replacement mbuf,
                                 * repost WR.
                                 */
-                               DEBUG("rxq=%p, wr_id=%" PRIu64 ":"
-                                     " can't allocate a new mbuf",
-                                     (void *)rxq, wr_id);
+                               DEBUG("rxq=%p: can't allocate a new mbuf",
+                                     (void *)rxq);
                                if (pkt_buf != NULL) {
                                        *pkt_buf_next = NULL;
                                        rte_pktmbuf_free(pkt_buf);
@@ -825,18 +817,13 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                return mlx5_rx_burst_sp(dpdk_rxq, pkts, pkts_n);
        for (i = 0; (i != pkts_n); ++i) {
                struct rxq_elt *elt = &(*elts)[elts_head];
-               struct ibv_recv_wr *wr = &elt->wr;
-               uint64_t wr_id = wr->wr_id;
                unsigned int len;
-               struct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr -
-                       WR_ID(wr_id).offset);
+               struct rte_mbuf *seg = elt->buf;
                struct rte_mbuf *rep;
                uint32_t flags;
 
                /* Sanity checks. */
-               assert(WR_ID(wr_id).id < rxq->elts_n);
-               assert(wr->sg_list == &elt->sge);
-               assert(wr->num_sge == 1);
+               assert(seg != NULL);
                assert(elts_head < rxq->elts_n);
                assert(rxq->elts_head < rxq->elts_n);
                /*
@@ -888,9 +875,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                         * Unable to allocate a replacement mbuf,
                         * repost WR.
                         */
-                       DEBUG("rxq=%p, wr_id=%" PRIu32 ":"
-                             " can't allocate a new mbuf",
-                             (void *)rxq, WR_ID(wr_id).id);
+                       DEBUG("rxq=%p: can't allocate a new mbuf",
+                             (void *)rxq);
                        /* Increment out of memory counters. */
                        ++rxq->stats.rx_nombuf;
                        ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
@@ -900,10 +886,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                /* Reconfigure sge to use rep instead of seg. */
                elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
                assert(elt->sge.lkey == rxq->mr->lkey);
-               WR_ID(wr->wr_id).offset =
-                       (((uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM) -
-                        (uintptr_t)rep);
-               assert(WR_ID(wr->wr_id).id == WR_ID(wr_id).id);
+               elt->buf = rep;
 
                /* Add SGE to array for repost. */
                sges[i] = elt->sge;
index d86d623..90c99dc 100644 (file)
@@ -81,16 +81,14 @@ struct mlx5_txq_stats {
 
 /* RX element (scattered packets). */
 struct rxq_elt_sp {
-       struct ibv_recv_wr wr; /* Work Request. */
        struct ibv_sge sges[MLX5_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
        struct rte_mbuf *bufs[MLX5_PMD_SGE_WR_N]; /* SGEs buffers. */
 };
 
 /* RX element. */
 struct rxq_elt {
-       struct ibv_recv_wr wr; /* Work Request. */
        struct ibv_sge sge; /* Scatter/Gather Element. */
-       /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
+       struct rte_mbuf *buf; /* SGE buffer. */
 };
 
 struct priv;
index 8ff075b..f1fad18 100644 (file)
@@ -161,6 +161,4 @@ pmd_drv_log_basename(const char *s)
        \
        snprintf(name, sizeof(name), __VA_ARGS__)
 
-#define WR_ID(o) (((wr_id_t *)&(o))->data)
-
 #endif /* RTE_PMD_MLX5_UTILS_H_ */