mlx5: support scattered Rx and Tx
author    Adrien Mazarguil <adrien.mazarguil@6wind.com>
          Fri, 30 Oct 2015 18:52:34 +0000 (19:52 +0100)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
          Fri, 30 Oct 2015 21:23:00 +0000 (22:23 +0100)
A dedicated RX callback is added to handle scattered buffers. For better
performance, it is only used when jumbo frames are enabled and the MTU is
larger than what a single mbuf can hold.
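
For context, a minimal sketch of the application-side configuration that
selects this path, assuming the DPDK 2.x rte_eth API of the time
(configure_jumbo(), the queue counts and the 9000-byte value are
illustrative, not part of the patch):

    #include <rte_ethdev.h>

    /* Illustrative only: enable jumbo frames so that rxq_setup() turns on
     * scattered RX for queues whose mbufs cannot hold a full frame. */
    static int
    configure_jumbo(uint8_t port_id)
    {
            struct rte_eth_conf conf = {
                    .rxmode = {
                            .jumbo_frame = 1,
                            /* Anything above (mb_len - RTE_PKTMBUF_HEADROOM)
                             * selects mlx5_rx_burst_sp() on this queue. */
                            .max_rx_pkt_len = 9000,
                    },
            };

            return rte_eth_dev_configure(port_id, 1, 1, &conf);
    }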

On the TX path, scattered buffers are also handled in a separate function.
When a given mbuf has more than MLX5_PMD_SGE_WR_N segments, the extra
segments are copied (linearized) into the buffer behind the last SGE entry.
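
For instance, with MLX5_PMD_SGE_WR_N set to 4, a 6-segment mbuf maps
segments 1-3 to the first three SGEs and copies segments 4-6 into the
preregistered linear buffer behind the fourth. A standalone model of that
copy follows; struct seg, linearize() and LINEAR_SIZE are stand-ins for the
driver's mbuf chain, linearize_mbuf() and linear_t:

    #include <string.h>

    #define LINEAR_SIZE 16384 /* stand-in for sizeof(linear_t) */

    struct seg {
            const void *data;   /* segment payload */
            unsigned int len;   /* payload length */
            struct seg *next;   /* next segment, NULL-terminated */
    };

    /* Copy a segment chain into one linear buffer; return the number of
     * bytes copied, or 0 if the chain does not fit. */
    static unsigned int
    linearize(unsigned char (*linear)[LINEAR_SIZE], struct seg *s)
    {
            unsigned int size = 0;

            do {
                    if (size + s->len > sizeof(*linear))
                            return 0;
                    memcpy(&(*linear)[size], s->data, s->len);
                    size += s->len;
                    s = s->next;
            } while (s != NULL);
            return size;
    }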

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
drivers/net/mlx5/mlx5_rxq.c
drivers/net/mlx5/mlx5_rxtx.c
drivers/net/mlx5/mlx5_rxtx.h

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 8450fe3..1eddfc7 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
 #include "mlx5_utils.h"
 #include "mlx5_defs.h"
 
+/**
+ * Allocate RX queue elements with scattered packets support.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param elts_n
+ *   Number of elements to allocate.
+ * @param[in] pool
+ *   If not NULL, fetch buffers from this array instead of allocating them
+ *   with rte_pktmbuf_alloc().
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
+                 struct rte_mbuf **pool)
+{
+       unsigned int i;
+       struct rxq_elt_sp (*elts)[elts_n] =
+               rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
+                                 rxq->socket);
+       int ret = 0;
+
+       if (elts == NULL) {
+               ERROR("%p: can't allocate packets array", (void *)rxq);
+               ret = ENOMEM;
+               goto error;
+       }
+       /* For each WR (packet). */
+       for (i = 0; (i != elts_n); ++i) {
+               unsigned int j;
+               struct rxq_elt_sp *elt = &(*elts)[i];
+               struct ibv_recv_wr *wr = &elt->wr;
+               struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;
+
+               /* These two arrays must have the same size. */
+               assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
+               /* Configure WR. */
+               wr->wr_id = i;
+               wr->next = &(*elts)[(i + 1)].wr;
+               wr->sg_list = &(*sges)[0];
+               wr->num_sge = RTE_DIM(*sges);
+               /* For each SGE (segment). */
+               for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
+                       struct ibv_sge *sge = &(*sges)[j];
+                       struct rte_mbuf *buf;
+
+                       if (pool != NULL) {
+                               buf = *(pool++);
+                               assert(buf != NULL);
+                               rte_pktmbuf_reset(buf);
+                       } else
+                               buf = rte_pktmbuf_alloc(rxq->mp);
+                       if (buf == NULL) {
+                               assert(pool == NULL);
+                               ERROR("%p: empty mbuf pool", (void *)rxq);
+                               ret = ENOMEM;
+                               goto error;
+                       }
+                       elt->bufs[j] = buf;
+                       /* Headroom is reserved by rte_pktmbuf_alloc(). */
+                       assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
+                       /* Buffer is supposed to be empty. */
+                       assert(rte_pktmbuf_data_len(buf) == 0);
+                       assert(rte_pktmbuf_pkt_len(buf) == 0);
+                       /* sge->addr must be able to store a pointer. */
+                       assert(sizeof(sge->addr) >= sizeof(uintptr_t));
+                       if (j == 0) {
+                               /* The first SGE keeps its headroom. */
+                               sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
+                               sge->length = (buf->buf_len -
+                                              RTE_PKTMBUF_HEADROOM);
+                       } else {
+                               /* Subsequent SGEs lose theirs. */
+                               assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
+                               SET_DATA_OFF(buf, 0);
+                               sge->addr = (uintptr_t)buf->buf_addr;
+                               sge->length = buf->buf_len;
+                       }
+                       sge->lkey = rxq->mr->lkey;
+                       /* Redundant check for tailroom. */
+                       assert(sge->length == rte_pktmbuf_tailroom(buf));
+               }
+       }
+       /* The last WR pointer must be NULL. */
+       (*elts)[(i - 1)].wr.next = NULL;
+       DEBUG("%p: allocated and configured %u WRs (%zu segments)",
+             (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges)));
+       rxq->elts_n = elts_n;
+       rxq->elts_head = 0;
+       rxq->elts.sp = elts;
+       assert(ret == 0);
+       return 0;
+error:
+       if (elts != NULL) {
+               assert(pool == NULL);
+               for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+                       unsigned int j;
+                       struct rxq_elt_sp *elt = &(*elts)[i];
+
+                       for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
+                               struct rte_mbuf *buf = elt->bufs[j];
+
+                               if (buf != NULL)
+                                       rte_pktmbuf_free_seg(buf);
+                       }
+               }
+               rte_free(elts);
+       }
+       DEBUG("%p: failed, freed everything", (void *)rxq);
+       assert(ret > 0);
+       return ret;
+}
+
+/**
+ * Free RX queue elements with scattered packets support.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ */
+static void
+rxq_free_elts_sp(struct rxq *rxq)
+{
+       unsigned int i;
+       unsigned int elts_n = rxq->elts_n;
+       struct rxq_elt_sp (*elts)[elts_n] = rxq->elts.sp;
+
+       DEBUG("%p: freeing WRs", (void *)rxq);
+       rxq->elts_n = 0;
+       rxq->elts.sp = NULL;
+       if (elts == NULL)
+               return;
+       for (i = 0; (i != RTE_DIM(*elts)); ++i) {
+               unsigned int j;
+               struct rxq_elt_sp *elt = &(*elts)[i];
+
+               for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
+                       struct rte_mbuf *buf = elt->bufs[j];
+
+                       if (buf != NULL)
+                               rte_pktmbuf_free_seg(buf);
+               }
+       }
+       rte_free(elts);
+}
+
 /**
  * Allocate RX queue elements.
  *
@@ -224,7 +371,10 @@ rxq_cleanup(struct rxq *rxq)
        struct ibv_exp_release_intf_params params;
 
        DEBUG("cleaning up %p", (void *)rxq);
-       rxq_free_elts(rxq);
+       if (rxq->sp)
+               rxq_free_elts_sp(rxq);
+       else
+               rxq_free_elts(rxq);
        if (rxq->if_qp != NULL) {
                assert(rxq->priv != NULL);
                assert(rxq->priv->ctx != NULL);
@@ -445,6 +595,15 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
                rte_pktmbuf_tailroom(buf)) == tmpl.mb_len);
        assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM);
        rte_pktmbuf_free(buf);
+       /* Enable scattered packets support for this queue if necessary. */
+       if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
+           (dev->data->dev_conf.rxmode.max_rx_pkt_len >
+            (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
+               tmpl.sp = 1;
+               desc /= MLX5_PMD_SGE_WR_N;
+       }
+       DEBUG("%p: %s scattered packets support (%u WRs)",
+             (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
        /* Use the entire RX mempool as the memory region. */
        tmpl.mr = ibv_reg_mr(priv->pd,
                             (void *)mp->elt_va_start,
@@ -528,14 +687,19 @@ skip_mr:
        /* Allocate descriptors for RX queues, except for the RSS parent. */
        if (parent)
                goto skip_alloc;
-       ret = rxq_alloc_elts(&tmpl, desc, NULL);
+       if (tmpl.sp)
+               ret = rxq_alloc_elts_sp(&tmpl, desc, NULL);
+       else
+               ret = rxq_alloc_elts(&tmpl, desc, NULL);
        if (ret) {
                ERROR("%p: RXQ allocation failed: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
        ret = ibv_post_recv(tmpl.qp,
-                           &(*tmpl.elts.no_sp)[0].wr,
+                           (tmpl.sp ?
+                            &(*tmpl.elts.sp)[0].wr :
+                            &(*tmpl.elts.no_sp)[0].wr),
                            &bad_wr);
        if (ret) {
                ERROR("%p: ibv_post_recv() failed for WR %p: %s",
@@ -655,7 +819,10 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                      (void *)dev, (void *)rxq);
                (*priv->rxqs)[idx] = rxq;
                /* Update receive callback. */
-               dev->rx_pkt_burst = mlx5_rx_burst;
+               if (rxq->sp)
+                       dev->rx_pkt_burst = mlx5_rx_burst_sp;
+               else
+                       dev->rx_pkt_burst = mlx5_rx_burst;
        }
        priv_unlock(priv);
        return -ret;
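
Note how the sp bit now discriminates the elts union at every use above
(descriptor allocation, the initial ibv_post_recv() and the RX callback
choice). A hypothetical helper, not part of the patch, summarizing that
dispatch:

    /* Hypothetical helper: pick the first WR of the chain to post,
     * according to the queue's sp bit. */
    static inline struct ibv_recv_wr *
    rxq_first_wr(struct rxq *rxq)
    {
            return (rxq->sp ?
                    &(*rxq->elts.sp)[0].wr :
                    &(*rxq->elts.no_sp)[0].wr);
    }
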
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 0f1e541..ed6faa1 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -173,6 +173,154 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
        return txq->mp2mr[i].lkey;
 }
 
+#if MLX5_PMD_SGE_WR_N > 1
+
+/**
+ * Copy scattered mbuf contents to a single linear buffer.
+ *
+ * @param[out] linear
+ *   Linear output buffer.
+ * @param[in] buf
+ *   Scattered input buffer.
+ *
+ * @return
+ *   Number of bytes copied to the output buffer or 0 if not large enough.
+ */
+static unsigned int
+linearize_mbuf(linear_t *linear, struct rte_mbuf *buf)
+{
+       unsigned int size = 0;
+       unsigned int offset;
+
+       do {
+               unsigned int len = DATA_LEN(buf);
+
+               offset = size;
+               size += len;
+               if (unlikely(size > sizeof(*linear)))
+                       return 0;
+               memcpy(&(*linear)[offset],
+                      rte_pktmbuf_mtod(buf, uint8_t *),
+                      len);
+               buf = NEXT(buf);
+       } while (buf != NULL);
+       return size;
+}
+
+/**
+ * Handle scattered buffers for mlx5_tx_burst().
+ *
+ * @param txq
+ *   TX queue structure.
+ * @param segs
+ *   Number of segments in buf.
+ * @param elt
+ *   TX queue element to fill.
+ * @param[in] buf
+ *   Buffer to process.
+ * @param elts_head
+ *   Index of the linear buffer to use if necessary (normally txq->elts_head).
+ * @param[out] sges
+ *   Array filled with SGEs on success.
+ *
+ * @return
+ *   A structure containing the processed packet size in bytes and the
+ *   number of SGEs. Both fields are set to (unsigned int)-1 in case of
+ *   failure.
+ */
+static struct tx_burst_sg_ret {
+       unsigned int length;
+       unsigned int num;
+}
+tx_burst_sg(struct txq *txq, unsigned int segs, struct txq_elt *elt,
+           struct rte_mbuf *buf, unsigned int elts_head,
+           struct ibv_sge (*sges)[MLX5_PMD_SGE_WR_N])
+{
+       unsigned int sent_size = 0;
+       unsigned int j;
+       int linearize = 0;
+
+       /* When there are too many segments, extra segments are
+        * linearized in the last SGE. */
+       if (unlikely(segs > RTE_DIM(*sges))) {
+               segs = (RTE_DIM(*sges) - 1);
+               linearize = 1;
+       }
+       /* Update element. */
+       elt->buf = buf;
+       /* Register segments as SGEs. */
+       for (j = 0; (j != segs); ++j) {
+               struct ibv_sge *sge = &(*sges)[j];
+               uint32_t lkey;
+
+               /* Retrieve Memory Region key for this memory pool. */
+               lkey = txq_mp2mr(txq, buf->pool);
+               if (unlikely(lkey == (uint32_t)-1)) {
+                       /* MR does not exist. */
+                       DEBUG("%p: unable to get MP <-> MR association",
+                             (void *)txq);
+                       /* Clean up TX element. */
+                       elt->buf = NULL;
+                       goto stop;
+               }
+               /* Update SGE. */
+               sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
+               if (txq->priv->vf)
+                       rte_prefetch0((volatile void *)
+                                     (uintptr_t)sge->addr);
+               sge->length = DATA_LEN(buf);
+               sge->lkey = lkey;
+               sent_size += sge->length;
+               buf = NEXT(buf);
+       }
+       /* If buf is not NULL here and is not going to be linearized,
+        * nb_segs is not valid. */
+       assert(j == segs);
+       assert((buf == NULL) || (linearize));
+       /* Linearize extra segments. */
+       if (linearize) {
+               struct ibv_sge *sge = &(*sges)[segs];
+               linear_t *linear = &(*txq->elts_linear)[elts_head];
+               unsigned int size = linearize_mbuf(linear, buf);
+
+               assert(segs == (RTE_DIM(*sges) - 1));
+               if (size == 0) {
+                       /* Invalid packet. */
+                       DEBUG("%p: packet too large to be linearized.",
+                             (void *)txq);
+                       /* Clean up TX element. */
+                       elt->buf = NULL;
+                       goto stop;
+               }
+               /* If MLX5_PMD_SGE_WR_N is 1, free mbuf immediately. */
+               if (RTE_DIM(*sges) == 1) {
+                       do {
+                               struct rte_mbuf *next = NEXT(buf);
+
+                               rte_pktmbuf_free_seg(buf);
+                               buf = next;
+                       } while (buf != NULL);
+                       elt->buf = NULL;
+               }
+               /* Update SGE. */
+               sge->addr = (uintptr_t)&(*linear)[0];
+               sge->length = size;
+               sge->lkey = txq->mr_linear->lkey;
+               sent_size += size;
+       }
+       return (struct tx_burst_sg_ret){
+               .length = sent_size,
+               .num = segs,
+       };
+stop:
+       return (struct tx_burst_sg_ret){
+               .length = -1,
+               .num = -1,
+       };
+}
+
+#endif /* MLX5_PMD_SGE_WR_N > 1 */
+
 /**
  * DPDK callback for TX.
  *
@@ -282,9 +430,28 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        if (unlikely(err))
                                goto stop;
                } else {
+#if MLX5_PMD_SGE_WR_N > 1
+                       struct ibv_sge sges[MLX5_PMD_SGE_WR_N];
+                       struct tx_burst_sg_ret ret;
+
+                       ret = tx_burst_sg(txq, segs, elt, buf, elts_head,
+                                         &sges);
+                       if (ret.length == (unsigned int)-1)
+                               goto stop;
+                       RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
+                       /* Put SG list into send queue. */
+                       err = txq->if_qp->send_pending_sg_list
+                               (txq->qp,
+                                sges,
+                                ret.num,
+                                send_flags);
+                       if (unlikely(err))
+                               goto stop;
+#else /* MLX5_PMD_SGE_WR_N > 1 */
                        DEBUG("%p: TX scattered buffers support not"
                              " compiled in", (void *)txq);
                        goto stop;
+#endif /* MLX5_PMD_SGE_WR_N > 1 */
                }
                elts_head = elts_head_next;
        }
@@ -306,9 +473,216 @@ stop:
        return i;
 }
 
+/**
+ * DPDK callback for RX with scattered packets support.
+ *
+ * @param dpdk_rxq
+ *   Generic pointer to RX queue structure.
+ * @param[out] pkts
+ *   Array to store received packets.
+ * @param pkts_n
+ *   Maximum number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+       struct rxq *rxq = (struct rxq *)dpdk_rxq;
+       struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;
+       const unsigned int elts_n = rxq->elts_n;
+       unsigned int elts_head = rxq->elts_head;
+       struct ibv_recv_wr head;
+       struct ibv_recv_wr **next = &head.next;
+       struct ibv_recv_wr *bad_wr;
+       unsigned int i;
+       unsigned int pkts_ret = 0;
+       int ret;
+
+       if (unlikely(!rxq->sp))
+               return mlx5_rx_burst(dpdk_rxq, pkts, pkts_n);
+       if (unlikely(elts == NULL)) /* See RTE_DEV_CMD_SET_MTU. */
+               return 0;
+       for (i = 0; (i != pkts_n); ++i) {
+               struct rxq_elt_sp *elt = &(*elts)[elts_head];
+               struct ibv_recv_wr *wr = &elt->wr;
+               uint64_t wr_id = wr->wr_id;
+               unsigned int len;
+               unsigned int pkt_buf_len;
+               struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */
+               struct rte_mbuf **pkt_buf_next = &pkt_buf;
+               unsigned int seg_headroom = RTE_PKTMBUF_HEADROOM;
+               unsigned int j = 0;
+               uint32_t flags;
+
+               /* Sanity checks. */
+#ifdef NDEBUG
+               (void)wr_id;
+#endif
+               assert(wr_id < rxq->elts_n);
+               assert(wr->sg_list == elt->sges);
+               assert(wr->num_sge == RTE_DIM(elt->sges));
+               assert(elts_head < rxq->elts_n);
+               assert(rxq->elts_head < rxq->elts_n);
+               ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
+                                                   &flags);
+               if (unlikely(ret < 0)) {
+                       struct ibv_wc wc;
+                       int wcs_n;
+
+                       DEBUG("rxq=%p, poll_length() failed (ret=%d)",
+                             (void *)rxq, ret);
+                       /* ibv_poll_cq() must be used in case of failure. */
+                       wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
+                       if (unlikely(wcs_n == 0))
+                               break;
+                       if (unlikely(wcs_n < 0)) {
+                               DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
+                                     (void *)rxq, wcs_n);
+                               break;
+                       }
+                       assert(wcs_n == 1);
+                       if (unlikely(wc.status != IBV_WC_SUCCESS)) {
+                               /* Whatever, just repost the offending WR. */
+                               DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
+                                     " completion status (%d): %s",
+                                     (void *)rxq, wc.wr_id, wc.status,
+                                     ibv_wc_status_str(wc.status));
+                               /* Link completed WRs together for repost. */
+                               *next = wr;
+                               next = &wr->next;
+                               goto repost;
+                       }
+                       ret = wc.byte_len;
+               }
+               if (ret == 0)
+                       break;
+               len = ret;
+               pkt_buf_len = len;
+               /* Link completed WRs together for repost. */
+               *next = wr;
+               next = &wr->next;
+               /*
+                * Replace spent segments with new ones, concatenate and
+                * return them as pkt_buf.
+                */
+               while (1) {
+                       struct ibv_sge *sge = &elt->sges[j];
+                       struct rte_mbuf *seg = elt->bufs[j];
+                       struct rte_mbuf *rep;
+                       unsigned int seg_tailroom;
+
+                       /*
+                        * Fetch initial bytes of packet descriptor into a
+                        * cacheline while allocating rep.
+                        */
+                       rte_prefetch0(seg);
+                       rep = __rte_mbuf_raw_alloc(rxq->mp);
+                       if (unlikely(rep == NULL)) {
+                               /*
+                                * Unable to allocate a replacement mbuf,
+                                * repost WR.
+                                */
+                               DEBUG("rxq=%p, wr_id=%" PRIu64 ":"
+                                     " can't allocate a new mbuf",
+                                     (void *)rxq, wr_id);
+                               if (pkt_buf != NULL) {
+                                       *pkt_buf_next = NULL;
+                                       rte_pktmbuf_free(pkt_buf);
+                               }
+                               /* Increment out of memory counters. */
+                               ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
+                               goto repost;
+                       }
+#ifndef NDEBUG
+                       /* Poison user-modifiable fields in rep. */
+                       NEXT(rep) = (void *)((uintptr_t)-1);
+                       SET_DATA_OFF(rep, 0xdead);
+                       DATA_LEN(rep) = 0xd00d;
+                       PKT_LEN(rep) = 0xdeadd00d;
+                       NB_SEGS(rep) = 0x2a;
+                       PORT(rep) = 0x2a;
+                       rep->ol_flags = -1;
+#endif
+                       assert(rep->buf_len == seg->buf_len);
+                       assert(rep->buf_len == rxq->mb_len);
+                       /* Reconfigure sge to use rep instead of seg. */
+                       assert(sge->lkey == rxq->mr->lkey);
+                       sge->addr = ((uintptr_t)rep->buf_addr + seg_headroom);
+                       elt->bufs[j] = rep;
+                       ++j;
+                       /* Update pkt_buf if it's the first segment, or link
+                        * seg to the previous one and update pkt_buf_next. */
+                       *pkt_buf_next = seg;
+                       pkt_buf_next = &NEXT(seg);
+                       /* Update seg information. */
+                       seg_tailroom = (seg->buf_len - seg_headroom);
+                       assert(sge->length == seg_tailroom);
+                       SET_DATA_OFF(seg, seg_headroom);
+                       if (likely(len <= seg_tailroom)) {
+                               /* Last segment. */
+                               DATA_LEN(seg) = len;
+                               PKT_LEN(seg) = len;
+                               /* Sanity check. */
+                               assert(rte_pktmbuf_headroom(seg) ==
+                                      seg_headroom);
+                               assert(rte_pktmbuf_tailroom(seg) ==
+                                      (seg_tailroom - len));
+                               break;
+                       }
+                       DATA_LEN(seg) = seg_tailroom;
+                       PKT_LEN(seg) = seg_tailroom;
+                       /* Sanity check. */
+                       assert(rte_pktmbuf_headroom(seg) == seg_headroom);
+                       assert(rte_pktmbuf_tailroom(seg) == 0);
+                       /* Fix len and clear headroom for next segments. */
+                       len -= seg_tailroom;
+                       seg_headroom = 0;
+               }
+               /* Update head and tail segments. */
+               *pkt_buf_next = NULL;
+               assert(pkt_buf != NULL);
+               assert(j != 0);
+               NB_SEGS(pkt_buf) = j;
+               PORT(pkt_buf) = rxq->port_id;
+               PKT_LEN(pkt_buf) = pkt_buf_len;
+
+               /* Return packet. */
+               *(pkts++) = pkt_buf;
+               ++pkts_ret;
+repost:
+               if (++elts_head >= elts_n)
+                       elts_head = 0;
+               continue;
+       }
+       if (unlikely(i == 0))
+               return 0;
+       *next = NULL;
+       /* Repost WRs. */
+#ifdef DEBUG_RECV
+       DEBUG("%p: reposting %d WRs", (void *)rxq, i);
+#endif
+       ret = ibv_post_recv(rxq->qp, head.next, &bad_wr);
+       if (unlikely(ret)) {
+               /* Inability to repost WRs is fatal. */
+               DEBUG("%p: ibv_post_recv(): failed for WR %p: %s",
+                     (void *)rxq->priv,
+                     (void *)bad_wr,
+                     strerror(ret));
+               abort();
+       }
+       rxq->elts_head = elts_head;
+       return pkts_ret;
+}
+
 /**
  * DPDK callback for RX.
  *
+ * This function is the same as mlx5_rx_burst_sp(), except it doesn't manage
+ * scattered packets. It improves performance when the MRU is smaller than
+ * the size of the first segment.
+ *
  * @param dpdk_rxq
  *   Generic pointer to RX queue structure.
  * @param[out] pkts
@@ -331,6 +705,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
        unsigned int pkts_ret = 0;
        int ret;
 
+       if (unlikely(rxq->sp))
+               return mlx5_rx_burst_sp(dpdk_rxq, pkts, pkts_n);
        for (i = 0; (i != pkts_n); ++i) {
                struct rxq_elt *elt = &(*elts)[elts_head];
                struct ibv_recv_wr *wr = &elt->wr;
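
To make the reassembly loop in mlx5_rx_burst_sp() easier to follow, here is
a standalone model of how a completion length is spread over the SGE chain:
the first segment keeps its headroom, later ones use their full buffer, and
the last one takes the remainder. HEADROOM, BUF_LEN and split_len() are
illustrative stand-ins for RTE_PKTMBUF_HEADROOM, the queue's mb_len and the
in-place loop:

    #define HEADROOM 128  /* stand-in for RTE_PKTMBUF_HEADROOM */
    #define BUF_LEN 2048  /* stand-in for the queue's mb_len */

    /* Fill seg_len[] with per-segment data lengths for a packet of len
     * bytes; return the number of segments used, or 0 if it does not fit
     * (cannot happen when max_rx_pkt_len was checked at setup time). */
    static unsigned int
    split_len(unsigned int len, unsigned int seg_len[], unsigned int max_segs)
    {
            unsigned int headroom = HEADROOM;
            unsigned int i;

            for (i = 0; i < max_segs; ++i) {
                    unsigned int tailroom = BUF_LEN - headroom;

                    if (len <= tailroom) {
                            seg_len[i] = len; /* last segment */
                            return i + 1;
                    }
                    seg_len[i] = tailroom;
                    len -= tailroom;
                    headroom = 0; /* only the first segment keeps headroom */
            }
            return 0;
    }
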
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 3733d3e..c7f634e 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
 #include "mlx5.h"
 #include "mlx5_defs.h"
 
+/* RX element (scattered packets). */
+struct rxq_elt_sp {
+       struct ibv_recv_wr wr; /* Work Request. */
+       struct ibv_sge sges[MLX5_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
+       struct rte_mbuf *bufs[MLX5_PMD_SGE_WR_N]; /* SGEs buffers. */
+};
+
 /* RX element. */
 struct rxq_elt {
        struct ibv_recv_wr wr; /* Work Request. */
@@ -84,8 +91,10 @@ struct rxq {
        unsigned int elts_n; /* (*elts)[] length. */
        unsigned int elts_head; /* Current index in (*elts)[]. */
        union {
+               struct rxq_elt_sp (*sp)[]; /* Scattered RX elements. */
                struct rxq_elt (*no_sp)[]; /* RX elements. */
        } elts;
+       unsigned int sp:1; /* Use scattered RX elements. */
        uint32_t mb_len; /* Length of a mp-issued mbuf. */
        unsigned int socket; /* CPU socket ID for allocations. */
        struct ibv_exp_res_domain *rd; /* Resource Domain. */
@@ -151,6 +160,7 @@ void mlx5_tx_queue_release(void *);
 /* mlx5_rxtx.c */
 
 uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t);
+uint16_t mlx5_rx_burst_sp(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t);
 uint16_t removed_tx_burst(void *, struct rte_mbuf **, uint16_t);
 uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
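
Finally, a sketch of the resulting dispatch between the two burst callbacks;
the guards at the top of mlx5_rx_burst() and mlx5_rx_burst_sp() make either
entry point safe even if dev->rx_pkt_burst is momentarily stale.
rx_burst_dispatch() is hypothetical glue, not part of the patch:

    static uint16_t
    rx_burst_dispatch(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
    {
            /* Equivalent to the sp cross-checks in both RX callbacks. */
            return (rxq->sp ?
                    mlx5_rx_burst_sp(rxq, pkts, n) :
                    mlx5_rx_burst(rxq, pkts, n));
    }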