net/mlx5: enable MPRQ multi-stride operations
author: Alexander Kozyrev <akozyrev@mellanox.com>
Thu, 9 Apr 2020 22:23:52 +0000 (22:23 +0000)
committer: Ferruh Yigit <ferruh.yigit@intel.com>
Tue, 21 Apr 2020 11:57:08 +0000 (13:57 +0200)
The MPRQ feature should be updated to allow a packet to be received
into multiple strides in order to support an MTU exceeding 8KB.
Special care is needed to prevent headroom corruption in
multi-stride mode, since the headroom space is borrowed by the PMD
from the tail of the preceding stride. In this case, copy the whole
packet into a separate mbuf, or copy just the overlapping data if
Rx scattering is enabled by the application.

Cc: stable@dpdk.org
Signed-off-by: Alexander Kozyrev <akozyrev@mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
drivers/net/mlx5/mlx5_rxq.c
drivers/net/mlx5/mlx5_rxtx.c
drivers/net/mlx5/mlx5_rxtx.h

index 1b57f00..1cc9f1d 100644 (file)
@@ -1797,7 +1797,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        unsigned int mprq_stride_size;
        unsigned int mprq_stride_cap;
        struct mlx5_dev_config *config = &priv->config;
-       unsigned int strd_headroom_en;
        /*
         * Always allocate extra slots, even if eventually
         * the vector Rx will not be used.
@@ -1843,26 +1842,11 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        tmpl->socket = socket;
        if (dev->data->dev_conf.intr_conf.rxq)
                tmpl->irq = 1;
-       /*
-        * LRO packet may consume all the stride memory, hence we cannot
-        * guaranty head-room near the packet memory in the stride.
-        * In this case scatter is, for sure, enabled and an empty mbuf may be
-        * added in the start for the head-room.
-        */
-       if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
-           non_scatter_min_mbuf_size > mb_len) {
-               strd_headroom_en = 0;
-               mprq_stride_size = RTE_MIN(max_rx_pkt_len,
-                                       1u << config->mprq.max_stride_size_n);
-       } else {
-               strd_headroom_en = 1;
-               mprq_stride_size = non_scatter_min_mbuf_size;
-       }
        mprq_stride_nums = config->mprq.stride_num_n ?
                config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
-       mprq_stride_size = (mprq_stride_size <=
-                       (1U << config->mprq.max_stride_size_n)) ?
-               log2above(mprq_stride_size) : MLX5_MPRQ_STRIDE_SIZE_N;
+       mprq_stride_size = non_scatter_min_mbuf_size <=
+               (1U << config->mprq.max_stride_size_n) ?
+               log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
        mprq_stride_cap = (config->mprq.stride_num_n ?
                (1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
                        (config->mprq.stride_size_n ?
@@ -1879,8 +1863,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
         *  Otherwise, enable Rx scatter if necessary.
         */
        if (mprq_en && desc > (1U << mprq_stride_nums) &&
-           (non_scatter_min_mbuf_size -
-            (lro_on_queue ? RTE_PKTMBUF_HEADROOM : 0) <=
+           (non_scatter_min_mbuf_size <=
             (1U << config->mprq.max_stride_size_n) ||
             (config->mprq.stride_size_n &&
              non_scatter_min_mbuf_size <= mprq_stride_cap))) {
@@ -1893,7 +1876,8 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                tmpl->rxq.strd_sz_n = config->mprq.stride_size_n ?
                        config->mprq.stride_size_n : mprq_stride_size;
                tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
-               tmpl->rxq.strd_headroom_en = strd_headroom_en;
+               tmpl->rxq.strd_scatter_en =
+                               !!(offloads & DEV_RX_OFFLOAD_SCATTER);
                tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
                                config->mprq.max_memcpy_len);
                max_lro_size = RTE_MIN(max_rx_pkt_len,
index f3bf763..4c27952 100644 (file)
@@ -1658,21 +1658,20 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
        unsigned int i = 0;
        uint32_t rq_ci = rxq->rq_ci;
        uint16_t consumed_strd = rxq->consumed_strd;
-       uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM;
        struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 
        while (i < pkts_n) {
                struct rte_mbuf *pkt;
                void *addr;
                int ret;
-               unsigned int len;
+               uint32_t len;
                uint16_t strd_cnt;
                uint16_t strd_idx;
                uint32_t offset;
                uint32_t byte_cnt;
+               int32_t hdrm_overlap;
                volatile struct mlx5_mini_cqe8 *mcqe = NULL;
                uint32_t rss_hash_res = 0;
-               uint8_t lro_num_seg;
 
                if (consumed_strd == strd_n) {
                        /* Replace WQE only if the buffer is still in use. */
@@ -1719,18 +1718,6 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                MLX5_ASSERT(strd_idx < strd_n);
                MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
                            wq_mask));
-               lro_num_seg = cqe->lro_num_seg;
-               /*
-                * Currently configured to receive a packet per a stride. But if
-                * MTU is adjusted through kernel interface, device could
-                * consume multiple strides without raising an error. In this
-                * case, the packet should be dropped because it is bigger than
-                * the max_rx_pkt_len.
-                */
-               if (unlikely(!lro_num_seg && strd_cnt > 1)) {
-                       ++rxq->stats.idropped;
-                       continue;
-               }
                pkt = rte_pktmbuf_alloc(rxq->mp);
                if (unlikely(pkt == NULL)) {
                        ++rxq->stats.rx_nombuf;
@@ -1742,12 +1729,16 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        len -= RTE_ETHER_CRC_LEN;
                offset = strd_idx * strd_sz + strd_shift;
                addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
+               hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
                /*
                 * Memcpy packets to the target mbuf if:
                 * - The size of packet is smaller than mprq_max_memcpy_len.
                 * - Out of buffer in the Mempool for Multi-Packet RQ.
+                * - There is no space for a headroom and scatter is disabled.
                 */
-               if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
+               if (len <= rxq->mprq_max_memcpy_len ||
+                   rxq->mprq_repl == NULL ||
+                   (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
                        /*
                         * When memcpy'ing packet due to out-of-buffer, the
                         * packet must be smaller than the target mbuf.
@@ -1769,7 +1760,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        rte_atomic16_add_return(&buf->refcnt, 1);
                        MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf->refcnt) <=
                                    strd_n + 1);
-                       buf_addr = RTE_PTR_SUB(addr, headroom_sz);
+                       buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
                        /*
                         * MLX5 device doesn't use iova but it is necessary in a
                         * case where the Rx packet is transmitted via a
@@ -1788,43 +1779,42 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
                                                  buf_len, shinfo);
                        /* Set mbuf head-room. */
-                       pkt->data_off = headroom_sz;
+                       SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
                        MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
-                       /*
-                        * Prevent potential overflow due to MTU change through
-                        * kernel interface.
-                        */
-                       if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
-                               rte_pktmbuf_free_seg(pkt);
-                               ++rxq->stats.idropped;
-                               continue;
-                       }
+                       MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) <
+                               len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
                        DATA_LEN(pkt) = len;
                        /*
-                        * LRO packet may consume all the stride memory, in this
-                        * case packet head-room space is not guaranteed so must
-                        * to add an empty mbuf for the head-room.
+                        * Copy the last fragment of a packet (up to headroom
+                        * size bytes) in case there is a stride overlap with
+                        * a next packet's headroom. Allocate a separate mbuf
+                        * to store this fragment and link it. Scatter is on.
                         */
-                       if (!rxq->strd_headroom_en) {
-                               struct rte_mbuf *headroom_mbuf =
-                                               rte_pktmbuf_alloc(rxq->mp);
+                       if (hdrm_overlap > 0) {
+                               MLX5_ASSERT(rxq->strd_scatter_en);
+                               struct rte_mbuf *seg =
+                                       rte_pktmbuf_alloc(rxq->mp);
 
-                               if (unlikely(headroom_mbuf == NULL)) {
+                               if (unlikely(seg == NULL)) {
                                        rte_pktmbuf_free_seg(pkt);
                                        ++rxq->stats.rx_nombuf;
                                        break;
                                }
-                               PORT(pkt) = rxq->port_id;
-                               NEXT(headroom_mbuf) = pkt;
-                               pkt = headroom_mbuf;
+                               SET_DATA_OFF(seg, 0);
+                               rte_memcpy(rte_pktmbuf_mtod(seg, void *),
+                                       RTE_PTR_ADD(addr, len - hdrm_overlap),
+                                       hdrm_overlap);
+                               DATA_LEN(seg) = hdrm_overlap;
+                               DATA_LEN(pkt) = len - hdrm_overlap;
+                               NEXT(pkt) = seg;
                                NB_SEGS(pkt) = 2;
                        }
                }
                rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
-               if (lro_num_seg > 1) {
+               if (cqe->lro_num_seg > 1) {
                        mlx5_lro_update_hdr(addr, cqe, len);
                        pkt->ol_flags |= PKT_RX_LRO;
-                       pkt->tso_segsz = strd_sz;
+                       pkt->tso_segsz = len / cqe->lro_num_seg;
                }
                PKT_LEN(pkt) = len;
                PORT(pkt) = rxq->port_id;
index 939778a..d155c24 100644 (file)
@@ -119,7 +119,7 @@ struct mlx5_rxq_data {
        unsigned int strd_sz_n:4; /* Log 2 of stride size. */
        unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
        unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
-       unsigned int strd_headroom_en:1; /* Enable mbuf headroom in MPRQ. */
+       unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
        unsigned int lro:1; /* Enable LRO. */
        unsigned int :1; /* Remaining bits. */
        volatile uint32_t *rq_db;