From bd0d5930bf567b41c634b5a7ef0fe76c167ef3b6 Mon Sep 17 00:00:00 2001
From: Alexander Kozyrev
Date: Thu, 9 Apr 2020 22:23:52 +0000
Subject: [PATCH] net/mlx5: enable MPRQ multi-stride operations

MPRQ feature should be updated to allow a packet to be received
into multiple strides in order to support the MTU exceeding 8KB.
Special care is needed to prevent the headroom corruption in the
multi-stride mode since the headroom space is borrowed by the PMD
from the tail of the preceding stride.
Copy the whole packet into a separate mbuf in this case or just
the overlapping data if the Rx scattering is supported by an
application.

Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev
Acked-by: Viacheslav Ovsiienko
Acked-by: Matan Azrad
---
 drivers/net/mlx5/mlx5_rxq.c  | 28 ++++-----------
 drivers/net/mlx5/mlx5_rxtx.c | 68 +++++++++++++++---------------------
 drivers/net/mlx5/mlx5_rxtx.h |  2 +-
 3 files changed, 36 insertions(+), 62 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 1b57f00cb2..1cc9f1dba8 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1797,7 +1796,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	unsigned int mprq_stride_size;
 	unsigned int mprq_stride_cap;
 	struct mlx5_dev_config *config = &priv->config;
-	unsigned int strd_headroom_en;
 	/*
 	 * Always allocate extra slots, even if eventually
 	 * the vector Rx will not be used.
@@ -1843,26 +1842,11 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->socket = socket;
 	if (dev->data->dev_conf.intr_conf.rxq)
 		tmpl->irq = 1;
-	/*
-	 * LRO packet may consume all the stride memory, hence we cannot
-	 * guaranty head-room near the packet memory in the stride.
-	 * In this case scatter is, for sure, enabled and an empty mbuf may be
-	 * added in the start for the head-room.
-	 */
-	if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
-	    non_scatter_min_mbuf_size > mb_len) {
-		strd_headroom_en = 0;
-		mprq_stride_size = RTE_MIN(max_rx_pkt_len,
-					1u << config->mprq.max_stride_size_n);
-	} else {
-		strd_headroom_en = 1;
-		mprq_stride_size = non_scatter_min_mbuf_size;
-	}
 	mprq_stride_nums = config->mprq.stride_num_n ?
 		config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
-	mprq_stride_size = (mprq_stride_size <=
-		(1U << config->mprq.max_stride_size_n)) ?
-		log2above(mprq_stride_size) : MLX5_MPRQ_STRIDE_SIZE_N;
+	mprq_stride_size = non_scatter_min_mbuf_size <=
+		(1U << config->mprq.max_stride_size_n) ?
+		log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
 	mprq_stride_cap = (config->mprq.stride_num_n ?
 		(1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
 		(config->mprq.stride_size_n ?
@@ -1879,8 +1863,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	 * Otherwise, enable Rx scatter if necessary.
 	 */
 	if (mprq_en && desc > (1U << mprq_stride_nums) &&
-	    (non_scatter_min_mbuf_size -
-	     (lro_on_queue ? RTE_PKTMBUF_HEADROOM : 0) <=
+	    (non_scatter_min_mbuf_size <=
 	     (1U << config->mprq.max_stride_size_n) ||
 	    (config->mprq.stride_size_n &&
 	     non_scatter_min_mbuf_size <= mprq_stride_cap))) {
@@ -1893,7 +1876,8 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		tmpl->rxq.strd_sz_n = config->mprq.stride_size_n ?
 			config->mprq.stride_size_n : mprq_stride_size;
 		tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
-		tmpl->rxq.strd_headroom_en = strd_headroom_en;
+		tmpl->rxq.strd_scatter_en =
+				!!(offloads & DEV_RX_OFFLOAD_SCATTER);
 		tmpl->rxq.mprq_max_memcpy_len = RTE_MIN(first_mb_free_size,
 					config->mprq.max_memcpy_len);
 		max_lro_size = RTE_MIN(max_rx_pkt_len,
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index f3bf763769..4c279520d1 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1658,21 +1658,20 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	unsigned int i = 0;
 	uint32_t rq_ci = rxq->rq_ci;
 	uint16_t consumed_strd = rxq->consumed_strd;
-	uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM;
 	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 
 	while (i < pkts_n) {
 		struct rte_mbuf *pkt;
 		void *addr;
 		int ret;
-		unsigned int len;
+		uint32_t len;
 		uint16_t strd_cnt;
 		uint16_t strd_idx;
 		uint32_t offset;
 		uint32_t byte_cnt;
+		int32_t hdrm_overlap;
 		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
 		uint32_t rss_hash_res = 0;
-		uint8_t lro_num_seg;
 
 		if (consumed_strd == strd_n) {
 			/* Replace WQE only if the buffer is still in use. */
@@ -1719,18 +1718,6 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		MLX5_ASSERT(strd_idx < strd_n);
 		MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
			    wq_mask));
-		lro_num_seg = cqe->lro_num_seg;
-		/*
-		 * Currently configured to receive a packet per a stride. But if
-		 * MTU is adjusted through kernel interface, device could
-		 * consume multiple strides without raising an error. In this
-		 * case, the packet should be dropped because it is bigger than
-		 * the max_rx_pkt_len.
-		 */
-		if (unlikely(!lro_num_seg && strd_cnt > 1)) {
-			++rxq->stats.idropped;
-			continue;
-		}
 		pkt = rte_pktmbuf_alloc(rxq->mp);
 		if (unlikely(pkt == NULL)) {
 			++rxq->stats.rx_nombuf;
 			break;
 		}
@@ -1742,12 +1729,16 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			len -= RTE_ETHER_CRC_LEN;
 		offset = strd_idx * strd_sz + strd_shift;
 		addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
+		hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
 		/*
 		 * Memcpy packets to the target mbuf if:
 		 * - The size of packet is smaller than mprq_max_memcpy_len.
 		 * - Out of buffer in the Mempool for Multi-Packet RQ.
+		 * - There is no space for a headroom and scatter is disabled.
 		 */
-		if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
+		if (len <= rxq->mprq_max_memcpy_len ||
+		    rxq->mprq_repl == NULL ||
+		    (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
 			/*
 			 * When memcpy'ing packet due to out-of-buffer, the
 			 * packet must be smaller than the target mbuf.
@@ -1769,7 +1760,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			rte_atomic16_add_return(&buf->refcnt, 1);
 			MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf->refcnt) <=
 				    strd_n + 1);
-			buf_addr = RTE_PTR_SUB(addr, headroom_sz);
+			buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
 			/*
 			 * MLX5 device doesn't use iova but it is necessary in a
 			 * case where the Rx packet is transmitted via a
@@ -1788,43 +1779,42 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
						  buf_len, shinfo);
 			/* Set mbuf head-room. */
-			pkt->data_off = headroom_sz;
+			SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
 			MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
-			/*
-			 * Prevent potential overflow due to MTU change through
-			 * kernel interface.
-			 */
-			if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
-				rte_pktmbuf_free_seg(pkt);
-				++rxq->stats.idropped;
-				continue;
-			}
+			MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >=
+				len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
 			DATA_LEN(pkt) = len;
 			/*
-			 * LRO packet may consume all the stride memory, in this
-			 * case packet head-room space is not guaranteed so must
-			 * to add an empty mbuf for the head-room.
+			 * Copy the last fragment of a packet (up to headroom
+			 * size bytes) in case there is a stride overlap with
+			 * a next packet's headroom. Allocate a separate mbuf
+			 * to store this fragment and link it. Scatter is on.
 			 */
-			if (!rxq->strd_headroom_en) {
-				struct rte_mbuf *headroom_mbuf =
-						rte_pktmbuf_alloc(rxq->mp);
+			if (hdrm_overlap > 0) {
+				MLX5_ASSERT(rxq->strd_scatter_en);
+				struct rte_mbuf *seg =
+					rte_pktmbuf_alloc(rxq->mp);
 
-				if (unlikely(headroom_mbuf == NULL)) {
+				if (unlikely(seg == NULL)) {
 					rte_pktmbuf_free_seg(pkt);
 					++rxq->stats.rx_nombuf;
 					break;
 				}
-				PORT(pkt) = rxq->port_id;
-				NEXT(headroom_mbuf) = pkt;
-				pkt = headroom_mbuf;
+				SET_DATA_OFF(seg, 0);
+				rte_memcpy(rte_pktmbuf_mtod(seg, void *),
+					RTE_PTR_ADD(addr, len - hdrm_overlap),
+					hdrm_overlap);
+				DATA_LEN(seg) = hdrm_overlap;
+				DATA_LEN(pkt) = len - hdrm_overlap;
+				NEXT(pkt) = seg;
 				NB_SEGS(pkt) = 2;
 			}
 		}
 		rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
-		if (lro_num_seg > 1) {
+		if (cqe->lro_num_seg > 1) {
 			mlx5_lro_update_hdr(addr, cqe, len);
 			pkt->ol_flags |= PKT_RX_LRO;
-			pkt->tso_segsz = strd_sz;
+			pkt->tso_segsz = len / cqe->lro_num_seg;
 		}
 		PKT_LEN(pkt) = len;
 		PORT(pkt) = rxq->port_id;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 939778aa55..d155c241eb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -119,7 +119,7 @@ struct mlx5_rxq_data {
 	unsigned int strd_sz_n:4; /* Log 2 of stride size. */
 	unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
 	unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
-	unsigned int strd_headroom_en:1; /* Enable mbuf headroom in MPRQ. */
+	unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
 	unsigned int lro:1; /* Enable LRO. */
 	unsigned int :1; /* Remaining bits. */
 	volatile uint32_t *rq_db;
-- 
2.20.1
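
The sketch below (not part of the patch, and it does not build against DPDK) illustrates the headroom-overlap arithmetic the new Rx path relies on. EXAMPLE_HEADROOM, EXAMPLE_STRD_SZ and the classify() helper are hypothetical stand-ins for RTE_PKTMBUF_HEADROOM, one stride (1 << rxq->strd_sz_n) and the branch selection in mlx5_rx_burst_mprq(); the other two memcpy triggers in the driver (small packets and an exhausted replacement buffer) are ignored here. A positive hdrm_overlap means the packet tail reaches into the headroom borrowed from the following stride, so the packet is either copied out entirely (Rx scatter disabled) or the overlapping tail is copied into a linked second segment (Rx scatter enabled).

/*
 * Illustrative sketch only -- plain C, made-up example values.
 */
#include <stdio.h>

#define EXAMPLE_HEADROOM 128	/* stands in for RTE_PKTMBUF_HEADROOM */
#define EXAMPLE_STRD_SZ 2048	/* stands in for 1 << rxq->strd_sz_n */

static void
classify(int len, int strd_cnt, int scatter_en)
{
	/* Same arithmetic as the patch: how far the packet tail reaches into
	 * the headroom area borrowed from the stride(s) that follow it. */
	int hdrm_overlap = len + EXAMPLE_HEADROOM - strd_cnt * EXAMPLE_STRD_SZ;

	if (hdrm_overlap > 0 && !scatter_en)
		printf("len=%d strd_cnt=%d overlap=%d -> memcpy whole packet\n",
		       len, strd_cnt, hdrm_overlap);
	else if (hdrm_overlap > 0)
		printf("len=%d strd_cnt=%d overlap=%d -> attach, copy tail to 2nd seg\n",
		       len, strd_cnt, hdrm_overlap);
	else
		printf("len=%d strd_cnt=%d overlap=%d -> attach external buffer in place\n",
		       len, strd_cnt, hdrm_overlap);
}

int
main(void)
{
	classify(1400, 1, 1);	/* fits in one stride, headroom to spare */
	classify(9000, 5, 1);	/* multi-stride, still no headroom overlap */
	classify(8190, 4, 1);	/* tail overlaps the next headroom, scatter on */
	classify(8190, 4, 0);	/* same packet with Rx scatter disabled */
	return 0;
}

Compiling and running the sketch prints which of the three receive paths each example packet would take, which mirrors why the patch only needs the extra copy when the overlap is positive.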