From bdb8e5b1ea7be5ec55b87c4090e712316701f175 Mon Sep 17 00:00:00 2001 From: Viacheslav Ovsiienko Date: Mon, 20 Jan 2020 19:16:26 +0000 Subject: [PATCH] net/mlx5: allow allocated mbuf with external buffer In the Rx datapath the flags in the newly allocated mbufs are all explicitly cleared but the EXT_ATTACHED_MBUF must be preserved. It would allow to use mbuf pools with pre-attached external data buffers. The vectorized rx_burst routines are updated in order to inherit the EXT_ATTACHED_MBUF from mbuf pool private RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF flag. Signed-off-by: Viacheslav Ovsiienko Acked-by: Matan Azrad --- drivers/net/mlx5/mlx5_rxq.c | 7 +++++- drivers/net/mlx5/mlx5_rxtx.c | 2 +- drivers/net/mlx5/mlx5_rxtx.h | 2 +- drivers/net/mlx5/mlx5_rxtx_vec.h | 14 ++++-------- drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 5 ++-- drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 29 ++++++++++++------------ drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 2 +- 7 files changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index ca25e324c3..c87ce151d7 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -225,6 +225,9 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; + struct rte_pktmbuf_pool_private *priv = + (struct rte_pktmbuf_pool_private *) + rte_mempool_get_priv(rxq_ctrl->rxq.mp); int j; /* Initialize default rearm_data for vPMD. */ @@ -232,13 +235,15 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl) rte_mbuf_refcnt_set(mbuf_init, 1); mbuf_init->nb_segs = 1; mbuf_init->port = rxq->port_id; + if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) + mbuf_init->ol_flags = EXT_ATTACHED_MBUF; /* * prevent compiler reordering: * rearm_data covers previous fields. */ rte_compiler_barrier(); rxq->mbuf_initializer = - *(uint64_t *)&mbuf_init->rearm_data; + *(rte_xmm_t *)&mbuf_init->rearm_data; /* Padding with a fake mbuf for vectorized Rx. */ for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index b4702ff9f6..a06db01ba8 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -1341,7 +1341,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) } pkt = seg; assert(len >= (rxq->crc_present << 2)); - pkt->ol_flags = 0; + pkt->ol_flags &= EXT_ATTACHED_MBUF; /* If compressed, take hash result from mini-CQE. */ rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? cqe->rx_hash_res : diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index e927343f7d..f35cc873a5 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -144,7 +144,7 @@ struct mlx5_rxq_data { struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */ uint16_t idx; /* Queue index. */ struct mlx5_rxq_stats stats; - uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */ + rte_xmm_t mbuf_initializer; /* Default rearm/flags for vectorized Rx. */ struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */ void *cq_uar; /* CQ user access region. */ uint32_t cqn; /* CQ number. */ diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h index 85e0bd5441..d8c07f285a 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec.h @@ -97,18 +97,12 @@ mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n) void *buf_addr; /* - * Load the virtual address for Rx WQE. non-x86 processors - * (mostly RISC such as ARM and Power) are more vulnerable to - * load stall. For x86, reducing the number of instructions - * seems to matter most. + * In order to support the mbufs with external attached + * data buffer we should use the buf_addr pointer instead of + * rte_mbuf_buf_addr(). It touches the mbuf itself and may + * impact the performance. */ -#ifdef RTE_ARCH_X86_64 buf_addr = elts[i]->buf_addr; - assert(buf_addr == rte_mbuf_buf_addr(elts[i], rxq->mp)); -#else - buf_addr = rte_mbuf_buf_addr(elts[i], rxq->mp); - assert(buf_addr == elts[i]->buf_addr); -#endif wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr + RTE_PKTMBUF_HEADROOM); /* If there's only one MR, no need to replace LKey in WQE. */ diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h index 8e79883dfe..9e5c6ee7ab 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h @@ -344,9 +344,8 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED}; const vector unsigned char mbuf_init = - (vector unsigned char)(vector unsigned long){ - *(__attribute__((__aligned__(8))) unsigned long *) - &rxq->mbuf_initializer, 0LL}; + (vector unsigned char)vec_vsx_ld + (0, (vector unsigned char *)&rxq->mbuf_initializer); const vector unsigned short rearm_sel_mask = (vector unsigned short){0, 0, 0, 0, 0xffff, 0xffff, 0, 0}; vector unsigned char rearm0, rearm1, rearm2, rearm3; diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index 86785c7496..332e9ac406 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -264,8 +264,8 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, const uint32x4_t cv_mask = vdupq_n_u32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED); - const uint64x1_t mbuf_init = vld1_u64(&rxq->mbuf_initializer); - const uint64x1_t r32_mask = vcreate_u64(0xffffffff); + const uint64x2_t mbuf_init = vld1q_u64 + ((const uint64_t *)&rxq->mbuf_initializer); uint64x2_t rearm0, rearm1, rearm2, rearm3; uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3; @@ -326,18 +326,19 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, /* Merge to ol_flags. */ ol_flags = vorrq_u32(ol_flags, cv_flags); /* Merge mbuf_init and ol_flags, and store. */ - rearm0 = vcombine_u64(mbuf_init, - vshr_n_u64(vget_high_u64(vreinterpretq_u64_u32( - ol_flags)), 32)); - rearm1 = vcombine_u64(mbuf_init, - vand_u64(vget_high_u64(vreinterpretq_u64_u32( - ol_flags)), r32_mask)); - rearm2 = vcombine_u64(mbuf_init, - vshr_n_u64(vget_low_u64(vreinterpretq_u64_u32( - ol_flags)), 32)); - rearm3 = vcombine_u64(mbuf_init, - vand_u64(vget_low_u64(vreinterpretq_u64_u32( - ol_flags)), r32_mask)); + rearm0 = vreinterpretq_u64_u32(vsetq_lane_u32 + (vgetq_lane_u32(ol_flags, 3), + vreinterpretq_u32_u64(mbuf_init), 2)); + rearm1 = vreinterpretq_u64_u32(vsetq_lane_u32 + (vgetq_lane_u32(ol_flags, 2), + vreinterpretq_u32_u64(mbuf_init), 2)); + rearm2 = vreinterpretq_u64_u32(vsetq_lane_u32 + (vgetq_lane_u32(ol_flags, 1), + vreinterpretq_u32_u64(mbuf_init), 2)); + rearm3 = vreinterpretq_u64_u32(vsetq_lane_u32 + (vgetq_lane_u32(ol_flags, 0), + vreinterpretq_u32_u64(mbuf_init), 2)); + vst1q_u64((void *)&pkts[0]->rearm_data, rearm0); vst1q_u64((void *)&pkts[1]->rearm_data, rearm1); vst1q_u64((void *)&pkts[2]->rearm_data, rearm2); diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h index 35b7761007..07d40d5f48 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h @@ -259,7 +259,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED); const __m128i mbuf_init = - _mm_loadl_epi64((__m128i *)&rxq->mbuf_initializer); + _mm_load_si128((__m128i *)&rxq->mbuf_initializer); __m128i rearm0, rearm1, rearm2, rearm3; uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3; -- 2.20.1