From b57e414b48c0da58e445e3d2e92a05758632147d Mon Sep 17 00:00:00 2001 From: Alexander Kozyrev Date: Wed, 16 Jun 2021 17:46:02 +0300 Subject: [PATCH] net/mlx5: convert meta register to big-endian Metadata were stored in the CPU order (little-endian format on x86), while all the packet header fields are stored in the network order. That caused wrong results whenever we tried to use metadata value in the modify_field action: bytes were swapped as a result. Convert the metadata value into big-endian format before storing it in the Mellanox NIC to achieve consistent behaviour. Fixes: 641dbe4fb053 ("net/mlx5: support modify field flow action") Cc: stable@dpdk.org Signed-off-by: Alexander Kozyrev Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5_flow_dv.c | 40 +++++------------------- drivers/net/mlx5/mlx5_rx.c | 5 +-- drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 22 +++++++++---- drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 30 +++++++++++------- drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 18 ++++++++--- 5 files changed, 59 insertions(+), 56 deletions(-) diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index a4a42f455d..62edc4fa85 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -1262,8 +1262,8 @@ flow_dv_convert_action_set_meta const struct rte_flow_action_set_meta *conf, struct rte_flow_error *error) { - uint32_t data = conf->data; - uint32_t mask = conf->mask; + uint32_t mask = rte_cpu_to_be_32(conf->mask); + uint32_t data = rte_cpu_to_be_32(conf->data) & mask; struct rte_flow_item item = { .spec = &data, .mask = &mask, @@ -1276,25 +1276,14 @@ flow_dv_convert_action_set_meta if (reg < 0) return reg; MLX5_ASSERT(reg != REG_NON); - /* - * In datapath code there is no endianness - * coversions for perfromance reasons, all - * pattern conversions are done in rte_flow. - */ if (reg == REG_C_0) { struct mlx5_priv *priv = dev->data->dev_private; uint32_t msk_c0 = priv->sh->dv_regc0_mask; - uint32_t shl_c0; + uint32_t shl_c0 = rte_bsf32(msk_c0); - MLX5_ASSERT(msk_c0); -#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN - shl_c0 = rte_bsf32(msk_c0); -#else - shl_c0 = sizeof(msk_c0) * CHAR_BIT - rte_fls_u32(msk_c0); -#endif - mask <<= shl_c0; - data <<= shl_c0; - MLX5_ASSERT(!(~msk_c0 & rte_cpu_to_be_32(mask))); + data = rte_cpu_to_be_32(rte_cpu_to_be_32(data) << shl_c0); + mask = rte_cpu_to_be_32(mask) & msk_c0; + mask = rte_cpu_to_be_32(mask << shl_c0); } reg_c_x[0] = (struct field_modify_info){4, 0, reg_to_field[reg]}; /* The routine expects parameters in memory as big-endian ones. */ @@ -9255,27 +9244,14 @@ flow_dv_translate_item_meta(struct rte_eth_dev *dev, if (reg < 0) return; MLX5_ASSERT(reg != REG_NON); - /* - * In datapath code there is no endianness - * coversions for perfromance reasons, all - * pattern conversions are done in rte_flow. - */ - value = rte_cpu_to_be_32(value); - mask = rte_cpu_to_be_32(mask); if (reg == REG_C_0) { struct mlx5_priv *priv = dev->data->dev_private; uint32_t msk_c0 = priv->sh->dv_regc0_mask; uint32_t shl_c0 = rte_bsf32(msk_c0); -#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN - uint32_t shr_c0 = __builtin_clz(priv->sh->dv_meta_mask); - value >>= shr_c0; - mask >>= shr_c0; -#endif - value <<= shl_c0; + mask &= msk_c0; mask <<= shl_c0; - MLX5_ASSERT(msk_c0); - MLX5_ASSERT(!(~msk_c0 & mask)); + value <<= shl_c0; } flow_dv_match_meta_reg(matcher, key, reg, value, mask); } diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c index 6cd71a44eb..777a1d6e45 100644 --- a/drivers/net/mlx5/mlx5_rx.c +++ b/drivers/net/mlx5/mlx5_rx.c @@ -740,8 +740,9 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, } } if (rxq->dynf_meta) { - uint32_t meta = cqe->flow_table_metadata & - rxq->flow_meta_port_mask; + uint32_t meta = rte_be_to_cpu_32(cqe->flow_table_metadata >> + __builtin_popcount(rxq->flow_meta_port_mask)) & + rxq->flow_meta_port_mask; if (meta) { pkt->ol_flags |= rxq->flow_meta_mask; diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h index 2d1154b624..648c59e2c2 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h @@ -1221,23 +1221,33 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, if (rxq->dynf_meta) { uint64_t flag = rxq->flow_meta_mask; int32_t offs = rxq->flow_meta_offset; - uint32_t metadata, mask; + uint32_t mask = rxq->flow_meta_port_mask; + uint32_t shift = + __builtin_popcount(rxq->flow_meta_port_mask); + uint32_t metadata; - mask = rxq->flow_meta_port_mask; /* This code is subject for futher optimization. */ - metadata = cq[pos].flow_table_metadata & mask; + metadata = (rte_be_to_cpu_32 + (cq[pos].flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = metadata; pkts[pos]->ol_flags |= metadata ? flag : 0ULL; - metadata = cq[pos + 1].flow_table_metadata & mask; + metadata = (rte_be_to_cpu_32 + (cq[pos + 1].flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = metadata; pkts[pos + 1]->ol_flags |= metadata ? flag : 0ULL; - metadata = cq[pos + 2].flow_table_metadata & mask; + metadata = (rte_be_to_cpu_32 + (cq[pos + 2].flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = metadata; pkts[pos + 2]->ol_flags |= metadata ? flag : 0ULL; - metadata = cq[pos + 3].flow_table_metadata & mask; + metadata = (rte_be_to_cpu_32 + (cq[pos + 3].flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = metadata; pkts[pos + 3]->ol_flags |= metadata ? flag : 0ULL; diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index 2234fbe6b2..5c569ee199 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -833,23 +833,29 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, /* This code is subject for futher optimization. */ int32_t offs = rxq->flow_meta_offset; uint32_t mask = rxq->flow_meta_port_mask; + uint32_t shift = + __builtin_popcount(rxq->flow_meta_port_mask); *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = - container_of(p0, struct mlx5_cqe, - pkt_info)->flow_table_metadata & - mask; + (rte_be_to_cpu_32(container_of + (p0, struct mlx5_cqe, + pkt_info)->flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = - container_of(p1, struct mlx5_cqe, - pkt_info)->flow_table_metadata & - mask; + (rte_be_to_cpu_32(container_of + (p1, struct mlx5_cqe, + pkt_info)->flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = - container_of(p2, struct mlx5_cqe, - pkt_info)->flow_table_metadata & - mask; + (rte_be_to_cpu_32(container_of + (p2, struct mlx5_cqe, + pkt_info)->flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = - container_of(p3, struct mlx5_cqe, - pkt_info)->flow_table_metadata & - mask; + (rte_be_to_cpu_32(container_of + (p3, struct mlx5_cqe, + pkt_info)->flow_table_metadata) >> shift) & + mask; if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *)) elts[pos]->ol_flags |= rxq->flow_meta_mask; if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *)) diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h index c508a7a4f2..661fa7273c 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h @@ -769,15 +769,25 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, /* This code is subject for futher optimization. */ int32_t offs = rxq->flow_meta_offset; uint32_t mask = rxq->flow_meta_port_mask; + uint32_t shift = + __builtin_popcount(rxq->flow_meta_port_mask); *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = - cq[pos].flow_table_metadata & mask; + (rte_be_to_cpu_32 + (cq[pos].flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = - cq[pos + p1].flow_table_metadata & mask; + (rte_be_to_cpu_32 + (cq[pos + p1].flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = - cq[pos + p2].flow_table_metadata & mask; + (rte_be_to_cpu_32 + (cq[pos + p2].flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = - cq[pos + p3].flow_table_metadata & mask; + (rte_be_to_cpu_32 + (cq[pos + p3].flow_table_metadata) >> shift) & + mask; if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *)) pkts[pos]->ol_flags |= rxq->flow_meta_mask; if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *)) -- 2.20.1