net/mlx5: convert meta register to big-endian
authorAlexander Kozyrev <akozyrev@nvidia.com>
Wed, 16 Jun 2021 14:46:02 +0000 (17:46 +0300)
committerRaslan Darawsheh <rasland@nvidia.com>
Thu, 24 Jun 2021 11:19:53 +0000 (13:19 +0200)
Metadata were stored in the CPU order (little-endian format on x86),
while all the packet header fields are stored in the network order.
That caused wrong results whenever we tried to use metadata value
in the modify_field action: bytes were swapped as a result.

Convert the metadata value into big-endian format before storing it
in the Mellanox NIC to achieve consistent behaviour.

Fixes: 641dbe4fb053 ("net/mlx5: support modify field flow action")
Cc: stable@dpdk.org
Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
drivers/net/mlx5/mlx5_flow_dv.c
drivers/net/mlx5/mlx5_rx.c
drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
drivers/net/mlx5/mlx5_rxtx_vec_neon.h
drivers/net/mlx5/mlx5_rxtx_vec_sse.h

index a4a42f4..62edc4f 100644 (file)
@@ -1262,8 +1262,8 @@ flow_dv_convert_action_set_meta
                         const struct rte_flow_action_set_meta *conf,
                         struct rte_flow_error *error)
 {
-       uint32_t data = conf->data;
-       uint32_t mask = conf->mask;
+       uint32_t mask = rte_cpu_to_be_32(conf->mask);
+       uint32_t data = rte_cpu_to_be_32(conf->data) & mask;
        struct rte_flow_item item = {
                .spec = &data,
                .mask = &mask,
@@ -1276,25 +1276,14 @@ flow_dv_convert_action_set_meta
        if (reg < 0)
                return reg;
        MLX5_ASSERT(reg != REG_NON);
-       /*
-        * In datapath code there is no endianness
-        * coversions for perfromance reasons, all
-        * pattern conversions are done in rte_flow.
-        */
        if (reg == REG_C_0) {
                struct mlx5_priv *priv = dev->data->dev_private;
                uint32_t msk_c0 = priv->sh->dv_regc0_mask;
-               uint32_t shl_c0;
+               uint32_t shl_c0 = rte_bsf32(msk_c0);
 
-               MLX5_ASSERT(msk_c0);
-#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
-               shl_c0 = rte_bsf32(msk_c0);
-#else
-               shl_c0 = sizeof(msk_c0) * CHAR_BIT - rte_fls_u32(msk_c0);
-#endif
-               mask <<= shl_c0;
-               data <<= shl_c0;
-               MLX5_ASSERT(!(~msk_c0 & rte_cpu_to_be_32(mask)));
+               data = rte_cpu_to_be_32(rte_cpu_to_be_32(data) << shl_c0);
+               mask = rte_cpu_to_be_32(mask) & msk_c0;
+               mask = rte_cpu_to_be_32(mask << shl_c0);
        }
        reg_c_x[0] = (struct field_modify_info){4, 0, reg_to_field[reg]};
        /* The routine expects parameters in memory as big-endian ones. */
@@ -9255,27 +9244,14 @@ flow_dv_translate_item_meta(struct rte_eth_dev *dev,
                if (reg < 0)
                        return;
                MLX5_ASSERT(reg != REG_NON);
-               /*
-                * In datapath code there is no endianness
-                * coversions for perfromance reasons, all
-                * pattern conversions are done in rte_flow.
-                */
-               value = rte_cpu_to_be_32(value);
-               mask = rte_cpu_to_be_32(mask);
                if (reg == REG_C_0) {
                        struct mlx5_priv *priv = dev->data->dev_private;
                        uint32_t msk_c0 = priv->sh->dv_regc0_mask;
                        uint32_t shl_c0 = rte_bsf32(msk_c0);
-#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
-                       uint32_t shr_c0 = __builtin_clz(priv->sh->dv_meta_mask);
 
-                       value >>= shr_c0;
-                       mask >>= shr_c0;
-#endif
-                       value <<= shl_c0;
+                       mask &= msk_c0;
                        mask <<= shl_c0;
-                       MLX5_ASSERT(msk_c0);
-                       MLX5_ASSERT(!(~msk_c0 & mask));
+                       value <<= shl_c0;
                }
                flow_dv_match_meta_reg(matcher, key, reg, value, mask);
        }
index 6cd71a4..777a1d6 100644 (file)
@@ -740,8 +740,9 @@ rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
                }
        }
        if (rxq->dynf_meta) {
-               uint32_t meta = cqe->flow_table_metadata &
-                               rxq->flow_meta_port_mask;
+               uint32_t meta = rte_be_to_cpu_32(cqe->flow_table_metadata >>
+                       __builtin_popcount(rxq->flow_meta_port_mask)) &
+                       rxq->flow_meta_port_mask;
 
                if (meta) {
                        pkt->ol_flags |= rxq->flow_meta_mask;
index 2d1154b..648c59e 100644 (file)
@@ -1221,23 +1221,33 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
                if (rxq->dynf_meta) {
                        uint64_t flag = rxq->flow_meta_mask;
                        int32_t offs = rxq->flow_meta_offset;
-                       uint32_t metadata, mask;
+                       uint32_t mask = rxq->flow_meta_port_mask;
+                       uint32_t shift =
+                               __builtin_popcount(rxq->flow_meta_port_mask);
+                       uint32_t metadata;
 
-                       mask = rxq->flow_meta_port_mask;
                        /* This code is subject for futher optimization. */
-                       metadata = cq[pos].flow_table_metadata & mask;
+                       metadata = (rte_be_to_cpu_32
+                               (cq[pos].flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
                                                                metadata;
                        pkts[pos]->ol_flags |= metadata ? flag : 0ULL;
-                       metadata = cq[pos + 1].flow_table_metadata & mask;
+                       metadata = (rte_be_to_cpu_32
+                               (cq[pos + 1].flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
                                                                metadata;
                        pkts[pos + 1]->ol_flags |= metadata ? flag : 0ULL;
-                       metadata = cq[pos + 2].flow_table_metadata & mask;
+                       metadata = (rte_be_to_cpu_32
+                               (cq[pos + 2].flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
                                                                metadata;
                        pkts[pos + 2]->ol_flags |= metadata ? flag : 0ULL;
-                       metadata = cq[pos + 3].flow_table_metadata & mask;
+                       metadata = (rte_be_to_cpu_32
+                               (cq[pos + 3].flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
                                                                metadata;
                        pkts[pos + 3]->ol_flags |= metadata ? flag : 0ULL;
index 2234fbe..5c569ee 100644 (file)
@@ -833,23 +833,29 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
                        /* This code is subject for futher optimization. */
                        int32_t offs = rxq->flow_meta_offset;
                        uint32_t mask = rxq->flow_meta_port_mask;
+                       uint32_t shift =
+                               __builtin_popcount(rxq->flow_meta_port_mask);
 
                        *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
-                               container_of(p0, struct mlx5_cqe,
-                                            pkt_info)->flow_table_metadata &
-                                            mask;
+                               (rte_be_to_cpu_32(container_of
+                               (p0, struct mlx5_cqe,
+                               pkt_info)->flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
-                               container_of(p1, struct mlx5_cqe,
-                                            pkt_info)->flow_table_metadata &
-                                            mask;
+                               (rte_be_to_cpu_32(container_of
+                               (p1, struct mlx5_cqe,
+                               pkt_info)->flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
-                               container_of(p2, struct mlx5_cqe,
-                                            pkt_info)->flow_table_metadata &
-                                            mask;
+                               (rte_be_to_cpu_32(container_of
+                               (p2, struct mlx5_cqe,
+                               pkt_info)->flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
-                               container_of(p3, struct mlx5_cqe,
-                                            pkt_info)->flow_table_metadata &
-                                            mask;
+                               (rte_be_to_cpu_32(container_of
+                               (p3, struct mlx5_cqe,
+                               pkt_info)->flow_table_metadata) >> shift) &
+                               mask;
                        if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
                                elts[pos]->ol_flags |= rxq->flow_meta_mask;
                        if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))
index c508a7a..661fa72 100644 (file)
@@ -769,15 +769,25 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
                        /* This code is subject for futher optimization. */
                        int32_t offs = rxq->flow_meta_offset;
                        uint32_t mask = rxq->flow_meta_port_mask;
+                       uint32_t shift =
+                               __builtin_popcount(rxq->flow_meta_port_mask);
 
                        *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
-                               cq[pos].flow_table_metadata & mask;
+                               (rte_be_to_cpu_32
+                               (cq[pos].flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
-                               cq[pos + p1].flow_table_metadata  & mask;
+                               (rte_be_to_cpu_32
+                               (cq[pos + p1].flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
-                               cq[pos + p2].flow_table_metadata & mask;
+                               (rte_be_to_cpu_32
+                               (cq[pos + p2].flow_table_metadata) >> shift) &
+                               mask;
                        *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
-                               cq[pos + p3].flow_table_metadata & mask;
+                               (rte_be_to_cpu_32
+                               (cq[pos + p3].flow_table_metadata) >> shift) &
+                               mask;
                        if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
                                pkts[pos]->ol_flags |= rxq->flow_meta_mask;
                        if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))