common/mlx5: introduce common library
[dpdk.git] / drivers / net / mlx5 / mlx5_rxtx_vec_neon.h
index 9930286..5b846c1 100644 (file)
 #include <rte_mempool.h>
 #include <rte_prefetch.h>
 
+#include <mlx5_prm.h>
+
+#include "mlx5_defs.h"
 #include "mlx5.h"
 #include "mlx5_utils.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_rxtx_vec.h"
 #include "mlx5_autoconf.h"
-#include "mlx5_defs.h"
-#include "mlx5_prm.h"
 
 #pragma GCC diagnostic ignored "-Wcast-qual"
 
@@ -264,8 +265,8 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
        const uint32x4_t cv_mask =
                vdupq_n_u32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
                            PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
-       const uint64x1_t mbuf_init = vld1_u64(&rxq->mbuf_initializer);
-       const uint64x1_t r32_mask = vcreate_u64(0xffffffff);
+       const uint64x2_t mbuf_init = vld1q_u64
+                               ((const uint64_t *)&rxq->mbuf_initializer);
        uint64x2_t rearm0, rearm1, rearm2, rearm3;
        uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
 
@@ -326,18 +327,19 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
        /* Merge to ol_flags. */
        ol_flags = vorrq_u32(ol_flags, cv_flags);
        /* Merge mbuf_init and ol_flags, and store. */
-       rearm0 = vcombine_u64(mbuf_init,
-                             vshr_n_u64(vget_high_u64(vreinterpretq_u64_u32(
-                                                      ol_flags)), 32));
-       rearm1 = vcombine_u64(mbuf_init,
-                             vand_u64(vget_high_u64(vreinterpretq_u64_u32(
-                                                    ol_flags)), r32_mask));
-       rearm2 = vcombine_u64(mbuf_init,
-                             vshr_n_u64(vget_low_u64(vreinterpretq_u64_u32(
-                                                     ol_flags)), 32));
-       rearm3 = vcombine_u64(mbuf_init,
-                             vand_u64(vget_low_u64(vreinterpretq_u64_u32(
-                                                   ol_flags)), r32_mask));
+       rearm0 = vreinterpretq_u64_u32(vsetq_lane_u32
+                                       (vgetq_lane_u32(ol_flags, 3),
+                                        vreinterpretq_u32_u64(mbuf_init), 2));
+       rearm1 = vreinterpretq_u64_u32(vsetq_lane_u32
+                                       (vgetq_lane_u32(ol_flags, 2),
+                                        vreinterpretq_u32_u64(mbuf_init), 2));
+       rearm2 = vreinterpretq_u64_u32(vsetq_lane_u32
+                                       (vgetq_lane_u32(ol_flags, 1),
+                                        vreinterpretq_u32_u64(mbuf_init), 2));
+       rearm3 = vreinterpretq_u64_u32(vsetq_lane_u32
+                                       (vgetq_lane_u32(ol_flags, 0),
+                                        vreinterpretq_u32_u64(mbuf_init), 2));
+
        vst1q_u64((void *)&pkts[0]->rearm_data, rearm0);
        vst1q_u64((void *)&pkts[1]->rearm_data, rearm1);
        vst1q_u64((void *)&pkts[2]->rearm_data, rearm2);
@@ -687,6 +689,29 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
                                        container_of(p3, struct mlx5_cqe,
                                                     pkt_info)->timestamp);
                }
+               if (rte_flow_dynf_metadata_avail()) {
+                       /* This code is subject for futher optimization. */
+                       *RTE_FLOW_DYNF_METADATA(elts[pos]) =
+                               container_of(p0, struct mlx5_cqe,
+                                            pkt_info)->flow_table_metadata;
+                       *RTE_FLOW_DYNF_METADATA(elts[pos + 1]) =
+                               container_of(p1, struct mlx5_cqe,
+                                            pkt_info)->flow_table_metadata;
+                       *RTE_FLOW_DYNF_METADATA(elts[pos + 2]) =
+                               container_of(p2, struct mlx5_cqe,
+                                            pkt_info)->flow_table_metadata;
+                       *RTE_FLOW_DYNF_METADATA(elts[pos + 3]) =
+                               container_of(p3, struct mlx5_cqe,
+                                            pkt_info)->flow_table_metadata;
+                       if (*RTE_FLOW_DYNF_METADATA(elts[pos]))
+                               elts[pos]->ol_flags |= PKT_RX_DYNF_METADATA;
+                       if (*RTE_FLOW_DYNF_METADATA(elts[pos + 1]))
+                               elts[pos + 1]->ol_flags |= PKT_RX_DYNF_METADATA;
+                       if (*RTE_FLOW_DYNF_METADATA(elts[pos + 2]))
+                               elts[pos + 2]->ol_flags |= PKT_RX_DYNF_METADATA;
+                       if (*RTE_FLOW_DYNF_METADATA(elts[pos + 3]))
+                               elts[pos + 3]->ol_flags |= PKT_RX_DYNF_METADATA;
+               }
 #ifdef MLX5_PMD_SOFT_COUNTERS
                /* Add up received bytes count. */
                byte_cnt = vbic_u16(byte_cnt, invalid_mask);
@@ -727,7 +752,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
                        rxq->decompressed -= n;
                }
        }
-       rte_compiler_barrier();
+       rte_cio_wmb();
        *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
        return rcvd_pkt;
 }