X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec_neon.h;h=77979c939c2f23fd2dda06b1a1f3e5d8d9c23610;hb=c2a42d19d967e24223f06f2fc797eaed8e17c345;hp=4c067d8801f851be646665b60ff4157d3e767ac3;hpb=71094ae3ceeab84a5486996d1dfb2448f210ce58;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index 4c067d8801..77979c939c 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -593,7 +593,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, * there's no instruction to count trailing zeros. __builtin_clzl() is * used instead. * - * A. copy 4 mbuf pointers from elts ring to returing pkts. + * A. copy 4 mbuf pointers from elts ring to returning pkts. * B. load 64B CQE and extract necessary fields * Final 16bytes cqes[] extracted from original 64bytes CQE has the * following structure: @@ -767,16 +767,15 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, comp_idx = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16( comp_mask), 0)) / (sizeof(uint16_t) * 8); - /* D.6 mask out entries after the compressed CQE. */ - mask = vcreate_u16(comp_idx < MLX5_VPMD_DESCS_PER_LOOP ? - -1UL >> (comp_idx * sizeof(uint16_t) * 8) : - 0); - invalid_mask = vorr_u16(invalid_mask, mask); + invalid_mask = vorr_u16(invalid_mask, comp_mask); /* D.7 count non-compressed valid CQEs. */ n = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16( invalid_mask), 0)) / (sizeof(uint16_t) * 8); nocmp_n += n; - /* D.2 get the final invalid mask. */ + /* + * D.2 mask out entries after the compressed CQE. + * get the final invalid mask. + */ mask = vcreate_u16(n < MLX5_VPMD_DESCS_PER_LOOP ? -1UL >> (n * sizeof(uint16_t) * 8) : 0); invalid_mask = vorr_u16(invalid_mask, mask); @@ -832,19 +831,30 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, if (rxq->dynf_meta) { /* This code is subject for futher optimization. */ int32_t offs = rxq->flow_meta_offset; + uint32_t mask = rxq->flow_meta_port_mask; + uint32_t shift = + __builtin_popcount(rxq->flow_meta_port_mask); *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = - container_of(p0, struct mlx5_cqe, - pkt_info)->flow_table_metadata; + (rte_be_to_cpu_32(container_of + (p0, struct mlx5_cqe, + pkt_info)->flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = - container_of(p1, struct mlx5_cqe, - pkt_info)->flow_table_metadata; + (rte_be_to_cpu_32(container_of + (p1, struct mlx5_cqe, + pkt_info)->flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = - container_of(p2, struct mlx5_cqe, - pkt_info)->flow_table_metadata; + (rte_be_to_cpu_32(container_of + (p2, struct mlx5_cqe, + pkt_info)->flow_table_metadata) >> shift) & + mask; *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = - container_of(p3, struct mlx5_cqe, - pkt_info)->flow_table_metadata; + (rte_be_to_cpu_32(container_of + (p3, struct mlx5_cqe, + pkt_info)->flow_table_metadata) >> shift) & + mask; if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *)) elts[pos]->ol_flags |= rxq->flow_meta_mask; if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))