X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec_neon.h;h=5ff792f4cb5ec1bb5461a782c326c19aabf73b29;hb=675a6c18746694a8e19a0256b16bd6b3e8dcbfd2;hp=5c569ee1991d4770005395fb2e501790b22307ba;hpb=b57e414b48c0da58e445e3d2e92a05758632147d;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index 5c569ee199..5ff792f4cb 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -593,7 +593,7 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, * there's no instruction to count trailing zeros. __builtin_clzl() is * used instead. * - * A. copy 4 mbuf pointers from elts ring to returing pkts. + * A. copy 4 mbuf pointers from elts ring to returning pkts. * B. load 64B CQE and extract necessary fields * Final 16bytes cqes[] extracted from original 64bytes CQE has the * following structure: @@ -767,16 +767,15 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, comp_idx = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16( comp_mask), 0)) / (sizeof(uint16_t) * 8); - /* D.6 mask out entries after the compressed CQE. */ - mask = vcreate_u16(comp_idx < MLX5_VPMD_DESCS_PER_LOOP ? - -1UL >> (comp_idx * sizeof(uint16_t) * 8) : - 0); - invalid_mask = vorr_u16(invalid_mask, mask); + invalid_mask = vorr_u16(invalid_mask, comp_mask); /* D.7 count non-compressed valid CQEs. */ n = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16( invalid_mask), 0)) / (sizeof(uint16_t) * 8); nocmp_n += n; - /* D.2 get the final invalid mask. */ + /* + * D.2 mask out entries after the compressed CQE. + * get the final invalid mask. + */ mask = vcreate_u16(n < MLX5_VPMD_DESCS_PER_LOOP ? -1UL >> (n * sizeof(uint16_t) * 8) : 0); invalid_mask = vorr_u16(invalid_mask, mask); @@ -833,29 +832,23 @@ rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, /* This code is subject for futher optimization. */ int32_t offs = rxq->flow_meta_offset; uint32_t mask = rxq->flow_meta_port_mask; - uint32_t shift = - __builtin_popcount(rxq->flow_meta_port_mask); *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = - (rte_be_to_cpu_32(container_of + rte_be_to_cpu_32(container_of (p0, struct mlx5_cqe, - pkt_info)->flow_table_metadata) >> shift) & - mask; + pkt_info)->flow_table_metadata) & mask; *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = - (rte_be_to_cpu_32(container_of + rte_be_to_cpu_32(container_of (p1, struct mlx5_cqe, - pkt_info)->flow_table_metadata) >> shift) & - mask; + pkt_info)->flow_table_metadata) & mask; *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = - (rte_be_to_cpu_32(container_of + rte_be_to_cpu_32(container_of (p2, struct mlx5_cqe, - pkt_info)->flow_table_metadata) >> shift) & - mask; + pkt_info)->flow_table_metadata) & mask; *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = - (rte_be_to_cpu_32(container_of + rte_be_to_cpu_32(container_of (p3, struct mlx5_cqe, - pkt_info)->flow_table_metadata) >> shift) & - mask; + pkt_info)->flow_table_metadata) & mask; if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *)) elts[pos]->ol_flags |= rxq->flow_meta_mask; if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))