X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec_altivec.h;h=b55138ac7780fb0faf59c577e79e381bf2c6e796;hb=0bf68c660e932e76087dc8c87f8b1dacba89c2be;hp=3be3a6df4787c7bbb2e0b9949094938c88fea120;hpb=2e542da709371ee51d61d74c9a1b357ad34ae13e;p=dpdk.git

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
index 3be3a6df47..b55138ac77 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
@@ -6,24 +6,24 @@
 #ifndef RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_
 #define RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_
 
-#include <assert.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdlib.h>
 
-#include <altivec.h>
+#include <rte_altivec.h>
 
 #include <rte_mbuf.h>
 #include <rte_mempool.h>
 #include <rte_prefetch.h>
 
+#include <mlx5_prm.h>
+
+#include "mlx5_defs.h"
 #include "mlx5.h"
 #include "mlx5_utils.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_rxtx_vec.h"
 #include "mlx5_autoconf.h"
-#include "mlx5_defs.h"
-#include "mlx5_prm.h"
 
 #ifndef __INTEL_COMPILER
 #pragma GCC diagnostic ignored "-Wcast-qual"
@@ -155,8 +155,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		const vector unsigned long shmax = {64, 64};
 #endif
 
-		if (!(pos & 0x7) && pos + 8 < mcqe_n)
-			rte_prefetch0((void *)(cq + pos + 8));
+		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+			if (likely(pos + i < mcqe_n))
+				rte_prefetch0((void *)(cq + pos + i));
 
 		/* A.1 load mCQEs into a 128bit register. */
 		mcqe1 = (vector unsigned char)vec_vsx_ld(0,
@@ -263,6 +264,25 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 			elts[pos + 2]->hash.fdir.hi = flow_tag;
 			elts[pos + 3]->hash.fdir.hi = flow_tag;
 		}
+		if (rxq->dynf_meta) {
+			int32_t offs = rxq->flow_meta_offset;
+			const uint32_t meta =
+				*RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);
+
+			/* Check if title packet has valid metadata. */
+			if (meta) {
+				MLX5_ASSERT(t_pkt->ol_flags &
+					    rxq->flow_meta_mask);
+				*RTE_MBUF_DYNFIELD(elts[pos], offs,
+						   uint32_t *) = meta;
+				*RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
+						   uint32_t *) = meta;
+				*RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
+						   uint32_t *) = meta;
+				*RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
+						   uint32_t *) = meta;
+			}
+		}
 
 		pos += MLX5_VPMD_DESCS_PER_LOOP;
 		/* Move to next CQE and invalidate consumed CQEs. */
@@ -344,9 +364,8 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 		 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
 		 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED};
 	const vector unsigned char mbuf_init =
-		(vector unsigned char)(vector unsigned long){
-		*(__attribute__((__aligned__(8))) unsigned long *)
-		&rxq->mbuf_initializer, 0LL};
+		(vector unsigned char)vec_vsx_ld
+			(0, (vector unsigned char *)&rxq->mbuf_initializer);
 	const vector unsigned short rearm_sel_mask =
 		(vector unsigned short){0, 0, 0, 0, 0xffff, 0xffff, 0, 0};
 	vector unsigned char rearm0, rearm1, rearm2, rearm3;
@@ -416,7 +435,6 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 			vec_cmpeq((vector unsigned int)flow_tag,
 			(vector unsigned int)pinfo_ft_mask)));
 	}
-
 	/*
 	 * Merge the two fields to generate the following:
 	 * bit[1] = l3_ok
@@ -546,13 +564,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
  * @param[out] err
  *   Pointer to a flag. Set non-zero value if pkts array has at least one error
  *   packet to handle.
+ * @param[out] no_cq
+ *   Pointer to a boolean. Set true if no new CQE seen.
  *
  * @return
  *   Number of packets received including errors (<= pkts_n).
  */
 static inline uint16_t
 rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
-	    uint64_t *err)
+	    uint64_t *err, bool *no_cq)
 {
 	const uint16_t q_n = 1 << rxq->cqe_n;
 	const uint16_t q_mask = q_n - 1;
@@ -617,8 +637,8 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	const vector unsigned short cqe_sel_mask2 =
 		(vector unsigned short){0, 0, 0xffff, 0, 0, 0, 0, 0};
 
-	assert(rxq->sges_n == 0);
-	assert(rxq->cqe_n == rxq->elts_n);
+	MLX5_ASSERT(rxq->sges_n == 0);
+	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
 	cq = &(*rxq->cqes)[cq_idx];
 	rte_prefetch0(cq);
 	rte_prefetch0(cq + 1);
@@ -645,10 +665,12 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	/* Not to cross queue end. */
 	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
 	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
-	if (!pkts_n)
+	if (!pkts_n) {
+		*no_cq = !rcvd_pkt;
 		return rcvd_pkt;
+	}
 	/* At this point, there shouldn't be any remaining packets. */
-	assert(rxq->decompressed == 0);
+	MLX5_ASSERT(rxq->decompressed == 0);
 
 	/*
 	 * A. load first Qword (8bytes) in one loop.
@@ -734,13 +756,13 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		/* A.1 load cqes. */
 		p3 = (unsigned int)((vector unsigned short)p)[3];
 		cqes[3] = (vector unsigned char)(vector unsigned long){
-			*(__attribute__((__aligned__(8))) unsigned long *)
+			*(__rte_aligned(8) unsigned long *)
 			&cq[pos + p3].sop_drop_qpn, 0LL};
 		rte_compiler_barrier();
 
 		p2 = (unsigned int)((vector unsigned short)p)[2];
 		cqes[2] = (vector unsigned char)(vector unsigned long){
-			*(__attribute__((__aligned__(8))) unsigned long *)
+			*(__rte_aligned(8) unsigned long *)
 			&cq[pos + p2].sop_drop_qpn, 0LL};
 		rte_compiler_barrier();
 
@@ -753,12 +775,12 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		/* A.1 load a block having op_own. */
 		p1 = (unsigned int)((vector unsigned short)p)[1];
 		cqes[1] = (vector unsigned char)(vector unsigned long){
-			*(__attribute__((__aligned__(8))) unsigned long *)
+			*(__rte_aligned(8) unsigned long *)
 			&cq[pos + p1].sop_drop_qpn, 0LL};
 		rte_compiler_barrier();
 
 		cqes[0] = (vector unsigned char)(vector unsigned long){
-			*(__attribute__((__aligned__(8))) unsigned long *)
+			*(__rte_aligned(8) unsigned long *)
 			&cq[pos].sop_drop_qpn, 0LL};
 		rte_compiler_barrier();
 
@@ -785,10 +807,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			vec_sel((vector unsigned short)cqes[2],
 			(vector unsigned short)cqe_tmp1, cqe_sel_mask1);
 		cqe_tmp2 = (vector unsigned char)(vector unsigned long){
-			*(__attribute__((__aligned__(8))) unsigned long *)
+			*(__rte_aligned(8) unsigned long *)
 			&cq[pos + p3].rsvd3[9], 0LL};
 		cqe_tmp1 = (vector unsigned char)(vector unsigned long){
-			*(__attribute__((__aligned__(8))) unsigned long *)
+			*(__rte_aligned(8) unsigned long *)
 			&cq[pos + p2].rsvd3[9], 0LL};
 		cqes[3] = (vector unsigned char)
 			vec_sel((vector unsigned short)cqes[3],
@@ -848,10 +870,10 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			vec_sel((vector unsigned short)cqes[0],
 			(vector unsigned short)cqe_tmp1, cqe_sel_mask1);
 		cqe_tmp2 = (vector unsigned char)(vector unsigned long){
-			*(__attribute__((__aligned__(8))) unsigned long *)
+			*(__rte_aligned(8) unsigned long *)
 			&cq[pos + p1].rsvd3[9], 0LL};
 		cqe_tmp1 = (vector unsigned char)(vector unsigned long){
-			*(__attribute__((__aligned__(8))) unsigned long *)
+			*(__rte_aligned(8) unsigned long *)
 			&cq[pos].rsvd3[9], 0LL};
 		cqes[1] = (vector unsigned char)
 			vec_sel((vector unsigned short)cqes[1],
@@ -1011,7 +1033,29 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			pkts[pos + 3]->timestamp =
 				rte_be_to_cpu_64(cq[pos + p3].timestamp);
 		}
-
+		if (rxq->dynf_meta) {
+			uint64_t flag = rxq->flow_meta_mask;
+			int32_t offs = rxq->flow_meta_offset;
+			uint32_t metadata;
+
+			/* This code is subject for futher optimization. */
+			metadata = cq[pos].flow_table_metadata;
+			*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
+								metadata;
+			pkts[pos]->ol_flags |= metadata ? flag : 0ULL;
+			metadata = cq[pos + 1].flow_table_metadata;
+			*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
+								metadata;
+			pkts[pos + 1]->ol_flags |= metadata ? flag : 0ULL;
+			metadata = cq[pos + 2].flow_table_metadata;
+			*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
+								metadata;
+			pkts[pos + 2]->ol_flags |= metadata ? flag : 0ULL;
+			metadata = cq[pos + 3].flow_table_metadata;
+			*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
+								metadata;
+			pkts[pos + 3]->ol_flags |= metadata ? flag : 0ULL;
+		}
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Add up received bytes count. */
 		byte_cnt = vec_perm(op_own, zero, len_shuf_mask);
@@ -1039,10 +1083,12 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			break;
 	}
 	/* If no new CQE seen, return without updating cq_db. */
-	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
+	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+		*no_cq = true;
 		return rcvd_pkt;
+	}
 	/* Update the consumer indexes for non-compressed CQEs. */
-	assert(nocmp_n <= pkts_n);
+	MLX5_ASSERT(nocmp_n <= pkts_n);
 	rxq->cq_ci += nocmp_n;
 	rxq->rq_pi += nocmp_n;
 	rcvd_pkt += nocmp_n;
@@ -1052,7 +1098,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 #endif
 	/* Decompress the last CQE if compressed. */
 	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {
-		assert(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
 		rxq->decompressed =
 			rxq_cq_decompress_v(rxq, &cq[nocmp_n], &elts[nocmp_n]);
 		/* Return more packets if needed. */
@@ -1068,6 +1114,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	}
 	rte_compiler_barrier();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+	*no_cq = !rcvd_pkt;
 	return rcvd_pkt;
 }
 
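Usage note: the new no_cq out-parameter lets the caller stop its poll loop as soon as
the completion queue is empty instead of spinning until pkts_n packets have arrived.
Below is a minimal sketch of such a caller loop, modeled on the vectorized Rx burst
wrapper in mlx5_rxtx_vec.c; the wrapper name rx_burst_loop is illustrative, and
rxq_handle_pending_error() plus rxq->err_state are assumed to exist as in the mlx5
PMD of this era, so treat it as a sketch rather than the exact upstream code.

#include <stdbool.h>
#include <stdint.h>

#include <rte_branch_prediction.h>
#include <rte_mbuf.h>

#include "mlx5_rxtx.h"	/* struct mlx5_rxq_data, rxq_burst_v(), ... */

/* Illustrative caller loop consuming the new no_cq flag. */
static uint16_t
rx_burst_loop(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	      uint16_t pkts_n)
{
	uint16_t nb_rx = 0;	/* Packets returned by one rxq_burst_v() call. */
	uint16_t tn = 0;	/* Total packets accumulated so far. */
	uint64_t err = 0;	/* Set non-zero by rxq_burst_v() on CQE errors. */
	bool no_cq = false;	/* Set true when no new CQE was seen. */

	do {
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn, &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		/* CQ drained; polling again would only burn cycles. */
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

Without the flag, a request for more packets than one rxq_burst_v() call returns
would re-enter the poll loop even though the queue was already proven empty.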
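The rxq->dynf_meta hunks deliver the CQE's flow_table_metadata through a dynamic
mbuf field instead of a fixed mbuf member. On the application side the field is
reached through the public helpers in rte_flow.h; the following is a minimal
sketch, assuming the DPDK 20.02-era dynamic-metadata API
(rte_flow_dynf_metadata_register(), RTE_FLOW_DYNF_METADATA(),
PKT_RX_DYNF_METADATA), not code from this patch.

#include <inttypes.h>
#include <stdio.h>

#include <rte_flow.h>
#include <rte_mbuf.h>

/*
 * Register the metadata dynfield/dynflag once before configuring ports;
 * this is what makes the PMD set rxq->dynf_meta and fill
 * rxq->flow_meta_offset/rxq->flow_meta_mask used by the Rx path above.
 */
static int
enable_rx_metadata(void)
{
	return rte_flow_dynf_metadata_register();
}

/* Read back what the vectorized Rx path stored via RTE_MBUF_DYNFIELD(). */
static void
print_rx_metadata(struct rte_mbuf *m)
{
	if (m->ol_flags & PKT_RX_DYNF_METADATA)
		printf("flow metadata: 0x%" PRIx32 "\n",
		       *RTE_FLOW_DYNF_METADATA(m));
}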