X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec_sse.h;h=406f23f5955434680a0cb6c81c0b0f3278c4595e;hb=1b7b9f170fcebbbd0708fab554dcb5a7badef8cf;hp=ca8ed411db947af9f4de95e724fea784941a7170;hpb=8b8f7994f11cd29c49fed0b43dcb782512dc6418;p=dpdk.git

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index ca8ed411db..406f23f595 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -6,7 +6,6 @@
 #ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_
 #define RTE_PMD_MLX5_RXTX_VEC_SSE_H_
 
-#include <assert.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdlib.h>
@@ -16,13 +15,14 @@
 #include <rte_mempool.h>
 #include <rte_prefetch.h>
 
+#include <mlx5_prm.h>
+
+#include "mlx5_defs.h"
 #include "mlx5.h"
 #include "mlx5_utils.h"
 #include "mlx5_rxtx.h"
 #include "mlx5_rxtx_vec.h"
 #include "mlx5_autoconf.h"
-#include "mlx5_defs.h"
-#include "mlx5_prm.h"
 
 #ifndef __INTEL_COMPILER
 #pragma GCC diagnostic ignored "-Wcast-qual"
@@ -259,7 +259,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
 			      PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
 			      PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
 	const __m128i mbuf_init =
-		_mm_loadl_epi64((__m128i *)&rxq->mbuf_initializer);
+		_mm_load_si128((__m128i *)&rxq->mbuf_initializer);
 	__m128i rearm0, rearm1, rearm2, rearm3;
 	uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
 
@@ -426,8 +426,8 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			      rxq->crc_present * RTE_ETHER_CRC_LEN);
 	const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0);
 
-	assert(rxq->sges_n == 0);
-	assert(rxq->cqe_n == rxq->elts_n);
+	MLX5_ASSERT(rxq->sges_n == 0);
+	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
 	cq = &(*rxq->cqes)[cq_idx];
 	rte_prefetch0(cq);
 	rte_prefetch0(cq + 1);
@@ -456,7 +456,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	if (!pkts_n)
 		return rcvd_pkt;
 	/* At this point, there shouldn't be any remained packets. */
-	assert(rxq->decompressed == 0);
+	MLX5_ASSERT(rxq->decompressed == 0);
 	/*
 	 * A. load first Qword (8bytes) in one loop.
 	 * B. copy 4 mbuf pointers from elts ring to returing pkts.
@@ -537,8 +537,8 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			cqe_tmp1 = _mm_loadu_si128((__m128i *)&cq[pos + p2].csum);
 			cqes[3] = _mm_blend_epi16(cqes[3], cqe_tmp2, 0x30);
 			cqes[2] = _mm_blend_epi16(cqes[2], cqe_tmp1, 0x30);
-			cqe_tmp2 = _mm_loadl_epi64((__m128i *)&cq[pos + p3].rsvd3[9]);
-			cqe_tmp1 = _mm_loadl_epi64((__m128i *)&cq[pos + p2].rsvd3[9]);
+			cqe_tmp2 = _mm_loadl_epi64((__m128i *)&cq[pos + p3].rsvd4[2]);
+			cqe_tmp1 = _mm_loadl_epi64((__m128i *)&cq[pos + p2].rsvd4[2]);
 			cqes[3] = _mm_blend_epi16(cqes[3], cqe_tmp2, 0x04);
 			cqes[2] = _mm_blend_epi16(cqes[2], cqe_tmp1, 0x04);
 			/* C.2 generate final structure for mbuf with swapping bytes. */
@@ -564,8 +564,8 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			cqe_tmp1 = _mm_loadu_si128((__m128i *)&cq[pos].csum);
 			cqes[1] = _mm_blend_epi16(cqes[1], cqe_tmp2, 0x30);
 			cqes[0] = _mm_blend_epi16(cqes[0], cqe_tmp1, 0x30);
-			cqe_tmp2 = _mm_loadl_epi64((__m128i *)&cq[pos + p1].rsvd3[9]);
-			cqe_tmp1 = _mm_loadl_epi64((__m128i *)&cq[pos].rsvd3[9]);
+			cqe_tmp2 = _mm_loadl_epi64((__m128i *)&cq[pos + p1].rsvd4[2]);
+			cqe_tmp1 = _mm_loadl_epi64((__m128i *)&cq[pos].rsvd4[2]);
 			cqes[1] = _mm_blend_epi16(cqes[1], cqe_tmp2, 0x04);
 			cqes[0] = _mm_blend_epi16(cqes[0], cqe_tmp1, 0x04);
 			/* C.2 generate final structure for mbuf with swapping bytes. */
@@ -640,6 +640,25 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 				pkts[pos + 3]->timestamp =
 					rte_be_to_cpu_64(cq[pos + p3].timestamp);
 			}
+			if (rte_flow_dynf_metadata_avail()) {
+				/* This code is subject for futher optimization. */
+				*RTE_FLOW_DYNF_METADATA(pkts[pos]) =
+					cq[pos].flow_table_metadata;
+				*RTE_FLOW_DYNF_METADATA(pkts[pos + 1]) =
+					cq[pos + p1].flow_table_metadata;
+				*RTE_FLOW_DYNF_METADATA(pkts[pos + 2]) =
+					cq[pos + p2].flow_table_metadata;
+				*RTE_FLOW_DYNF_METADATA(pkts[pos + 3]) =
+					cq[pos + p3].flow_table_metadata;
+				if (*RTE_FLOW_DYNF_METADATA(pkts[pos]))
+					pkts[pos]->ol_flags |= PKT_RX_DYNF_METADATA;
+				if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 1]))
+					pkts[pos + 1]->ol_flags |= PKT_RX_DYNF_METADATA;
+				if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 2]))
+					pkts[pos + 2]->ol_flags |= PKT_RX_DYNF_METADATA;
+				if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 3]))
+					pkts[pos + 3]->ol_flags |= PKT_RX_DYNF_METADATA;
+			}
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Add up received bytes count. */
 		byte_cnt = _mm_shuffle_epi8(op_own, len_shuf_mask);
@@ -658,7 +677,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
 		return rcvd_pkt;
 	/* Update the consumer indexes for non-compressed CQEs. */
-	assert(nocmp_n <= pkts_n);
+	MLX5_ASSERT(nocmp_n <= pkts_n);
 	rxq->cq_ci += nocmp_n;
 	rxq->rq_pi += nocmp_n;
 	rcvd_pkt += nocmp_n;
@@ -668,7 +687,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 #endif
 	/* Decompress the last CQE if compressed. */
 	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {
-		assert(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
 		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
 							&elts[nocmp_n]);
 		/* Return more packets if needed. */
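
Note: the metadata copy added in the @@ -640 hunk is gated on rte_flow_dynf_metadata_avail(), which only returns true once the application has registered the dynamic flow-metadata mbuf field. A minimal opt-in sketch follows; the wrapper name enable_flow_metadata() is hypothetical, while rte_flow_dynf_metadata_register(), RTE_FLOW_DYNF_METADATA() and PKT_RX_DYNF_METADATA are the real rte_flow API used by the patch:

#include <rte_errno.h>
#include <rte_flow.h>

/* Register the dynamic mbuf field and flag for flow metadata once at
 * startup, before Rx bursts run.  After this succeeds,
 * rte_flow_dynf_metadata_avail() returns true, so the vectorized path
 * above fills *RTE_FLOW_DYNF_METADATA(mbuf) from the CQE and sets
 * PKT_RX_DYNF_METADATA in ol_flags for non-zero values. */
static int
enable_flow_metadata(void)
{
	if (rte_flow_dynf_metadata_register() < 0)
		return -rte_errno; /* e.g. -ENOMEM: no mbuf field space left. */
	return 0;
}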