diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 6e1b967a36..554924d7fc 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -6,7 +6,6 @@
 #ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_
 #define RTE_PMD_MLX5_RXTX_VEC_SSE_H_
 
-#include <assert.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdlib.h>
@@ -119,7 +118,6 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 			     14, 15,  6,  7,
 			     10, 11,  2,  3);
 #endif
-
 	/*
 	 * A. load mCQEs into a 128bit register.
 	 * B. store rearm data to mbuf.
@@ -134,8 +132,9 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 		__m128i byte_cnt, invalid_mask;
 #endif
 
-		if (!(pos & 0x7) && pos + 8 < mcqe_n)
-			rte_prefetch0((void *)(cq + pos + 8));
+		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+			if (likely(pos + i < mcqe_n))
+				rte_prefetch0((void *)(cq + pos + i));
 		/* A.1 load mCQEs into a 128bit register. */
 		mcqe1 = _mm_loadu_si128((__m128i *)&mcq[pos % 8]);
 		mcqe2 = _mm_loadu_si128((__m128i *)&mcq[pos % 8 + 2]);
@@ -192,9 +191,30 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 			elts[pos + 2]->hash.fdir.hi = flow_tag;
 			elts[pos + 3]->hash.fdir.hi = flow_tag;
 		}
+		if (rxq->dynf_meta) {
+			int32_t offs = rxq->flow_meta_offset;
+			const uint32_t meta =
+				*RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);
+
+			/* Check if title packet has valid metadata. */
+			if (meta) {
+				MLX5_ASSERT(t_pkt->ol_flags &
+					    rxq->flow_meta_mask);
+				*RTE_MBUF_DYNFIELD(elts[pos], offs,
+						   uint32_t *) = meta;
+				*RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
+						   uint32_t *) = meta;
+				*RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
+						   uint32_t *) = meta;
+				*RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
+						   uint32_t *) = meta;
+			}
+		}
 		pos += MLX5_VPMD_DESCS_PER_LOOP;
 		/* Move to next CQE and invalidate consumed CQEs. */
 		if (!(pos & 0x7) && pos < mcqe_n) {
+			if (pos + 8 < mcqe_n)
+				rte_prefetch0((void *)(cq + pos + 8));
 			mcq = (void *)(cq + pos);
 			for (i = 0; i < 8; ++i)
 				cq[inv++].op_own = MLX5_CQE_INVALIDATE;
@@ -366,13 +386,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
  * @param[out] err
  *   Pointer to a flag. Set non-zero value if pkts array has at least one error
  *   packet to handle.
+ * @param[out] no_cq
+ *   Pointer to a boolean. Set true if no new CQE seen.
  *
  * @return
  *   Number of packets received including errors (<= pkts_n).
  */
 static inline uint16_t
 rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
-	    uint64_t *err)
+	    uint64_t *err, bool *no_cq)
 {
 	const uint16_t q_n = 1 << rxq->cqe_n;
 	const uint16_t q_mask = q_n - 1;
@@ -427,8 +449,8 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			      rxq->crc_present * RTE_ETHER_CRC_LEN);
 	const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0);
 
-	assert(rxq->sges_n == 0);
-	assert(rxq->cqe_n == rxq->elts_n);
+	MLX5_ASSERT(rxq->sges_n == 0);
+	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
 	cq = &(*rxq->cqes)[cq_idx];
 	rte_prefetch0(cq);
 	rte_prefetch0(cq + 1);
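
Note: the decompression hunk above propagates the title packet's 32-bit flow metadata to every mbuf of the compressed session through a dynamic mbuf field. Registration yields a byte offset into struct rte_mbuf, the PMD caches it (rxq->flow_meta_offset), and the hot path is then a single typed store per mbuf. Below is a minimal sketch of that mechanism using the public rte_mbuf_dyn API; the field name, meta_offs, and the helpers are hypothetical, not the driver's own code.

#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>

/* Cached byte offset of the dynamic field inside struct rte_mbuf;
 * -1 until registration succeeds.
 */
static int meta_offs = -1;

static int
meta_field_init(void)
{
	static const struct rte_mbuf_dynfield desc = {
		.name = "example_dynfield_meta", /* hypothetical name */
		.size = sizeof(uint32_t),
		.align = __alignof__(uint32_t),
	};

	/* Returns the offset of the reserved area, or -1 on failure. */
	meta_offs = rte_mbuf_dynfield_register(&desc);
	return meta_offs < 0 ? -1 : 0;
}

static inline void
meta_field_set(struct rte_mbuf *m, uint32_t v)
{
	/* RTE_MBUF_DYNFIELD() resolves to a typed pointer into the mbuf. */
	*RTE_MBUF_DYNFIELD(m, meta_offs, uint32_t *) = v;
}

static inline uint32_t
meta_field_get(const struct rte_mbuf *m)
{
	return *RTE_MBUF_DYNFIELD(m, meta_offs, uint32_t *);
}

Caching the offset once at setup, as the queue structure does, keeps any field lookup out of the per-packet data path.
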
@@ -454,10 +476,12 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	/* Not to cross queue end. */
 	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
 	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
-	if (!pkts_n)
+	if (!pkts_n) {
+		*no_cq = !rcvd_pkt;
 		return rcvd_pkt;
+	}
 	/* At this point, there shouldn't be any remained packets. */
-	assert(rxq->decompressed == 0);
+	MLX5_ASSERT(rxq->decompressed == 0);
 	/*
 	 * A. load first Qword (8bytes) in one loop.
 	 * B. copy 4 mbuf pointers from elts ring to returing pkts.
@@ -632,33 +656,53 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 		/* D.5 fill in mbuf - rearm_data and packet_type. */
 		rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
 		if (rxq->hw_timestamp) {
-			pkts[pos]->timestamp =
-				rte_be_to_cpu_64(cq[pos].timestamp);
-			pkts[pos + 1]->timestamp =
-				rte_be_to_cpu_64(cq[pos + p1].timestamp);
-			pkts[pos + 2]->timestamp =
-				rte_be_to_cpu_64(cq[pos + p2].timestamp);
-			pkts[pos + 3]->timestamp =
-				rte_be_to_cpu_64(cq[pos + p3].timestamp);
+			if (rxq->rt_timestamp) {
+				struct mlx5_dev_ctx_shared *sh = rxq->sh;
+				uint64_t ts;
+
+				ts = rte_be_to_cpu_64(cq[pos].timestamp);
+				pkts[pos]->timestamp =
+					mlx5_txpp_convert_rx_ts(sh, ts);
+				ts = rte_be_to_cpu_64(cq[pos + p1].timestamp);
+				pkts[pos + 1]->timestamp =
+					mlx5_txpp_convert_rx_ts(sh, ts);
+				ts = rte_be_to_cpu_64(cq[pos + p2].timestamp);
+				pkts[pos + 2]->timestamp =
+					mlx5_txpp_convert_rx_ts(sh, ts);
+				ts = rte_be_to_cpu_64(cq[pos + p3].timestamp);
+				pkts[pos + 3]->timestamp =
+					mlx5_txpp_convert_rx_ts(sh, ts);
+			} else {
+				pkts[pos]->timestamp = rte_be_to_cpu_64
+						(cq[pos].timestamp);
+				pkts[pos + 1]->timestamp = rte_be_to_cpu_64
+						(cq[pos + p1].timestamp);
+				pkts[pos + 2]->timestamp = rte_be_to_cpu_64
+						(cq[pos + p2].timestamp);
+				pkts[pos + 3]->timestamp = rte_be_to_cpu_64
+						(cq[pos + p3].timestamp);
+			}
 		}
-		if (rte_flow_dynf_metadata_avail()) {
+		if (rxq->dynf_meta) {
 			/* This code is subject for futher optimization. */
-			*RTE_FLOW_DYNF_METADATA(pkts[pos]) =
+			int32_t offs = rxq->flow_meta_offset;
+
+			*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
 				cq[pos].flow_table_metadata;
-			*RTE_FLOW_DYNF_METADATA(pkts[pos + 1]) =
+			*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
 				cq[pos + p1].flow_table_metadata;
-			*RTE_FLOW_DYNF_METADATA(pkts[pos + 2]) =
+			*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
 				cq[pos + p2].flow_table_metadata;
-			*RTE_FLOW_DYNF_METADATA(pkts[pos + 3]) =
+			*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
 				cq[pos + p3].flow_table_metadata;
-			if (*RTE_FLOW_DYNF_METADATA(pkts[pos]))
-				pkts[pos]->ol_flags |= PKT_RX_DYNF_METADATA;
-			if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 1]))
-				pkts[pos + 1]->ol_flags |= PKT_RX_DYNF_METADATA;
-			if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 2]))
-				pkts[pos + 2]->ol_flags |= PKT_RX_DYNF_METADATA;
-			if (*RTE_FLOW_DYNF_METADATA(pkts[pos + 3]))
-				pkts[pos + 3]->ol_flags |= PKT_RX_DYNF_METADATA;
+			if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
+				pkts[pos]->ol_flags |= rxq->flow_meta_mask;
+			if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))
+				pkts[pos + 1]->ol_flags |= rxq->flow_meta_mask;
+			if (*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *))
+				pkts[pos + 2]->ol_flags |= rxq->flow_meta_mask;
+			if (*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *))
+				pkts[pos + 3]->ol_flags |= rxq->flow_meta_mask;
 		}
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Add up received bytes count. */
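
Note: in the hunk above, when rxq->rt_timestamp is set the raw CQE timestamp is passed through mlx5_txpp_convert_rx_ts() so the mbuf carries a real-time value rather than raw device clock ticks. The following is only a hedged illustration of the general raw-tick-to-nanosecond pattern, with a hypothetical clock_ref structure standing in for the driver's shared device context; the actual conversion differs in detail.

#include <stdint.h>

/* Hypothetical reference pair, sampled when the HW clock and the system
 * clock were read back-to-back; the mlx5 shared context keeps richer
 * state than this.
 */
struct clock_ref {
	uint64_t hw_ref;   /* device clock ticks at the reference point */
	uint64_t ns_ref;   /* CLOCK_REALTIME ns at the same point */
	uint64_t freq_khz; /* device clock frequency in kHz */
};

static inline uint64_t
hw_ts_to_ns(const struct clock_ref *ref, uint64_t hw_ts)
{
	uint64_t delta = hw_ts - ref->hw_ref; /* elapsed device ticks */

	/* ticks * 1e6 / kHz yields nanoseconds; assumes delta is small
	 * enough that the multiplication does not overflow 64 bits.
	 */
	return ref->ns_ref + delta * 1000000ULL / ref->freq_khz;
}
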
@@ -675,10 +719,12 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 			break;
 	}
 	/* If no new CQE seen, return without updating cq_db. */
-	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
+	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+		*no_cq = true;
 		return rcvd_pkt;
+	}
 	/* Update the consumer indexes for non-compressed CQEs. */
-	assert(nocmp_n <= pkts_n);
+	MLX5_ASSERT(nocmp_n <= pkts_n);
 	rxq->cq_ci += nocmp_n;
 	rxq->rq_pi += nocmp_n;
 	rcvd_pkt += nocmp_n;
@@ -688,7 +734,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 #endif
 	/* Decompress the last CQE if compressed. */
 	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {
-		assert(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
 		rxq->decompressed =
 			rxq_cq_decompress_v(rxq, &cq[nocmp_n], &elts[nocmp_n]);
 		/* Return more packets if needed. */
@@ -704,6 +750,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
 	}
 	rte_compiler_barrier();
 	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+	*no_cq = !rcvd_pkt;
 	return rcvd_pkt;
 }
 
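
Note: with the final *no_cq = !rcvd_pkt above, every exit path of rxq_burst_v() now reports whether the completion queue produced anything, so the caller can stop re-polling a drained queue instead of looping until pkts_n packets arrive. Below is a sketch of how a burst-loop caller might consume the flag, modeled on the shape of the driver's vectorized burst entry point but with a hypothetical error-recovery helper.

#include <stdbool.h>
#include <stdint.h>
#include <rte_mbuf.h>

/* Hypothetical error-recovery hook; the real driver calls its own
 * pending-error handler here.
 */
static uint16_t
handle_rx_errors(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t nb_rx)
{
	(void)rxq;
	(void)pkts;
	return nb_rx;
}

static uint16_t
example_rx_burst_vec(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		     uint16_t pkts_n)
{
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		uint16_t nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
					     &err, &no_cq);

		if (unlikely(err))
			nb_rx = handle_rx_errors(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		/* Stop polling as soon as the CQ has no new entries. */
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

Without the flag, a caller that insists on filling pkts_n slots would spin on an empty CQ; the out-parameter turns that into a single early exit per burst.
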