X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec_neon.h;h=58e4556890abd1a9f339da9a682ee61121bb39a5;hb=0f20acbf5eda;hp=555c34262601fed60acd89bfdb59c2e320a5bf31;hpb=a2854c4de12970dc351eaefe9fbd3c77d403caf7;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index 555c342626..58e4556890 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -29,18 +29,16 @@ /** * Store free buffers to RX SW ring. * - * @param rxq - * Pointer to RX queue structure. + * @param elts + * Pointer to SW ring to be filled. * @param pkts * Pointer to array of packets to be stored. * @param pkts_n * Number of packets to be stored. */ static inline void -rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n) +rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n) { - const uint16_t q_mask = (1 << rxq->elts_n) - 1; - struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask]; unsigned int pos; uint16_t p = n & -2; @@ -145,6 +143,7 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, -1UL << ((mcqe_n - pos) * sizeof(uint16_t) * 8) : 0); #endif + for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) if (likely(pos + i < mcqe_n)) rte_prefetch0((void *)(cq + pos + i)); @@ -227,6 +226,8 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, pos += MLX5_VPMD_DESCS_PER_LOOP; /* Move to next CQE and invalidate consumed CQEs. */ if (!(pos & 0x7) && pos < mcqe_n) { + if (pos + 8 < mcqe_n) + rte_prefetch0((void *)(cq + pos + 8)); mcq = (void *)&(cq + pos)->pkt_info; for (i = 0; i < 8; ++i) cq[inv++].op_own = MLX5_CQE_INVALIDATE; @@ -268,7 +269,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, uint32x4_t pinfo, cv_flags; uint32x4_t ol_flags = vdupq_n_u32(rxq->rss_hash * PKT_RX_RSS_HASH | - rxq->hw_timestamp * PKT_RX_TIMESTAMP); + rxq->hw_timestamp * rxq->timestamp_rx_flag); const uint32x4_t ptype_ol_mask = { 0x106, 0x106, 0x106, 0x106 }; const uint8x16_t cv_flag_sel = { 0, @@ -365,12 +366,16 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, } /** - * Receive burst of packets. An errored completion also consumes a mbuf, but the - * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed - * before returning to application. + * Process a non-compressed completion and fill in mbufs in RX SW ring + * with data extracted from the title completion descriptor. * * @param rxq * Pointer to RX queue structure. + * @param cq + * Pointer to completion array having a non-compressed completion at first. + * @param elts + * Pointer to SW ring to be filled. The first mbuf has to be pre-built from + * the title completion descriptor to be copied to the rest of mbufs. * @param[out] pkts * Array to store received packets. * @param pkts_n @@ -378,28 +383,23 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, * @param[out] err * Pointer to a flag. Set non-zero value if pkts array has at least one error * packet to handle. - * @param[out] no_cq - * Pointer to a boolean. Set true if no new CQE seen. + * @param[out] comp + * Pointer to a index. Set it to the first compressed completion if any. * * @return - * Number of packets received including errors (<= pkts_n). + * Number of CQEs successfully processed. */ static inline uint16_t -rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, - uint64_t *err, bool *no_cq) +rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, + struct rte_mbuf **elts, struct rte_mbuf **pkts, + uint16_t pkts_n, uint64_t *err, uint64_t *comp) { const uint16_t q_n = 1 << rxq->cqe_n; const uint16_t q_mask = q_n - 1; - volatile struct mlx5_cqe *cq; - struct rte_mbuf **elts; unsigned int pos; - uint64_t n; - uint16_t repl_n; + uint64_t n = 0; uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP; uint16_t nocmp_n = 0; - uint16_t rcvd_pkt = 0; - unsigned int cq_idx = rxq->cq_ci & q_mask; - unsigned int elts_idx; const uint16x4_t ownership = vdup_n_u16(!(rxq->cq_ci & (q_mask + 1))); const uint16x4_t owner_check = vcreate_u16(0x0001000100010001); const uint16x4_t opcode_check = vcreate_u16(0x00f000f000f000f0); @@ -460,39 +460,6 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, }; const uint32x4_t flow_mark_adj = { 0, 0, 0, rxq->mark * (-1) }; - MLX5_ASSERT(rxq->sges_n == 0); - MLX5_ASSERT(rxq->cqe_n == rxq->elts_n); - cq = &(*rxq->cqes)[cq_idx]; - rte_prefetch_non_temporal(cq); - rte_prefetch_non_temporal(cq + 1); - rte_prefetch_non_temporal(cq + 2); - rte_prefetch_non_temporal(cq + 3); - pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST); - repl_n = q_n - (rxq->rq_ci - rxq->rq_pi); - if (repl_n >= rxq->rq_repl_thresh) - mlx5_rx_replenish_bulk_mbuf(rxq, repl_n); - /* See if there're unreturned mbufs from compressed CQE. */ - rcvd_pkt = rxq->decompressed; - if (rcvd_pkt > 0) { - rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n); - rxq_copy_mbuf_v(rxq, pkts, rcvd_pkt); - rxq->rq_pi += rcvd_pkt; - pkts += rcvd_pkt; - rxq->decompressed -= rcvd_pkt; - } - elts_idx = rxq->rq_pi & q_mask; - elts = &(*rxq->elts)[elts_idx]; - /* Not to overflow pkts array. */ - pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP); - /* Not to cross queue end. */ - pkts_n = RTE_MIN(pkts_n, q_n - elts_idx); - pkts_n = RTE_MIN(pkts_n, q_n - cq_idx); - if (!pkts_n) { - *no_cq = !rcvd_pkt; - return rcvd_pkt; - } - /* At this point, there shouldn't be any remained packets. */ - MLX5_ASSERT(rxq->decompressed == 0); /* * Note that vectors have reverse order - {v3, v2, v1, v0}, because * there's no instruction to count trailing zeros. __builtin_clzl() is @@ -551,7 +518,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, /* B.0 (CQE 0) load a block having op_own. */ c0 = vld1q_u64((uint64_t *)(p0 + 48)); /* Synchronize for loading the rest of blocks. */ - rte_cio_rmb(); + rte_io_rmb(); /* Prefetch next 4 CQEs. */ if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) { unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP; @@ -694,6 +661,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, rxq_cq_to_ptype_oflags_v(rxq, ptype_info, flow_tag, opcode, &elts[pos]); if (rxq->hw_timestamp) { + int offset = rxq->timestamp_offset; if (rxq->rt_timestamp) { struct mlx5_dev_ctx_shared *sh = rxq->sh; uint64_t ts; @@ -701,52 +669,52 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, ts = rte_be_to_cpu_64 (container_of(p0, struct mlx5_cqe, pkt_info)->timestamp); - elts[pos]->timestamp = - mlx5_txpp_convert_rx_ts(sh, ts); + mlx5_timestamp_set(elts[pos], offset, + mlx5_txpp_convert_rx_ts(sh, ts)); ts = rte_be_to_cpu_64 (container_of(p1, struct mlx5_cqe, pkt_info)->timestamp); - elts[pos + 1]->timestamp = - mlx5_txpp_convert_rx_ts(sh, ts); + mlx5_timestamp_set(elts[pos + 1], offset, + mlx5_txpp_convert_rx_ts(sh, ts)); ts = rte_be_to_cpu_64 (container_of(p2, struct mlx5_cqe, pkt_info)->timestamp); - elts[pos + 2]->timestamp = - mlx5_txpp_convert_rx_ts(sh, ts); + mlx5_timestamp_set(elts[pos + 2], offset, + mlx5_txpp_convert_rx_ts(sh, ts)); ts = rte_be_to_cpu_64 (container_of(p3, struct mlx5_cqe, pkt_info)->timestamp); - elts[pos + 3]->timestamp = - mlx5_txpp_convert_rx_ts(sh, ts); + mlx5_timestamp_set(elts[pos + 3], offset, + mlx5_txpp_convert_rx_ts(sh, ts)); } else { - elts[pos]->timestamp = rte_be_to_cpu_64 - (container_of(p0, struct mlx5_cqe, - pkt_info)->timestamp); - elts[pos + 1]->timestamp = rte_be_to_cpu_64 - (container_of(p1, struct mlx5_cqe, - pkt_info)->timestamp); - elts[pos + 2]->timestamp = rte_be_to_cpu_64 - (container_of(p2, struct mlx5_cqe, - pkt_info)->timestamp); - elts[pos + 3]->timestamp = rte_be_to_cpu_64 - (container_of(p3, struct mlx5_cqe, - pkt_info)->timestamp); + mlx5_timestamp_set(elts[pos], offset, + rte_be_to_cpu_64(container_of(p0, + struct mlx5_cqe, pkt_info)->timestamp)); + mlx5_timestamp_set(elts[pos + 1], offset, + rte_be_to_cpu_64(container_of(p1, + struct mlx5_cqe, pkt_info)->timestamp)); + mlx5_timestamp_set(elts[pos + 2], offset, + rte_be_to_cpu_64(container_of(p2, + struct mlx5_cqe, pkt_info)->timestamp)); + mlx5_timestamp_set(elts[pos + 3], offset, + rte_be_to_cpu_64(container_of(p3, + struct mlx5_cqe, pkt_info)->timestamp)); } } - if (!!rxq->flow_meta_mask) { + if (rxq->dynf_meta) { /* This code is subject for futher optimization. */ int32_t offs = rxq->flow_meta_offset; *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = container_of(p0, struct mlx5_cqe, pkt_info)->flow_table_metadata; - *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) = container_of(p1, struct mlx5_cqe, pkt_info)->flow_table_metadata; - *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) = container_of(p2, struct mlx5_cqe, pkt_info)->flow_table_metadata; - *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) = + *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) = container_of(p3, struct mlx5_cqe, pkt_info)->flow_table_metadata; if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *)) @@ -770,40 +738,13 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, if (n != MLX5_VPMD_DESCS_PER_LOOP) break; } - /* If no new CQE seen, return without updating cq_db. */ - if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) { - *no_cq = true; - return rcvd_pkt; - } - /* Update the consumer indexes for non-compressed CQEs. */ - MLX5_ASSERT(nocmp_n <= pkts_n); - rxq->cq_ci += nocmp_n; - rxq->rq_pi += nocmp_n; - rcvd_pkt += nocmp_n; #ifdef MLX5_PMD_SOFT_COUNTERS rxq->stats.ipackets += nocmp_n; rxq->stats.ibytes += rcvd_byte; #endif - /* Decompress the last CQE if compressed. */ - if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) { - MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP)); - rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n], - &elts[nocmp_n]); - /* Return more packets if needed. */ - if (nocmp_n < pkts_n) { - uint16_t n = rxq->decompressed; - - n = RTE_MIN(n, pkts_n - nocmp_n); - rxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n); - rxq->rq_pi += n; - rcvd_pkt += n; - rxq->decompressed -= n; - } - } - rte_cio_wmb(); - *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); - *no_cq = !rcvd_pkt; - return rcvd_pkt; + if (comp_idx == n) + *comp = comp_idx; + return nocmp_n; } #endif /* RTE_PMD_MLX5_RXTX_VEC_NEON_H_ */