X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec_neon.h;h=883fe1bf9141b0fc50c93302f7f8d2297b1027f4;hb=4a5140ab17d29e77eefa47b5cb514238e8e0c132;hp=240d396e3518b954dac9d96b55ecbeebfb90f4d7;hpb=5f8ba81c42283107f064aa6c5566ac8726b35f45;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index 240d396e35..883fe1bf91 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -107,8 +107,6 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, assert(elts_n > pkts_n); mlx5_tx_complete(txq); - /* A CQE slot must always be available. */ - assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); if (unlikely(!pkts_n)) return 0; for (n = 0; n < pkts_n; ++n) { @@ -167,8 +165,8 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, vst1q_u8((void *)t_wqe, ctrl); /* Fill ESEG in the header. */ vst1q_u16((void *)(t_wqe + 1), - (uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len), - 0, 0, 0, 0 }); + ((uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len), + 0, 0, 0, 0 })); txq->wqe_ci = wqe_ci; } if (!n) @@ -176,12 +174,11 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, txq->elts_comp += (uint16_t)(elts_head - txq->elts_head); txq->elts_head = elts_head; if (txq->elts_comp >= MLX5_TX_COMP_THRESH) { + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci)); wqe->ctrl[2] = rte_cpu_to_be_32(8); wqe->ctrl[3] = txq->elts_head; txq->elts_comp = 0; -#ifndef NDEBUG - ++txq->cq_pi; -#endif } #ifdef MLX5_PMD_SOFT_COUNTERS txq->stats.opackets += n; @@ -204,13 +201,15 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, * Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST). * @param cs_flags * Checksum offload flags to be written in the descriptor. + * @param metadata + * Metadata value to be written in the descriptor. * * @return * Number of packets successfully transmitted (<= pkts_n). */ static inline uint16_t txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, - uint8_t cs_flags) + uint8_t cs_flags, rte_be32_t metadata) { struct rte_mbuf **elts; uint16_t elts_head = txq->elts_head; @@ -245,8 +244,6 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, assert(elts_n > pkts_n); mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); - /* A CQE slot must always be available. */ - assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts); if (unlikely(!pkts_n)) @@ -282,11 +279,10 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) { txq->elts_comp += pkts_n; } else { + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci)); /* Request a completion. */ txq->elts_comp = 0; -#ifndef NDEBUG - ++txq->cq_pi; -#endif comp_req = 8; } /* Fill CTRL in the header. */ @@ -299,11 +295,8 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m); vst1q_u8((void *)t_wqe, ctrl); /* Fill ESEG in the header. */ - vst1q_u8((void *)(t_wqe + 1), - (uint8x16_t) { 0, 0, 0, 0, - cs_flags, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0 }); + vst1q_u32((void *)(t_wqe + 1), + ((uint32x4_t) { 0, cs_flags, metadata, 0 })); #ifdef MLX5_PMD_SOFT_COUNTERS txq->stats.opackets += pkts_n; #endif @@ -551,6 +544,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, const uint64x1_t mbuf_init = vld1_u64(&rxq->mbuf_initializer); const uint64x1_t r32_mask = vcreate_u64(0xffffffff); uint64x2_t rearm0, rearm1, rearm2, rearm3; + uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3; if (rxq->mark) { const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT); @@ -583,14 +577,18 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, ptype = vshrn_n_u32(ptype_info, 10); /* Errored packets will have RTE_PTYPE_ALL_MASK. */ ptype = vorr_u16(ptype, op_err); - pkts[0]->packet_type = - mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 6)]; - pkts[1]->packet_type = - mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 4)]; - pkts[2]->packet_type = - mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 2)]; - pkts[3]->packet_type = - mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 0)]; + pt_idx0 = vget_lane_u8(vreinterpret_u8_u16(ptype), 6); + pt_idx1 = vget_lane_u8(vreinterpret_u8_u16(ptype), 4); + pt_idx2 = vget_lane_u8(vreinterpret_u8_u16(ptype), 2); + pt_idx3 = vget_lane_u8(vreinterpret_u8_u16(ptype), 0); + pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] | + !!(pt_idx0 & (1 << 6)) * rxq->tunnel; + pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] | + !!(pt_idx1 & (1 << 6)) * rxq->tunnel; + pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] | + !!(pt_idx2 & (1 << 6)) * rxq->tunnel; + pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] | + !!(pt_idx3 & (1 << 6)) * rxq->tunnel; /* Fill flags for checksum and VLAN. */ pinfo = vandq_u32(ptype_info, ptype_ol_mask); pinfo = vreinterpretq_u32_u8( @@ -734,7 +732,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, * N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished). */ repl_n = q_n - (rxq->rq_ci - rxq->rq_pi); - if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH) + if (repl_n >= rxq->rq_repl_thresh) mlx5_rx_replenish_bulk_mbuf(rxq, repl_n); /* See if there're unreturned mbufs from compressed CQE. */ rcvd_pkt = rxq->cq_ci - rxq->rq_pi;