X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_rxtx_vec_sse.h;h=c49c52c261367c98a2394e65d63334726daf57fa;hb=1c191691a635c6ecf7959a1221584e353cdad228;hp=2b9f16010bd7f4b840e347ed63bf46f22ff51b63;hpb=cac35e6bad008c24d628676feb9fec7ebf2c890a;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h index 2b9f16010b..c49c52c261 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox Technologies, Ltd */ #ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_ @@ -132,6 +104,8 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE; unsigned int n; volatile struct mlx5_wqe *wqe = NULL; + bool metadata_ol = + txq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA ? true : false; assert(elts_n > pkts_n); mlx5_tx_complete(txq); @@ -148,11 +122,14 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, 8, 9, 10, 11, /* bswap32 */ 4, 5, 6, 7, /* bswap32 */ 0, 1, 2, 3 /* bswap32 */); - uint8_t cs_flags = 0; + uint8_t cs_flags; uint16_t max_elts; uint16_t max_wqe; __m128i *t_wqe, *dseg; __m128i ctrl; + rte_be32_t metadata = + metadata_ol && (buf->ol_flags & PKT_TX_METADATA) ? 
+ buf->tx_metadata : 0; assert(segs_n); max_elts = elts_n - (elts_head - txq->elts_tail); @@ -170,22 +147,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, } wqe = &((volatile struct mlx5_wqe64 *) txq->wqes)[wqe_ci & wq_mask].hdr; - if (buf->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - const uint64_t is_tunneled = - buf->ol_flags & (PKT_TX_TUNNEL_GRE | - PKT_TX_TUNNEL_VXLAN); - - if (is_tunneled && txq->tunnel_en) { - cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM | - MLX5_ETH_WQE_L4_INNER_CSUM; - if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM) - cs_flags |= MLX5_ETH_WQE_L3_CSUM; - } else { - cs_flags = MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } - } + cs_flags = txq_ol_cksum_to_cs(buf); /* Title WQEBB pointer. */ t_wqe = (__m128i *)wqe; dseg = (__m128i *)(wqe + 1); @@ -208,9 +170,9 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, _mm_store_si128(t_wqe, ctrl); /* Fill ESEG in the header. */ _mm_store_si128(t_wqe + 1, - _mm_set_epi16(0, 0, 0, 0, - rte_cpu_to_be_16(len), cs_flags, - 0, 0)); + _mm_set_epi32(0, metadata, + (rte_cpu_to_be_16(len) << 16) | + cs_flags, 0)); txq->wqe_ci = wqe_ci; } if (!n) @@ -218,10 +180,11 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, txq->elts_comp += (uint16_t)(elts_head - txq->elts_head); txq->elts_head = elts_head; if (txq->elts_comp >= MLX5_TX_COMP_THRESH) { + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci)); wqe->ctrl[2] = rte_cpu_to_be_32(8); wqe->ctrl[3] = txq->elts_head; txq->elts_comp = 0; - ++txq->cq_pi; } #ifdef MLX5_PMD_SOFT_COUNTERS txq->stats.opackets += n; @@ -234,7 +197,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, * Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet, * it returns to make it processed by txq_scatter_v(). All the packets in * the pkts list should be single segment packets having same offload flags. - * This must be checked by txq_check_multiseg() and txq_calc_offload(). + * This must be checked by txq_count_contig_single_seg() and txq_calc_offload(). * * @param txq * Pointer to TX queue structure. @@ -244,13 +207,15 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, * Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST). * @param cs_flags * Checksum offload flags to be written in the descriptor. + * @param metadata + * Metadata value to be written in the descriptor. * * @return * Number of packets successfully transmitted (<= pkts_n). */ static inline uint16_t txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, - uint8_t cs_flags) + uint8_t cs_flags, rte_be32_t metadata) { struct rte_mbuf **elts; uint16_t elts_head = txq->elts_head; @@ -320,9 +285,10 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) { txq->elts_comp += pkts_n; } else { + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci)); /* Request a completion. */ txq->elts_comp = 0; - ++txq->cq_pi; comp_req = 8; } /* Fill CTRL in the header. */ @@ -333,11 +299,7 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl); _mm_store_si128(t_wqe, ctrl); /* Fill ESEG in the header. 
*/ - _mm_store_si128(t_wqe + 1, - _mm_set_epi8(0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, cs_flags, - 0, 0, 0, 0)); + _mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata, cs_flags, 0)); #ifdef MLX5_PMD_SOFT_COUNTERS txq->stats.opackets += pkts_n; #endif @@ -387,8 +349,11 @@ rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n) * @param elts * Pointer to SW ring to be filled. The first mbuf has to be pre-built from * the title completion descriptor to be copied to the rest of mbufs. + * + * @return + * Number of mini-CQEs successfully decompressed. */ -static inline void +static inline uint16_t rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, struct rte_mbuf **elts) { @@ -412,16 +377,16 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, -1, -1, -1, -1 /* skip packet_type */); /* Restore the compressed count. Must be 16 bits. */ const uint16_t mcqe_n = t_pkt->data_len + - (rxq->crc_present * ETHER_CRC_LEN); + (rxq->crc_present * RTE_ETHER_CRC_LEN); const __m128i rearm = _mm_loadu_si128((__m128i *)&t_pkt->rearm_data); const __m128i rxdf = _mm_loadu_si128((__m128i *)&t_pkt->rx_descriptor_fields1); const __m128i crc_adj = _mm_set_epi16(0, 0, 0, - rxq->crc_present * ETHER_CRC_LEN, + rxq->crc_present * RTE_ETHER_CRC_LEN, 0, - rxq->crc_present * ETHER_CRC_LEN, + rxq->crc_present * RTE_ETHER_CRC_LEN, 0, 0); const uint32_t flow_tag = t_pkt->hash.fdir.hi; #ifdef MLX5_PMD_SOFT_COUNTERS @@ -524,6 +489,7 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq, rxq->stats.ibytes += rcvd_byte; #endif rxq->cq_ci += mcqe_n; + return mcqe_n; } /** @@ -577,6 +543,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], const __m128i mbuf_init = _mm_loadl_epi64((__m128i *)&rxq->mbuf_initializer); __m128i rearm0, rearm1, rearm2, rearm3; + uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3; /* Extract pkt_info field. */ pinfo0 = _mm_unpacklo_epi32(cqes[0], cqes[1]); @@ -591,7 +558,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], _mm_set_epi32(0xffffff00, 0xffffff00, 0xffffff00, 0xffffff00); const __m128i fdir_flags = _mm_set1_epi32(PKT_RX_FDIR); - const __m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID); + __m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID); __m128i flow_tag, invalid_mask; flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask); @@ -601,7 +568,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], _mm_andnot_si128(invalid_mask, fdir_flags)); /* Mask out invalid entries. */ - flow_tag = _mm_andnot_si128(invalid_mask, flow_tag); + fdir_id_flags = _mm_andnot_si128(invalid_mask, fdir_id_flags); /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */ ol_flags = _mm_or_si128(ol_flags, _mm_andnot_si128( @@ -630,10 +597,18 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], /* Errored packets will have RTE_PTYPE_ALL_MASK. 
*/ op_err = _mm_srli_epi16(op_err, 8); ptype = _mm_or_si128(ptype, op_err); - pkts[0]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 0)]; - pkts[1]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 2)]; - pkts[2]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 4)]; - pkts[3]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 6)]; + pt_idx0 = _mm_extract_epi8(ptype, 0); + pt_idx1 = _mm_extract_epi8(ptype, 2); + pt_idx2 = _mm_extract_epi8(ptype, 4); + pt_idx3 = _mm_extract_epi8(ptype, 6); + pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] | + !!(pt_idx0 & (1 << 6)) * rxq->tunnel; + pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] | + !!(pt_idx1 & (1 << 6)) * rxq->tunnel; + pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] | + !!(pt_idx2 & (1 << 6)) * rxq->tunnel; + pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] | + !!(pt_idx3 & (1 << 6)) * rxq->tunnel; /* Fill flags for checksum and VLAN. */ pinfo = _mm_and_si128(pinfo, ptype_ol_mask); pinfo = _mm_shuffle_epi8(cv_flag_sel, pinfo); @@ -669,12 +644,16 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], * Array to store received packets. * @param pkts_n * Maximum number of packets in array. + * @param[out] err + * Pointer to a flag. Set non-zero value if pkts array has at least one error + * packet to handle. * * @return * Number of packets received including errors (<= pkts_n). */ static inline uint16_t -rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) +rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, + uint64_t *err) { const uint16_t q_n = 1 << rxq->cqe_n; const uint16_t q_mask = q_n - 1; @@ -724,9 +703,9 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) const __m128i ones = _mm_cmpeq_epi32(zero, zero); const __m128i crc_adj = _mm_set_epi16(0, 0, 0, 0, 0, - rxq->crc_present * ETHER_CRC_LEN, + rxq->crc_present * RTE_ETHER_CRC_LEN, 0, - rxq->crc_present * ETHER_CRC_LEN); + rxq->crc_present * RTE_ETHER_CRC_LEN); const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0); assert(rxq->sges_n == 0); @@ -737,23 +716,16 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_prefetch0(cq + 2); rte_prefetch0(cq + 3); pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST); - /* - * Order of indexes: - * rq_ci >= cq_ci >= rq_pi - * Definition of indexes: - * rq_ci - cq_ci := # of buffers owned by HW (posted). - * cq_ci - rq_pi := # of buffers not returned to app (decompressed). - * N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished). - */ repl_n = q_n - (rxq->rq_ci - rxq->rq_pi); - if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH) + if (repl_n >= rxq->rq_repl_thresh) mlx5_rx_replenish_bulk_mbuf(rxq, repl_n); /* See if there're unreturned mbufs from compressed CQE. */ - rcvd_pkt = rxq->cq_ci - rxq->rq_pi; + rcvd_pkt = rxq->decompressed; if (rcvd_pkt > 0) { rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n); rxq_copy_mbuf_v(rxq, pkts, rcvd_pkt); rxq->rq_pi += rcvd_pkt; + rxq->decompressed -= rcvd_pkt; pkts += rcvd_pkt; } elts_idx = rxq->rq_pi & q_mask; @@ -762,10 +734,11 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP); /* Not to cross queue end. */ pkts_n = RTE_MIN(pkts_n, q_n - elts_idx); + pkts_n = RTE_MIN(pkts_n, q_n - cq_idx); if (!pkts_n) return rcvd_pkt; /* At this point, there shouldn't be any remained packets. 
*/ - assert(rxq->rq_pi == rxq->cq_ci); + assert(rxq->decompressed == 0); /* * A. load first Qword (8bytes) in one loop. * B. copy 4 mbuf pointers from elts ring to returing pkts. @@ -836,7 +809,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* B.2 copy mbuf pointers. */ _mm_storeu_si128((__m128i *)&pkts[pos], mbp1); _mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2); - rte_compiler_barrier(); + rte_cio_rmb(); /* C.1 load remained CQE data and extract necessary fields. */ cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]); cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]); @@ -936,7 +909,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) opcode = _mm_packs_epi32(opcode, zero); opcode = _mm_andnot_si128(invalid_mask, opcode); /* D.4 mark if any error is set */ - rxq->pending_err |= !!_mm_cvtsi128_si64(opcode); + *err |= _mm_cvtsi128_si64(opcode); /* D.5 fill in mbuf - rearm_data and packet_type. */ rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]); if (rxq->hw_timestamp) { @@ -978,15 +951,17 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Decompress the last CQE if compressed. */ if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) { assert(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP)); - rxq_cq_decompress_v(rxq, &cq[nocmp_n], &elts[nocmp_n]); + rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n], + &elts[nocmp_n]); /* Return more packets if needed. */ if (nocmp_n < pkts_n) { - uint16_t n = rxq->cq_ci - rxq->rq_pi; + uint16_t n = rxq->decompressed; n = RTE_MIN(n, pkts_n - nocmp_n); rxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n); rxq->rq_pi += n; rcvd_pkt += n; + rxq->decompressed -= n; } } rte_compiler_barrier();
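
Note on the Tx checksum change above: the hunk in txq_scatter_v() drops the open-coded translation of mbuf ol_flags into WQE checksum bits and calls txq_ol_cksum_to_cs() instead. For reference, below is a minimal sketch of the mapping that the removed inline lines performed; the helper name ol_flags_to_cs and the explicit tunnel_en parameter are illustrative only (the PMD's actual helper presumably takes the mbuf and consults txq->tunnel_en itself), while the PKT_TX_* and MLX5_ETH_WQE_* macros come from the mbuf API and the mlx5 PMD headers as used elsewhere in this file.

    /*
     * Illustrative reconstruction of the checksum-flag mapping that the
     * removed inline code implemented; not the PMD's actual helper body.
     */
    static inline uint8_t
    ol_flags_to_cs(const struct rte_mbuf *buf, int tunnel_en)
    {
    	uint8_t cs_flags = 0;

    	if (buf->ol_flags &
    	    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
    		const uint64_t is_tunneled =
    			buf->ol_flags &
    			(PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN);

    		if (is_tunneled && tunnel_en) {
    			/* Checksum the inner L3/L4 headers... */
    			cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
    				   MLX5_ETH_WQE_L4_INNER_CSUM;
    			/* ...and the outer L3 header if requested. */
    			if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
    				cs_flags |= MLX5_ETH_WQE_L3_CSUM;
    		} else {
    			cs_flags = MLX5_ETH_WQE_L3_CSUM |
    				   MLX5_ETH_WQE_L4_CSUM;
    		}
    	}
    	return cs_flags;
    }

The resulting byte is then packed into the Ethernet segment together with the packet length and, after this change, the rte_flow metadata word, via the _mm_set_epi32() stores shown in the hunks above.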