-/*-
- * BSD LICENSE
- *
- * Copyright 2017 6WIND S.A.
- * Copyright 2017 Mellanox.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_
* @param txq
* Pointer to TX queue structure.
* @param dseg
- * Pointer to buffer descriptor to be writen.
+ * Pointer to buffer descriptor to be written.
* @param pkts
* Pointer to array of packets to be sent.
* @param n
sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
unsigned int n;
volatile struct mlx5_wqe *wqe = NULL;
+ bool metadata_ol =
+ txq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA ? true : false;
assert(elts_n > pkts_n);
mlx5_tx_complete(txq);
8, 9, 10, 11, /* bswap32 */
4, 5, 6, 7, /* bswap32 */
0, 1, 2, 3 /* bswap32 */);
- uint8_t cs_flags = 0;
+ uint8_t cs_flags;
uint16_t max_elts;
uint16_t max_wqe;
__m128i *t_wqe, *dseg;
__m128i ctrl;
+ rte_be32_t metadata =
+ metadata_ol && (buf->ol_flags & PKT_TX_METADATA) ?
+ buf->tx_metadata : 0;
assert(segs_n);
max_elts = elts_n - (elts_head - txq->elts_tail);
}
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
- if (buf->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
- const uint64_t is_tunneled =
- buf->ol_flags & (PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN);
-
- if (is_tunneled && txq->tunnel_en) {
- cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
- MLX5_ETH_WQE_L4_INNER_CSUM;
- if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
- cs_flags |= MLX5_ETH_WQE_L3_CSUM;
- } else {
- cs_flags = MLX5_ETH_WQE_L3_CSUM |
- MLX5_ETH_WQE_L4_CSUM;
- }
- }
+ cs_flags = txq_ol_cksum_to_cs(buf);
/* Title WQEBB pointer. */
t_wqe = (__m128i *)wqe;
dseg = (__m128i *)(wqe + 1);
_mm_store_si128(t_wqe, ctrl);
/* Fill ESEG in the header. */
_mm_store_si128(t_wqe + 1,
- _mm_set_epi16(0, 0, 0, 0,
- rte_cpu_to_be_16(len), cs_flags,
- 0, 0));
+ _mm_set_epi32(0, metadata,
+ (rte_cpu_to_be_16(len) << 16) |
+ cs_flags, 0));
txq->wqe_ci = wqe_ci;
}
if (!n)
txq->elts_comp += (uint16_t)(elts_head - txq->elts_head);
txq->elts_head = elts_head;
if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
wqe->ctrl[2] = rte_cpu_to_be_32(8);
wqe->ctrl[3] = txq->elts_head;
txq->elts_comp = 0;
- ++txq->cq_pi;
}
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += n;
* Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,
* it returns to make it processed by txq_scatter_v(). All the packets in
* the pkts list should be single segment packets having same offload flags.
- * This must be checked by txq_check_multiseg() and txq_calc_offload().
+ * This must be checked by txq_count_contig_single_seg() and txq_calc_offload().
*
* @param txq
* Pointer to TX queue structure.
* Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
* @param cs_flags
* Checksum offload flags to be written in the descriptor.
+ * @param metadata
+ * Metadata value to be written in the descriptor.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
static inline uint16_t
txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint8_t cs_flags)
+ uint8_t cs_flags, rte_be32_t metadata)
{
struct rte_mbuf **elts;
uint16_t elts_head = txq->elts_head;
if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {
txq->elts_comp += pkts_n;
} else {
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request a completion. */
txq->elts_comp = 0;
- ++txq->cq_pi;
comp_req = 8;
}
/* Fill CTRL in the header. */
ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
_mm_store_si128(t_wqe, ctrl);
/* Fill ESEG in the header. */
- _mm_store_si128(t_wqe + 1,
- _mm_set_epi8(0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, cs_flags,
- 0, 0, 0, 0));
+ _mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata, cs_flags, 0));
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += pkts_n;
#endif
txq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /
nb_dword_per_wqebb;
/* Ring QP doorbell. */
- mlx5_tx_dbrec(txq, wqe);
+ mlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);
return pkts_n;
}
(uint8_t)(PKT_RX_L4_CKSUM_GOOD >> 1),
0,
(uint8_t)(PKT_RX_IP_CKSUM_GOOD >> 1),
- (uint8_t)(PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED),
+ (uint8_t)(PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
0);
const __m128i cv_mask =
_mm_set_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
const __m128i mbuf_init =
_mm_loadl_epi64((__m128i *)&rxq->mbuf_initializer);
__m128i rearm0, rearm1, rearm2, rearm3;
+ uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
/* Extract pkt_info field. */
pinfo0 = _mm_unpacklo_epi32(cqes[0], cqes[1]);
_mm_set_epi32(0xffffff00, 0xffffff00,
0xffffff00, 0xffffff00);
const __m128i fdir_flags = _mm_set1_epi32(PKT_RX_FDIR);
- const __m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID);
+ __m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID);
__m128i flow_tag, invalid_mask;
flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask);
_mm_andnot_si128(invalid_mask,
fdir_flags));
/* Mask out invalid entries. */
- flow_tag = _mm_andnot_si128(invalid_mask, flow_tag);
+ fdir_id_flags = _mm_andnot_si128(invalid_mask, fdir_id_flags);
/* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
ol_flags = _mm_or_si128(ol_flags,
_mm_andnot_si128(
/* Errored packets will have RTE_PTYPE_ALL_MASK. */
op_err = _mm_srli_epi16(op_err, 8);
ptype = _mm_or_si128(ptype, op_err);
- pkts[0]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 0)];
- pkts[1]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 2)];
- pkts[2]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 4)];
- pkts[3]->packet_type = mlx5_ptype_table[_mm_extract_epi8(ptype, 6)];
+ pt_idx0 = _mm_extract_epi8(ptype, 0);
+ pt_idx1 = _mm_extract_epi8(ptype, 2);
+ pt_idx2 = _mm_extract_epi8(ptype, 4);
+ pt_idx3 = _mm_extract_epi8(ptype, 6);
+ pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
+ !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
+ pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
+ !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
+ pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
+ !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
+ pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
+ !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
/* Fill flags for checksum and VLAN. */
pinfo = _mm_and_si128(pinfo, ptype_ol_mask);
pinfo = _mm_shuffle_epi8(cv_flag_sel, pinfo);
* Array to store received packets.
* @param pkts_n
* Maximum number of packets in array.
+ * @param[out] err
+ * Pointer to a flag. Set non-zero value if pkts array has at least one error
+ * packet to handle.
*
* @return
* Number of packets received including errors (<= pkts_n).
*/
static inline uint16_t
-rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
+ uint64_t *err)
{
const uint16_t q_n = 1 << rxq->cqe_n;
const uint16_t q_mask = q_n - 1;
* N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished).
*/
repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
- if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH)
+ if (repl_n >= rxq->rq_repl_thresh)
mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
/* See if there're unreturned mbufs from compressed CQE. */
rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
/* B.2 copy mbuf pointers. */
_mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
_mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
- rte_compiler_barrier();
+ rte_cio_rmb();
/* C.1 load remained CQE data and extract necessary fields. */
cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]);
cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]);
opcode = _mm_packs_epi32(opcode, zero);
opcode = _mm_andnot_si128(invalid_mask, opcode);
/* D.4 mark if any error is set */
- rxq->pending_err |= !!_mm_cvtsi128_si64(opcode);
+ *err |= _mm_cvtsi128_si64(opcode);
/* D.5 fill in mbuf - rearm_data and packet_type. */
rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
if (rxq->hw_timestamp) {