A nonzero value enables the compression of CQE on RX side. This feature
saves PCI bandwidth and improves performance. Enabled by default.
+ Different compression formats are supported in order to achieve the best
+ performance for different traffic patterns. Hash RSS format is the default.
+
+ Specifying 2 as a ``rxq_cqe_comp_en`` value selects Flow Tag format for
+ better compression rate in case of RTE Flow Mark traffic.
+ Specifying 3 as a ``rxq_cqe_comp_en`` value selects Checksum format.
+ Specifying 4 as a ``rxq_cqe_comp_en`` value selects L3/L4 Header format for
+ better compression rate in case of mixed TCP/UDP and IPv4/IPv6 traffic.
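The following is a minimal sketch, not part of this patch, of selecting the
Flow Tag format programmatically through EAL devargs; the helper name and the
PCI address are illustrative assumptions::

    #include <rte_eal.h>
    #include <rte_common.h>

    static int
    init_with_flow_tag_mcqe(void)
    {
        char arg0[] = "app";
        char arg1[] = "-a";                             /* device allow-list */
        char arg2[] = "0000:03:00.0,rxq_cqe_comp_en=2"; /* hypothetical BDF */
        char *argv[] = { arg0, arg1, arg2 };

        return rte_eal_init(RTE_DIM(argv), argv);
    }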
Supported on:
Updated Mellanox mlx5 driver with new features and improvements, including:
+ * Added vectorized Multi-Packet Rx Queue burst.
+ * Added support for two new miniCQE formats: Flow Tag and L3/L4 header.
* Added support for PMD level multiple-thread flow insertion.
* Added support for matching on fragmented/non-fragmented IPv4/IPv6 packets.
* Added support for QinQ packets matching.
MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET(cqc, cqctx, c_eqn, attr->eqn);
MLX5_SET(cqc, cqctx, uar_page, attr->uar_page_id);
- MLX5_SET(cqc, cqctx, cqe_comp_en, attr->cqe_comp_en);
- MLX5_SET(cqc, cqctx, mini_cqe_res_format, attr->mini_cqe_res_format);
+ MLX5_SET(cqc, cqctx, cqe_comp_en, !!attr->cqe_comp_en);
+ MLX5_SET(cqc, cqctx, mini_cqe_res_format,
+ attr->mini_cqe_res_format);
+ MLX5_SET(cqc, cqctx, mini_cqe_res_format_ext,
+ attr->mini_cqe_res_format_ext);
MLX5_SET(cqc, cqctx, cqe_sz, attr->cqe_size);
if (attr->q_umem_valid) {
MLX5_SET(create_cq_in, in, cq_umem_valid, attr->q_umem_valid);
uint32_t overrun_ignore:1;
uint32_t cqe_comp_en:1;
uint32_t mini_cqe_res_format:2;
+ uint32_t mini_cqe_res_format_ext:2;
uint32_t cqe_size:3;
uint32_t log_cq_size:5;
uint32_t log_page_size:5;
/* Default mark mask for metadata legacy mode. */
#define MLX5_FLOW_MARK_MASK 0xffffff
+/* Byte length mask when the mark is enabled in the miniCQE. */
+#define MLX5_LEN_WITH_MARK_MASK 0xffffff00
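/*
 * Note (mirrors the Rx path later in this patch): with the Flow Tag miniCQE
 * format the byte count word also carries flow tag bits, so the length is
 * recovered as rte_be_to_cpu_32(byte_cnt & MLX5_LEN_WITH_MARK_MASK), with
 * rxq->byte_mask set to this value.
 */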
+
/* Maximum number of DS in WQE. Limited by 6-bit field. */
#define MLX5_DSEG_MAX 63
u8 cqe_comp_en[0x1];
u8 mini_cqe_res_format[0x2];
u8 st[0x4];
- u8 reserved_at_18[0x8];
+ u8 reserved_at_18[0x1];
+ u8 cqe_comp_layout[0x7];
u8 dbr_umem_id[0x20];
u8 reserved_at_40[0x14];
u8 page_offset[0x6];
- u8 reserved_at_5a[0x6];
+ u8 reserved_at_5a[0x2];
+ u8 mini_cqe_res_format_ext[0x2];
+ u8 cq_timestamp_format[0x2];
u8 reserved_at_60[0x3];
u8 log_cq_size[0x5];
u8 uar_page[0x18];
union {
uint32_t rx_hash_result;
struct {
- uint16_t checksum;
+ union {
+ uint16_t checksum;
+ uint16_t flow_tag_high;
+ struct {
+ uint8_t reserved;
+ uint8_t hdr_type;
+ };
+ };
uint16_t stride_idx;
};
struct {
uint8_t reserved;
} s_wqe_info;
};
- uint32_t byte_cnt;
+ union {
+ uint32_t byte_cnt_flow;
+ uint32_t byte_cnt;
+ };
};
/* Mini CQE responder format. */
enum {
MLX5_CQE_RESP_FORMAT_HASH = 0x0,
MLX5_CQE_RESP_FORMAT_CSUM = 0x1,
- MLX5_CQE_RESP_FORMAT_CSUM_FLOW_TAG = 0x2,
+ MLX5_CQE_RESP_FORMAT_FTAG_STRIDX = 0x2,
MLX5_CQE_RESP_FORMAT_CSUM_STRIDX = 0x3,
+ MLX5_CQE_RESP_FORMAT_L34H_STRIDX = 0x4,
};
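/*
 * Note: these values match the rxq_cqe_comp_en devarg. Formats 0..3 fit the
 * two-bit mini_cqe_res_format CQ context field; the L3/L4 header format (4)
 * is requested by leaving that field zero and setting mini_cqe_res_format_ext
 * to 1 instead (see the CQ creation path in this patch).
 */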
/* srTCM PRM flow meter parameters. */
}
mod = tmp >= 0 ? tmp : -tmp;
if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
+ if (tmp > MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
+ DRV_LOG(ERR, "invalid CQE compression "
+ "format parameter");
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
config->cqe_comp = !!tmp;
+ config->cqe_comp_fmt = tmp;
} else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) {
config->cqe_pad = !!tmp;
} else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
/* Whether tunnel stateless offloads are supported. */
unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
unsigned int cqe_comp:1; /* CQE compression is enabled. */
+ unsigned int cqe_comp_fmt:3; /* CQE compression format. */
unsigned int cqe_pad:1; /* CQE padding is enabled. */
unsigned int tso:1; /* Whether TSO is supported. */
unsigned int rx_vec_en:1; /* Rx vector is enabled. */
if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
!rxq_data->lro) {
cq_attr.cqe_comp_en = 1u;
- /*
- * Select CSUM miniCQE format only for non-vectorized MPRQ
- * Rx burst, use HASH miniCQE format for everything else.
- */
- if (mlx5_rxq_check_vec_support(rxq_data) < 0 &&
- mlx5_rxq_mprq_enabled(rxq_data))
- cq_attr.mini_cqe_res_format =
- MLX5_CQE_RESP_FORMAT_CSUM_STRIDX;
- else
- cq_attr.mini_cqe_res_format =
- MLX5_CQE_RESP_FORMAT_HASH;
+ rxq_data->mcqe_format = priv->config.cqe_comp_fmt;
+ rxq_data->byte_mask = UINT32_MAX;
+ switch (priv->config.cqe_comp_fmt) {
+ case MLX5_CQE_RESP_FORMAT_HASH:
+ /* fallthrough */
+ case MLX5_CQE_RESP_FORMAT_CSUM:
+ /*
+ * Select CSUM miniCQE format only for non-vectorized
+ * MPRQ Rx burst, use HASH miniCQE format for others.
+ */
+ if (mlx5_rxq_check_vec_support(rxq_data) < 0 &&
+ mlx5_rxq_mprq_enabled(rxq_data))
+ cq_attr.mini_cqe_res_format =
+ MLX5_CQE_RESP_FORMAT_CSUM_STRIDX;
+ else
+ cq_attr.mini_cqe_res_format =
+ MLX5_CQE_RESP_FORMAT_HASH;
+ rxq_data->mcqe_format = cq_attr.mini_cqe_res_format;
+ break;
+ case MLX5_CQE_RESP_FORMAT_FTAG_STRIDX:
+ rxq_data->byte_mask = MLX5_LEN_WITH_MARK_MASK;
+ /* fallthrough */
+ case MLX5_CQE_RESP_FORMAT_CSUM_STRIDX:
+ cq_attr.mini_cqe_res_format = priv->config.cqe_comp_fmt;
+ break;
+ case MLX5_CQE_RESP_FORMAT_L34H_STRIDX:
+ cq_attr.mini_cqe_res_format = 0;
+ cq_attr.mini_cqe_res_format_ext = 1;
+ break;
+ }
+ DRV_LOG(DEBUG,
+ "Port %u Rx CQE compression is enabled, format %d.",
+ dev->data->port_id, priv->config.cqe_comp_fmt);
/*
* For vectorized Rx, it must not be doubled in order to
* make cq_ci and rq_ci aligned.
#define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx},
static __rte_always_inline uint32_t
-rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
+rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
+ volatile struct mlx5_mini_cqe8 *mcqe);
static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
static __rte_always_inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
- volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res);
+ volatile struct mlx5_cqe *cqe,
+ volatile struct mlx5_mini_cqe8 *mcqe);
static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
static inline void
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
volatile struct mlx5_cqe *__rte_restrict cqe,
- uint32_t phcsum);
+ uint32_t phcsum, uint8_t l4_type);
static inline void
mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
volatile struct mlx5_cqe *__rte_restrict cqe,
- uint32_t len);
+ volatile struct mlx5_mini_cqe8 *mcqe,
+ struct mlx5_rxq_data *rxq, uint32_t len);
uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
* Packet type for struct rte_mbuf.
*/
static inline uint32_t
-rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
+rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
+ volatile struct mlx5_mini_cqe8 *mcqe)
{
uint8_t idx;
- uint8_t pinfo = cqe->pkt_info;
- uint16_t ptype = cqe->hdr_type_etc;
+ uint8_t ptype;
+ uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;
+ /* Get the L3/L4 header type from the mini-CQE in case of L3/L4 format. */
+ if (mcqe == NULL ||
+ rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
+ ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
+ else
+ ptype = mcqe->hdr_type >> 2;
/*
* The index to the array should have:
* bit[1:0] = l3_hdr_type
* bit[6] = tunneled
* bit[7] = outer_l3_type
*/
- idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
+ idx = pinfo | ptype;
return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
}
(volatile struct mlx5_mini_cqe8 (*)[8])
(uintptr_t)(&(*rxq->cqes)[zip->ca &
cqe_cnt].pkt_info);
-
- len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
+ len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt &
+ rxq->byte_mask);
*mcqe = &(*mc)[zip->ai & 7];
if ((++zip->ai & 7) == 0) {
/* Invalidate consumed CQEs */
--rxq->cq_ci;
zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
/* Get packet size to return. */
- len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
+ len = rte_be_to_cpu_32((*mc)[0].byte_cnt &
+ rxq->byte_mask);
*mcqe = &(*mc)[0];
zip->ai = 1;
/* Prefetch all to be invalidated */
*/
static inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
- volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res)
+ volatile struct mlx5_cqe *cqe,
+ volatile struct mlx5_mini_cqe8 *mcqe)
{
/* Update packet information. */
- pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe);
- if (rss_hash_res && rxq->rss_hash) {
- pkt->hash.rss = rss_hash_res;
- pkt->ol_flags |= PKT_RX_RSS_HASH;
+ pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe);
+
+ if (rxq->rss_hash) {
+ uint32_t rss_hash_res = 0;
+
+ /* If compressed, take hash result from mini-CQE. */
+ if (mcqe == NULL ||
+ rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)
+ rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
+ else
+ rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result);
+ if (rss_hash_res) {
+ pkt->hash.rss = rss_hash_res;
+ pkt->ol_flags |= PKT_RX_RSS_HASH;
+ }
}
- if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) {
- pkt->ol_flags |= PKT_RX_FDIR;
- if (cqe->sop_drop_qpn !=
- rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) {
- uint32_t mark = cqe->sop_drop_qpn;
-
- pkt->ol_flags |= PKT_RX_FDIR_ID;
- pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
+ if (rxq->mark) {
+ uint32_t mark = 0;
+
+ /* If compressed, take flow tag from mini-CQE. */
+ if (mcqe == NULL ||
+ rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
+ mark = cqe->sop_drop_qpn;
+ else
+ mark = ((mcqe->byte_cnt_flow & 0xff) << 8) |
+ (mcqe->flow_tag_high << 16);
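		/*
		 * Note: the value reassembled from the mini-CQE keeps the
		 * network byte order layout of cqe->sop_drop_qpn, so the same
		 * validity check and mlx5_flow_mark_get() conversion apply to
		 * both branches.
		 */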
+ if (MLX5_FLOW_MARK_IS_VALID(mark)) {
+ pkt->ol_flags |= PKT_RX_FDIR;
+ if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
+ pkt->ol_flags |= PKT_RX_FDIR_ID;
+ pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
+ }
}
}
if (rxq->dynf_meta && cqe->flow_table_metadata) {
}
if (rxq->csum)
pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
- if (rxq->vlan_strip &&
- (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) {
- pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
- pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
+ if (rxq->vlan_strip) {
+ bool vlan_strip;
+
+ if (mcqe == NULL ||
+ rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
+ vlan_strip = cqe->hdr_type_etc &
+ RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
+ else
+ vlan_strip = mcqe->hdr_type &
+ RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
+ if (vlan_strip) {
+ pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+ pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
+ }
}
if (rxq->hw_timestamp) {
uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);
&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
struct rte_mbuf *rep = (*rxq->elts)[idx];
volatile struct mlx5_mini_cqe8 *mcqe = NULL;
- uint32_t rss_hash_res;
if (pkt)
NEXT(seg) = rep;
pkt = seg;
MLX5_ASSERT(len >= (rxq->crc_present << 2));
pkt->ol_flags &= EXT_ATTACHED_MBUF;
- /* If compressed, take hash result from mini-CQE. */
- rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
- cqe->rx_hash_res :
- mcqe->rx_hash_result);
- rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
+ rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
if (rxq->crc_present)
len -= RTE_ETHER_CRC_LEN;
PKT_LEN(pkt) = len;
if (cqe->lro_num_seg > 1) {
mlx5_lro_update_hdr
(rte_pktmbuf_mtod(pkt, uint8_t *), cqe,
- len);
+ mcqe, rxq, len);
pkt->ol_flags |= PKT_RX_LRO;
pkt->tso_segsz = len / cqe->lro_num_seg;
}
static inline void
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
volatile struct mlx5_cqe *__rte_restrict cqe,
- uint32_t phcsum)
+ uint32_t phcsum, uint8_t l4_type)
{
- uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
- MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
/*
* The HW calculates only the TCP payload checksum, need to complete
* the TCP header checksum and the L3 pseudo-header checksum.
static inline void
mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
volatile struct mlx5_cqe *__rte_restrict cqe,
- uint32_t len)
+ volatile struct mlx5_mini_cqe8 *mcqe,
+ struct mlx5_rxq_data *rxq, uint32_t len)
{
union {
struct rte_ether_hdr *eth;
};
uint16_t proto = h.eth->ether_type;
uint32_t phcsum;
+ uint8_t l4_type;
h.eth++;
while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) ||
phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0);
h.ipv6++;
}
- mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum);
+ if (mcqe == NULL ||
+ rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
+ l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
+ MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
+ else
+ l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) &
+ MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
+ mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type);
}
void
{
struct mlx5_rxq_data *rxq = dpdk_rxq;
const uint32_t strd_n = 1 << rxq->strd_num_n;
+ const uint32_t strd_sz = 1 << rxq->strd_sz_n;
const uint32_t cq_mask = (1 << rxq->cqe_n) - 1;
const uint32_t wq_mask = (1 << rxq->elts_n) - 1;
volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
uint16_t strd_idx;
uint32_t byte_cnt;
volatile struct mlx5_mini_cqe8 *mcqe = NULL;
- uint32_t rss_hash_res = 0;
enum mlx5_rqx_code rxq_code;
if (consumed_strd == strd_n) {
if (!ret)
break;
byte_cnt = ret;
- strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
- MLX5_MPRQ_STRIDE_NUM_SHIFT;
+ len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
+ MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
+ if (rxq->crc_present)
+ len -= RTE_ETHER_CRC_LEN;
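		/*
		 * Note: Flow Tag miniCQEs share the byte count word with the
		 * flow tag, so the stride count cannot be read from it and is
		 * reconstructed below as a ceiling division of the packet
		 * length by the stride size.
		 */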
+ if (mcqe &&
+ rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
+ strd_cnt = (len / strd_sz) + !!(len % strd_sz);
+ else
+ strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+ MLX5_MPRQ_STRIDE_NUM_SHIFT;
MLX5_ASSERT(strd_cnt);
consumed_strd += strd_cnt;
if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
continue;
- if (mcqe == NULL) {
- rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
- strd_idx = rte_be_to_cpu_16(cqe->wqe_counter);
- } else {
- /* mini-CQE for MPRQ doesn't have hash result. */
- strd_idx = rte_be_to_cpu_16(mcqe->stride_idx);
- }
+ strd_idx = rte_be_to_cpu_16(mcqe == NULL ?
+ cqe->wqe_counter :
+ mcqe->stride_idx);
MLX5_ASSERT(strd_idx < strd_n);
MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
wq_mask));
break;
}
}
- rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
+ rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
if (cqe->lro_num_seg > 1) {
mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *),
- cqe, len);
+ cqe, mcqe, rxq, len);
pkt->ol_flags |= PKT_RX_LRO;
pkt->tso_segsz = len / cqe->lro_num_seg;
}
unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
unsigned int lro:1; /* Enable LRO. */
unsigned int dynf_meta:1; /* Dynamic metadata is configured. */
+ unsigned int mcqe_format:3; /* CQE compression miniCQE format. */
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t port_id;
uint32_t rq_pi;
uint32_t cq_ci;
uint16_t rq_repl_thresh; /* Threshold for buffer replenishment. */
+ uint32_t byte_mask; /* Mask applied to the miniCQE byte count. */
union {
struct rxq_zip zip; /* Compressed context. */
uint16_t decompressed;
const vector unsigned short rxdf_sel_mask =
(vector unsigned short){
0xffff, 0xffff, 0, 0, 0, 0xffff, 0, 0};
- const uint32_t flow_tag = t_pkt->hash.fdir.hi;
+ vector unsigned char ol_flags = (vector unsigned char){0};
+ vector unsigned char ol_flags_mask = (vector unsigned char){0};
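	/*
	 * Note: with miniCQE formats other than Hash, per-packet flag bits
	 * (FDIR/VLAN/RSS) are accumulated in ol_flags, with ol_flags_mask
	 * tracking which bits were computed here; bits outside the mask are
	 * copied from the title packet.
	 */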
unsigned int pos;
unsigned int i;
unsigned int inv = 0;
vec_sel((vector unsigned long)shmask,
(vector unsigned long)invalid_mask, shmask);
- mcqe1 = (vector unsigned char)
+ byte_cnt = (vector unsigned char)
+ vec_sel((vector unsigned short)
vec_sro((vector unsigned short)mcqe1,
(vector unsigned char){32}),
- byte_cnt = (vector unsigned char)
- vec_sel((vector unsigned short)mcqe1,
(vector unsigned short)mcqe2, mcqe_sel_mask);
byte_cnt = vec_perm(byte_cnt, zero, len_shuf_mask);
byte_cnt = (vector unsigned char)
#endif
if (rxq->mark) {
- /* E.1 store flow tag (rte_flow mark). */
- elts[pos]->hash.fdir.hi = flow_tag;
- elts[pos + 1]->hash.fdir.hi = flow_tag;
- elts[pos + 2]->hash.fdir.hi = flow_tag;
- elts[pos + 3]->hash.fdir.hi = flow_tag;
+ if (rxq->mcqe_format !=
+ MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
+ const uint32_t flow_tag = t_pkt->hash.fdir.hi;
+
+ /* E.1 store flow tag (rte_flow mark). */
+ elts[pos]->hash.fdir.hi = flow_tag;
+ elts[pos + 1]->hash.fdir.hi = flow_tag;
+ elts[pos + 2]->hash.fdir.hi = flow_tag;
+ elts[pos + 3]->hash.fdir.hi = flow_tag;
+ } else {
+ const vector unsigned char flow_mark_adj =
+ (vector unsigned char)
+ (vector unsigned int){
+ -1, -1, -1, -1};
+ const vector unsigned char flow_mark_shuf =
+ (vector unsigned char){
+ -1, -1, -1, -1,
+ -1, -1, -1, -1,
+ 12, 8, 9, -1,
+ 4, 0, 1, -1};
+ const vector unsigned char ft_mask =
+ (vector unsigned char)
+ (vector unsigned int){
+ 0xffffff00, 0xffffff00,
+ 0xffffff00, 0xffffff00};
+ const vector unsigned char fdir_flags =
+ (vector unsigned char)
+ (vector unsigned int){
+ PKT_RX_FDIR, PKT_RX_FDIR,
+ PKT_RX_FDIR, PKT_RX_FDIR};
+ const vector unsigned char fdir_all_flags =
+ (vector unsigned char)
+ (vector unsigned int){
+ PKT_RX_FDIR | PKT_RX_FDIR_ID,
+ PKT_RX_FDIR | PKT_RX_FDIR_ID,
+ PKT_RX_FDIR | PKT_RX_FDIR_ID,
+ PKT_RX_FDIR | PKT_RX_FDIR_ID};
+ vector unsigned char fdir_id_flags =
+ (vector unsigned char)
+ (vector unsigned int){
+ PKT_RX_FDIR_ID, PKT_RX_FDIR_ID,
+ PKT_RX_FDIR_ID, PKT_RX_FDIR_ID};
+ /* Extract flow_tag field. */
+ vector unsigned char ftag0 = vec_perm(mcqe1,
+ zero, flow_mark_shuf);
+ vector unsigned char ftag1 = vec_perm(mcqe2,
+ zero, flow_mark_shuf);
+ vector unsigned char ftag =
+ (vector unsigned char)
+ vec_mergel((vector unsigned int)ftag0,
+ (vector unsigned int)ftag1);
+ vector unsigned char invalid_mask =
+ (vector unsigned char)
+ vec_cmpeq((vector unsigned int)ftag,
+ (vector unsigned int)zero);
+
+ ol_flags_mask = (vector unsigned char)
+ vec_or((vector unsigned long)
+ ol_flags_mask,
+ (vector unsigned long)fdir_all_flags);
+
+ /* Set PKT_RX_FDIR if flow tag is non-zero. */
+ invalid_mask = (vector unsigned char)
+ vec_cmpeq((vector unsigned int)ftag,
+ (vector unsigned int)zero);
+ ol_flags = (vector unsigned char)
+ vec_or((vector unsigned long)ol_flags,
+ (vector unsigned long)
+ vec_andc((vector unsigned long)
+ fdir_flags,
+ (vector unsigned long)invalid_mask));
+ ol_flags_mask = (vector unsigned char)
+ vec_or((vector unsigned long)
+ ol_flags_mask,
+ (vector unsigned long)fdir_flags);
+
+ /* Mask out invalid entries. */
+ fdir_id_flags = (vector unsigned char)
+ vec_andc((vector unsigned long)
+ fdir_id_flags,
+ (vector unsigned long)invalid_mask);
+
+ /* Check if the flow tag is MLX5_FLOW_MARK_DEFAULT. */
+ ol_flags = (vector unsigned char)
+ vec_or((vector unsigned long)ol_flags,
+ (vector unsigned long)
+ vec_andc((vector unsigned long)
+ fdir_id_flags,
+ (vector unsigned long)
+ vec_cmpeq((vector unsigned int)ftag,
+ (vector unsigned int)ft_mask)));
+
+ ftag = (vector unsigned char)
+ ((vector unsigned int)ftag +
+ (vector unsigned int)flow_mark_adj);
+ elts[pos]->hash.fdir.hi =
+ ((vector unsigned int)ftag)[0];
+ elts[pos + 1]->hash.fdir.hi =
+ ((vector unsigned int)ftag)[1];
+ elts[pos + 2]->hash.fdir.hi =
+ ((vector unsigned int)ftag)[2];
+ elts[pos + 3]->hash.fdir.hi =
+ ((vector unsigned int)ftag)[3];
+ }
+ }
+ if (unlikely(rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)) {
+ if (rxq->mcqe_format ==
+ MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
+ const uint8_t pkt_info =
+ (cq->pkt_info & 0x3) << 6;
+ const uint8_t pkt_hdr0 =
+ mcq[pos % 8].hdr_type;
+ const uint8_t pkt_hdr1 =
+ mcq[pos % 8 + 1].hdr_type;
+ const uint8_t pkt_hdr2 =
+ mcq[pos % 8 + 2].hdr_type;
+ const uint8_t pkt_hdr3 =
+ mcq[pos % 8 + 3].hdr_type;
+ const vector unsigned char vlan_mask =
+ (vector unsigned char)
+ (vector unsigned int) {
+ (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
+ (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
+ (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
+ (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED)};
+ const vector unsigned char cv_mask =
+ (vector unsigned char)
+ (vector unsigned int) {
+ MLX5_CQE_VLAN_STRIPPED,
+ MLX5_CQE_VLAN_STRIPPED,
+ MLX5_CQE_VLAN_STRIPPED,
+ MLX5_CQE_VLAN_STRIPPED};
+ vector unsigned char pkt_cv =
+ (vector unsigned char)
+ (vector unsigned int) {
+ pkt_hdr0 & 0x1, pkt_hdr1 & 0x1,
+ pkt_hdr2 & 0x1, pkt_hdr3 & 0x1};
+
+ ol_flags_mask = (vector unsigned char)
+ vec_or((vector unsigned long)
+ ol_flags_mask,
+ (vector unsigned long)vlan_mask);
+ ol_flags = (vector unsigned char)
+ vec_or((vector unsigned long)ol_flags,
+ (vector unsigned long)
+ vec_and((vector unsigned long)vlan_mask,
+ (vector unsigned long)
+ vec_cmpeq((vector unsigned int)pkt_cv,
+ (vector unsigned int)cv_mask)));
+ elts[pos]->packet_type =
+ mlx5_ptype_table[(pkt_hdr0 >> 2) |
+ pkt_info];
+ elts[pos + 1]->packet_type =
+ mlx5_ptype_table[(pkt_hdr1 >> 2) |
+ pkt_info];
+ elts[pos + 2]->packet_type =
+ mlx5_ptype_table[(pkt_hdr2 >> 2) |
+ pkt_info];
+ elts[pos + 3]->packet_type =
+ mlx5_ptype_table[(pkt_hdr3 >> 2) |
+ pkt_info];
+ if (rxq->tunnel) {
+ elts[pos]->packet_type |=
+ !!(((pkt_hdr0 >> 2) |
+ pkt_info) & (1 << 6));
+ elts[pos + 1]->packet_type |=
+ !!(((pkt_hdr1 >> 2) |
+ pkt_info) & (1 << 6));
+ elts[pos + 2]->packet_type |=
+ !!(((pkt_hdr2 >> 2) |
+ pkt_info) & (1 << 6));
+ elts[pos + 3]->packet_type |=
+ !!(((pkt_hdr3 >> 2) |
+ pkt_info) & (1 << 6));
+ }
+ }
+ const vector unsigned char hash_mask =
+ (vector unsigned char)(vector unsigned int) {
+ PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH};
+ const vector unsigned char rearm_flags =
+ (vector unsigned char)(vector unsigned int) {
+ (uint32_t)t_pkt->ol_flags,
+ (uint32_t)t_pkt->ol_flags,
+ (uint32_t)t_pkt->ol_flags,
+ (uint32_t)t_pkt->ol_flags};
+
+ ol_flags_mask = (vector unsigned char)
+ vec_or((vector unsigned long)ol_flags_mask,
+ (vector unsigned long)hash_mask);
+ ol_flags = (vector unsigned char)
+ vec_or((vector unsigned long)ol_flags,
+ (vector unsigned long)
+ vec_andc((vector unsigned long)rearm_flags,
+ (vector unsigned long)ol_flags_mask));
+
+ elts[pos]->ol_flags =
+ ((vector unsigned int)ol_flags)[0];
+ elts[pos + 1]->ol_flags =
+ ((vector unsigned int)ol_flags)[1];
+ elts[pos + 2]->ol_flags =
+ ((vector unsigned int)ol_flags)[2];
+ elts[pos + 3]->ol_flags =
+ ((vector unsigned int)ol_flags)[3];
+ elts[pos]->hash.rss = 0;
+ elts[pos + 1]->hash.rss = 0;
+ elts[pos + 2]->hash.rss = 0;
+ elts[pos + 3]->hash.rss = 0;
}
if (rxq->dynf_meta) {
int32_t offs = rxq->flow_meta_offset;
rxq->crc_present * RTE_ETHER_CRC_LEN, 0,
0, 0
};
- const uint32_t flow_tag = t_pkt->hash.fdir.hi;
+ uint32x4_t ol_flags = {0, 0, 0, 0};
+ uint32x4_t ol_flags_mask = {0, 0, 0, 0};
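	/* ol_flags/ol_flags_mask accumulate per-packet flag bits from the
	 * mini-CQEs; bits outside the mask are copied from the title packet. */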
#ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t rcvd_byte = 0;
#endif
rcvd_byte += vget_lane_u64(vpaddl_u32(vpaddl_u16(byte_cnt)), 0);
#endif
if (rxq->mark) {
- /* E.1 store flow tag (rte_flow mark). */
- elts[pos]->hash.fdir.hi = flow_tag;
- elts[pos + 1]->hash.fdir.hi = flow_tag;
- elts[pos + 2]->hash.fdir.hi = flow_tag;
- elts[pos + 3]->hash.fdir.hi = flow_tag;
+ if (rxq->mcqe_format !=
+ MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
+ const uint32_t flow_tag = t_pkt->hash.fdir.hi;
+
+ /* E.1 store flow tag (rte_flow mark). */
+ elts[pos]->hash.fdir.hi = flow_tag;
+ elts[pos + 1]->hash.fdir.hi = flow_tag;
+ elts[pos + 2]->hash.fdir.hi = flow_tag;
+ elts[pos + 3]->hash.fdir.hi = flow_tag;
+ } else {
+ const uint32x4_t flow_mark_adj = {
+ -1, -1, -1, -1 };
+ const uint8x16_t flow_mark_shuf = {
+ 28, 24, 25, -1,
+ 20, 16, 17, -1,
+ 12, 8, 9, -1,
+ 4, 0, 1, -1};
+ /* Extract flow_tag field. */
+ const uint32x4_t ft_mask =
+ vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT);
+ const uint32x4_t fdir_flags =
+ vdupq_n_u32(PKT_RX_FDIR);
+ const uint32x4_t fdir_all_flags =
+ vdupq_n_u32(PKT_RX_FDIR |
+ PKT_RX_FDIR_ID);
+ uint32x4_t fdir_id_flags =
+ vdupq_n_u32(PKT_RX_FDIR_ID);
+ uint32x4_t invalid_mask, ftag;
+
+ __asm__ volatile
+ /* A.1 load mCQEs into a 128bit register. */
+ ("ld1 {v16.16b - v17.16b}, [%[mcq]]\n\t"
+ /* Extract flow_tag. */
+ "tbl %[ftag].16b, {v16.16b - v17.16b}, %[flow_mark_shuf].16b\n\t"
+ : [ftag]"=&w"(ftag)
+ : [mcq]"r"(p),
+ [flow_mark_shuf]"w"(flow_mark_shuf)
+ : "memory", "v16", "v17");
+ invalid_mask = vceqzq_u32(ftag);
+ ol_flags_mask = vorrq_u32(ol_flags_mask,
+ fdir_all_flags);
+ /* Set PKT_RX_FDIR if flow tag is non-zero. */
+ ol_flags = vorrq_u32(ol_flags,
+ vbicq_u32(fdir_flags, invalid_mask));
+ /* Mask out invalid entries. */
+ fdir_id_flags = vbicq_u32(fdir_id_flags,
+ invalid_mask);
+ /* Check if the flow tag is MLX5_FLOW_MARK_DEFAULT. */
+ ol_flags = vorrq_u32(ol_flags,
+ vbicq_u32(fdir_id_flags,
+ vceqq_u32(ftag, ft_mask)));
+ ftag = vaddq_u32(ftag, flow_mark_adj);
+ elts[pos]->hash.fdir.hi =
+ vgetq_lane_u32(ftag, 3);
+ elts[pos + 1]->hash.fdir.hi =
+ vgetq_lane_u32(ftag, 2);
+ elts[pos + 2]->hash.fdir.hi =
+ vgetq_lane_u32(ftag, 1);
+ elts[pos + 3]->hash.fdir.hi =
+ vgetq_lane_u32(ftag, 0);
+ }
+ }
+ if (unlikely(rxq->mcqe_format !=
+ MLX5_CQE_RESP_FORMAT_HASH)) {
+ if (rxq->mcqe_format ==
+ MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
+ const uint8_t pkt_info =
+ (cq->pkt_info & 0x3) << 6;
+ const uint8_t pkt_hdr0 =
+ mcq[pos % 8].hdr_type;
+ const uint8_t pkt_hdr1 =
+ mcq[pos % 8 + 1].hdr_type;
+ const uint8_t pkt_hdr2 =
+ mcq[pos % 8 + 2].hdr_type;
+ const uint8_t pkt_hdr3 =
+ mcq[pos % 8 + 3].hdr_type;
+ const uint32x4_t vlan_mask =
+ vdupq_n_u32(PKT_RX_VLAN |
+ PKT_RX_VLAN_STRIPPED);
+ const uint32x4_t cv_mask =
+ vdupq_n_u32(MLX5_CQE_VLAN_STRIPPED);
+ const uint32x4_t pkt_cv = {
+ pkt_hdr0 & 0x1, pkt_hdr1 & 0x1,
+ pkt_hdr2 & 0x1, pkt_hdr3 & 0x1};
+
+ ol_flags_mask = vorrq_u32(ol_flags_mask,
+ vlan_mask);
+ ol_flags = vorrq_u32(ol_flags,
+ vandq_u32(vlan_mask,
+ vceqq_u32(pkt_cv, cv_mask)));
+ elts[pos]->packet_type =
+ mlx5_ptype_table[(pkt_hdr0 >> 2) |
+ pkt_info];
+ elts[pos + 1]->packet_type =
+ mlx5_ptype_table[(pkt_hdr1 >> 2) |
+ pkt_info];
+ elts[pos + 2]->packet_type =
+ mlx5_ptype_table[(pkt_hdr2 >> 2) |
+ pkt_info];
+ elts[pos + 3]->packet_type =
+ mlx5_ptype_table[(pkt_hdr3 >> 2) |
+ pkt_info];
+ if (rxq->tunnel) {
+ elts[pos]->packet_type |=
+ !!(((pkt_hdr0 >> 2) |
+ pkt_info) & (1 << 6));
+ elts[pos + 1]->packet_type |=
+ !!(((pkt_hdr1 >> 2) |
+ pkt_info) & (1 << 6));
+ elts[pos + 2]->packet_type |=
+ !!(((pkt_hdr2 >> 2) |
+ pkt_info) & (1 << 6));
+ elts[pos + 3]->packet_type |=
+ !!(((pkt_hdr3 >> 2) |
+ pkt_info) & (1 << 6));
+ }
+ }
+ const uint32x4_t hash_flags =
+ vdupq_n_u32(PKT_RX_RSS_HASH);
+ const uint32x4_t rearm_flags =
+ vdupq_n_u32((uint32_t)t_pkt->ol_flags);
+
+ ol_flags_mask = vorrq_u32(ol_flags_mask, hash_flags);
+ ol_flags = vorrq_u32(ol_flags,
+ vbicq_u32(rearm_flags, ol_flags_mask));
+ elts[pos]->ol_flags = vgetq_lane_u32(ol_flags, 3);
+ elts[pos + 1]->ol_flags = vgetq_lane_u32(ol_flags, 2);
+ elts[pos + 2]->ol_flags = vgetq_lane_u32(ol_flags, 1);
+ elts[pos + 3]->ol_flags = vgetq_lane_u32(ol_flags, 0);
+ elts[pos]->hash.rss = 0;
+ elts[pos + 1]->hash.rss = 0;
+ elts[pos + 2]->hash.rss = 0;
+ elts[pos + 3]->hash.rss = 0;
}
if (rxq->dynf_meta) {
int32_t offs = rxq->flow_meta_offset;
0,
rxq->crc_present * RTE_ETHER_CRC_LEN,
0, 0);
- const uint32_t flow_tag = t_pkt->hash.fdir.hi;
+ __m128i ol_flags = _mm_setzero_si128();
+ __m128i ol_flags_mask = _mm_setzero_si128();
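	/* ol_flags/ol_flags_mask accumulate per-packet flag bits from the
	 * mini-CQEs; bits outside the mask are copied from the title packet. */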
#ifdef MLX5_PMD_SOFT_COUNTERS
const __m128i zero = _mm_setzero_si128();
const __m128i ones = _mm_cmpeq_epi32(zero, zero);
(mcqe_n - pos) *
sizeof(uint16_t) * 8);
invalid_mask = _mm_sll_epi64(ones, invalid_mask);
- mcqe1 = _mm_srli_si128(mcqe1, 4);
- byte_cnt = _mm_blend_epi16(mcqe1, mcqe2, 0xcc);
+ byte_cnt = _mm_blend_epi16(_mm_srli_si128(mcqe1, 4),
+ mcqe2, 0xcc);
byte_cnt = _mm_shuffle_epi8(byte_cnt, len_shuf_mask);
byte_cnt = _mm_andnot_si128(invalid_mask, byte_cnt);
byte_cnt = _mm_hadd_epi16(byte_cnt, zero);
rcvd_byte += _mm_cvtsi128_si64(_mm_hadd_epi16(byte_cnt, zero));
#endif
if (rxq->mark) {
- /* E.1 store flow tag (rte_flow mark). */
- elts[pos]->hash.fdir.hi = flow_tag;
- elts[pos + 1]->hash.fdir.hi = flow_tag;
- elts[pos + 2]->hash.fdir.hi = flow_tag;
- elts[pos + 3]->hash.fdir.hi = flow_tag;
+ if (rxq->mcqe_format !=
+ MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
+ const uint32_t flow_tag = t_pkt->hash.fdir.hi;
+
+ /* E.1 store flow tag (rte_flow mark). */
+ elts[pos]->hash.fdir.hi = flow_tag;
+ elts[pos + 1]->hash.fdir.hi = flow_tag;
+ elts[pos + 2]->hash.fdir.hi = flow_tag;
+ elts[pos + 3]->hash.fdir.hi = flow_tag;
+ } else {
+ const __m128i flow_mark_adj =
+ _mm_set_epi32(-1, -1, -1, -1);
+ const __m128i flow_mark_shuf =
+ _mm_set_epi8(-1, 1, 0, 4,
+ -1, 9, 8, 12,
+ -1, -1, -1, -1,
+ -1, -1, -1, -1);
+ const __m128i ft_mask =
+ _mm_set1_epi32(0xffffff00);
+ const __m128i fdir_flags =
+ _mm_set1_epi32(PKT_RX_FDIR);
+ const __m128i fdir_all_flags =
+ _mm_set1_epi32(PKT_RX_FDIR |
+ PKT_RX_FDIR_ID);
+ __m128i fdir_id_flags =
+ _mm_set1_epi32(PKT_RX_FDIR_ID);
+
+ /* Extract flow_tag field. */
+ __m128i ftag0 =
+ _mm_shuffle_epi8(mcqe1, flow_mark_shuf);
+ __m128i ftag1 =
+ _mm_shuffle_epi8(mcqe2, flow_mark_shuf);
+ __m128i ftag =
+ _mm_unpackhi_epi64(ftag0, ftag1);
+ __m128i invalid_mask =
+ _mm_cmpeq_epi32(ftag, zero);
+
+ ol_flags_mask = _mm_or_si128(ol_flags_mask,
+ fdir_all_flags);
+ /* Set PKT_RX_FDIR if flow tag is non-zero. */
+ ol_flags = _mm_or_si128(ol_flags,
+ _mm_andnot_si128(invalid_mask,
+ fdir_flags));
+ /* Mask out invalid entries. */
+ fdir_id_flags = _mm_andnot_si128(invalid_mask,
+ fdir_id_flags);
+ /* Check if the flow tag is MLX5_FLOW_MARK_DEFAULT. */
+ ol_flags = _mm_or_si128(ol_flags,
+ _mm_andnot_si128(_mm_cmpeq_epi32(ftag,
+ ft_mask),
+ fdir_id_flags));
+ ftag = _mm_add_epi32(ftag, flow_mark_adj);
+ elts[pos]->hash.fdir.hi =
+ _mm_extract_epi32(ftag, 0);
+ elts[pos + 1]->hash.fdir.hi =
+ _mm_extract_epi32(ftag, 1);
+ elts[pos + 2]->hash.fdir.hi =
+ _mm_extract_epi32(ftag, 2);
+ elts[pos + 3]->hash.fdir.hi =
+ _mm_extract_epi32(ftag, 3);
+ }
+ }
+ if (unlikely(rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)) {
+ if (rxq->mcqe_format ==
+ MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
+ const uint8_t pkt_info =
+ (cq->pkt_info & 0x3) << 6;
+ const uint8_t pkt_hdr0 =
+ _mm_extract_epi8(mcqe1, 0);
+ const uint8_t pkt_hdr1 =
+ _mm_extract_epi8(mcqe1, 8);
+ const uint8_t pkt_hdr2 =
+ _mm_extract_epi8(mcqe2, 0);
+ const uint8_t pkt_hdr3 =
+ _mm_extract_epi8(mcqe2, 8);
+ const __m128i vlan_mask =
+ _mm_set1_epi32(PKT_RX_VLAN |
+ PKT_RX_VLAN_STRIPPED);
+ const __m128i cv_mask =
+ _mm_set1_epi32(MLX5_CQE_VLAN_STRIPPED);
+ const __m128i pkt_cv =
+ _mm_set_epi32(pkt_hdr0 & 0x1,
+ pkt_hdr1 & 0x1,
+ pkt_hdr2 & 0x1,
+ pkt_hdr3 & 0x1);
+
+ ol_flags_mask = _mm_or_si128(ol_flags_mask,
+ vlan_mask);
+ ol_flags = _mm_or_si128(ol_flags,
+ _mm_and_si128(_mm_cmpeq_epi32(pkt_cv,
+ cv_mask), vlan_mask));
+ elts[pos]->packet_type =
+ mlx5_ptype_table[(pkt_hdr0 >> 2) |
+ pkt_info];
+ elts[pos + 1]->packet_type =
+ mlx5_ptype_table[(pkt_hdr1 >> 2) |
+ pkt_info];
+ elts[pos + 2]->packet_type =
+ mlx5_ptype_table[(pkt_hdr2 >> 2) |
+ pkt_info];
+ elts[pos + 3]->packet_type =
+ mlx5_ptype_table[(pkt_hdr3 >> 2) |
+ pkt_info];
+ if (rxq->tunnel) {
+ elts[pos]->packet_type |=
+ !!(((pkt_hdr0 >> 2) |
+ pkt_info) & (1 << 6));
+ elts[pos + 1]->packet_type |=
+ !!(((pkt_hdr1 >> 2) |
+ pkt_info) & (1 << 6));
+ elts[pos + 2]->packet_type |=
+ !!(((pkt_hdr2 >> 2) |
+ pkt_info) & (1 << 6));
+ elts[pos + 3]->packet_type |=
+ !!(((pkt_hdr3 >> 2) |
+ pkt_info) & (1 << 6));
+ }
+ }
+ const __m128i hash_flags =
+ _mm_set1_epi32(PKT_RX_RSS_HASH);
+ const __m128i rearm_flags =
+ _mm_set1_epi32((uint32_t)t_pkt->ol_flags);
+
+ ol_flags_mask = _mm_or_si128(ol_flags_mask, hash_flags);
+ ol_flags = _mm_or_si128(ol_flags,
+ _mm_andnot_si128(ol_flags_mask, rearm_flags));
+ elts[pos]->ol_flags =
+ _mm_extract_epi32(ol_flags, 0);
+ elts[pos + 1]->ol_flags =
+ _mm_extract_epi32(ol_flags, 1);
+ elts[pos + 2]->ol_flags =
+ _mm_extract_epi32(ol_flags, 2);
+ elts[pos + 3]->ol_flags =
+ _mm_extract_epi32(ol_flags, 3);
+ elts[pos]->hash.rss = 0;
+ elts[pos + 1]->hash.rss = 0;
+ elts[pos + 2]->hash.rss = 0;
+ elts[pos + 3]->hash.rss = 0;
}
if (rxq->dynf_meta) {
int32_t offs = rxq->flow_meta_offset;
rxq->hw_timestamp * rxq->timestamp_rx_flag);
__m128i cv_flags;
const __m128i zero = _mm_setzero_si128();
- const __m128i ptype_mask =
- _mm_set_epi32(0xfd06, 0xfd06, 0xfd06, 0xfd06);
- const __m128i ptype_ol_mask =
- _mm_set_epi32(0x106, 0x106, 0x106, 0x106);
- const __m128i pinfo_mask =
- _mm_set_epi32(0x3, 0x3, 0x3, 0x3);
+ const __m128i ptype_mask = _mm_set1_epi32(0xfd06);
+ const __m128i ptype_ol_mask = _mm_set1_epi32(0x106);
+ const __m128i pinfo_mask = _mm_set1_epi32(0x3);
const __m128i cv_flag_sel =
_mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0,
(uint8_t)((PKT_RX_IP_CKSUM_GOOD |
(uint8_t)(PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
0);
const __m128i cv_mask =
- _mm_set_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
- PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
- PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
- PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
+ _mm_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
const __m128i mbuf_init =
_mm_load_si128((__m128i *)&rxq->mbuf_initializer);