if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
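+ /*
+ * The pool's private area records whether this pool's mbufs
+ * carry pinned external data buffers.
+ */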
+ struct rte_pktmbuf_pool_private *priv =
+ (struct rte_pktmbuf_pool_private *)
+ rte_mempool_get_priv(rxq_ctrl->rxq.mp);
int j;
/* Initialize default rearm_data for vPMD. */
rte_mbuf_refcnt_set(mbuf_init, 1);
mbuf_init->nb_segs = 1;
mbuf_init->port = rxq->port_id;
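+ /*
+ * For pools with pinned external buffers the attached-buffer
+ * flag must survive every rearm, so bake it into the template.
+ */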
+ if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF)
+ mbuf_init->ol_flags = EXT_ATTACHED_MBUF;
/*
* prevent compiler reordering:
* rearm_data covers previous fields.
*/
rte_compiler_barrier();
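+ /*
+ * The 16 bytes at rearm_data are data_off, refcnt, nb_segs
+ * and port (2B each) followed by ol_flags (8B), so the wider
+ * rte_xmm_t template below seeds ol_flags as well.
+ */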
rxq->mbuf_initializer =
- *(uint64_t *)&mbuf_init->rearm_data;
+ *(rte_xmm_t *)&mbuf_init->rearm_data;
/* Padding with a fake mbuf for vectorized Rx. */
for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
}
pkt = seg;
assert(len >= (rxq->crc_present << 2));
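+ /* Preserve only the attached-buffer flag; Rx offload flags
+ * are ORed back in below. */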
- pkt->ol_flags = 0;
+ pkt->ol_flags &= EXT_ATTACHED_MBUF;
/* If compressed, take hash result from mini-CQE. */
rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
cqe->rx_hash_res :
struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
uint16_t idx; /* Queue index. */
struct mlx5_rxq_stats stats;
- uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
+ rte_xmm_t mbuf_initializer; /* Default rearm/flags for vectorized Rx. */
struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
void *cq_uar; /* CQ user access region. */
uint32_t cqn; /* CQ number. */
void *buf_addr;
/*
- * Load the virtual address for Rx WQE. non-x86 processors
- * (mostly RISC such as ARM and Power) are more vulnerable to
- * load stall. For x86, reducing the number of instructions
- * seems to matter most.
+ * To support mbufs with externally attached data buffers we
+ * must read the buf_addr pointer from the mbuf rather than
+ * derive it with rte_mbuf_buf_addr(), which assumes the data
+ * buffer follows the mbuf within the pool element. Reading
+ * the mbuf itself may impact performance.
*/
-#ifdef RTE_ARCH_X86_64
buf_addr = elts[i]->buf_addr;
- assert(buf_addr == rte_mbuf_buf_addr(elts[i], rxq->mp));
-#else
- buf_addr = rte_mbuf_buf_addr(elts[i], rxq->mp);
- assert(buf_addr == elts[i]->buf_addr);
-#endif
wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
RTE_PKTMBUF_HEADROOM);
/* If there's only one MR, no need to replace LKey in WQE. */
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED};
const vector unsigned char mbuf_init =
- (vector unsigned char)(vector unsigned long){
- *(__attribute__((__aligned__(8))) unsigned long *)
- &rxq->mbuf_initializer, 0LL};
+ (vector unsigned char)vec_vsx_ld
+ (0, (vector unsigned char *)&rxq->mbuf_initializer);
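+ /* The template is 16B wide now; default ol_flags ride in
+ * the upper half instead of being zeroed. */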
const vector unsigned short rearm_sel_mask =
(vector unsigned short){0, 0, 0, 0, 0xffff, 0xffff, 0, 0};
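+ /* rearm_sel_mask takes the 32-bit Rx flag word of ol_flags
+ * from the computed flags, the rest from the template. */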
vector unsigned char rearm0, rearm1, rearm2, rearm3;
const uint32x4_t cv_mask =
vdupq_n_u32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
- const uint64x1_t mbuf_init = vld1_u64(&rxq->mbuf_initializer);
- const uint64x1_t r32_mask = vcreate_u64(0xffffffff);
+ const uint64x2_t mbuf_init = vld1q_u64
+ ((const uint64_t *)&rxq->mbuf_initializer);
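+ /* Whole-template load: 32-bit lanes 2..3 hold default ol_flags. */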
uint64x2_t rearm0, rearm1, rearm2, rearm3;
uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
/* Merge to ol_flags. */
ol_flags = vorrq_u32(ol_flags, cv_flags);
/* Merge mbuf_init and ol_flags, and store. */
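+ /* Inserting the computed flags at 32-bit lane 2 patches the
+ * low half of ol_flags in each template copy. */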
- rearm0 = vcombine_u64(mbuf_init,
- vshr_n_u64(vget_high_u64(vreinterpretq_u64_u32(
- ol_flags)), 32));
- rearm1 = vcombine_u64(mbuf_init,
- vand_u64(vget_high_u64(vreinterpretq_u64_u32(
- ol_flags)), r32_mask));
- rearm2 = vcombine_u64(mbuf_init,
- vshr_n_u64(vget_low_u64(vreinterpretq_u64_u32(
- ol_flags)), 32));
- rearm3 = vcombine_u64(mbuf_init,
- vand_u64(vget_low_u64(vreinterpretq_u64_u32(
- ol_flags)), r32_mask));
+ rearm0 = vreinterpretq_u64_u32(vsetq_lane_u32
+ (vgetq_lane_u32(ol_flags, 3),
+ vreinterpretq_u32_u64(mbuf_init), 2));
+ rearm1 = vreinterpretq_u64_u32(vsetq_lane_u32
+ (vgetq_lane_u32(ol_flags, 2),
+ vreinterpretq_u32_u64(mbuf_init), 2));
+ rearm2 = vreinterpretq_u64_u32(vsetq_lane_u32
+ (vgetq_lane_u32(ol_flags, 1),
+ vreinterpretq_u32_u64(mbuf_init), 2));
+ rearm3 = vreinterpretq_u64_u32(vsetq_lane_u32
+ (vgetq_lane_u32(ol_flags, 0),
+ vreinterpretq_u32_u64(mbuf_init), 2));
+
vst1q_u64((void *)&pkts[0]->rearm_data, rearm0);
vst1q_u64((void *)&pkts[1]->rearm_data, rearm1);
vst1q_u64((void *)&pkts[2]->rearm_data, rearm2);
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
const __m128i mbuf_init =
- _mm_loadl_epi64((__m128i *)&rxq->mbuf_initializer);
+ _mm_load_si128((__m128i *)&rxq->mbuf_initializer);
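+ /* rte_xmm_t is 16B-aligned, so the aligned load is safe. */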
__m128i rearm0, rearm1, rearm2, rearm3;
uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;