return n;
}
+/**
+ * Receive a burst of packets. An errored completion also consumes an mbuf, but
+ * the packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
+ * before returning to the application.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ * @param[out] err
+ * Pointer to a flag. Set to a non-zero value if the pkts array has at least
+ * one error packet to handle.
+ * @param[out] no_cq
+ * Pointer to a boolean. Set to true if no new CQE is seen.
+ *
+ * @return
+ * Number of packets received including errors (<= pkts_n).
+ */
+static inline uint16_t
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n, uint64_t *err, bool *no_cq)
+{
+ const uint16_t q_n = 1 << rxq->cqe_n;
+ const uint16_t q_mask = q_n - 1;
+ const uint16_t e_n = 1 << rxq->elts_n;
+ const uint16_t e_mask = e_n - 1;
+ volatile struct mlx5_cqe *cq;
+ struct rte_mbuf **elts;
+ uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
+ uint16_t nocmp_n = 0;
+ uint16_t rcvd_pkt = 0;
+ unsigned int cq_idx = rxq->cq_ci & q_mask;
+ unsigned int elts_idx;
+
+ MLX5_ASSERT(rxq->sges_n == 0);
+ MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
+ cq = &(*rxq->cqes)[cq_idx];
+ rte_prefetch0(cq);
+ rte_prefetch0(cq + 1);
+ rte_prefetch0(cq + 2);
+ rte_prefetch0(cq + 3);
+ pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
+ mlx5_rx_replenish_bulk_mbuf(rxq);
+ /* See if there're unreturned mbufs from compressed CQE. */
+ rcvd_pkt = rxq->decompressed;
+ if (rcvd_pkt > 0) {
+ rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
+ rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
+ pkts, rcvd_pkt);
+ rxq->rq_pi += rcvd_pkt;
+ rxq->decompressed -= rcvd_pkt;
+ pkts += rcvd_pkt;
+ }
+ elts_idx = rxq->rq_pi & e_mask;
+ elts = &(*rxq->elts)[elts_idx];
+ /* Not to overflow pkts array. */
+ pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
+ /* Not to cross queue end. */
+ pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
+ pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
+ if (!pkts_n) {
+ *no_cq = !rcvd_pkt;
+ return rcvd_pkt;
+ }
+ /* At this point, there shouldn't be any remaining packets. */
+ MLX5_ASSERT(rxq->decompressed == 0);
+ /* Process all the CQEs */
+ nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
+ /* If no new CQE seen, return without updating cq_db. */
+ if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
+ *no_cq = true;
+ return rcvd_pkt;
+ }
+ /* Update the consumer indexes for non-compressed CQEs. */
+ MLX5_ASSERT(nocmp_n <= pkts_n);
+ rxq->cq_ci += nocmp_n;
+ rxq->rq_pi += nocmp_n;
+ rcvd_pkt += nocmp_n;
+ /* Decompress the last CQE if compressed. */
+ if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
+ MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+ rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
+ &elts[nocmp_n]);
+ rxq->cq_ci += rxq->decompressed;
+ /* Return more packets if needed. */
+ if (nocmp_n < pkts_n) {
+ uint16_t n = rxq->decompressed;
+
+ n = RTE_MIN(n, pkts_n - nocmp_n);
+ rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
+ &pkts[nocmp_n], n);
+ rxq->rq_pi += n;
+ rcvd_pkt += n;
+ rxq->decompressed -= n;
+ }
+ }
+ rte_io_wmb();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+ *no_cq = !rcvd_pkt;
+ return rcvd_pkt;
+}
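/*
 * Editor's illustration, not part of the patch: a standalone model of the
 * pkts_n clamping performed in rxq_burst_v() above. The burst size is first
 * rounded down to a multiple of MLX5_VPMD_DESCS_PER_LOOP and then clipped so
 * that neither the SW ring nor the CQ ring is crossed at its end. All names
 * and constants below are local to this sketch.
 */
#include <assert.h>
#include <stdint.h>

#define DESCS_PER_LOOP 4	/* stands in for MLX5_VPMD_DESCS_PER_LOOP */

static uint16_t min_u16(uint16_t a, uint16_t b) { return a < b ? a : b; }

int main(void)
{
	const uint16_t q_n = 1 << 9;	/* 512-entry rings */
	uint16_t pkts_n = 64;		/* burst size requested by the caller */
	uint16_t rcvd_pkt = 10;		/* already returned from a compressed session */
	uint16_t elts_idx = 500;	/* SW ring index close to the wrap point */
	uint16_t cq_idx = 508;		/* CQ index even closer to the wrap point */

	/* Not to overflow pkts array: RTE_ALIGN_FLOOR(x, 4) is x & ~3. */
	pkts_n = (uint16_t)((pkts_n - rcvd_pkt) & ~(DESCS_PER_LOOP - 1));
	/* Not to cross queue end. */
	pkts_n = min_u16(pkts_n, (uint16_t)(q_n - elts_idx));
	pkts_n = min_u16(pkts_n, (uint16_t)(q_n - cq_idx));
	/* Only the 4 CQEs left before the CQ wraps are processed this call. */
	assert(pkts_n == 4);
	return 0;
}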
+
/**
* DPDK callback for vectorized RX.
*
*
* @param rxq
* Pointer to RX queue structure.
- * @param n
- * Number of buffers to be replenished.
*/
static inline void
-mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
+mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
const uint16_t q_n = 1 << rxq->elts_n;
const uint16_t q_mask = q_n - 1;
+ uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
uint16_t elts_idx = rxq->rq_ci & q_mask;
struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
volatile struct mlx5_wqe_data_seg *wq =
&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
unsigned int i;
- MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
- MLX5_ASSERT(n <= (uint16_t)(q_n - (rxq->rq_ci - rxq->rq_pi)));
- MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
- MLX5_VPMD_DESCS_PER_LOOP);
- /* Not to cross queue end. */
- n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
- if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
- rxq->stats.rx_nombuf += n;
- return;
- }
- for (i = 0; i < n; ++i) {
- void *buf_addr;
+ if (n >= rxq->rq_repl_thresh) {
+ MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
+ MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
+ MLX5_VPMD_DESCS_PER_LOOP);
+ /* Not to cross queue end. */
+ n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
+ if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+ rxq->stats.rx_nombuf += n;
+ return;
+ }
+ for (i = 0; i < n; ++i) {
+ void *buf_addr;
- /*
- * In order to support the mbufs with external attached
- * data buffer we should use the buf_addr pointer instead of
- * rte_mbuf_buf_addr(). It touches the mbuf itself and may
- * impact the performance.
- */
- buf_addr = elts[i]->buf_addr;
- wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
- RTE_PKTMBUF_HEADROOM);
- /* If there's only one MR, no need to replace LKey in WQE. */
- if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
- wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
+ /*
+ * In order to support the mbufs with external attached
+ * data buffer we should use the buf_addr pointer
+ * instead of rte_mbuf_buf_addr(). It touches the mbuf
+ * itself and may impact the performance.
+ */
+ buf_addr = elts[i]->buf_addr;
+ wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
+ RTE_PKTMBUF_HEADROOM);
+ /* If there's a single MR, no need to replace LKey. */
+ if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
+ > 1))
+ wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
+ }
+ rxq->rq_ci += n;
+ /* Prevent overflowing into consumed mbufs. */
+ elts_idx = rxq->rq_ci & q_mask;
+ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+ (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
- rxq->rq_ci += n;
- /* Prevent overflowing into consumed mbufs. */
- elts_idx = rxq->rq_ci & q_mask;
- for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
- (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
- rte_io_wmb();
- *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
#endif /* RTE_PMD_MLX5_RXTX_VEC_H_ */
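/*
 * Editor's illustration, not part of the patch: the replenish count that
 * moved into mlx5_rx_replenish_bulk_mbuf() above relies on free-running
 * 16-bit indexes. A standalone check that q_n - (rq_ci - rq_pi) still gives
 * the number of free SW ring slots after the producer index wraps past zero.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint16_t q_n = 1 << 9;	/* 512-entry SW ring */
	uint16_t rq_ci = 5;		/* producer index, already wrapped past 65535 */
	uint16_t rq_pi = 65435;		/* consumer index, not wrapped yet */
	uint16_t in_use = (uint16_t)(rq_ci - rq_pi);	/* descriptors still in flight */
	uint16_t n = (uint16_t)(q_n - in_use);		/* free slots available to refill */

	assert(in_use == 106);
	assert(n == 406);	/* replenish once this reaches rq_repl_thresh */
	return 0;
}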
/**
* Store free buffers to RX SW ring.
*
- * @param rxq
- * Pointer to RX queue structure.
+ * @param elts
+ * Pointer to SW ring to be filled.
* @param pkts
* Pointer to array of packets to be stored.
* @param pkts_n
* Number of packets to be stored.
*/
static inline void
-rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
+rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
{
- const uint16_t q_mask = (1 << rxq->elts_n) - 1;
- struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
unsigned int pos;
uint16_t p = n & -2;
(vector unsigned char *)&pkts[3]->rearm_data);
}
-
/**
- * Receive burst of packets. An errored completion also consumes a mbuf, but the
- * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
- * before returning to application.
+ * Process a non-compressed completion and fill in mbufs in RX SW ring
+ * with data extracted from the title completion descriptor.
*
* @param rxq
* Pointer to RX queue structure.
+ * @param cq
+ * Pointer to completion array having a non-compressed completion at first.
+ * @param elts
+ * Pointer to SW ring to be filled. The first mbuf has to be pre-built from
+ * the title completion descriptor to be copied to the rest of mbufs.
* @param[out] pkts
* Array to store received packets.
* @param pkts_n
* Maximum number of packets in array.
* @param[out] err
* Pointer to a flag. Set non-zero value if pkts array has at least one error
* packet to handle.
- * @param[out] no_cq
- * Pointer to a boolean. Set true if no new CQE seen.
+ * @param[out] comp
+ * Pointer to an index. Set it to the index of the first compressed
+ * completion, if any.
*
* @return
- * Number of packets received including errors (<= pkts_n).
+ * Number of CQEs successfully processed.
*/
static inline uint16_t
-rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint64_t *err, bool *no_cq)
+rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
+ struct rte_mbuf **elts, struct rte_mbuf **pkts,
+ uint16_t pkts_n, uint64_t *err, uint64_t *comp)
{
const uint16_t q_n = 1 << rxq->cqe_n;
const uint16_t q_mask = q_n - 1;
- volatile struct mlx5_cqe *cq;
- struct rte_mbuf **elts;
unsigned int pos;
- uint64_t n;
- uint16_t repl_n;
+ uint64_t n = 0;
uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
uint16_t nocmp_n = 0;
- uint16_t rcvd_pkt = 0;
- unsigned int cq_idx = rxq->cq_ci & q_mask;
- unsigned int elts_idx;
unsigned int ownership = !!(rxq->cq_ci & (q_mask + 1));
const vector unsigned char zero = (vector unsigned char){0};
const vector unsigned char ones = vec_splat_u8(-1);
const vector unsigned short cqe_sel_mask2 =
(vector unsigned short){0, 0, 0xffff, 0, 0, 0, 0, 0};
- MLX5_ASSERT(rxq->sges_n == 0);
- MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
- cq = &(*rxq->cqes)[cq_idx];
- rte_prefetch0(cq);
- rte_prefetch0(cq + 1);
- rte_prefetch0(cq + 2);
- rte_prefetch0(cq + 3);
- pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
-
- repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
- if (repl_n >= rxq->rq_repl_thresh)
- mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
- /* See if there're unreturned mbufs from compressed CQE. */
- rcvd_pkt = rxq->decompressed;
- if (rcvd_pkt > 0) {
- rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
- rxq_copy_mbuf_v(rxq, pkts, rcvd_pkt);
- rxq->rq_pi += rcvd_pkt;
- rxq->decompressed -= rcvd_pkt;
- pkts += rcvd_pkt;
- }
- elts_idx = rxq->rq_pi & q_mask;
- elts = &(*rxq->elts)[elts_idx];
- /* Not to overflow pkts array. */
- pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
- /* Not to cross queue end. */
- pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
- pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
- if (!pkts_n) {
- *no_cq = !rcvd_pkt;
- return rcvd_pkt;
- }
- /* At this point, there shouldn't be any remaining packets. */
- MLX5_ASSERT(rxq->decompressed == 0);
-
/*
* A. load first Qword (8bytes) in one loop.
* B. copy 4 mbuf pointers from elts ring to returning pkts.
if (n != MLX5_VPMD_DESCS_PER_LOOP)
break;
}
- /* If no new CQE seen, return without updating cq_db. */
- if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
- *no_cq = true;
- return rcvd_pkt;
- }
- /* Update the consumer indexes for non-compressed CQEs. */
- MLX5_ASSERT(nocmp_n <= pkts_n);
- rxq->cq_ci += nocmp_n;
- rxq->rq_pi += nocmp_n;
- rcvd_pkt += nocmp_n;
#ifdef MLX5_PMD_SOFT_COUNTERS
rxq->stats.ipackets += nocmp_n;
rxq->stats.ibytes += rcvd_byte;
#endif
- /* Decompress the last CQE if compressed. */
- if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {
- MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
- rxq->decompressed =
- rxq_cq_decompress_v(rxq, &cq[nocmp_n], &elts[nocmp_n]);
- /* Return more packets if needed. */
- if (nocmp_n < pkts_n) {
- uint16_t n = rxq->decompressed;
-
- n = RTE_MIN(n, pkts_n - nocmp_n);
- rxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n);
- rxq->rq_pi += n;
- rcvd_pkt += n;
- rxq->decompressed -= n;
- }
- }
- rte_compiler_barrier();
- *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
- *no_cq = !rcvd_pkt;
- return rcvd_pkt;
+ if (comp_idx == n)
+ *comp = comp_idx;
+ return nocmp_n;
}
#endif /* RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_ */
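/*
 * Editor's illustration, not part of the patch: the ownership value computed
 * in rxq_cq_process_v() above, !!(cq_ci & (q_mask + 1)), is the bit just
 * above the ring mask, so it flips each time the consumer index completes a
 * full lap of the CQ. A standalone check with a 512-entry CQ:
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint16_t q_n = 1 << 9;		/* 512 CQEs */
	const uint16_t q_mask = q_n - 1;

	assert(!!((uint16_t)0 & (q_mask + 1)) == 0);	/* first lap */
	assert(!!((uint16_t)511 & (q_mask + 1)) == 0);	/* still the first lap */
	assert(!!((uint16_t)512 & (q_mask + 1)) == 1);	/* second lap: flips */
	assert(!!((uint16_t)1024 & (q_mask + 1)) == 0);	/* third lap: flips back */
	return 0;
}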
/**
* Store free buffers to RX SW ring.
*
- * @param rxq
- * Pointer to RX queue structure.
+ * @param elts
+ * Pointer to SW ring to be filled.
* @param pkts
* Pointer to array of packets to be stored.
* @param pkts_n
* Number of packets to be stored.
*/
static inline void
-rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
+rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
{
- const uint16_t q_mask = (1 << rxq->elts_n) - 1;
- struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
unsigned int pos;
uint16_t p = n & -2;
}
/**
- * Receive burst of packets. An errored completion also consumes a mbuf, but the
- * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
- * before returning to application.
+ * Process a non-compressed completion and fill in mbufs in RX SW ring
+ * with data extracted from the title completion descriptor.
*
* @param rxq
* Pointer to RX queue structure.
+ * @param cq
+ * Pointer to completion array having a non-compressed completion at first.
+ * @param elts
+ * Pointer to SW ring to be filled. The first mbuf has to be pre-built from
+ * the title completion descriptor to be copied to the rest of mbufs.
* @param[out] pkts
* Array to store received packets.
* @param pkts_n
* Maximum number of packets in array.
* @param[out] err
* Pointer to a flag. Set non-zero value if pkts array has at least one error
* packet to handle.
- * @param[out] no_cq
- * Pointer to a boolean. Set true if no new CQE seen.
+ * @param[out] comp
+ * Pointer to an index. Set it to the index of the first compressed
+ * completion, if any.
*
* @return
- * Number of packets received including errors (<= pkts_n).
+ * Number of CQEs successfully processed.
*/
static inline uint16_t
-rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint64_t *err, bool *no_cq)
+rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
+ struct rte_mbuf **elts, struct rte_mbuf **pkts,
+ uint16_t pkts_n, uint64_t *err, uint64_t *comp)
{
const uint16_t q_n = 1 << rxq->cqe_n;
const uint16_t q_mask = q_n - 1;
- volatile struct mlx5_cqe *cq;
- struct rte_mbuf **elts;
unsigned int pos;
- uint64_t n;
- uint16_t repl_n;
+ uint64_t n = 0;
uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
uint16_t nocmp_n = 0;
- uint16_t rcvd_pkt = 0;
- unsigned int cq_idx = rxq->cq_ci & q_mask;
- unsigned int elts_idx;
const uint16x4_t ownership = vdup_n_u16(!(rxq->cq_ci & (q_mask + 1)));
const uint16x4_t owner_check = vcreate_u16(0x0001000100010001);
const uint16x4_t opcode_check = vcreate_u16(0x00f000f000f000f0);
};
const uint32x4_t flow_mark_adj = { 0, 0, 0, rxq->mark * (-1) };
- MLX5_ASSERT(rxq->sges_n == 0);
- MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
- cq = &(*rxq->cqes)[cq_idx];
- rte_prefetch_non_temporal(cq);
- rte_prefetch_non_temporal(cq + 1);
- rte_prefetch_non_temporal(cq + 2);
- rte_prefetch_non_temporal(cq + 3);
- pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
- repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
- if (repl_n >= rxq->rq_repl_thresh)
- mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
- /* See if there're unreturned mbufs from compressed CQE. */
- rcvd_pkt = rxq->decompressed;
- if (rcvd_pkt > 0) {
- rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
- rxq_copy_mbuf_v(rxq, pkts, rcvd_pkt);
- rxq->rq_pi += rcvd_pkt;
- pkts += rcvd_pkt;
- rxq->decompressed -= rcvd_pkt;
- }
- elts_idx = rxq->rq_pi & q_mask;
- elts = &(*rxq->elts)[elts_idx];
- /* Not to overflow pkts array. */
- pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
- /* Not to cross queue end. */
- pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
- pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
- if (!pkts_n) {
- *no_cq = !rcvd_pkt;
- return rcvd_pkt;
- }
- /* At this point, there shouldn't be any remained packets. */
- MLX5_ASSERT(rxq->decompressed == 0);
/*
* Note that vectors have reverse order - {v3, v2, v1, v0}, because
* there's no instruction to count trailing zeros. __builtin_clzl() is
if (n != MLX5_VPMD_DESCS_PER_LOOP)
break;
}
- /* If no new CQE seen, return without updating cq_db. */
- if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
- *no_cq = true;
- return rcvd_pkt;
- }
- /* Update the consumer indexes for non-compressed CQEs. */
- MLX5_ASSERT(nocmp_n <= pkts_n);
- rxq->cq_ci += nocmp_n;
- rxq->rq_pi += nocmp_n;
- rcvd_pkt += nocmp_n;
#ifdef MLX5_PMD_SOFT_COUNTERS
rxq->stats.ipackets += nocmp_n;
rxq->stats.ibytes += rcvd_byte;
#endif
- /* Decompress the last CQE if compressed. */
- if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {
- MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
- rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
- &elts[nocmp_n]);
- /* Return more packets if needed. */
- if (nocmp_n < pkts_n) {
- uint16_t n = rxq->decompressed;
-
- n = RTE_MIN(n, pkts_n - nocmp_n);
- rxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n);
- rxq->rq_pi += n;
- rcvd_pkt += n;
- rxq->decompressed -= n;
- }
- }
- rte_io_wmb();
- *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
- *no_cq = !rcvd_pkt;
- return rcvd_pkt;
+ if (comp_idx == n)
+ *comp = comp_idx;
+ return nocmp_n;
}
#endif /* RTE_PMD_MLX5_RXTX_VEC_NEON_H_ */
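/*
 * Editor's illustration, not part of the patch: a standalone model of the
 * comp_idx hand-off between the per-arch rxq_cq_process_v() variants and the
 * common rxq_burst_v(). comp_idx records where a compressed CQE was found
 * inside the last group of MLX5_VPMD_DESCS_PER_LOOP completions, and *comp
 * is reported only when the scan stopped exactly there; otherwise the caller
 * keeps MLX5_VPMD_DESCS_PER_LOOP and skips decompression. All names below
 * are local to this sketch.
 */
#include <assert.h>
#include <stdint.h>

#define DESCS_PER_LOOP 4

/* 'P' = plain completion, 'C' = compressed completion, '.' = not ready. */
static uint16_t
scan_cqes(const char *flags, uint16_t n_cqe, uint64_t *comp)
{
	uint64_t comp_idx = DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint64_t n = 0;
	uint16_t pos;

	for (pos = 0; pos < n_cqe; pos += DESCS_PER_LOOP) {
		uint64_t i;

		n = 0;
		for (i = 0; i < DESCS_PER_LOOP; ++i) {
			if (flags[pos + i] != 'P') {
				if (flags[pos + i] == 'C')
					comp_idx = i;
				break;
			}
			n++;
		}
		nocmp_n += (uint16_t)n;
		if (n != DESCS_PER_LOOP)
			break;
	}
	if (comp_idx == n)
		*comp = comp_idx;
	return nocmp_n;
}

int main(void)
{
	uint64_t comp = DESCS_PER_LOOP;
	/* Six plain CQEs followed by a compressed one. */
	uint16_t nocmp_n = scan_cqes("PPPPPPC.", 8, &comp);

	assert(nocmp_n == 6);
	assert(comp == 2);	/* compressed CQE at slot 2 of the last group */
	return 0;
}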
/**
* Store free buffers to RX SW ring.
*
- * @param rxq
- * Pointer to RX queue structure.
+ * @param elts
+ * Pointer to SW ring to be filled.
* @param pkts
* Pointer to array of packets to be stored.
* @param pkts_n
* Number of packets to be stored.
*/
static inline void
-rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
+rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
{
- const uint16_t q_mask = (1 << rxq->elts_n) - 1;
- struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
unsigned int pos;
uint16_t p = n & -2;
rxq->stats.ipackets += mcqe_n;
rxq->stats.ibytes += rcvd_byte;
#endif
- rxq->cq_ci += mcqe_n;
return mcqe_n;
}
pinfo1 = _mm_unpackhi_epi32(cqes[2], cqes[3]);
ptype = _mm_unpacklo_epi64(pinfo0, pinfo1);
if (rxq->mark) {
- const __m128i pinfo_ft_mask =
- _mm_set_epi32(0xffffff00, 0xffffff00,
- 0xffffff00, 0xffffff00);
+ const __m128i pinfo_ft_mask = _mm_set1_epi32(0xffffff00);
const __m128i fdir_flags = _mm_set1_epi32(PKT_RX_FDIR);
__m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID);
__m128i flow_tag, invalid_mask;
}
/**
- * Receive burst of packets. An errored completion also consumes a mbuf, but the
- * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
- * before returning to application.
+ * Process a non-compressed completion and fill in mbufs in RX SW ring
+ * with data extracted from the title completion descriptor.
*
* @param rxq
* Pointer to RX queue structure.
+ * @param cq
+ * Pointer to completion array having a non-compressed completion at first.
+ * @param elts
+ * Pointer to SW ring to be filled. The first mbuf has to be pre-built from
+ * the title completion descriptor to be copied to the rest of mbufs.
* @param[out] pkts
* Array to store received packets.
* @param pkts_n
* Maximum number of packets in array.
* @param[out] err
* Pointer to a flag. Set non-zero value if pkts array has at least one error
* packet to handle.
- * @param[out] no_cq
- * Pointer to a boolean. Set true if no new CQE seen.
+ * @param[out] comp
+ * Pointer to an index. Set it to the index of the first compressed
+ * completion, if any.
*
* @return
- * Number of packets received including errors (<= pkts_n).
+ * Number of CQEs successfully processed.
*/
static inline uint16_t
-rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint64_t *err, bool *no_cq)
+rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
+ struct rte_mbuf **elts, struct rte_mbuf **pkts,
+ uint16_t pkts_n, uint64_t *err, uint64_t *comp)
{
const uint16_t q_n = 1 << rxq->cqe_n;
const uint16_t q_mask = q_n - 1;
- volatile struct mlx5_cqe *cq;
- struct rte_mbuf **elts;
unsigned int pos;
- uint64_t n;
- uint16_t repl_n;
+ uint64_t n = 0;
uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
uint16_t nocmp_n = 0;
- uint16_t rcvd_pkt = 0;
- unsigned int cq_idx = rxq->cq_ci & q_mask;
- unsigned int elts_idx;
unsigned int ownership = !!(rxq->cq_ci & (q_mask + 1));
- const __m128i owner_check =
- _mm_set_epi64x(0x0100000001000000LL, 0x0100000001000000LL);
- const __m128i opcode_check =
- _mm_set_epi64x(0xf0000000f0000000LL, 0xf0000000f0000000LL);
- const __m128i format_check =
- _mm_set_epi64x(0x0c0000000c000000LL, 0x0c0000000c000000LL);
- const __m128i resp_err_check =
- _mm_set_epi64x(0xe0000000e0000000LL, 0xe0000000e0000000LL);
+ const __m128i owner_check = _mm_set1_epi64x(0x0100000001000000LL);
+ const __m128i opcode_check = _mm_set1_epi64x(0xf0000000f0000000LL);
+ const __m128i format_check = _mm_set1_epi64x(0x0c0000000c000000LL);
+ const __m128i resp_err_check = _mm_set1_epi64x(0xe0000000e0000000LL);
#ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t rcvd_byte = 0;
/* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
0,
rxq->crc_present * RTE_ETHER_CRC_LEN);
const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0);
-
- MLX5_ASSERT(rxq->sges_n == 0);
- MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
- cq = &(*rxq->cqes)[cq_idx];
- rte_prefetch0(cq);
- rte_prefetch0(cq + 1);
- rte_prefetch0(cq + 2);
- rte_prefetch0(cq + 3);
- pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
- repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
- if (repl_n >= rxq->rq_repl_thresh)
- mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
- /* See if there're unreturned mbufs from compressed CQE. */
- rcvd_pkt = rxq->decompressed;
- if (rcvd_pkt > 0) {
- rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
- rxq_copy_mbuf_v(rxq, pkts, rcvd_pkt);
- rxq->rq_pi += rcvd_pkt;
- rxq->decompressed -= rcvd_pkt;
- pkts += rcvd_pkt;
- }
- elts_idx = rxq->rq_pi & q_mask;
- elts = &(*rxq->elts)[elts_idx];
- /* Not to overflow pkts array. */
- pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
- /* Not to cross queue end. */
- pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
- pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
- if (!pkts_n) {
- *no_cq = !rcvd_pkt;
- return rcvd_pkt;
- }
- /* At this point, there shouldn't be any remained packets. */
- MLX5_ASSERT(rxq->decompressed == 0);
/*
* A. load first Qword (8bytes) in one loop.
* B. copy 4 mbuf pointers from elts ring to returning pkts.
if (n != MLX5_VPMD_DESCS_PER_LOOP)
break;
}
- /* If no new CQE seen, return without updating cq_db. */
- if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
- *no_cq = true;
- return rcvd_pkt;
- }
- /* Update the consumer indexes for non-compressed CQEs. */
- MLX5_ASSERT(nocmp_n <= pkts_n);
- rxq->cq_ci += nocmp_n;
- rxq->rq_pi += nocmp_n;
- rcvd_pkt += nocmp_n;
#ifdef MLX5_PMD_SOFT_COUNTERS
rxq->stats.ipackets += nocmp_n;
rxq->stats.ibytes += rcvd_byte;
#endif
- /* Decompress the last CQE if compressed. */
- if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {
- MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
- rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
- &elts[nocmp_n]);
- /* Return more packets if needed. */
- if (nocmp_n < pkts_n) {
- uint16_t n = rxq->decompressed;
-
- n = RTE_MIN(n, pkts_n - nocmp_n);
- rxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n);
- rxq->rq_pi += n;
- rcvd_pkt += n;
- rxq->decompressed -= n;
- }
- }
- rte_compiler_barrier();
- *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
- *no_cq = !rcvd_pkt;
- return rcvd_pkt;
+ if (comp_idx == n)
+ *comp = comp_idx;
+ return nocmp_n;
}
#endif /* RTE_PMD_MLX5_RXTX_VEC_SSE_H_ */
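/*
 * Editor's illustration, not part of the patch: the _mm_set1_epi32() and
 * _mm_set1_epi64x() replacements above are purely cosmetic; broadcasting a
 * single lane produces the same 128-bit constant as the old _mm_set_*()
 * calls with identical arguments in every lane. A standalone check:
 */
#include <assert.h>
#include <string.h>
#include <emmintrin.h>

int main(void)
{
	const __m128i old_mask = _mm_set_epi32(0xffffff00, 0xffffff00,
					       0xffffff00, 0xffffff00);
	const __m128i new_mask = _mm_set1_epi32(0xffffff00);
	const __m128i old_check = _mm_set_epi64x(0x0100000001000000LL,
						 0x0100000001000000LL);
	const __m128i new_check = _mm_set1_epi64x(0x0100000001000000LL);

	assert(memcmp(&old_mask, &new_mask, sizeof(old_mask)) == 0);
	assert(memcmp(&old_check, &new_check, sizeof(old_check)) == 0);
	return 0;
}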