/* Maximum number of special flows. */
#define MLX5_MAX_SPECIAL_FLOWS 4
-/* Request send completion once in every 64 sends, might be less. */
-#define MLX5_PMD_TX_PER_COMP_REQ 64
+/*
+ * Request TX completion every time descriptors reach this threshold since
+ * the previous request. Must be a power of two for performance reasons.
+ */
+#define MLX5_TX_COMP_THRESH 32
/* RSS Indirection table size. */
#define RSS_INDIRECTION_TABLE_SIZE 256
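
The new MLX5_TX_COMP_THRESH above drives a simple piece of accounting: the number of descriptors sent since the last completion request is carried across bursts in txq->elts_comp, and the burst that pushes it past the threshold asks for a completion on its last WQE. A rough standalone sketch of that behaviour, under illustrative names (fake_txq, burst_requests_completion), not driver code:

/* Standalone sketch of the MLX5_TX_COMP_THRESH accounting (illustrative only). */
#include <stdint.h>
#include <stdio.h>

#define TX_COMP_THRESH 32 /* stands in for MLX5_TX_COMP_THRESH */

struct fake_txq {
        uint16_t elts_comp; /* descriptors sent since the last request */
};

/* Return nonzero when the burst that just sent pkts descriptors
 * should request a completion on its last WQE. */
static int
burst_requests_completion(struct fake_txq *txq, unsigned int pkts)
{
        unsigned int comp = txq->elts_comp + pkts;

        if (comp >= TX_COMP_THRESH) {
                txq->elts_comp = 0;
                return 1;
        }
        txq->elts_comp = comp;
        return 0;
}

int
main(void)
{
        struct fake_txq txq = { .elts_comp = 0 };
        static const unsigned int bursts[] = { 10, 10, 10, 5, 40 };
        unsigned int i;

        for (i = 0; i != 5; ++i)
                printf("burst of %2u -> completion requested: %d\n",
                       bursts[i], burst_requests_completion(&txq, bursts[i]));
        return 0;
}
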
* Manage TX completions.
*
* When sending a burst, mlx5_tx_burst() posts several WRs.
- * To improve performance, a completion event is only required once every
- * MLX5_PMD_TX_PER_COMP_REQ sends. Doing so discards completion information
- * for other WRs, but this information would not be used anyway.
*
* @param txq
* Pointer to TX queue structure.
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
- unsigned int wqe_ci = (unsigned int)-1;
+ volatile struct mlx5_cqe64 *cqe = NULL;
+ volatile union mlx5_wqe *wqe;
do {
- unsigned int idx = cq_ci & cqe_cnt;
- volatile struct mlx5_cqe64 *cqe = &(*txq->cqes)[idx].cqe64;
- if (check_cqe64(cqe, cqe_n, cq_ci) == 1)
+ volatile struct mlx5_cqe64 *tmp;
+ tmp = &(*txq->cqes)[cq_ci & cqe_cnt].cqe64;
+ if (check_cqe64(tmp, cqe_n, cq_ci))
break;
+ cqe = tmp;
#ifndef NDEBUG
if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
if (!check_cqe64_seen(cqe))
return;
}
#endif /* NDEBUG */
- wqe_ci = ntohs(cqe->wqe_counter);
++cq_ci;
} while (1);
- if (unlikely(wqe_ci == (unsigned int)-1))
+ if (unlikely(cqe == NULL))
return;
+ wqe = &(*txq->wqes)[ntohs(cqe->wqe_counter) & (txq->wqe_n - 1)];
+ elts_tail = wqe->wqe.ctrl.data[3];
+ assert(elts_tail < txq->wqe_n);
/* Free buffers. */
- elts_tail = (wqe_ci + 1) & (elts_n - 1);
- do {
+ while (elts_free != elts_tail) {
struct rte_mbuf *elt = (*txq->elts)[elts_free];
unsigned int elts_free_next =
(elts_free + 1) & (elts_n - 1);
/* Only one segment needs to be freed. */
rte_pktmbuf_free_seg(elt);
elts_free = elts_free_next;
- } while (elts_free != elts_tail);
+ }
txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
/* Update the consumer index. */
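
The completion no longer has to report how many packets finished: the CQE's wqe_counter locates the completed WQE, and the elts_head value that mlx5_tx_burst() stashed in that WQE's spare control word becomes the new elts_tail, so everything before it can be freed in one masked walk over the power-of-two ring. A minimal sketch of that walk, with an illustrative ring size and names:

/* Sketch: freeing a power-of-two ring up to the index reported by a completion. */
#include <stdio.h>

#define RING_SIZE 8 /* power of two, like elts_n */

static unsigned int
free_up_to(unsigned int tail, unsigned int target)
{
        while (tail != target) {
                printf("free slot %u\n", tail); /* rte_pktmbuf_free_seg() in the driver */
                tail = (tail + 1) & (RING_SIZE - 1);
        }
        return tail;
}

int
main(void)
{
        /* The completed WQE carried elts_head == 2; the ring has wrapped. */
        unsigned int tail = free_up_to(6, 2);

        printf("new tail: %u\n", tail);
        return 0;
}
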
const unsigned int elts_n = txq->elts_n;
unsigned int i;
unsigned int max;
+ unsigned int comp;
volatile union mlx5_wqe *wqe;
struct rte_mbuf *buf;
buf->vlan_tci);
else
mlx5_wqe_write(txq, wqe, addr, length, lkey);
- /* Request completion if needed. */
- if (unlikely(--txq->elts_comp == 0)) {
- wqe->wqe.ctrl.data[2] = htonl(8);
- txq->elts_comp = txq->elts_comp_cd_init;
- } else {
- wqe->wqe.ctrl.data[2] = 0;
- }
+ wqe->wqe.ctrl.data[2] = 0;
/* Should we enable HW CKSUM offload */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
/* Take a shortcut if nothing must be sent. */
if (unlikely(i == 0))
return 0;
+ /* Check whether completion threshold has been reached. */
+ comp = txq->elts_comp + i;
+ if (comp >= MLX5_TX_COMP_THRESH) {
+ /* Request completion on last WQE. */
+ wqe->wqe.ctrl.data[2] = htonl(8);
+ /* Save elts_head in unused "immediate" field of WQE. */
+ wqe->wqe.ctrl.data[3] = elts_head;
+ txq->elts_comp = 0;
+ } else {
+ txq->elts_comp = comp;
+ }
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment sent packets counter. */
txq->stats.opackets += i;
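
Note the asymmetry in how the two control words are written above: data[2] carries the completion request and is consumed by the device, so it is stored with htonl(), while data[3] is, per the comment, an otherwise unused immediate field that only the driver reads back in txq_complete(), so elts_head can stay in host byte order. A hedged sketch of that round trip, where fake_ctrl is an illustrative stand-in and not the real WQE layout:

/* Sketch: byte-order handling for the two WQE control words used above (illustrative). */
#include <arpa/inet.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct fake_ctrl {
        uint32_t data[4]; /* stands in for wqe->wqe.ctrl.data[] */
};

int
main(void)
{
        struct fake_ctrl ctrl = { { 0 } };
        uint16_t elts_head = 123;

        ctrl.data[2] = htonl(8);        /* device-visible: big endian */
        ctrl.data[3] = elts_head;       /* driver-only: host order is fine */

        printf("data[2] raw: 0x%08" PRIx32 "\n", ctrl.data[2]);
        printf("elts_head read back: %u\n", (unsigned int)(uint16_t)ctrl.data[3]);
        return 0;
}
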
struct txq {
uint16_t elts_head; /* Current index in (*elts)[]. */
uint16_t elts_tail; /* First element awaiting completion. */
- uint16_t elts_comp_cd_init; /* Initial value for countdown. */
- uint16_t elts_comp; /* Elements before asking a completion. */
+ uint16_t elts_comp; /* Counter since last completion request. */
uint16_t elts_n; /* (*elts)[] length. */
uint16_t cq_ci; /* Consumer index for completion queue. */
uint16_t cqe_n; /* Number of CQ elements. */
uint16_t wqe_n; /* Number of WQ elements. */
uint16_t bf_offset; /* Blueflame offset. */
uint16_t bf_buf_size; /* Blueflame size. */
+ uint32_t qp_num_8s; /* QP number shifted by 8. */
volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
volatile union mlx5_wqe (*wqes)[]; /* Work queue. */
volatile uint32_t *qp_db; /* Work queue doorbell. */
} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
struct rte_mbuf *(*elts)[]; /* TX elements. */
struct mlx5_txq_stats stats; /* TX queue counters. */
- uint32_t qp_num_8s; /* QP number shifted by 8. */
} __rte_cache_aligned;
/* TX queue control descriptor. */
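
Relocating qp_num_8s from the tail of struct txq into the group of 16-bit fields presumably keeps the hot TX-path fields packed together near the front of the cache-aligned structure and avoids a padding hole. One way to see the packing effect, using a simplified stand-in limited to the fields visible in this hunk (offsets and sizes will differ from the real struct txq):

/* Sketch: packing effect of moving qp_num_8s (simplified stand-in, not the real struct). */
#include <stdint.h>
#include <stdio.h>

struct txq_tail_qpn {           /* qp_num_8s left at the tail */
        uint16_t elts_head, elts_tail, elts_comp, elts_n, cq_ci;
        uint16_t cqe_n, wqe_n, bf_offset, bf_buf_size;
        const void *cqes;
        const void *wqes;
        volatile uint32_t *qp_db;
        uint32_t qp_num_8s;
};

struct txq_moved_qpn {          /* qp_num_8s next to the 16-bit group */
        uint16_t elts_head, elts_tail, elts_comp, elts_n, cq_ci;
        uint16_t cqe_n, wqe_n, bf_offset, bf_buf_size;
        uint32_t qp_num_8s;
        const void *cqes;
        const void *wqes;
        volatile uint32_t *qp_db;
};

int
main(void)
{
        printf("qp_num_8s at tail:  %zu bytes\n", sizeof(struct txq_tail_qpn));
        printf("qp_num_8s moved up: %zu bytes\n", sizeof(struct txq_moved_qpn));
        return 0;
}
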
DEBUG("%p: allocated and configured %u WRs", (void *)txq_ctrl, elts_n);
txq_ctrl->txq.elts_head = 0;
txq_ctrl->txq.elts_tail = 0;
+ txq_ctrl->txq.elts_comp = 0;
}
/**
DEBUG("%p: freeing WRs", (void *)txq_ctrl);
txq_ctrl->txq.elts_head = 0;
txq_ctrl->txq.elts_tail = 0;
+ txq_ctrl->txq.elts_comp = 0;
while (elts_tail != elts_head) {
struct rte_mbuf *elt = (*elts)[elts_tail];
goto error;
}
(void)conf; /* Thresholds configuration (ignored). */
+ assert(desc > MLX5_TX_COMP_THRESH);
tmpl.txq.elts_n = desc;
- /*
- * Request send completion every MLX5_PMD_TX_PER_COMP_REQ packets or
- * at least 4 times per ring.
- */
- tmpl.txq.elts_comp_cd_init =
- ((MLX5_PMD_TX_PER_COMP_REQ < (desc / 4)) ?
- MLX5_PMD_TX_PER_COMP_REQ : (desc / 4));
- tmpl.txq.elts_comp = tmpl.txq.elts_comp_cd_init;
/* MRs will be registered in mp2mr[] later. */
attr.rd = (struct ibv_exp_res_domain_init_attr){
.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
.res_domain = tmpl.rd,
};
tmpl.cq = ibv_exp_create_cq(priv->ctx,
- (desc / tmpl.txq.elts_comp_cd_init) - 1,
+ (((desc / MLX5_TX_COMP_THRESH) - 1) ?
+ ((desc / MLX5_TX_COMP_THRESH) - 1) : 1),
NULL, NULL, 0, &attr.cq);
if (tmpl.cq == NULL) {
ret = ENOMEM;
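
Since completions are now requested at most once per MLX5_TX_COMP_THRESH descriptors, the CQ no longer needs one entry per send; the expression above asks for roughly desc / MLX5_TX_COMP_THRESH entries while never requesting zero. A small sketch of the values it produces, assuming desc has already been clamped and rounded as in mlx5_tx_queue_setup():

/* Sketch: CQ depth requested for a few ring sizes (illustrative). */
#include <stdio.h>

#define TX_COMP_THRESH 32 /* stands in for MLX5_TX_COMP_THRESH */

static unsigned int
cq_depth(unsigned int desc)
{
        unsigned int n = (desc / TX_COMP_THRESH) - 1;

        return n ? n : 1; /* never request an empty CQ */
}

int
main(void)
{
        static const unsigned int sizes[] = { 64, 128, 256, 1024, 4096 };
        unsigned int i;

        for (i = 0; i != 5; ++i)
                printf("desc=%4u -> CQ depth=%u\n", sizes[i], cq_depth(sizes[i]));
        return 0;
}
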
return -E_RTE_SECONDARY;
priv_lock(priv);
+ if (desc <= MLX5_TX_COMP_THRESH) {
+ WARN("%p: number of descriptors requested for TX queue %u"
+ " must be higher than MLX5_TX_COMP_THRESH, using"
+ " %u instead of %u",
+ (void *)dev, idx, MLX5_TX_COMP_THRESH + 1, desc);
+ desc = MLX5_TX_COMP_THRESH + 1;
+ }
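
Combined with the power-of-two rounding just below, this clamp means any request of MLX5_TX_COMP_THRESH descriptors or fewer ends up using the next power of two above the threshold. A standalone sketch of the combined effect, where is_pow2() and round_up_pow2() are local stand-ins for rte_is_power_of_2() and log2above():

/* Sketch: effect of the clamp plus the power-of-two rounding below (illustrative). */
#include <stdio.h>

#define TX_COMP_THRESH 32 /* stands in for MLX5_TX_COMP_THRESH */

/* Local stand-ins for rte_is_power_of_2() and log2above(). */
static int
is_pow2(unsigned int v)
{
        return v && !(v & (v - 1));
}

static unsigned int
round_up_pow2(unsigned int v)
{
        unsigned int r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

int
main(void)
{
        static const unsigned int requested[] = { 16, 32, 100, 512 };
        unsigned int i;

        for (i = 0; i != 4; ++i) {
                unsigned int desc = requested[i];

                if (desc <= TX_COMP_THRESH)
                        desc = TX_COMP_THRESH + 1;
                if (!is_pow2(desc))
                        desc = round_up_pow2(desc);
                printf("requested %3u -> using %u\n", requested[i], desc);
        }
        return 0;
}
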
if (!rte_is_power_of_2(desc)) {
desc = 1 << log2above(desc);
WARN("%p: increased number of descriptors in TX queue %u"