/* Maximum number of DS in WQE. */
#define MLX5_DSEG_MAX 63
+/* Completion mode (CE) bit offset in dword 2 of the WQE control segment. */
+#define MLX5_COMP_MODE_OFFSET 2
+
+/* Completion mode. */
+enum mlx5_completion_mode {
+ MLX5_COMP_ONLY_ERR = 0x0,
+ MLX5_COMP_ONLY_FIRST_ERR = 0x1,
+ MLX5_COMP_ALWAYS = 0x2,
+ MLX5_COMP_CQE_AND_EQE = 0x3,
+};
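+
+/*
+ * The selected mode is shifted into the CE bits of control segment
+ * dword 2 before the WQE is posted, e.g.:
+ * wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
+ */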
+
/* Subset of struct mlx5_wqe_eth_seg. */
struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
fclose(fd);
}
+/**
+ * Move QP from error state to running state.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param qp
+ * Pointer to the QP to recover.
+ *
+ * @return
+ * 0 on success, else errno value.
+ */
+static int
+tx_recover_qp(struct mlx5_txq_data *txq, struct ibv_qp *qp)
+{
+ int ret;
+ struct ibv_qp_attr mod = {
+ .qp_state = IBV_QPS_RESET,
+ .port_num = 1,
+ };
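+ /* Walk the QP through RESET -> INIT -> RTR -> RTS to restart it. */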
+ ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
+ if (ret) {
+ DRV_LOG(ERR, "Cannot change the Tx QP state to RESET %d\n",
+ ret);
+ return ret;
+ }
+ mod.qp_state = IBV_QPS_INIT;
+ ret = mlx5_glue->modify_qp(qp, &mod,
+ (IBV_QP_STATE | IBV_QP_PORT));
+ if (ret) {
+ DRV_LOG(ERR, "Cannot change Tx QP state to INIT %d\n", ret);
+ return ret;
+ }
+ mod.qp_state = IBV_QPS_RTR;
+ ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
+ if (ret) {
+ DRV_LOG(ERR, "Cannot change Tx QP state to RTR %d\n", ret);
+ return ret;
+ }
+ mod.qp_state = IBV_QPS_RTS;
+ ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
+ if (ret) {
+ DRV_LOG(ERR, "Cannot change Tx QP state to RTS %d\n", ret);
+ return ret;
+ }
+ txq->wqe_ci = 0;
+ txq->wqe_pi = 0;
+ txq->elts_comp = 0;
+ return 0;
+}
+
+/* Return 1 if the error CQE is already signed, otherwise sign it and return 0. */
+static int
+check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
+{
+ static const uint8_t magic[] = "seen";
+ int ret = 1;
+ unsigned int i;
+
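+ /*
+ * Compare the CQE reserved area with the magic string byte by byte;
+ * on the first mismatch, sign the rest and report the CQE as new.
+ */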
+ for (i = 0; i < sizeof(magic); ++i)
+ if (!ret || err_cqe->rsvd1[i] != magic[i]) {
+ ret = 0;
+ err_cqe->rsvd1[i] = magic[i];
+ }
+ return ret;
+}
+
+/**
+ * Handle error CQE.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param err_cqe
+ * Pointer to the error CQE.
+ *
+ * @return
+ * The last Tx buffer element to free.
+ */
+uint16_t
+mlx5_tx_error_cqe_handle(struct mlx5_txq_data *txq,
+ volatile struct mlx5_err_cqe *err_cqe)
+{
+ if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
+ const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq, struct mlx5_txq_ctrl, txq);
+ uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter);
+ int seen = check_err_cqe_seen(err_cqe);
+
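+ /*
+ * For a CQE not signed before, dump the error CQ and SQ to a file,
+ * up to max_dump_files_num dumps per Tx queue.
+ */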
+ if (!seen && txq_ctrl->dump_file_n <
+ txq_ctrl->priv->config.max_dump_files_num) {
+ MKSTR(err_str, "Unexpected CQE error syndrome "
+ "0x%02x CQN = %u SQN = %u wqe_counter = %u "
+ "wq_ci = %u cq_ci = %u", err_cqe->syndrome,
+ txq_ctrl->cqn, txq->qp_num_8s >> 8,
+ rte_be_to_cpu_16(err_cqe->wqe_counter),
+ txq->wqe_ci, txq->cq_ci);
+ MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u",
+ PORT_ID(txq_ctrl->priv), txq->idx,
+ txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc());
+ mlx5_dump_debug_information(name, NULL, err_str, 0);
+ mlx5_dump_debug_information(name, "MLX5 Error CQ:",
+ (const void *)((uintptr_t)
+ &(*txq->cqes)[0]),
+ sizeof(*err_cqe) *
+ (1 << txq->cqe_n));
+ mlx5_dump_debug_information(name, "MLX5 Error SQ:",
+ (const void *)((uintptr_t)
+ tx_mlx5_wqe(txq, 0)),
+ MLX5_WQE_SIZE *
+ (1 << txq->wqe_n));
+ txq_ctrl->dump_file_n++;
+ }
+ if (!seen)
+ /*
+ * Count errors in units of WQEs.
+ * Later this can be improved to count error packets,
+ * for example, by parsing the SQ to find how many packets
+ * should be counted for each WQE.
+ */
+ txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
+ new_wqe_pi) & wqe_m;
+ if ((rte_eal_process_type() == RTE_PROC_PRIMARY) &&
+ tx_recover_qp(txq, txq_ctrl->ibv->qp) == 0) {
+ txq->cq_ci++;
+ /* Release all the remaining buffers. */
+ return txq->elts_head;
+ }
+ /* Recovery failed, try again later on the same WQE. */
+ } else {
+ txq->cq_ci++;
+ }
+ /* Do not release buffers. */
+ return txq->elts_tail;
+}
+
/**
* DPDK callback for TX.
*
wqe->ctrl = (rte_v128u32_t){
rte_cpu_to_be_32(txq->wqe_ci << 8),
rte_cpu_to_be_32(txq->qp_num_8s | 1),
- 0,
+ rte_cpu_to_be_32
+ (MLX5_COMP_ONLY_FIRST_ERR <<
+ MLX5_COMP_MODE_OFFSET),
0,
};
ds = 1;
rte_cpu_to_be_32((txq->wqe_ci << 8) |
MLX5_OPCODE_TSO),
rte_cpu_to_be_32(txq->qp_num_8s | ds),
- 0,
+ rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+ MLX5_COMP_MODE_OFFSET),
0,
};
wqe->eseg = (rte_v128u32_t){
rte_cpu_to_be_32((txq->wqe_ci << 8) |
MLX5_OPCODE_SEND),
rte_cpu_to_be_32(txq->qp_num_8s | ds),
- 0,
+ rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+ MLX5_COMP_MODE_OFFSET),
0,
};
wqe->eseg = (rte_v128u32_t){
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
- last_wqe->ctrl2 = rte_cpu_to_be_32(8);
+ last_wqe->ctrl2 = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+ MLX5_COMP_MODE_OFFSET);
/* Save elts_head in unused "immediate" field of WQE. */
last_wqe->ctrl3 = txq->elts_head;
txq->elts_comp = 0;
mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
(txq->wqe_ci << 8) |
MLX5_OPCODE_TSO);
- mpw->wqe->ctrl[2] = 0;
+ mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+ MLX5_COMP_MODE_OFFSET);
mpw->wqe->ctrl[3] = 0;
mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
- wqe->ctrl[2] = rte_cpu_to_be_32(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+ MLX5_COMP_MODE_OFFSET);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
(txq->wqe_ci << 8) |
MLX5_OPCODE_TSO);
- mpw->wqe->ctrl[2] = 0;
+ mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+ MLX5_COMP_MODE_OFFSET);
mpw->wqe->ctrl[3] = 0;
mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
mpw->wqe->eseg.inline_hdr_sz = 0;
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
- wqe->ctrl[2] = rte_cpu_to_be_32(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+ MLX5_COMP_MODE_OFFSET);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
(txq->wqe_ci << 8) |
MLX5_OPCODE_ENHANCED_MPSW);
- mpw->wqe->ctrl[2] = 0;
+ mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+ MLX5_COMP_MODE_OFFSET);
mpw->wqe->ctrl[3] = 0;
memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
if (unlikely(padding)) {
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
- wqe->ctrl[2] = rte_cpu_to_be_32(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+ MLX5_COMP_MODE_OFFSET);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
struct mlx5_priv *priv; /* Back pointer to private data. */
off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
void *bf_reg; /* BlueFlame register from Verbs. */
+ uint32_t cqn; /* CQ number. */
+ uint16_t dump_file_n; /* Number of dump files. */
};
#define MLX5_TX_BFREG(txq) \
uint16_t pkts_n);
uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
+__rte_noinline uint16_t mlx5_tx_error_cqe_handle(struct mlx5_txq_data *txq,
+ volatile struct mlx5_err_cqe *err_cqe);
uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
}
+/**
+ * Handle the next CQE.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ *
+ * @return
+ * The last Tx buffer element to free.
+ */
+static __rte_always_inline uint16_t
+mlx5_tx_cqe_handle(struct mlx5_txq_data *txq)
+{
+ const unsigned int cqe_n = 1 << txq->cqe_n;
+ const unsigned int cqe_cnt = cqe_n - 1;
+ uint16_t last_elts;
+ union {
+ volatile struct mlx5_cqe *cqe;
+ volatile struct mlx5_err_cqe *err_cqe;
+ } u = {
+ .cqe = &(*txq->cqes)[txq->cq_ci & cqe_cnt],
+ };
+ int ret = check_cqe(u.cqe, cqe_n, txq->cq_ci);
+
+ if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+ if (unlikely(ret == MLX5_CQE_STATUS_ERR))
+ last_elts = mlx5_tx_error_cqe_handle(txq, u.err_cqe);
+ else
+ /* Do not release buffers. */
+ return txq->elts_tail;
+ } else {
+ uint16_t new_wqe_pi = rte_be_to_cpu_16(u.cqe->wqe_counter);
+ volatile struct mlx5_wqe_ctrl *ctrl =
+ (volatile struct mlx5_wqe_ctrl *)
+ tx_mlx5_wqe(txq, new_wqe_pi);
+
+ /* Release completion burst buffers. */
+ last_elts = ctrl->ctrl3;
+ txq->wqe_pi = new_wqe_pi;
+ txq->cq_ci++;
+ }
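+ /* Update the consumer index. */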
+ rte_compiler_barrier();
+ *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
+ return last_elts;
+}
+
/**
* Manage TX completions.
*
{
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
- const unsigned int cqe_n = 1 << txq->cqe_n;
- const unsigned int cqe_cnt = cqe_n - 1;
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
- uint16_t cq_ci = txq->cq_ci;
- volatile struct mlx5_cqe *cqe = NULL;
- volatile struct mlx5_wqe_ctrl *ctrl;
struct rte_mbuf *m, *free[elts_n];
struct rte_mempool *pool = NULL;
unsigned int blk_n = 0;
- cqe = &(*txq->cqes)[cq_ci & cqe_cnt];
- if (unlikely(check_cqe(cqe, cqe_n, cq_ci)))
- return;
-#ifndef NDEBUG
- if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
- (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
- if (!check_cqe_seen(cqe)) {
- DRV_LOG(ERR, "unexpected error CQE, Tx stopped");
- rte_hexdump(stderr, "MLX5 TXQ:",
- (const void *)((uintptr_t)txq->wqes),
- ((1 << txq->wqe_n) *
- MLX5_WQE_SIZE));
- }
- return;
- }
-#endif /* NDEBUG */
- ++cq_ci;
- rte_cio_rmb();
- txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
- ctrl = (volatile struct mlx5_wqe_ctrl *)
- tx_mlx5_wqe(txq, txq->wqe_pi);
- elts_tail = ctrl->ctrl3;
+ elts_tail = mlx5_tx_cqe_handle(txq);
assert((elts_tail & elts_m) < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
++elts_free;
}
#endif
- txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
- /* Update the consumer index. */
- rte_compiler_barrier();
- *txq->cq_db = rte_cpu_to_be_32(cq_ci);
}
/**
ctrl = vreinterpretq_u8_u32((uint32x4_t) {
MLX5_OPC_MOD_MPW << 24 |
txq->wqe_ci << 8 | MLX5_OPCODE_TSO,
- txq->qp_num_8s | ds, 0, 0});
+ txq->qp_num_8s | ds,
+ MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET, 0});
ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
vst1q_u8((void *)t_wqe, ctrl);
/* Fill ESEG in the header. */
if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
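+ /* Request a completion. */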
- wqe->ctrl[2] = rte_cpu_to_be_32(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+ MLX5_COMP_MODE_OFFSET);
wqe->ctrl[3] = txq->elts_head;
txq->elts_comp = 0;
}
unsigned int pos;
uint16_t max_elts;
uint16_t max_wqe;
- uint32_t comp_req = 0;
+ uint32_t comp_req;
const uint16_t wq_n = 1 << txq->wqe_n;
const uint16_t wq_mask = wq_n - 1;
uint16_t wq_idx = txq->wqe_ci & wq_mask;
}
if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {
txq->elts_comp += pkts_n;
+ comp_req = MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET;
} else {
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request a completion. */
txq->elts_comp = 0;
- comp_req = 8;
+ comp_req = MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET;
}
/* Fill CTRL in the header. */
ctrl = vreinterpretq_u8_u32((uint32x4_t) {
} while (--segs_n);
++wqe_ci;
/* Fill CTRL in the header. */
- ctrl = _mm_set_epi32(0, 0, txq->qp_num_8s | ds,
+ ctrl = _mm_set_epi32(0, MLX5_COMP_ONLY_FIRST_ERR <<
+ MLX5_COMP_MODE_OFFSET, txq->qp_num_8s | ds,
MLX5_OPC_MOD_MPW << 24 |
txq->wqe_ci << 8 | MLX5_OPCODE_TSO);
ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
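+ /* Request a completion. */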
- wqe->ctrl[2] = rte_cpu_to_be_32(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+ MLX5_COMP_MODE_OFFSET);
wqe->ctrl[3] = txq->elts_head;
txq->elts_comp = 0;
}
unsigned int pos;
uint16_t max_elts;
uint16_t max_wqe;
- uint32_t comp_req = 0;
+ uint32_t comp_req;
const uint16_t wq_n = 1 << txq->wqe_n;
const uint16_t wq_mask = wq_n - 1;
uint16_t wq_idx = txq->wqe_ci & wq_mask;
}
if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {
txq->elts_comp += pkts_n;
+ comp_req = MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET;
} else {
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request a completion. */
txq->elts_comp = 0;
- comp_req = 8;
+ comp_req = MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET;
}
/* Fill CTRL in the header. */
ctrl = _mm_set_epi32(txq->elts_head, comp_req,
attr.cq = (struct ibv_cq_init_attr_ex){
.comp_mask = 0,
};
- cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
- ((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
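+ /* One CQE per MLX5_TX_COMP_THRESH descriptors, plus one extra slot. */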
+ cqe_n = desc / MLX5_TX_COMP_THRESH + 1;
if (is_empw_burst_func(tx_pkt_burst))
cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
tmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
txq_ibv->cq = tmpl.cq;
rte_atomic32_inc(&txq_ibv->refcnt);
txq_ctrl->bf_reg = qp.bf.reg;
+ txq_ctrl->cqn = cq_info.cqn;
txq_uar_init(txq_ctrl);
if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;