#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
+#include <rte_cycles.h>
#include "mlx5.h"
#include "mlx5_utils.h"
}
/**
- * DPDK callback to check the status of a rx descriptor.
+ * Internal function to compute the number of used descriptors in an RX queue
*
- * @param rx_queue
- * The rx queue.
- * @param[in] offset
- * The index of the descriptor in the ring.
+ * @param rxq
+ * The Rx queue.
*
* @return
- * The status of the tx descriptor.
+ * The number of used Rx descriptors.
*/
-int
-mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
+static uint32_t
+rx_queue_count(struct mlx5_rxq_data *rxq)
{
- struct mlx5_rxq_data *rxq = rx_queue;
struct rxq_zip *zip = &rxq->zip;
volatile struct mlx5_cqe *cqe;
const unsigned int cqe_n = (1 << rxq->cqe_n);
cq_ci = rxq->cq_ci;
}
cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
- while (check_cqe(cqe, cqe_n, cq_ci) == 0) {
+ while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
int8_t op_own;
unsigned int n;
cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
}
used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
- if (offset < used)
+ return used;
+}
+
+/**
+ * DPDK callback to check the status of a Rx descriptor.
+ *
+ * Only supported while the device uses mlx5_rx_burst as its Rx burst
+ * function. The descriptor is reported as done when its offset is below
+ * the number of used descriptors returned by rx_queue_count().
+ *
+ * @param rx_queue
+ * The Rx queue.
+ * @param[in] offset
+ * The index of the descriptor in the ring.
+ *
+ * @return
+ * RTE_ETH_RX_DESC_DONE if offset is below the number of used descriptors,
+ * RTE_ETH_RX_DESC_AVAIL otherwise. On failure a negative errno value is
+ * returned and rte_errno is set: ENOTSUP if the Rx burst function is not
+ * mlx5_rx_burst, EINVAL if offset is not below (1 << rxq->elts_n).
+ */
+int
+mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
+{
+ struct mlx5_rxq_data *rxq = rx_queue;
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+ struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);
+
+ if (dev->rx_pkt_burst != mlx5_rx_burst) {
+ rte_errno = ENOTSUP;
+ return -rte_errno;
+ }
+ if (offset >= (1 << rxq->elts_n)) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ if (offset < rx_queue_count(rxq))
return RTE_ETH_RX_DESC_DONE;
return RTE_ETH_RX_DESC_AVAIL;
}
+/**
+ * DPDK callback to get the number of used descriptors in a Rx queue.
+ *
+ * Only supported while the device uses mlx5_rx_burst as its Rx burst
+ * function.
+ *
+ * @param dev
+ * Pointer to the device structure.
+ *
+ * @param rx_queue_id
+ * The Rx queue index, used to look the queue up in priv->rxqs.
+ *
+ * @return
+ * The number of used Rx descriptors, as computed by rx_queue_count().
+ * On failure rte_errno is set (ENOTSUP if the Rx burst function is not
+ * mlx5_rx_burst, EINVAL if the queue is invalid) and -rte_errno is
+ * returned. Note that the return type is unsigned, so the negative
+ * value is converted to uint32_t.
+ */
+uint32_t
+mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_rxq_data *rxq;
+
+ if (dev->rx_pkt_burst != mlx5_rx_burst) {
+ rte_errno = ENOTSUP;
+ return -rte_errno;
+ }
+ rxq = (*priv->rxqs)[rx_queue_id];
+ if (!rxq) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ return rx_queue_count(rxq);
+}
+
+#define MLX5_SYSTEM_LOG_DIR "/var/log"
+/**
+ * Dump debug information to log file.
+ *
+ * The file is opened in append mode under MLX5_SYSTEM_LOG_DIR. If that
+ * fails, a warning is logged and the current directory is tried instead.
+ * If neither file can be opened, an error is logged and nothing is dumped.
+ * Three newlines are written after the dumped content.
+ *
+ * @param fname
+ * The file name.
+ * @param hex_title
+ * If not NULL this string is printed as a header to the output
+ * and the output will be in hexadecimal view.
+ * @param buf
+ * This is the buffer address to print out. When hex_title is NULL the
+ * buffer is printed with "%s", so it must then be a NUL-terminated string.
+ * @param hex_len
+ * The number of bytes to dump out. Only used when hex_title is not NULL.
+ */
+void
+mlx5_dump_debug_information(const char *fname, const char *hex_title,
+ const void *buf, unsigned int hex_len)
+{
+ FILE *fd;
+
+ MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname);
+ fd = fopen(path, "a+");
+ if (!fd) {
+ DRV_LOG(WARNING, "cannot open %s for debug dump\n",
+ path);
+ MKSTR(path2, "./%s", fname);
+ fd = fopen(path2, "a+");
+ if (!fd) {
+ DRV_LOG(ERR, "cannot open %s for debug dump\n",
+ path2);
+ return;
+ }
+ DRV_LOG(INFO, "New debug dump in file %s\n", path2);
+ } else {
+ DRV_LOG(INFO, "New debug dump in file %s\n", path);
+ }
+ if (hex_title)
+ rte_hexdump(fd, hex_title, buf, hex_len);
+ else
+ fprintf(fd, "%s", (const char *)buf);
+ fprintf(fd, "\n\n\n");
+ fclose(fd);
+}
+
/**
* DPDK callback for TX.
*
volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
unsigned int segs_n = 0;
const unsigned int max_inline = txq->max_inline;
+ uint64_t addr_64;
if (unlikely(!pkts_n))
return 0;
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
- /* A CQE slot must always be available. */
- assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
uint32_t swp_offsets = 0;
uint8_t swp_types = 0;
+ rte_be32_t metadata;
uint16_t tso_segsz = 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t total_length = 0;
cs_flags = txq_ol_cksum_to_cs(buf);
txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+ 0;
/* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
buf->vlan_tci);
- unsigned int len = 2 * ETHER_ADDR_LEN - 2;
+ unsigned int len = 2 * RTE_ETHER_ADDR_LEN - 2;
addr += 2;
length -= 2;
RTE_CACHE_LINE_SIZE);
copy_b = (addr_end > addr) ?
RTE_MIN((addr_end - addr), length) : 0;
- if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
+ if (copy_b && ((end - (uintptr_t)raw) >
+ (copy_b + sizeof(inl)))) {
/*
* One Dseg remains in the current WQE. To
* keep the computation positive, it is
ds = 3;
use_dseg:
/* Add the remaining packet as a simple ds. */
- addr = rte_cpu_to_be_64(addr);
+ addr_64 = rte_cpu_to_be_64(addr);
*dseg = (rte_v128u32_t){
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
- addr,
- addr >> 32,
+ addr_64,
+ addr_64 >> 32,
};
++ds;
if (!segs_n)
total_length += length;
#endif
/* Store segment information. */
- addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
+ addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
*dseg = (rte_v128u32_t){
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
- addr,
- addr >> 32,
+ addr_64,
+ addr_64 >> 32,
};
(*txq->elts)[++elts_head & elts_m] = buf;
if (--segs_n)
swp_offsets,
cs_flags | (swp_types << 8) |
(rte_cpu_to_be_16(tso_segsz) << 16),
- 0,
+ metadata,
(ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
};
} else {
wqe->eseg = (rte_v128u32_t){
swp_offsets,
cs_flags | (swp_types << 8),
- 0,
+ metadata,
(ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
};
}
/* Check whether completion threshold has been reached. */
comp = txq->elts_comp + i + j + k;
if (comp >= MLX5_TX_COMP_THRESH) {
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
last_wqe->ctrl2 = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
last_wqe->ctrl3 = txq->elts_head;
txq->elts_comp = 0;
-#ifndef NDEBUG
- ++txq->cq_pi;
-#endif
} else {
txq->elts_comp = comp;
}
mpw->wqe->eseg.inline_hdr_sz = 0;
mpw->wqe->eseg.rsvd0 = 0;
mpw->wqe->eseg.rsvd1 = 0;
- mpw->wqe->eseg.rsvd2 = 0;
+ mpw->wqe->eseg.flow_table_metadata = 0;
mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
(txq->wqe_ci << 8) |
MLX5_OPCODE_TSO);
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
- /* A CQE slot must always be available. */
- assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
uint32_t length;
unsigned int segs_n = buf->nb_segs;
uint32_t cs_flags;
+ rte_be32_t metadata;
/*
* Make sure there is enough room to store this packet and
max_elts -= segs_n;
--pkts_n;
cs_flags = txq_ol_cksum_to_cs(buf);
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+ 0;
/* Retrieve packet information. */
length = PKT_LEN(buf);
assert(length);
if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
((mpw.len != length) ||
(segs_n != 1) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags)))
mlx5_mpw_close(txq, &mpw);
if (mpw.state == MLX5_MPW_STATE_CLOSED) {
max_wqe -= 2;
mlx5_mpw_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
}
/* Multi-segment packets must be alone in their MPW. */
assert((segs_n == 1) || (mpw.pkts_n == 0));
if (comp >= MLX5_TX_COMP_THRESH) {
volatile struct mlx5_wqe *wqe = mpw.wqe;
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
-#ifndef NDEBUG
- ++txq->cq_pi;
-#endif
} else {
txq->elts_comp = comp;
}
mpw->wqe->eseg.cs_flags = 0;
mpw->wqe->eseg.rsvd0 = 0;
mpw->wqe->eseg.rsvd1 = 0;
- mpw->wqe->eseg.rsvd2 = 0;
+ mpw->wqe->eseg.flow_table_metadata = 0;
inl = (struct mlx5_wqe_inl_small *)
(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
mpw->data.raw = (uint8_t *)&inl->raw;
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
- /* A CQE slot must always be available. */
- assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
do {
struct rte_mbuf *buf = *(pkts++);
uintptr_t addr;
uint32_t length;
unsigned int segs_n = buf->nb_segs;
uint8_t cs_flags;
+ rte_be32_t metadata;
/*
* Make sure there is enough room to store this packet and
*/
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
cs_flags = txq_ol_cksum_to_cs(buf);
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+ 0;
/* Retrieve packet information. */
length = PKT_LEN(buf);
/* Start new session if packet differs. */
if (mpw.state == MLX5_MPW_STATE_OPENED) {
if ((mpw.len != length) ||
(segs_n != 1) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags))
mlx5_mpw_close(txq, &mpw);
} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
if ((mpw.len != length) ||
(segs_n != 1) ||
(length > inline_room) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags)) {
mlx5_mpw_inline_close(txq, &mpw);
inline_room =
max_wqe -= 2;
mlx5_mpw_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
} else {
if (unlikely(max_wqe < wqe_inl_n))
break;
max_wqe -= wqe_inl_n;
mlx5_mpw_inline_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
}
}
/* Multi-segment packets must be alone in their MPW. */
if (comp >= MLX5_TX_COMP_THRESH) {
volatile struct mlx5_wqe *wqe = mpw.wqe;
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
-#ifndef NDEBUG
- ++txq->cq_pi;
-#endif
} else {
txq->elts_comp = comp;
}
unsigned int mpw_room = 0;
unsigned int inl_pad = 0;
uint32_t inl_hdr;
+ uint64_t addr_64;
struct mlx5_mpw mpw = {
.state = MLX5_MPW_STATE_CLOSED,
};
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
- /* A CQE slot must always be available. */
- assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
unsigned int do_inline = 0; /* Whether inline is possible. */
uint32_t length;
uint8_t cs_flags;
+ rte_be32_t metadata;
/* Multi-segmented packet is handled in slow-path outside. */
assert(NB_SEGS(buf) == 1);
if (max_elts - j == 0)
break;
cs_flags = txq_ol_cksum_to_cs(buf);
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+ 0;
/* Retrieve packet information. */
length = PKT_LEN(buf);
/* Start new session if:
(length <= txq->inline_max_packet_sz &&
inl_pad + sizeof(inl_hdr) + length >
mpw_room) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags))
max_wqe -= mlx5_empw_close(txq, &mpw);
}
sizeof(inl_hdr) + length <= mpw_room &&
!txq->mpw_hdr_dseg;
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
} else {
/* Evaluate whether the next packet can be inlined.
* Inlininig is possible when:
((uintptr_t)mpw.data.raw +
inl_pad);
(*txq->elts)[elts_head++ & elts_m] = buf;
- addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
- uintptr_t));
+ addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+ uintptr_t));
*dseg = (rte_v128u32_t) {
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
- addr,
- addr >> 32,
+ addr_64,
+ addr_64 >> 32,
};
mpw.data.raw = (volatile void *)(dseg + 1);
mpw.total_len += (inl_pad + sizeof(*dseg));
(1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {
volatile struct mlx5_wqe *wqe = mpw.wqe;
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
txq->mpw_comp = txq->wqe_ci;
-#ifndef NDEBUG
- ++txq->cq_pi;
-#endif
} else {
txq->elts_comp += j;
}
return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
}
+/**
+ * Initialize Rx WQ and indexes.
+ *
+ * Fills each of the (1 << rxq->elts_n) WQE data segments with the
+ * big-endian address and byte count of its buffer and the lkey returned
+ * by mlx5_rx_addr2mr(). The buffer is an entry of rxq->mprq_bufs when
+ * mlx5_rxq_mprq_enabled() is true, an mbuf from rxq->elts otherwise.
+ * Then consumed_strd, decompressed, rq_pi and the zip state are reset,
+ * rq_ci is set to (wqe_n >> rxq->sges_n) and, after a write barrier,
+ * stored in big-endian form to *rxq->rq_db.
+ *
+ * @param[in] rxq
+ * Pointer to RX queue structure.
+ */
+void
+mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
+{
+ const unsigned int wqe_n = 1 << rxq->elts_n;
+ unsigned int i;
+
+ for (i = 0; (i != wqe_n); ++i) {
+ volatile struct mlx5_wqe_data_seg *scat;
+ uintptr_t addr;
+ uint32_t byte_count;
+
+ if (mlx5_rxq_mprq_enabled(rxq)) {
+ struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];
+
+ scat = &((volatile struct mlx5_wqe_mprq *)
+ rxq->wqes)[i].dseg;
+ addr = (uintptr_t)mlx5_mprq_buf_addr(buf);
+ byte_count = (1 << rxq->strd_sz_n) *
+ (1 << rxq->strd_num_n);
+ } else {
+ struct rte_mbuf *buf = (*rxq->elts)[i];
+
+ scat = &((volatile struct mlx5_wqe_data_seg *)
+ rxq->wqes)[i];
+ addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ byte_count = DATA_LEN(buf);
+ }
+ /* scat->addr must be able to store a pointer. */
+ assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+ *scat = (struct mlx5_wqe_data_seg){
+ .addr = rte_cpu_to_be_64(addr),
+ .byte_count = rte_cpu_to_be_32(byte_count),
+ .lkey = mlx5_rx_addr2mr(rxq, addr),
+ };
+ }
+ rxq->consumed_strd = 0;
+ rxq->decompressed = 0;
+ rxq->rq_pi = 0;
+ rxq->zip = (struct rxq_zip){
+ .ai = 0,
+ };
+ /* Update doorbell counter. */
+ rxq->rq_ci = wqe_n >> rxq->sges_n;
+ rte_cio_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+}
+
+/**
+ * Handle a Rx error.
+ * The function moves the RQ to the reset state when the first error CQE is
+ * seen; the CQ is then drained by the caller's loop. When the CQ is empty,
+ * it moves the RQ state to ready and initializes the RQ.
+ * Identifying the next CQE and counting errors are the caller's
+ * responsibility.
+ *
+ * Recovery progress is tracked in rxq->err_state:
+ * - MLX5_RXQ_ERR_STATE_NO_ERROR / MLX5_RXQ_ERR_STATE_NEED_RESET: the WQ is
+ *   moved to IBV_WQS_RESET (primary process only; any other process type
+ *   gets -1). While rxq_ctrl->dump_file_n is below
+ *   config.max_dump_files_num, the error syndrome and the raw CQ and RQ
+ *   contents are appended to a debug dump file.
+ * - MLX5_RXQ_ERR_STATE_NEED_READY: once check_cqe() reports the CQE at
+ *   cq_ci as MLX5_CQE_STATUS_HW_OWN, the CQ doorbell is updated, the RQ
+ *   doorbell is zeroed, the WQ is moved to IBV_WQS_RDY and the RQ is
+ *   re-initialized with mlx5_rxq_initialize().
+ *
+ * @param[in] rxq
+ * Pointer to RX queue structure.
+ * @param[in] mbuf_prepare
+ * Whether to prepare mbufs for the RQ. If set, mbufs are allocated from
+ * rxq->mp for the wqe_n - (rq_ci - rq_pi) entries starting at rq_ci before
+ * the RQ is re-initialized; on allocation failure the mbufs allocated so
+ * far are freed and -1 is returned.
+ *
+ * @return
+ * -1 in case of recovery error, otherwise the CQE status.
+ * NOTE(review): when the recovery completes, the returned value is the
+ * (zero) result of the last modify_wq() call rather than the check_cqe()
+ * status - confirm this is what callers expect.
+ */
+int
+mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare)
+{
+ const uint16_t cqe_n = 1 << rxq->cqe_n;
+ const uint16_t cqe_mask = cqe_n - 1;
+ const unsigned int wqe_n = 1 << rxq->elts_n;
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+ struct ibv_wq_attr mod = {
+ .attr_mask = IBV_WQ_ATTR_STATE,
+ };
+ union {
+ volatile struct mlx5_cqe *cqe;
+ volatile struct mlx5_err_cqe *err_cqe;
+ } u = {
+ .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
+ };
+ int ret;
+
+ switch (rxq->err_state) {
+ case MLX5_RXQ_ERR_STATE_NO_ERROR:
+ rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
+ /* Fall-through */
+ case MLX5_RXQ_ERR_STATE_NEED_RESET:
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -1;
+ mod.wq_state = IBV_WQS_RESET;
+ ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
+ if (ret) {
+ DRV_LOG(ERR, "Cannot change Rx WQ state to RESET %s\n",
+ strerror(errno));
+ return -1;
+ }
+ if (rxq_ctrl->dump_file_n <
+ rxq_ctrl->priv->config.max_dump_files_num) {
+ MKSTR(err_str, "Unexpected CQE error syndrome "
+ "0x%02x CQN = %u RQN = %u wqe_counter = %u"
+ " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
+ rxq->cqn, rxq_ctrl->ibv->wq->wq_num,
+ rte_be_to_cpu_16(u.err_cqe->wqe_counter),
+ rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
+ MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
+ rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
+ mlx5_dump_debug_information(name, NULL, err_str, 0);
+ mlx5_dump_debug_information(name, "MLX5 Error CQ:",
+ (const void *)((uintptr_t)
+ rxq->cqes),
+ sizeof(*u.cqe) * cqe_n);
+ mlx5_dump_debug_information(name, "MLX5 Error RQ:",
+ (const void *)((uintptr_t)
+ rxq->wqes),
+ 16 * wqe_n);
+ rxq_ctrl->dump_file_n++;
+ }
+ rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
+ /* Fall-through */
+ case MLX5_RXQ_ERR_STATE_NEED_READY:
+ ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
+ if (ret == MLX5_CQE_STATUS_HW_OWN) {
+ rte_cio_wmb();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+ rte_cio_wmb();
+ /*
+ * The RQ consumer index must be zeroed while moving
+ * from RESET state to RDY state.
+ */
+ *rxq->rq_db = rte_cpu_to_be_32(0);
+ rte_cio_wmb();
+ mod.wq_state = IBV_WQS_RDY;
+ ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
+ if (ret) {
+ DRV_LOG(ERR, "Cannot change Rx WQ state to RDY"
+ " %s\n", strerror(errno));
+ return -1;
+ }
+ if (mbuf_prepare) {
+ const uint16_t q_mask = wqe_n - 1;
+ uint16_t elt_idx;
+ struct rte_mbuf **elt;
+ int i;
+ unsigned int n = wqe_n - (rxq->rq_ci -
+ rxq->rq_pi);
+
+ for (i = 0; i < (int)n; ++i) {
+ elt_idx = (rxq->rq_ci + i) & q_mask;
+ elt = &(*rxq->elts)[elt_idx];
+ *elt = rte_mbuf_raw_alloc(rxq->mp);
+ if (!*elt) {
+ for (i--; i >= 0; --i) {
+ elt_idx = (rxq->rq_ci +
+ i) & q_mask;
+ elt = &(*rxq->elts)
+ [elt_idx];
+ rte_pktmbuf_free_seg
+ (*elt);
+ }
+ return -1;
+ }
+ }
+ }
+ mlx5_rxq_initialize(rxq);
+ rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+ }
+ return ret;
+ default:
+ return -1;
+ }
+}
+
/**
* Get size of the next packet for a given CQE. For compressed CQEs, the
* consumer index is updated only once all packets of the current one have
* written.
*
* @return
- * Packet size in bytes (0 if there is none), -1 in case of completion
- * with error.
+ * 0 in case of empty CQE, otherwise the packet size in bytes.
*/
static inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
{
struct rxq_zip *zip = &rxq->zip;
uint16_t cqe_n = cqe_cnt + 1;
- int len = 0;
+ int len;
uint16_t idx, end;
- /* Process compressed data in the CQE and mini arrays. */
- if (zip->ai) {
- volatile struct mlx5_mini_cqe8 (*mc)[8] =
- (volatile struct mlx5_mini_cqe8 (*)[8])
- (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info);
-
- len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
- *mcqe = &(*mc)[zip->ai & 7];
- if ((++zip->ai & 7) == 0) {
- /* Invalidate consumed CQEs */
- idx = zip->ca;
- end = zip->na;
- while (idx != end) {
- (*rxq->cqes)[idx & cqe_cnt].op_own =
- MLX5_CQE_INVALIDATE;
- ++idx;
- }
- /*
- * Increment consumer index to skip the number of
- * CQEs consumed. Hardware leaves holes in the CQ
- * ring for software use.
- */
- zip->ca = zip->na;
- zip->na += 8;
- }
- if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
- /* Invalidate the rest */
- idx = zip->ca;
- end = zip->cq_ci;
-
- while (idx != end) {
- (*rxq->cqes)[idx & cqe_cnt].op_own =
- MLX5_CQE_INVALIDATE;
- ++idx;
- }
- rxq->cq_ci = zip->cq_ci;
- zip->ai = 0;
- }
- /* No compressed data, get next CQE and verify if it is compressed. */
- } else {
- int ret;
- int8_t op_own;
-
- ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
- if (unlikely(ret == 1))
- return 0;
- ++rxq->cq_ci;
- op_own = cqe->op_own;
- rte_cio_rmb();
- if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
+ do {
+ len = 0;
+ /* Process compressed data in the CQE and mini arrays. */
+ if (zip->ai) {
volatile struct mlx5_mini_cqe8 (*mc)[8] =
(volatile struct mlx5_mini_cqe8 (*)[8])
- (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci &
+ (uintptr_t)(&(*rxq->cqes)[zip->ca &
cqe_cnt].pkt_info);
- /* Fix endianness. */
- zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
- /*
- * Current mini array position is the one returned by
- * check_cqe64().
- *
- * If completion comprises several mini arrays, as a
- * special case the second one is located 7 CQEs after
- * the initial CQE instead of 8 for subsequent ones.
- */
- zip->ca = rxq->cq_ci;
- zip->na = zip->ca + 7;
- /* Compute the next non compressed CQE. */
- --rxq->cq_ci;
- zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
- /* Get packet size to return. */
- len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
- *mcqe = &(*mc)[0];
- zip->ai = 1;
- /* Prefetch all the entries to be invalidated */
- idx = zip->ca;
- end = zip->cq_ci;
- while (idx != end) {
- rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]);
- ++idx;
+ len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
+ *mcqe = &(*mc)[zip->ai & 7];
+ if ((++zip->ai & 7) == 0) {
+ /* Invalidate consumed CQEs */
+ idx = zip->ca;
+ end = zip->na;
+ while (idx != end) {
+ (*rxq->cqes)[idx & cqe_cnt].op_own =
+ MLX5_CQE_INVALIDATE;
+ ++idx;
+ }
+ /*
+ * Increment consumer index to skip the number
+ * of CQEs consumed. Hardware leaves holes in
+ * the CQ ring for software use.
+ */
+ zip->ca = zip->na;
+ zip->na += 8;
+ }
+ if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
+ /* Invalidate the rest */
+ idx = zip->ca;
+ end = zip->cq_ci;
+
+ while (idx != end) {
+ (*rxq->cqes)[idx & cqe_cnt].op_own =
+ MLX5_CQE_INVALIDATE;
+ ++idx;
+ }
+ rxq->cq_ci = zip->cq_ci;
+ zip->ai = 0;
+ }
+ /*
+ * No compressed data, get next CQE and verify if it is
+ * compressed.
+ */
+ } else {
+ int ret;
+ int8_t op_own;
+
+ ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
+ if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+ if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
+ rxq->err_state)) {
+ ret = mlx5_rx_err_handle(rxq, 0);
+ if (ret == MLX5_CQE_STATUS_HW_OWN ||
+ ret == -1)
+ return 0;
+ } else {
+ return 0;
+ }
}
+ ++rxq->cq_ci;
+ op_own = cqe->op_own;
+ if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
+ volatile struct mlx5_mini_cqe8 (*mc)[8] =
+ (volatile struct mlx5_mini_cqe8 (*)[8])
+ (uintptr_t)(&(*rxq->cqes)
+ [rxq->cq_ci &
+ cqe_cnt].pkt_info);
+
+ /* Fix endianness. */
+ zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
+ /*
+ * Current mini array position is the one
+ * returned by check_cqe64().
+ *
+ * If completion comprises several mini arrays,
+ * as a special case the second one is located
+ * 7 CQEs after the initial CQE instead of 8
+ * for subsequent ones.
+ */
+ zip->ca = rxq->cq_ci;
+ zip->na = zip->ca + 7;
+ /* Compute the next non compressed CQE. */
+ --rxq->cq_ci;
+ zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
+ /* Get packet size to return. */
+ len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
+ *mcqe = &(*mc)[0];
+ zip->ai = 1;
+ /* Prefetch all to be invalidated */
+ idx = zip->ca;
+ end = zip->cq_ci;
+ while (idx != end) {
+ rte_prefetch0(&(*rxq->cqes)[(idx) &
+ cqe_cnt]);
+ ++idx;
+ }
+ } else {
+ len = rte_be_to_cpu_32(cqe->byte_cnt);
+ }
+ }
+ if (unlikely(rxq->err_state)) {
+ cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+ ++rxq->stats.idropped;
} else {
- len = rte_be_to_cpu_32(cqe->byte_cnt);
+ return len;
}
- /* Error while receiving packet. */
- if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
- return -1;
- }
- return len;
+ } while (1);
}
/**
rte_mbuf_raw_free(rep);
break;
}
- if (unlikely(len == -1)) {
- /* RX error, packet is likely too large. */
- rte_mbuf_raw_free(rep);
- ++rxq->stats.idropped;
- goto skip;
- }
pkt = seg;
assert(len >= (rxq->crc_present << 2));
pkt->ol_flags = 0;
mcqe->rx_hash_result);
rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
if (rxq->crc_present)
- len -= ETHER_CRC_LEN;
+ len -= RTE_ETHER_CRC_LEN;
PKT_LEN(pkt) = len;
}
DATA_LEN(rep) = DATA_LEN(seg);
pkt = NULL;
--pkts_n;
++i;
-skip:
/* Align consumer index to the next stride. */
rq_ci >>= sges_n;
++rq_ci;
const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
unsigned int i = 0;
- uint16_t rq_ci = rxq->rq_ci;
+ uint32_t rq_ci = rxq->rq_ci;
uint16_t consumed_strd = rxq->consumed_strd;
struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
if (!ret)
break;
- if (unlikely(ret == -1)) {
- /* RX error, packet is likely too large. */
- ++rxq->stats.idropped;
- continue;
- }
byte_cnt = ret;
strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
MLX5_MPRQ_STRIDE_NUM_SHIFT;
len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
assert((int)len >= (rxq->crc_present << 2));
if (rxq->crc_present)
- len -= ETHER_CRC_LEN;
+ len -= RTE_ETHER_CRC_LEN;
offset = strd_idx * strd_sz + strd_shift;
addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset);
/* Initialize the offload flag. */
struct rte_mbuf **pkts __rte_unused,
uint16_t pkts_n __rte_unused)
{
+ rte_mb();
return 0;
}
struct rte_mbuf **pkts __rte_unused,
uint16_t pkts_n __rte_unused)
{
+ rte_mb();
return 0;
}
* (e.g. mlx5_rxtx_vec_sse.c for x86).
*/
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
struct rte_mbuf **pkts __rte_unused,
uint16_t pkts_n __rte_unused)
return 0;
}
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
struct rte_mbuf **pkts __rte_unused,
uint16_t pkts_n __rte_unused)
return 0;
}
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
struct rte_mbuf **pkts __rte_unused,
uint16_t pkts_n __rte_unused)
return 0;
}
-int __attribute__((weak))
+__rte_weak int
mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
{
return -ENOTSUP;
}
-int __attribute__((weak))
+__rte_weak int
mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
{
return -ENOTSUP;
}
-int __attribute__((weak))
+__rte_weak int
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
{
return -ENOTSUP;
}
-int __attribute__((weak))
+__rte_weak int
mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
{
return -ENOTSUP;