#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <infiniband/verbs.h>
+#include <infiniband/arch.h>
+#include <infiniband/mlx5_hw.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
DEBUG("indirection table extended to assume %u WQs",
priv->reta_idx_n);
}
- for (i = 0; (i != priv->reta_idx_n); ++i)
- wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq;
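+		/* The WQ now lives in struct rxq_ctrl, recover it through the embedded rxq. */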
+ for (i = 0; (i != priv->reta_idx_n); ++i) {
+ struct rxq_ctrl *rxq_ctrl;
+
+ rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
+ struct rxq_ctrl, rxq);
+ wqs[i] = rxq_ctrl->wq;
+ }
/* Get number of hash RX queues to configure. */
for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
hash_rxqs_n += ind_table_init[i].hash_types_n;
struct rte_mbuf **pool)
{
unsigned int i;
- struct rxq_elt (*elts)[elts_n] =
- rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq_ctrl->socket);
int ret = 0;
- if (elts == NULL) {
- ERROR("%p: can't allocate packets array", (void *)rxq_ctrl);
- ret = ENOMEM;
- goto error;
- }
/* For each WR (packet). */
for (i = 0; (i != elts_n); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct ibv_sge *sge = &(*elts)[i].sge;
struct rte_mbuf *buf;
+ volatile struct mlx5_wqe_data_seg *scat =
+ &(*rxq_ctrl->rxq.wqes)[i];
if (pool != NULL) {
buf = *(pool++);
ret = ENOMEM;
goto error;
}
- elt->buf = buf;
/* Headroom is reserved by rte_pktmbuf_alloc(). */
assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
/* Buffer is supposed to be empty. */
assert(rte_pktmbuf_data_len(buf) == 0);
assert(rte_pktmbuf_pkt_len(buf) == 0);
- /* sge->addr must be able to store a pointer. */
- assert(sizeof(sge->addr) >= sizeof(uintptr_t));
- /* SGE keeps its headroom. */
- sge->addr = (uintptr_t)
- ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
- sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
- sge->lkey = rxq_ctrl->mr->lkey;
- /* Redundant check for tailroom. */
- assert(sge->length == rte_pktmbuf_tailroom(buf));
+ assert(!buf->next);
+ PORT(buf) = rxq_ctrl->rxq.port_id;
+ DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
+ PKT_LEN(buf) = DATA_LEN(buf);
+ NB_SEGS(buf) = 1;
+ /* scat->addr must be able to store a pointer. */
+ assert(sizeof(scat->addr) >= sizeof(uintptr_t));
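+		/*
+		 * Fill the receive WQE scatter entry: buffer address,
+		 * length and lkey are stored in network byte order.
+		 */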
+ *scat = (struct mlx5_wqe_data_seg){
+ .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
+ .byte_count = htonl(DATA_LEN(buf)),
+ .lkey = htonl(rxq_ctrl->mr->lkey),
+ };
+ (*rxq_ctrl->rxq.elts)[i] = buf;
}
DEBUG("%p: allocated and configured %u single-segment WRs",
(void *)rxq_ctrl, elts_n);
- rxq_ctrl->rxq.elts_n = elts_n;
- rxq_ctrl->rxq.elts_head = 0;
- rxq_ctrl->rxq.elts = elts;
assert(ret == 0);
return 0;
error:
- if (elts != NULL) {
- assert(pool == NULL);
- for (i = 0; (i != RTE_DIM(*elts)); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct rte_mbuf *buf = elt->buf;
-
- if (buf != NULL)
- rte_pktmbuf_free_seg(buf);
- }
- rte_free(elts);
+ assert(pool == NULL);
+ elts_n = i;
+ for (i = 0; (i != elts_n); ++i) {
+ if ((*rxq_ctrl->rxq.elts)[i] != NULL)
+ rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
+ (*rxq_ctrl->rxq.elts)[i] = NULL;
}
DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
assert(ret > 0);
rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
{
unsigned int i;
- unsigned int elts_n = rxq_ctrl->rxq.elts_n;
- struct rxq_elt (*elts)[elts_n] = rxq_ctrl->rxq.elts;
DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
- rxq_ctrl->rxq.elts_n = 0;
- rxq_ctrl->rxq.elts = NULL;
- if (elts == NULL)
+ if (rxq_ctrl->rxq.elts == NULL)
return;
- for (i = 0; (i != RTE_DIM(*elts)); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct rte_mbuf *buf = elt->buf;
- if (buf != NULL)
- rte_pktmbuf_free_seg(buf);
+ for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) {
+ if ((*rxq_ctrl->rxq.elts)[i] != NULL)
+ rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
+ (*rxq_ctrl->rxq.elts)[i] = NULL;
}
- rte_free(elts);
}
/**
DEBUG("cleaning up %p", (void *)rxq_ctrl);
rxq_free_elts(rxq_ctrl);
- rxq_ctrl->rxq.poll = NULL;
- rxq_ctrl->rxq.recv = NULL;
if (rxq_ctrl->if_wq != NULL) {
- assert(rxq_ctrl->rxq.priv != NULL);
- assert(rxq_ctrl->rxq.priv->ctx != NULL);
- assert(rxq_ctrl->rxq.wq != NULL);
+ assert(rxq_ctrl->priv != NULL);
+ assert(rxq_ctrl->priv->ctx != NULL);
+ assert(rxq_ctrl->wq != NULL);
params = (struct ibv_exp_release_intf_params){
.comp_mask = 0,
};
- claim_zero(ibv_exp_release_intf(rxq_ctrl->rxq.priv->ctx,
+ claim_zero(ibv_exp_release_intf(rxq_ctrl->priv->ctx,
rxq_ctrl->if_wq,
¶ms));
}
if (rxq_ctrl->if_cq != NULL) {
- assert(rxq_ctrl->rxq.priv != NULL);
- assert(rxq_ctrl->rxq.priv->ctx != NULL);
- assert(rxq_ctrl->rxq.cq != NULL);
+ assert(rxq_ctrl->priv != NULL);
+ assert(rxq_ctrl->priv->ctx != NULL);
+ assert(rxq_ctrl->cq != NULL);
params = (struct ibv_exp_release_intf_params){
.comp_mask = 0,
};
- claim_zero(ibv_exp_release_intf(rxq_ctrl->rxq.priv->ctx,
+ claim_zero(ibv_exp_release_intf(rxq_ctrl->priv->ctx,
rxq_ctrl->if_cq,
¶ms));
}
- if (rxq_ctrl->rxq.wq != NULL)
- claim_zero(ibv_exp_destroy_wq(rxq_ctrl->rxq.wq));
- if (rxq_ctrl->rxq.cq != NULL)
- claim_zero(ibv_destroy_cq(rxq_ctrl->rxq.cq));
+ if (rxq_ctrl->wq != NULL)
+ claim_zero(ibv_exp_destroy_wq(rxq_ctrl->wq));
+ if (rxq_ctrl->cq != NULL)
+ claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
if (rxq_ctrl->rd != NULL) {
struct ibv_exp_destroy_res_domain_attr attr = {
.comp_mask = 0,
};
- assert(rxq_ctrl->rxq.priv != NULL);
- assert(rxq_ctrl->rxq.priv->ctx != NULL);
- claim_zero(ibv_exp_destroy_res_domain(rxq_ctrl->rxq.priv->ctx,
+ assert(rxq_ctrl->priv != NULL);
+ assert(rxq_ctrl->priv->ctx != NULL);
+ claim_zero(ibv_exp_destroy_res_domain(rxq_ctrl->priv->ctx,
rxq_ctrl->rd,
&attr));
}
int
rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
{
- struct priv *priv = rxq_ctrl->rxq.priv;
+ struct priv *priv = rxq_ctrl->priv;
struct rxq_ctrl tmpl = *rxq_ctrl;
unsigned int mbuf_n;
unsigned int desc_n;
struct rte_mbuf **pool;
unsigned int i, k;
struct ibv_exp_wq_attr mod;
- struct rxq_elt (*elts)[tmpl.rxq.elts_n];
int err;
DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq_ctrl);
.attr_mask = IBV_EXP_WQ_ATTR_STATE,
.wq_state = IBV_EXP_WQS_RESET,
};
- err = ibv_exp_modify_wq(tmpl.rxq.wq, &mod);
+ err = ibv_exp_modify_wq(tmpl.wq, &mod);
if (err) {
ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
assert(err > 0);
}
/* Snatch mbufs from original queue. */
k = 0;
- elts = rxq_ctrl->rxq.elts;
- for (i = 0; (i != RTE_DIM(*elts)); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct rte_mbuf *buf = elt->buf;
-
- pool[k++] = buf;
- }
+ for (i = 0; (i != desc_n); ++i)
+ pool[k++] = (*rxq_ctrl->rxq.elts)[i];
assert(k == mbuf_n);
- tmpl.rxq.elts_n = 0;
- tmpl.rxq.elts = NULL;
- assert((void *)&tmpl.rxq.elts == NULL);
- err = rxq_alloc_elts(&tmpl, desc_n, pool);
- if (err) {
- ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
- rte_free(pool);
- assert(err > 0);
- return err;
- }
- assert(tmpl.rxq.elts_n == desc_n);
-	rte_free(pool);
- /* Clean up original data. */
- rxq_ctrl->rxq.elts_n = 0;
- rte_free(rxq_ctrl->rxq.elts);
- rxq_ctrl->rxq.elts = NULL;
/* Change queue state to ready. */
mod = (struct ibv_exp_wq_attr){
.attr_mask = IBV_EXP_WQ_ATTR_STATE,
.wq_state = IBV_EXP_WQS_RDY,
};
- err = ibv_exp_modify_wq(tmpl.rxq.wq, &mod);
+ err = ibv_exp_modify_wq(tmpl.wq, &mod);
if (err) {
ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
(void *)dev, strerror(err));
+		rte_free(pool);
		goto error;
}
/* Post SGEs. */
- assert(tmpl.if_wq != NULL);
- elts = tmpl.rxq.elts;
- for (i = 0; (i != RTE_DIM(*elts)); ++i) {
- err = tmpl.if_wq->recv_burst(
- tmpl.rxq.wq,
- &(*elts)[i].sge,
- 1);
- if (err)
- break;
- }
+ err = rxq_alloc_elts(&tmpl, desc_n, pool);
if (err) {
- ERROR("%p: failed to post SGEs with error %d",
- (void *)dev, err);
- /* Set err because it does not contain a valid errno value. */
- err = EIO;
- goto error;
+ ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
+ rte_free(pool);
+ assert(err > 0);
+ return err;
	}
+	rte_free(pool);
- tmpl.rxq.recv = tmpl.if_wq->recv_burst;
+ /* Update doorbell counter. */
+ rxq_ctrl->rxq.rq_ci = desc_n;
+ rte_wmb();
+ *rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
error:
*rxq_ctrl = tmpl;
assert(err >= 0);
return err;
}
+/**
+ * Initialize RX queue.
+ *
+ * @param tmpl
+ * Pointer to RX queue control template.
+ * @param rxq_ctrl
+ * Pointer to RX queue control.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static inline int
+rxq_setup(struct rxq_ctrl *tmpl, struct rxq_ctrl *rxq_ctrl)
+{
+ struct ibv_cq *ibcq = tmpl->cq;
+ struct mlx5_cq *cq = to_mxxx(cq, cq);
+ struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
+
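+	/* The Rx datapath reads CQEs directly and expects one CQE per cache line. */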
+ if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) {
+ ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+ "it should be set to %u", RTE_CACHE_LINE_SIZE);
+ return EINVAL;
+ }
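+	/* Map the WQ/CQ rings and doorbell records for direct hardware access. */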
+ tmpl->rxq.rq_db = rwq->rq.db;
+ tmpl->rxq.cq_ci = 0;
+ tmpl->rxq.rq_ci = 0;
+ tmpl->rxq.cq_db = cq->dbrec;
+ tmpl->rxq.wqes =
+ (volatile struct mlx5_wqe_data_seg (*)[])
+ (uintptr_t)rwq->rq.buff;
+ tmpl->rxq.cqes =
+ (volatile struct mlx5_cqe (*)[])
+ (uintptr_t)cq->active_buf->buf;
+ tmpl->rxq.elts =
+ (struct rte_mbuf *(*)[tmpl->rxq.elts_n])
+ ((uintptr_t)rxq_ctrl + sizeof(*rxq_ctrl));
+ return 0;
+}
+
/**
* Configure a RX queue.
*
* 0 on success, errno value on failure.
*/
int
-rxq_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, uint16_t desc,
- unsigned int socket, const struct rte_eth_rxconf *conf,
- struct rte_mempool *mp)
+rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
+ uint16_t desc, unsigned int socket,
+ const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
{
struct priv *priv = dev->data->dev_private;
struct rxq_ctrl tmpl = {
+ .priv = priv,
.socket = socket,
.rxq = {
- .priv = priv,
+ .elts_n = desc,
.mp = mp,
},
};
struct ibv_exp_cq_init_attr cq;
struct ibv_exp_res_domain_init_attr rd;
struct ibv_exp_wq_init_attr wq;
+ struct ibv_exp_cq_attr cq_attr;
} attr;
enum ibv_exp_query_intf_status status;
unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
- struct rxq_elt (*elts)[desc];
int ret = 0;
- unsigned int i;
- unsigned int cq_size = desc;
(void)conf; /* Thresholds configuration (ignored). */
if (desc == 0) {
- ERROR("%p: invalid number of RX descriptors", (void *)dev);
+ ERROR("%p: invalid number of RX descriptors (must be a"
+		      " power of two)", (void *)dev);
return EINVAL;
}
/* Toggle RX checksum offload if hardware supports it. */
.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
.res_domain = tmpl.rd,
};
- tmpl.rxq.cq = ibv_exp_create_cq(priv->ctx, cq_size, NULL, NULL, 0,
- &attr.cq);
- if (tmpl.rxq.cq == NULL) {
+ tmpl.cq = ibv_exp_create_cq(priv->ctx, desc - 1, NULL, NULL, 0,
+ &attr.cq);
+ if (tmpl.cq == NULL) {
ret = ENOMEM;
ERROR("%p: CQ creation failure: %s",
(void *)dev, strerror(ret));
.wq_context = NULL, /* Could be useful in the future. */
.wq_type = IBV_EXP_WQT_RQ,
/* Max number of outstanding WRs. */
- .max_recv_wr = ((priv->device_attr.max_qp_wr < (int)cq_size) ?
+ .max_recv_wr = ((priv->device_attr.max_qp_wr < (int)desc) ?
priv->device_attr.max_qp_wr :
- (int)cq_size),
+ (int)desc),
/* Max number of scatter/gather elements in a WR. */
.max_recv_sge = 1,
.pd = priv->pd,
- .cq = tmpl.rxq.cq,
+ .cq = tmpl.cq,
.comp_mask =
IBV_EXP_CREATE_WQ_RES_DOMAIN |
IBV_EXP_CREATE_WQ_VLAN_OFFLOADS |
" up to date",
(void *)dev);
- tmpl.rxq.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
- if (tmpl.rxq.wq == NULL) {
+ tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
+ if (tmpl.wq == NULL) {
ret = (errno ? errno : EINVAL);
ERROR("%p: WQ creation failure: %s",
(void *)dev, strerror(ret));
goto error;
}
- ret = rxq_alloc_elts(&tmpl, desc, NULL);
- if (ret) {
- ERROR("%p: RXQ allocation failed: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
/* Save port ID. */
tmpl.rxq.port_id = dev->data->port_id;
DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
.intf_scope = IBV_EXP_INTF_GLOBAL,
.intf_version = 1,
.intf = IBV_EXP_INTF_CQ,
- .obj = tmpl.rxq.cq,
+ .obj = tmpl.cq,
};
tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
if (tmpl.if_cq == NULL) {
attr.params = (struct ibv_exp_query_intf_params){
.intf_scope = IBV_EXP_INTF_GLOBAL,
.intf = IBV_EXP_INTF_WQ,
- .obj = tmpl.rxq.wq,
+ .obj = tmpl.wq,
};
tmpl.if_wq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
if (tmpl.if_wq == NULL) {
.attr_mask = IBV_EXP_WQ_ATTR_STATE,
.wq_state = IBV_EXP_WQS_RDY,
};
- ret = ibv_exp_modify_wq(tmpl.rxq.wq, &mod);
+ ret = ibv_exp_modify_wq(tmpl.wq, &mod);
if (ret) {
ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
(void *)dev, strerror(ret));
goto error;
}
- /* Post SGEs. */
- elts = tmpl.rxq.elts;
- for (i = 0; (i != RTE_DIM(*elts)); ++i) {
- ret = tmpl.if_wq->recv_burst(
- tmpl.rxq.wq,
- &(*elts)[i].sge,
- 1);
- if (ret)
- break;
+ ret = rxq_setup(&tmpl, rxq_ctrl);
+ if (ret) {
+ ERROR("%p: cannot initialize RX queue structure: %s",
+ (void *)dev, strerror(ret));
+ goto error;
}
+ ret = rxq_alloc_elts(&tmpl, desc, NULL);
if (ret) {
- ERROR("%p: failed to post SGEs with error %d",
- (void *)dev, ret);
- /* Set ret because it does not contain a valid errno value. */
- ret = EIO;
+ ERROR("%p: RXQ allocation failed: %s",
+ (void *)dev, strerror(ret));
goto error;
}
/* Clean up rxq in case we're reinitializing it. */
DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
rxq_cleanup(rxq_ctrl);
*rxq_ctrl = tmpl;
+ /* Update doorbell counter. */
+ rxq_ctrl->rxq.rq_ci = desc;
+ rte_wmb();
+ *rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
assert(ret == 0);
- /* Assign function in queue. */
- rxq_ctrl->rxq.poll = rxq_ctrl->if_cq->poll_length_flags_cvlan;
- rxq_ctrl->rxq.recv = rxq_ctrl->if_wq->recv_burst;
return 0;
error:
rxq_cleanup(&tmpl);
{
struct priv *priv = dev->data->dev_private;
struct rxq *rxq = (*priv->rxqs)[idx];
- struct rxq_ctrl *rxq_ctrl;
+ struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
int ret;
if (mlx5_is_secondary())
return -E_RTE_SECONDARY;
priv_lock(priv);
- rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
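+	/* Ring indexes are masked with (elts_n - 1), so the size must be a power of two. */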
+ if (!rte_is_power_of_2(desc)) {
+ desc = 1 << log2above(desc);
+ WARN("%p: increased number of descriptors in RX queue %u"
+ " to the next power of two (%d)",
+ (void *)dev, idx, desc);
+ }
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
if (idx >= priv->rxqs_n) {
(*priv->rxqs)[idx] = NULL;
rxq_cleanup(rxq_ctrl);
} else {
- rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl), 0,
- socket);
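+		/* Allocate room for the mbuf ring (elts) right behind the control structure. */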
+ rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
+ desc * sizeof(struct rte_mbuf *),
+ 0, socket);
if (rxq_ctrl == NULL) {
ERROR("%p: unable to allocate queue index %u",
(void *)dev, idx);
return -ENOMEM;
}
}
- ret = rxq_setup(dev, rxq_ctrl, desc, socket, conf, mp);
+ ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
if (ret)
rte_free(rxq_ctrl);
else {
if (rxq == NULL)
return;
rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- priv = rxq->priv;
+ priv = rxq_ctrl->priv;
priv_lock(priv);
for (i = 0; (i != priv->rxqs_n); ++i)
if ((*priv->rxqs)[i] == rxq) {
DEBUG("%p: removing RX queue %p from list",
- (void *)priv->dev, (void *)rxq);
+ (void *)priv->dev, (void *)rxq_ctrl);
(*priv->rxqs)[i] = NULL;
break;
}
uint16_t pkts_n)
{
struct rxq *rxq = dpdk_rxq;
- struct priv *priv = mlx5_secondary_data_setup(rxq->priv);
+ struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+ struct priv *priv = mlx5_secondary_data_setup(rxq_ctrl->priv);
struct priv *primary_priv;
unsigned int index;
#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <infiniband/verbs.h>
+#include <infiniband/mlx5_hw.h>
+#include <infiniband/arch.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
-#include <rte_memory.h>
+#include <rte_ether.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+
+static inline volatile struct mlx5_cqe64 *
+get_cqe64(volatile struct mlx5_cqe cqes[],
+ unsigned int cqes_n, uint16_t *ci)
+ __attribute__((always_inline));
+
+static inline int
+rx_poll_len(struct rxq *rxq) __attribute__((always_inline));
+
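+/*
+ * Return the CQE at index *ci when the hardware has completed it,
+ * NULL otherwise; *ci is advanced when the CQE is consumed.
+ */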
+static volatile struct mlx5_cqe64 *
+get_cqe64(volatile struct mlx5_cqe cqes[],
+ unsigned int cqes_n, uint16_t *ci)
+{
+ volatile struct mlx5_cqe64 *cqe;
+ uint16_t idx = *ci;
+ uint8_t op_own;
+
+ cqe = &cqes[idx & (cqes_n - 1)].cqe64;
+ op_own = cqe->op_own;
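+	/* The owner bit alternates on each ring wrap; a mismatch means no new CQE. */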
+ if (unlikely((op_own & MLX5_CQE_OWNER_MASK) == !(idx & cqes_n))) {
+ return NULL;
+ } else if (unlikely(op_own & 0x80)) {
+ switch (op_own >> 4) {
+ case MLX5_CQE_INVALID:
+ return NULL; /* No CQE */
+ case MLX5_CQE_REQ_ERR:
+ return cqe;
+ case MLX5_CQE_RESP_ERR:
+ ++(*ci);
+ return NULL;
+ default:
+ return NULL;
+ }
+ }
+	*ci = idx + 1;
+	return cqe;
+}
/**
* Manage TX completions.
/**
* Translate RX completion flags to packet type.
*
- * @param flags
- * RX completion flags returned by poll_length_flags().
+ * @param[in] cqe
+ * Pointer to CQE.
*
* @note: fix mlx5_dev_supported_ptypes_get() if any change here.
*
* Packet type for struct rte_mbuf.
*/
static inline uint32_t
-rxq_cq_to_pkt_type(uint32_t flags)
+rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe)
{
uint32_t pkt_type;
+ uint8_t flags = cqe->l4_hdr_type_etc;
+ uint8_t info = cqe->rsvd0[0];
- if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET)
+ if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET)
pkt_type =
TRANSPOSE(flags,
IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
else
pkt_type =
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV4_PACKET,
- RTE_PTYPE_L3_IPV4) |
+ MLX5_CQE_L3_HDR_TYPE_IPV6,
+ RTE_PTYPE_L3_IPV6) |
TRANSPOSE(flags,
- IBV_EXP_CQ_RX_IPV6_PACKET,
- RTE_PTYPE_L3_IPV6);
+ MLX5_CQE_L3_HDR_TYPE_IPV4,
+ RTE_PTYPE_L3_IPV4);
return pkt_type;
}
*
* @param[in] rxq
* Pointer to RX queue structure.
- * @param flags
- * RX completion flags returned by poll_length_flags().
+ * @param[in] cqe
+ * Pointer to CQE.
*
* @return
* Offload flags (ol_flags) for struct rte_mbuf.
*/
static inline uint32_t
-rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags)
+rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe)
{
uint32_t ol_flags = 0;
+ uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK;
+ uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK;
+ uint8_t info = cqe->rsvd0[0];
- if (rxq->csum) {
- /* Set IP checksum flag only for IPv4/IPv6 packets. */
- if (flags &
- (IBV_EXP_CQ_RX_IPV4_PACKET | IBV_EXP_CQ_RX_IPV6_PACKET))
- ol_flags |=
- TRANSPOSE(~flags,
- IBV_EXP_CQ_RX_IP_CSUM_OK,
- PKT_RX_IP_CKSUM_BAD);
- /* Set L4 checksum flag only for TCP/UDP packets. */
- if (flags &
- (IBV_EXP_CQ_RX_TCP_PACKET | IBV_EXP_CQ_RX_UDP_PACKET))
- ol_flags |=
- TRANSPOSE(~flags,
- IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK,
- PKT_RX_L4_CKSUM_BAD);
- }
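+	/* Branchless flags: (!OK) evaluates to 0 or 1 and multiplies the bad-checksum bit. */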
+ if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) ||
+ (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6))
+ ol_flags |=
+ (!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) *
+ PKT_RX_IP_CKSUM_BAD);
+ if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) ||
+ (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) ||
+ (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) ||
+ (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP))
+ ol_flags |=
+ (!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) *
+ PKT_RX_L4_CKSUM_BAD);
/*
* PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
* of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
* (its value is 0).
*/
- if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
+ if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
ol_flags |=
- TRANSPOSE(~flags,
+ TRANSPOSE(~cqe->l4_hdr_type_etc,
IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
PKT_RX_IP_CKSUM_BAD) |
- TRANSPOSE(~flags,
+ TRANSPOSE(~cqe->l4_hdr_type_etc,
IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
PKT_RX_L4_CKSUM_BAD);
return ol_flags;
}
+/**
+ * Get size of the next packet.
+ *
+ * @param rxq
+ * RX queue to fetch packet from.
+ *
+ * @return
+ *   Packet size in bytes, or 0 when no packet is available.
+ */
+static inline int __attribute__((always_inline))
+rx_poll_len(struct rxq *rxq)
+{
+ volatile struct mlx5_cqe64 *cqe;
+
+ cqe = get_cqe64(*rxq->cqes, rxq->elts_n, &rxq->cq_ci);
+ if (cqe)
+ return ntohl(cqe->byte_cnt);
+ return 0;
+}
+
/**
* DPDK callback for RX.
*
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct rxq *rxq = (struct rxq *)dpdk_rxq;
- struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts;
- const unsigned int elts_n = rxq->elts_n;
- unsigned int elts_head = rxq->elts_head;
- struct ibv_sge sges[pkts_n];
- unsigned int i;
+ struct rxq *rxq = dpdk_rxq;
unsigned int pkts_ret = 0;
- int ret;
+ unsigned int i;
+ unsigned int rq_ci = rxq->rq_ci;
+ const unsigned int elts_n = rxq->elts_n;
+ const unsigned int wqe_cnt = elts_n - 1;
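+	/* elts_n is a power of two, (elts_n - 1) serves as the ring index mask. */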
for (i = 0; (i != pkts_n); ++i) {
- struct rxq_elt *elt = &(*elts)[elts_head];
- unsigned int len;
- struct rte_mbuf *seg = elt->buf;
+ unsigned int idx = rq_ci & wqe_cnt;
struct rte_mbuf *rep;
- uint32_t flags;
- uint16_t vlan_tci;
+ struct rte_mbuf *pkt;
+ unsigned int len;
+ volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
+ volatile struct mlx5_cqe64 *cqe =
+ &(*rxq->cqes)[rxq->cq_ci & wqe_cnt].cqe64;
- /* Sanity checks. */
- assert(seg != NULL);
- assert(elts_head < rxq->elts_n);
- assert(rxq->elts_head < rxq->elts_n);
- /*
- * Fetch initial bytes of packet descriptor into a
- * cacheline while allocating rep.
- */
- rte_mbuf_prefetch_part1(seg);
- rte_mbuf_prefetch_part2(seg);
- ret = rxq->poll(rxq->cq, NULL, NULL, &flags, &vlan_tci);
- if (unlikely(ret < 0)) {
- struct ibv_wc wc;
- int wcs_n;
-
- DEBUG("rxq=%p, poll_length() failed (ret=%d)",
- (void *)rxq, ret);
- /* ibv_poll_cq() must be used in case of failure. */
- wcs_n = ibv_poll_cq(rxq->cq, 1, &wc);
- if (unlikely(wcs_n == 0))
- break;
- if (unlikely(wcs_n < 0)) {
- DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)",
- (void *)rxq, wcs_n);
- break;
- }
- assert(wcs_n == 1);
- if (unlikely(wc.status != IBV_WC_SUCCESS)) {
- /* Whatever, just repost the offending WR. */
- DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work"
- " completion status (%d): %s",
- (void *)rxq, wc.wr_id, wc.status,
- ibv_wc_status_str(wc.status));
-#ifdef MLX5_PMD_SOFT_COUNTERS
- /* Increment dropped packets counter. */
- ++rxq->stats.idropped;
-#endif
- /* Add SGE to array for repost. */
- sges[i] = elt->sge;
- goto repost;
- }
- ret = wc.byte_len;
- }
- if (ret == 0)
- break;
- assert(ret >= (rxq->crc_present << 2));
- len = ret - (rxq->crc_present << 2);
+ pkt = (*rxq->elts)[idx];
+ rte_prefetch0(cqe);
rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
- /*
- * Unable to allocate a replacement mbuf,
- * repost WR.
- */
- DEBUG("rxq=%p: can't allocate a new mbuf",
- (void *)rxq);
- /* Increment out of memory counters. */
++rxq->stats.rx_nombuf;
- ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
- goto repost;
+ break;
}
-
- /* Reconfigure sge to use rep instead of seg. */
- elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
- elt->buf = rep;
-
- /* Add SGE to array for repost. */
- sges[i] = elt->sge;
-
- /* Update seg information. */
- SET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM);
- NB_SEGS(seg) = 1;
- PORT(seg) = rxq->port_id;
- NEXT(seg) = NULL;
- PKT_LEN(seg) = len;
- DATA_LEN(seg) = len;
- if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip) {
- seg->packet_type = rxq_cq_to_pkt_type(flags);
- seg->ol_flags = rxq_cq_to_ol_flags(rxq, flags);
- if (flags & IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1) {
- seg->ol_flags |= PKT_RX_VLAN_PKT |
+ SET_DATA_OFF(rep, RTE_PKTMBUF_HEADROOM);
+ NB_SEGS(rep) = 1;
+ PORT(rep) = rxq->port_id;
+ NEXT(rep) = NULL;
+ len = rx_poll_len(rxq);
+ if (unlikely(len == 0)) {
+ rte_mbuf_refcnt_set(rep, 0);
+ __rte_mbuf_raw_free(rep);
+ break;
+ }
+ /*
+ * Fill NIC descriptor with the new buffer. The lkey and size
+ * of the buffers are already known, only the buffer address
+ * changes.
+ */
+ wqe->addr = htonll((uintptr_t)rep->buf_addr +
+ RTE_PKTMBUF_HEADROOM);
+ (*rxq->elts)[idx] = rep;
+ /* Update pkt information. */
+ if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
+ rxq->crc_present) {
+ if (rxq->csum) {
+ pkt->packet_type = rxq_cq_to_pkt_type(cqe);
+ pkt->ol_flags = rxq_cq_to_ol_flags(rxq, cqe);
+ }
+ if (cqe->l4_hdr_type_etc & MLX5_CQE_VLAN_STRIPPED) {
+ pkt->ol_flags |= PKT_RX_VLAN_PKT |
PKT_RX_VLAN_STRIPPED;
- seg->vlan_tci = vlan_tci;
+ pkt->vlan_tci = ntohs(cqe->vlan_info);
}
+ if (rxq->crc_present)
+ len -= ETHER_CRC_LEN;
}
- /* Return packet. */
- *(pkts++) = seg;
- ++pkts_ret;
+ PKT_LEN(pkt) = len;
+ DATA_LEN(pkt) = len;
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment bytes counter. */
rxq->stats.ibytes += len;
#endif
-repost:
- if (++elts_head >= elts_n)
- elts_head = 0;
- continue;
+ /* Return packet. */
+ *(pkts++) = pkt;
+ ++pkts_ret;
+ ++rq_ci;
}
- if (unlikely(i == 0))
+ if (unlikely((i == 0) && (rq_ci == rxq->rq_ci)))
return 0;
-	/* Repost WRs. */
-#ifdef DEBUG_RECV
-	DEBUG("%p: reposting %u WRs", (void *)rxq, i);
-#endif
- ret = rxq->recv(rxq->wq, sges, i);
- if (unlikely(ret)) {
- /* Inability to repost WRs is fatal. */
- DEBUG("%p: recv_burst(): failed (ret=%d)",
- (void *)rxq->priv,
- ret);
- abort();
- }
- rxq->elts_head = elts_head;
+ /* Update the consumer index. */
+ rxq->rq_ci = rq_ci;
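+	/* Barriers make the WQE and index updates visible before each doorbell write. */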
+ rte_wmb();
+ *rxq->cq_db = htonl(rxq->cq_ci);
+ rte_wmb();
+ *rxq->rq_db = htonl(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment packets counter. */
rxq->stats.ipackets += pkts_ret;