/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/arch.h>
#include <infiniband/mlx5_hw.h>
#ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
#endif
/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
#endif
#include "mlx5.h"
static enum hash_rxq_type
hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
{
- enum hash_rxq_type type = 0;
+ enum hash_rxq_type type = HASH_RXQ_TCPV4;
assert(pos < table->hash_types_n);
do {
int
priv_rehash_flows(struct priv *priv)
{
- unsigned int i;
+ enum hash_rxq_flow_type i;
- for (i = 0; (i != RTE_DIM((*priv->hash_rxqs)[0].special_flow)); ++i)
+ for (i = HASH_RXQ_FLOW_TYPE_PROMISC;
+ i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
+ ++i)
if (!priv_allow_flow_type(priv, i)) {
priv_special_flow_disable(priv, i);
} else {
*/
static int
rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
- struct rte_mbuf **pool)
+ struct rte_mbuf *(*pool)[])
{
+ const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
unsigned int i;
int ret = 0;
- /* For each WR (packet). */
+ /* Iterate on segments. */
for (i = 0; (i != elts_n); ++i) {
struct rte_mbuf *buf;
volatile struct mlx5_wqe_data_seg *scat =
&(*rxq_ctrl->rxq.wqes)[i];
if (pool != NULL) {
- buf = *(pool++);
+ buf = (*pool)[i];
assert(buf != NULL);
rte_pktmbuf_reset(buf);
+ rte_pktmbuf_refcnt_update(buf, 1);
} else
buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
if (buf == NULL) {
assert(rte_pktmbuf_data_len(buf) == 0);
assert(rte_pktmbuf_pkt_len(buf) == 0);
assert(!buf->next);
+ /* Only the first segment keeps headroom. */
+ if (i % sges_n)
+ SET_DATA_OFF(buf, 0);
PORT(buf) = rxq_ctrl->rxq.port_id;
DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
PKT_LEN(buf) = DATA_LEN(buf);
};
(*rxq_ctrl->rxq.elts)[i] = buf;
}
- DEBUG("%p: allocated and configured %u single-segment WRs",
- (void *)rxq_ctrl, elts_n);
+ DEBUG("%p: allocated and configured %u segments (max %u packets)",
+ (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
assert(ret == 0);
return 0;
error:
if (rxq_ctrl->rxq.elts == NULL)
return;
- for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) {
+ for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) {
if ((*rxq_ctrl->rxq.elts)[i] != NULL)
rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
(*rxq_ctrl->rxq.elts)[i] = NULL;
DEBUG("cleaning up %p", (void *)rxq_ctrl);
rxq_free_elts(rxq_ctrl);
+ if (rxq_ctrl->fdir_queue != NULL)
+ priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue);
if (rxq_ctrl->if_wq != NULL) {
assert(rxq_ctrl->priv != NULL);
assert(rxq_ctrl->priv->ctx != NULL);
}
/**
- * Reconfigure a RX queue with new parameters.
+ * Reconfigure RX queue buffers.
*
* rxq_rehash() does not allocate mbufs, which, if not done from the right
* thread (such as a control thread), may corrupt the pool.
int
rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
{
- struct priv *priv = rxq_ctrl->priv;
- struct rxq_ctrl tmpl = *rxq_ctrl;
- unsigned int mbuf_n;
- unsigned int desc_n;
- struct rte_mbuf **pool;
- unsigned int i, k;
+ unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
+ unsigned int i;
struct ibv_exp_wq_attr mod;
int err;
- DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq_ctrl);
- /* Number of descriptors and mbufs currently allocated. */
- desc_n = tmpl.rxq.elts_n;
- mbuf_n = desc_n;
- /* Toggle RX checksum offload if hardware supports it. */
- if (priv->hw_csum) {
- tmpl.rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- rxq_ctrl->rxq.csum = tmpl.rxq.csum;
- }
- if (priv->hw_csum_l2tun) {
- tmpl.rxq.csum_l2tun =
- !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- rxq_ctrl->rxq.csum_l2tun = tmpl.rxq.csum_l2tun;
- }
+ DEBUG("%p: rehashing queue %p with %u SGE(s) per packet",
+ (void *)dev, (void *)rxq_ctrl, 1 << rxq_ctrl->rxq.sges_n);
+ assert(!(elts_n % (1 << rxq_ctrl->rxq.sges_n)));
/* From now on, any failure will render the queue unusable.
* Reinitialize WQ. */
mod = (struct ibv_exp_wq_attr){
.attr_mask = IBV_EXP_WQ_ATTR_STATE,
.wq_state = IBV_EXP_WQS_RESET,
};
- err = ibv_exp_modify_wq(tmpl.wq, &mod);
+ err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
if (err) {
ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
assert(err > 0);
return err;
}
- /* Allocate pool. */
- pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
- if (pool == NULL) {
- ERROR("%p: cannot allocate memory", (void *)dev);
- return ENOBUFS;
- }
/* Snatch mbufs from original queue. */
- k = 0;
- for (i = 0; (i != desc_n); ++i)
- pool[k++] = (*rxq_ctrl->rxq.elts)[i];
- assert(k == mbuf_n);
- rte_free(pool);
+ claim_zero(rxq_alloc_elts(rxq_ctrl, elts_n, rxq_ctrl->rxq.elts));
+ for (i = 0; i != elts_n; ++i) {
+ struct rte_mbuf *buf = (*rxq_ctrl->rxq.elts)[i];
+
+ assert(rte_mbuf_refcnt_read(buf) == 2);
+ rte_pktmbuf_free_seg(buf);
+ }
/* Change queue state to ready. */
mod = (struct ibv_exp_wq_attr){
.attr_mask = IBV_EXP_WQ_ATTR_STATE,
.wq_state = IBV_EXP_WQS_RDY,
};
- err = ibv_exp_modify_wq(tmpl.wq, &mod);
+ err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
if (err) {
ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
(void *)dev, strerror(err));
goto error;
}
- /* Post SGEs. */
- err = rxq_alloc_elts(&tmpl, desc_n, pool);
- if (err) {
- ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
- rte_free(pool);
- assert(err > 0);
- return err;
- }
/* Update doorbell counter. */
- rxq_ctrl->rxq.rq_ci = desc_n;
+ rxq_ctrl->rxq.rq_ci = elts_n >> rxq_ctrl->rxq.sges_n;
rte_wmb();
*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
error:
- *rxq_ctrl = tmpl;
assert(err >= 0);
return err;
}
*
* @param tmpl
* Pointer to RX queue control template.
- * @param rxq_ctrl
- * Pointer to RX queue control.
*
* @return
* 0 on success, errno value on failure.
*/
static inline int
-rxq_setup(struct rxq_ctrl *tmpl, struct rxq_ctrl *rxq_ctrl)
+rxq_setup(struct rxq_ctrl *tmpl)
{
struct ibv_cq *ibcq = tmpl->cq;
struct mlx5_cq *cq = to_mxxx(cq, cq);
struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
+ struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] =
+ rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);
if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) {
ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
"it should be set to %u", RTE_CACHE_LINE_SIZE);
return EINVAL;
}
+ if (elts == NULL)
+ return ENOMEM;
tmpl->rxq.rq_db = rwq->rq.db;
+ tmpl->rxq.cqe_n = ibcq->cqe + 1;
tmpl->rxq.cq_ci = 0;
tmpl->rxq.rq_ci = 0;
tmpl->rxq.cq_db = cq->dbrec;
tmpl->rxq.cqes =
(volatile struct mlx5_cqe (*)[])
(uintptr_t)cq->active_buf->buf;
- tmpl->rxq.elts =
- (struct rte_mbuf *(*)[tmpl->rxq.elts_n])
- ((uintptr_t)rxq_ctrl + sizeof(*rxq_ctrl));
+ tmpl->rxq.elts = elts;
return 0;
}
.priv = priv,
.socket = socket,
.rxq = {
- .elts_n = desc,
+ .elts_n = log2above(desc),
.mp = mp,
},
};
} attr;
enum ibv_exp_query_intf_status status;
unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+ unsigned int cqe_n = desc - 1;
+ struct rte_mbuf *(*elts)[desc] = NULL;
int ret = 0;
(void)conf; /* Thresholds configuration (ignored). */
- if (desc == 0) {
- ERROR("%p: invalid number of RX descriptors (must be a"
- " multiple of 2)", (void *)dev);
+ /* Enable scattered packets support for this queue if necessary. */
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+ if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
+ (dev->data->dev_conf.rxmode.max_rx_pkt_len >
+ (mb_len - RTE_PKTMBUF_HEADROOM))) {
+ unsigned int size =
+ RTE_PKTMBUF_HEADROOM +
+ dev->data->dev_conf.rxmode.max_rx_pkt_len;
+ unsigned int sges_n;
+
+ /*
+ * Determine the number of SGEs needed for a full packet
+ * and round it to the next power of two.
+ */
+ sges_n = log2above((size / mb_len) + !!(size % mb_len));
+ tmpl.rxq.sges_n = sges_n;
+ /* Make sure rxq.sges_n did not overflow. */
+ size = mb_len * (1 << tmpl.rxq.sges_n);
+ size -= RTE_PKTMBUF_HEADROOM;
+ if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+ ERROR("%p: too many SGEs (%u) needed to handle"
+ " requested maximum packet size %u",
+ (void *)dev,
+ 1 << sges_n,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len);
+ return EOVERFLOW;
+ }
+ }
+ DEBUG("%p: maximum number of segments per packet: %u",
+ (void *)dev, 1 << tmpl.rxq.sges_n);
+ if (desc % (1 << tmpl.rxq.sges_n)) {
+ ERROR("%p: number of RX queue descriptors (%u) is not a"
+ " multiple of SGEs per packet (%u)",
+ (void *)dev,
+ desc,
+ 1 << tmpl.rxq.sges_n);
return EINVAL;
}
/* Toggle RX checksum offload if hardware supports it. */
if (priv->hw_csum_l2tun)
tmpl.rxq.csum_l2tun =
!!dev->data->dev_conf.rxmode.hw_ip_checksum;
- (void)mb_len; /* I'll be back! */
/* Use the entire RX mempool as the memory region. */
tmpl.mr = mlx5_mp2mr(priv->pd, mp);
if (tmpl.mr == NULL) {
.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
.res_domain = tmpl.rd,
};
- tmpl.cq = ibv_exp_create_cq(priv->ctx, desc - 1, NULL, NULL, 0,
+ if (priv->cqe_comp) {
+ attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS;
+ attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE;
+ cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
+ }
+ tmpl.cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, NULL, 0,
&attr.cq);
if (tmpl.cq == NULL) {
ret = ENOMEM;
.wq_context = NULL, /* Could be useful in the future. */
.wq_type = IBV_EXP_WQT_RQ,
/* Max number of outstanding WRs. */
- .max_recv_wr = ((priv->device_attr.max_qp_wr < (int)desc) ?
- priv->device_attr.max_qp_wr :
- (int)desc),
+ .max_recv_wr = desc >> tmpl.rxq.sges_n,
/* Max number of scatter/gather elements in a WR. */
- .max_recv_sge = 1,
+ .max_recv_sge = 1 << tmpl.rxq.sges_n,
.pd = priv->pd,
.cq = tmpl.cq,
.comp_mask =
(void *)dev, strerror(ret));
goto error;
}
+ /*
+ * Make sure number of WRs*SGEs match expectations since a queue
+ * cannot allocate more than "desc" buffers.
+ */
+ if (((int)attr.wq.max_recv_wr != (desc >> tmpl.rxq.sges_n)) ||
+ ((int)attr.wq.max_recv_sge != (1 << tmpl.rxq.sges_n))) {
+ ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
+ (void *)dev,
+ (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n),
+ attr.wq.max_recv_wr, attr.wq.max_recv_sge);
+ ret = EINVAL;
+ goto error;
+ }
/* Save port ID. */
tmpl.rxq.port_id = dev->data->port_id;
DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
(void *)dev, strerror(ret));
goto error;
}
- ret = rxq_setup(&tmpl, rxq_ctrl);
+ ret = rxq_setup(&tmpl);
if (ret) {
ERROR("%p: cannot initialize RX queue structure: %s",
(void *)dev, strerror(ret));
goto error;
}
- ret = rxq_alloc_elts(&tmpl, desc, NULL);
+ /* Reuse buffers from original queue if possible. */
+ if (rxq_ctrl->rxq.elts_n) {
+ assert(1 << rxq_ctrl->rxq.elts_n == desc);
+ assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
+ ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
+ } else
+ ret = rxq_alloc_elts(&tmpl, desc, NULL);
if (ret) {
ERROR("%p: RXQ allocation failed: %s",
(void *)dev, strerror(ret));
/* Clean up rxq in case we're reinitializing it. */
DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
rxq_cleanup(rxq_ctrl);
+ /* Move mbuf pointers to dedicated storage area in RX queue. */
+ elts = (void *)(rxq_ctrl + 1);
+ rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
+#ifndef NDEBUG
+ memset(tmpl.rxq.elts, 0x55, sizeof(*elts));
+#endif
+ rte_free(tmpl.rxq.elts);
+ tmpl.rxq.elts = elts;
*rxq_ctrl = tmpl;
/* Update doorbell counter. */
- rxq_ctrl->rxq.rq_ci = desc;
+ rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n;
rte_wmb();
*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
assert(ret == 0);
return 0;
error:
+ elts = tmpl.rxq.elts;
rxq_cleanup(&tmpl);
+ rte_free(elts);
assert(ret > 0);
return ret;
}