+
+/**
+ * Create a DPDK Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
+ * @param desc
+ * Number of descriptors to configure in queue.
+ * @param socket
+ * NUMA socket on which memory must be allocated.
+ *
+ * @return
+ * A DPDK queue object on success.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+ unsigned int socket, const struct rte_eth_rxconf *conf,
+ struct rte_mempool *mp)
+{
+ struct rte_eth_dev *dev = priv->dev;
+ struct mlx5_rxq_ctrl *tmpl;
+ unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+ struct mlx5_dev_config *config = &priv->config;
+ /*
+ * Always allocate extra slots, even if eventually
+ * the vector Rx will not be used.
+ */
+ const uint16_t desc_n =
+ desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
+
+ tmpl = rte_calloc_socket("RXQ", 1,
+ sizeof(*tmpl) +
+ desc_n * sizeof(struct rte_mbuf *),
+ 0, socket);
+ if (!tmpl)
+ return NULL;
+ tmpl->socket = socket;
+ if (priv->dev->data->dev_conf.intr_conf.rxq)
+ tmpl->irq = 1;
+ /* Enable scattered packets support for this queue if necessary. */
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+ if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+ (mb_len - RTE_PKTMBUF_HEADROOM)) {
+ tmpl->rxq.sges_n = 0;
+ } else if (conf->offloads & DEV_RX_OFFLOAD_SCATTER) {
+ unsigned int size =
+ RTE_PKTMBUF_HEADROOM +
+ dev->data->dev_conf.rxmode.max_rx_pkt_len;
+ unsigned int sges_n;
+
+ /*
+ * Determine the number of SGEs needed for a full packet
+ * and round it to the next power of two.
+ */
+ sges_n = log2above((size / mb_len) + !!(size % mb_len));
+ tmpl->rxq.sges_n = sges_n;
+ /* Make sure rxq.sges_n did not overflow. */
+ size = mb_len * (1 << tmpl->rxq.sges_n);
+ size -= RTE_PKTMBUF_HEADROOM;
+ if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+ ERROR("%p: too many SGEs (%u) needed to handle"
+ " requested maximum packet size %u",
+ (void *)dev,
+ 1 << sges_n,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len);
+ goto error;
+ }
+ } else {
+ WARN("%p: the requested maximum Rx packet size (%u) is"
+ " larger than a single mbuf (%u) and scattered"
+ " mode has not been requested",
+ (void *)dev,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len,
+ mb_len - RTE_PKTMBUF_HEADROOM);
+ }
+ DEBUG("%p: maximum number of segments per packet: %u",
+ (void *)dev, 1 << tmpl->rxq.sges_n);
+ if (desc % (1 << tmpl->rxq.sges_n)) {
+ ERROR("%p: number of RX queue descriptors (%u) is not a"
+ " multiple of SGEs per packet (%u)",
+ (void *)dev,
+ desc,
+ 1 << tmpl->rxq.sges_n);
+ goto error;
+ }
+ /* Toggle RX checksum offload if hardware supports it. */
+ tmpl->rxq.csum = !!(conf->offloads & DEV_RX_OFFLOAD_CHECKSUM);
+ tmpl->rxq.csum_l2tun = (!!(conf->offloads & DEV_RX_OFFLOAD_CHECKSUM) &&
+ priv->config.hw_csum_l2tun);
+ tmpl->rxq.hw_timestamp = !!(conf->offloads & DEV_RX_OFFLOAD_TIMESTAMP);
+ /* Configure VLAN stripping. */
+ tmpl->rxq.vlan_strip = !!(conf->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
+ /* By default, FCS (CRC) is stripped by hardware. */
+ if (conf->offloads & DEV_RX_OFFLOAD_CRC_STRIP) {
+ tmpl->rxq.crc_present = 0;
+ } else if (config->hw_fcs_strip) {
+ tmpl->rxq.crc_present = 1;
+ } else {
+ WARN("%p: CRC stripping has been disabled but will still"
+ " be performed by hardware, make sure MLNX_OFED and"
+ " firmware are up to date",
+ (void *)dev);
+ tmpl->rxq.crc_present = 0;
+ }
+ DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
+ " incoming frames to hide it",
+ (void *)dev,
+ tmpl->rxq.crc_present ? "disabled" : "enabled",
+ tmpl->rxq.crc_present << 2);
+ /* Save port ID. */
+ tmpl->rxq.rss_hash = priv->rxqs_n > 1;
+ tmpl->rxq.port_id = dev->data->port_id;
+ tmpl->priv = priv;
+ tmpl->rxq.mp = mp;
+ tmpl->rxq.stats.idx = idx;
+ tmpl->rxq.elts_n = log2above(desc);
+ tmpl->rxq.elts =
+ (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+ rte_atomic32_inc(&tmpl->refcnt);
+ DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+ (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+ LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+ return tmpl;
+error:
+ rte_free(tmpl);
+ return NULL;
+}
+
+/**
+ * Get a Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
+ *
+ * @return
+ * A pointer to the queue if it exists.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
+
+ if ((*priv->rxqs)[idx]) {
+ rxq_ctrl = container_of((*priv->rxqs)[idx],
+ struct mlx5_rxq_ctrl,
+ rxq);
+
+ mlx5_priv_rxq_ibv_get(priv, idx);
+ rte_atomic32_inc(&rxq_ctrl->refcnt);
+ DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+ }
+ return rxq_ctrl;
+}
+
+/**
+ * Release a Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ if (!(*priv->rxqs)[idx])
+ return 0;
+ rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+ assert(rxq_ctrl->priv);
+ if (rxq_ctrl->ibv) {
+ int ret;
+
+ ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
+ if (!ret)
+ rxq_ctrl->ibv = NULL;
+ }
+ DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+ if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
+ LIST_REMOVE(rxq_ctrl, next);
+ rte_free(rxq_ctrl);
+ (*priv->rxqs)[idx] = NULL;
+ return 0;
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify if the queue can be released.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
+ *
+ * @return
+ * 1 if the queue can be released.
+ */
+int
+mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ if (!(*priv->rxqs)[idx])
+ return -1;
+ rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+ return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
+}
+
+/**
+ * Verify the Rx Queue list is empty
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of object not released.
+ */
+int
+mlx5_priv_rxq_verify(struct priv *priv)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ int ret = 0;
+
+ LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
+ DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
+ (void *)rxq_ctrl);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Create an indirection table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param queues
+ * Queues entering in the indirection table.
+ * @param queues_n
+ * Number of queues in the array.
+ *
+ * @return
+ * A new indirection table.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
+ uint16_t queues_n)
+{
+ struct mlx5_ind_table_ibv *ind_tbl;
+ const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
+ log2above(queues_n) :
+ log2above(priv->ind_table_max_size);
+ struct ibv_wq *wq[1 << wq_n];
+ unsigned int i;
+ unsigned int j;
+
+ ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
+ queues_n * sizeof(uint16_t), 0);
+ if (!ind_tbl)
+ return NULL;
+ for (i = 0; i != queues_n; ++i) {
+ struct mlx5_rxq_ctrl *rxq =
+ mlx5_priv_rxq_get(priv, queues[i]);
+
+ if (!rxq)
+ goto error;
+ wq[i] = rxq->ibv->wq;
+ ind_tbl->queues[i] = queues[i];
+ }
+ ind_tbl->queues_n = queues_n;
+ /* Finalise indirection table. */
+ for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
+ wq[i] = wq[j];
+ ind_tbl->ind_table = ibv_create_rwq_ind_table(
+ priv->ctx,
+ &(struct ibv_rwq_ind_table_init_attr){
+ .log_ind_tbl_size = wq_n,
+ .ind_tbl = wq,
+ .comp_mask = 0,
+ });
+ if (!ind_tbl->ind_table)
+ goto error;
+ rte_atomic32_inc(&ind_tbl->refcnt);
+ LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
+ DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+ (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+ return ind_tbl;
+error:
+ rte_free(ind_tbl);
+ DEBUG("%p cannot create indirection table", (void *)priv);
+ return NULL;
+}
+
+/**
+ * Get an indirection table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param queues
+ * Queues entering in the indirection table.
+ * @param queues_n
+ * Number of queues in the array.
+ *
+ * @return
+ * An indirection table if found.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
+ uint16_t queues_n)
+{
+ struct mlx5_ind_table_ibv *ind_tbl;
+
+ LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+ if ((ind_tbl->queues_n == queues_n) &&
+ (memcmp(ind_tbl->queues, queues,
+ ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
+ == 0))
+ break;
+ }
+ if (ind_tbl) {
+ unsigned int i;
+
+ rte_atomic32_inc(&ind_tbl->refcnt);
+ DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+ (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+ for (i = 0; i != ind_tbl->queues_n; ++i)
+ mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
+ }
+ return ind_tbl;
+}
+
+/**
+ * Release an indirection table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param ind_table
+ * Indirection table to release.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_ind_table_ibv_release(struct priv *priv,
+ struct mlx5_ind_table_ibv *ind_tbl)
+{
+ unsigned int i;
+
+ DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+ (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+ if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
+ claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
+ for (i = 0; i != ind_tbl->queues_n; ++i)
+ claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
+ if (!rte_atomic32_read(&ind_tbl->refcnt)) {
+ LIST_REMOVE(ind_tbl, next);
+ rte_free(ind_tbl);
+ return 0;
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify the Rx Queue list is empty
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of object not released.
+ */
+int
+mlx5_priv_ind_table_ibv_verify(struct priv *priv)
+{
+ struct mlx5_ind_table_ibv *ind_tbl;
+ int ret = 0;
+
+ LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+ DEBUG("%p: Verbs indirection table %p still referenced",
+ (void *)priv, (void *)ind_tbl);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Create an Rx Hash queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rss_key
+ * RSS key for the Rx hash queue.
+ * @param rss_key_len
+ * RSS key length.
+ * @param hash_fields
+ * Verbs protocol hash field to make the RSS on.
+ * @param queues
+ * Queues entering in hash queue. In case of empty hash_fields only the
+ * first queue index will be taken for the indirection table.
+ * @param queues_n
+ * Number of queues.
+ *
+ * @return
+ * An hash Rx queue on success.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+ uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+ struct mlx5_hrxq *hrxq;
+ struct mlx5_ind_table_ibv *ind_tbl;
+ struct ibv_qp *qp;
+
+ queues_n = hash_fields ? queues_n : 1;
+ ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+ if (!ind_tbl)
+ ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
+ if (!ind_tbl)
+ return NULL;
+ qp = ibv_create_qp_ex(
+ priv->ctx,
+ &(struct ibv_qp_init_attr_ex){
+ .qp_type = IBV_QPT_RAW_PACKET,
+ .comp_mask =
+ IBV_QP_INIT_ATTR_PD |
+ IBV_QP_INIT_ATTR_IND_TABLE |
+ IBV_QP_INIT_ATTR_RX_HASH,
+ .rx_hash_conf = (struct ibv_rx_hash_conf){
+ .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
+ .rx_hash_key_len = rss_key_len,
+ .rx_hash_key = rss_key,
+ .rx_hash_fields_mask = hash_fields,
+ },
+ .rwq_ind_tbl = ind_tbl->ind_table,
+ .pd = priv->pd,
+ });
+ if (!qp)
+ goto error;
+ hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
+ if (!hrxq)
+ goto error;
+ hrxq->ind_table = ind_tbl;
+ hrxq->qp = qp;
+ hrxq->rss_key_len = rss_key_len;
+ hrxq->hash_fields = hash_fields;
+ memcpy(hrxq->rss_key, rss_key, rss_key_len);
+ rte_atomic32_inc(&hrxq->refcnt);
+ LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
+ DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+ (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+ return hrxq;
+error:
+ mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+ if (qp)
+ claim_zero(ibv_destroy_qp(qp));
+ return NULL;
+}
+
+/**
+ * Get an Rx Hash queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rss_conf
+ * RSS configuration for the Rx hash queue.
+ * @param queues
+ * Queues entering in hash queue. In case of empty hash_fields only the
+ * first queue index will be taken for the indirection table.
+ * @param queues_n
+ * Number of queues.
+ *
+ * @return
+ * An hash Rx queue on success.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+ uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+ struct mlx5_hrxq *hrxq;
+
+ queues_n = hash_fields ? queues_n : 1;
+ LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+ struct mlx5_ind_table_ibv *ind_tbl;
+
+ if (hrxq->rss_key_len != rss_key_len)
+ continue;
+ if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
+ continue;
+ if (hrxq->hash_fields != hash_fields)
+ continue;
+ ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+ if (!ind_tbl)
+ continue;
+ if (ind_tbl != hrxq->ind_table) {
+ mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+ continue;
+ }
+ rte_atomic32_inc(&hrxq->refcnt);
+ DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+ (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+ return hrxq;
+ }
+ return NULL;
+}
+
+/**
+ * Release the hash Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param hrxq
+ * Pointer to Hash Rx queue to release.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
+{
+ DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+ (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+ if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
+ claim_zero(ibv_destroy_qp(hrxq->qp));
+ mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
+ LIST_REMOVE(hrxq, next);
+ rte_free(hrxq);
+ return 0;
+ }
+ claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
+ return EBUSY;
+}
+
+/**
+ * Verify the Rx Queue list is empty
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of object not released.
+ */
+int
+mlx5_priv_hrxq_ibv_verify(struct priv *priv)
+{
+ struct mlx5_hrxq *hrxq;
+ int ret = 0;
+
+ LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+ DEBUG("%p: Verbs Hash Rx queue %p still referenced",
+ (void *)priv, (void *)hrxq);
+ ++ret;
+ }
+ return ret;
+}