git.droids-corp.org - dpdk.git/commitdiff
net/mlx5: support shared Rx queue
author    Xueming Li <xuemingl@nvidia.com>
          Thu, 4 Nov 2021 12:33:19 +0000 (20:33 +0800)
committer Raslan Darawsheh <rasland@nvidia.com>
          Thu, 4 Nov 2021 21:55:50 +0000 (22:55 +0100)
This patch introduces shared Rx queues (RxQ). All shared Rx queues with
the same group and queue ID share the same rxq_ctrl: rxq_ctrl and
rxq_data are common, and all queues of the different member ports share
the same WQ and CQ. Essentially there is one Rx WQ, and mbufs are posted
into this singleton WQ.

The shared rxq_data is installed as the device Rx queue of every member
port and is used for receiving packets. Polling the queue of any member
port may return packets of any member; mbuf->port identifies the source
port.

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
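
A minimal application-side sketch (not part of this patch; the helper name
and error handling are illustrative assumptions) of how a shared Rx queue
could be requested on two member ports once the PMD reports
RTE_ETH_DEV_CAPA_RXQ_SHARE, using the rte_eth_rxconf share_group/share_qid
fields that the setup path in this patch validates:

#include <errno.h>
#include <rte_ethdev.h>

/* Hypothetical helper: join two member ports to one shared Rx queue. */
static int
setup_shared_rxq(uint16_t port_a, uint16_t port_b, uint16_t queue_id,
		 uint16_t nb_desc, struct rte_mempool *mp)
{
	struct rte_eth_dev_info info;
	struct rte_eth_rxconf rxconf;

	if (rte_eth_dev_info_get(port_a, &info) != 0)
		return -1;
	/* The capability is reported only when RMP-based RQs are supported. */
	if (!(info.dev_capa & RTE_ETH_DEV_CAPA_RXQ_SHARE))
		return -ENOTSUP;
	rxconf = info.default_rxconf;
	rxconf.share_group = 1;      /* Non-zero group requests sharing. */
	rxconf.share_qid = queue_id; /* Same queue ID on every member port. */
	if (rte_eth_rx_queue_setup(port_a, queue_id, nb_desc,
				   rte_eth_dev_socket_id(port_a),
				   &rxconf, mp) != 0)
		return -1;
	/* The second port joins the existing shared queue instead of
	 * allocating its own WQ/CQ. */
	return rte_eth_rx_queue_setup(port_b, queue_id, nb_desc,
				      rte_eth_dev_socket_id(port_b),
				      &rxconf, mp);
}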
doc/guides/nics/features/mlx5.ini
doc/guides/nics/mlx5.rst
drivers/net/mlx5/linux/mlx5_os.c
drivers/net/mlx5/linux/mlx5_verbs.c
drivers/net/mlx5/mlx5.h
drivers/net/mlx5/mlx5_devx.c
drivers/net/mlx5/mlx5_ethdev.c
drivers/net/mlx5/mlx5_rx.h
drivers/net/mlx5/mlx5_rxq.c
drivers/net/mlx5/mlx5_trigger.c
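
A matching polling sketch (also an illustrative assumption, not from the
patch): the queue of any member port may return packets of any member, so
mbuf->port is used to attribute each packet to its source port.

#include <rte_common.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static uint64_t per_port_rx[RTE_MAX_ETHPORTS]; /* Packets seen per source port. */

/* Drain one burst from the shared queue via any member port. */
static void
poll_shared_rxq(uint16_t any_member_port, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t nb, i;

	nb = rte_eth_rx_burst(any_member_port, queue_id, pkts, RTE_DIM(pkts));
	for (i = 0; i < nb; i++) {
		/* mbuf->port carries the member port that received the packet. */
		per_port_rx[pkts[i]->port]++;
		rte_pktmbuf_free(pkts[i]);
	}
}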

diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index c10e5dd1722330b26a7408102f9feea945a0801a..845d2d4a97003eca4ac925d7dd7009bcfe7e0cb3 100644
@@ -11,6 +11,7 @@ Removal event        = Y
 Rx interrupt         = Y
 Fast mbuf free       = Y
 Queue start/stop     = Y
+Shared Rx queue      = Y
 Burst mode info      = Y
 Power mgmt address monitor = Y
 MTU update           = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index bb92520dff4308d9db75a96f160753fc2b0b2975..c8fa1f8f17ccdcb66dcd2050b7e8f601a76cb95e 100644
@@ -60,6 +60,7 @@ Features
 
 - Multi arch support: x86_64, POWER8, ARMv8, i686.
 - Multiple TX and RX queues.
+- Shared Rx queue.
 - Support for scattered TX frames.
 - Advanced support for scattered Rx frames with tunable buffer attributes.
 - IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6 RSS on any number of queues.
@@ -142,6 +143,11 @@ Limitations
     process. If the external memory is registered by primary process but has
     different virtual address in secondary process, unexpected error may happen.
 
+- Shared Rx queue:
+
+  - Received packet and byte counters are identical for all devices in the same share group.
+  - Received packet and byte counters are identical for all queues with the same group and queue ID.
+
 - When using Verbs flow engine (``dv_flow_en`` = 0), flow pattern without any
   specific VLAN will match for VLAN packets as well:
 
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index f51da8c3a383a446a56f687954c40c090c6eb5d4..e0304b685e519025b08ca2ea5739b2b90c82eb47 100644
@@ -420,6 +420,7 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv)
                        mlx5_glue->dr_create_flow_action_default_miss();
        if (!sh->default_miss_action)
                DRV_LOG(WARNING, "Default miss action is not supported.");
+       LIST_INIT(&sh->shared_rxqs);
        return 0;
 error:
        /* Rollback the created objects. */
@@ -494,6 +495,7 @@ mlx5_os_free_shared_dr(struct mlx5_priv *priv)
        MLX5_ASSERT(sh && sh->refcnt);
        if (sh->refcnt > 1)
                return;
+       MLX5_ASSERT(LIST_EMPTY(&sh->shared_rxqs));
 #ifdef HAVE_MLX5DV_DR
        if (sh->rx_domain) {
                mlx5_glue->dr_destroy_domain(sh->rx_domain);
diff --git a/drivers/net/mlx5/linux/mlx5_verbs.c b/drivers/net/mlx5/linux/mlx5_verbs.c
index f78916c868fc1f1666972e8f4f3b36b2542b4665..9d299542614aafb371538042f7391402739ba130 100644
@@ -424,14 +424,16 @@ mlx5_rxq_ibv_obj_release(struct mlx5_rxq_priv *rxq)
 {
        struct mlx5_rxq_obj *rxq_obj = rxq->ctrl->obj;
 
-       MLX5_ASSERT(rxq_obj);
-       MLX5_ASSERT(rxq_obj->wq);
-       MLX5_ASSERT(rxq_obj->ibv_cq);
+       if (rxq_obj == NULL || rxq_obj->wq == NULL)
+               return;
        claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
+       rxq_obj->wq = NULL;
+       MLX5_ASSERT(rxq_obj->ibv_cq);
        claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
        if (rxq_obj->ibv_channel)
                claim_zero(mlx5_glue->destroy_comp_channel
                                                        (rxq_obj->ibv_channel));
+       rxq->ctrl->started = false;
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a037a33debf8ce4978768194ba8eceec48d420b5..51f45788381eff1e20595a4947785977f606e962 100644
@@ -1200,6 +1200,7 @@ struct mlx5_dev_ctx_shared {
        struct mlx5_ecpri_parser_profile ecpri_parser;
        /* Flex parser profiles information. */
        void *devx_rx_uar; /* DevX UAR for Rx. */
+       LIST_HEAD(shared_rxqs, mlx5_rxq_ctrl) shared_rxqs; /* Shared RXQs. */
        struct mlx5_aso_age_mng *aso_age_mng;
        /* Management data for aging mechanism using ASO Flow Hit. */
        struct mlx5_geneve_tlv_option_resource *geneve_tlv_option_resource;
@@ -1267,6 +1268,7 @@ struct mlx5_rxq_obj {
                };
                struct mlx5_devx_obj *rq; /* DevX RQ object for hairpin. */
                struct {
+                       struct mlx5_devx_rmp devx_rmp; /* RMP for shared RQ. */
                        struct mlx5_devx_cq cq_obj; /* DevX CQ object. */
                        void *devx_channel;
                };
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index 668d47025e8d2e59774570f4f61207b556354f63..d3d189ab7f2e645bb32b0241b6472a57fcba6dac 100644
@@ -88,6 +88,8 @@ mlx5_devx_modify_rq(struct mlx5_rxq_priv *rxq, uint8_t type)
        default:
                break;
        }
+       if (rxq->ctrl->type == MLX5_RXQ_TYPE_HAIRPIN)
+               return mlx5_devx_cmd_modify_rq(rxq->ctrl->obj->rq, &rq_attr);
        return mlx5_devx_cmd_modify_rq(rxq->devx_rq.rq, &rq_attr);
 }
 
@@ -156,18 +158,21 @@ mlx5_txq_devx_modify(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
 static void
 mlx5_rxq_devx_obj_release(struct mlx5_rxq_priv *rxq)
 {
-       struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
-       struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
+       struct mlx5_rxq_obj *rxq_obj = rxq->ctrl->obj;
 
-       MLX5_ASSERT(rxq != NULL);
-       MLX5_ASSERT(rxq_ctrl != NULL);
+       if (rxq_obj == NULL)
+               return;
        if (rxq_obj->rxq_ctrl->type == MLX5_RXQ_TYPE_HAIRPIN) {
-               MLX5_ASSERT(rxq_obj->rq);
+               if (rxq_obj->rq == NULL)
+                       return;
                mlx5_devx_modify_rq(rxq, MLX5_RXQ_MOD_RDY2RST);
                claim_zero(mlx5_devx_cmd_destroy(rxq_obj->rq));
        } else {
+               if (rxq->devx_rq.rq == NULL)
+                       return;
                mlx5_devx_rq_destroy(&rxq->devx_rq);
-               memset(&rxq->devx_rq, 0, sizeof(rxq->devx_rq));
+               if (rxq->devx_rq.rmp != NULL && rxq->devx_rq.rmp->ref_cnt > 0)
+                       return;
                mlx5_devx_cq_destroy(&rxq_obj->cq_obj);
                memset(&rxq_obj->cq_obj, 0, sizeof(rxq_obj->cq_obj));
                if (rxq_obj->devx_channel) {
@@ -176,6 +181,7 @@ mlx5_rxq_devx_obj_release(struct mlx5_rxq_priv *rxq)
                        rxq_obj->devx_channel = NULL;
                }
        }
+       rxq->ctrl->started = false;
 }
 
 /**
@@ -271,6 +277,8 @@ mlx5_rxq_create_devx_rq_resources(struct mlx5_rxq_priv *rxq)
                                                MLX5_WQ_END_PAD_MODE_NONE;
        rq_attr.wq_attr.pd = cdev->pdn;
        rq_attr.counter_set_id = priv->counter_set_id;
+       if (rxq_data->shared) /* Create RMP based RQ. */
+               rxq->devx_rq.rmp = &rxq_ctrl->obj->devx_rmp;
        /* Create RQ using DevX API. */
        return mlx5_devx_rq_create(cdev->ctx, &rxq->devx_rq, wqe_size,
                                   log_desc_n, &rq_attr, rxq_ctrl->socket);
@@ -300,6 +308,8 @@ mlx5_rxq_create_devx_cq_resources(struct mlx5_rxq_priv *rxq)
        uint16_t event_nums[1] = { 0 };
        int ret = 0;
 
+       if (rxq_ctrl->started)
+               return 0;
        if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
            !rxq_data->lro) {
                cq_attr.cqe_comp_en = 1u;
@@ -365,6 +375,7 @@ mlx5_rxq_create_devx_cq_resources(struct mlx5_rxq_priv *rxq)
        rxq_data->cq_uar = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar);
        rxq_data->cqe_n = log_cqe_n;
        rxq_data->cqn = cq_obj->cq->id;
+       rxq_data->cq_ci = 0;
        if (rxq_ctrl->obj->devx_channel) {
                ret = mlx5_os_devx_subscribe_devx_event
                                              (rxq_ctrl->obj->devx_channel,
@@ -463,7 +474,7 @@ mlx5_rxq_devx_obj_new(struct mlx5_rxq_priv *rxq)
        if (rxq_ctrl->type == MLX5_RXQ_TYPE_HAIRPIN)
                return mlx5_rxq_obj_hairpin_new(rxq);
        tmpl->rxq_ctrl = rxq_ctrl;
-       if (rxq_ctrl->irq) {
+       if (rxq_ctrl->irq && !rxq_ctrl->started) {
                int devx_ev_flag =
                          MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA;
 
@@ -496,11 +507,19 @@ mlx5_rxq_devx_obj_new(struct mlx5_rxq_priv *rxq)
        ret = mlx5_devx_modify_rq(rxq, MLX5_RXQ_MOD_RST2RDY);
        if (ret)
                goto error;
-       rxq_data->wqes = (void *)(uintptr_t)rxq->devx_rq.wq.umem_buf;
-       rxq_data->rq_db = (uint32_t *)(uintptr_t)rxq->devx_rq.wq.db_rec;
-       mlx5_rxq_initialize(rxq_data);
+       if (!rxq_data->shared) {
+               rxq_data->wqes = (void *)(uintptr_t)rxq->devx_rq.wq.umem_buf;
+               rxq_data->rq_db = (uint32_t *)(uintptr_t)rxq->devx_rq.wq.db_rec;
+       } else if (!rxq_ctrl->started) {
+               rxq_data->wqes = (void *)(uintptr_t)tmpl->devx_rmp.wq.umem_buf;
+               rxq_data->rq_db =
+                               (uint32_t *)(uintptr_t)tmpl->devx_rmp.wq.db_rec;
+       }
+       if (!rxq_ctrl->started) {
+               mlx5_rxq_initialize(rxq_data);
+               rxq_ctrl->wqn = rxq->devx_rq.rq->id;
+       }
        priv->dev_data->rx_queue_state[rxq->idx] = RTE_ETH_QUEUE_STATE_STARTED;
-       rxq_ctrl->wqn = rxq->devx_rq.rq->id;
        return 0;
 error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
@@ -558,7 +577,10 @@ mlx5_devx_ind_table_create_rqt_attr(struct rte_eth_dev *dev,
                struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, queues[i]);
 
                MLX5_ASSERT(rxq != NULL);
-               rqt_attr->rq_list[i] = rxq->devx_rq.rq->id;
+               if (rxq->ctrl->type == MLX5_RXQ_TYPE_HAIRPIN)
+                       rqt_attr->rq_list[i] = rxq->ctrl->obj->rq->id;
+               else
+                       rqt_attr->rq_list[i] = rxq->devx_rq.rq->id;
        }
        MLX5_ASSERT(i > 0);
        for (j = 0; i != rqt_n; ++j, ++i)
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index bb38d5d2adee0dce15822a71ae93110b83cc95bf..dc647d5580c076139536c7dc8b526a7c9e57c570 100644
@@ -26,6 +26,7 @@
 #include "mlx5_rx.h"
 #include "mlx5_tx.h"
 #include "mlx5_autoconf.h"
+#include "mlx5_devx.h"
 
 /**
  * Get the interface index from device name.
@@ -336,9 +337,13 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
        info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
        mlx5_set_default_params(dev, info);
        mlx5_set_txlimit_params(dev, info);
+       if (priv->config.hca_attr.mem_rq_rmp &&
+           priv->obj_ops.rxq_obj_new == devx_obj_ops.rxq_obj_new)
+               info->dev_capa |= RTE_ETH_DEV_CAPA_RXQ_SHARE;
        info->switch_info.name = dev->data->name;
        info->switch_info.domain_id = priv->domain_id;
        info->switch_info.port_id = priv->representor_id;
+       info->switch_info.rx_domain = 0; /* No sub Rx domains. */
        if (priv->representor) {
                uint16_t port_id;
 
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 413e36f6d8ddaf99467c5b79203070acf7eb1e87..eda6eca8dea2ccc81542a1449c3c6fa8cd60f257 100644
@@ -96,6 +96,7 @@ struct mlx5_rxq_data {
        unsigned int lro:1; /* Enable LRO. */
        unsigned int dynf_meta:1; /* Dynamic metadata is configured. */
        unsigned int mcqe_format:3; /* CQE compression format. */
+       unsigned int shared:1; /* Shared RXQ. */
        volatile uint32_t *rq_db;
        volatile uint32_t *cq_db;
        uint16_t port_id;
@@ -158,8 +159,10 @@ struct mlx5_rxq_ctrl {
        struct mlx5_dev_ctx_shared *sh; /* Shared context. */
        enum mlx5_rxq_type type; /* Rxq type. */
        unsigned int socket; /* CPU socket ID for allocations. */
+       LIST_ENTRY(mlx5_rxq_ctrl) share_entry; /* Entry in shared RXQ list. */
        uint32_t share_group; /* Group ID of shared RXQ. */
        uint16_t share_qid; /* Shared RxQ ID in group. */
+       unsigned int started:1; /* Whether (shared) RXQ has been started. */
        unsigned int irq:1; /* Whether IRQ is enabled. */
        uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
        uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index f3fc618ed2c2c69a5460bd71f95a2f1b209cfe81..8feb3e2c0fb4afd728a24d7845feba2d2144af86 100644
@@ -29,6 +29,7 @@
 #include "mlx5_rx.h"
 #include "mlx5_utils.h"
 #include "mlx5_autoconf.h"
+#include "mlx5_devx.h"
 
 
 /* Default RSS hash key also used for ConnectX-3. */
@@ -633,14 +634,19 @@ mlx5_rx_queue_start(struct rte_eth_dev *dev, uint16_t idx)
  *   RX queue index.
  * @param desc
  *   Number of descriptors to configure in queue.
+ * @param[out] rxq_ctrl
+ *   Address of pointer to shared Rx queue control.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
+mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc,
+                       struct mlx5_rxq_ctrl **rxq_ctrl)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_rxq_priv *rxq;
+       bool empty;
 
        if (!rte_is_power_of_2(*desc)) {
                *desc = 1 << log2above(*desc);
@@ -657,16 +663,143 @@ mlx5_rx_queue_pre_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t *desc)
                rte_errno = EOVERFLOW;
                return -rte_errno;
        }
-       if (!mlx5_rxq_releasable(dev, idx)) {
-               DRV_LOG(ERR, "port %u unable to release queue index %u",
-                       dev->data->port_id, idx);
-               rte_errno = EBUSY;
-               return -rte_errno;
+       if (rxq_ctrl == NULL || *rxq_ctrl == NULL)
+               return 0;
+       if (!(*rxq_ctrl)->rxq.shared) {
+               if (!mlx5_rxq_releasable(dev, idx)) {
+                       DRV_LOG(ERR, "port %u unable to release queue index %u",
+                               dev->data->port_id, idx);
+                       rte_errno = EBUSY;
+                       return -rte_errno;
+               }
+               mlx5_rxq_release(dev, idx);
+       } else {
+               if ((*rxq_ctrl)->obj != NULL)
+                       /* Some port using shared Rx queue has been started. */
+                       return 0;
+               /* Release all owner RxQ to reconfigure Shared RxQ. */
+               do {
+                       rxq = LIST_FIRST(&(*rxq_ctrl)->owners);
+                       LIST_REMOVE(rxq, owner_entry);
+                       empty = LIST_EMPTY(&(*rxq_ctrl)->owners);
+                       mlx5_rxq_release(ETH_DEV(rxq->priv), rxq->idx);
+               } while (!empty);
+               *rxq_ctrl = NULL;
        }
-       mlx5_rxq_release(dev, idx);
        return 0;
 }
 
+/**
+ * Get the shared Rx queue object that matches group and queue index.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param group
+ *   Shared RXQ group.
+ * @param share_qid
+ *   Shared RX queue index.
+ *
+ * @return
+ *   Shared RXQ object that matches, or NULL if not found.
+ */
+static struct mlx5_rxq_ctrl *
+mlx5_shared_rxq_get(struct rte_eth_dev *dev, uint32_t group, uint16_t share_qid)
+{
+       struct mlx5_rxq_ctrl *rxq_ctrl;
+       struct mlx5_priv *priv = dev->data->dev_private;
+
+       LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) {
+               if (rxq_ctrl->share_group == group &&
+                   rxq_ctrl->share_qid == share_qid)
+                       return rxq_ctrl;
+       }
+       return NULL;
+}
+
+/**
+ * Check whether requested Rx queue configuration matches shared RXQ.
+ *
+ * @param rxq_ctrl
+ *   Pointer to shared RXQ.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param idx
+ *   Queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ * @param[in] conf
+ *   Thresholds parameters.
+ * @param mp
+ *   Memory pool for buffer allocations.
+ *
+ * @return
+ *   true if the requested configuration matches the shared RXQ, false otherwise.
+ */
+static bool
+mlx5_shared_rxq_match(struct mlx5_rxq_ctrl *rxq_ctrl, struct rte_eth_dev *dev,
+                     uint16_t idx, uint16_t desc, unsigned int socket,
+                     const struct rte_eth_rxconf *conf,
+                     struct rte_mempool *mp)
+{
+       struct mlx5_priv *spriv = LIST_FIRST(&rxq_ctrl->owners)->priv;
+       struct mlx5_priv *priv = dev->data->dev_private;
+       unsigned int i;
+
+       RTE_SET_USED(conf);
+       if (rxq_ctrl->socket != socket) {
+               DRV_LOG(ERR, "port %u queue index %u failed to join shared group: socket mismatch",
+                       dev->data->port_id, idx);
+               return false;
+       }
+       if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
+               DRV_LOG(ERR, "port %u queue index %u failed to join shared group: descriptor number mismatch",
+                       dev->data->port_id, idx);
+               return false;
+       }
+       if (priv->mtu != spriv->mtu) {
+               DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mtu mismatch",
+                       dev->data->port_id, idx);
+               return false;
+       }
+       if (priv->dev_data->dev_conf.intr_conf.rxq !=
+           spriv->dev_data->dev_conf.intr_conf.rxq) {
+               DRV_LOG(ERR, "port %u queue index %u failed to join shared group: interrupt mismatch",
+                       dev->data->port_id, idx);
+               return false;
+       }
+       if (mp != NULL && rxq_ctrl->rxq.mp != mp) {
+               DRV_LOG(ERR, "port %u queue index %u failed to join shared group: mempool mismatch",
+                       dev->data->port_id, idx);
+               return false;
+       } else if (mp == NULL) {
+               for (i = 0; i < conf->rx_nseg; i++) {
+                       if (conf->rx_seg[i].split.mp !=
+                           rxq_ctrl->rxq.rxseg[i].mp ||
+                           conf->rx_seg[i].split.length !=
+                           rxq_ctrl->rxq.rxseg[i].length) {
+                               DRV_LOG(ERR, "port %u queue index %u failed to join shared group: segment %u configuration mismatch",
+                                       dev->data->port_id, idx, i);
+                               return false;
+                       }
+               }
+       }
+       if (priv->config.hw_padding != spriv->config.hw_padding) {
+               DRV_LOG(ERR, "port %u queue index %u failed to join shared group: padding mismatch",
+                       dev->data->port_id, idx);
+               return false;
+       }
+       if (priv->config.cqe_comp != spriv->config.cqe_comp ||
+           (priv->config.cqe_comp &&
+            priv->config.cqe_comp_fmt != spriv->config.cqe_comp_fmt)) {
+               DRV_LOG(ERR, "port %u queue index %u failed to join shared group: CQE compression mismatch",
+                       dev->data->port_id, idx);
+               return false;
+       }
+       return true;
+}
+
 /**
  *
  * @param dev
@@ -692,12 +825,14 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_rxq_priv *rxq;
-       struct mlx5_rxq_ctrl *rxq_ctrl;
+       struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
        struct rte_eth_rxseg_split *rx_seg =
                                (struct rte_eth_rxseg_split *)conf->rx_seg;
        struct rte_eth_rxseg_split rx_single = {.mp = mp};
        uint16_t n_seg = conf->rx_nseg;
        int res;
+       uint64_t offloads = conf->offloads |
+                           dev->data->dev_conf.rxmode.offloads;
 
        if (mp) {
                /*
@@ -709,9 +844,6 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                n_seg = 1;
        }
        if (n_seg > 1) {
-               uint64_t offloads = conf->offloads |
-                                   dev->data->dev_conf.rxmode.offloads;
-
                /* The offloads should be checked on rte_eth_dev layer. */
                MLX5_ASSERT(offloads & RTE_ETH_RX_OFFLOAD_SCATTER);
                if (!(offloads & RTE_ETH_RX_OFFLOAD_BUFFER_SPLIT)) {
@@ -723,9 +855,46 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                }
                MLX5_ASSERT(n_seg < MLX5_MAX_RXQ_NSEG);
        }
-       res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
+       if (conf->share_group > 0) {
+               if (!priv->config.hca_attr.mem_rq_rmp) {
+                       DRV_LOG(ERR, "port %u queue index %u shared Rx queue not supported by fw",
+                                    dev->data->port_id, idx);
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
+               if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) {
+                       DRV_LOG(ERR, "port %u queue index %u shared Rx queue needs DevX api",
+                                    dev->data->port_id, idx);
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
+               if (conf->share_qid >= priv->rxqs_n) {
+                       DRV_LOG(ERR, "port %u shared Rx queue index %u > number of Rx queues %u",
+                               dev->data->port_id, conf->share_qid,
+                               priv->rxqs_n);
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
+               if (priv->config.mprq.enabled) {
+                       DRV_LOG(ERR, "port %u shared Rx queue index %u: not supported when MPRQ enabled",
+                               dev->data->port_id, conf->share_qid);
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
+               /* Try to reuse shared RXQ. */
+               rxq_ctrl = mlx5_shared_rxq_get(dev, conf->share_group,
+                                              conf->share_qid);
+               if (rxq_ctrl != NULL &&
+                   !mlx5_shared_rxq_match(rxq_ctrl, dev, idx, desc, socket,
+                                          conf, mp)) {
+                       rte_errno = EINVAL;
+                       return -rte_errno;
+               }
+       }
+       res = mlx5_rx_queue_pre_setup(dev, idx, &desc, &rxq_ctrl);
        if (res)
                return res;
+       /* Allocate RXQ. */
        rxq = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*rxq), 0,
                          SOCKET_ID_ANY);
        if (!rxq) {
@@ -737,15 +906,23 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        rxq->priv = priv;
        rxq->idx = idx;
        (*priv->rxq_privs)[idx] = rxq;
-       rxq_ctrl = mlx5_rxq_new(dev, rxq, desc, socket, conf, rx_seg, n_seg);
-       if (!rxq_ctrl) {
-               DRV_LOG(ERR, "port %u unable to allocate rx queue index %u",
-                       dev->data->port_id, idx);
-               mlx5_free(rxq);
-               (*priv->rxq_privs)[idx] = NULL;
-               rte_errno = ENOMEM;
-               return -rte_errno;
+       if (rxq_ctrl != NULL) {
+               /* Join owner list. */
+               LIST_INSERT_HEAD(&rxq_ctrl->owners, rxq, owner_entry);
+               rxq->ctrl = rxq_ctrl;
+       } else {
+               rxq_ctrl = mlx5_rxq_new(dev, rxq, desc, socket, conf, rx_seg,
+                                       n_seg);
+               if (rxq_ctrl == NULL) {
+                       DRV_LOG(ERR, "port %u unable to allocate rx queue index %u",
+                               dev->data->port_id, idx);
+                       mlx5_free(rxq);
+                       (*priv->rxq_privs)[idx] = NULL;
+                       rte_errno = ENOMEM;
+                       return -rte_errno;
+               }
        }
+       mlx5_rxq_ref(dev, idx);
        DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
                dev->data->port_id, idx);
        dev->data->rx_queues[idx] = &rxq_ctrl->rxq;
@@ -776,7 +953,7 @@ mlx5_rx_hairpin_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
        struct mlx5_rxq_ctrl *rxq_ctrl;
        int res;
 
-       res = mlx5_rx_queue_pre_setup(dev, idx, &desc);
+       res = mlx5_rx_queue_pre_setup(dev, idx, &desc, NULL);
        if (res)
                return res;
        if (hairpin_conf->peer_count != 1) {
@@ -1095,6 +1272,9 @@ mlx5_rxq_obj_verify(struct rte_eth_dev *dev)
        struct mlx5_rxq_obj *rxq_obj;
 
        LIST_FOREACH(rxq_obj, &priv->rxqsobj, next) {
+               if (rxq_obj->rxq_ctrl->rxq.shared &&
+                   !LIST_EMPTY(&rxq_obj->rxq_ctrl->owners))
+                       continue;
                DRV_LOG(DEBUG, "port %u Rx queue %u still referenced",
                        dev->data->port_id, rxq_obj->rxq_ctrl->rxq.idx);
                ++ret;
@@ -1413,6 +1593,12 @@ mlx5_rxq_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq,
                return NULL;
        }
        LIST_INIT(&tmpl->owners);
+       if (conf->share_group > 0) {
+               tmpl->rxq.shared = 1;
+               tmpl->share_group = conf->share_group;
+               tmpl->share_qid = conf->share_qid;
+               LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry);
+       }
        rxq->ctrl = tmpl;
        LIST_INSERT_HEAD(&tmpl->owners, rxq, owner_entry);
        MLX5_ASSERT(n_seg && n_seg <= MLX5_MAX_RXQ_NSEG);
@@ -1661,7 +1847,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq,
        tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
 #endif
        tmpl->rxq.idx = idx;
-       mlx5_rxq_ref(dev, idx);
        LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
        return tmpl;
 error:
@@ -1836,31 +2021,41 @@ mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_rxq_priv *rxq;
        struct mlx5_rxq_ctrl *rxq_ctrl;
+       uint32_t refcnt;
 
        if (priv->rxq_privs == NULL)
                return 0;
        rxq = mlx5_rxq_get(dev, idx);
-       if (rxq == NULL)
+       if (rxq == NULL || rxq->refcnt == 0)
                return 0;
-       if (mlx5_rxq_deref(dev, idx) > 1)
-               return 1;
        rxq_ctrl = rxq->ctrl;
-       if (rxq_ctrl->obj != NULL) {
+       refcnt = mlx5_rxq_deref(dev, idx);
+       if (refcnt > 1) {
+               return 1;
+       } else if (refcnt == 1) { /* RxQ stopped. */
                priv->obj_ops.rxq_obj_release(rxq);
-               LIST_REMOVE(rxq_ctrl->obj, next);
-               mlx5_free(rxq_ctrl->obj);
-               rxq_ctrl->obj = NULL;
-       }
-       if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
-               rxq_free_elts(rxq_ctrl);
-               dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STOPPED;
-       }
-       if (!__atomic_load_n(&rxq->refcnt, __ATOMIC_RELAXED)) {
-               if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
-                       mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
+               if (!rxq_ctrl->started && rxq_ctrl->obj != NULL) {
+                       LIST_REMOVE(rxq_ctrl->obj, next);
+                       mlx5_free(rxq_ctrl->obj);
+                       rxq_ctrl->obj = NULL;
+               }
+               if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
+                       if (!rxq_ctrl->started)
+                               rxq_free_elts(rxq_ctrl);
+                       dev->data->rx_queue_state[idx] =
+                                       RTE_ETH_QUEUE_STATE_STOPPED;
+               }
+       } else { /* Refcnt zero, closing device. */
                LIST_REMOVE(rxq, owner_entry);
-               LIST_REMOVE(rxq_ctrl, next);
-               mlx5_free(rxq_ctrl);
+               if (LIST_EMPTY(&rxq_ctrl->owners)) {
+                       if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
+                               mlx5_mr_btree_free
+                                       (&rxq_ctrl->rxq.mr_ctrl.cache_bh);
+                       if (rxq_ctrl->rxq.shared)
+                               LIST_REMOVE(rxq_ctrl, share_entry);
+                       LIST_REMOVE(rxq_ctrl, next);
+                       mlx5_free(rxq_ctrl);
+               }
                dev->data->rx_queues[idx] = NULL;
                mlx5_free(rxq);
                (*priv->rxq_privs)[idx] = NULL;
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 72475e4b5b50b74ae144bc2694ca7a1c22f309a7..a3e62e95335fd2faa32a1bb9c9ed78fd0323a14e 100644
@@ -176,6 +176,39 @@ mlx5_rxq_stop(struct rte_eth_dev *dev)
                mlx5_rxq_release(dev, i);
 }
 
+static int
+mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
+                     unsigned int idx)
+{
+       int ret = 0;
+
+       if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
+               /*
+                * Pre-register the mempools. Regardless of whether
+                * the implicit registration is enabled or not,
+                * Rx mempool destruction is tracked to free MRs.
+                */
+               if (mlx5_rxq_mempool_register(dev, rxq_ctrl) < 0)
+                       return -rte_errno;
+               ret = rxq_alloc_elts(rxq_ctrl);
+               if (ret)
+                       return ret;
+       }
+       MLX5_ASSERT(!rxq_ctrl->obj);
+       rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
+                                   sizeof(*rxq_ctrl->obj), 0,
+                                   rxq_ctrl->socket);
+       if (!rxq_ctrl->obj) {
+               DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
+                       dev->data->port_id, idx);
+               rte_errno = ENOMEM;
+               return -rte_errno;
+       }
+       DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
+               idx, (void *)&rxq_ctrl->obj);
+       return 0;
+}
+
 /**
  * Start traffic on Rx queues.
  *
@@ -208,28 +241,10 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
                if (rxq == NULL)
                        continue;
                rxq_ctrl = rxq->ctrl;
-               if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
-                       /*
-                        * Pre-register the mempools. Regardless of whether
-                        * the implicit registration is enabled or not,
-                        * Rx mempool destruction is tracked to free MRs.
-                        */
-                       if (mlx5_rxq_mempool_register(dev, rxq_ctrl) < 0)
-                               goto error;
-                       ret = rxq_alloc_elts(rxq_ctrl);
-                       if (ret)
+               if (!rxq_ctrl->started) {
+                       if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
                                goto error;
-               }
-               MLX5_ASSERT(!rxq_ctrl->obj);
-               rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
-                                           sizeof(*rxq_ctrl->obj), 0,
-                                           rxq_ctrl->socket);
-               if (!rxq_ctrl->obj) {
-                       DRV_LOG(ERR,
-                               "Port %u Rx queue %u can't allocate resources.",
-                               dev->data->port_id, i);
-                       rte_errno = ENOMEM;
-                       goto error;
+                       LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
                }
                ret = priv->obj_ops.rxq_obj_new(rxq);
                if (ret) {
@@ -237,9 +252,7 @@ mlx5_rxq_start(struct rte_eth_dev *dev)
                        rxq_ctrl->obj = NULL;
                        goto error;
                }
-               DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
-                       dev->data->port_id, i, (void *)&rxq_ctrl->obj);
-               LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
+               rxq_ctrl->started = true;
        }
        return 0;
 error: