+ for (i = 0; i < MLX5_MAX_MODIFY_NUM; ++i)
+ if (sh->mdh_ipools[i])
+ mlx5_ipool_destroy(sh->mdh_ipools[i]);
+}
+
+/*
+ * Tell whether the dynamic flex parser object for eCPRI has been created.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   true if the eCPRI profile slot of the shared context holds a parser
+ *   object, false otherwise.
+ */
+bool
+mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev)
+{
+	const struct mlx5_priv *priv = dev->data->dev_private;
+
+	return priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0].obj != NULL;
+}
+
+/*
+ * Allocation of a flex parser for eCPRI.
+ *
+ * On success the parser object, the number of samples, the sample offsets
+ * and the queried sample IDs are stored in the eCPRI profile slot of the
+ * shared device context; this routine does not release them afterwards.
+ *
+ * On failure the profile slot is left without a parser object, so that
+ * mlx5_flex_parser_ecpri_exist() keeps reporting "not existing" and a later
+ * allocation attempt can start from scratch.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   -ENOTSUP is returned if the device does not report flex node support,
+ *   -ENODEV is used when a DevX command fails without setting rte_errno.
+ */
+int
+mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flex_parser_profiles *prf =
+		&priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];
+	struct mlx5_devx_graph_node_attr node = {
+		.modify_field_select = 0,
+	};
+	/* Two samples are configured below: common header and payload. */
+	const uint32_t sample_num = 2;
+	uint32_t ids[8] = { 0 };
+	int ret;
+
+	if (!priv->config.hca_attr.parse_graph_flex_node) {
+		DRV_LOG(ERR, "Dynamic flex parser is not supported "
+			"for device %s.", priv->dev_data->name);
+		return -ENOTSUP;
+	}
+	node.header_length_mode = MLX5_GRAPH_NODE_LEN_FIXED;
+	/* 8 bytes now: 4B common header + 4B message body header. */
+	node.header_length_base_value = 0x8;
+	/* After MAC layer: Ether / VLAN. */
+	node.in[0].arc_parse_graph_node = MLX5_GRAPH_ARC_NODE_MAC;
+	/* Type of compared condition should be 0xAEFE in the L2 layer. */
+	node.in[0].compare_condition_value = RTE_ETHER_TYPE_ECPRI;
+	/* Sample #0: type in common header. */
+	node.sample[0].flow_match_sample_en = 1;
+	/* Fixed offset. */
+	node.sample[0].flow_match_sample_offset_mode = 0x0;
+	/* Only the 2nd byte will be used. */
+	node.sample[0].flow_match_sample_field_base_offset = 0x0;
+	/* Sample #1: message payload. */
+	node.sample[1].flow_match_sample_en = 1;
+	/* Fixed offset. */
+	node.sample[1].flow_match_sample_offset_mode = 0x0;
+	/*
+	 * Only the first two bytes will be used right now, and its offset will
+	 * start after the common header that with the length of a DW(u32).
+	 */
+	node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t);
+	prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->ctx, &node);
+	if (!prf->obj) {
+		DRV_LOG(ERR, "Failed to create flex parser node object.");
+		return (rte_errno == 0) ? -ENODEV : -rte_errno;
+	}
+	ret = mlx5_devx_cmd_query_parse_samples(prf->obj, ids, sample_num);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to query sample IDs.");
+		/* Latch the error before the cleanup call can disturb it. */
+		ret = (rte_errno == 0) ? -ENODEV : -rte_errno;
+		/*
+		 * Do not leave a half-initialized profile behind: without
+		 * valid sample IDs the parser object is useless, and keeping
+		 * it would make the "exist" check succeed on the next call.
+		 */
+		mlx5_devx_cmd_destroy(prf->obj);
+		prf->obj = NULL;
+		rte_errno = -ret;
+		return ret;
+	}
+	/* Publish the profile only once every piece of it is known. */
+	prf->num = sample_num;
+	prf->offset[0] = 0x0;
+	prf->offset[1] = sizeof(uint32_t);
+	prf->ids[0] = ids[0];
+	prf->ids[1] = ids[1];
+	return 0;
+}
+
+/*
+ * Destroy the eCPRI flex parser object, if one is held, and clear the
+ * profile slot so the parser can be allocated again later.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flex_parser_profiles *ecpri =
+		&priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];
+
+	/* Nothing was allocated: the slot is already empty. */
+	if (ecpri->obj == NULL)
+		return;
+	mlx5_devx_cmd_destroy(ecpri->obj);
+	ecpri->obj = NULL;
+}
+
+/*
+ * Translate the software parsing HCA attributes into capability flags.
+ *
+ * @param attr
+ *   Pointer to the HCA attributes to inspect.
+ *
+ * @return
+ *   0 if attr->swp is not set; otherwise MLX5_SW_PARSING_CAP, combined
+ *   with MLX5_SW_PARSING_CSUM_CAP and/or MLX5_SW_PARSING_TSO_CAP when
+ *   attr->swp_csum and/or attr->swp_lso are set.
+ */
+uint32_t
+mlx5_get_supported_sw_parsing_offloads(const struct mlx5_hca_attr *attr)
+{
+	uint32_t caps;
+
+	/* The checksum and LSO bits are only considered with SWP itself. */
+	if (!attr->swp)
+		return 0;
+	caps = MLX5_SW_PARSING_CAP;
+	if (attr->swp_csum)
+		caps |= MLX5_SW_PARSING_CSUM_CAP;
+	if (attr->swp_lso)
+		caps |= MLX5_SW_PARSING_TSO_CAP;
+	return caps;
+}
+
+/*
+ * Translate the stateless tunneling HCA attributes into capability flags.
+ *
+ * @param attr
+ *   Pointer to the HCA attributes to inspect.
+ *
+ * @return
+ *   Bitwise OR of MLX5_TUNNELED_OFFLOADS_VXLAN_CAP,
+ *   MLX5_TUNNELED_OFFLOADS_GRE_CAP and MLX5_TUNNELED_OFFLOADS_GENEVE_CAP,
+ *   each included only if the matching attribute is set.
+ */
+uint32_t
+mlx5_get_supported_tunneling_offloads(const struct mlx5_hca_attr *attr)
+{
+	uint32_t caps = 0;
+
+	caps |= attr->tunnel_stateless_vxlan ?
+		MLX5_TUNNELED_OFFLOADS_VXLAN_CAP : 0;
+	caps |= attr->tunnel_stateless_gre ?
+		MLX5_TUNNELED_OFFLOADS_GRE_CAP : 0;
+	caps |= attr->tunnel_stateless_geneve_rx ?
+		MLX5_TUNNELED_OFFLOADS_GENEVE_CAP : 0;
+	return caps;
+}
+
+/*
+ * Allocate Rx and Tx UARs in robust fashion.
+ * This routine handles the following UAR allocation issues:
+ *
+ * - tries to allocate the UAR with the most appropriate memory
+ * mapping type from the ones supported by the host
+ *
+ * - tries to allocate the UAR with non-NULL base address
+ * OFED 5.0.x and Upstream rdma_core before v29 returned the NULL as
+ * UAR base address if UAR was not the first object in the UAR page.
+ * It caused the PMD failure and we should try to get another UAR
+ * till we get the first one with non-NULL base address returned.
+ *
+ * The Tx UAR is allocated first and stored in sh->tx_uar, then the Rx UAR
+ * is allocated and stored in sh->devx_rx_uar. Each of them is attempted up
+ * to MLX5_ALLOC_UAR_RETRY times; an attempt that yields a UAR with a NULL
+ * base address is discarded and repeated.
+ *
+ * This routine does not release sh->tx_uar when the Rx UAR allocation
+ * fails afterwards.
+ * NOTE(review): presumably the caller performs that cleanup - confirm.
+ *
+ * @param sh
+ * Pointer to the device shared context, provides the DevX context (sh->ctx)
+ * and receives the allocated UARs.
+ * @param config
+ * Pointer to the common device configuration. Only config->dbnc is read,
+ * and only if MLX5DV_UAR_ALLOC_TYPE_NC is defined at build time.
+ *
+ * @return
+ * 0 on success, ENOMEM (a positive value) otherwise. rte_errno is not
+ * written by this routine itself.
+ */
+static int
+mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
+ const struct mlx5_common_dev_config *config)
+{
+ uint32_t uar_mapping, retry;
+ int err = 0;
+ void *base_addr;
+
+ for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+ /*
+ * Control the mapping type according to the settings:
+ * "Non-Cached" is requested only for MLX5_TXDB_NCACHED,
+ * any other dbnc value starts with the BF mapping type.
+ */
+ uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ?
+ MLX5DV_UAR_ALLOC_TYPE_NC :
+ MLX5DV_UAR_ALLOC_TYPE_BF;
+#else
+ RTE_SET_USED(config);
+ /*
+ * It seems we have no way to control the memory mapping type
+ * for the UAR, the default "Write-Combining" type is supposed.
+ * The UAR initialization on queue creation queries the
+ * actual mapping type done by Verbs/kernel and sets up the
+ * PMD datapath accordingly.
+ */
+ uar_mapping = 0;
+#endif
+ sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping);
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+ if (!sh->tx_uar &&
+ uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
+ if (config->dbnc == MLX5_TXDB_CACHED ||
+ config->dbnc == MLX5_TXDB_HEURISTIC)
+ DRV_LOG(WARNING, "Devarg tx_db_nc setting "
+ "is not supported by DevX");
+ /*
+ * In some environments like virtual machine
+ * the Write Combining mapping might not be supported
+ * and UAR allocation fails. We try "Non-Cached"
+ * mapping for the case. The tx_burst routines take
+ * the UAR mapping type into account on UAR setup
+ * on queue creation.
+ */
+ DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (BF)");
+ uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
+ sh->tx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+ } else if (!sh->tx_uar &&
+ uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
+ if (config->dbnc == MLX5_TXDB_NCACHED)
+ DRV_LOG(WARNING, "Devarg tx_db_nc settings "
+ "is not supported by DevX");
+ /*
+ * If Verbs/kernel does not support "Non-Cached"
+ * try the "Write-Combining".
+ */
+ DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (NC)");
+ uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
+ sh->tx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+ }
+#endif
+ if (!sh->tx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)");
+ err = ENOMEM;
+ goto exit;
+ }
+ base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar);
+ if (base_addr)
+ break;
+ /*
+ * The UAR with the NULL base address is dropped here without
+ * an explicit release and the allocation is repeated.
+ * The UARs are allocated by rdma_core within the
+ * IB device context, on context closure all UARs
+ * will be freed, should be no memory/object leakage.
+ */
+ DRV_LOG(DEBUG, "Retrying to allocate Tx DevX UAR");
+ sh->tx_uar = NULL;
+ }
+ /*
+ * Check whether we finally succeeded with valid UAR allocation.
+ * sh->tx_uar is NULL here only if every retry returned a UAR
+ * with the NULL base address.
+ */
+ if (!sh->tx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)");
+ err = ENOMEM;
+ goto exit;
+ }
+ for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
+ uar_mapping = 0;
+ sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
+ if (!sh->devx_rx_uar &&
+ uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
+ /*
+ * Rx UAR is used to control interrupts only,
+ * should be no datapath noticeable impact,
+ * can try "Non-Cached" mapping safely.
+ * NOTE(review): uar_mapping was set to 0 above, so
+ * this fallback is taken only if
+ * MLX5DV_UAR_ALLOC_TYPE_BF equals 0 - confirm
+ * against the rdma_core headers.
+ */
+ DRV_LOG(DEBUG, "Failed to allocate Rx DevX UAR (BF)");
+ uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
+ sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
+ (sh->ctx, uar_mapping);
+ }
+#endif
+ if (!sh->devx_rx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)");
+ err = ENOMEM;
+ goto exit;
+ }
+ base_addr = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar);
+ if (base_addr)
+ break;
+ /*
+ * The UAR with the NULL base address is dropped here without
+ * an explicit release and the allocation is repeated.
+ * The UARs are allocated by rdma_core within the
+ * IB device context, on context closure all UARs
+ * will be freed, should be no memory/object leakage.
+ */
+ DRV_LOG(DEBUG, "Retrying to allocate Rx DevX UAR");
+ sh->devx_rx_uar = NULL;
+ }
+ /*
+ * Check whether we finally succeeded with valid UAR allocation.
+ * sh->devx_rx_uar is NULL here only if every retry returned a UAR
+ * with the NULL base address.
+ */
+ if (!sh->devx_rx_uar) {
+ DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)");
+ err = ENOMEM;
+ }
+exit:
+ return err;
+}
+
+/**
+ * Remove a mempool from the MR share cache of the shared context.
+ * A failure is only logged as a warning, it is not reported to the caller.
+ *
+ * @param sh
+ *   Pointer to the device shared context.
+ * @param mp
+ *   Mempool being unregistered.
+ */
+static void
+mlx5_dev_ctx_shared_mempool_unregister(struct mlx5_dev_ctx_shared *sh,
+				       struct rte_mempool *mp)
+{
+	struct mlx5_mp_id id;
+	int rc;
+
+	mlx5_mp_id_init(&id, 0);
+	rc = mlx5_mr_mempool_unregister(&sh->share_cache, mp, &id);
+	if (rc >= 0)
+		return;
+	DRV_LOG(WARNING, "Failed to unregister mempool %s for PD %p: %s",
+		mp->name, sh->pd, rte_strerror(rte_errno));
+}
+
+/**
+ * rte_mempool_walk() callback registering an already existing mempool
+ * for the protection domain of the shared context.
+ * A failure is logged unless rte_errno is EEXIST.
+ *
+ * @param mp
+ *   The mempool being walked.
+ * @param arg
+ *   Pointer to the device shared context.
+ */
+static void
+mlx5_dev_ctx_shared_mempool_register_cb(struct rte_mempool *mp, void *arg)
+{
+	struct mlx5_dev_ctx_shared *sh = arg;
+	struct mlx5_mp_id id;
+
+	mlx5_mp_id_init(&id, 0);
+	if (mlx5_mr_mempool_register(&sh->share_cache, sh->pd, mp, &id) >= 0)
+		return;
+	/* EEXIST is not treated as an error worth reporting. */
+	if (rte_errno == EEXIST)
+		return;
+	DRV_LOG(ERR, "Failed to register existing mempool %s for PD %p: %s",
+		mp->name, sh->pd, rte_strerror(rte_errno));
+}
+
+/**
+ * rte_mempool_walk() callback unregistering a mempool
+ * from the protection domain of the shared context.
+ *
+ * @param mp
+ *   The mempool being walked.
+ * @param arg
+ *   Pointer to the device shared context.
+ */
+static void
+mlx5_dev_ctx_shared_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
+{
+	struct mlx5_dev_ctx_shared *sh = arg;
+
+	mlx5_dev_ctx_shared_mempool_unregister(sh, mp);
+}
+
+/**
+ * Mempool life cycle callback for Ethernet devices.
+ *
+ * A mempool is registered for the protection domain of the shared context
+ * on RTE_MEMPOOL_EVENT_READY and unregistered on RTE_MEMPOOL_EVENT_DESTROY.
+ * Any other event is ignored.
+ *
+ * @param event
+ *   Mempool life cycle event.
+ * @param mp
+ *   Associated mempool.
+ * @param arg
+ *   Pointer to a device shared context.
+ */
+static void
+mlx5_dev_ctx_shared_mempool_event_cb(enum rte_mempool_event event,
+				     struct rte_mempool *mp, void *arg)
+{
+	struct mlx5_dev_ctx_shared *sh = arg;
+
+	if (event == RTE_MEMPOOL_EVENT_READY) {
+		struct mlx5_mp_id id;
+		int rc;
+
+		mlx5_mp_id_init(&id, 0);
+		rc = mlx5_mr_mempool_register(&sh->share_cache, sh->pd, mp,
+					      &id);
+		if (rc < 0)
+			DRV_LOG(ERR, "Failed to register new mempool %s for PD %p: %s",
+				mp->name, sh->pd, rte_strerror(rte_errno));
+	} else if (event == RTE_MEMPOOL_EVENT_DESTROY) {
+		mlx5_dev_ctx_shared_mempool_unregister(sh, mp);
+	}
+}
+
+/**
+ * Callback used when implicit mempool registration is disabled
+ * in order to track Rx mempool destruction.
+ * Only RTE_MEMPOOL_EVENT_DESTROY is acted upon, by unregistering the mempool.
+ *
+ * @param event
+ *   Mempool life cycle event.
+ * @param mp
+ *   An Rx mempool registered explicitly when the port is started.
+ * @param arg
+ *   Pointer to a device shared context.
+ */
+static void
+mlx5_dev_ctx_shared_rx_mempool_event_cb(enum rte_mempool_event event,
+					struct rte_mempool *mp, void *arg)
+{
+	struct mlx5_dev_ctx_shared *sh = arg;
+
+	if (event != RTE_MEMPOOL_EVENT_DESTROY)
+		return;
+	mlx5_dev_ctx_shared_mempool_unregister(sh, mp);
+}
+
+int
+mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_ctx_shared *sh = priv->sh;
+ int ret;
+
+ /* Check if we only need to track Rx mempool destruction. */
+ if (!sh->cdev->config.mr_mempool_reg_en) {
+ ret = rte_mempool_event_callback_register
+ (mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
+ return ret == 0 || rte_errno == EEXIST ? 0 : ret;
+ }
+ /* Callback for this shared context may be already registered. */
+ ret = rte_mempool_event_callback_register
+ (mlx5_dev_ctx_shared_mempool_event_cb, sh);
+ if (ret != 0 && rte_errno != EEXIST)
+ return ret;
+ /* Register mempools only once for this shared context. */
+ if (ret == 0)
+ rte_mempool_walk(mlx5_dev_ctx_shared_mempool_register_cb, sh);
+ return 0;