+
+/**
+ * Fetch the flow workspace currently bound to the calling thread.
+ *
+ * A missing workspace, or one that is not marked in use, is reported through
+ * the assertion and an error log; the pointer is handed back unchanged
+ * either way.
+ *
+ * @return
+ *   The pointer obtained from mlx5_flow_os_get_specific_workspace(), which
+ *   is NULL when no workspace is set for this thread.
+ */
+struct mlx5_flow_workspace*
+mlx5_flow_get_thread_workspace(void)
+{
+	struct mlx5_flow_workspace *data = mlx5_flow_os_get_specific_workspace();
+
+	MLX5_ASSERT(data && data->inuse);
+	if (data && data->inuse)
+		return data;
+	DRV_LOG(ERR, "flow workspace not initialized.");
+	return data;
+}
+
+/**
+ * Allocate and init new flow workspace.
+ *
+ * The workspace is zero-filled and receives an RSS queue array with room
+ * for MLX5_RSSQ_DEFAULT_NUM entries. Both allocation failures are logged.
+ *
+ * @return pointer to flow workspace data, NULL on error.
+ */
+static struct mlx5_flow_workspace*
+flow_alloc_thread_workspace(void)
+{
+	struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
+
+	if (!data) {
+		DRV_LOG(ERR, "Failed to allocate flow workspace "
+			"memory.");
+		return NULL;
+	}
+	/* Let calloc() do the count * size multiplication (overflow-checked). */
+	data->rss_desc.queue = calloc(MLX5_RSSQ_DEFAULT_NUM,
+				      sizeof(uint16_t));
+	if (!data->rss_desc.queue) {
+		DRV_LOG(ERR, "Failed to allocate flow workspace "
+			"RSS queue memory.");
+		/* Only the workspace itself has been allocated so far. */
+		free(data);
+		return NULL;
+	}
+	data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
+	return data;
+}
+
+/**
+ * Acquire a flow workspace for the calling thread and make it current.
+ *
+ * Selection order: an idle current workspace is reused; otherwise the
+ * already linked next workspace is taken; otherwise a new one is allocated
+ * and, when a current workspace exists, chained after it.
+ *
+ * @return
+ *   Pointer to the selected workspace, NULL when allocation fails. A
+ *   failure to register the workspace with the thread is only logged.
+ */
+static struct mlx5_flow_workspace*
+mlx5_flow_push_thread_workspace(void)
+{
+	struct mlx5_flow_workspace *curr = mlx5_flow_os_get_specific_workspace();
+	struct mlx5_flow_workspace *data;
+
+	if (curr && !curr->inuse) {
+		/* Current workspace is idle, take it as is. */
+		data = curr;
+	} else if (curr && curr->next) {
+		/* Current one is busy but a successor is already linked. */
+		data = curr->next;
+	} else {
+		data = flow_alloc_thread_workspace();
+		if (!data)
+			return NULL;
+		if (curr) {
+			curr->next = data;
+			data->prev = curr;
+		}
+	}
+	data->flow_idx = 0;
+	data->inuse = 1;
+	/* Set as current workspace */
+	if (mlx5_flow_os_set_specific_workspace(data))
+		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
+	return data;
+}
+
+/**
+ * Release the calling thread's current flow workspace.
+ *
+ * The workspace is marked as no longer in use and, when it has a
+ * predecessor, that predecessor becomes the thread's current workspace.
+ * Nothing is returned; failures are only logged.
+ */
+static void
+mlx5_flow_pop_thread_workspace(void)
+{
+	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
+
+	if (wks == NULL)
+		return;
+	if (wks->inuse == 0) {
+		DRV_LOG(ERR, "Failed to close unused flow workspace.");
+		return;
+	}
+	wks->inuse = 0;
+	if (wks->prev != NULL &&
+	    mlx5_flow_os_set_specific_workspace(wks->prev))
+		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
+}
+
+/**
+ * Count the flows still present in the port flow list.
+ *
+ * Every remaining flow is reported with a debug log entry.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return the number of flows not released.
+ */
+int
+mlx5_flow_verify(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_flow *leftover;
+	uint32_t cursor;
+	int pending = 0;
+
+	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows,
+		      cursor, leftover, next) {
+		DRV_LOG(DEBUG, "port %u flow %p still referenced",
+			dev->data->port_id, (void *)leftover);
+		pending++;
+	}
+	return pending;
+}
+
+/**
+ * Install the default hairpin egress control flow for one Tx queue.
+ *
+ * The rule matches the given Tx queue index and jumps to
+ * MLX5_HAIRPIN_TX_TABLE.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param queue
+ *   The queue index.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
+			    uint32_t queue)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	const struct rte_flow_attr attr = {
+		.egress = 1,
+		.priority = 0,
+	};
+	struct mlx5_rte_flow_item_tx_queue txq_spec = { .queue = queue };
+	struct mlx5_rte_flow_item_tx_queue txq_mask = { .queue = UINT32_MAX };
+	struct rte_flow_item items[] = {
+		{
+			.type = (enum rte_flow_item_type)
+				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
+			.spec = &txq_spec,
+			.last = NULL,
+			.mask = &txq_mask,
+		},
+		{ .type = RTE_FLOW_ITEM_TYPE_END },
+	};
+	struct rte_flow_action_jump jump = { .group = MLX5_HAIRPIN_TX_TABLE };
+	struct rte_flow_action actions[] = {
+		{ .type = RTE_FLOW_ACTION_TYPE_JUMP, .conf = &jump },
+		{ .type = RTE_FLOW_ACTION_TYPE_END },
+	};
+	struct rte_flow_error error;
+
+	if (flow_list_create(dev, &priv->ctrl_flows,
+			     &attr, items, actions, false, &error))
+		return 0;
+	DRV_LOG(DEBUG,
+		"Failed to create ctrl flow: rte_errno(%d),"
+		" type(%d), message(%s)",
+		rte_errno, error.type,
+		error.message ? error.message : " (no stated reason)");
+	return -rte_errno;
+}
+
+/**
+ * Enable a control flow configured from the control plane.
+ *
+ * The rule matches the given Ethernet (and optional VLAN) pattern and
+ * spreads traffic with an RSS action over the queues of the redirection
+ * table. Nothing is created when the redirection table or the Rx queue set
+ * is empty.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param eth_spec
+ *   An Ethernet flow spec to apply.
+ * @param eth_mask
+ *   An Ethernet flow mask to apply.
+ * @param vlan_spec
+ *   A VLAN flow spec to apply.
+ * @param vlan_mask
+ *   A VLAN flow mask to apply.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
+		    struct rte_flow_item_eth *eth_spec,
+		    struct rte_flow_item_eth *eth_mask,
+		    struct rte_flow_item_vlan *vlan_spec,
+		    struct rte_flow_item_vlan *vlan_mask)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	/*
+	 * Bail out before the queue array below is sized: a variable length
+	 * array with zero elements is undefined behavior.
+	 */
+	if (!priv->reta_idx_n || !priv->rxqs_n)
+		return 0;
+
+	const struct rte_flow_attr attr = {
+		.ingress = 1,
+		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
+	};
+	struct rte_flow_item items[] = {
+		{
+			.type = RTE_FLOW_ITEM_TYPE_ETH,
+			.spec = eth_spec,
+			.last = NULL,
+			.mask = eth_mask,
+		},
+		{
+			/* Without a VLAN spec the pattern ends right here. */
+			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
+					      RTE_FLOW_ITEM_TYPE_END,
+			.spec = vlan_spec,
+			.last = NULL,
+			.mask = vlan_mask,
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+		},
+	};
+	uint16_t queue[priv->reta_idx_n];
+	struct rte_flow_action_rss action_rss = {
+		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+		.level = 0,
+		.types = priv->rss_conf.rss_hf,
+		.key_len = priv->rss_conf.rss_key_len,
+		.queue_num = priv->reta_idx_n,
+		.key = priv->rss_conf.rss_key,
+		.queue = queue,
+	};
+	struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_RSS,
+			.conf = &action_rss,
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		},
+	};
+	uint32_t flow_idx;
+	struct rte_flow_error error;
+	unsigned int i;
+
+	/* No hash types are requested unless RSS mode is enabled on Rx. */
+	if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
+		action_rss.types = 0;
+	for (i = 0; i != priv->reta_idx_n; ++i)
+		queue[i] = (*priv->reta_idx)[i];
+	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
+				    &attr, items, actions, false, &error);
+	if (!flow_idx)
+		return -rte_errno;
+	return 0;
+}
+
+/**
+ * Enable a control flow configured from the control plane, Ethernet
+ * pattern only.
+ *
+ * Delegates to mlx5_ctrl_flow_vlan() without any VLAN spec or mask.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param eth_spec
+ *   An Ethernet flow spec to apply.
+ * @param eth_mask
+ *   An Ethernet flow mask to apply.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow(struct rte_eth_dev *dev,
+	       struct rte_flow_item_eth *eth_spec,
+	       struct rte_flow_item_eth *eth_mask)
+{
+	struct rte_flow_item_vlan *const no_vlan = NULL;
+
+	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, no_vlan, no_vlan);
+}
+
+/**
+ * Create the default miss control flow matching LACP traffic.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	/*
+	 * The LACP matching is done by only using ether type since using
+	 * a multicast dst mac causes kernel to give low priority to this flow.
+	 */
+	static const struct rte_flow_item_eth lacp_spec = {
+		.type = RTE_BE16(0x8809),
+	};
+	static const struct rte_flow_item_eth lacp_mask = {
+		.type = 0xffff,
+	};
+	const struct rte_flow_attr attr = { .ingress = 1 };
+	struct rte_flow_item items[] = {
+		{
+			.type = RTE_FLOW_ITEM_TYPE_ETH,
+			.spec = &lacp_spec,
+			.mask = &lacp_mask,
+		},
+		{ .type = RTE_FLOW_ITEM_TYPE_END },
+	};
+	struct rte_flow_action actions[] = {
+		{
+			.type = (enum rte_flow_action_type)
+				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
+		},
+		{ .type = RTE_FLOW_ACTION_TYPE_END },
+	};
+	struct rte_flow_error error;
+	uint32_t flow_idx;
+
+	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
+				    &attr, items, actions, false, &error);
+	if (flow_idx)
+		return 0;
+	return -rte_errno;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * The flow handle is converted to an integer and passed to
+ * flow_list_destroy() for the port flow list. Always returns 0.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error __rte_unused)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	const uintptr_t handle = (uintptr_t)(void *)flow;
+
+	flow_list_destroy(dev, &priv->flows, handle);
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Flushes the port flow list through mlx5_flow_list_flush(). Always
+ * returns 0.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error __rte_unused)
+{
+	struct mlx5_priv *port_priv = dev->data->dev_private;
+
+	mlx5_flow_list_flush(dev, &port_priv->flows, false);
+	return 0;
+}
+
+/**
+ * Isolated mode.
+ *
+ * Refused with EBUSY while the port is started. Otherwise the isolated
+ * flag is recorded, the matching device ops table is installed and the
+ * Rx/Tx descriptor status callbacks are set.
+ *
+ * @see rte_flow_isolate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	if (dev->data->dev_started) {
+		rte_flow_error_set(error, EBUSY,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "port must be stopped first");
+		return -rte_errno;
+	}
+	priv->isolated = !!enable;
+	dev->dev_ops = enable ? &mlx5_dev_ops_isolate : &mlx5_dev_ops;
+	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
+	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
+	return 0;
+}
+
+/**
+ * Query a flow by its pool index.
+ *
+ * Resolves the flow from the RTE flow indexed pool and forwards the query
+ * to the driver ops matching the flow's driver type. An unknown index is
+ * reported as ENOENT.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_drv_query(struct rte_eth_dev *dev,
+	       uint32_t flow_idx,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_flow *flow;
+	enum mlx5_flow_drv_type ftype;
+
+	flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
+	if (flow == NULL)
+		return rte_flow_error_set(error, ENOENT,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "invalid flow handle");
+	ftype = flow->drv_type;
+	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+	return flow_get_drv_ops(ftype)->query(dev, flow, actions, data, error);
+}
+
+/**
+ * Query a flow.
+ *
+ * Negative results from flow_drv_query() are passed through; any other
+ * result is reported as 0.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_query(struct rte_eth_dev *dev,
+		struct rte_flow *flow,
+		const struct rte_flow_action *actions,
+		void *data,
+		struct rte_flow_error *error)
+{
+	const int rc = flow_drv_query(dev, (uintptr_t)(void *)flow, actions,
+				      data, error);
+
+	return rc < 0 ? rc : 0;
+}
+
+/**
+ * Manage filter operations.
+ *
+ * Only the generic filter type with the GET operation is served: the
+ * address of the mlx5 flow ops table is stored through @p arg.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	if (filter_type != RTE_ETH_FILTER_GENERIC) {
+		DRV_LOG(ERR, "port %u filter type (%d) not supported",
+			dev->data->port_id, filter_type);
+		rte_errno = ENOTSUP;
+		return -rte_errno;
+	}
+	if (filter_op != RTE_ETH_FILTER_GET) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	*(const void **)arg = &mlx5_flow_ops;
+	return 0;
+}
+
+/**
+ * Create the needed meter and suffix tables.
+ *
+ * Forwards to the create_mtr_tbls callback of the DV driver ops.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] fm
+ *   Pointer to the flow meter.
+ *
+ * @return
+ *   Pointer to table set on success, NULL otherwise.
+ */
+struct mlx5_meter_domains_infos *
+mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
+			  const struct mlx5_flow_meter *fm)
+{
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->create_mtr_tbls(dev, fm);
+}
+
+/**
+ * Destroy the meter table set.
+ *
+ * Forwards to the destroy_mtr_tbls callback of the DV driver ops.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] tbls
+ *   Pointer to the meter table set.
+ *
+ * @return
+ *   0 on success.
+ */
+int
+mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
+			   struct mlx5_meter_domains_infos *tbls)
+{
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->destroy_mtr_tbls(dev, tbls);
+}
+
+/**
+ * Create policer rules.
+ *
+ * Forwards to the create_policer_rules callback of the DV driver ops.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] fm
+ *   Pointer to flow meter structure.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ *
+ * @return
+ *   0 on success, -1 otherwise.
+ */
+int
+mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
+			       struct mlx5_flow_meter *fm,
+			       const struct rte_flow_attr *attr)
+{
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->create_policer_rules
+						(dev, fm, attr);
+}
+
+/**
+ * Destroy policer rules.
+ *
+ * Forwards to the destroy_policer_rules callback of the DV driver ops.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] fm
+ *   Pointer to flow meter structure.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ *
+ * @return
+ *   0 on success, -1 otherwise.
+ */
+int
+mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
+				struct mlx5_flow_meter *fm,
+				const struct rte_flow_attr *attr)
+{
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->destroy_policer_rules
+						(dev, fm, attr);
+}
+
+/**
+ * Allocate a counter.
+ *
+ * Served only when the port resolves to the DV flow driver; otherwise an
+ * error is logged.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   Index to allocated counter on success, 0 otherwise.
+ */
+uint32_t
+mlx5_counter_alloc(struct rte_eth_dev *dev)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_DV) {
+		DRV_LOG(ERR,
+			"port %u counter allocate is not supported.",
+			dev->data->port_id);
+		return 0;
+	}
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->counter_alloc(dev);
+}
+
+/**
+ * Free a counter.
+ *
+ * Served only when the port resolves to the DV flow driver; otherwise an
+ * error is logged.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] cnt
+ *   Index to counter to be free.
+ */
+void
+mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_DV) {
+		DRV_LOG(ERR,
+			"port %u counter free is not supported.",
+			dev->data->port_id);
+		return;
+	}
+	flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->counter_free(dev, cnt);
+}
+
+/**
+ * Query counter statistics.
+ *
+ * Served only when the port resolves to the DV flow driver; otherwise an
+ * error is logged and -ENOTSUP is returned.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] cnt
+ *   Index to counter to query.
+ * @param[in] clear
+ *   Set to clear counter statistics.
+ * @param[out] pkts
+ *   The counter hits packets number to save.
+ * @param[out] bytes
+ *   The counter hits bytes number to save.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise.
+ */
+int
+mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
+		   bool clear, uint64_t *pkts, uint64_t *bytes)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_DV) {
+		DRV_LOG(ERR,
+			"port %u counter query is not supported.",
+			dev->data->port_id);
+		return -ENOTSUP;
+	}
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->counter_query
+					(dev, cnt, clear, pkts, bytes);
+}
+
+/**
+ * Allocate a new memory for the counter values wrapped by all the needed
+ * management.
+ *
+ * One page-aligned buffer holds, in this order: the raw counter data for
+ * every raw slot, the array of raw descriptors and, at the very end, the
+ * memory management structure itself. Only the raw counter data part is
+ * registered as umem and covered by the mkey.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise.
+ */
+static int
+mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_devx_mkey_attr mkey_attr;
+	struct mlx5_counter_stats_mem_mng *mem_mng;
+	volatile struct flow_counter_stats *raw_data;
+	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
+	int size = (sizeof(struct flow_counter_stats) *
+			MLX5_COUNTERS_PER_POOL +
+			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
+			sizeof(struct mlx5_counter_stats_mem_mng);
+	size_t pgsize = rte_mem_page_size();
+	uint8_t *mem;
+	int i;
+
+	if (pgsize == (size_t)-1) {
+		DRV_LOG(ERR, "Failed to get mem page size");
+		rte_errno = ENOMEM;
+		return -ENOMEM;
+	}
+	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		return -ENOMEM;
+	}
+	/* The management structure occupies the tail of the buffer. */
+	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
+	/* From here on, size covers the raw counter data area only. */
+	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
+	mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size,
+						 IBV_ACCESS_LOCAL_WRITE);
+	if (!mem_mng->umem) {
+		rte_errno = errno;
+		mlx5_free(mem);
+		return -rte_errno;
+	}
+	mkey_attr.addr = (uintptr_t)mem;
+	mkey_attr.size = size;
+	mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
+	mkey_attr.pd = sh->pdn;
+	mkey_attr.log_entity_size = 0;
+	mkey_attr.pg_access = 0;
+	mkey_attr.klm_array = NULL;
+	mkey_attr.klm_num = 0;
+	mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
+	mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
+	mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
+	if (!mem_mng->dm) {
+		/* Capture errno before the cleanup calls can overwrite it. */
+		rte_errno = errno;
+		mlx5_os_umem_dereg(mem_mng->umem);
+		mlx5_free(mem);
+		return -rte_errno;
+	}
+	/* The raw descriptors follow directly after the raw counter data. */
+	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
+	raw_data = (volatile struct flow_counter_stats *)mem;
+	for (i = 0; i < raws_n; ++i) {
+		mem_mng->raws[i].mem_mng = mem_mng;
+		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
+	}
+	/* The trailing raws go to the free list used by pending queries. */
+	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
+		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
+				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
+				 next);
+	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
+	sh->cmng.mem_mng = mem_mng;
+	return 0;
+}
+
+/**
+ * Bind statistic memory to a new counter pool.
+ *
+ * A fresh memory management object is created whenever the pool index is a
+ * multiple of MLX5_CNT_CONTAINER_RESIZE. The pool then gets the raw slot
+ * selected by its index modulo MLX5_CNT_CONTAINER_RESIZE.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] pool
+ *   Pointer to the pool to set the statistic memory.
+ *
+ * @return
+ *   0 on success, -1 when the statistic memory cannot be created.
+ */
+static int
+mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
+			       struct mlx5_flow_counter_pool *pool)
+{
+	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+
+	/* Resize statistic memory once used out. */
+	if ((pool->index % MLX5_CNT_CONTAINER_RESIZE) == 0) {
+		if (mlx5_flow_create_counter_stat_mem_mng(sh) != 0) {
+			DRV_LOG(ERR, "Cannot resize counter stat mem.");
+			return -1;
+		}
+	}
+	rte_spinlock_lock(&pool->sl);
+	pool->raw = &cmng->mem_mng->raws[pool->index %
+					 MLX5_CNT_CONTAINER_RESIZE];
+	rte_spinlock_unlock(&pool->sl);
+	pool->raw_hw = NULL;
+	return 0;
+}
+
+#define MLX5_POOL_QUERY_FREQ_US 1000000
+
+/**
+ * Arm the alarm that triggers the next asynchronous batch query.
+ *
+ * The delay is MLX5_POOL_QUERY_FREQ_US divided by the number of valid
+ * pools. The query_thread_on flag records whether the alarm could be set.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ */
+void
+mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
+{
+	const uint32_t pools_n = __atomic_load_n(&sh->cmng.n_valid,
+						 __ATOMIC_RELAXED);
+	/* NOTE(review): assumes at least one valid pool - confirm callers. */
+	const uint32_t us = MLX5_POOL_QUERY_FREQ_US / pools_n;
+	int rc;
+
+	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
+	rc = rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh);
+	sh->cmng.query_thread_on = rc ? 0 : 1;
+	if (rc)
+		DRV_LOG(ERR, "Cannot reinitialize query alarm");
+}
+
+/**
+ * The periodic procedure for triggering asynchronous batch queries for all the
+ * counter pools. This function is probably called by the host thread.
+ *
+ * Each invocation handles the single pool selected by cmng.pool_index:
+ * - nothing is triggered while MLX5_MAX_PENDING_QUERIES queries are pending;
+ * - a pool without statistic memory gets it assigned first;
+ * - a pool with a query already in flight (raw_hw set) is left alone;
+ * - otherwise the first free raw buffer is used as the target of an
+ *   asynchronous devx counter query covering the whole pool.
+ * The pool index advances (wrapping at the number of valid pools) only when
+ * a query was issued. In every case the index is stored back and the alarm
+ * is armed again through mlx5_set_query_alarm().
+ *
+ * @param[in] arg
+ * The parameter for the alarm process.
+ */
+void
+mlx5_flow_query_alarm(void *arg)
+{
+ struct mlx5_dev_ctx_shared *sh = arg;
+ int ret;
+ uint16_t pool_index = sh->cmng.pool_index;
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ struct mlx5_flow_counter_pool *pool;
+ uint16_t n_valid;
+
+ if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
+ goto set_alarm;
+ rte_spinlock_lock(&cmng->pool_update_sl);
+ pool = cmng->pools[pool_index];
+ n_valid = cmng->n_valid;
+ rte_spinlock_unlock(&cmng->pool_update_sl);
+ /* Set the statistic memory to the new created pool. */
+ if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
+ goto set_alarm;
+ if (pool->raw_hw)
+ /* There is a pool query in progress. */
+ goto set_alarm;
+ pool->raw_hw =
+ LIST_FIRST(&sh->cmng.free_stat_raws);
+ if (!pool->raw_hw)
+ /* No free counter statistics raw memory. */
+ goto set_alarm;
+ /*
+ * Identify the counters released between query trigger and query
+ * handle more efficiently. The counter released in this gap period
+ * should wait for a new round of query as the new arrived packets
+ * will not be taken into account.
+ */
+ pool->query_gen++;
+ ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
+ MLX5_COUNTERS_PER_POOL,
+ NULL, NULL,
+ pool->raw_hw->mem_mng->dm->id,
+ (void *)(uintptr_t)
+ pool->raw_hw->data,
+ sh->devx_comp,
+ (uint64_t)(uintptr_t)pool);
+ if (ret) {
+ DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
+ " %d", pool->min_dcs->id);
+ pool->raw_hw = NULL;
+ goto set_alarm;
+ }
+ LIST_REMOVE(pool->raw_hw, next); /* Leaves the free list only once the query was issued. */
+ sh->cmng.pending_queries++;
+ pool_index++;
+ if (pool_index >= n_valid) /* Wrap around to the first pool. */
+ pool_index = 0;
+set_alarm:
+ sh->cmng.pool_index = pool_index;
+ mlx5_set_query_alarm(sh);
+}
+
+/**
+ * Check and callback event for new aged flow in the counter pool
+ *
+ * For every counter whose age state is AGE_CANDIDATE the hit count of the
+ * fresh query result (pool->raw_hw) is compared with the previous one
+ * (pool->raw). A change resets the idle time; otherwise the elapsed time is
+ * accumulated and, once it exceeds the timeout, the counter is moved to
+ * AGE_TMOUT and queued on the owning port's aged counters list.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] pool
+ *   Pointer to Current counter pool.
+ */
+static void
+mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
+			struct mlx5_flow_counter_pool *pool)
+{
+	struct mlx5_priv *priv;
+	struct mlx5_flow_counter *cnt;
+	struct mlx5_age_info *age_info;
+	struct mlx5_age_param *age_param;
+	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
+	struct mlx5_counter_stats_raw *prev = pool->raw;
+	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
+	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
+	uint32_t i;
+
+	pool->time_of_last_age_check = curr_time;
+	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+		/*
+		 * Must be re-armed for every counter: a failed
+		 * compare-exchange overwrites it with the observed state.
+		 */
+		uint16_t expected = AGE_CANDIDATE;
+
+		cnt = MLX5_POOL_GET_CNT(pool, i);
+		age_param = MLX5_CNT_TO_AGE(cnt);
+		if (__atomic_load_n(&age_param->state,
+				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
+			continue;
+		if (cur->data[i].hits != prev->data[i].hits) {
+			/* Traffic was seen since the last query. */
+			__atomic_store_n(&age_param->sec_since_last_hit, 0,
+					 __ATOMIC_RELAXED);
+			continue;
+		}
+		if (__atomic_add_fetch(&age_param->sec_since_last_hit,
+				       time_delta,
+				       __ATOMIC_RELAXED) <= age_param->timeout)
+			continue;
+		/**
+		 * Hold the lock first, or if between the
+		 * state AGE_TMOUT and tailq operation the
+		 * release happened, the release procedure
+		 * may delete a non-existent tailq node.
+		 */
+		priv = rte_eth_devices[age_param->port_id].data->dev_private;
+		age_info = GET_PORT_AGE_INFO(priv);
+		rte_spinlock_lock(&age_info->aged_sl);
+		if (__atomic_compare_exchange_n(&age_param->state, &expected,
+						AGE_TMOUT, false,
+						__ATOMIC_RELAXED,
+						__ATOMIC_RELAXED)) {
+			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
+			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
+		}
+		rte_spinlock_unlock(&age_info->aged_sl);
+	}
+	mlx5_age_event_prepare(sh);
+}
+
+/**
+ * Handler for the HW respond about ready values from an asynchronous batch
+ * query. This function is probably called by the host thread.
+ *
+ * The pool is recovered from the async ID. On a failed completion the raw
+ * buffer that was handed to the query (pool->raw_hw) goes back to the free
+ * list. On success the aging check runs for aged pools, the new raw buffer
+ * replaces pool->raw under the pool lock, the previous one goes back to the
+ * free list, and the counters parked in pool->counters[query_gen ^ 1] are
+ * appended to the manager's counter list of the matching type. In both
+ * cases pool->raw_hw is cleared and the pending query count is decremented.
+ *
+ * @param[in] sh
+ * The pointer to the shared device context.
+ * @param[in] async_id
+ * The Devx async ID.
+ * @param[in] status
+ * The status of the completion.
+ */
+void
+mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
+ uint64_t async_id, int status)
+{
+ struct mlx5_flow_counter_pool *pool =
+ (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
+ struct mlx5_counter_stats_raw *raw_to_free;
+ uint8_t query_gen = pool->query_gen ^ 1;
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ enum mlx5_counter_type cnt_type =
+ pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
+ MLX5_COUNTER_TYPE_ORIGIN;
+
+ if (unlikely(status)) {
+ raw_to_free = pool->raw_hw; /* Query failed: discard its target buffer. */
+ } else {
+ raw_to_free = pool->raw; /* Previous results are superseded. */
+ if (pool->is_aged)
+ mlx5_flow_aging_check(sh, pool);
+ rte_spinlock_lock(&pool->sl);
+ pool->raw = pool->raw_hw;
+ rte_spinlock_unlock(&pool->sl);
+ /* Be sure the new raw counters data is updated in memory. */
+ rte_io_wmb();
+ if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
+ rte_spinlock_lock(&cmng->csl[cnt_type]);
+ TAILQ_CONCAT(&cmng->counters[cnt_type],
+ &pool->counters[query_gen], next);
+ rte_spinlock_unlock(&cmng->csl[cnt_type]);
+ }
+ }
+ LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
+ pool->raw_hw = NULL;
+ sh->cmng.pending_queries--;
+}
+
+/**
+ * Standard translation of a flow group index to a table value.
+ *
+ * For external transfer rules with the FDB default rule set, the table is
+ * the group plus one and group UINT32_MAX is rejected with EINVAL. In all
+ * other cases the table equals the group.
+ *
+ * @param[in] port_id
+ *   Port identifier, used for logging only.
+ * @param[in] group
+ *   Group index value.
+ * @param[out] table
+ *   Resulting table value.
+ * @param[in] grp_info
+ *   Flags used for conversion.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, otherwise the result of rte_flow_error_set().
+ */
+static int
+flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
+		    const struct flow_grp_info *grp_info,
+		    struct rte_flow_error *error)
+{
+	const bool shifted = grp_info->transfer && grp_info->external &&
+			     grp_info->fdb_def_rule;
+
+	if (shifted && group == UINT32_MAX)
+		return rte_flow_error_set
+				(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				 NULL,
+				 "group index not supported");
+	*table = shifted ? group + 1 : group;
+	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
+	return 0;
+}
+
+/**
+ * Translate the rte_flow group index to HW table value.
+ *
+ * External groups below MLX5_MAX_TABLES_EXTERNAL are first scaled by
+ * MLX5_FLOW_TABLE_FACTOR unless grp_info.skip_scale is set.
+ *
+ * If tunnel offload is disabled, all group ids converted to flow table
+ * id using the standard method.
+ * If tunnel offload is enabled, group id can be converted using the
+ * standard or tunnel conversion method. Group conversion method
+ * selection depends on flags in `grp_info` parameter:
+ * - Internal (grp_info.external == 0) groups conversion uses the
+ *   standard method.
+ * - Group ids in JUMP action converted with the tunnel conversion.
+ * - Group id in rule attribute conversion depends on a rule type and
+ *   group id value:
+ *   ** non zero group attributes converted with the tunnel method
+ *   ** zero group attribute in non-tunnel rule is converted using the
+ *      standard method - there's only one root table
+ *   ** zero group attribute in steer tunnel rule is converted with the
+ *      standard method - single root table
+ *   ** zero group attribute in match tunnel rule is a special OvS
+ *      case: that value is used for portability reasons. That group
+ *      id is converted with the tunnel conversion method.
+ *
+ * @param[in] dev
+ *   Port device
+ * @param[in] tunnel
+ *   PMD tunnel offload object
+ * @param[in] group
+ *   rte_flow group index value.
+ * @param[out] table
+ *   HW table value.
+ * @param[in] grp_info
+ *   flags used for conversion
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_group_to_table(struct rte_eth_dev *dev,
+			 const struct mlx5_flow_tunnel *tunnel,
+			 uint32_t group, uint32_t *table,
+			 const struct flow_grp_info *grp_info,
+			 struct rte_flow_error *error)
+{
+	bool standard_translation = true;
+
+	if (!grp_info->skip_scale && grp_info->external &&
+	    group < MLX5_MAX_TABLES_EXTERNAL)
+		group *= MLX5_FLOW_TABLE_FACTOR;
+	if (is_tunnel_offload_active(dev))
+		standard_translation = !grp_info->external ||
+					grp_info->std_tbl_fix;
+	DRV_LOG(DEBUG,
+		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
+		dev->data->port_id, group, grp_info->transfer,
+		grp_info->external, grp_info->fdb_def_rule,
+		standard_translation ? "STANDARD" : "TUNNEL");
+	if (!standard_translation)
+		return tunnel_flow_group_to_flow_table(dev, tunnel, group,
+						       table, error);
+	return flow_group_to_table(dev->data->port_id, group, table,
+				   grp_info, error);
+}
+
+/**
+ * Discover availability of metadata reg_c's.
+ *
+ * REG_C_0 and REG_C_1 are always recorded. For REG_C_2 up to REG_C_7 a
+ * test flow copying REG_C_1 into the candidate register is created; the
+ * register is recorded when that succeeds and the test flow is destroyed
+ * again. Probing stops as soon as DV flow is found disabled. The remaining
+ * entries are filled with REG_NON.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_config *config = &priv->config;
+	enum modify_reg idx;
+	int n = 0;
+
+	/* reg_c[0] and reg_c[1] are reserved. */
+	config->flow_mreg_c[n++] = REG_C_0;
+	config->flow_mreg_c[n++] = REG_C_1;
+	/* Discover availability of other reg_c's. */
+	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
+		struct rte_flow_attr attr = {
+			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
+			.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
+			.ingress = 1,
+		};
+		struct rte_flow_item items[] = {
+			[0] = { .type = RTE_FLOW_ITEM_TYPE_END },
+		};
+		struct rte_flow_action actions[] = {
+			[0] = {
+				.type = (enum rte_flow_action_type)
+					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
+				.conf = &(struct mlx5_flow_action_copy_mreg){
+					.src = REG_C_1,
+					.dst = idx,
+				},
+			},
+			[1] = {
+				.type = RTE_FLOW_ACTION_TYPE_JUMP,
+				.conf = &(struct rte_flow_action_jump){
+					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
+				},
+			},
+			[2] = { .type = RTE_FLOW_ACTION_TYPE_END },
+		};
+		struct rte_flow_error error;
+		struct rte_flow *probe;
+		uint32_t probe_idx;
+
+		if (!config->dv_flow_en)
+			break;
+		/* Create internal flow, validation skips copy action. */
+		probe_idx = flow_list_create(dev, NULL, &attr, items,
+					     actions, false, &error);
+		probe = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
+				       probe_idx);
+		if (probe) {
+			config->flow_mreg_c[n++] = idx;
+			flow_list_destroy(dev, NULL, probe_idx);
+		}
+	}
+	while (n < MLX5_MREG_C_NUM)
+		config->flow_mreg_c[n++] = REG_NON;
+	return 0;
+}
+
+/**
+ * Dump flow raw hw data to file
+ *
+ * When DV flow is disabled only a notice is written to the file and
+ * -ENOTSUP is returned.
+ *
+ * @param[in] dev
+ *   The pointer to Ethernet device.
+ * @param[in] file
+ *   A pointer to a file for output.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int
+mlx5_flow_dev_dump(struct rte_eth_dev *dev,
+		   FILE *file,
+		   struct rte_flow_error *error __rte_unused)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+
+	if (!priv->config.dv_flow_en) {
+		/*
+		 * fputs() reports failure with EOF (negative); any
+		 * non-negative value, zero included, means success.
+		 */
+		if (fputs("device dv flow disabled\n", file) < 0)
+			return -errno;
+		return -ENOTSUP;
+	}
+	return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
+				       sh->tx_domain, file);
+}
+
+/**
+ * Get aged-out flows.
+ *
+ * Served only when the port resolves to the DV flow driver; otherwise an
+ * error is logged and -ENOTSUP is returned.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] contexts
+ *   The address of an array of pointers to the aged-out flows contexts.
+ * @param[in] nb_contexts
+ *   The length of context array pointers.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   how many contexts get in success, otherwise negative errno value.
+ *   if nb_contexts is 0, return the amount of all aged contexts.
+ *   if nb_contexts is not 0 , return the amount of aged flows reported
+ *   in the context array.
+ */
+int
+mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+			 uint32_t nb_contexts, struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_DV) {
+		DRV_LOG(ERR,
+			"port %u get aged flows is not supported.",
+			dev->data->port_id);
+		return -ENOTSUP;
+	}
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->get_aged_flows
+					(dev, contexts, nb_contexts, error);
+}
+
+/*
+ * Wrapper for driver action_validate op callback.
+ * Reports ENOTSUP when the driver ops do not provide the callback.
+ */
+static int
+flow_drv_action_validate(struct rte_eth_dev *dev,
+			 const struct rte_flow_shared_action_conf *conf,
+			 const struct rte_flow_action *action,
+			 const struct mlx5_flow_driver_ops *fops,
+			 struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action validation unsupported";
+
+	if (fops->action_validate)
+		return fops->action_validate(dev, conf, action, error);
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return -rte_errno;
+}
+
+/**
+ * Destroy a shared action identified by its handle.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] action
+ *   Handle for the shared action to be destroyed.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *
+ * @note: wrapper for driver action_destroy op callback.
+ */
+static int
+mlx5_shared_action_destroy(struct rte_eth_dev *dev,
+			   struct rte_flow_shared_action *action,
+			   struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action destruction unsupported";
+	struct rte_flow_attr attr = { .transfer = 0 };
+	const struct mlx5_flow_driver_ops *fops;
+
+	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+	if (fops->action_destroy)
+		return fops->action_destroy(dev, action, error);
+	/* The selected driver has no destroy callback. */
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return -rte_errno;
+}
+
+/*
+ * Dispatch to the driver action_update callback.
+ *
+ * When the driver does not implement the callback, the failure is logged,
+ * reported through *error* with ENOTSUP and -rte_errno is returned.
+ */
+static int
+flow_drv_action_update(struct rte_eth_dev *dev,
+		       struct rte_flow_shared_action *action,
+		       const void *action_conf,
+		       const struct mlx5_flow_driver_ops *fops,
+		       struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action update unsupported";
+
+	if (fops->action_update)
+		return fops->action_update(dev, action, action_conf, error);
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return -rte_errno;
+}
+
+/*
+ * Dispatch to the driver action_query callback.
+ *
+ * When the driver does not implement the callback, the failure is logged,
+ * reported through *error* with ENOTSUP and -rte_errno is returned.
+ */
+static int
+flow_drv_action_query(struct rte_eth_dev *dev,
+		      const struct rte_flow_shared_action *action,
+		      void *data,
+		      const struct mlx5_flow_driver_ops *fops,
+		      struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action query unsupported";
+
+	if (fops->action_query)
+		return fops->action_query(dev, action, data, error);
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return -rte_errno;
+}
+
+/**
+ * Create a shared action that can be reused by multiple flow rules.
+ *
+ * The action is first validated through the driver action_validate
+ * callback and then created through the driver action_create callback.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] conf
+ *   Shared action configuration.
+ * @param[in] action
+ *   Action configuration for shared action creation.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ * @return
+ *   A valid handle in case of success, NULL otherwise and rte_errno is set.
+ */
+static struct rte_flow_shared_action *
+mlx5_shared_action_create(struct rte_eth_dev *dev,
+			  const struct rte_flow_shared_action_conf *conf,
+			  const struct rte_flow_action *action,
+			  struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action creation unsupported";
+	struct rte_flow_attr attr = { .transfer = 0 };
+	const struct mlx5_flow_driver_ops *fops;
+
+	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+	if (flow_drv_action_validate(dev, conf, action, fops, error))
+		return NULL;
+	if (fops->action_create)
+		return fops->action_create(dev, conf, action, error);
+	/* Validation passed but the driver cannot create shared actions. */
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return NULL;
+}
+
+/**
+ * Update in place the shared action referenced by *shared_action* with the
+ * configuration carried by *action*.
+ * The update of the shared action configuration affects all flow rules
+ * reusing the action via handle.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] shared_action
+ *   Handle for the shared action to be updated.
+ * @param[in] action
+ *   Action specification used to modify the action pointed by handle.
+ *   *action* should be of same type with the action pointed by the
+ *   *shared_action* handle argument, otherwise considered as invalid.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_shared_action_update(struct rte_eth_dev *dev,
+		struct rte_flow_shared_action *shared_action,
+		const struct rte_flow_action *action,
+		struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+	const struct mlx5_flow_driver_ops *fops;
+	int status;
+
+	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+	/* The new action is validated without a shared action conf. */
+	status = flow_drv_action_validate(dev, NULL, action, fops, error);
+	if (status != 0)
+		return status;
+	return flow_drv_action_update(dev, shared_action, action->conf, fops,
+				      error);
+}
+
+/**
+ * Query the shared action by handle.
+ *
+ * Retrieves action-specific data such as counters. The data is gathered by
+ * a special action which may be present/referenced in more than one flow
+ * rule definition.
+ *
+ * \see RTE_FLOW_ACTION_TYPE_COUNT
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] action
+ *   Handle for the shared action to query.
+ * @param[in, out] data
+ *   Pointer to storage for the associated query data type.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_shared_action_query(struct rte_eth_dev *dev,
+			 const struct rte_flow_shared_action *action,
+			 void *data,
+			 struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+	const struct mlx5_flow_driver_ops *fops;
+
+	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+	return flow_drv_action_query(dev, action, data, fops, error);
+}
+
+/**
+ * Destroy all shared actions.
+ *
+ * Walks the port list of shared RSS actions and destroys every entry
+ * through mlx5_shared_action_destroy(), using the list index as handle.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 when every destruction succeeded, otherwise the bitwise OR of the
+ *   values returned by the failed destructions.
+ */
+int
+mlx5_shared_action_flush(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_shared_action_rss *rss_entry;
+	struct rte_flow_shared_action *handle;
+	struct rte_flow_error err;
+	uint32_t act_idx;
+	int status = 0;
+
+	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
+		      priv->rss_shared_actions, act_idx, rss_entry, next) {
+		/* The handle handed to the destroy path is the pool index. */
+		handle = (struct rte_flow_shared_action *)(uintptr_t)act_idx;
+		status |= mlx5_shared_action_destroy(dev, handle, &err);
+	}
+	return status;
+}
+
+/*
+ * Flags passed to the driver sync_domain callback by
+ * rte_pmd_mlx5_sync_flow(). With HAVE_MLX5DV_DR the value is the OR of the
+ * mlx5dv SW and HW domain sync flags; without it the two lowest bits are
+ * used as a literal fallback.
+ */
+#ifndef HAVE_MLX5DV_DR
+#define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
+#else
+#define MLX5_DOMAIN_SYNC_FLOW \
+ (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
+#endif
+
+/*
+ * Synchronize the flows of the requested domains through the driver
+ * sync_domain callback.
+ *
+ * @param port_id
+ *   Index into rte_eth_devices[] of the port to synchronize.
+ * @param domains
+ *   Domain selection forwarded unchanged to the driver callback.
+ *
+ * @return
+ *   The callback result, with a positive value converted to its negative.
+ */
+int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	const struct mlx5_flow_driver_ops *fops =
+		flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+	int ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
+
+	/* Normalize positive error codes to the negative convention. */
+	return ret > 0 ? -ret : ret;
+}
+
+/**
+ * Tunnel offload functionality is defined for the DV environment only.
+ */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+/*
+ * 32-bit MARK value attached by the tunnel default miss rule.
+ * flow_tunnel_add_default_miss() fills the bit-fields and programs 'val' as
+ * the MARK action id; tunnel_mark_decode() splits a received mark back into
+ * its fields.
+ */
+__extension__
+union tunnel_offload_mark {
+ /* Whole mark value, as used for the MARK action id. */
+ uint32_t val;
+ struct {
+ /* Written as 0 by the miss rule builder. */
+ uint32_t app_reserve:8;
+ /* Flow table id, converted with tunnel_flow_tbl_to_id(). */
+ uint32_t table_id:15;
+ /* Set when the rule has the transfer attribute. */
+ uint32_t transfer:1;
+ /* Written as 0 by the miss rule builder. */
+ uint32_t _unused_:8;
+ };
+};
+
+/*
+ * Forward declaration of the tunnel database accessor; the definition
+ * appears later in this file.
+ * 'match' selects a tunnel, 'hit' is invoked for the selected tunnel,
+ * 'miss' is invoked when no tunnel was selected, 'ctx' is passed through to
+ * all callbacks and 'lock_op' selects whether hit/miss run with the hub
+ * spinlock held.
+ */
+static bool
+mlx5_access_tunnel_offload_db
+ (struct rte_eth_dev *dev,
+ bool (*match)(struct rte_eth_dev *,
+ struct mlx5_flow_tunnel *, const void *),
+ void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
+ void (*miss)(struct rte_eth_dev *, void *),
+ void *ctx, bool lock_op);
+
+/*
+ * Build and translate the default "miss" sub-flow of a tunnel steering rule.
+ *
+ * The sub-flow matches any Ethernet packet (ETH item without spec/mask) in
+ * the group targeted by the application JUMP action, at priority
+ * MLX5_TNL_MISS_RULE_PRIORITY. Its actions are MARK, carrying the resolved
+ * flow table id and the transfer bit, followed by RSS for non-transfer rules
+ * or by JUMP to MLX5_TNL_MISS_FDB_JUMP_GRP for transfer rules.
+ *
+ * @param dev
+ *   Pointer to the Ethernet device structure.
+ * @param flow
+ *   Flow the new sub-flow handle is linked to.
+ * @param attr
+ *   Attributes of the application rule; copied and adjusted for the miss rule.
+ * @param app_actions
+ *   Application actions; the conf of the first one is read as the PMD tunnel
+ *   and the list is scanned for a JUMP action.
+ * @param flow_idx
+ *   Flow index forwarded to flow_drv_prepare().
+ * @param ctx
+ *   Caller-provided storage for the RSS/JUMP action configuration.
+ *   ctx->queue is allocated here for non-transfer rules and is not released
+ *   by this function on any path.
+ * @param error
+ *   Verbose error reporting structure.
+ *
+ * @return
+ *   0 on success, a non-zero value otherwise.
+ */
+static int
+flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_action *app_actions,
+ uint32_t flow_idx,
+ struct tunnel_default_miss_ctx *ctx,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_flow *dev_flow;
+ struct rte_flow_attr miss_attr = *attr;
+ /* NOTE(review): assumes the first application action is the PMD tunnel
+ * action whose conf points at the tunnel - confirm against the caller.
+ */
+ const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf;
+ /* Pattern: match every Ethernet packet. */
+ const struct rte_flow_item miss_items[2] = {
+ {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = NULL,
+ .last = NULL,
+ .mask = NULL
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ .spec = NULL,
+ .last = NULL,
+ .mask = NULL
+ }
+ };
+ union tunnel_offload_mark mark_id;
+ struct rte_flow_action_mark miss_mark;
+ /* Slot [1] is filled below with either RSS or JUMP. */
+ struct rte_flow_action miss_actions[3] = {
+ [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
+ [2] = { .type = RTE_FLOW_ACTION_TYPE_END, .conf = NULL }
+ };
+ const struct rte_flow_action_jump *jump_data;
+ uint32_t i, flow_table = 0; /* prevent compilation warning */
+ struct flow_grp_info grp_info = {
+ .external = 1,
+ .transfer = attr->transfer,
+ .fdb_def_rule = !!priv->fdb_def_rule,
+ .std_tbl_fix = 0,
+ };
+ int ret;
+
+ if (!attr->transfer) {
+ uint32_t q_size;
+
+ /* Non-transfer: spread missed packets over the port RETA queues. */
+ miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
+ q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
+ ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
+ 0, SOCKET_ID_ANY);
+ if (!ctx->queue)
+ return rte_flow_error_set
+ (error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ NULL, "invalid default miss RSS");
+ /* The assignments below are chained with the comma operator and form a
+ * single statement ending at the semicolon after the queue assignment.
+ */
+ ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+ ctx->action_rss.level = 0,
+ ctx->action_rss.types = priv->rss_conf.rss_hf,
+ ctx->action_rss.key_len = priv->rss_conf.rss_key_len,
+ ctx->action_rss.queue_num = priv->reta_idx_n,
+ ctx->action_rss.key = priv->rss_conf.rss_key,
+ ctx->action_rss.queue = ctx->queue;
+ /* NOTE(review): this check runs after the allocation above, so on
+ * failure ctx->queue stays allocated - presumably the caller frees it;
+ * confirm.
+ */
+ if (!priv->reta_idx_n || !priv->rxqs_n)
+ return rte_flow_error_set
+ (error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ NULL, "invalid port configuration");
+ /* No hash types when RSS is not enabled in the Rx multi-queue mode. */
+ if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
+ ctx->action_rss.types = 0;
+ for (i = 0; i != priv->reta_idx_n; ++i)
+ ctx->queue[i] = (*priv->reta_idx)[i];
+ } else {
+ /* Transfer: missed packets jump to the dedicated FDB miss group. */
+ miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
+ ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
+ }
+ /* NOTE(review): ctx->raw presumably aliases the action_rss / miss_jump
+ * storage filled above - confirm against struct tunnel_default_miss_ctx.
+ */
+ miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
+ /* Advance to the application JUMP action. NOTE(review): the scan has no
+ * END check, so it relies on a JUMP action being present - confirm.
+ */
+ for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
+ jump_data = app_actions->conf;
+ miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
+ miss_attr.group = jump_data->group;
+ ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
+ &flow_table, &grp_info, error);
+ if (ret)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ NULL, "invalid tunnel id");
+ /* Encode the target table and domain into the MARK id. */
+ mark_id.app_reserve = 0;
+ mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
+ mark_id.transfer = !!attr->transfer;
+ mark_id._unused_ = 0;
+ miss_mark.id = mark_id.val;
+ dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
+ miss_items, miss_actions, flow_idx, error);
+ if (!dev_flow)
+ return -rte_errno;
+ dev_flow->flow = flow;
+ dev_flow->external = true;
+ dev_flow->tunnel = tunnel;
+ /* Subflow object was created, we must include one in the list. */
+ SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
+ dev_flow->handle, next);
+ DRV_LOG(DEBUG,
+ "port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
+ dev->data->port_id, tunnel->app_tunnel.type,
+ tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
+ ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
+ miss_actions, error);
+ /* The copy table is updated only when the translation succeeded. */
+ if (!ret)
+ ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
+ error);
+
+ return ret;
+}
+
+/*
+ * Resolve the flow table entry referenced by a tunnel miss mark.
+ *
+ * The mark is split with union tunnel_offload_mark, its table id and
+ * transfer bit are turned into a flow table key and the key is looked up in
+ * the shared flow table hash list.
+ *
+ * @return
+ *   The matching table data entry, NULL when the lookup finds nothing.
+ */
+static const struct mlx5_flow_tbl_data_entry *
+tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	union tunnel_offload_mark decoded = { .val = mark };
+	union mlx5_flow_tbl_key tbl_key = {
+		{
+			.table_id = tunnel_id_to_flow_tbl(decoded.table_id),
+			.dummy = 0,
+			.domain = !!decoded.transfer,
+			.direction = 0,
+		}
+	};
+	struct mlx5_hlist_entry *found;
+
+	found = mlx5_hlist_lookup(priv->sh->flow_tbls, tbl_key.v64, NULL);
+	if (!found)
+		return NULL;
+	return container_of(found, struct mlx5_flow_tbl_data_entry, entry);
+}
+
+/*
+ * Hash list removal callback for group-to-table entries: gives the table id
+ * back to the tunnel table id pool and frees the entry.
+ */
+static void
+mlx5_flow_tunnel_grp2tbl_remove_cb(struct mlx5_hlist *list,
+				   struct mlx5_hlist_entry *entry)
+{
+	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
+	struct mlx5_dev_ctx_shared *sh = list->ctx;
+	struct mlx5_indexed_pool *id_pool = sh->ipool[MLX5_IPOOL_TNL_TBL_ID];
+
+	mlx5_ipool_free(id_pool, tunnel_flow_tbl_to_id(tte->flow_table));
+	mlx5_free(tte);
+}
+
+/*
+ * Hash list match callback for group-to-table entries.
+ *
+ * @return
+ *   0 when both the tunnel id and the group encoded in the key equal those
+ *   stored in the entry, 1 otherwise.
+ */
+static int
+mlx5_flow_tunnel_grp2tbl_match_cb(struct mlx5_hlist *list __rte_unused,
+				  struct mlx5_hlist_entry *entry,
+				  uint64_t key, void *cb_ctx __rte_unused)
+{
+	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
+	union tunnel_tbl_key wanted = {
+		.val = key,
+	};
+
+	return !(wanted.tunnel_id == tte->tunnel_id &&
+		 wanted.group == tte->group);
+}
+
+/*
+ * Hash list creation callback for group-to-table entries.
+ *
+ * Allocates a zeroed entry, draws a table id from the tunnel table id pool
+ * and stores the id converted with tunnel_id_to_flow_tbl() together with
+ * the tunnel id and group taken from the key.
+ *
+ * @return
+ *   Pointer to the hash member of the new entry, NULL on failure.
+ */
+static struct mlx5_hlist_entry *
+mlx5_flow_tunnel_grp2tbl_create_cb(struct mlx5_hlist *list, uint64_t key,
+				   void *ctx __rte_unused)
+{
+	struct mlx5_dev_ctx_shared *sh = list->ctx;
+	struct mlx5_indexed_pool *id_pool = sh->ipool[MLX5_IPOOL_TNL_TBL_ID];
+	union tunnel_tbl_key tbl = {
+		.val = key,
+	};
+	struct tunnel_tbl_entry *tte;
+
+	tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
+			  sizeof(*tte), 0,
+			  SOCKET_ID_ANY);
+	if (!tte)
+		return NULL;
+	mlx5_ipool_malloc(id_pool, &tte->flow_table);
+	if (tte->flow_table >= MLX5_MAX_TABLES) {
+		DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
+			tte->flow_table);
+		mlx5_ipool_free(id_pool, tte->flow_table);
+		mlx5_free(tte);
+		return NULL;
+	}
+	if (!tte->flow_table) {
+		/* A zero id is treated as "no id obtained". */
+		mlx5_free(tte);
+		return NULL;
+	}
+	tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
+	tte->tunnel_id = tbl.tunnel_id;
+	tte->group = tbl.group;
+	return &tte->hash;
+}
+
+/*
+ * Translate a (tunnel, group) pair into a flow table id.
+ *
+ * The pair is registered in the tunnel's own group hash list, or in the hub
+ * group hash list when no tunnel is given (tunnel id 0 is used for the key).
+ *
+ * @param[out] table
+ *   Receives the flow table stored in the registered entry.
+ *
+ * @return
+ *   0 on success, the rte_flow_error_set() result when the registration
+ *   fails.
+ */
+static uint32_t
+tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
+				const struct mlx5_flow_tunnel *tunnel,
+				uint32_t group, uint32_t *table,
+				struct rte_flow_error *error)
+{
+	union tunnel_tbl_key key = {
+		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
+		.group = group
+	};
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct mlx5_hlist *group_hash;
+	struct mlx5_hlist_entry *he;
+	struct tunnel_tbl_entry *tte;
+
+	if (tunnel)
+		group_hash = tunnel->groups;
+	else
+		group_hash = thub->groups;
+	he = mlx5_hlist_register(group_hash, key.val, NULL);
+	if (he == NULL)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+					  NULL,
+					  "tunnel group index not supported");
+	tte = container_of(he, typeof(*tte), hash);
+	*table = tte->flow_table;
+	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
+		dev->data->port_id, key.tunnel_id, group, *table);
+	return 0;
+}
+
+/*
+ * Release a PMD tunnel: unlink it from its list, destroy its group hash
+ * list and return the object to the tunnel id pool.
+ */
+static void
+mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
+		      struct mlx5_flow_tunnel *tunnel)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_indexed_pool *id_pool =
+		priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
+
+	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
+		dev->data->port_id, tunnel->tunnel_id);
+	LIST_REMOVE(tunnel, chain);
+	mlx5_hlist_destroy(tunnel->groups);
+	mlx5_ipool_free(id_pool, tunnel->tunnel_id);
+}
+
+/*
+ * Search the tunnel list of the hub and run a callback on the outcome.
+ *
+ * The list is walked under the hub spinlock until 'match' returns true.
+ * Then 'hit' (tunnel selected) or 'miss' (nothing selected) is invoked when
+ * provided. With lock_op set the callback runs while the spinlock is still
+ * held; otherwise the spinlock is released before the callback.
+ *
+ * @return
+ *   true when a tunnel was selected by 'match', false otherwise.
+ */
+static bool
+mlx5_access_tunnel_offload_db
+	(struct rte_eth_dev *dev,
+	 bool (*match)(struct rte_eth_dev *,
+		       struct mlx5_flow_tunnel *, const void *),
+	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
+	 void (*miss)(struct rte_eth_dev *, void *),
+	 void *ctx, bool lock_op)
+{
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct mlx5_flow_tunnel *tunnel;
+	bool found = false;
+
+	rte_spinlock_lock(&thub->sl);
+	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
+		found = match(dev, tunnel, (const void *)ctx);
+		if (found)
+			break;
+	}
+	if (!lock_op)
+		rte_spinlock_unlock(&thub->sl);
+	if (found) {
+		if (hit)
+			hit(dev, tunnel, ctx);
+	} else if (miss) {
+		miss(dev, ctx);
+	}
+	if (lock_op)
+		rte_spinlock_unlock(&thub->sl);
+
+	return found;
+}
+
+/* Lookup context used by mlx5_find_tunnel_id() and its callbacks. */
+struct tunnel_db_find_tunnel_id_ctx {
+ /* Input: tunnel id compared by find_tunnel_id_match(). */
+ uint32_t tunnel_id;
+ /* Output: tunnel stored by find_tunnel_id_hit(). */
+ struct mlx5_flow_tunnel *tunnel;
+};
+
+/* Match callback: true when the tunnel id equals the id in the context. */
+static bool
+find_tunnel_id_match(struct rte_eth_dev *dev,
+		     struct mlx5_flow_tunnel *tunnel, const void *x)
+{
+	const struct tunnel_db_find_tunnel_id_ctx *lookup = x;
+	const uint32_t wanted_id = lookup->tunnel_id;
+
+	RTE_SET_USED(dev);
+	return wanted_id == tunnel->tunnel_id;
+}
+
+/* Hit callback: record the selected tunnel in the lookup context. */
+static void
+find_tunnel_id_hit(struct rte_eth_dev *dev,
+		   struct mlx5_flow_tunnel *tunnel, void *x)
+{
+	RTE_SET_USED(dev);
+	((struct tunnel_db_find_tunnel_id_ctx *)x)->tunnel = tunnel;
+}
+
+/*
+ * Find the PMD tunnel with the given id.
+ *
+ * @return
+ *   The tunnel recorded by the hit callback, NULL when none matched (the
+ *   context member is zero-initialized and no miss callback is supplied).
+ */
+static struct mlx5_flow_tunnel *
+mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
+{
+	struct tunnel_db_find_tunnel_id_ctx lookup = {
+		.tunnel_id = id,
+	};
+
+	/* lock_op is true: the hit callback runs under the hub spinlock. */
+	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
+				      find_tunnel_id_hit, NULL, &lookup, true);
+
+	return lookup.tunnel;
+}
+
+/*
+ * Allocate and initialize a PMD tunnel for an application tunnel.
+ *
+ * The object comes zeroed from the tunnel id pool; its id must stay below
+ * MLX5_MAX_TUNNELS. A private group hash list is created, the application
+ * tunnel is copied in and the PMD action/item that reference the tunnel
+ * itself are set up.
+ *
+ * @return
+ *   The new tunnel, NULL on failure.
+ */
+static struct mlx5_flow_tunnel *
+mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
+			  const struct rte_flow_tunnel *app_tunnel)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_indexed_pool *id_pool =
+		priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
+	struct mlx5_flow_tunnel *tunnel;
+	uint32_t id;
+
+	tunnel = mlx5_ipool_zmalloc(id_pool, &id);
+	if (tunnel == NULL)
+		return NULL;
+	if (id >= MLX5_MAX_TUNNELS) {
+		mlx5_ipool_free(id_pool, id);
+		DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
+		return NULL;
+	}
+	tunnel->groups = mlx5_hlist_create("tunnel groups", 1024, 0, 0,
+					   mlx5_flow_tunnel_grp2tbl_create_cb,
+					   mlx5_flow_tunnel_grp2tbl_match_cb,
+					   mlx5_flow_tunnel_grp2tbl_remove_cb);
+	if (tunnel->groups == NULL) {
+		mlx5_ipool_free(id_pool, id);
+		return NULL;
+	}
+	tunnel->groups->ctx = priv->sh;
+	/* Fill in the new PMD tunnel. */
+	tunnel->tunnel_id = id;
+	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
+	/* PMD-private item that points back at the tunnel. */
+	tunnel->item.type = (typeof(tunnel->item.type))
+			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
+	tunnel->item.spec = tunnel;
+	tunnel->item.last = NULL;
+	tunnel->item.mask = NULL;
+	/* PMD-private action that points back at the tunnel. */
+	tunnel->action.type = (typeof(tunnel->action.type))
+			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
+	tunnel->action.conf = tunnel;
+
+	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
+		dev->data->port_id, tunnel->tunnel_id);
+
+	return tunnel;
+}
+
+/* Lookup context used by mlx5_get_flow_tunnel() and its callbacks. */
+struct tunnel_db_get_tunnel_ctx {
+ /* Input: application tunnel compared by get_tunnel_match(). */
+ const struct rte_flow_tunnel *app_tunnel;
+ /* Output: tunnel stored by get_tunnel_hit() or get_tunnel_miss(). */
+ struct mlx5_flow_tunnel *tunnel;
+};
+
+/*
+ * Match callback: true when the application tunnel stored in the PMD tunnel
+ * is byte-wise identical to the one in the context.
+ */
+static bool get_tunnel_match(struct rte_eth_dev *dev,
+			     struct mlx5_flow_tunnel *tunnel, const void *x)
+{
+	const struct tunnel_db_get_tunnel_ctx *lookup = x;
+	int diff;
+
+	RTE_SET_USED(dev);
+	diff = memcmp(lookup->app_tunnel, &tunnel->app_tunnel,
+		      sizeof(*lookup->app_tunnel));
+	return diff == 0;
+}
+
+/*
+ * Hit callback: take one more reference on the existing tunnel and record
+ * it in the context. Called under tunnel spinlock protection.
+ */
+static void get_tunnel_hit(struct rte_eth_dev *dev,
+			   struct mlx5_flow_tunnel *tunnel, void *x)
+{
+	struct tunnel_db_get_tunnel_ctx *lookup = x;
+
+	RTE_SET_USED(dev);
+	lookup->tunnel = tunnel;
+	tunnel->refctn++;
+}
+
+/*
+ * Miss callback: no PMD tunnel matches the application tunnel, so allocate
+ * a new one and link it into the hub tunnel list.
+ *
+ * Called under tunnel spinlock protection. The spinlock is dropped around
+ * the allocation and taken again before the list is modified, so the
+ * function returns with the spinlock held.
+ *
+ * On allocation failure ctx->tunnel is left NULL and neither the reference
+ * counter nor the list is touched.
+ */
+static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
+{
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct tunnel_db_get_tunnel_ctx *ctx = x;
+
+	rte_spinlock_unlock(&thub->sl);
+	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
+	rte_spinlock_lock(&thub->sl);
+	/* The allocation may fail: only a valid tunnel is initialized. */
+	if (ctx->tunnel) {
+		ctx->tunnel->refctn = 1;
+		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
+	}
+}
+
+
+/*
+ * Get the PMD tunnel for an application tunnel, creating it when missing.
+ *
+ * @param[out] tunnel
+ *   Receives the PMD tunnel, or NULL when none could be obtained.
+ *
+ * @return
+ *   0 on success, -ENOMEM when no tunnel was obtained.
+ */
+static int
+mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
+		     const struct rte_flow_tunnel *app_tunnel,
+		     struct mlx5_flow_tunnel **tunnel)
+{
+	struct tunnel_db_get_tunnel_ctx lookup = {
+		.app_tunnel = app_tunnel,
+	};
+
+	/* lock_op is true: hit/miss run under the hub spinlock. */
+	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
+				      get_tunnel_miss, &lookup, true);
+	*tunnel = lookup.tunnel;
+	if (lookup.tunnel == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Release the tunnel hub of a shared device context.
+ *
+ * Does nothing when no hub exists. A warning is logged when tunnels are
+ * still linked; the hub group hash list and the hub itself are then freed.
+ */
+void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
+{
+	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
+
+	if (thub == NULL)
+		return;
+	if (!LIST_EMPTY(&thub->tunnels))
+		DRV_LOG(WARNING, "port %u tunnels present\n", port_id);
+	mlx5_hlist_destroy(thub->groups);
+	mlx5_free(thub);
+}
+
+/*
+ * Allocate the tunnel hub of a shared device context.
+ *
+ * The hub holds an empty tunnel list, a spinlock and the group hash list
+ * used for rules without a tunnel. On success the hub is stored in
+ * sh->tunnel_hub.
+ *
+ * @return
+ *   0 on success, -ENOMEM when the hub cannot be allocated, -rte_errno when
+ *   the group hash list cannot be created.
+ */
+int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_flow_tunnel_hub *thub;
+	int err;
+
+	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
+			   0, SOCKET_ID_ANY);
+	if (thub == NULL)
+		return -ENOMEM;
+	LIST_INIT(&thub->tunnels);
+	rte_spinlock_init(&thub->sl);
+	thub->groups = mlx5_hlist_create("flow groups",
+					 rte_align32pow2(MLX5_MAX_TABLES), 0,
+					 0, mlx5_flow_tunnel_grp2tbl_create_cb,
+					 mlx5_flow_tunnel_grp2tbl_match_cb,
+					 mlx5_flow_tunnel_grp2tbl_remove_cb);
+	if (thub->groups == NULL) {
+		/* Only the hub itself exists at this point. */
+		err = -rte_errno;
+		mlx5_free(thub);
+		return err;
+	}
+	thub->groups->ctx = sh;
+	sh->tunnel_hub = thub;
+
+	return 0;
+}
+
+/*
+ * Check whether tunnel offload can be used with an application tunnel.
+ *
+ * @param dev
+ *   Pointer to the Ethernet device structure.
+ * @param tunnel
+ *   Application tunnel description, may be NULL.
+ * @param[out] err_msg
+ *   Set to a static description of the failure, or to NULL on success.
+ *   Passed by address so that the caller can report the reason.
+ *
+ * @return
+ *   true when tunnel offload is active and the tunnel type is supported
+ *   (VXLAN only), false otherwise.
+ */
+static inline bool
+mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
+			  struct rte_flow_tunnel *tunnel,
+			  const char **err_msg)
+{
+	*err_msg = NULL;
+	if (!is_tunnel_offload_active(dev)) {
+		*err_msg = "tunnel offload was not activated";
+		goto out;
+	} else if (!tunnel) {
+		*err_msg = "no application tunnel";
+		goto out;
+	}
+
+	switch (tunnel->type) {
+	default:
+		*err_msg = "unsupported tunnel type";
+		goto out;
+	case RTE_FLOW_ITEM_TYPE_VXLAN:
+		break;
+	}
+
+out:
+	return !*err_msg;
+}
+
+/*
+ * Provide the PMD action that an application must place in front of the
+ * actions of a tunnel steering rule.
+ *
+ * @param[out] actions
+ *   Receives a pointer to the single PMD action owned by the tunnel.
+ * @param[out] num_of_actions
+ *   Receives the number of PMD actions, always 1 on success.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
+		    struct rte_flow_tunnel *app_tunnel,
+		    struct rte_flow_action **actions,
+		    uint32_t *num_of_actions,
+		    struct rte_flow_error *error)
+{
+	int ret;
+	struct mlx5_flow_tunnel *tunnel;
+	const char *err_msg = NULL;
+	bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
+
+	if (!verdict)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
+					  err_msg);
+	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
+	if (ret < 0) {
+		/* rte_flow_error_set() expects a positive errno code. */
+		return rte_flow_error_set(error, -ret,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
+					  "failed to initialize pmd tunnel");
+	}
+	*actions = &tunnel->action;
+	*num_of_actions = 1;
+	return 0;
+}
+
+/*
+ * Provide the PMD item that an application must place in front of the
+ * pattern of a tunnel match rule.
+ *
+ * @param[out] items
+ *   Receives a pointer to the single PMD item owned by the tunnel.
+ * @param[out] num_of_items
+ *   Receives the number of PMD items, always 1 on success.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
+		       struct rte_flow_tunnel *app_tunnel,
+		       struct rte_flow_item **items,
+		       uint32_t *num_of_items,
+		       struct rte_flow_error *error)
+{
+	int ret;
+	struct mlx5_flow_tunnel *tunnel;
+	const char *err_msg = NULL;
+	bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
+
+	if (!verdict)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+					  err_msg);
+	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
+	if (ret < 0) {
+		/* rte_flow_error_set() expects a positive errno code. */
+		return rte_flow_error_set(error, -ret,
+					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+					  "failed to initialize pmd tunnel");
+	}
+	*items = &tunnel->item;
+	*num_of_items = 1;
+	return 0;
+}
+
+/*
+ * Context shared by the PMD item and PMD action release paths and their
+ * database callbacks.
+ */
+struct tunnel_db_element_release_ctx {
+ /* PMD items being released, NULL on the action release path. */
+ struct rte_flow_item *items;
+ /* PMD actions being released, NULL on the item release path. */
+ struct rte_flow_action *actions;
+ /* Number of elements passed by the caller; only 1 can match. */
+ uint32_t num_elements;
+ /* Error structure filled by the miss callback. */
+ struct rte_flow_error *error;
+ /* Result returned to the caller; set by the hit/miss callbacks. */
+ int ret;
+};
+
+/*
+ * Match callback of the release paths: true when exactly one element is
+ * being released and it is the item (checked first) or the action embedded
+ * in this tunnel.
+ */
+static bool
+tunnel_element_release_match(struct rte_eth_dev *dev,
+			     struct mlx5_flow_tunnel *tunnel, const void *x)
+{
+	const struct tunnel_db_element_release_ctx *rel = x;
+	bool matched = false;
+
+	RTE_SET_USED(dev);
+	if (rel->num_elements == 1) {
+		if (rel->items)
+			matched = rel->items == &tunnel->item;
+		else if (rel->actions)
+			matched = rel->actions == &tunnel->action;
+	}
+	return matched;
+}
+
+/*
+ * Hit callback of the release paths: report success, drop one reference
+ * and free the tunnel when the counter reaches zero.
+ */
+static void
+tunnel_element_release_hit(struct rte_eth_dev *dev,
+			   struct mlx5_flow_tunnel *tunnel, void *x)
+{
+	struct tunnel_db_element_release_ctx *rel = x;
+
+	rel->ret = 0;
+	if (__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED) == 0)
+		mlx5_flow_tunnel_free(dev, tunnel);
+}
+
+/*
+ * Miss callback of the release paths: the element does not belong to any
+ * tunnel, so an EINVAL error is stored as the result.
+ */
+static void
+tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
+{
+	struct tunnel_db_element_release_ctx *rel = x;
+	int rc;
+
+	RTE_SET_USED(dev);
+	rc = rte_flow_error_set(rel->error, EINVAL,
+				RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+				"invalid argument");
+	rel->ret = rc;
+}
+
+/*
+ * Release the PMD item previously handed out for a tunnel.
+ *
+ * @return
+ *   The result stored by the hit/miss callback: 0 when the item belongs to
+ *   a tunnel, the rte_flow_error_set() result otherwise.
+ */
+static int
+mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
+			      struct rte_flow_item *pmd_items,
+			      uint32_t num_items, struct rte_flow_error *err)
+{
+	struct tunnel_db_element_release_ctx rel = {
+		.num_elements = num_items,
+		.items = pmd_items,
+		.actions = NULL,
+		.error = err,
+	};
+
+	/* lock_op is false: callbacks run after the hub spinlock is dropped. */
+	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
+				      tunnel_element_release_hit,
+				      tunnel_element_release_miss, &rel, false);
+
+	return rel.ret;
+}
+
+/*
+ * Release the PMD action previously handed out for a tunnel.
+ *
+ * @return
+ *   The result stored by the hit/miss callback: 0 when the action belongs
+ *   to a tunnel, the rte_flow_error_set() result otherwise.
+ */
+static int
+mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
+				struct rte_flow_action *pmd_actions,
+				uint32_t num_actions, struct rte_flow_error *err)
+{
+	struct tunnel_db_element_release_ctx rel = {
+		.num_elements = num_actions,
+		.items = NULL,
+		.actions = pmd_actions,
+		.error = err,
+	};
+
+	/* lock_op is false: callbacks run after the hub spinlock is dropped. */
+	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
+				      tunnel_element_release_hit,
+				      tunnel_element_release_miss, &rel, false);
+
+	return rel.ret;
+}
+
+/*
+ * Recover the tunnel information of a packet that hit a tunnel miss rule.
+ *
+ * When tunnel offload is not active the info flags are cleared and 0 is
+ * returned. Otherwise the packet must carry both FDIR flags; its FDIR mark
+ * is decoded into a flow table entry from which the application tunnel and
+ * the group id are copied.
+ *
+ * @return
+ *   0 on success, the rte_flow_error_set() result (EINVAL) when the packet
+ *   carries no usable tunnel mark.
+ */
+static int
+mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
+				  struct rte_mbuf *m,
+				  struct rte_flow_restore_info *info,
+				  struct rte_flow_error *err)
+{
+	const uint64_t mask = PKT_RX_FDIR | PKT_RX_FDIR_ID;
+	const uint64_t ol_flags = m->ol_flags;
+	const struct mlx5_flow_tbl_data_entry *tble;
+
+	if (!is_tunnel_offload_active(dev)) {
+		info->flags = 0;
+		return 0;
+	}
+
+	if ((ol_flags & mask) == mask) {
+		tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
+		if (tble) {
+			MLX5_ASSERT(tble->tunnel);
+			memcpy(&info->tunnel, &tble->tunnel->app_tunnel,
+			       sizeof(info->tunnel));
+			info->group_id = tble->group_id;
+			info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
+				      RTE_FLOW_RESTORE_INFO_GROUP_ID |
+				      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
+
+			return 0;
+		}
+		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
+			dev->data->port_id, m->hash.fdir.hi);
+	}
+
+	return rte_flow_error_set(err, EINVAL,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				  "failed to get restore info");
+}
+
+#else /* HAVE_IBV_FLOW_DV_SUPPORT */
+/* Stub for builds without DV support: always reports -ENOTSUP. */
+static int
+mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
+			   struct rte_flow_tunnel *app_tunnel,
+			   struct rte_flow_action **actions,
+			   uint32_t *num_of_actions,
+			   struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(app_tunnel);
+	RTE_SET_USED(actions);
+	RTE_SET_USED(num_of_actions);
+	RTE_SET_USED(error);
+	return -ENOTSUP;
+}
+
+/* Stub for builds without DV support: always reports -ENOTSUP. */
+static int
+mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
+		       struct rte_flow_tunnel *app_tunnel,
+		       struct rte_flow_item **items,
+		       uint32_t *num_of_items,
+		       struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(app_tunnel);
+	RTE_SET_USED(items);
+	RTE_SET_USED(num_of_items);
+	RTE_SET_USED(error);
+	return -ENOTSUP;
+}
+
+/* Stub for builds without DV support: always reports -ENOTSUP. */
+static int
+mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
+			      struct rte_flow_item *pmd_items,
+			      uint32_t num_items,
+			      struct rte_flow_error *err)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(pmd_items);
+	RTE_SET_USED(num_items);
+	RTE_SET_USED(err);
+	return -ENOTSUP;
+}
+
+/* Stub for builds without DV support: always reports -ENOTSUP. */
+static int
+mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
+				struct rte_flow_action *pmd_action,
+				uint32_t num_actions,
+				struct rte_flow_error *err)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(pmd_action);
+	RTE_SET_USED(num_actions);
+	RTE_SET_USED(err);
+	return -ENOTSUP;
+}
+
+/* Stub for builds without DV support: always reports -ENOTSUP. */
+static int
+mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
+				  struct rte_mbuf *m,
+				  struct rte_flow_restore_info *i,
+				  struct rte_flow_error *err)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(m);
+	RTE_SET_USED(i);
+	RTE_SET_USED(err);
+	return -ENOTSUP;
+}
+
+/* Stub for builds without DV support: always reports -ENOTSUP. */
+static int
+flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
+			     struct rte_flow *flow,
+			     const struct rte_flow_attr *attr,
+			     const struct rte_flow_action *actions,
+			     uint32_t flow_idx,
+			     struct tunnel_default_miss_ctx *ctx,
+			     struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(flow);
+	RTE_SET_USED(attr);
+	RTE_SET_USED(actions);
+	RTE_SET_USED(flow_idx);
+	RTE_SET_USED(ctx);
+	RTE_SET_USED(error);
+	return -ENOTSUP;
+}
+
+/* Stub for builds without DV support: no tunnel is ever found. */
+static struct mlx5_flow_tunnel *
+mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(id);
+	return NULL;
+}
+
+/* Stub for builds without DV support: nothing to release. */
+static void
+mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
+		      struct mlx5_flow_tunnel *tunnel)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(tunnel);
+}
+
+/*
+ * Stub for builds without DV support: reports ENOTSUP through *error* and
+ * returns the rte_flow_error_set() result.
+ */
+static uint32_t
+tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
+				const struct mlx5_flow_tunnel *t,
+				uint32_t group,
+				uint32_t *table,
+				struct rte_flow_error *error)
+{
+	RTE_SET_USED(dev);
+	RTE_SET_USED(t);
+	RTE_SET_USED(group);
+	RTE_SET_USED(table);
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				  "tunnel offload requires DV support");
+}
+
+/* Stub for builds without DV support: there is no hub to release. */
+void
+mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh,
+			uint16_t port_id)
+{
+	RTE_SET_USED(sh);
+	RTE_SET_USED(port_id);
+}
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+