+ goto error_before_hairpin_split;
+ }
+ flow_hairpin_split(dev, actions, actions_rx.actions,
+ actions_hairpin_tx.actions, items_tx.items,
+ idx);
+ p_actions_rx = actions_rx.actions;
+ }
+ flow_split_info.flow_idx = idx;
+ flow->drv_type = flow_get_drv_type(dev, attr);
+ MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
+ flow->drv_type < MLX5_FLOW_TYPE_MAX);
+ memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
+ /* RSS Action only works on NIC RX domain */
+ if (attr->ingress && !attr->transfer)
+ rss = flow_get_rss_action(p_actions_rx);
+ if (rss) {
+ if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
+ return 0;
+ /*
+ * The following information is required by
+ * mlx5_flow_hashfields_adjust() in advance.
+ */
+ rss_desc->level = rss->level;
+ /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
+ rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
+ }
+ flow->dev_handles = 0;
+ if (rss && rss->types) {
+ unsigned int graph_root;
+
+ graph_root = find_graph_root(items, rss->level);
+ ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
+ items, rss->types,
+ mlx5_support_expansion, graph_root);
+ MLX5_ASSERT(ret > 0 &&
+ (unsigned int)ret < sizeof(expand_buffer.buffer));
+ } else {
+ buf->entries = 1;
+ buf->entry[0].pattern = (void *)(uintptr_t)items;
+ }
+ rss_desc->shared_rss = flow_get_shared_rss_action(dev, shared_actions,
+ shared_actions_n);
+ for (i = 0; i < buf->entries; ++i) {
+ /* Initialize flow split data. */
+ flow_split_info.prefix_layers = 0;
+ flow_split_info.prefix_mark = 0;
+ flow_split_info.skip_scale = 0;
+ /*
+ * The splitter may create multiple dev_flows,
+ * depending on configuration. In the simplest
+ * case it just creates unmodified original flow.
+ */
+ ret = flow_create_split_outer(dev, flow, attr,
+ buf->entry[i].pattern,
+ p_actions_rx, &flow_split_info,
+ error);
+ if (ret < 0)
+ goto error;
+ if (is_flow_tunnel_steer_rule(dev, attr,
+ buf->entry[i].pattern,
+ p_actions_rx)) {
+ ret = flow_tunnel_add_default_miss(dev, flow, attr,
+ p_actions_rx,
+ idx,
+ &default_miss_ctx,
+ error);
+ if (ret < 0) {
+ mlx5_free(default_miss_ctx.queue);
+ goto error;
+ }
+ }
+ }
+ /* Create the tx flow. */
+ if (hairpin_flow) {
+ attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
+ attr_tx.ingress = 0;
+ attr_tx.egress = 1;
+ dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
+ actions_hairpin_tx.actions,
+ idx, error);
+ if (!dev_flow)
+ goto error;
+ dev_flow->flow = flow;
+ dev_flow->external = 0;
+ SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
+ dev_flow->handle, next);
+ ret = flow_drv_translate(dev, dev_flow, &attr_tx,
+ items_tx.items,
+ actions_hairpin_tx.actions, error);
+ if (ret < 0)
+ goto error;
+ }
+ /*
+ * Update the metadata register copy table. If extensive
+ * metadata feature is enabled and registers are supported
+ * we might create the extra rte_flow for each unique
+ * MARK/FLAG action ID.
+ *
+ * The table is updated for ingress Flows only, because
+ * the egress Flows belong to the different device and
+ * copy table should be updated in peer NIC Rx domain.
+ */
+ if (attr->ingress &&
+ (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
+ ret = flow_mreg_update_copy_table(dev, flow, actions, error);
+ if (ret)
+ goto error;
+ }
+ /*
+ * If the flow is external (from application) OR device is started,
+ * OR mreg discover, then apply immediately.
+ */
+ if (external || dev->data->dev_started ||
+ (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
+ attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
+ ret = flow_drv_apply(dev, flow, error);
+ if (ret < 0)
+ goto error;
+ }
+ if (list) {
+ rte_spinlock_lock(&priv->flow_list_lock);
+ ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
+ flow, next);
+ rte_spinlock_unlock(&priv->flow_list_lock);
+ }
+ flow_rxq_flags_set(dev, flow);
+ rte_free(translated_actions);
+ tunnel = flow_tunnel_from_rule(dev, attr, items, actions);
+ if (tunnel) {
+ flow->tunnel = 1;
+ flow->tunnel_id = tunnel->tunnel_id;
+ __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
+ mlx5_free(default_miss_ctx.queue);
+ }
+ mlx5_flow_pop_thread_workspace();
+ return idx;
+error:
+ MLX5_ASSERT(flow);
+ ret = rte_errno; /* Save rte_errno before cleanup. */
+ flow_mreg_del_copy_action(dev, flow);
+ flow_drv_destroy(dev, flow);
+ if (rss_desc->shared_rss)
+ __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
+ mlx5_ipool_get
+ (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
+ rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
+ mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
+ rte_errno = ret; /* Restore rte_errno. */
+ ret = rte_errno;
+ rte_errno = ret;
+ mlx5_flow_pop_thread_workspace();
+error_before_hairpin_split:
+ rte_free(translated_actions);
+ return 0;
+}
+
+/**
+ * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
+ * incoming packets to table 1.
+ *
+ * Other flow rules, requested for group n, will be created in
+ * e-switch table n+1.
+ * Jump action to e-switch group n will be created to group n+1.
+ *
+ * Used when working in switchdev mode, to utilise advantages of table 1
+ * and above.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return
+ * Pointer to flow on success, NULL otherwise and rte_errno is set.
+ */
+struct rte_flow *
+mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
+{
+ const struct rte_flow_attr attr = {
+ .group = 0,
+ .priority = 0,
+ .ingress = 1,
+ .egress = 0,
+ .transfer = 1,
+ };
+ const struct rte_flow_item pattern = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ };
+ struct rte_flow_action_jump jump = {
+ .group = 1,
+ };
+ const struct rte_flow_action actions[] = {
+ {
+ .type = RTE_FLOW_ACTION_TYPE_JUMP,
+ .conf = &jump,
+ },
+ {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct rte_flow_error error;
+
+ return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
+ &attr, &pattern,
+ actions, false, &error);
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action original_actions[],
+ struct rte_flow_error *error)
+{
+ int hairpin_flow;
+ struct mlx5_translated_shared_action
+ shared_actions[MLX5_MAX_SHARED_ACTIONS];
+ int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
+ const struct rte_flow_action *actions;
+ struct rte_flow_action *translated_actions = NULL;
+ int ret = flow_shared_actions_translate(dev, original_actions,
+ shared_actions,
+ &shared_actions_n,
+ &translated_actions, error);
+
+ if (ret)
+ return ret;
+ actions = translated_actions ? translated_actions : original_actions;
+ hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
+ ret = flow_drv_validate(dev, attr, items, actions,
+ true, hairpin_flow, error);
+ rte_free(translated_actions);
+ return ret;
+}
+
+/**
+ * Create a flow.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx5_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ /*
+ * If the device is not started yet, it is not allowed to created a
+ * flow from application. PMD default flows and traffic control flows
+ * are not affected.
+ */
+ if (unlikely(!dev->data->dev_started)) {
+ DRV_LOG(DEBUG, "port %u is not started when "
+ "inserting a flow", dev->data->port_id);
+ rte_flow_error_set(error, ENODEV,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "port not started");
+ return NULL;
+ }
+
+ return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
+ attr, items, actions, true, error);
+}
+
/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to the Indexed flow list. If this parameter is NULL,
 *   there is no flow removal from the list. Be noted that as the
 *   flow is added to the indexed list, the memory the indexed
 *   list points to may change as flows are destroyed.
 * @param[in] flow_idx
 *   Index of flow to destroy.
 */
static void
flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
		  uint32_t flow_idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
					       [MLX5_IPOOL_RTE_FLOW], flow_idx);

	/* Stale or already freed index - nothing to do. */
	if (!flow)
		return;
	/*
	 * Update RX queue flags only if port is started, otherwise it is
	 * already clean.
	 */
	if (dev->data->dev_started)
		flow_rxq_flags_trim(dev, flow);
	/* Remove HW objects before unlinking the flow from the list. */
	flow_drv_destroy(dev, flow);
	if (list) {
		/* List head is shared; guard against concurrent updates. */
		rte_spinlock_lock(&priv->flow_list_lock);
		ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
			     flow_idx, flow, next);
		rte_spinlock_unlock(&priv->flow_list_lock);
	}
	if (flow->tunnel) {
		struct mlx5_flow_tunnel *tunnel;

		/* Drop the tunnel reference; free it on the last release. */
		tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
		RTE_VERIFY(tunnel);
		if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
			mlx5_flow_tunnel_free(dev, tunnel);
	}
	flow_mreg_del_copy_action(dev, flow);
	/* Finally return the flow object memory to the indexed pool. */
	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
}
+
+/**
+ * Destroy all flows.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param list
+ * Pointer to the Indexed flow list.
+ * @param active
+ * If flushing is called avtively.
+ */
+void
+mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
+{
+ uint32_t num_flushed = 0;
+
+ while (*list) {
+ flow_list_destroy(dev, list, *list);
+ num_flushed++;
+ }
+ if (active) {
+ DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
+ dev->data->port_id, num_flushed);
+ }
+}
+
/**
 * Stop all default actions for flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_flow_stop_default(struct rte_eth_dev *dev)
{
	/* Remove the default metadata copy action first ... */
	flow_mreg_del_default_copy_action(dev);
	/* ... then reset the Rx queue flow-related flags. */
	flow_rxq_flags_clear(dev);
}
+
+/**
+ * Start all default actions for flows.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_start_default(struct rte_eth_dev *dev)
+{
+ struct rte_flow_error error;
+
+ /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
+ return flow_mreg_add_default_copy_action(dev, &error);
+}
+
+/**
+ * Release key of thread specific flow workspace data.
+ */
+void
+flow_release_workspace(void *data)
+{
+ struct mlx5_flow_workspace *wks = data;
+ struct mlx5_flow_workspace *next;
+
+ while (wks) {
+ next = wks->next;
+ free(wks->rss_desc.queue);
+ free(wks);
+ wks = next;
+ }
+}
+
+/**
+ * Get thread specific current flow workspace.
+ *
+ * @return pointer to thread specific flow workspace data, NULL on error.
+ */
+struct mlx5_flow_workspace*
+mlx5_flow_get_thread_workspace(void)
+{
+ struct mlx5_flow_workspace *data;
+
+ data = mlx5_flow_os_get_specific_workspace();
+ MLX5_ASSERT(data && data->inuse);
+ if (!data || !data->inuse)
+ DRV_LOG(ERR, "flow workspace not initialized.");
+ return data;
+}
+
+/**
+ * Allocate and init new flow workspace.
+ *
+ * @return pointer to flow workspace data, NULL on error.
+ */
+static struct mlx5_flow_workspace*
+flow_alloc_thread_workspace(void)
+{
+ struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
+
+ if (!data) {
+ DRV_LOG(ERR, "Failed to allocate flow workspace "
+ "memory.");
+ return NULL;
+ }
+ data->rss_desc.queue = calloc(1,
+ sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
+ if (!data->rss_desc.queue)
+ goto err;
+ data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
+ return data;
+err:
+ if (data->rss_desc.queue)
+ free(data->rss_desc.queue);
+ free(data);
+ return NULL;
+}
+
/**
 * Get new thread specific flow workspace.
 *
 * If current workspace inuse, create new one and set as current.
 *
 * The workspaces form a doubly-linked per-thread stack: pushing reuses
 * a free node when available, otherwise allocates and links a new one.
 *
 * @return pointer to thread specific flow workspace data, NULL on error.
 */
static struct mlx5_flow_workspace*
mlx5_flow_push_thread_workspace(void)
{
	struct mlx5_flow_workspace *curr;
	struct mlx5_flow_workspace *data;

	curr = mlx5_flow_os_get_specific_workspace();
	if (!curr) {
		/* First use on this thread - allocate the initial node. */
		data = flow_alloc_thread_workspace();
		if (!data)
			return NULL;
	} else if (!curr->inuse) {
		/* Current node is free - reuse it in place. */
		data = curr;
	} else if (curr->next) {
		/* A previously pushed node exists further down - reuse it. */
		data = curr->next;
	} else {
		/* Chain exhausted - extend it with a fresh node. */
		data = flow_alloc_thread_workspace();
		if (!data)
			return NULL;
		curr->next = data;
		data->prev = curr;
	}
	data->inuse = 1;
	data->flow_idx = 0;
	/* Set as current workspace */
	if (mlx5_flow_os_set_specific_workspace(data))
		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
	return data;
}
+
/**
 * Close current thread specific flow workspace.
 *
 * If previous workspace available, set it as current.
 *
 * The closed node stays linked so a later push can reuse it without
 * reallocating.
 */
static void
mlx5_flow_pop_thread_workspace(void)
{
	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();

	if (!data)
		return;
	if (!data->inuse) {
		/* Popping a workspace that was never pushed is a bug. */
		DRV_LOG(ERR, "Failed to close unused flow workspace.");
		return;
	}
	data->inuse = 0;
	/* Bottom of the stack - keep it registered as current. */
	if (!data->prev)
		return;
	if (mlx5_flow_os_set_specific_workspace(data->prev))
		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
}
+
/**
 * Verify the flow list is empty.
 *
 * Logs every flow still present on the port's flow list; intended to
 * catch leaks at device close time.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	uint32_t idx;
	int ret = 0;

	/* Walk the indexed list; each iteration is a leaked flow. */
	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
		      flow, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}
+
+/**
+ * Enable default hairpin egress flow.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param queue
+ * The queue index.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
+ uint32_t queue)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ const struct rte_flow_attr attr = {
+ .egress = 1,
+ .priority = 0,
+ };
+ struct mlx5_rte_flow_item_tx_queue queue_spec = {
+ .queue = queue,
+ };
+ struct mlx5_rte_flow_item_tx_queue queue_mask = {
+ .queue = UINT32_MAX,
+ };
+ struct rte_flow_item items[] = {
+ {
+ .type = (enum rte_flow_item_type)
+ MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
+ .spec = &queue_spec,
+ .last = NULL,
+ .mask = &queue_mask,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ struct rte_flow_action_jump jump = {
+ .group = MLX5_HAIRPIN_TX_TABLE,
+ };
+ struct rte_flow_action actions[2];
+ uint32_t flow_idx;
+ struct rte_flow_error error;
+
+ actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
+ actions[0].conf = &jump;
+ actions[1].type = RTE_FLOW_ACTION_TYPE_END;
+ flow_idx = flow_list_create(dev, &priv->ctrl_flows,
+ &attr, items, actions, false, &error);
+ if (!flow_idx) {
+ DRV_LOG(DEBUG,
+ "Failed to create ctrl flow: rte_errno(%d),"
+ " type(%d), message(%s)",
+ rte_errno, error.type,
+ error.message ? error.message : " (no stated reason)");
+ return -rte_errno;
+ }
+ return 0;
+}
+
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			/* VLAN item degrades to END when no spec is given. */
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
					      RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	/*
	 * NOTE(review): VLA is sized before the reta_idx_n == 0 check below;
	 * a zero-length VLA is undefined behavior - confirm callers never
	 * reach here with reta_idx_n == 0 before the early return.
	 */
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	uint32_t flow_idx;
	struct rte_flow_error error;
	unsigned int i;

	/* Nothing to match on without RETA entries or Rx queues. */
	if (!priv->reta_idx_n || !priv->rxqs_n) {
		return 0;
	}
	/* Without RSS multi-queue mode, hash on nothing. */
	if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
		action_rss.types = 0;
	/* Spread the flow over the whole redirection table. */
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
				    &attr, items, actions, false, &error);
	if (!flow_idx)
		return -rte_errno;
	return 0;
}
+
/**
 * Enable a flow control configured from the control plane.
 *
 * Convenience wrapper around mlx5_ctrl_flow_vlan() with no VLAN matching.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
+
+/**
+ * Create default miss flow rule matching lacp traffic
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param eth_spec
+ * An Ethernet flow spec to apply.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ /*
+ * The LACP matching is done by only using ether type since using
+ * a multicast dst mac causes kernel to give low priority to this flow.
+ */
+ static const struct rte_flow_item_eth lacp_spec = {
+ .type = RTE_BE16(0x8809),
+ };
+ static const struct rte_flow_item_eth lacp_mask = {
+ .type = 0xffff,
+ };
+ const struct rte_flow_attr attr = {
+ .ingress = 1,
+ };
+ struct rte_flow_item items[] = {
+ {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = &lacp_spec,
+ .mask = &lacp_mask,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ struct rte_flow_action actions[] = {
+ {
+ .type = (enum rte_flow_action_type)
+ MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
+ },
+ {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ struct rte_flow_error error;
+ uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
+ &attr, items, actions, false, &error);
+
+ if (!flow_idx)
+ return -rte_errno;
+ return 0;
+}
+
/**
 * Destroy a flow.
 *
 * The opaque rte_flow pointer actually carries an ipool index, hence the
 * uintptr_t round-trip.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
	return 0;
}
+
/**
 * Destroy all flows.
 *
 * Flushes the application flow list; control flows are not affected.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows, false);
	return 0;
}
+
+/**
+ * Isolated mode.
+ *
+ * @see rte_flow_isolate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_isolate(struct rte_eth_dev *dev,
+ int enable,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ if (dev->data->dev_started) {
+ rte_flow_error_set(error, EBUSY,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "port must be stopped first");
+ return -rte_errno;
+ }
+ priv->isolated = !!enable;
+ if (enable)
+ dev->dev_ops = &mlx5_dev_ops_isolate;
+ else
+ dev->dev_ops = &mlx5_dev_ops;
+
+ dev->rx_descriptor_status = mlx5_rx_descriptor_status;
+ dev->tx_descriptor_status = mlx5_tx_descriptor_status;
+
+ return 0;
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_drv_query(struct rte_eth_dev *dev,
+ uint32_t flow_idx,
+ const struct rte_flow_action *actions,
+ void *data,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
+ [MLX5_IPOOL_RTE_FLOW],
+ flow_idx);
+ enum mlx5_flow_drv_type ftype;
+
+ if (!flow) {
+ return rte_flow_error_set(error, ENOENT,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "invalid flow handle");
+ }
+ ftype = flow->drv_type;
+ MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+ fops = flow_get_drv_ops(ftype);
+
+ return fops->query(dev, flow, actions, data, error);
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_action *actions,
+ void *data,
+ struct rte_flow_error *error)
+{
+ int ret;
+
+ ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
+ error);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param filter_type
+ * Filter type.
+ * @param filter_op
+ * Operation to perform.
+ * @param arg
+ * Pointer to operation-specific structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type filter_type,
+ enum rte_filter_op filter_op,
+ void *arg)
+{
+ switch (filter_type) {
+ case RTE_ETH_FILTER_GENERIC:
+ if (filter_op != RTE_ETH_FILTER_GET) {
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ *(const void **)arg = &mlx5_flow_ops;
+ return 0;
+ default:
+ DRV_LOG(ERR, "port %u filter type (%d) not supported",
+ dev->data->port_id, filter_type);
+ rte_errno = ENOTSUP;
+ return -rte_errno;
+ }
+ return 0;
+}
+
+/**
+ * Create the needed meter and suffix tables.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in] fm
+ * Pointer to the flow meter.
+ *
+ * @return
+ * Pointer to table set on success, NULL otherwise.
+ */
+struct mlx5_meter_domains_infos *
+mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
+ const struct mlx5_flow_meter *fm)
+{
+ const struct mlx5_flow_driver_ops *fops;
+
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->create_mtr_tbls(dev, fm);
+}
+
+/**
+ * Destroy the meter table set.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in] tbl
+ * Pointer to the meter table set.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
+ struct mlx5_meter_domains_infos *tbls)
+{
+ const struct mlx5_flow_driver_ops *fops;
+
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->destroy_mtr_tbls(dev, tbls);
+}
+
+/**
+ * Create policer rules.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in] fm
+ * Pointer to flow meter structure.
+ * @param[in] attr
+ * Pointer to flow attributes.
+ *
+ * @return
+ * 0 on success, -1 otherwise.
+ */
+int
+mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
+ struct mlx5_flow_meter *fm,
+ const struct rte_flow_attr *attr)
+{
+ const struct mlx5_flow_driver_ops *fops;
+
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->create_policer_rules(dev, fm, attr);
+}
+
+/**
+ * Destroy policer rules.
+ *
+ * @param[in] fm
+ * Pointer to flow meter structure.
+ * @param[in] attr
+ * Pointer to flow attributes.
+ *
+ * @return
+ * 0 on success, -1 otherwise.
+ */
+int
+mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
+ struct mlx5_flow_meter *fm,
+ const struct rte_flow_attr *attr)
+{
+ const struct mlx5_flow_driver_ops *fops;
+
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->destroy_policer_rules(dev, fm, attr);
+}
+
+/**
+ * Allocate a counter.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * Index to allocated counter on success, 0 otherwise.
+ */
+uint32_t
+mlx5_counter_alloc(struct rte_eth_dev *dev)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow_attr attr = { .transfer = 0 };
+
+ if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->counter_alloc(dev);
+ }
+ DRV_LOG(ERR,
+ "port %u counter allocate is not supported.",
+ dev->data->port_id);
+ return 0;
+}
+
+/**
+ * Free a counter.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device structure.
+ * @param[in] cnt
+ * Index to counter to be free.
+ */
+void
+mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow_attr attr = { .transfer = 0 };
+
+ if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ fops->counter_free(dev, cnt);
+ return;
+ }
+ DRV_LOG(ERR,
+ "port %u counter free is not supported.",
+ dev->data->port_id);
+}
+
+/**
+ * Query counter statistics.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device structure.
+ * @param[in] cnt
+ * Index to counter to query.
+ * @param[in] clear
+ * Set to clear counter statistics.
+ * @param[out] pkts
+ * The counter hits packets number to save.
+ * @param[out] bytes
+ * The counter hits bytes number to save.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise.
+ */
+int
+mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
+ bool clear, uint64_t *pkts, uint64_t *bytes)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow_attr attr = { .transfer = 0 };
+
+ if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->counter_query(dev, cnt, clear, pkts, bytes);
+ }
+ DRV_LOG(ERR,
+ "port %u counter query is not supported.",
+ dev->data->port_id);
+ return -ENOTSUP;
+}
+
/**
 * Allocate a new memory for the counter values wrapped by all the needed
 * management.
 *
 * Layout of the single page-aligned allocation (front to back):
 * raw counter statistics for all raws, then the raw descriptors array,
 * and finally the management structure itself at the very end.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
static int
mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_devx_mkey_attr mkey_attr;
	struct mlx5_counter_stats_mem_mng *mem_mng;
	volatile struct flow_counter_stats *raw_data;
	/* Extra raws cover queries that are still in flight. */
	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
	int size = (sizeof(struct flow_counter_stats) *
			MLX5_COUNTERS_PER_POOL +
			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
			sizeof(struct mlx5_counter_stats_mem_mng);
	size_t pgsize = rte_mem_page_size();
	uint8_t *mem;
	int i;

	if (pgsize == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return -ENOMEM;
	}
	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
	if (!mem) {
		rte_errno = ENOMEM;
		return -ENOMEM;
	}
	/* Management struct sits at the tail of the allocation. */
	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
	/* From here on, 'size' is only the raw statistics region. */
	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
	mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size,
						 IBV_ACCESS_LOCAL_WRITE);
	if (!mem_mng->umem) {
		rte_errno = errno;
		mlx5_free(mem);
		return -rte_errno;
	}
	/* Build the mkey over the registered statistics region. */
	mkey_attr.addr = (uintptr_t)mem;
	mkey_attr.size = size;
	mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
	mkey_attr.pd = sh->pdn;
	mkey_attr.log_entity_size = 0;
	mkey_attr.pg_access = 0;
	mkey_attr.klm_array = NULL;
	mkey_attr.klm_num = 0;
	mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
	mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
	mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
	if (!mem_mng->dm) {
		/* Unwind the umem registration before freeing the memory. */
		mlx5_os_umem_dereg(mem_mng->umem);
		rte_errno = errno;
		mlx5_free(mem);
		return -rte_errno;
	}
	/* Raw descriptors follow the statistics region. */
	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
	raw_data = (volatile struct flow_counter_stats *)mem;
	for (i = 0; i < raws_n; ++i) {
		mem_mng->raws[i].mem_mng = mem_mng;
		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
	}
	/* Only the trailing raws start on the free list for queries. */
	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
				 next);
	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
	sh->cmng.mem_mng = mem_mng;
	return 0;
}
+
/**
 * Set the statistic memory to the new counter pool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] pool
 *   Pointer to the pool to set the statistic memory.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
static int
mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
			       struct mlx5_flow_counter_pool *pool)
{
	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
	/* Resize statistic memory once used out. */
	if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
		DRV_LOG(ERR, "Cannot resize counter stat mem.");
		return -1;
	}
	/* Point the pool at its raw slot under the pool spinlock. */
	rte_spinlock_lock(&pool->sl);
	pool->raw = cmng->mem_mng->raws + pool->index %
		    MLX5_CNT_CONTAINER_RESIZE;
	rte_spinlock_unlock(&pool->sl);
	/* No query in flight for a freshly attached pool. */
	pool->raw_hw = NULL;
	return 0;
}
+
+#define MLX5_POOL_QUERY_FREQ_US 1000000
+
+/**
+ * Set the periodic procedure for triggering asynchronous batch queries for all
+ * the counter pools.
+ *
+ * @param[in] sh
+ * Pointer to mlx5_dev_ctx_shared object.
+ */
+void
+mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
+{
+ uint32_t pools_n, us;
+
+ pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
+ us = MLX5_POOL_QUERY_FREQ_US / pools_n;
+ DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
+ if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
+ sh->cmng.query_thread_on = 0;
+ DRV_LOG(ERR, "Cannot reinitialize query alarm");
+ } else {
+ sh->cmng.query_thread_on = 1;
+ }
+}
+
/**
 * The periodic procedure for triggering asynchronous batch queries for all the
 * counter pools. This function is probably called by the host thread.
 *
 * Each invocation triggers at most one pool query, then advances the
 * round-robin pool index and re-arms the alarm.
 *
 * @param[in] arg
 *   The parameter for the alarm process.
 */
void
mlx5_flow_query_alarm(void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;
	int ret;
	uint16_t pool_index = sh->cmng.pool_index;
	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
	struct mlx5_flow_counter_pool *pool;
	uint16_t n_valid;

	/* Back off while too many completions are outstanding. */
	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
		goto set_alarm;
	/* Snapshot pool pointer and count under the update lock. */
	rte_spinlock_lock(&cmng->pool_update_sl);
	pool = cmng->pools[pool_index];
	n_valid = cmng->n_valid;
	rte_spinlock_unlock(&cmng->pool_update_sl);
	/* Set the statistic memory to the new created pool. */
	if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
		goto set_alarm;
	if (pool->raw_hw)
		/* There is a pool query in progress. */
		goto set_alarm;
	pool->raw_hw =
		LIST_FIRST(&sh->cmng.free_stat_raws);
	if (!pool->raw_hw)
		/* No free counter statistics raw memory. */
		goto set_alarm;
	/*
	 * Identify the counters released between query trigger and query
	 * handle more efficiently. The counter released in this gap period
	 * should wait for a new round of query as the new arrived packets
	 * will not be taken into account.
	 */
	pool->query_gen++;
	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
					       MLX5_COUNTERS_PER_POOL,
					       NULL, NULL,
					       pool->raw_hw->mem_mng->dm->id,
					       (void *)(uintptr_t)
					       pool->raw_hw->data,
					       sh->devx_comp,
					       (uint64_t)(uintptr_t)pool);
	if (ret) {
		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
			" %d", pool->min_dcs->id);
		pool->raw_hw = NULL;
		goto set_alarm;
	}
	/* Raw slot is claimed until the completion handler returns it. */
	LIST_REMOVE(pool->raw_hw, next);
	sh->cmng.pending_queries++;
	/* Advance the round-robin index, wrapping at the valid count. */
	pool_index++;
	if (pool_index >= n_valid)
		pool_index = 0;
set_alarm:
	sh->cmng.pool_index = pool_index;
	mlx5_set_query_alarm(sh);
}
+
+/**
+ * Check and callback event for new aged flow in the counter pool
+ *
+ * @param[in] sh
+ * Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] pool
+ * Pointer to Current counter pool.
+ */
+static void
+mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
+ struct mlx5_flow_counter_pool *pool)
+{
+ struct mlx5_priv *priv;
+ struct mlx5_flow_counter *cnt;
+ struct mlx5_age_info *age_info;
+ struct mlx5_age_param *age_param;
+ /* raw_hw holds the just-completed query, raw the previous round. */
+ struct mlx5_counter_stats_raw *cur = pool->raw_hw;
+ struct mlx5_counter_stats_raw *prev = pool->raw;
+ const uint64_t curr_time = MLX5_CURR_TIME_SEC;
+ const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
+ uint16_t expected = AGE_CANDIDATE;
+ uint32_t i;
+
+ pool->time_of_last_age_check = curr_time;
+ for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+ cnt = MLX5_POOL_GET_CNT(pool, i);
+ age_param = MLX5_CNT_TO_AGE(cnt);
+ /* Only counters armed for aging are examined. */
+ if (__atomic_load_n(&age_param->state,
+ __ATOMIC_RELAXED) != AGE_CANDIDATE)
+ continue;
+ /* Hit count changed between rounds: traffic seen, reset idle time. */
+ if (cur->data[i].hits != prev->data[i].hits) {
+ __atomic_store_n(&age_param->sec_since_last_hit, 0,
+ __ATOMIC_RELAXED);
+ continue;
+ }
+ /* Accumulate idle seconds; not yet past the timeout - keep waiting. */
+ if (__atomic_add_fetch(&age_param->sec_since_last_hit,
+ time_delta,
+ __ATOMIC_RELAXED) <= age_param->timeout)
+ continue;
+ /**
+ * Hold the lock first, or if between the
+ * state AGE_TMOUT and tailq operation the
+ * release happened, the release procedure
+ * may delete a non-existent tailq node.
+ */
+ priv = rte_eth_devices[age_param->port_id].data->dev_private;
+ age_info = GET_PORT_AGE_INFO(priv);
+ rte_spinlock_lock(&age_info->aged_sl);
+ /* CAS guards against a concurrent release of the same counter. */
+ if (__atomic_compare_exchange_n(&age_param->state, &expected,
+ AGE_TMOUT, false,
+ __ATOMIC_RELAXED,
+ __ATOMIC_RELAXED)) {
+ TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
+ MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
+ }
+ rte_spinlock_unlock(&age_info->aged_sl);
+ }
+ mlx5_age_event_prepare(sh);
+}
+
+/**
+ * Handler for the HW response with ready values from an asynchronous batch
+ * query. This function is probably called by the host thread.
+ *
+ * @param[in] sh
+ * The pointer to the shared device context.
+ * @param[in] async_id
+ * The Devx async ID.
+ * @param[in] status
+ * The status of the completion.
+ */
+void
+mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
+ uint64_t async_id, int status)
+{
+ /* async_id carries the pool pointer set when the query was triggered. */
+ struct mlx5_flow_counter_pool *pool =
+ (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
+ struct mlx5_counter_stats_raw *raw_to_free;
+ /*
+ * query_gen was bumped on trigger; XOR 1 selects the counters
+ * released during the previous query round - their final stats
+ * are valid now, so they can be returned to the free list.
+ */
+ uint8_t query_gen = pool->query_gen ^ 1;
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ enum mlx5_counter_type cnt_type =
+ pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
+ MLX5_COUNTER_TYPE_ORIGIN;
+
+ if (unlikely(status)) {
+ /* Query failed: discard the buffer that holds invalid data. */
+ raw_to_free = pool->raw_hw;
+ } else {
+ raw_to_free = pool->raw;
+ if (pool->is_aged)
+ mlx5_flow_aging_check(sh, pool);
+ /* Swap the new raw data in; readers take pool->sl. */
+ rte_spinlock_lock(&pool->sl);
+ pool->raw = pool->raw_hw;
+ rte_spinlock_unlock(&pool->sl);
+ /* Be sure the new raw counters data is updated in memory. */
+ rte_io_wmb();
+ if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
+ rte_spinlock_lock(&cmng->csl[cnt_type]);
+ TAILQ_CONCAT(&cmng->counters[cnt_type],
+ &pool->counters[query_gen], next);
+ rte_spinlock_unlock(&cmng->csl[cnt_type]);
+ }
+ }
+ LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
+ /* Clear raw_hw so the alarm handler may trigger the next query. */
+ pool->raw_hw = NULL;
+ sh->cmng.pending_queries--;
+}
+
+/**
+ * Translate a rte_flow group index to a HW table id, standard method.
+ *
+ * For external FDB rules with the default rule enabled, group N maps to
+ * table N + 1 (table 0 is taken by the default rule); group UINT32_MAX
+ * is rejected since the shift would overflow. All other rules map the
+ * group to the table 1:1.
+ *
+ * @param[in] port_id
+ * Port identifier (used for logging only).
+ * @param[in] group
+ * rte_flow group index value.
+ * @param[out] table
+ * HW table value.
+ * @param[in] grp_info
+ * Flags used for conversion.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
+ const struct flow_grp_info *grp_info,
+ struct rte_flow_error *error)
+{
+ if (grp_info->transfer && grp_info->external &&
+ grp_info->fdb_def_rule) {
+ if (group == UINT32_MAX)
+ return rte_flow_error_set
+ (error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+ NULL,
+ "group index not supported");
+ *table = group + 1;
+ } else {
+ *table = group;
+ }
+ DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
+ return 0;
+}
+
+/**
+ * Translate the rte_flow group index to HW table value.
+ *
+ * If tunnel offload is disabled, all group ids converted to flow table
+ * id using the standard method.
+ * If tunnel offload is enabled, group id can be converted using the
+ * standard or tunnel conversion method. Group conversion method
+ * selection depends on flags in `grp_info` parameter:
+ * - Internal (grp_info.external == 0) groups conversion uses the
+ * standard method.
+ * - Group ids in JUMP action converted with the tunnel conversion.
+ * - Group id in rule attribute conversion depends on a rule type and
+ * group id value:
+ * ** non zero group attributes converted with the tunnel method
+ * ** zero group attribute in non-tunnel rule is converted using the
+ * standard method - there's only one root table
+ * ** zero group attribute in steer tunnel rule is converted with the
+ * standard method - single root table
+ * ** zero group attribute in match tunnel rule is a special OvS
+ * case: that value is used for portability reasons. That group
+ * id is converted with the tunnel conversion method.
+ *
+ * @param[in] dev
+ * Port device
+ * @param[in] tunnel
+ * PMD tunnel offload object
+ * @param[in] group
+ * rte_flow group index value.
+ * @param[out] table
+ * HW table value.
+ * @param[in] grp_info
+ * flags used for conversion
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_group_to_table(struct rte_eth_dev *dev,
+ const struct mlx5_flow_tunnel *tunnel,
+ uint32_t group, uint32_t *table,
+ const struct flow_grp_info *grp_info,
+ struct rte_flow_error *error)
+{
+ int ret;
+ bool standard_translation;
+
+ /* Scale low external group ids unless the caller opted out. */
+ if (!grp_info->skip_scale && grp_info->external &&
+ group < MLX5_MAX_TABLES_EXTERNAL)
+ group *= MLX5_FLOW_TABLE_FACTOR;
+ /*
+ * With tunnel offload active, external groups without the standard
+ * table fix use the tunnel-specific translation; everything else
+ * falls back to the standard method.
+ */
+ if (is_tunnel_offload_active(dev)) {
+ standard_translation = !grp_info->external ||
+ grp_info->std_tbl_fix;
+ } else {
+ standard_translation = true;
+ }
+ DRV_LOG(DEBUG,
+ "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
+ dev->data->port_id, group, grp_info->transfer,
+ grp_info->external, grp_info->fdb_def_rule,
+ standard_translation ? "STANDARD" : "TUNNEL");
+ if (standard_translation)
+ ret = flow_group_to_table(dev->data->port_id, group, table,
+ grp_info, error);
+ else
+ ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
+ table, error);
+
+ return ret;
+}
+
+/**
+ * Discover availability of metadata reg_c's.
+ *
+ * Iteratively use test flows to check availability.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_config *config = &priv->config;
+ enum modify_reg idx;
+ int n = 0;
+
+ /* reg_c[0] and reg_c[1] are reserved. */
+ config->flow_mreg_c[n++] = REG_C_0;
+ config->flow_mreg_c[n++] = REG_C_1;
+ /* Discover availability of other reg_c's. */
+ for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
+ struct rte_flow_attr attr = {
+ .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
+ .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
+ .ingress = 1,
+ };
+ /* Empty pattern: match everything for the probe flow. */
+ struct rte_flow_item items[] = {
+ [0] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ /* Probe actions: copy reg_c[1] into the candidate register. */
+ struct rte_flow_action actions[] = {
+ [0] = {
+ .type = (enum rte_flow_action_type)
+ MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
+ .conf = &(struct mlx5_flow_action_copy_mreg){
+ .src = REG_C_1,
+ .dst = idx,
+ },
+ },
+ [1] = {
+ .type = RTE_FLOW_ACTION_TYPE_JUMP,
+ .conf = &(struct rte_flow_action_jump){
+ .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
+ },
+ },
+ [2] = {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ uint32_t flow_idx;
+ struct rte_flow *flow;
+ struct rte_flow_error error;
+
+ /* Register copy needs DV flow engine; nothing to probe otherwise. */
+ if (!config->dv_flow_en)
+ break;
+ /* Create internal flow, validation skips copy action. */
+ flow_idx = flow_list_create(dev, NULL, &attr, items,
+ actions, false, &error);
+ flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
+ flow_idx);
+ /* Creation failed: this reg_c is unusable, try the next one. */
+ if (!flow)
+ continue;
+ config->flow_mreg_c[n++] = idx;
+ flow_list_destroy(dev, NULL, flow_idx);
+ }
+ /* Mark the remaining slots as unavailable. */
+ for (; n < MLX5_MREG_C_NUM; ++n)
+ config->flow_mreg_c[n] = REG_NON;
+ return 0;
+}
+
+/**
+ * Dump flow raw hw data to file
+ *
+ * @param[in] dev
+ * The pointer to Ethernet device.
+ * @param[in] file
+ * A pointer to a file for output.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. PMDs initialize this
+ * structure in case of error only.
+ * @return
+ * 0 on success, a negative value otherwise.
+ */
+int
+mlx5_flow_dev_dump(struct rte_eth_dev *dev,
+ FILE *file,
+ struct rte_flow_error *error __rte_unused)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_ctx_shared *sh = priv->sh;
+
+ /* HW dump is available only with the DV flow engine enabled. */
+ if (!priv->config.dv_flow_en) {
+ /* Report the write error itself if even the notice fails. */
+ if (fputs("device dv flow disabled\n", file) <= 0)
+ return -errno;
+ return -ENOTSUP;
+ }
+ /* Dump all three steering domains (FDB, RX, TX) to the file. */
+ return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
+ sh->tx_domain, file);
+}
+
+/**
+ * Get aged-out flows.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] context
+ * The address of an array of pointers to the aged-out flows contexts.
+ * @param[in] nb_contexts
+ * The length of context array pointers.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. Initialized in case of
+ * error only.
+ *
+ * @return
+ * how many contexts get in success, otherwise negative errno value.
+ * if nb_contexts is 0, return the amount of all aged contexts.
+ * if nb_contexts is not 0, return the amount of aged flows reported
+ * in the context array.
+ */
+int
+mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+ uint32_t nb_contexts, struct rte_flow_error *error)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow_attr attr = { .transfer = 0 };
+
+ /* Flow aging is implemented by the DV driver only. */
+ if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->get_aged_flows(dev, contexts, nb_contexts,
+ error);
+ }
+ DRV_LOG(ERR,
+ "port %u get aged flows is not supported.",
+ dev->data->port_id);
+ return -ENOTSUP;
+}
+
+/* Wrapper for driver action_validate op callback */
+static int
+flow_drv_action_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_shared_action_conf *conf,
+ const struct rte_flow_action *action,
+ const struct mlx5_flow_driver_ops *fops,
+ struct rte_flow_error *error)
+{
+ static const char err_msg[] = "shared action validation unsupported";
+
+ /* Driver without the callback cannot support shared actions. */
+ if (!fops->action_validate) {
+ DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+ rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, err_msg);
+ return -rte_errno;
+ }
+ return fops->action_validate(dev, conf, action, error);
+}
+
+/**
+ * Destroys the shared action by handle.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param[in] action
+ * Handle for the shared action to be destroyed.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. PMDs initialize this
+ * structure in case of error only.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ *
+ * @note: wrapper for driver action_destroy op callback.
+ */
+static int
+mlx5_shared_action_destroy(struct rte_eth_dev *dev,
+ struct rte_flow_shared_action *action,
+ struct rte_flow_error *error)
+{
+ static const char err_msg[] = "shared action destruction unsupported";
+ /* Non-transfer attributes select the NIC-domain driver. */
+ struct rte_flow_attr attr = { .transfer = 0 };
+ const struct mlx5_flow_driver_ops *fops =
+ flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+
+ /* Driver without the callback cannot support shared actions. */
+ if (!fops->action_destroy) {
+ DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+ rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, err_msg);
+ return -rte_errno;
+ }
+ return fops->action_destroy(dev, action, error);
+}
+
+/* Wrapper for driver action_update op callback */
+static int
+flow_drv_action_update(struct rte_eth_dev *dev,
+ struct rte_flow_shared_action *action,
+ const void *action_conf,
+ const struct mlx5_flow_driver_ops *fops,
+ struct rte_flow_error *error)
+{
+ static const char err_msg[] = "shared action update unsupported";
+
+ /* Driver without the callback cannot support shared actions. */
+ if (!fops->action_update) {
+ DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+ rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, err_msg);
+ return -rte_errno;
+ }
+ return fops->action_update(dev, action, action_conf, error);
+}
+
+/* Wrapper for driver action_query op callback */
+static int
+flow_drv_action_query(struct rte_eth_dev *dev,
+ const struct rte_flow_shared_action *action,
+ void *data,
+ const struct mlx5_flow_driver_ops *fops,
+ struct rte_flow_error *error)
+{
+ static const char err_msg[] = "shared action query unsupported";
+
+ /* Driver without the callback cannot support shared actions. */
+ if (!fops->action_query) {
+ DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+ rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, err_msg);
+ return -rte_errno;
+ }
+ return fops->action_query(dev, action, data, error);
+}
+
+/**
+ * Create shared action for reuse in multiple flow rules.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param[in] action
+ * Action configuration for shared action creation.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. PMDs initialize this
+ * structure in case of error only.
+ * @return
+ * A valid handle in case of success, NULL otherwise and rte_errno is set.
+ */
+static struct rte_flow_shared_action *
+mlx5_shared_action_create(struct rte_eth_dev *dev,
+ const struct rte_flow_shared_action_conf *conf,
+ const struct rte_flow_action *action,
+ struct rte_flow_error *error)
+{
+ static const char err_msg[] = "shared action creation unsupported";
+ /* Non-transfer attributes select the NIC-domain driver. */
+ struct rte_flow_attr attr = { .transfer = 0 };
+ const struct mlx5_flow_driver_ops *fops =
+ flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+
+ /* Validate the action before creating it; error already set. */
+ if (flow_drv_action_validate(dev, conf, action, fops, error))
+ return NULL;
+ if (!fops->action_create) {
+ DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+ rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, err_msg);
+ return NULL;
+ }
+ return fops->action_create(dev, conf, action, error);
+}
+
+/**
+ * Updates inplace the shared action configuration pointed by *action* handle
+ * with the configuration provided as *action* argument.
+ * The update of the shared action configuration effects all flow rules reusing
+ * the action via handle.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param[in] shared_action
+ * Handle for the shared action to be updated.
+ * @param[in] action
+ * Action specification used to modify the action pointed by handle.
+ * *action* should be of same type with the action pointed by the *action*
+ * handle argument, otherwise considered as invalid.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. PMDs initialize this
+ * structure in case of error only.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_shared_action_update(struct rte_eth_dev *dev,
+ struct rte_flow_shared_action *shared_action,
+ const struct rte_flow_action *action,
+ struct rte_flow_error *error)
+{
+ /* Non-transfer attributes select the NIC-domain driver. */
+ struct rte_flow_attr attr = { .transfer = 0 };
+ const struct mlx5_flow_driver_ops *fops =
+ flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+ int ret;
+
+ /* Re-validate the new configuration before applying it. */
+ ret = flow_drv_action_validate(dev, NULL, action, fops, error);
+ if (ret)
+ return ret;
+ return flow_drv_action_update(dev, shared_action, action->conf, fops,
+ error);
+}
+
+/**
+ * Query the shared action by handle.
+ *
+ * This function allows retrieving action-specific data such as counters.
+ * Data is gathered by special action which may be present/referenced in
+ * more than one flow rule definition.
+ *
+ * \see RTE_FLOW_ACTION_TYPE_COUNT
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param[in] action
+ * Handle for the shared action to query.
+ * @param[in, out] data
+ * Pointer to storage for the associated query data type.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. PMDs initialize this
+ * structure in case of error only.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_shared_action_query(struct rte_eth_dev *dev,
+ const struct rte_flow_shared_action *action,
+ void *data,
+ struct rte_flow_error *error)
+{
+ /* Non-transfer attributes select the NIC-domain driver. */
+ struct rte_flow_attr attr = { .transfer = 0 };
+ const struct mlx5_flow_driver_ops *fops =
+ flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+
+ return flow_drv_action_query(dev, action, data, fops, error);
+}
+
+/**
+ * Destroy all shared actions.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_shared_action_flush(struct rte_eth_dev *dev)
+{
+ struct rte_flow_error error;
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_shared_action_rss *shared_rss;
+ int ret = 0;
+ uint32_t idx;
+
+ /*
+ * Destroy every registered shared RSS action; OR the results so
+ * all actions are attempted even if some destructions fail.
+ */
+ ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
+ priv->rss_shared_actions, idx, shared_rss, next) {
+ ret |= mlx5_shared_action_destroy(dev,
+ (struct rte_flow_shared_action *)(uintptr_t)idx, &error);
+ }
+ return ret;
+}
+
+#ifndef HAVE_MLX5DV_DR
+#define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
+#else
+#define MLX5_DOMAIN_SYNC_FLOW \
+ (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
+#endif
+
+/* Synchronize the SW/HW steering state of the given domains. */
+int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ const struct mlx5_flow_driver_ops *fops;
+ int ret;
+ struct rte_flow_attr attr = { .transfer = 0 };
+
+ fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+ ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
+ /* Normalize a positive errno to the negative DPDK convention. */
+ if (ret > 0)
+ ret = -ret;
+ return ret;
+}
+
+/**
+ * Tunnel offload functionality is defined for the DV environment only.
+ */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+/*
+ * Overlay of the 32-bit MARK value used by tunnel offload rules:
+ * 8 application-reserved bits, a 15-bit tunnel table id, a transfer
+ * flag, and 8 unused bits (declaration order; exact bit placement of
+ * bitfields is implementation-defined).
+ */
+__extension__
+union tunnel_offload_mark {
+ uint32_t val;
+ struct {
+ uint32_t app_reserve:8;
+ uint32_t table_id:15;
+ uint32_t transfer:1;
+ uint32_t _unused_:8;
+ };
+};
+
+static bool
+mlx5_access_tunnel_offload_db
+ (struct rte_eth_dev *dev,
+ bool (*match)(struct rte_eth_dev *,
+ struct mlx5_flow_tunnel *, const void *),
+ void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
+ void (*miss)(struct rte_eth_dev *, void *),
+ void *ctx, bool lock_op);
+
+static int
+flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_action *app_actions,
+ uint32_t flow_idx,
+ struct tunnel_default_miss_ctx *ctx,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_flow *dev_flow;
+ struct rte_flow_attr miss_attr = *attr;
+ const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf;
+ const struct rte_flow_item miss_items[2] = {
+ {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = NULL,
+ .last = NULL,
+ .mask = NULL
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ .spec = NULL,
+ .last = NULL,
+ .mask = NULL