+
+/**
+ * Split a flow that carries a meter action into prefix and suffix subflows.
+ *
+ * - The meter flow is split into two flows, a prefix flow and a
+ * suffix flow. A packet is only meaningful for the suffix flow once
+ * it has passed the meter action of the prefix flow.
+ *
+ * - Reg_C_5 is used to match the packet between the prefix and
+ * suffix flows.
+ *
+ * The split is attempted only when priv->mtr_en is set and
+ * flow_check_meter_action() sets mtr to a non-zero value; otherwise the
+ * original items and actions are passed unchanged to
+ * flow_create_split_metadata().
+ *
+ * Suffix actions, prefix actions and suffix items are carved out of one
+ * temporary allocation, which is released before returning.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[in] flow
+ * Parent flow structure pointer.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[in, out] flow_split_info
+ * Pointer to flow split info structure. Its prefix_mark and prefix_layers
+ * fields are updated when the flow is split.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @return
+ * 0 on success, negative value otherwise
+ */
+static int
+flow_create_split_meter(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct mlx5_flow_split_info *flow_split_info,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct rte_flow_action *sfx_actions = NULL;
+ struct rte_flow_action *pre_actions = NULL;
+ struct rte_flow_item *sfx_items = NULL;
+ struct mlx5_flow *dev_flow = NULL;
+ struct rte_flow_attr sfx_attr = *attr;
+ uint32_t mtr = 0;
+ uint32_t mtr_tag_id = 0;
+ size_t act_size;
+ size_t item_size;
+ int actions_n = 0;
+ int ret;
+
+ if (priv->mtr_en)
+ actions_n = flow_check_meter_action(actions, &mtr);
+ if (mtr) {
+ /* The five prefix actions: meter, decap, encap, tag, end. */
+ act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
+ sizeof(struct mlx5_rte_flow_action_set_tag);
+ /* Suffix items: tag, vlan, port id, end. */
+#define METER_SUFFIX_ITEM 4
+ item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
+ sizeof(struct mlx5_rte_flow_item_tag) * 2;
+ sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
+ 0, SOCKET_ID_ANY);
+ if (!sfx_actions)
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, "no memory to split "
+ "meter flow");
+ sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
+ act_size);
+ pre_actions = sfx_actions + actions_n;
+ mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
+ actions, sfx_actions,
+ pre_actions);
+ if (!mtr_tag_id) {
+ ret = -rte_errno;
+ goto exit;
+ }
+ /* Add the prefix subflow. */
+ flow_split_info->prefix_mark = 0;
+ ret = flow_create_split_inner(dev, flow, &dev_flow,
+ attr, items, pre_actions,
+ flow_split_info, error);
+ if (ret) {
+ ret = -rte_errno;
+ goto exit;
+ }
+ dev_flow->handle->split_flow_id = mtr_tag_id;
+ /* Set the suffix group attribute. */
+ sfx_attr.group = sfx_attr.transfer ?
+ (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
+ MLX5_FLOW_TABLE_LEVEL_SUFFIX;
+ flow_split_info->prefix_layers =
+ flow_get_prefix_layer_flags(dev_flow);
+ flow_split_info->prefix_mark = dev_flow->handle->mark;
+ }
+ /* Add the suffix subflow, or the unsplit flow when no meter was found. */
+ ret = flow_create_split_metadata(dev, flow,
+ &sfx_attr, sfx_items ?
+ sfx_items : items,
+ sfx_actions ? sfx_actions : actions,
+ flow_split_info, error);
+exit:
+ if (sfx_actions)
+ mlx5_free(sfx_actions);
+ return ret;
+}
+
+/**
+ * Split a flow that carries a sample action into prefix and suffix subflows.
+ *
+ * Once a Sample action is detected in the action list, the flow actions are
+ * split into a prefix sub flow and a suffix sub flow.
+ *
+ * The original items remain in the prefix sub flow. All actions preceding the
+ * sample action and the sample action itself are copied to the prefix
+ * sub flow; the actions following the sample action are copied to the
+ * suffix sub flow. The Queue action is always located in the suffix sub flow.
+ *
+ * In order to make a packet from the prefix sub flow match the suffix sub
+ * flow, an extra tag action is added to the prefix sub flow, and the suffix
+ * sub flow uses a tag item with the unique flow id.
+ *
+ * The split is attempted only when priv->sampler_en is set and
+ * flow_check_match_action() returns a non-zero action count. In every case
+ * the remaining (suffix or unsplit) flow is then handed to
+ * flow_create_split_meter().
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[in] flow
+ * Parent flow structure pointer.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[in, out] flow_split_info
+ * Pointer to flow split info structure. Its skip_scale, prefix_layers and
+ * prefix_mark fields may be updated when the flow is split.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @return
+ * 0 on success, negative value otherwise
+ */
+static int
+flow_create_split_sample(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct mlx5_flow_split_info *flow_split_info,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct rte_flow_action *sfx_actions = NULL;
+ struct rte_flow_action *pre_actions = NULL;
+ struct rte_flow_item *sfx_items = NULL;
+ struct mlx5_flow *dev_flow = NULL;
+ struct rte_flow_attr sfx_attr = *attr;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ struct mlx5_flow_dv_sample_resource *sample_res;
+ struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
+ struct mlx5_flow_tbl_resource *sfx_tbl;
+#endif
+ size_t act_size;
+ size_t item_size;
+ uint32_t fdb_tx = 0;
+ int32_t tag_id = 0;
+ int actions_n = 0;
+ int sample_action_pos;
+ int qrss_action_pos;
+ int add_tag = 0;
+ int modify_after_mirror = 0;
+ uint16_t jump_table = 0;
+ const uint32_t next_ft_step = 1;
+ int ret = 0;
+
+ if (priv->sampler_en)
+ actions_n = flow_check_match_action(actions, attr,
+ RTE_FLOW_ACTION_TYPE_SAMPLE,
+ &sample_action_pos, &qrss_action_pos,
+ &modify_after_mirror);
+ if (actions_n) {
+ /* The prefix actions must include sample, tag, end. */
+ act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
+ + sizeof(struct mlx5_rte_flow_action_set_tag);
+ item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
+ sizeof(struct mlx5_rte_flow_item_tag) * 2;
+ sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
+ item_size), 0, SOCKET_ID_ANY);
+ if (!sfx_actions)
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, "no memory to split "
+ "sample flow");
+ /* The representor_id is -1 for uplink. */
+ fdb_tx = (attr->transfer && priv->representor_id != -1);
+ /*
+ * When reg_c_preserve is set, metadata registers Cx preserve
+ * their value even through packet duplication.
+ */
+ add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve);
+ if (add_tag)
+ sfx_items = (struct rte_flow_item *)((char *)sfx_actions
+ + act_size);
+ if (modify_after_mirror)
+ jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
+ next_ft_step;
+ pre_actions = sfx_actions + actions_n;
+ tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
+ actions, sfx_actions,
+ pre_actions, actions_n,
+ sample_action_pos,
+ qrss_action_pos, jump_table,
+ error);
+ if (tag_id < 0 || (add_tag && !tag_id)) {
+ ret = -rte_errno;
+ goto exit;
+ }
+ if (modify_after_mirror)
+ flow_split_info->skip_scale =
+ 1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
+ /* Add the prefix subflow. */
+ ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
+ items, pre_actions,
+ flow_split_info, error);
+ if (ret) {
+ ret = -rte_errno;
+ goto exit;
+ }
+ dev_flow->handle->split_flow_id = tag_id;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ if (!modify_after_mirror) {
+ /* Set the suffix group attribute. */
+ sample_res = (struct mlx5_flow_dv_sample_resource *)
+ dev_flow->dv.sample_res;
+ sfx_tbl = (struct mlx5_flow_tbl_resource *)
+ sample_res->normal_path_tbl;
+ sfx_tbl_data = container_of(sfx_tbl,
+ struct mlx5_flow_tbl_data_entry,
+ tbl);
+ sfx_attr.group = sfx_attr.transfer ?
+ (sfx_tbl_data->table_id - 1) :
+ sfx_tbl_data->table_id;
+ } else {
+ MLX5_ASSERT(attr->transfer);
+ sfx_attr.group = jump_table;
+ }
+ flow_split_info->prefix_layers =
+ flow_get_prefix_layer_flags(dev_flow);
+ flow_split_info->prefix_mark = dev_flow->handle->mark;
+ /* The suffix group level is already scaled with the factor; set
+ * MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to avoid scaling
+ * it again in translation.
+ */
+ flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
+#endif
+ }
+ /* Add the suffix subflow, or the unsplit flow when no sample was found. */
+ ret = flow_create_split_meter(dev, flow, &sfx_attr,
+ sfx_items ? sfx_items : items,
+ sfx_actions ? sfx_actions : actions,
+ flow_split_info, error);
+exit:
+ if (sfx_actions)
+ mlx5_free(sfx_actions);
+ return ret;
+}
+
+/**
+ * Entry point of the flow splitting chain.
+ *
+ * The splitters are linked in a chain; as far as this file shows it is:
+ *   flow_create_split_outer() calls:
+ *     flow_create_split_sample() calls:
+ *       flow_create_split_inner(sample prefix subflow)
+ *       flow_create_split_meter(sample suffix subflow) calls:
+ *         flow_create_split_inner(meter prefix subflow)
+ *         flow_create_split_metadata(meter suffix subflow)
+ *
+ * This layering gives a flexible way to add new levels of flow splitting.
+ * This function itself only forwards its arguments to the first splitter
+ * and asserts that the result is not positive.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Parent flow structure pointer.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[in] flow_split_info
+ *   Pointer to flow split info structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @return
+ *   0 on success, negative value otherwise
+ */
+static int
+flow_create_split_outer(struct rte_eth_dev *dev,
+			struct rte_flow *flow,
+			const struct rte_flow_attr *attr,
+			const struct rte_flow_item items[],
+			const struct rte_flow_action actions[],
+			struct mlx5_flow_split_info *flow_split_info,
+			struct rte_flow_error *error)
+{
+	const int rc = flow_create_split_sample(dev, flow, attr, items,
+						actions, flow_split_info,
+						error);
+
+	/* The splitters report failure with a negative value only. */
+	MLX5_ASSERT(rc <= 0);
+	return rc;
+}
+
+/**
+ * Retrieve the tunnel context referenced by a tunnel offload rule.
+ *
+ * For a tunnel match rule the context is taken from the spec of the first
+ * pattern item; for a tunnel steer rule it is taken from the conf of the
+ * first action.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ *
+ * @return
+ *   Tunnel context pointer, NULL if the rule is neither a tunnel match
+ *   rule nor a tunnel steer rule.
+ */
+static struct mlx5_flow_tunnel *
+flow_tunnel_from_rule(struct rte_eth_dev *dev,
+		      const struct rte_flow_attr *attr,
+		      const struct rte_flow_item items[],
+		      const struct rte_flow_action actions[])
+{
+	const void *ctx = NULL;
+
+	if (is_flow_tunnel_match_rule(dev, attr, items, actions))
+		ctx = items[0].spec;
+	else if (is_flow_tunnel_steer_rule(dev, attr, items, actions))
+		ctx = actions[0].conf;
+	/* Drop the const qualifier through an integer round-trip. */
+	return (struct mlx5_flow_tunnel *)(uintptr_t)ctx;
+}
+
+/**
+ * Adjust flow RSS workspace if needed.
+ *
+ * Grows the RSS queue array of the workspace so that it can hold at least
+ * @p nrssq_num entries (rounded up with RTE_ALIGN(nrssq_num, 2)). Nothing
+ * is done when the current capacity is already sufficient.
+ *
+ * On allocation failure the previous queue array and the recorded capacity
+ * are left untouched, so the workspace stays usable with its old size.
+ *
+ * @param wks
+ *   Pointer to thread flow work space.
+ * @param rss_desc
+ *   Pointer to RSS descriptor.
+ * @param[in] nrssq_num
+ *   New RSS queue number.
+ *
+ * @return
+ *   0 on success, -1 otherwise and rte_errno is set.
+ */
+static int
+flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
+			  struct mlx5_flow_rss_desc *rss_desc,
+			  uint32_t nrssq_num)
+{
+	uint32_t new_num;
+	void *new_queue;
+
+	if (likely(nrssq_num <= wks->rssq_num))
+		return 0;
+	new_num = RTE_ALIGN(nrssq_num, 2);
+	/*
+	 * Keep the result in a temporary: assigning realloc() straight to
+	 * rss_desc->queue would leak the old array on failure and leave a
+	 * NULL queue behind a non-zero wks->rssq_num.
+	 */
+	new_queue = realloc(rss_desc->queue,
+			    sizeof(*rss_desc->queue) * new_num);
+	if (!new_queue) {
+		rte_errno = ENOMEM;
+		return -1;
+	}
+	rss_desc->queue = new_queue;
+	wks->rssq_num = new_num;
+	return 0;
+}
+
+/**
+ * Create a flow and add it to @p list.
+ *
+ * A thread flow workspace is pushed on entry and popped again on every
+ * exit path. The translated copy of the action list, when one was
+ * produced, is released only after its last use.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param list
+ *   Pointer to the indexed flow list. If this parameter is NULL,
+ *   no list insertion occurs, the flow is just created and it is
+ *   the caller's responsibility to track the created flow.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] items
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] original_actions
+ *   Associated actions (list terminated by the END action), before
+ *   shared action translation.
+ * @param[in] external
+ *   This flow rule is created by request external to PMD.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow index on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action original_actions[],
+		 bool external, struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_flow *flow = NULL;
+	struct mlx5_flow *dev_flow;
+	const struct rte_flow_action_rss *rss = NULL;
+	struct mlx5_translated_shared_action
+		shared_actions[MLX5_MAX_SHARED_ACTIONS];
+	int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
+	union {
+		struct mlx5_flow_expand_rss buf;
+		uint8_t buffer[2048];
+	} expand_buffer;
+	union {
+		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
+		uint8_t buffer[2048];
+	} actions_rx;
+	union {
+		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
+		uint8_t buffer[2048];
+	} actions_hairpin_tx;
+	union {
+		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
+		uint8_t buffer[2048];
+	} items_tx;
+	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
+	struct mlx5_flow_rss_desc *rss_desc;
+	const struct rte_flow_action *p_actions_rx;
+	uint32_t i;
+	uint32_t idx = 0;
+	int hairpin_flow;
+	struct rte_flow_attr attr_tx = { .priority = 0 };
+	const struct rte_flow_action *actions;
+	struct rte_flow_action *translated_actions = NULL;
+	struct mlx5_flow_tunnel *tunnel;
+	struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
+	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
+	struct mlx5_flow_split_info flow_split_info = {
+		.external = !!external,
+		.skip_scale = 0,
+		.flow_idx = 0,
+		.prefix_mark = 0,
+		.prefix_layers = 0
+	};
+	int ret;
+
+	MLX5_ASSERT(wks);
+	if (!wks) {
+		/* Workspace allocation can fail; do not dereference NULL. */
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "failed to get flow workspace");
+		return 0;
+	}
+	rss_desc = &wks->rss_desc;
+	ret = flow_shared_actions_translate(dev, original_actions,
+					    shared_actions,
+					    &shared_actions_n,
+					    &translated_actions, error);
+	if (ret < 0) {
+		MLX5_ASSERT(translated_actions == NULL);
+		/* The workspace was pushed above, release it. */
+		mlx5_flow_pop_thread_workspace();
+		return 0;
+	}
+	actions = translated_actions ? translated_actions : original_actions;
+	p_actions_rx = actions;
+	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
+	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
+				external, hairpin_flow, error);
+	if (ret < 0)
+		goto error_before_hairpin_split;
+	/*
+	 * Check the split bound before allocating the flow entry, so
+	 * that this failure has no flow entry to release.
+	 */
+	if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
+		rte_errno = EINVAL;
+		goto error_before_hairpin_split;
+	}
+	flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
+	if (!flow) {
+		rte_errno = ENOMEM;
+		goto error_before_hairpin_split;
+	}
+	if (hairpin_flow > 0) {
+		flow_hairpin_split(dev, actions, actions_rx.actions,
+				   actions_hairpin_tx.actions, items_tx.items,
+				   idx);
+		p_actions_rx = actions_rx.actions;
+	}
+	flow_split_info.flow_idx = idx;
+	flow->drv_type = flow_get_drv_type(dev, attr);
+	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
+		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
+	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
+	/* RSS Action only works on NIC RX domain */
+	if (attr->ingress && !attr->transfer)
+		rss = flow_get_rss_action(p_actions_rx);
+	if (rss) {
+		if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num)) {
+			/*
+			 * No device flow has been attached yet, so only
+			 * the bare flow entry has to be released here.
+			 */
+			ret = rte_errno;
+			mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
+					idx);
+			rte_errno = ret;
+			goto error_before_hairpin_split;
+		}
+		/*
+		 * The following information is required by
+		 * mlx5_flow_hashfields_adjust() in advance.
+		 */
+		rss_desc->level = rss->level;
+		/* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
+		rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
+	}
+	flow->dev_handles = 0;
+	if (rss && rss->types) {
+		unsigned int graph_root;
+
+		graph_root = find_graph_root(items, rss->level);
+		ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
+					   items, rss->types,
+					   mlx5_support_expansion, graph_root);
+		MLX5_ASSERT(ret > 0 &&
+		       (unsigned int)ret < sizeof(expand_buffer.buffer));
+	} else {
+		buf->entries = 1;
+		buf->entry[0].pattern = (void *)(uintptr_t)items;
+	}
+	rss_desc->shared_rss = flow_get_shared_rss_action(dev, shared_actions,
+							  shared_actions_n);
+	for (i = 0; i < buf->entries; ++i) {
+		/* Initialize flow split data. */
+		flow_split_info.prefix_layers = 0;
+		flow_split_info.prefix_mark = 0;
+		flow_split_info.skip_scale = 0;
+		/*
+		 * The splitter may create multiple dev_flows,
+		 * depending on configuration. In the simplest
+		 * case it just creates unmodified original flow.
+		 */
+		ret = flow_create_split_outer(dev, flow, attr,
+					      buf->entry[i].pattern,
+					      p_actions_rx, &flow_split_info,
+					      error);
+		if (ret < 0)
+			goto error;
+		if (is_flow_tunnel_steer_rule(dev, attr,
+					      buf->entry[i].pattern,
+					      p_actions_rx)) {
+			ret = flow_tunnel_add_default_miss(dev, flow, attr,
+							   p_actions_rx,
+							   idx,
+							   &default_miss_ctx,
+							   error);
+			if (ret < 0) {
+				mlx5_free(default_miss_ctx.queue);
+				goto error;
+			}
+		}
+	}
+	/* Create the tx flow. */
+	if (hairpin_flow) {
+		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
+		attr_tx.ingress = 0;
+		attr_tx.egress = 1;
+		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
+					 actions_hairpin_tx.actions,
+					 idx, error);
+		if (!dev_flow)
+			goto error;
+		dev_flow->flow = flow;
+		dev_flow->external = 0;
+		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
+			      dev_flow->handle, next);
+		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
+					 items_tx.items,
+					 actions_hairpin_tx.actions, error);
+		if (ret < 0)
+			goto error;
+	}
+	/*
+	 * Update the metadata register copy table. If extensive
+	 * metadata feature is enabled and registers are supported
+	 * we might create the extra rte_flow for each unique
+	 * MARK/FLAG action ID.
+	 *
+	 * The table is updated for ingress Flows only, because
+	 * the egress Flows belong to the different device and
+	 * copy table should be updated in peer NIC Rx domain.
+	 */
+	if (attr->ingress &&
+	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
+		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
+		if (ret)
+			goto error;
+	}
+	/*
+	 * If the flow is external (from application) OR device is started,
+	 * OR mreg discover, then apply immediately.
+	 */
+	if (external || dev->data->dev_started ||
+	    (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
+	     attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
+		ret = flow_drv_apply(dev, flow, error);
+		if (ret < 0)
+			goto error;
+	}
+	if (list) {
+		rte_spinlock_lock(&priv->flow_list_lock);
+		ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
+			     flow, next);
+		rte_spinlock_unlock(&priv->flow_list_lock);
+	}
+	flow_rxq_flags_set(dev, flow);
+	/*
+	 * 'actions' may alias 'translated_actions', so the tunnel lookup
+	 * has to run before that buffer is released.
+	 */
+	tunnel = flow_tunnel_from_rule(dev, attr, items, actions);
+	if (tunnel) {
+		flow->tunnel = 1;
+		flow->tunnel_id = tunnel->tunnel_id;
+		__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
+		mlx5_free(default_miss_ctx.queue);
+	}
+	rte_free(translated_actions);
+	mlx5_flow_pop_thread_workspace();
+	return idx;
+error:
+	MLX5_ASSERT(flow);
+	ret = rte_errno; /* Save rte_errno before cleanup. */
+	flow_mreg_del_copy_action(dev, flow);
+	flow_drv_destroy(dev, flow);
+	if (rss_desc->shared_rss)
+		__atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
+			mlx5_ipool_get
+			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
+			rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
+	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
+	rte_errno = ret; /* Restore rte_errno. */
+error_before_hairpin_split:
+	/* Every path reaching this label still owns the pushed workspace. */
+	mlx5_flow_pop_thread_workspace();
+	rte_free(translated_actions);
+	return 0;
+}
+
+/**
+ * Create a dedicated flow rule on e-switch table 0 (root table), to direct
+ * all incoming packets to table 1.
+ *
+ * Other flow rules, requested for group n, will be created in
+ * e-switch table n+1.
+ * Jump action to e-switch group n will be created to group n+1.
+ *
+ * Used when working in switchdev mode, to utilise advantages of table 1
+ * and above.
+ *
+ * The rule is an ingress transfer rule in group 0 with an empty pattern
+ * and a single JUMP action to group 1; it is added to the control flow
+ * list of the port.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   The flow index returned by flow_list_create() cast to a pointer on
+ *   success, NULL otherwise and rte_errno is set.
+ */
+struct rte_flow *
+mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	const struct rte_flow_attr attr = {
+		.group = 0,
+		.priority = 0,
+		.ingress = 1,
+		.egress = 0,
+		.transfer = 1,
+	};
+	const struct rte_flow_item pattern = {
+		.type = RTE_FLOW_ITEM_TYPE_END,
+	};
+	struct rte_flow_action_jump jump = {
+		.group = 1,
+	};
+	const struct rte_flow_action actions[] = {
+		{ .type = RTE_FLOW_ACTION_TYPE_JUMP, .conf = &jump },
+		{ .type = RTE_FLOW_ACTION_TYPE_END },
+	};
+	struct rte_flow_error error;
+	uint32_t flow_idx;
+
+	flow_idx = flow_list_create(dev, &priv->ctrl_flows, &attr, &pattern,
+				    actions, false, &error);
+	return (void *)(uintptr_t)flow_idx;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * Shared actions are translated first; the resulting action list is then
+ * checked with flow_drv_validate() as an external rule. The temporary
+ * translated list is released before returning.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action original_actions[],
+		   struct rte_flow_error *error)
+{
+	struct mlx5_translated_shared_action
+		shared_actions[MLX5_MAX_SHARED_ACTIONS];
+	int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
+	struct rte_flow_action *translated_actions = NULL;
+	const struct rte_flow_action *actions = original_actions;
+	int hairpin_flow;
+	int ret;
+
+	ret = flow_shared_actions_translate(dev, original_actions,
+					    shared_actions,
+					    &shared_actions_n,
+					    &translated_actions, error);
+	if (ret != 0)
+		return ret;
+	/* Prefer the translated list when the translation produced one. */
+	if (translated_actions != NULL)
+		actions = translated_actions;
+	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
+	ret = flow_drv_validate(dev, attr, items, actions,
+				true, hairpin_flow, error);
+	rte_free(translated_actions);
+	return ret;
+}
+
+/**
+ * Create a flow.
+ *
+ * The request is rejected with ENODEV while the port is not started;
+ * otherwise the rule is created as an external flow on the port's flow
+ * list.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx5_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint32_t flow_idx;
+
+	/*
+	 * An application may not insert flows before the device is
+	 * started. PMD default flows and traffic control flows do not go
+	 * through this function and are not affected.
+	 */
+	if (unlikely(!dev->data->dev_started)) {
+		DRV_LOG(DEBUG, "port %u is not started when "
+			"inserting a flow", dev->data->port_id);
+		rte_flow_error_set(error, ENODEV,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "port not started");
+		return NULL;
+	}
+	flow_idx = flow_list_create(dev, &priv->flows, attr, items, actions,
+				    true, error);
+	/* The flow handle given to the application is the flow index. */
+	return (void *)(uintptr_t)flow_idx;
+}
+
+/**
+ * Destroy a flow in a list.
+ *
+ * Nothing is done when @p flow_idx does not resolve to a flow in the pool.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param list
+ *   Pointer to the indexed flow list. If this parameter is NULL, the flow
+ *   is not removed from any list. Note that, as flows are kept in an
+ *   indexed list, the memory the list points to may change when a flow is
+ *   destroyed.
+ * @param[in] flow_idx
+ *   Index of flow to destroy.
+ */
+static void
+flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
+		  uint32_t flow_idx)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_flow *flow;
+
+	flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
+	if (flow == NULL)
+		return;
+	/*
+	 * Update RX queue flags only if port is started, otherwise it is
+	 * already clean.
+	 */
+	if (dev->data->dev_started)
+		flow_rxq_flags_trim(dev, flow);
+	flow_drv_destroy(dev, flow);
+	if (list != NULL) {
+		rte_spinlock_lock(&priv->flow_list_lock);
+		ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
+			     flow_idx, flow, next);
+		rte_spinlock_unlock(&priv->flow_list_lock);
+	}
+	if (flow->tunnel) {
+		struct mlx5_flow_tunnel *tunnel =
+			mlx5_find_tunnel_id(dev, flow->tunnel_id);
+
+		RTE_VERIFY(tunnel);
+		/* Free the tunnel once its last reference is dropped. */
+		if (__atomic_sub_fetch(&tunnel->refctn, 1,
+				       __ATOMIC_RELAXED) == 0)
+			mlx5_flow_tunnel_free(dev, tunnel);
+	}
+	flow_mreg_del_copy_action(dev, flow);
+	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
+}
+
+/**
+ * Destroy all flows.
+ *
+ * The head of the list is destroyed repeatedly until the list is empty.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param list
+ *   Pointer to the indexed flow list.
+ * @param active
+ *   Whether the flush was requested actively; if so, the number of
+ *   flushed flows is logged.
+ */
+void
+mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
+{
+	uint32_t num_flushed;
+
+	for (num_flushed = 0; *list != 0; ++num_flushed)
+		flow_list_destroy(dev, list, *list);
+	if (!active)
+		return;
+	DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
+		dev->data->port_id, num_flushed);
+}
+
+/**
+ * Stop all default actions for flows.
+ *
+ * Calls flow_mreg_del_default_copy_action() and then
+ * flow_rxq_flags_clear() for the device.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ */
+void
+mlx5_flow_stop_default(struct rte_eth_dev *dev)
+{
+ flow_mreg_del_default_copy_action(dev);
+ flow_rxq_flags_clear(dev);
+}
+
+/**
+ * Start all default actions for flows.
+ *
+ * Error details reported by the helper are kept in a local structure and
+ * are not passed back to the caller.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_start_default(struct rte_eth_dev *dev)
+{
+	struct rte_flow_error error;
+	int ret;
+
+	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
+	ret = flow_mreg_add_default_copy_action(dev, &error);
+	return ret;
+}
+
+/**
+ * Release thread specific flow workspace data.
+ *
+ * Walks the chain of workspaces starting at @p data through their next
+ * links and frees each workspace together with its RSS queue array.
+ *
+ * @param data
+ *   Pointer to the first workspace of the chain, may be NULL.
+ */
+void
+flow_release_workspace(void *data)
+{
+	struct mlx5_flow_workspace *wks;
+	struct mlx5_flow_workspace *next;
+
+	for (wks = data; wks != NULL; wks = next) {
+		/* Fetch the link before the node itself is freed. */
+		next = wks->next;
+		free(wks->rss_desc.queue);
+		free(wks);
+	}
+}
+
+/**
+ * Get thread specific current flow workspace.
+ *
+ * An error is logged when no workspace is registered for the thread or
+ * when the registered one is not marked in use; the looked-up pointer is
+ * returned unchanged in either case.
+ *
+ * @return
+ *   Pointer to thread specific flow workspace data, NULL if none is
+ *   registered.
+ */
+struct mlx5_flow_workspace*
+mlx5_flow_get_thread_workspace(void)
+{
+	struct mlx5_flow_workspace *data =
+		mlx5_flow_os_get_specific_workspace();
+
+	MLX5_ASSERT(data && data->inuse);
+	if (!(data && data->inuse))
+		DRV_LOG(ERR, "flow workspace not initialized.");
+	return data;
+}
+
+/**
+ * Allocate and init new flow workspace.
+ *
+ * The workspace is zero-initialized and gets an RSS queue array with
+ * MLX5_RSSQ_DEFAULT_NUM entries. Both allocations come from the C heap,
+ * matching the realloc()/free() calls used on them elsewhere.
+ *
+ * @return
+ *   Pointer to flow workspace data, NULL on error.
+ */
+static struct mlx5_flow_workspace*
+flow_alloc_thread_workspace(void)
+{
+	struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
+
+	if (!data) {
+		DRV_LOG(ERR, "Failed to allocate flow workspace "
+			"memory.");
+		return NULL;
+	}
+	/* Size by the element type so it tracks the queue declaration. */
+	data->rss_desc.queue = calloc(MLX5_RSSQ_DEFAULT_NUM,
+				      sizeof(*data->rss_desc.queue));
+	if (!data->rss_desc.queue) {
+		DRV_LOG(ERR, "Failed to allocate flow workspace "
+			"RSS queue memory.");
+		free(data);
+		return NULL;
+	}
+	data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
+	return data;
+}
+
+/**
+ * Get new thread specific flow workspace.
+ *
+ * Selection order: the current workspace when it is not in use, else the
+ * workspace already linked after it, else a freshly allocated one that is
+ * linked behind the current workspace (if any). The selected workspace is
+ * marked in use, its flow_idx is reset and it is set as the thread's
+ * current workspace.
+ *
+ * @return
+ *   Pointer to thread specific flow workspace data, NULL on error.
+ */
+static struct mlx5_flow_workspace*
+mlx5_flow_push_thread_workspace(void)
+{
+	struct mlx5_flow_workspace *curr =
+		mlx5_flow_os_get_specific_workspace();
+	struct mlx5_flow_workspace *data;
+
+	if (curr && !curr->inuse) {
+		data = curr;
+	} else if (curr && curr->next) {
+		data = curr->next;
+	} else {
+		data = flow_alloc_thread_workspace();
+		if (!data)
+			return NULL;
+		if (curr) {
+			/* Chain the new workspace behind the busy one. */
+			curr->next = data;
+			data->prev = curr;
+		}
+	}
+	data->inuse = 1;
+	data->flow_idx = 0;
+	/* Set as current workspace */
+	if (mlx5_flow_os_set_specific_workspace(data))
+		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
+	return data;
+}
+
+/**
+ * Close current thread specific flow workspace.
+ *
+ * The current workspace is marked as not in use. If a previous workspace
+ * is linked to it, that one is set as the thread's current workspace.
+ * Nothing is returned; failures are only logged.
+ */
+static void
+mlx5_flow_pop_thread_workspace(void)
+{
+	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
+
+	if (!data)
+		return;
+	if (!data->inuse) {
+		DRV_LOG(ERR, "Failed to close unused flow workspace.");
+		return;
+	}
+	data->inuse = 0;
+	/* Hand the thread back to the enclosing workspace, if there is one. */
+	if (data->prev && mlx5_flow_os_set_specific_workspace(data->prev))
+		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
+}
+
+/**
+ * Verify the flow list is empty.
+ *
+ * Every flow still present on the port's flow list is logged at debug
+ * level and counted.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   The number of flows not released.
+ */
+int
+mlx5_flow_verify(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_flow *flow;
+	uint32_t idx;
+	int leaked = 0;
+
+	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
+		      flow, next) {
+		leaked++;
+		DRV_LOG(DEBUG, "port %u flow %p still referenced",
+			dev->data->port_id, (void *)flow);
+	}
+	return leaked;
+}
+
+/**
+ * Enable default hairpin egress flow.
+ *
+ * Creates an egress control flow that matches the given Tx queue index
+ * (fully masked) and jumps to group MLX5_HAIRPIN_TX_TABLE.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param queue
+ *   The queue index.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
+			    uint32_t queue)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	const struct rte_flow_attr attr = {
+		.egress = 1,
+		.priority = 0,
+	};
+	struct mlx5_rte_flow_item_tx_queue queue_spec = {
+		.queue = queue,
+	};
+	struct mlx5_rte_flow_item_tx_queue queue_mask = {
+		.queue = UINT32_MAX,
+	};
+	struct rte_flow_item items[] = {
+		{
+			.type = (enum rte_flow_item_type)
+				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
+			.spec = &queue_spec,
+			.last = NULL,
+			.mask = &queue_mask,
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+		},
+	};
+	struct rte_flow_action_jump jump = {
+		.group = MLX5_HAIRPIN_TX_TABLE,
+	};
+	struct rte_flow_action actions[] = {
+		{ .type = RTE_FLOW_ACTION_TYPE_JUMP, .conf = &jump },
+		{ .type = RTE_FLOW_ACTION_TYPE_END },
+	};
+	struct rte_flow_error error;
+	uint32_t flow_idx;
+
+	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
+				    &attr, items, actions, false, &error);
+	if (flow_idx)
+		return 0;
+	DRV_LOG(DEBUG,
+		"Failed to create ctrl flow: rte_errno(%d),"
+		" type(%d), message(%s)",
+		rte_errno, error.type,
+		error.message ? error.message : " (no stated reason)");
+	return -rte_errno;
+}
+
+/**
+ * Enable a control flow configured from the control plane.
+ *
+ * Creates an ingress control flow at the lowest priority that matches the
+ * given Ethernet (and optionally VLAN) spec/mask and spreads traffic with
+ * an RSS action over the queues of the redirection table. Nothing is
+ * created, and 0 is returned, when the redirection table is empty or no
+ * Rx queue is configured.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param eth_spec
+ *   An Ethernet flow spec to apply.
+ * @param eth_mask
+ *   An Ethernet flow mask to apply.
+ * @param vlan_spec
+ *   A VLAN flow spec to apply, NULL to match on Ethernet only.
+ * @param vlan_mask
+ *   A VLAN flow mask to apply.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
+		    struct rte_flow_item_eth *eth_spec,
+		    struct rte_flow_item_eth *eth_mask,
+		    struct rte_flow_item_vlan *vlan_spec,
+		    struct rte_flow_item_vlan *vlan_mask)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	/*
+	 * This check must precede the declaration of the variable length
+	 * array below: a VLA whose size evaluates to zero is undefined
+	 * behaviour.
+	 */
+	if (!priv->reta_idx_n || !priv->rxqs_n)
+		return 0;
+
+	const struct rte_flow_attr attr = {
+		.ingress = 1,
+		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
+	};
+	struct rte_flow_item items[] = {
+		{
+			.type = RTE_FLOW_ITEM_TYPE_ETH,
+			.spec = eth_spec,
+			.last = NULL,
+			.mask = eth_mask,
+		},
+		{
+			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
+					      RTE_FLOW_ITEM_TYPE_END,
+			.spec = vlan_spec,
+			.last = NULL,
+			.mask = vlan_mask,
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+		},
+	};
+	uint16_t queue[priv->reta_idx_n];
+	struct rte_flow_action_rss action_rss = {
+		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+		.level = 0,
+		.types = priv->rss_conf.rss_hf,
+		.key_len = priv->rss_conf.rss_key_len,
+		.queue_num = priv->reta_idx_n,
+		.key = priv->rss_conf.rss_key,
+		.queue = queue,
+	};
+	struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_RSS,
+			.conf = &action_rss,
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		},
+	};
+	uint32_t flow_idx;
+	struct rte_flow_error error;
+	unsigned int i;
+
+	/* Without the RSS multi-queue mode no RSS hash types are used. */
+	if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
+		action_rss.types = 0;
+	for (i = 0; i != priv->reta_idx_n; ++i)
+		queue[i] = (*priv->reta_idx)[i];
+	flow_idx = flow_list_create(dev, &priv->ctrl_flows,
+				    &attr, items, actions, false, &error);
+	if (!flow_idx)
+		return -rte_errno;
+	return 0;
+}
+
+/**
+ * Enable a control flow configured from the control plane.
+ *
+ * Convenience wrapper around mlx5_ctrl_flow_vlan() for rules without a
+ * VLAN item.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param eth_spec
+ *   An Ethernet flow spec to apply.
+ * @param eth_mask
+ *   An Ethernet flow mask to apply.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow(struct rte_eth_dev *dev,
+	       struct rte_flow_item_eth *eth_spec,
+	       struct rte_flow_item_eth *eth_mask)
+{
+	int ret;
+
+	/* No VLAN spec/mask: match on the Ethernet item only. */
+	ret = mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
+	return ret;
+}
+
+/**
+ * Create the default miss flow rule matching LACP traffic.
+ *
+ * The rule is an ingress rule with a single ETH item (ether type 0x8809,
+ * fully masked) and the internal DEFAULT_MISS action; it is added to the
+ * control flow list of the port.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
+{
+	/*
+	 * The LACP matching is done by only using ether type since using
+	 * a multicast dst mac causes kernel to give low priority to this flow.
+	 */
+	static const struct rte_flow_item_eth lacp_spec = {
+		.type = RTE_BE16(0x8809),
+	};
+	static const struct rte_flow_item_eth lacp_mask = {
+		.type = 0xffff,
+	};
+	const struct rte_flow_attr attr = {
+		.ingress = 1,
+	};
+	struct rte_flow_item items[] = {
+		{
+			.type = RTE_FLOW_ITEM_TYPE_ETH,
+			.spec = &lacp_spec,
+			.mask = &lacp_mask,
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+		},
+	};
+	struct rte_flow_action actions[] = {
+		{
+			.type = (enum rte_flow_action_type)
+				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		},
+	};
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_flow_error error;
+	uint32_t flow_idx;
+
+	flow_idx = flow_list_create(dev, &priv->ctrl_flows, &attr, items,
+				    actions, false, &error);
+	return flow_idx ? 0 : -rte_errno;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * The flow handle is converted to an integer and handed to
+ * flow_list_destroy() together with the port flow list. The error argument
+ * is not used and the function always reports success.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error __rte_unused)
+{
+	struct mlx5_priv *port_priv = dev->data->dev_private;
+	const uintptr_t handle = (uintptr_t)(void *)flow;
+
+	flow_list_destroy(dev, &port_priv->flows, handle);
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * Flushes the port flow list through mlx5_flow_list_flush(). The error
+ * argument is not used and the function always reports success.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error __rte_unused)
+{
+	struct mlx5_priv *port_priv = dev->data->dev_private;
+
+	mlx5_flow_list_flush(dev, &port_priv->flows, false);
+	return 0;
+}
+
+/**
+ * Isolated mode.
+ *
+ * Refused with EBUSY while the port is started. Otherwise the isolated flag
+ * is recorded, the device ops table is switched between the isolated and
+ * the regular one, and the Rx/Tx descriptor status callbacks are set.
+ *
+ * @see rte_flow_isolate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_isolate(struct rte_eth_dev *dev,
+		  int enable,
+		  struct rte_flow_error *error)
+{
+	struct mlx5_priv *port_priv = dev->data->dev_private;
+
+	if (dev->data->dev_started) {
+		rte_flow_error_set(error, EBUSY,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				   NULL,
+				   "port must be stopped first");
+		return -rte_errno;
+	}
+	port_priv->isolated = !!enable;
+	dev->dev_ops = enable ? &mlx5_dev_ops_isolate : &mlx5_dev_ops;
+	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
+	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
+	return 0;
+}
+
+/**
+ * Query a flow by its index.
+ *
+ * Looks the flow up in the RTE flow indexed pool and forwards the query to
+ * the driver ops selected by the flow's driver type. An index that resolves
+ * to no flow is reported as ENOENT through rte_flow_error_set().
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_drv_query(struct rte_eth_dev *dev,
+	       uint32_t flow_idx,
+	       const struct rte_flow_action *actions,
+	       void *data,
+	       struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_flow *flow;
+	enum mlx5_flow_drv_type ftype;
+
+	flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
+	if (flow == NULL)
+		return rte_flow_error_set(error, ENOENT,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "invalid flow handle");
+	ftype = flow->drv_type;
+	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+	return flow_get_drv_ops(ftype)->query(dev, flow, actions, data, error);
+}
+
+/**
+ * Query a flow.
+ *
+ * Converts the flow handle to an index and delegates to flow_drv_query().
+ * Negative results are returned unchanged; any other result is reported
+ * as 0.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_query(struct rte_eth_dev *dev,
+		struct rte_flow *flow,
+		const struct rte_flow_action *actions,
+		void *data,
+		struct rte_flow_error *error)
+{
+	const int rc = flow_drv_query(dev, (uintptr_t)(void *)flow, actions,
+				      data, error);
+
+	return rc < 0 ? rc : 0;
+}
+
+/**
+ * Manage filter operations.
+ *
+ * Only the generic filter type with the GET operation is served: the
+ * address of the mlx5 flow ops table is stored through @p arg.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param filter_type
+ *   Filter type.
+ * @param filter_op
+ *   Operation to perform.
+ * @param arg
+ *   Pointer to operation-specific structure; for the generic GET operation
+ *   it is written as a pointer to a const void pointer.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set
+ *   (ENOTSUP for another filter type, EINVAL for another operation).
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+		     enum rte_filter_type filter_type,
+		     enum rte_filter_op filter_op,
+		     void *arg)
+{
+	if (filter_type != RTE_ETH_FILTER_GENERIC) {
+		DRV_LOG(ERR, "port %u filter type (%d) not supported",
+			dev->data->port_id, filter_type);
+		rte_errno = ENOTSUP;
+		return -rte_errno;
+	}
+	if (filter_op != RTE_ETH_FILTER_GET) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	*(const void **)arg = &mlx5_flow_ops;
+	return 0;
+}
+
+/**
+ * Create the needed meter and suffix tables.
+ *
+ * Thin dispatcher to the create_mtr_tbls callback of the DV driver ops.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] fm
+ *   Pointer to the flow meter.
+ *
+ * @return
+ *   Pointer to table set on success, NULL otherwise.
+ */
+struct mlx5_meter_domains_infos *
+mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
+			  const struct mlx5_flow_meter *fm)
+{
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->create_mtr_tbls(dev, fm);
+}
+
+/**
+ * Destroy the meter table set.
+ *
+ * Thin dispatcher to the destroy_mtr_tbls callback of the DV driver ops.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] tbls
+ *   Pointer to the meter table set.
+ *
+ * @return
+ *   The value returned by the driver callback; 0 on success.
+ */
+int
+mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
+			   struct mlx5_meter_domains_infos *tbls)
+{
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->destroy_mtr_tbls(dev, tbls);
+}
+
+/**
+ * Create policer rules.
+ *
+ * Thin dispatcher to the create_policer_rules callback of the DV driver
+ * ops.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] fm
+ *   Pointer to flow meter structure.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ *
+ * @return
+ *   0 on success, -1 otherwise.
+ */
+int
+mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
+			       struct mlx5_flow_meter *fm,
+			       const struct rte_flow_attr *attr)
+{
+	const struct mlx5_flow_driver_ops *dv_ops =
+		flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+
+	return dv_ops->create_policer_rules(dev, fm, attr);
+}
+
+/**
+ * Destroy policer rules.
+ *
+ * Thin dispatcher to the destroy_policer_rules callback of the DV driver
+ * ops.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] fm
+ *   Pointer to flow meter structure.
+ * @param[in] attr
+ *   Pointer to flow attributes.
+ *
+ * @return
+ *   0 on success, -1 otherwise.
+ */
+int
+mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
+				struct mlx5_flow_meter *fm,
+				const struct rte_flow_attr *attr)
+{
+	const struct mlx5_flow_driver_ops *dv_ops =
+		flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+
+	return dv_ops->destroy_policer_rules(dev, fm, attr);
+}
+
+/**
+ * Allocate a counter.
+ *
+ * Served only when the driver type resolved for a non-transfer attribute
+ * is DV; otherwise an error is logged.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   Index to allocated counter on success, 0 otherwise.
+ */
+uint32_t
+mlx5_counter_alloc(struct rte_eth_dev *dev)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_DV) {
+		DRV_LOG(ERR,
+			"port %u counter allocate is not supported.",
+			dev->data->port_id);
+		return 0;
+	}
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->counter_alloc(dev);
+}
+
+/**
+ * Free a counter.
+ *
+ * Served only when the driver type resolved for a non-transfer attribute
+ * is DV; otherwise an error is logged and nothing is released.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] cnt
+ *   Index of the counter to be freed.
+ */
+void
+mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_DV) {
+		DRV_LOG(ERR,
+			"port %u counter free is not supported.",
+			dev->data->port_id);
+		return;
+	}
+	flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->counter_free(dev, cnt);
+}
+
+/**
+ * Query counter statistics.
+ *
+ * Served only when the driver type resolved for a non-transfer attribute
+ * is DV; otherwise an error is logged and -ENOTSUP is returned.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] cnt
+ *   Index to counter to query.
+ * @param[in] clear
+ *   Set to clear counter statistics.
+ * @param[out] pkts
+ *   The counter hits packets number to save.
+ * @param[out] bytes
+ *   The counter hits bytes number to save.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise.
+ */
+int
+mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
+		   bool clear, uint64_t *pkts, uint64_t *bytes)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_DV) {
+		DRV_LOG(ERR,
+			"port %u counter query is not supported.",
+			dev->data->port_id);
+		return -ENOTSUP;
+	}
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->counter_query
+						(dev, cnt, clear, pkts, bytes);
+}
+
+/**
+ * Allocate a new memory for the counter values wrapped by all the needed
+ * management.
+ *
+ * One zeroed buffer is allocated (the page size is passed as the alignment
+ * argument) and laid out as follows: the raw counter statistics of all
+ * raws start at the beginning of the buffer and are registered as umem and
+ * covered by an mkey; the array of raw descriptors follows them; the
+ * management object sits at the very end of the buffer. The last
+ * MLX5_MAX_PENDING_QUERIES raw descriptors are put on the free list, the
+ * management object is linked into the manager list and becomes the
+ * current one.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_devx_mkey_attr mkey_attr;
+	struct mlx5_counter_stats_mem_mng *mem_mng;
+	volatile struct flow_counter_stats *raw_data;
+	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
+	int size = (sizeof(struct flow_counter_stats) *
+			MLX5_COUNTERS_PER_POOL +
+			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
+			sizeof(struct mlx5_counter_stats_mem_mng);
+	size_t pgsize = rte_mem_page_size();
+	uint8_t *mem;
+	int i;
+
+	if (pgsize == (size_t)-1) {
+		DRV_LOG(ERR, "Failed to get mem page size");
+		rte_errno = ENOMEM;
+		return -ENOMEM;
+	}
+	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
+	if (!mem) {
+		rte_errno = ENOMEM;
+		return -ENOMEM;
+	}
+	/* The management object occupies the tail of the buffer. */
+	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
+	/* From here on 'size' is the size of the raw statistics area only. */
+	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
+	mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size,
+					 IBV_ACCESS_LOCAL_WRITE);
+	if (!mem_mng->umem) {
+		rte_errno = errno;
+		mlx5_free(mem);
+		return -rte_errno;
+	}
+	mkey_attr.addr = (uintptr_t)mem;
+	mkey_attr.size = size;
+	mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
+	mkey_attr.pd = sh->pdn;
+	mkey_attr.log_entity_size = 0;
+	mkey_attr.pg_access = 0;
+	mkey_attr.klm_array = NULL;
+	mkey_attr.klm_num = 0;
+	mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
+	mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
+	mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
+	if (!mem_mng->dm) {
+		/*
+		 * Latch errno first: the cleanup calls below may overwrite
+		 * it before it is reported to the caller.
+		 */
+		rte_errno = errno;
+		mlx5_os_umem_dereg(mem_mng->umem);
+		mlx5_free(mem);
+		return -rte_errno;
+	}
+	/* Raw descriptors start right after the statistics area. */
+	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
+	raw_data = (volatile struct flow_counter_stats *)mem;
+	for (i = 0; i < raws_n; ++i) {
+		mem_mng->raws[i].mem_mng = mem_mng;
+		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
+	}
+	/* Only the trailing raws are offered as free query buffers. */
+	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
+		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
+				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
+				 next);
+	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
+	sh->cmng.mem_mng = mem_mng;
+	return 0;
+}
+
+/**
+ * Set the statistic memory to the new counter pool.
+ *
+ * When the pool index is a multiple of MLX5_CNT_CONTAINER_RESIZE a new
+ * statistics memory manager is created first. The pool raw pointer is then
+ * set, under the pool spinlock, to the slot of the current memory manager
+ * selected by the pool index modulo MLX5_CNT_CONTAINER_RESIZE, and the
+ * in-flight raw pointer is cleared.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] pool
+ *   Pointer to the pool to set the statistic memory.
+ *
+ * @return
+ *   0 on success, -1 if the statistics memory cannot be resized.
+ */
+static int
+mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
+			       struct mlx5_flow_counter_pool *pool)
+{
+	struct mlx5_flow_counter_mng *mng = &sh->cmng;
+
+	/* Resize statistic memory once used out. */
+	if (pool->index % MLX5_CNT_CONTAINER_RESIZE == 0) {
+		if (mlx5_flow_create_counter_stat_mem_mng(sh) != 0) {
+			DRV_LOG(ERR, "Cannot resize counter stat mem.");
+			return -1;
+		}
+	}
+	rte_spinlock_lock(&pool->sl);
+	pool->raw = &mng->mem_mng->raws[pool->index %
+					MLX5_CNT_CONTAINER_RESIZE];
+	rte_spinlock_unlock(&pool->sl);
+	pool->raw_hw = NULL;
+	return 0;
+}
+
+#define MLX5_POOL_QUERY_FREQ_US 1000000
+
+/**
+ * Set the periodic procedure for triggering asynchronous batch queries for all
+ * the counter pools.
+ *
+ * The alarm period is MLX5_POOL_QUERY_FREQ_US divided by the number of
+ * valid pools. The query_thread_on flag records whether the alarm was
+ * armed.
+ *
+ * NOTE(review): the period is computed by dividing by the number of valid
+ * pools; callers presumably guarantee it is not zero - confirm.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ */
+void
+mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
+{
+	const uint32_t pools_n = __atomic_load_n(&sh->cmng.n_valid,
+						 __ATOMIC_RELAXED);
+	const uint32_t us = MLX5_POOL_QUERY_FREQ_US / pools_n;
+	int armed;
+
+	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
+	armed = !rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh);
+	sh->cmng.query_thread_on = armed;
+	if (!armed)
+		DRV_LOG(ERR, "Cannot reinitialize query alarm");
+}
+
+/**
+ * The periodic procedure for triggering asynchronous batch queries for all the
+ * counter pools. This function is probably called by the host thread.
+ *
+ * Each invocation handles at most one pool, the one at cmng.pool_index.
+ * The index advances (wrapping to 0 at the number of valid pools) only
+ * when an asynchronous query was triggered; on every other path it is
+ * left unchanged. The alarm is re-armed on all paths through
+ * mlx5_set_query_alarm().
+ *
+ * @param[in] arg
+ * The parameter for the alarm process; used as the pointer to the
+ * mlx5_dev_ctx_shared object.
+ */
+void
+mlx5_flow_query_alarm(void *arg)
+{
+ struct mlx5_dev_ctx_shared *sh = arg;
+ int ret;
+ uint16_t pool_index = sh->cmng.pool_index;
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ struct mlx5_flow_counter_pool *pool;
+ uint16_t n_valid;
+
+ if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) /* Too many queries in flight. */
+ goto set_alarm;
+ rte_spinlock_lock(&cmng->pool_update_sl); /* Snapshot pool pointer and pool count together. */
+ pool = cmng->pools[pool_index];
+ n_valid = cmng->n_valid;
+ rte_spinlock_unlock(&cmng->pool_update_sl);
+ /* Set the statistic memory to the new created pool. */
+ if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
+ goto set_alarm;
+ if (pool->raw_hw)
+ /* There is a pool query in progress. */
+ goto set_alarm;
+ pool->raw_hw =
+ LIST_FIRST(&sh->cmng.free_stat_raws); /* Peek only; removed from the list after a successful trigger. */
+ if (!pool->raw_hw)
+ /* No free counter statistics raw memory. */
+ goto set_alarm;
+ /*
+ * Identify the counters released between query trigger and query
+ * handle more efficiently. The counter released in this gap period
+ * should wait for a new round of query as the new arrived packets
+ * will not be taken into account.
+ */
+ pool->query_gen++;
+ ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
+ MLX5_COUNTERS_PER_POOL,
+ NULL, NULL,
+ pool->raw_hw->mem_mng->dm->id,
+ (void *)(uintptr_t)
+ pool->raw_hw->data,
+ sh->devx_comp,
+ (uint64_t)(uintptr_t)pool); /* The pool pointer is passed as the async ID. */
+ if (ret) {
+ DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
+ " %d", pool->min_dcs->id);
+ pool->raw_hw = NULL; /* The raw buffer was not removed from the free list yet. */
+ goto set_alarm;
+ }
+ LIST_REMOVE(pool->raw_hw, next);
+ sh->cmng.pending_queries++;
+ pool_index++;
+ if (pool_index >= n_valid)
+ pool_index = 0;
+set_alarm:
+ sh->cmng.pool_index = pool_index;
+ mlx5_set_query_alarm(sh);
+}
+
+/**
+ * Check and callback event for new aged flow in the counter pool
+ *
+ * For every counter of the pool in AGE_CANDIDATE state, the hit count of
+ * the newly queried raw data is compared with the previous one. A changed
+ * hit count resets the idle time; otherwise the time elapsed since the
+ * last check is added and, once it exceeds the timeout, the counter is
+ * moved to AGE_TMOUT and appended to the aged counters list of its port.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] pool
+ *   Pointer to Current counter pool.
+ */
+static void
+mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
+		      struct mlx5_flow_counter_pool *pool)
+{
+	struct mlx5_priv *priv;
+	struct mlx5_flow_counter *cnt;
+	struct mlx5_age_info *age_info;
+	struct mlx5_age_param *age_param;
+	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
+	struct mlx5_counter_stats_raw *prev = pool->raw;
+	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
+	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
+	uint16_t expected;
+	uint32_t i;
+
+	pool->time_of_last_age_check = curr_time;
+	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+		cnt = MLX5_POOL_GET_CNT(pool, i);
+		age_param = MLX5_CNT_TO_AGE(cnt);
+		if (__atomic_load_n(&age_param->state,
+				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
+			continue;
+		if (cur->data[i].hits != prev->data[i].hits) {
+			/* Traffic was seen: restart the idle time. */
+			__atomic_store_n(&age_param->sec_since_last_hit, 0,
+					 __ATOMIC_RELAXED);
+			continue;
+		}
+		if (__atomic_add_fetch(&age_param->sec_since_last_hit,
+				       time_delta,
+				       __ATOMIC_RELAXED) <= age_param->timeout)
+			continue;
+		/**
+		 * Hold the lock first, or if between the
+		 * state AGE_TMOUT and tailq operation the
+		 * release happened, the release procedure
+		 * may delete a non-existent tailq node.
+		 */
+		priv = rte_eth_devices[age_param->port_id].data->dev_private;
+		age_info = GET_PORT_AGE_INFO(priv);
+		rte_spinlock_lock(&age_info->aged_sl);
+		/*
+		 * A failed compare-exchange stores the observed state into
+		 * 'expected', so it must be re-armed for every counter;
+		 * otherwise later counters would be compared against a
+		 * stale, wrong state.
+		 */
+		expected = AGE_CANDIDATE;
+		if (__atomic_compare_exchange_n(&age_param->state, &expected,
+						AGE_TMOUT, false,
+						__ATOMIC_RELAXED,
+						__ATOMIC_RELAXED)) {
+			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
+			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
+		}
+		rte_spinlock_unlock(&age_info->aged_sl);
+	}
+	mlx5_age_event_prepare(sh);
+}
+
+/**
+ * Handler for the HW response about ready values from an asynchronous batch
+ * query. This function is probably called by the host thread.
+ *
+ * On a failed completion the in-flight raw buffer is returned to the free
+ * list. On success the aging check runs for aged pools, the in-flight raw
+ * buffer becomes the current one (the previous current buffer is returned
+ * to the free list) and the pool counters list selected by the query
+ * generation is concatenated to the manager list of the matching counter
+ * type. In both cases the in-flight pointer is cleared and the number of
+ * pending queries is decremented.
+ *
+ * @param[in] sh
+ * The pointer to the shared device context.
+ * @param[in] async_id
+ * The Devx async ID; converted back to the counter pool pointer.
+ * @param[in] status
+ * The status of the completion; non-zero is handled as failure.
+ */
+void
+mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
+ uint64_t async_id, int status)
+{
+ struct mlx5_flow_counter_pool *pool =
+ (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
+ struct mlx5_counter_stats_raw *raw_to_free;
+ uint8_t query_gen = pool->query_gen ^ 1; /* Index of the pool counters list handled below. */
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ enum mlx5_counter_type cnt_type =
+ pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
+ MLX5_COUNTER_TYPE_ORIGIN;
+
+ if (unlikely(status)) {
+ raw_to_free = pool->raw_hw; /* Query failed: discard the in-flight buffer. */
+ } else {
+ raw_to_free = pool->raw; /* Query succeeded: the old current buffer is recycled. */
+ if (pool->is_aged)
+ mlx5_flow_aging_check(sh, pool); /* Runs before pool->raw is replaced, so it sees both buffers. */
+ rte_spinlock_lock(&pool->sl);
+ pool->raw = pool->raw_hw;
+ rte_spinlock_unlock(&pool->sl);
+ /* Be sure the new raw counters data is updated in memory. */
+ rte_io_wmb();
+ if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
+ rte_spinlock_lock(&cmng->csl[cnt_type]);
+ TAILQ_CONCAT(&cmng->counters[cnt_type],
+ &pool->counters[query_gen], next);
+ rte_spinlock_unlock(&cmng->csl[cnt_type]);
+ }
+ }
+ LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
+ pool->raw_hw = NULL;
+ sh->cmng.pending_queries--;
+}
+
+/**
+ * Standard translation of an rte_flow group index to a table value.
+ *
+ * When the transfer, external and fdb_def_rule flags of @p grp_info are
+ * all set, the table is the group plus one and the group UINT32_MAX is
+ * rejected; otherwise the table equals the group.
+ *
+ * @param[in] port_id
+ *   Port identifier, used for logging only.
+ * @param[in] group
+ *   rte_flow group index value.
+ * @param[out] table
+ *   Resulting table value.
+ * @param[in] grp_info
+ *   Flags used for conversion.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, otherwise the value returned by rte_flow_error_set().
+ */
+static int
+flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
+		    const struct flow_grp_info *grp_info,
+		    struct rte_flow_error *error)
+{
+	const bool shifted = grp_info->transfer && grp_info->external &&
+			     grp_info->fdb_def_rule;
+
+	if (shifted && group == UINT32_MAX)
+		return rte_flow_error_set
+				(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				 NULL,
+				 "group index not supported");
+	*table = shifted ? group + 1 : group;
+	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
+	return 0;
+}
+
+/**
+ * Translate the rte_flow group index to HW table value.
+ *
+ * External groups below MLX5_MAX_TABLES_EXTERNAL are first multiplied by
+ * MLX5_FLOW_TABLE_FACTOR unless grp_info.skip_scale is set.
+ *
+ * If tunnel offload is disabled, all group ids are converted to flow table
+ * ids using the standard method.
+ * If tunnel offload is enabled, a group id can be converted using the
+ * standard or the tunnel conversion method. The method is selected by the
+ * flags in the `grp_info` parameter:
+ * - Internal (grp_info.external == 0) groups use the standard method.
+ * - Group ids in a JUMP action are converted with the tunnel conversion.
+ * - For a group id in the rule attribute the conversion depends on the
+ *   rule type and the group id value:
+ *   ** non zero group attributes are converted with the tunnel method
+ *   ** zero group attribute in a non-tunnel rule is converted using the
+ *      standard method - there's only one root table
+ *   ** zero group attribute in a steer tunnel rule is converted with the
+ *      standard method - single root table
+ *   ** zero group attribute in a match tunnel rule is a special OvS
+ *      case: that value is used for portability reasons. That group
+ *      id is converted with the tunnel conversion method.
+ *
+ * @param[in] dev
+ *   Port device
+ * @param[in] tunnel
+ *   PMD tunnel offload object
+ * @param[in] group
+ *   rte_flow group index value.
+ * @param[out] table
+ *   HW table value.
+ * @param[in] grp_info
+ *   flags used for conversion
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_group_to_table(struct rte_eth_dev *dev,
+			 const struct mlx5_flow_tunnel *tunnel,
+			 uint32_t group, uint32_t *table,
+			 const struct flow_grp_info *grp_info,
+			 struct rte_flow_error *error)
+{
+	bool use_standard;
+
+	if (!grp_info->skip_scale && grp_info->external &&
+	    group < MLX5_MAX_TABLES_EXTERNAL)
+		group *= MLX5_FLOW_TABLE_FACTOR;
+	/* Without tunnel offload only the standard translation exists. */
+	use_standard = !is_tunnel_offload_active(dev) ||
+		       !grp_info->external || grp_info->std_tbl_fix;
+	DRV_LOG(DEBUG,
+		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
+		dev->data->port_id, group, grp_info->transfer,
+		grp_info->external, grp_info->fdb_def_rule,
+		use_standard ? "STANDARD" : "TUNNEL");
+	if (!use_standard)
+		return tunnel_flow_group_to_flow_table(dev, tunnel, group,
+						       table, error);
+	return flow_group_to_table(dev->data->port_id, group, table,
+				   grp_info, error);
+}
+
+/**
+ * Discover availability of metadata reg_c's.
+ *
+ * Iteratively use test flows to check availability.
+ *
+ * REG_C_0 and REG_C_1 are always recorded. For each of REG_C_2..REG_C_7 a
+ * test flow copying REG_C_1 into the candidate register is created; when
+ * the creation succeeds the register is recorded and the test flow is
+ * destroyed. No test flow is created when dv_flow_en is not set. The
+ * remaining entries of flow_mreg_c[] are filled with REG_NON.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ * The code visible here always returns 0.
+ */
+int
+mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_config *config = &priv->config;
+ enum modify_reg idx;
+ int n = 0; /* Number of flow_mreg_c[] entries recorded so far. */
+
+ /* reg_c[0] and reg_c[1] are reserved. */
+ config->flow_mreg_c[n++] = REG_C_0;
+ config->flow_mreg_c[n++] = REG_C_1;
+ /* Discover availability of other reg_c's. */
+ for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
+ struct rte_flow_attr attr = {
+ .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
+ .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
+ .ingress = 1,
+ };
+ struct rte_flow_item items[] = {
+ [0] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ struct rte_flow_action actions[] = {
+ [0] = {
+ .type = (enum rte_flow_action_type)
+ MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
+ .conf = &(struct mlx5_flow_action_copy_mreg){
+ .src = REG_C_1,
+ .dst = idx, /* The register being probed. */
+ },
+ },
+ [1] = {
+ .type = RTE_FLOW_ACTION_TYPE_JUMP,
+ .conf = &(struct rte_flow_action_jump){
+ .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
+ },
+ },
+ [2] = {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ uint32_t flow_idx;
+ struct rte_flow *flow;
+ struct rte_flow_error error;
+
+ if (!config->dv_flow_en) /* No probing without DV flow support. */
+ break;
+ /* Create internal flow, validation skips copy action. */
+ flow_idx = flow_list_create(dev, NULL, &attr, items,
+ actions, false, &error);
+ flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
+ flow_idx);
+ if (!flow) /* Creation failed: the register is not recorded. */
+ continue;
+ config->flow_mreg_c[n++] = idx;
+ flow_list_destroy(dev, NULL, flow_idx); /* The test flow is no longer needed. */
+ }
+ for (; n < MLX5_MREG_C_NUM; ++n)
+ config->flow_mreg_c[n] = REG_NON;
+ return 0;
+}
+
+/**
+ * Dump flow raw hw data to file
+ *
+ * When DV flow is disabled a short notice is written to the file and
+ * -ENOTSUP is returned; otherwise the FDB, Rx and Tx domains of the shared
+ * context are dumped through mlx5_devx_cmd_flow_dump().
+ *
+ * @param[in] dev
+ *   The pointer to Ethernet device.
+ * @param[in] file
+ *   A pointer to a file for output.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only. Not used by this function.
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int
+mlx5_flow_dev_dump(struct rte_eth_dev *dev,
+		   FILE *file,
+		   struct rte_flow_error *error __rte_unused)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+
+	if (!priv->config.dv_flow_en) {
+		/*
+		 * fputs() reports failure with EOF; any non-negative value,
+		 * including 0, means the string was written.
+		 */
+		if (fputs("device dv flow disabled\n", file) == EOF)
+			return -errno;
+		return -ENOTSUP;
+	}
+	return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
+				       sh->tx_domain, file);
+}
+
+/**
+ * Get aged-out flows.
+ *
+ * Served only when the driver type resolved for a non-transfer attribute
+ * is DV; otherwise an error is logged and -ENOTSUP is returned.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] contexts
+ *   The address of an array of pointers to the aged-out flows contexts.
+ * @param[in] nb_contexts
+ *   The length of context array pointers.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   how many contexts get in success, otherwise negative errno value.
+ *   if nb_contexts is 0, return the amount of all aged contexts.
+ *   if nb_contexts is not 0 , return the amount of aged flows reported
+ *   in the context array.
+ */
+int
+mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+			 uint32_t nb_contexts, struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_DV) {
+		DRV_LOG(ERR,
+			"port %u get aged flows is not supported.",
+			dev->data->port_id);
+		return -ENOTSUP;
+	}
+	return flow_get_drv_ops(MLX5_FLOW_TYPE_DV)->get_aged_flows
+					(dev, contexts, nb_contexts, error);
+}
+
+/*
+ * Wrapper for driver action_validate op callback.
+ * Reports ENOTSUP when the driver ops provide no such callback.
+ */
+static int
+flow_drv_action_validate(struct rte_eth_dev *dev,
+			 const struct rte_flow_shared_action_conf *conf,
+			 const struct rte_flow_action *action,
+			 const struct mlx5_flow_driver_ops *fops,
+			 struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action validation unsupported";
+
+	if (fops->action_validate)
+		return fops->action_validate(dev, conf, action, error);
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return -rte_errno;
+}
+
+/**
+ * Destroys the shared action by handle.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] action
+ *   Handle for the shared action to be destroyed.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *
+ * @note: wrapper for driver action_destroy op callback; ENOTSUP is
+ *   reported when the driver ops provide no such callback.
+ */
+static int
+mlx5_shared_action_destroy(struct rte_eth_dev *dev,
+			   struct rte_flow_shared_action *action,
+			   struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action destruction unsupported";
+	struct rte_flow_attr attr = { .transfer = 0 };
+	const struct mlx5_flow_driver_ops *fops =
+			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+
+	if (fops->action_destroy)
+		return fops->action_destroy(dev, action, error);
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return -rte_errno;
+}
+
+/*
+ * Wrapper for driver action_update op callback.
+ * Reports ENOTSUP when the driver ops provide no such callback.
+ */
+static int
+flow_drv_action_update(struct rte_eth_dev *dev,
+		       struct rte_flow_shared_action *action,
+		       const void *action_conf,
+		       const struct mlx5_flow_driver_ops *fops,
+		       struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action update unsupported";
+
+	if (fops->action_update)
+		return fops->action_update(dev, action, action_conf, error);
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return -rte_errno;
+}
+
+/*
+ * Wrapper for driver action_query op callback.
+ * Reports ENOTSUP when the driver ops provide no such callback.
+ */
+static int
+flow_drv_action_query(struct rte_eth_dev *dev,
+		      const struct rte_flow_shared_action *action,
+		      void *data,
+		      const struct mlx5_flow_driver_ops *fops,
+		      struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action query unsupported";
+
+	if (fops->action_query)
+		return fops->action_query(dev, action, data, error);
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return -rte_errno;
+}
+
+/**
+ * Create shared action for reuse in multiple flow rules.
+ *
+ * The action is validated through the driver first; creation is then
+ * delegated to the driver action_create callback, or reported as ENOTSUP
+ * when the driver ops provide no such callback.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] conf
+ *   Shared action configuration, passed to the driver callbacks.
+ * @param[in] action
+ *   Action configuration for shared action creation.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ * @return
+ *   A valid handle in case of success, NULL otherwise and rte_errno is set.
+ */
+static struct rte_flow_shared_action *
+mlx5_shared_action_create(struct rte_eth_dev *dev,
+			  const struct rte_flow_shared_action_conf *conf,
+			  const struct rte_flow_action *action,
+			  struct rte_flow_error *error)
+{
+	static const char err_msg[] = "shared action creation unsupported";
+	struct rte_flow_attr attr = { .transfer = 0 };
+	const struct mlx5_flow_driver_ops *fops =
+			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+
+	if (flow_drv_action_validate(dev, conf, action, fops, error))
+		return NULL;
+	if (fops->action_create)
+		return fops->action_create(dev, conf, action, error);
+	DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   NULL, err_msg);
+	return NULL;
+}
+
+/**
+ * Updates in place the shared action configuration pointed to by the
+ * *shared_action* handle with the configuration provided as the *action*
+ * argument.
+ * The update of the shared action configuration affects all flow rules
+ * reusing the action via handle.
+ *
+ * The new action is validated through the driver (with a NULL shared
+ * action configuration) before the update is attempted.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] shared_action
+ *   Handle for the shared action to be updated.
+ * @param[in] action
+ *   Action specification used to modify the action pointed by handle.
+ *   *action* should be of same type with the action pointed by the
+ *   *shared_action* handle argument, otherwise considered as invalid.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_shared_action_update(struct rte_eth_dev *dev,
+		struct rte_flow_shared_action *shared_action,
+		const struct rte_flow_action *action,
+		struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+	const struct mlx5_flow_driver_ops *fops =
+			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+	int rc = flow_drv_action_validate(dev, NULL, action, fops, error);
+
+	if (rc == 0)
+		rc = flow_drv_action_update(dev, shared_action, action->conf,
+					    fops, error);
+	return rc;
+}
+
+/**
+ * Query the shared action by handle.
+ *
+ * This function allows retrieving action-specific data such as counters.
+ * Data is gathered by special action which may be present/referenced in
+ * more than one flow rule definition.
+ *
+ * The request is forwarded to flow_drv_action_query() with the driver ops
+ * resolved for a non-transfer attribute.
+ *
+ * \see RTE_FLOW_ACTION_TYPE_COUNT
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[in] action
+ *   Handle for the shared action to query.
+ * @param[in, out] data
+ *   Pointer to storage for the associated query data type.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_shared_action_query(struct rte_eth_dev *dev,
+			 const struct rte_flow_shared_action *action,
+			 void *data,
+			 struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr = { .transfer = 0 };
+	const enum mlx5_flow_drv_type drv_type = flow_get_drv_type(dev, &attr);
+
+	return flow_drv_action_query(dev, action, data,
+				     flow_get_drv_ops(drv_type), error);
+}
+
+/**
+ * Destroy all shared actions.
+ *
+ * Walks the RSS shared actions list of the port and calls
+ * mlx5_shared_action_destroy() for every entry, using the entry index
+ * converted to a shared action handle.
+ *
+ * NOTE(review): entries are destroyed while the list is being walked;
+ * whether ILIST_FOREACH tolerates that is not visible here - confirm.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return
+ * 0 when every destroy call returned 0; otherwise the bitwise OR of the
+ * values returned by the destroy calls.
+ */
+int
+mlx5_shared_action_flush(struct rte_eth_dev *dev)
+{
+ struct rte_flow_error error;
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_shared_action_rss *shared_rss; /* Iteration cursor, only used by the macro here. */
+ int ret = 0;
+ uint32_t idx;
+
+ ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
+ priv->rss_shared_actions, idx, shared_rss, next) {
+ ret |= mlx5_shared_action_destroy(dev,
+ (struct rte_flow_shared_action *)(uintptr_t)idx, &error); /* The handle is the pool index. */
+ }
+ return ret;
+}
+
+/*
+ * Flags handed to the driver sync_domain() callback. Without
+ * HAVE_MLX5DV_DR the two lowest bits are used as a stand-in for the
+ * MLX5DV_DR_DOMAIN_SYNC_FLAGS_* values.
+ */
+#ifndef HAVE_MLX5DV_DR
+#define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
+#else
+#define MLX5_DOMAIN_SYNC_FLOW \
+	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
+#endif
+
+/**
+ * Ask the flow driver of a port to synchronize the given domains.
+ *
+ * @param port_id
+ *   Ethernet port identifier.
+ * @param domains
+ *   Domain selector passed unchanged to the driver sync_domain() callback.
+ *
+ * @return
+ *   0 on success, a negative value otherwise. -ENODEV is returned when
+ *   port_id does not designate a valid port. A positive value returned by
+ *   the driver callback is negated.
+ */
+int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
+{
+	struct rte_eth_dev *dev;
+	const struct mlx5_flow_driver_ops *fops;
+	int ret;
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	/* Public entry point: never index rte_eth_devices[] blindly. */
+	if (!rte_eth_dev_is_valid_port(port_id))
+		return -ENODEV;
+	dev = &rte_eth_devices[port_id];
+	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
+	if (ret > 0)
+		ret = -ret;
+	return ret;
+}
+
+/**
+ * Tunnel offload functionality is defined for the DV environment only.
+ */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+/*
+ * 32-bit MARK value attached by the tunnel default miss rule and decoded
+ * again by tunnel_mark_decode(). The anonymous bit-field struct and val
+ * share the same storage.
+ */
+__extension__
+union tunnel_offload_mark {
+ uint32_t val; /* whole mark accessed as one 32-bit value */
+ struct {
+ uint32_t app_reserve:8; /* written as 0 by the miss rule */
+ uint32_t table_id:15; /* tunnel_flow_tbl_to_id() of the flow table */
+ uint32_t transfer:1; /* non-zero for transfer rules */
+ uint32_t _unused_:8; /* written as 0 by the miss rule */
+ };
+};
+
+/*
+ * Forward declaration of the tunnel list walker.
+ * match() is evaluated for each tunnel until it returns true; hit() is
+ * then invoked for that tunnel, otherwise miss() is invoked. ctx is
+ * passed to all three callbacks.
+ */
+static bool
+mlx5_access_tunnel_offload_db
+ (struct rte_eth_dev *dev,
+ bool (*match)(struct rte_eth_dev *,
+ struct mlx5_flow_tunnel *, const void *),
+ void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
+ void (*miss)(struct rte_eth_dev *, void *),
+ void *ctx, bool lock_op);
+
+/**
+ * Create the default miss sub-flow of a tunnel offload steering rule.
+ *
+ * The miss rule matches any Ethernet packet in the group targeted by the
+ * application JUMP action, applies a MARK that encodes the flow table and
+ * the transfer bit, and then either distributes the packet with RSS
+ * (non-transfer rules) or jumps to MLX5_TNL_MISS_FDB_JUMP_GRP (transfer
+ * rules).
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param flow
+ *   Parent flow the new sub-flow is attached to.
+ * @param[in] attr
+ *   Attributes of the application rule.
+ * @param[in] app_actions
+ *   Application actions. The conf of the first action is used as the PMD
+ *   tunnel and the list must contain a JUMP action before END.
+ * @param flow_idx
+ *   Flow index handed to flow_drv_prepare().
+ * @param ctx
+ *   Storage backing the RSS or JUMP configuration of the miss rule.
+ *   ctx->queue is allocated here for non-transfer rules and is never
+ *   released by this function (presumably the caller frees it - confirm).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, negative value otherwise.
+ */
+static int
+flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
+			     struct rte_flow *flow,
+			     const struct rte_flow_attr *attr,
+			     const struct rte_flow_action *app_actions,
+			     uint32_t flow_idx,
+			     struct tunnel_default_miss_ctx *ctx,
+			     struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow *dev_flow;
+	struct rte_flow_attr miss_attr = *attr;
+	const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf;
+	const struct rte_flow_item miss_items[2] = {
+		{
+			.type = RTE_FLOW_ITEM_TYPE_ETH,
+			.spec = NULL,
+			.last = NULL,
+			.mask = NULL
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+			.spec = NULL,
+			.last = NULL,
+			.mask = NULL
+		}
+	};
+	union tunnel_offload_mark mark_id;
+	struct rte_flow_action_mark miss_mark;
+	/* Slot 1 is filled below with either RSS or JUMP. */
+	struct rte_flow_action miss_actions[3] = {
+		[0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
+		[2] = { .type = RTE_FLOW_ACTION_TYPE_END, .conf = NULL }
+	};
+	const struct rte_flow_action_jump *jump_data;
+	uint32_t i, flow_table = 0; /* prevent compilation warning */
+	struct flow_grp_info grp_info = {
+		.external = 1,
+		.transfer = attr->transfer,
+		.fdb_def_rule = !!priv->fdb_def_rule,
+		.std_tbl_fix = 0,
+	};
+	int ret;
+
+	if (!attr->transfer) {
+		uint32_t q_size;
+
+		/*
+		 * Validate the port configuration before allocating, so a
+		 * zero-sized request is never issued and the reported error
+		 * names the real cause.
+		 */
+		if (!priv->reta_idx_n || !priv->rxqs_n)
+			return rte_flow_error_set
+				(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+				 NULL, "invalid port configuration");
+		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
+		q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
+		ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
+					 0, SOCKET_ID_ANY);
+		if (!ctx->queue)
+			return rte_flow_error_set
+				(error, ENOMEM,
+				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+				 NULL, "invalid default miss RSS");
+		ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
+		ctx->action_rss.level = 0;
+		ctx->action_rss.types = priv->rss_conf.rss_hf;
+		ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
+		ctx->action_rss.queue_num = priv->reta_idx_n;
+		ctx->action_rss.key = priv->rss_conf.rss_key;
+		ctx->action_rss.queue = ctx->queue;
+		/* No hash types when RSS is not enabled in the Rx mode. */
+		if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
+			ctx->action_rss.types = 0;
+		/* Mirror the redirection table into the RSS queue list. */
+		for (i = 0; i != priv->reta_idx_n; ++i)
+			ctx->queue[i] = (*priv->reta_idx)[i];
+	} else {
+		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
+		ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
+	}
+	/* ctx->raw presumably overlays action_rss / miss_jump - confirm. */
+	miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
+	/* Locate the application JUMP action without running past END. */
+	while (app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP) {
+		if (app_actions->type == RTE_FLOW_ACTION_TYPE_END)
+			return rte_flow_error_set
+				(error, EINVAL,
+				 RTE_FLOW_ERROR_TYPE_ACTION,
+				 NULL, "tunnel steer rule has no jump action");
+		app_actions++;
+	}
+	jump_data = app_actions->conf;
+	miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
+	miss_attr.group = jump_data->group;
+	ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
+				       &flow_table, &grp_info, error);
+	if (ret)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  NULL, "invalid tunnel id");
+	/* Encode table id and domain into the mark carried by missed packets. */
+	mark_id.app_reserve = 0;
+	mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
+	mark_id.transfer = !!attr->transfer;
+	mark_id._unused_ = 0;
+	miss_mark.id = mark_id.val;
+	dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
+				    miss_items, miss_actions, flow_idx, error);
+	if (!dev_flow)
+		return -rte_errno;
+	dev_flow->flow = flow;
+	dev_flow->external = true;
+	dev_flow->tunnel = tunnel;
+	/* Subflow object was created, we must include one in the list. */
+	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
+		      dev_flow->handle, next);
+	DRV_LOG(DEBUG,
+		"port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
+		dev->data->port_id, tunnel->app_tunnel.type,
+		tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
+	ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
+				 miss_actions, error);
+	if (!ret)
+		ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
+						  error);
+
+	return ret;
+}
+
+/*
+ * Translate a tunnel offload mark back into its flow table data entry.
+ * Returns NULL when the table hash list has no entry for the decoded key.
+ */
+static const struct mlx5_flow_tbl_data_entry *
+tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	union tunnel_offload_mark decoded = { .val = mark };
+	union mlx5_flow_tbl_key tbl_key = {
+		{
+			.table_id = tunnel_id_to_flow_tbl(decoded.table_id),
+			.dummy = 0,
+			.domain = !!decoded.transfer,
+			.direction = 0,
+		}
+	};
+	struct mlx5_hlist_entry *found;
+
+	found = mlx5_hlist_lookup(priv->sh->flow_tbls, tbl_key.v64, NULL);
+	if (!found)
+		return NULL;
+	return container_of(found, struct mlx5_flow_tbl_data_entry, entry);
+}
+
+/*
+ * Hash list removal callback: give the table id back to the
+ * MLX5_IPOOL_TNL_TBL_ID pool and free the entry itself.
+ */
+static void
+mlx5_flow_tunnel_grp2tbl_remove_cb(struct mlx5_hlist *list,
+				   struct mlx5_hlist_entry *entry)
+{
+	struct tunnel_tbl_entry *tbl_entry =
+		container_of(entry, struct tunnel_tbl_entry, hash);
+	struct mlx5_dev_ctx_shared *shared = list->ctx;
+
+	mlx5_ipool_free(shared->ipool[MLX5_IPOOL_TNL_TBL_ID],
+			tunnel_flow_tbl_to_id(tbl_entry->flow_table));
+	mlx5_free(tbl_entry);
+}
+
+/*
+ * Hash list match callback.
+ * Returns 0 when both the tunnel id and the group packed in key equal
+ * those stored in the entry, 1 otherwise.
+ */
+static int
+mlx5_flow_tunnel_grp2tbl_match_cb(struct mlx5_hlist *list __rte_unused,
+				  struct mlx5_hlist_entry *entry,
+				  uint64_t key, void *cb_ctx __rte_unused)
+{
+	struct tunnel_tbl_entry *candidate =
+		container_of(entry, struct tunnel_tbl_entry, hash);
+	union tunnel_tbl_key wanted = {
+		.val = key,
+	};
+
+	if (wanted.tunnel_id != candidate->tunnel_id)
+		return 1;
+	return wanted.group != candidate->group;
+}
+
+/*
+ * Hash list creation callback: allocate a group-to-table entry and take a
+ * table id from the MLX5_IPOOL_TNL_TBL_ID pool for it.
+ * Returns the embedded hash entry, or NULL on failure.
+ */
+static struct mlx5_hlist_entry *
+mlx5_flow_tunnel_grp2tbl_create_cb(struct mlx5_hlist *list, uint64_t key,
+				   void *ctx __rte_unused)
+{
+	struct mlx5_dev_ctx_shared *sh = list->ctx;
+	union tunnel_tbl_key tbl = {
+		.val = key,
+	};
+	struct tunnel_tbl_entry *new_entry;
+
+	new_entry = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
+				sizeof(*new_entry), 0,
+				SOCKET_ID_ANY);
+	if (!new_entry)
+		return NULL;
+	mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
+			  &new_entry->flow_table);
+	if (new_entry->flow_table >= MLX5_MAX_TABLES) {
+		/* Id is out of range: hand it back before bailing out. */
+		DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
+			new_entry->flow_table);
+		mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
+				new_entry->flow_table);
+		mlx5_free(new_entry);
+		return NULL;
+	}
+	if (!new_entry->flow_table) {
+		/* Zero id: nothing to return to the pool. */
+		mlx5_free(new_entry);
+		return NULL;
+	}
+	new_entry->flow_table = tunnel_id_to_flow_tbl(new_entry->flow_table);
+	new_entry->tunnel_id = tbl.tunnel_id;
+	new_entry->group = tbl.group;
+	return &new_entry->hash;
+}
+
+/*
+ * Map a (tunnel, group) pair to a flow table id.
+ * The lookup goes through the tunnel's own group hash list when a tunnel
+ * is given, and through the hub-wide list otherwise. On success *table is
+ * set and 0 is returned; on failure the result of rte_flow_error_set() is
+ * returned.
+ */
+static uint32_t
+tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
+				const struct mlx5_flow_tunnel *tunnel,
+				uint32_t group, uint32_t *table,
+				struct rte_flow_error *error)
+{
+	union tunnel_tbl_key key = {
+		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
+		.group = group
+	};
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct mlx5_hlist *grp_tbl;
+	struct mlx5_hlist_entry *registered;
+	struct tunnel_tbl_entry *tbl_entry;
+
+	if (tunnel)
+		grp_tbl = tunnel->groups;
+	else
+		grp_tbl = thub->groups;
+	registered = mlx5_hlist_register(grp_tbl, key.val, NULL);
+	if (!registered)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+					  NULL,
+					  "tunnel group index not supported");
+	tbl_entry = container_of(registered, struct tunnel_tbl_entry, hash);
+	*table = tbl_entry->flow_table;
+	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
+		dev->data->port_id, key.tunnel_id, group, *table);
+	return 0;
+}
+
+/*
+ * Release a PMD tunnel: unlink it from the tunnel list, destroy its group
+ * hash list and return its id to the MLX5_IPOOL_TUNNEL_ID pool.
+ */
+static void
+mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
+		      struct mlx5_flow_tunnel *tunnel)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
+		dev->data->port_id, tunnel->tunnel_id);
+	LIST_REMOVE(tunnel, chain);
+	mlx5_hlist_destroy(tunnel->groups);
+	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID],
+			tunnel->tunnel_id);
+}
+
+/*
+ * Walk the tunnel list under the hub spinlock and dispatch to callbacks.
+ *
+ * match() is called for each tunnel until it returns true. hit() then
+ * runs for that tunnel; when nothing matched, miss() runs instead. Either
+ * callback may be NULL. With lock_op set the spinlock stays held while
+ * the callback runs, otherwise it is dropped right after the search.
+ *
+ * Returns true when a tunnel matched.
+ */
+static bool
+mlx5_access_tunnel_offload_db
+	(struct rte_eth_dev *dev,
+	 bool (*match)(struct rte_eth_dev *,
+		       struct mlx5_flow_tunnel *, const void *),
+	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
+	 void (*miss)(struct rte_eth_dev *, void *),
+	 void *ctx, bool lock_op)
+{
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct mlx5_flow_tunnel *tunnel;
+	bool found = false;
+
+	rte_spinlock_lock(&thub->sl);
+	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
+		if (match(dev, tunnel, (const void *)ctx)) {
+			found = true;
+			break;
+		}
+	}
+	if (!lock_op)
+		rte_spinlock_unlock(&thub->sl);
+	if (found) {
+		if (hit)
+			hit(dev, tunnel, ctx);
+	} else if (miss) {
+		miss(dev, ctx);
+	}
+	if (lock_op)
+		rte_spinlock_unlock(&thub->sl);
+
+	return found;
+}
+
+/* Context used when looking a PMD tunnel up by its id. */
+struct tunnel_db_find_tunnel_id_ctx {
+ uint32_t tunnel_id; /* in: id compared by find_tunnel_id_match() */
+ struct mlx5_flow_tunnel *tunnel; /* out: set by find_tunnel_id_hit() */
+};
+
+/* Match callback: true when the tunnel carries the id stored in x. */
+static bool
+find_tunnel_id_match(struct rte_eth_dev *dev,
+		     struct mlx5_flow_tunnel *tunnel, const void *x)
+{
+	const struct tunnel_db_find_tunnel_id_ctx *lookup = x;
+	uint32_t wanted_id = lookup->tunnel_id;
+
+	RTE_SET_USED(dev);
+	return wanted_id == tunnel->tunnel_id;
+}
+
+/* Hit callback: record the matching tunnel in the lookup context x. */
+static void
+find_tunnel_id_hit(struct rte_eth_dev *dev,
+		   struct mlx5_flow_tunnel *tunnel, void *x)
+{
+	RTE_SET_USED(dev);
+	((struct tunnel_db_find_tunnel_id_ctx *)x)->tunnel = tunnel;
+}
+
+/*
+ * Find the PMD tunnel with the given id.
+ * Returns the tunnel, or NULL when no tunnel in the list has that id.
+ */
+static struct mlx5_flow_tunnel *
+mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
+{
+	struct tunnel_db_find_tunnel_id_ctx lookup = {
+		.tunnel_id = id,
+		.tunnel = NULL,
+	};
+
+	/* No miss callback: the result simply stays NULL. */
+	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
+				      find_tunnel_id_hit, NULL, &lookup, true);
+
+	return lookup.tunnel;
+}
+
+/*
+ * Allocate and initialize a PMD tunnel object for an application tunnel.
+ *
+ * The object comes from the MLX5_IPOOL_TUNNEL_ID pool and gets its own
+ * group hash list. Its embedded action and item both reference the
+ * tunnel object itself.
+ *
+ * Returns the new tunnel, or NULL on failure.
+ */
+static struct mlx5_flow_tunnel *
+mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
+			  const struct rte_flow_tunnel *app_tunnel)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_indexed_pool *tunnel_pool =
+		priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
+	struct mlx5_flow_tunnel *pmd_tunnel;
+	uint32_t new_id;
+
+	pmd_tunnel = mlx5_ipool_zmalloc(tunnel_pool, &new_id);
+	if (!pmd_tunnel)
+		return NULL;
+	if (new_id >= MLX5_MAX_TUNNELS) {
+		mlx5_ipool_free(tunnel_pool, new_id);
+		DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", new_id);
+		return NULL;
+	}
+	pmd_tunnel->groups = mlx5_hlist_create("tunnel groups", 1024, 0, 0,
+					mlx5_flow_tunnel_grp2tbl_create_cb,
+					mlx5_flow_tunnel_grp2tbl_match_cb,
+					mlx5_flow_tunnel_grp2tbl_remove_cb);
+	if (!pmd_tunnel->groups) {
+		mlx5_ipool_free(tunnel_pool, new_id);
+		return NULL;
+	}
+	pmd_tunnel->groups->ctx = priv->sh;
+	/* initiate new PMD tunnel */
+	memcpy(&pmd_tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
+	pmd_tunnel->tunnel_id = new_id;
+	pmd_tunnel->action.type = (typeof(pmd_tunnel->action.type))
+				  MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
+	pmd_tunnel->action.conf = pmd_tunnel;
+	pmd_tunnel->item.type = (typeof(pmd_tunnel->item.type))
+				MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
+	pmd_tunnel->item.spec = pmd_tunnel;
+	pmd_tunnel->item.last = NULL;
+	pmd_tunnel->item.mask = NULL;
+
+	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
+		dev->data->port_id, pmd_tunnel->tunnel_id);
+
+	return pmd_tunnel;
+}
+
+/* Context used when getting (or creating) the PMD tunnel of an application tunnel. */
+struct tunnel_db_get_tunnel_ctx {
+ const struct rte_flow_tunnel *app_tunnel; /* in: application tunnel to look for */
+ struct mlx5_flow_tunnel *tunnel; /* out: set by the hit or miss callback */
+};
+
+/*
+ * Match callback: true when the tunnel stores an application tunnel that
+ * is byte-for-byte equal to the one in the lookup context x.
+ */
+static bool get_tunnel_match(struct rte_eth_dev *dev,
+			     struct mlx5_flow_tunnel *tunnel, const void *x)
+{
+	const struct tunnel_db_get_tunnel_ctx *lookup = x;
+	int diff;
+
+	RTE_SET_USED(dev);
+	diff = memcmp(lookup->app_tunnel, &tunnel->app_tunnel,
+		      sizeof(*lookup->app_tunnel));
+	return diff == 0;
+}
+
+/*
+ * Hit callback: take one more reference on the existing tunnel and
+ * report it through the lookup context x.
+ */
+static void get_tunnel_hit(struct rte_eth_dev *dev,
+			   struct mlx5_flow_tunnel *tunnel, void *x)
+{
+	/* called under tunnel spinlock protection */
+	struct tunnel_db_get_tunnel_ctx *lookup = x;
+
+	RTE_SET_USED(dev);
+	tunnel->refctn += 1;
+	lookup->tunnel = tunnel;
+}
+
+/*
+ * Miss callback: allocate a PMD tunnel for the application tunnel in the
+ * lookup context x and link it at the head of the hub tunnel list.
+ *
+ * Called under tunnel spinlock protection. The lock is dropped around the
+ * allocation and taken again before the list is touched, so it is held
+ * again on return. When the allocation fails ctx->tunnel is left NULL and
+ * the list is not modified.
+ */
+static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
+{
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct tunnel_db_get_tunnel_ctx *ctx = x;
+
+	rte_spinlock_unlock(&thub->sl);
+	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
+	rte_spinlock_lock(&thub->sl);
+	/* Touch the new tunnel only once it is known to be non-NULL. */
+	if (ctx->tunnel) {
+		ctx->tunnel->refctn = 1;
+		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
+	}
+}
+
+
+/*
+ * Get the PMD tunnel that corresponds to an application tunnel, creating
+ * it through the miss callback when the list has no matching entry.
+ * Stores the result in *tunnel and returns 0, or -ENOMEM when no tunnel
+ * could be obtained (*tunnel is NULL in that case).
+ */
+static int
+mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
+		     const struct rte_flow_tunnel *app_tunnel,
+		     struct mlx5_flow_tunnel **tunnel)
+{
+	struct tunnel_db_get_tunnel_ctx lookup = {
+		.app_tunnel = app_tunnel,
+		.tunnel = NULL,
+	};
+
+	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
+				      get_tunnel_miss, &lookup, true);
+	*tunnel = lookup.tunnel;
+	if (!lookup.tunnel)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Release the tunnel hub of a shared device context.
+ *
+ * Does nothing when no hub is attached. A warning is logged when tunnels
+ * are still linked to the hub. The hub group hash list is destroyed, the
+ * hub is freed and sh->tunnel_hub is cleared so no dangling pointer is
+ * left behind.
+ */
+void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
+{
+	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
+
+	if (!thub)
+		return;
+	/* No trailing newline, like every other DRV_LOG() call in this file. */
+	if (!LIST_EMPTY(&thub->tunnels))
+		DRV_LOG(WARNING, "port %u tunnels present", port_id);
+	mlx5_hlist_destroy(thub->groups);
+	mlx5_free(thub);
+	sh->tunnel_hub = NULL;
+}
+
+/*
+ * Allocate the tunnel hub of a shared device context.
+ *
+ * Initializes the tunnel list and spinlock and creates the hub-wide group
+ * hash list. Returns 0 on success, -ENOMEM when the hub cannot be
+ * allocated, or -rte_errno when the hash list cannot be created.
+ */
+int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_flow_tunnel_hub *hub;
+
+	hub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*hub),
+			  0, SOCKET_ID_ANY);
+	if (!hub)
+		return -ENOMEM;
+	LIST_INIT(&hub->tunnels);
+	rte_spinlock_init(&hub->sl);
+	hub->groups = mlx5_hlist_create("flow groups",
+					rte_align32pow2(MLX5_MAX_TABLES), 0,
+					0, mlx5_flow_tunnel_grp2tbl_create_cb,
+					mlx5_flow_tunnel_grp2tbl_match_cb,
+					mlx5_flow_tunnel_grp2tbl_remove_cb);
+	if (!hub->groups) {
+		/* Capture the error code before freeing the hub. */
+		int err = -rte_errno;
+
+		mlx5_free(hub);
+		return err;
+	}
+	hub->groups->ctx = sh;
+	sh->tunnel_hub = hub;
+
+	return 0;
+}
+
+/*
+ * Explain why a tunnel offload request cannot be served.
+ *
+ * Returns NULL when tunnel offload is active on the port and the
+ * application tunnel is present and of a supported type (VXLAN),
+ * otherwise a constant string naming the reason.
+ */
+static const char *
+mlx5_flow_tunnel_reject_reason(struct rte_eth_dev *dev,
+			       struct rte_flow_tunnel *tunnel)
+{
+	if (!is_tunnel_offload_active(dev))
+		return "tunnel offload was not activated";
+	if (!tunnel)
+		return "no application tunnel";
+	switch (tunnel->type) {
+	case RTE_FLOW_ITEM_TYPE_VXLAN:
+		return NULL;
+	default:
+		return "unsupported tunnel type";
+	}
+}
+
+/*
+ * Tell whether a tunnel offload request is acceptable.
+ *
+ * err_msg is received by value, so it cannot carry the reason back to the
+ * caller; it is kept only so the signature stays unchanged. Callers that
+ * need the reason use mlx5_flow_tunnel_reject_reason().
+ */
+static inline bool
+mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
+			  struct rte_flow_tunnel *tunnel,
+			  const char *err_msg)
+{
+	RTE_SET_USED(err_msg);
+	return !mlx5_flow_tunnel_reject_reason(dev, tunnel);
+}
+
+/*
+ * Provide the PMD action that must precede the application actions of a
+ * tunnel steering rule.
+ *
+ * On success *actions points at the action embedded in the PMD tunnel,
+ * *num_of_actions is 1 and 0 is returned. On failure a negative errno
+ * value is returned and error is filled in.
+ */
+static int
+mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
+			   struct rte_flow_tunnel *app_tunnel,
+			   struct rte_flow_action **actions,
+			   uint32_t *num_of_actions,
+			   struct rte_flow_error *error)
+{
+	int ret;
+	struct mlx5_flow_tunnel *tunnel;
+	const char *err_msg = mlx5_flow_tunnel_reject_reason(dev, app_tunnel);
+
+	if (err_msg)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
+					  err_msg);
+	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
+	if (ret < 0) {
+		/* rte_flow_error_set() expects a positive errno code. */
+		return rte_flow_error_set(error, -ret,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
+					  "failed to initialize pmd tunnel");
+	}
+	*actions = &tunnel->action;
+	*num_of_actions = 1;
+	return 0;
+}
+
+/*
+ * Provide the PMD item that must precede the application pattern of a
+ * tunnel matching rule.
+ *
+ * On success *items points at the item embedded in the PMD tunnel,
+ * *num_of_items is 1 and 0 is returned. On failure a negative errno value
+ * is returned and error is filled in.
+ */
+static int
+mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
+		       struct rte_flow_tunnel *app_tunnel,
+		       struct rte_flow_item **items,
+		       uint32_t *num_of_items,
+		       struct rte_flow_error *error)
+{
+	int ret;
+	struct mlx5_flow_tunnel *tunnel;
+	const char *err_msg = mlx5_flow_tunnel_reject_reason(dev, app_tunnel);
+
+	if (err_msg)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+					  err_msg);
+	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
+	if (ret < 0) {
+		/* rte_flow_error_set() expects a positive errno code. */
+		return rte_flow_error_set(error, -ret,
+					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+					  "failed to initialize pmd tunnel");
+	}
+	*items = &tunnel->item;
+	*num_of_items = 1;
+	return 0;
+}
+
+/* Context used when releasing a PMD tunnel item or action. */
+struct tunnel_db_element_release_ctx {
+ struct rte_flow_item *items; /* in: PMD item to release, or NULL */
+ struct rte_flow_action *actions; /* in: PMD action to release, or NULL */
+ uint32_t num_elements; /* in: element count; only 1 can match */
+ struct rte_flow_error *error; /* error object filled in by the miss callback */
+ int ret; /* out: 0 on hit, rte_flow_error_set() result on miss */
+};
+
+/*
+ * Match callback: true when the single element in the release context x
+ * is the item (checked first) or the action embedded in this tunnel.
+ */
+static bool
+tunnel_element_release_match(struct rte_eth_dev *dev,
+			     struct mlx5_flow_tunnel *tunnel, const void *x)
+{
+	const struct tunnel_db_element_release_ctx *rel = x;
+
+	RTE_SET_USED(dev);
+	if (rel->num_elements != 1)
+		return false;
+	if (rel->items)
+		return rel->items == &tunnel->item;
+	return rel->actions && rel->actions == &tunnel->action;
+}
+
+/*
+ * Hit callback: report success, drop one reference on the tunnel and
+ * free the tunnel when that was the last reference.
+ */
+static void
+tunnel_element_release_hit(struct rte_eth_dev *dev,
+			   struct mlx5_flow_tunnel *tunnel, void *x)
+{
+	struct tunnel_db_element_release_ctx *rel = x;
+
+	rel->ret = 0;
+	if (__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED) == 0)
+		mlx5_flow_tunnel_free(dev, tunnel);
+}
+
+/*
+ * Miss callback: no tunnel owns the element, so fill in the error object
+ * of the release context x and store the resulting code in it.
+ */
+static void
+tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
+{
+	struct tunnel_db_element_release_ctx *rel = x;
+	int rc;
+
+	RTE_SET_USED(dev);
+	rc = rte_flow_error_set(rel->error, EINVAL,
+				RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+				"invalid argument");
+	rel->ret = rc;
+}
+
+/*
+ * Release a PMD item previously handed out by mlx5_flow_tunnel_match().
+ * Returns 0 when a tunnel owning the item was found, otherwise the error
+ * code produced by the miss callback.
+ */
+static int
+mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
+			      struct rte_flow_item *pmd_items,
+			      uint32_t num_items, struct rte_flow_error *err)
+{
+	struct tunnel_db_element_release_ctx rel = {
+		.items = pmd_items,
+		.num_elements = num_items,
+		.error = err,
+		.ret = 0,
+	};
+
+	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
+				      tunnel_element_release_hit,
+				      tunnel_element_release_miss, &rel, false);
+
+	return rel.ret;
+}
+
+/*
+ * Release a PMD action previously handed out by
+ * mlx5_flow_tunnel_decap_set().
+ * Returns 0 when a tunnel owning the action was found, otherwise the
+ * error code produced by the miss callback.
+ */
+static int
+mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
+				struct rte_flow_action *pmd_actions,
+				uint32_t num_actions, struct rte_flow_error *err)
+{
+	struct tunnel_db_element_release_ctx rel = {
+		.actions = pmd_actions,
+		.num_elements = num_actions,
+		.error = err,
+		.ret = 0,
+	};
+
+	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
+				      tunnel_element_release_hit,
+				      tunnel_element_release_miss, &rel, false);
+
+	return rel.ret;
+}
+
+/*
+ * Recover tunnel information for a packet that hit a tunnel miss rule.
+ *
+ * When tunnel offload is not active, info->flags is cleared and 0 is
+ * returned. Otherwise the packet must carry both PKT_RX_FDIR and
+ * PKT_RX_FDIR_ID, and its FDIR mark must decode to a known flow table
+ * entry; the application tunnel and group id of that entry are then
+ * copied into info. Any other case fails with EINVAL.
+ */
+static int
+mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
+				  struct rte_mbuf *m,
+				  struct rte_flow_restore_info *info,
+				  struct rte_flow_error *err)
+{
+	const uint64_t fdir_mark = PKT_RX_FDIR | PKT_RX_FDIR_ID;
+	uint64_t ol_flags = m->ol_flags;
+	const struct mlx5_flow_tbl_data_entry *tbl_entry = NULL;
+
+	if (!is_tunnel_offload_active(dev)) {
+		info->flags = 0;
+		return 0;
+	}
+
+	if ((ol_flags & fdir_mark) == fdir_mark) {
+		tbl_entry = tunnel_mark_decode(dev, m->hash.fdir.hi);
+		if (!tbl_entry)
+			DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
+				dev->data->port_id, m->hash.fdir.hi);
+	}
+	if (!tbl_entry)
+		return rte_flow_error_set(err, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+					  "failed to get restore info");
+	MLX5_ASSERT(tbl_entry->tunnel);
+	memcpy(&info->tunnel, &tbl_entry->tunnel->app_tunnel,
+	       sizeof(info->tunnel));
+	info->group_id = tbl_entry->group_id;
+	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
+		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
+		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
+
+	return 0;
+}
+
+#else /* HAVE_IBV_FLOW_DV_SUPPORT */
+/* Stub used without DV support: always fails with -ENOTSUP. */
+static int
+mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev __rte_unused,
+			   struct rte_flow_tunnel *app_tunnel __rte_unused,
+			   struct rte_flow_action **actions __rte_unused,
+			   uint32_t *num_of_actions __rte_unused,
+			   struct rte_flow_error *error __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+/* Stub used without DV support: always fails with -ENOTSUP. */
+static int
+mlx5_flow_tunnel_match(struct rte_eth_dev *dev __rte_unused,
+		       struct rte_flow_tunnel *app_tunnel __rte_unused,
+		       struct rte_flow_item **items __rte_unused,
+		       uint32_t *num_of_items __rte_unused,
+		       struct rte_flow_error *error __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+/* Stub used without DV support: always fails with -ENOTSUP. */
+static int
+mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev __rte_unused,
+			      struct rte_flow_item *pmd_items __rte_unused,
+			      uint32_t num_items __rte_unused,
+			      struct rte_flow_error *err __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+/* Stub used without DV support: always fails with -ENOTSUP. */
+static int
+mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev __rte_unused,
+				struct rte_flow_action *pmd_action __rte_unused,
+				uint32_t num_actions __rte_unused,
+				struct rte_flow_error *err __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+/* Stub used without DV support: always fails with -ENOTSUP. */
+static int
+mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev __rte_unused,
+				  struct rte_mbuf *m __rte_unused,
+				  struct rte_flow_restore_info *i __rte_unused,
+				  struct rte_flow_error *err __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+/* Stub used without DV support: always fails with -ENOTSUP. */
+static int
+flow_tunnel_add_default_miss(struct rte_eth_dev *dev __rte_unused,
+			     struct rte_flow *flow __rte_unused,
+			     const struct rte_flow_attr *attr __rte_unused,
+			     const struct rte_flow_action *actions __rte_unused,
+			     uint32_t flow_idx __rte_unused,
+			     struct tunnel_default_miss_ctx *ctx __rte_unused,
+			     struct rte_flow_error *error __rte_unused)
+{
+	return -ENOTSUP;
+}
+
+/* Stub used without DV support: no tunnel is ever found. */
+static struct mlx5_flow_tunnel *
+mlx5_find_tunnel_id(struct rte_eth_dev *dev __rte_unused,
+		    uint32_t id __rte_unused)
+{
+	return NULL;
+}
+
+/* Stub used without DV support: nothing to release. */
+static void
+mlx5_flow_tunnel_free(struct rte_eth_dev *dev __rte_unused,
+		      struct mlx5_flow_tunnel *tunnel __rte_unused)
+{
+}
+
+/*
+ * Stub used without DV support: reports ENOTSUP through error and returns
+ * the result of rte_flow_error_set().
+ */
+static uint32_t
+tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev __rte_unused,
+				const struct mlx5_flow_tunnel *t __rte_unused,
+				uint32_t group __rte_unused,
+				uint32_t *table __rte_unused,
+				struct rte_flow_error *error)
+{
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				  "tunnel offload requires DV support");
+}
+
+/* Stub used without DV support: there is no tunnel hub to release. */
+void
+mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh __rte_unused,
+			uint16_t port_id __rte_unused)
+{
+}
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+