+ /* Put the actions after sample into Suffix flow. */
+ memcpy(actions_sfx, actions + sample_action_pos + 1,
+ sizeof(struct rte_flow_action) *
+ (actions_n - sample_action_pos - 1));
+ return tag_id;
+}
+
+/**
+ * The splitting for metadata feature.
+ *
+ * - Q/RSS action on NIC Rx should be split in order to pass by
+ * the mreg copy table (RX_CP_TBL) and then it jumps to the
+ * action table (RX_ACT_TBL) which has the split Q/RSS action.
+ *
+ * - All the actions on NIC Tx should have a mreg copy action to
+ * copy reg_a from WQE to reg_c[0].
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[in] flow
+ * Parent flow structure pointer.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[in] flow_split_info
+ * Pointer to flow split info structure.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @return
+ * 0 on success, negative value otherwise
+ */
+static int
+flow_create_split_metadata(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct mlx5_flow_split_info *flow_split_info,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_config *config = &priv->config;
+ const struct rte_flow_action *qrss = NULL;
+ struct rte_flow_action *ext_actions = NULL;
+ struct mlx5_flow *dev_flow = NULL;
+ uint32_t qrss_id = 0;
+ int mtr_sfx = 0;
+ size_t act_size;
+ int actions_n;
+ int encap_idx;
+ int ret;
+
+ /* Check whether extensive metadata feature is engaged. */
+ if (!config->dv_flow_en ||
+ config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
+ !mlx5_flow_ext_mreg_supported(dev))
+ return flow_create_split_inner(dev, flow, NULL, attr, items,
+ actions, flow_split_info, error);
+ actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
+ &encap_idx);
+ if (qrss) {
+ /* Exclude hairpin flows from splitting. */
+ if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+ const struct rte_flow_action_queue *queue;
+
+ queue = qrss->conf;
+ if (mlx5_rxq_get_type(dev, queue->index) ==
+ MLX5_RXQ_TYPE_HAIRPIN)
+ qrss = NULL;
+ } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
+ const struct rte_flow_action_rss *rss;
+
+ rss = qrss->conf;
+ if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
+ MLX5_RXQ_TYPE_HAIRPIN)
+ qrss = NULL;
+ }
+ }
+ if (qrss) {
+ /* Check if it is in meter suffix table. */
+ mtr_sfx = attr->group == (attr->transfer ?
+ (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
+ MLX5_FLOW_TABLE_LEVEL_SUFFIX);
+ /*
+ * Q/RSS action on NIC Rx should be split in order to pass by
+ * the mreg copy table (RX_CP_TBL) and then it jumps to the
+ * action table (RX_ACT_TBL) which has the split Q/RSS action.
+ */
+ act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
+ sizeof(struct rte_flow_action_set_tag) +
+ sizeof(struct rte_flow_action_jump);
+ ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
+ SOCKET_ID_ANY);
+ if (!ext_actions)
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, "no memory to split "
+ "metadata flow");
+ /*
+ * If we are the suffix flow of meter, tag already exist.
+ * Set the tag action to void.
+ */
+ if (mtr_sfx)
+ ext_actions[qrss - actions].type =
+ RTE_FLOW_ACTION_TYPE_VOID;
+ else
+ ext_actions[qrss - actions].type =
+ (enum rte_flow_action_type)
+ MLX5_RTE_FLOW_ACTION_TYPE_TAG;
+ /*
+ * Create the new actions list with removed Q/RSS action
+ * and appended set tag and jump to register copy table
+ * (RX_CP_TBL). We should preallocate unique tag ID here
+ * in advance, because it is needed for set tag action.
+ */
+ qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
+ qrss, actions_n, error);
+ if (!mtr_sfx && !qrss_id) {
+ ret = -rte_errno;
+ goto exit;
+ }
+ } else if (attr->egress && !attr->transfer) {
+ /*
+ * All the actions on NIC Tx should have a metadata register
+ * copy action to copy reg_a from WQE to reg_c[meta]
+ */
+ act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
+ sizeof(struct mlx5_flow_action_copy_mreg);
+ ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
+ SOCKET_ID_ANY);
+ if (!ext_actions)
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, "no memory to split "
+ "metadata flow");
+ /* Create the action list appended with copy register. */
+ ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
+ actions_n, error, encap_idx);
+ if (ret < 0)
+ goto exit;
+ }
+ /* Add the unmodified original or prefix subflow. */
+ ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
+ items, ext_actions ? ext_actions :
+ actions, flow_split_info, error);
+ if (ret < 0)
+ goto exit;
+ MLX5_ASSERT(dev_flow);
+ if (qrss) {
+ const struct rte_flow_attr q_attr = {
+ .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
+ .ingress = 1,
+ };
+ /* Internal PMD action to set register. */
+ struct mlx5_rte_flow_item_tag q_tag_spec = {
+ .data = qrss_id,
+ .id = REG_NON,
+ };
+ struct rte_flow_item q_items[] = {
+ {
+ .type = (enum rte_flow_item_type)
+ MLX5_RTE_FLOW_ITEM_TYPE_TAG,
+ .spec = &q_tag_spec,
+ .last = NULL,
+ .mask = NULL,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ struct rte_flow_action q_actions[] = {
+ {
+ .type = qrss->type,
+ .conf = qrss->conf,
+ },
+ {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
+
+ /*
+ * Configure the tag item only if there is no meter subflow.
+ * Since tag is already marked in the meter suffix subflow
+ * we can just use the meter suffix items as is.
+ */
+ if (qrss_id) {
+ /* Not meter subflow. */
+ MLX5_ASSERT(!mtr_sfx);
+ /*
+ * Put unique id in prefix flow due to it is destroyed
+ * after suffix flow and id will be freed after there
+ * is no actual flows with this id and identifier
+ * reallocation becomes possible (for example, for
+ * other flows in other threads).
+ */
+ dev_flow->handle->split_flow_id = qrss_id;
+ ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
+ error);
+ if (ret < 0)
+ goto exit;
+ q_tag_spec.id = ret;
+ }
+ dev_flow = NULL;
+ /* Add suffix subflow to execute Q/RSS. */
+ flow_split_info->prefix_layers = layers;
+ flow_split_info->prefix_mark = 0;
+ ret = flow_create_split_inner(dev, flow, &dev_flow,
+ &q_attr, mtr_sfx ? items :
+ q_items, q_actions,
+ flow_split_info, error);
+ if (ret < 0)
+ goto exit;
+ /* qrss ID should be freed if failed. */
+ qrss_id = 0;
+ MLX5_ASSERT(dev_flow);
+ }
+
+exit:
+ /*
+ * We do not destroy the partially created sub_flows in case of error.
+ * These ones are included into parent flow list and will be destroyed
+ * by flow_drv_destroy.
+ */
+ mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
+ qrss_id);
+ mlx5_free(ext_actions);
+ return ret;
+}
+
+/**
+ * The splitting for meter feature.
+ *
+ * - The meter flow will be split to two flows as prefix and
+ * suffix flow. The packets make sense only it pass the prefix
+ * meter action.
+ *
+ * - Reg_C_5 is used for the packet to match betweend prefix and
+ * suffix flow.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[in] flow
+ * Parent flow structure pointer.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[in] flow_split_info
+ * Pointer to flow split info structure.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @return
+ * 0 on success, negative value otherwise
+ */
+static int
+flow_create_split_meter(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct mlx5_flow_split_info *flow_split_info,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct rte_flow_action *sfx_actions = NULL;
+ struct rte_flow_action *pre_actions = NULL;
+ struct rte_flow_item *sfx_items = NULL;
+ struct mlx5_flow *dev_flow = NULL;
+ struct rte_flow_attr sfx_attr = *attr;
+ uint32_t mtr = 0;
+ uint32_t mtr_tag_id = 0;
+ size_t act_size;
+ size_t item_size;
+ int actions_n = 0;
+ int ret;
+
+ if (priv->mtr_en)
+ actions_n = flow_check_meter_action(actions, &mtr);
+ if (mtr) {
+ /* The five prefix actions: meter, decap, encap, tag, end. */
+ act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
+ sizeof(struct mlx5_rte_flow_action_set_tag);
+ /* tag, vlan, port id, end. */
+#define METER_SUFFIX_ITEM 4
+ item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
+ sizeof(struct mlx5_rte_flow_item_tag) * 2;
+ sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
+ 0, SOCKET_ID_ANY);
+ if (!sfx_actions)
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, "no memory to split "
+ "meter flow");
+ sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
+ act_size);
+ pre_actions = sfx_actions + actions_n;
+ mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
+ actions, sfx_actions,
+ pre_actions);
+ if (!mtr_tag_id) {
+ ret = -rte_errno;
+ goto exit;
+ }
+ /* Add the prefix subflow. */
+ flow_split_info->prefix_mark = 0;
+ ret = flow_create_split_inner(dev, flow, &dev_flow,
+ attr, items, pre_actions,
+ flow_split_info, error);
+ if (ret) {
+ ret = -rte_errno;
+ goto exit;
+ }
+ dev_flow->handle->split_flow_id = mtr_tag_id;
+ /* Setting the sfx group atrr. */
+ sfx_attr.group = sfx_attr.transfer ?
+ (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
+ MLX5_FLOW_TABLE_LEVEL_SUFFIX;
+ flow_split_info->prefix_layers =
+ flow_get_prefix_layer_flags(dev_flow);
+ flow_split_info->prefix_mark = dev_flow->handle->mark;
+ }
+ /* Add the prefix subflow. */
+ ret = flow_create_split_metadata(dev, flow,
+ &sfx_attr, sfx_items ?
+ sfx_items : items,
+ sfx_actions ? sfx_actions : actions,
+ flow_split_info, error);
+exit:
+ if (sfx_actions)
+ mlx5_free(sfx_actions);
+ return ret;
+}
+
+/**
+ * The splitting for sample feature.
+ *
+ * Once Sample action is detected in the action list, the flow actions should
+ * be split into prefix sub flow and suffix sub flow.
+ *
+ * The original items remain in the prefix sub flow, all actions preceding the
+ * sample action and the sample action itself will be copied to the prefix
+ * sub flow, the actions following the sample action will be copied to the
+ * suffix sub flow, Queue action always be located in the suffix sub flow.
+ *
+ * In order to make the packet from prefix sub flow matches with suffix sub
+ * flow, an extra tag action be added into prefix sub flow, and the suffix sub
+ * flow uses tag item with the unique flow id.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[in] flow
+ * Parent flow structure pointer.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[in] flow_split_info
+ * Pointer to flow split info structure.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @return
+ * 0 on success, negative value otherwise
+ */
+static int
+flow_create_split_sample(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct mlx5_flow_split_info *flow_split_info,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct rte_flow_action *sfx_actions = NULL;
+ struct rte_flow_action *pre_actions = NULL;
+ struct rte_flow_item *sfx_items = NULL;
+ struct mlx5_flow *dev_flow = NULL;
+ struct rte_flow_attr sfx_attr = *attr;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ struct mlx5_flow_dv_sample_resource *sample_res;
+ struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
+ struct mlx5_flow_tbl_resource *sfx_tbl;
+#endif
+ size_t act_size;
+ size_t item_size;
+ uint32_t fdb_tx = 0;
+ int32_t tag_id = 0;
+ int actions_n = 0;
+ int sample_action_pos;
+ int qrss_action_pos;
+ int add_tag = 0;
+ int modify_after_mirror = 0;
+ uint16_t jump_table = 0;
+ const uint32_t next_ft_step = 1;
+ int ret = 0;
+
+ if (priv->sampler_en)
+ actions_n = flow_check_match_action(actions, attr,
+ RTE_FLOW_ACTION_TYPE_SAMPLE,
+ &sample_action_pos, &qrss_action_pos,
+ &modify_after_mirror);
+ if (actions_n) {
+ /* The prefix actions must includes sample, tag, end. */
+ act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
+ + sizeof(struct mlx5_rte_flow_action_set_tag);
+ item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
+ sizeof(struct mlx5_rte_flow_item_tag) * 2;
+ sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
+ item_size), 0, SOCKET_ID_ANY);
+ if (!sfx_actions)
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, "no memory to split "
+ "sample flow");
+ /* The representor_id is -1 for uplink. */
+ fdb_tx = (attr->transfer && priv->representor_id != -1);
+ /*
+ * When reg_c_preserve is set, metadata registers Cx preserve
+ * their value even through packet duplication.
+ */
+ add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve);
+ if (add_tag)
+ sfx_items = (struct rte_flow_item *)((char *)sfx_actions
+ + act_size);
+ if (modify_after_mirror)
+ jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
+ next_ft_step;
+ pre_actions = sfx_actions + actions_n;
+ tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
+ actions, sfx_actions,
+ pre_actions, actions_n,
+ sample_action_pos,
+ qrss_action_pos, jump_table,
+ error);
+ if (tag_id < 0 || (add_tag && !tag_id)) {
+ ret = -rte_errno;
+ goto exit;
+ }
+ if (modify_after_mirror)
+ flow_split_info->skip_scale =
+ 1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
+ /* Add the prefix subflow. */
+ ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
+ items, pre_actions,
+ flow_split_info, error);
+ if (ret) {
+ ret = -rte_errno;
+ goto exit;
+ }
+ dev_flow->handle->split_flow_id = tag_id;
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ if (!modify_after_mirror) {
+ /* Set the sfx group attr. */
+ sample_res = (struct mlx5_flow_dv_sample_resource *)
+ dev_flow->dv.sample_res;
+ sfx_tbl = (struct mlx5_flow_tbl_resource *)
+ sample_res->normal_path_tbl;
+ sfx_tbl_data = container_of(sfx_tbl,
+ struct mlx5_flow_tbl_data_entry,
+ tbl);
+ sfx_attr.group = sfx_attr.transfer ?
+ (sfx_tbl_data->table_id - 1) :
+ sfx_tbl_data->table_id;
+ } else {
+ MLX5_ASSERT(attr->transfer);
+ sfx_attr.group = jump_table;
+ }
+ flow_split_info->prefix_layers =
+ flow_get_prefix_layer_flags(dev_flow);
+ flow_split_info->prefix_mark = dev_flow->handle->mark;
+ /* Suffix group level already be scaled with factor, set
+ * MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to avoid scale
+ * again in translation.
+ */
+ flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
+#endif
+ }
+ /* Add the suffix subflow. */
+ ret = flow_create_split_meter(dev, flow, &sfx_attr,
+ sfx_items ? sfx_items : items,
+ sfx_actions ? sfx_actions : actions,
+ flow_split_info, error);
+exit:
+ if (sfx_actions)
+ mlx5_free(sfx_actions);
+ return ret;
+}
+
+/**
+ * Split the flow to subflow set. The splitters might be linked
+ * in the chain, like this:
+ * flow_create_split_outer() calls:
+ * flow_create_split_meter() calls:
+ * flow_create_split_metadata(meter_subflow_0) calls:
+ * flow_create_split_inner(metadata_subflow_0)
+ * flow_create_split_inner(metadata_subflow_1)
+ * flow_create_split_inner(metadata_subflow_2)
+ * flow_create_split_metadata(meter_subflow_1) calls:
+ * flow_create_split_inner(metadata_subflow_0)
+ * flow_create_split_inner(metadata_subflow_1)
+ * flow_create_split_inner(metadata_subflow_2)
+ *
+ * This provide flexible way to add new levels of flow splitting.
+ * The all of successfully created subflows are included to the
+ * parent flow dev_flow list.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[in] flow
+ * Parent flow structure pointer.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[in] flow_split_info
+ * Pointer to flow split info structure.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @return
+ * 0 on success, negative value otherwise
+ */
+static int
+flow_create_split_outer(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct mlx5_flow_split_info *flow_split_info,
+ struct rte_flow_error *error)
+{
+ int ret;
+
+ ret = flow_create_split_sample(dev, flow, attr, items,
+ actions, flow_split_info, error);
+ MLX5_ASSERT(ret <= 0);
+ return ret;
+}
+
+static struct mlx5_flow_tunnel *
+flow_tunnel_from_rule(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[])
+{
+ struct mlx5_flow_tunnel *tunnel;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+ if (is_flow_tunnel_match_rule(dev, attr, items, actions))
+ tunnel = (struct mlx5_flow_tunnel *)items[0].spec;
+ else if (is_flow_tunnel_steer_rule(dev, attr, items, actions))
+ tunnel = (struct mlx5_flow_tunnel *)actions[0].conf;
+ else
+ tunnel = NULL;
+#pragma GCC diagnostic pop
+
+ return tunnel;
+}
+
+/**
+ * Adjust flow RSS workspace if needed.
+ *
+ * @param wks
+ * Pointer to thread flow work space.
+ * @param rss_desc
+ * Pointer to RSS descriptor.
+ * @param[in] nrssq_num
+ * New RSS queue number.
+ *
+ * @return
+ * 0 on success, -1 otherwise and rte_errno is set.
+ */
+static int
+flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
+ struct mlx5_flow_rss_desc *rss_desc,
+ uint32_t nrssq_num)
+{
+ if (likely(nrssq_num <= wks->rssq_num))
+ return 0;
+ rss_desc->queue = realloc(rss_desc->queue,
+ sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
+ if (!rss_desc->queue) {
+ rte_errno = ENOMEM;
+ return -1;
+ }
+ wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
+ return 0;
+}
+
+/**
+ * Create a flow and add it to @p list.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param list
+ * Pointer to a TAILQ flow list. If this parameter NULL,
+ * no list insertion occurred, flow is just created,
+ * this is caller's responsibility to track the
+ * created flow.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[in] external
+ * This flow rule is created by request external to PMD.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * A flow index on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action original_actions[],
+ bool external, struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct rte_flow *flow = NULL;
+ struct mlx5_flow *dev_flow;
+ const struct rte_flow_action_rss *rss = NULL;
+ struct mlx5_translated_shared_action
+ shared_actions[MLX5_MAX_SHARED_ACTIONS];
+ int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
+ union {
+ struct mlx5_flow_expand_rss buf;
+ uint8_t buffer[2048];
+ } expand_buffer;
+ union {
+ struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
+ uint8_t buffer[2048];
+ } actions_rx;
+ union {
+ struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
+ uint8_t buffer[2048];
+ } actions_hairpin_tx;
+ union {
+ struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
+ uint8_t buffer[2048];
+ } items_tx;
+ struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
+ struct mlx5_flow_rss_desc *rss_desc;
+ const struct rte_flow_action *p_actions_rx;
+ uint32_t i;
+ uint32_t idx = 0;
+ int hairpin_flow;
+ struct rte_flow_attr attr_tx = { .priority = 0 };
+ const struct rte_flow_action *actions;
+ struct rte_flow_action *translated_actions = NULL;
+ struct mlx5_flow_tunnel *tunnel;
+ struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
+ struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
+ struct mlx5_flow_split_info flow_split_info = {
+ .external = !!external,
+ .skip_scale = 0,
+ .flow_idx = 0,
+ .prefix_mark = 0,
+ .prefix_layers = 0
+ };
+ int ret;
+
+ MLX5_ASSERT(wks);
+ rss_desc = &wks->rss_desc;
+ ret = flow_shared_actions_translate(dev, original_actions,
+ shared_actions,
+ &shared_actions_n,
+ &translated_actions, error);
+ if (ret < 0) {
+ MLX5_ASSERT(translated_actions == NULL);
+ return 0;
+ }
+ actions = translated_actions ? translated_actions : original_actions;
+ p_actions_rx = actions;
+ hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
+ ret = flow_drv_validate(dev, attr, items, p_actions_rx,
+ external, hairpin_flow, error);
+ if (ret < 0)
+ goto error_before_hairpin_split;
+ flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
+ if (!flow) {
+ rte_errno = ENOMEM;
+ goto error_before_hairpin_split;
+ }
+ if (hairpin_flow > 0) {
+ if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
+ rte_errno = EINVAL;
+ goto error_before_hairpin_split;
+ }
+ flow_hairpin_split(dev, actions, actions_rx.actions,
+ actions_hairpin_tx.actions, items_tx.items,
+ idx);
+ p_actions_rx = actions_rx.actions;
+ }
+ flow_split_info.flow_idx = idx;
+ flow->drv_type = flow_get_drv_type(dev, attr);
+ MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
+ flow->drv_type < MLX5_FLOW_TYPE_MAX);
+ memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
+ /* RSS Action only works on NIC RX domain */
+ if (attr->ingress && !attr->transfer)
+ rss = flow_get_rss_action(p_actions_rx);
+ if (rss) {
+ if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
+ return 0;
+ /*
+ * The following information is required by
+ * mlx5_flow_hashfields_adjust() in advance.
+ */
+ rss_desc->level = rss->level;
+ /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
+ rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
+ }
+ flow->dev_handles = 0;
+ if (rss && rss->types) {
+ unsigned int graph_root;
+
+ graph_root = find_graph_root(items, rss->level);
+ ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
+ items, rss->types,
+ mlx5_support_expansion, graph_root);
+ MLX5_ASSERT(ret > 0 &&
+ (unsigned int)ret < sizeof(expand_buffer.buffer));
+ } else {
+ buf->entries = 1;
+ buf->entry[0].pattern = (void *)(uintptr_t)items;
+ }
+ rss_desc->shared_rss = flow_get_shared_rss_action(dev, shared_actions,
+ shared_actions_n);
+ for (i = 0; i < buf->entries; ++i) {
+ /* Initialize flow split data. */
+ flow_split_info.prefix_layers = 0;
+ flow_split_info.prefix_mark = 0;
+ flow_split_info.skip_scale = 0;
+ /*
+ * The splitter may create multiple dev_flows,
+ * depending on configuration. In the simplest
+ * case it just creates unmodified original flow.
+ */
+ ret = flow_create_split_outer(dev, flow, attr,
+ buf->entry[i].pattern,
+ p_actions_rx, &flow_split_info,
+ error);
+ if (ret < 0)
+ goto error;
+ if (is_flow_tunnel_steer_rule(dev, attr,
+ buf->entry[i].pattern,
+ p_actions_rx)) {
+ ret = flow_tunnel_add_default_miss(dev, flow, attr,
+ p_actions_rx,
+ idx,
+ &default_miss_ctx,
+ error);
+ if (ret < 0) {
+ mlx5_free(default_miss_ctx.queue);
+ goto error;
+ }
+ }
+ }
+ /* Create the tx flow. */
+ if (hairpin_flow) {
+ attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
+ attr_tx.ingress = 0;
+ attr_tx.egress = 1;
+ dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
+ actions_hairpin_tx.actions,
+ idx, error);
+ if (!dev_flow)
+ goto error;
+ dev_flow->flow = flow;
+ dev_flow->external = 0;
+ SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
+ dev_flow->handle, next);
+ ret = flow_drv_translate(dev, dev_flow, &attr_tx,
+ items_tx.items,
+ actions_hairpin_tx.actions, error);
+ if (ret < 0)
+ goto error;
+ }
+ /*
+ * Update the metadata register copy table. If extensive
+ * metadata feature is enabled and registers are supported
+ * we might create the extra rte_flow for each unique
+ * MARK/FLAG action ID.
+ *
+ * The table is updated for ingress Flows only, because
+ * the egress Flows belong to the different device and
+ * copy table should be updated in peer NIC Rx domain.
+ */
+ if (attr->ingress &&
+ (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
+ ret = flow_mreg_update_copy_table(dev, flow, actions, error);
+ if (ret)
+ goto error;
+ }
+ /*
+ * If the flow is external (from application) OR device is started,
+ * OR mreg discover, then apply immediately.
+ */
+ if (external || dev->data->dev_started ||
+ (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
+ attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
+ ret = flow_drv_apply(dev, flow, error);
+ if (ret < 0)
+ goto error;
+ }
+ if (list) {
+ rte_spinlock_lock(&priv->flow_list_lock);
+ ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
+ flow, next);
+ rte_spinlock_unlock(&priv->flow_list_lock);
+ }
+ flow_rxq_flags_set(dev, flow);
+ rte_free(translated_actions);
+ tunnel = flow_tunnel_from_rule(dev, attr, items, actions);
+ if (tunnel) {
+ flow->tunnel = 1;
+ flow->tunnel_id = tunnel->tunnel_id;
+ __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
+ mlx5_free(default_miss_ctx.queue);
+ }
+ mlx5_flow_pop_thread_workspace();
+ return idx;
+error:
+ MLX5_ASSERT(flow);
+ ret = rte_errno; /* Save rte_errno before cleanup. */
+ flow_mreg_del_copy_action(dev, flow);
+ flow_drv_destroy(dev, flow);
+ if (rss_desc->shared_rss)
+ __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
+ mlx5_ipool_get
+ (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
+ rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
+ mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
+ rte_errno = ret; /* Restore rte_errno. */
+ ret = rte_errno;
+ rte_errno = ret;
+ mlx5_flow_pop_thread_workspace();
+error_before_hairpin_split:
+ rte_free(translated_actions);
+ return 0;
+}
+
+/**
+ * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
+ * incoming packets to table 1.
+ *
+ * Other flow rules, requested for group n, will be created in
+ * e-switch table n+1.
+ * Jump action to e-switch group n will be created to group n+1.
+ *
+ * Used when working in switchdev mode, to utilise advantages of table 1
+ * and above.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return
+ * Pointer to flow on success, NULL otherwise and rte_errno is set.
+ */
+struct rte_flow *
+mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
+{
+ const struct rte_flow_attr attr = {
+ .group = 0,
+ .priority = 0,
+ .ingress = 1,
+ .egress = 0,
+ .transfer = 1,
+ };
+ const struct rte_flow_item pattern = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ };
+ struct rte_flow_action_jump jump = {
+ .group = 1,
+ };
+ const struct rte_flow_action actions[] = {
+ {
+ .type = RTE_FLOW_ACTION_TYPE_JUMP,
+ .conf = &jump,
+ },
+ {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct rte_flow_error error;
+
+ return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
+ &attr, &pattern,
+ actions, false, &error);
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action original_actions[],
+ struct rte_flow_error *error)
+{
+ int hairpin_flow;
+ struct mlx5_translated_shared_action
+ shared_actions[MLX5_MAX_SHARED_ACTIONS];
+ int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
+ const struct rte_flow_action *actions;
+ struct rte_flow_action *translated_actions = NULL;
+ int ret = flow_shared_actions_translate(dev, original_actions,
+ shared_actions,
+ &shared_actions_n,
+ &translated_actions, error);
+
+ if (ret)
+ return ret;
+ actions = translated_actions ? translated_actions : original_actions;
+ hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
+ ret = flow_drv_validate(dev, attr, items, actions,
+ true, hairpin_flow, error);
+ rte_free(translated_actions);
+ return ret;
+}
+
+/**
+ * Create a flow.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx5_flow_create(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ /*
+ * If the device is not started yet, it is not allowed to created a
+ * flow from application. PMD default flows and traffic control flows
+ * are not affected.
+ */
+ if (unlikely(!dev->data->dev_started)) {
+ DRV_LOG(DEBUG, "port %u is not started when "
+ "inserting a flow", dev->data->port_id);
+ rte_flow_error_set(error, ENODEV,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "port not started");
+ return NULL;
+ }
+
+ return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
+ attr, items, actions, true, error);
+}
+
+/**
+ * Destroy a flow in a list.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param list
+ * Pointer to the Indexed flow list. If this parameter NULL,
+ * there is no flow removal from the list. Be noted that as
+ * flow is add to the indexed list, memory of the indexed
+ * list points to maybe changed as flow destroyed.
+ * @param[in] flow_idx
+ * Index of flow to destroy.
+ */
+static void
+flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
+ uint32_t flow_idx)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
+ [MLX5_IPOOL_RTE_FLOW], flow_idx);
+
+ if (!flow)
+ return;
+ /*
+ * Update RX queue flags only if port is started, otherwise it is
+ * already clean.
+ */
+ if (dev->data->dev_started)
+ flow_rxq_flags_trim(dev, flow);
+ flow_drv_destroy(dev, flow);
+ if (list) {
+ rte_spinlock_lock(&priv->flow_list_lock);
+ ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
+ flow_idx, flow, next);
+ rte_spinlock_unlock(&priv->flow_list_lock);
+ }
+ if (flow->tunnel) {
+ struct mlx5_flow_tunnel *tunnel;
+
+ tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
+ RTE_VERIFY(tunnel);
+ if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
+ mlx5_flow_tunnel_free(dev, tunnel);
+ }
+ flow_mreg_del_copy_action(dev, flow);
+ mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param list
+ * Pointer to the Indexed flow list.
+ * @param active
+ * If flushing is called avtively.
+ */
+void
+mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
+{
+ uint32_t num_flushed = 0;
+
+ while (*list) {
+ flow_list_destroy(dev, list, *list);
+ num_flushed++;
+ }
+ if (active) {
+ DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
+ dev->data->port_id, num_flushed);
+ }
+}
+
+/**
+ * Stop all default actions for flows.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ */
+void
+mlx5_flow_stop_default(struct rte_eth_dev *dev)
+{
+ flow_mreg_del_default_copy_action(dev);
+ flow_rxq_flags_clear(dev);
+}
+
+/**
+ * Start all default actions for flows.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_start_default(struct rte_eth_dev *dev)
+{
+ struct rte_flow_error error;
+
+ /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
+ return flow_mreg_add_default_copy_action(dev, &error);
+}
+
+/**
+ * Release key of thread specific flow workspace data.
+ */
+void
+flow_release_workspace(void *data)
+{
+ struct mlx5_flow_workspace *wks = data;
+ struct mlx5_flow_workspace *next;
+
+ while (wks) {
+ next = wks->next;
+ free(wks->rss_desc.queue);
+ free(wks);
+ wks = next;
+ }
+}
+
+/**
+ * Get thread specific current flow workspace.
+ *
+ * @return pointer to thread specific flow workspace data, NULL on error.
+ */
+struct mlx5_flow_workspace*
+mlx5_flow_get_thread_workspace(void)
+{
+ struct mlx5_flow_workspace *data;
+
+ data = mlx5_flow_os_get_specific_workspace();
+ MLX5_ASSERT(data && data->inuse);
+ if (!data || !data->inuse)
+ DRV_LOG(ERR, "flow workspace not initialized.");
+ return data;
+}
+
+/**
+ * Allocate and init new flow workspace.
+ *
+ * @return pointer to flow workspace data, NULL on error.
+ */
+static struct mlx5_flow_workspace*
+flow_alloc_thread_workspace(void)
+{
+ struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
+
+ if (!data) {
+ DRV_LOG(ERR, "Failed to allocate flow workspace "
+ "memory.");
+ return NULL;
+ }
+ data->rss_desc.queue = calloc(1,
+ sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
+ if (!data->rss_desc.queue)
+ goto err;
+ data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
+ return data;
+err:
+ if (data->rss_desc.queue)
+ free(data->rss_desc.queue);
+ free(data);
+ return NULL;
+}
+
+/**
+ * Get new thread specific flow workspace.
+ *
+ * If current workspace inuse, create new one and set as current.
+ *
+ * @return pointer to thread specific flow workspace data, NULL on error.
+ */
+static struct mlx5_flow_workspace*
+mlx5_flow_push_thread_workspace(void)
+{
+ struct mlx5_flow_workspace *curr;
+ struct mlx5_flow_workspace *data;
+
+ curr = mlx5_flow_os_get_specific_workspace();
+ if (!curr) {
+ data = flow_alloc_thread_workspace();
+ if (!data)
+ return NULL;
+ } else if (!curr->inuse) {
+ data = curr;
+ } else if (curr->next) {
+ data = curr->next;
+ } else {
+ data = flow_alloc_thread_workspace();
+ if (!data)
+ return NULL;
+ curr->next = data;
+ data->prev = curr;
+ }
+ data->inuse = 1;
+ data->flow_idx = 0;
+ /* Set as current workspace */
+ if (mlx5_flow_os_set_specific_workspace(data))
+ DRV_LOG(ERR, "Failed to set flow workspace to thread.");
+ return data;
+}
+
+/**
+ * Close current thread specific flow workspace.
+ *
+ * If previous workspace available, set it as current.
+ *
+ * @return pointer to thread specific flow workspace data, NULL on error.
+ */
+static void
+mlx5_flow_pop_thread_workspace(void)
+{
+ struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
+
+ if (!data)
+ return;
+ if (!data->inuse) {
+ DRV_LOG(ERR, "Failed to close unused flow workspace.");
+ return;
+ }
+ data->inuse = 0;
+ if (!data->prev)
+ return;
+ if (mlx5_flow_os_set_specific_workspace(data->prev))
+ DRV_LOG(ERR, "Failed to set flow workspace to thread.");
+}
+
+/**
+ * Verify the flow list is empty
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return the number of flows not released.
+ */
+int
+mlx5_flow_verify(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct rte_flow *flow;
+ uint32_t idx;
+ int ret = 0;
+
+ ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
+ flow, next) {
+ DRV_LOG(DEBUG, "port %u flow %p still referenced",
+ dev->data->port_id, (void *)flow);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Enable default hairpin egress flow.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param queue
+ * The queue index.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
+ uint32_t queue)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ const struct rte_flow_attr attr = {
+ .egress = 1,
+ .priority = 0,
+ };
+ struct mlx5_rte_flow_item_tx_queue queue_spec = {
+ .queue = queue,
+ };
+ struct mlx5_rte_flow_item_tx_queue queue_mask = {
+ .queue = UINT32_MAX,
+ };
+ struct rte_flow_item items[] = {
+ {
+ .type = (enum rte_flow_item_type)
+ MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
+ .spec = &queue_spec,
+ .last = NULL,
+ .mask = &queue_mask,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ struct rte_flow_action_jump jump = {
+ .group = MLX5_HAIRPIN_TX_TABLE,
+ };
+ struct rte_flow_action actions[2];
+ uint32_t flow_idx;
+ struct rte_flow_error error;
+
+ actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
+ actions[0].conf = &jump;
+ actions[1].type = RTE_FLOW_ACTION_TYPE_END;
+ flow_idx = flow_list_create(dev, &priv->ctrl_flows,
+ &attr, items, actions, false, &error);
+ if (!flow_idx) {
+ DRV_LOG(DEBUG,
+ "Failed to create ctrl flow: rte_errno(%d),"
+ " type(%d), message(%s)",
+ rte_errno, error.type,
+ error.message ? error.message : " (no stated reason)");
+ return -rte_errno;
+ }
+ return 0;
+}
+
+/**
+ * Enable a control flow configured from the control plane.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param eth_spec
+ * An Ethernet flow spec to apply.
+ * @param eth_mask
+ * An Ethernet flow mask to apply.
+ * @param vlan_spec
+ * A VLAN flow spec to apply.
+ * @param vlan_mask
+ * A VLAN flow mask to apply.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
+ struct rte_flow_item_eth *eth_spec,
+ struct rte_flow_item_eth *eth_mask,
+ struct rte_flow_item_vlan *vlan_spec,
+ struct rte_flow_item_vlan *vlan_mask)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ const struct rte_flow_attr attr = {
+ .ingress = 1,
+ .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
+ };
+ struct rte_flow_item items[] = {
+ {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = eth_spec,
+ .last = NULL,
+ .mask = eth_mask,
+ },
+ {
+ .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
+ RTE_FLOW_ITEM_TYPE_END,
+ .spec = vlan_spec,
+ .last = NULL,
+ .mask = vlan_mask,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ uint16_t queue[priv->reta_idx_n];
+ struct rte_flow_action_rss action_rss = {
+ .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+ .level = 0,
+ .types = priv->rss_conf.rss_hf,
+ .key_len = priv->rss_conf.rss_key_len,
+ .queue_num = priv->reta_idx_n,
+ .key = priv->rss_conf.rss_key,
+ .queue = queue,
+ };
+ struct rte_flow_action actions[] = {
+ {
+ .type = RTE_FLOW_ACTION_TYPE_RSS,
+ .conf = &action_rss,
+ },
+ {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ uint32_t flow_idx;
+ struct rte_flow_error error;
+ unsigned int i;
+
+ if (!priv->reta_idx_n || !priv->rxqs_n) {
+ return 0;
+ }
+ if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
+ action_rss.types = 0;
+ for (i = 0; i != priv->reta_idx_n; ++i)
+ queue[i] = (*priv->reta_idx)[i];
+ flow_idx = flow_list_create(dev, &priv->ctrl_flows,
+ &attr, items, actions, false, &error);
+ if (!flow_idx)
+ return -rte_errno;
+ return 0;
+}
+
+/**
+ * Enable a flow control configured from the control plane.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param eth_spec
+ * An Ethernet flow spec to apply.
+ * @param eth_mask
+ * An Ethernet flow mask to apply.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ctrl_flow(struct rte_eth_dev *dev,
+ struct rte_flow_item_eth *eth_spec,
+ struct rte_flow_item_eth *eth_mask)
+{
+ return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
+}
+
+/**
+ * Create default miss flow rule matching lacp traffic
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param eth_spec
+ * An Ethernet flow spec to apply.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ /*
+ * The LACP matching is done by only using ether type since using
+ * a multicast dst mac causes kernel to give low priority to this flow.
+ */
+ static const struct rte_flow_item_eth lacp_spec = {
+ .type = RTE_BE16(0x8809),
+ };
+ static const struct rte_flow_item_eth lacp_mask = {
+ .type = 0xffff,
+ };
+ const struct rte_flow_attr attr = {
+ .ingress = 1,
+ };
+ struct rte_flow_item items[] = {
+ {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = &lacp_spec,
+ .mask = &lacp_mask,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ struct rte_flow_action actions[] = {
+ {
+ .type = (enum rte_flow_action_type)
+ MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
+ },
+ {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ struct rte_flow_error error;
+ uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
+ &attr, items, actions, false, &error);
+
+ if (!flow_idx)
+ return -rte_errno;
+ return 0;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error __rte_unused)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
+ return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error __rte_unused)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ mlx5_flow_list_flush(dev, &priv->flows, false);
+ return 0;
+}
+
+/**
+ * Isolated mode.
+ *
+ * @see rte_flow_isolate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_isolate(struct rte_eth_dev *dev,
+ int enable,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ if (dev->data->dev_started) {
+ rte_flow_error_set(error, EBUSY,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "port must be stopped first");
+ return -rte_errno;
+ }
+ priv->isolated = !!enable;
+ if (enable)
+ dev->dev_ops = &mlx5_dev_ops_isolate;
+ else
+ dev->dev_ops = &mlx5_dev_ops;
+
+ dev->rx_descriptor_status = mlx5_rx_descriptor_status;
+ dev->tx_descriptor_status = mlx5_tx_descriptor_status;
+
+ return 0;
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_drv_query(struct rte_eth_dev *dev,
+ uint32_t flow_idx,
+ const struct rte_flow_action *actions,
+ void *data,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
+ [MLX5_IPOOL_RTE_FLOW],
+ flow_idx);
+ enum mlx5_flow_drv_type ftype;
+
+ if (!flow) {
+ return rte_flow_error_set(error, ENOENT,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "invalid flow handle");
+ }
+ ftype = flow->drv_type;
+ MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+ fops = flow_get_drv_ops(ftype);
+
+ return fops->query(dev, flow, actions, data, error);
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_action *actions,
+ void *data,
+ struct rte_flow_error *error)
+{
+ int ret;
+
+ ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
+ error);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+/**
+ * Get rte_flow callbacks.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param ops
+ * Pointer to operation-specific structure.
+ *
+ * @return 0
+ */
+int
+mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
+ const struct rte_flow_ops **ops)
+{
+ *ops = &mlx5_flow_ops;
+ return 0;
+}
+
+/**
+ * Create the needed meter and suffix tables.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in] fm
+ * Pointer to the flow meter.
+ *
+ * @return
+ * Pointer to table set on success, NULL otherwise.
+ */
+struct mlx5_meter_domains_infos *
+mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
+ const struct mlx5_flow_meter *fm)
+{
+ const struct mlx5_flow_driver_ops *fops;
+
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->create_mtr_tbls(dev, fm);
+}
+
+/**
+ * Destroy the meter table set.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in] tbl
+ * Pointer to the meter table set.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
+ struct mlx5_meter_domains_infos *tbls)
+{
+ const struct mlx5_flow_driver_ops *fops;
+
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->destroy_mtr_tbls(dev, tbls);
+}
+
+/**
+ * Create policer rules.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in] fm
+ * Pointer to flow meter structure.
+ * @param[in] attr
+ * Pointer to flow attributes.
+ *
+ * @return
+ * 0 on success, -1 otherwise.
+ */
+int
+mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
+ struct mlx5_flow_meter *fm,
+ const struct rte_flow_attr *attr)
+{
+ const struct mlx5_flow_driver_ops *fops;
+
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->create_policer_rules(dev, fm, attr);
+}
+
+/**
+ * Destroy policer rules.
+ *
+ * @param[in] fm
+ * Pointer to flow meter structure.
+ * @param[in] attr
+ * Pointer to flow attributes.
+ *
+ * @return
+ * 0 on success, -1 otherwise.
+ */
+int
+mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
+ struct mlx5_flow_meter *fm,
+ const struct rte_flow_attr *attr)
+{
+ const struct mlx5_flow_driver_ops *fops;
+
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->destroy_policer_rules(dev, fm, attr);
+}
+
+/**
+ * Allocate a counter.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * Index to allocated counter on success, 0 otherwise.
+ */
+uint32_t
+mlx5_counter_alloc(struct rte_eth_dev *dev)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow_attr attr = { .transfer = 0 };
+
+ if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->counter_alloc(dev);
+ }
+ DRV_LOG(ERR,
+ "port %u counter allocate is not supported.",
+ dev->data->port_id);
+ return 0;
+}
+
+/**
+ * Free a counter.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device structure.
+ * @param[in] cnt
+ * Index to counter to be free.
+ */
+void
+mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow_attr attr = { .transfer = 0 };
+
+ if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ fops->counter_free(dev, cnt);
+ return;
+ }
+ DRV_LOG(ERR,
+ "port %u counter free is not supported.",
+ dev->data->port_id);
+}
+
+/**
+ * Query counter statistics.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device structure.
+ * @param[in] cnt
+ * Index to counter to query.
+ * @param[in] clear
+ * Set to clear counter statistics.
+ * @param[out] pkts
+ * The counter hits packets number to save.
+ * @param[out] bytes
+ * The counter hits bytes number to save.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise.
+ */
+int
+mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
+ bool clear, uint64_t *pkts, uint64_t *bytes)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow_attr attr = { .transfer = 0 };
+
+ if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
+ fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ return fops->counter_query(dev, cnt, clear, pkts, bytes);
+ }
+ DRV_LOG(ERR,
+ "port %u counter query is not supported.",
+ dev->data->port_id);
+ return -ENOTSUP;
+}
+
+/**
+ * Allocate a new memory for the counter values wrapped by all the needed
+ * management.
+ *
+ * @param[in] sh
+ * Pointer to mlx5_dev_ctx_shared object.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise.
+ */
+static int
+mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
+{
+ struct mlx5_devx_mkey_attr mkey_attr;
+ struct mlx5_counter_stats_mem_mng *mem_mng;
+ volatile struct flow_counter_stats *raw_data;
+ int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
+ int size = (sizeof(struct flow_counter_stats) *
+ MLX5_COUNTERS_PER_POOL +
+ sizeof(struct mlx5_counter_stats_raw)) * raws_n +
+ sizeof(struct mlx5_counter_stats_mem_mng);
+ size_t pgsize = rte_mem_page_size();
+ uint8_t *mem;
+ int i;
+
+ if (pgsize == (size_t)-1) {
+ DRV_LOG(ERR, "Failed to get mem page size");
+ rte_errno = ENOMEM;
+ return -ENOMEM;
+ }
+ mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
+ if (!mem) {
+ rte_errno = ENOMEM;
+ return -ENOMEM;
+ }
+ mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
+ size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
+ mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (!mem_mng->umem) {
+ rte_errno = errno;
+ mlx5_free(mem);
+ return -rte_errno;
+ }
+ mkey_attr.addr = (uintptr_t)mem;
+ mkey_attr.size = size;
+ mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
+ mkey_attr.pd = sh->pdn;
+ mkey_attr.log_entity_size = 0;
+ mkey_attr.pg_access = 0;
+ mkey_attr.klm_array = NULL;
+ mkey_attr.klm_num = 0;
+ mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
+ mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
+ mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
+ if (!mem_mng->dm) {
+ mlx5_os_umem_dereg(mem_mng->umem);
+ rte_errno = errno;
+ mlx5_free(mem);
+ return -rte_errno;
+ }
+ mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
+ raw_data = (volatile struct flow_counter_stats *)mem;
+ for (i = 0; i < raws_n; ++i) {
+ mem_mng->raws[i].mem_mng = mem_mng;
+ mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
+ }
+ for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
+ LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
+ mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
+ next);
+ LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
+ sh->cmng.mem_mng = mem_mng;
+ return 0;
+}
+
+/**
+ * Set the statistic memory to the new counter pool.
+ *
+ * @param[in] sh
+ * Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] pool
+ * Pointer to the pool to set the statistic memory.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise.
+ */
+static int
+mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
+ struct mlx5_flow_counter_pool *pool)
+{
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ /* Resize statistic memory once used out. */
+ if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
+ mlx5_flow_create_counter_stat_mem_mng(sh)) {
+ DRV_LOG(ERR, "Cannot resize counter stat mem.");
+ return -1;
+ }
+ rte_spinlock_lock(&pool->sl);
+ pool->raw = cmng->mem_mng->raws + pool->index %
+ MLX5_CNT_CONTAINER_RESIZE;
+ rte_spinlock_unlock(&pool->sl);
+ pool->raw_hw = NULL;
+ return 0;
+}
+
+#define MLX5_POOL_QUERY_FREQ_US 1000000
+
+/**
+ * Set the periodic procedure for triggering asynchronous batch queries for all
+ * the counter pools.
+ *
+ * @param[in] sh
+ * Pointer to mlx5_dev_ctx_shared object.
+ */
+void
+mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
+{
+ uint32_t pools_n, us;
+
+ pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
+ us = MLX5_POOL_QUERY_FREQ_US / pools_n;
+ DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
+ if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
+ sh->cmng.query_thread_on = 0;
+ DRV_LOG(ERR, "Cannot reinitialize query alarm");
+ } else {
+ sh->cmng.query_thread_on = 1;
+ }
+}
+
+/**
+ * The periodic procedure for triggering asynchronous batch queries for all the
+ * counter pools. This function is probably called by the host thread.
+ *
+ * @param[in] arg
+ * The parameter for the alarm process.
+ */
+void
+mlx5_flow_query_alarm(void *arg)
+{
+ struct mlx5_dev_ctx_shared *sh = arg;
+ int ret;
+ uint16_t pool_index = sh->cmng.pool_index;
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ struct mlx5_flow_counter_pool *pool;
+ uint16_t n_valid;
+
+ if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
+ goto set_alarm;
+ rte_spinlock_lock(&cmng->pool_update_sl);
+ pool = cmng->pools[pool_index];
+ n_valid = cmng->n_valid;
+ rte_spinlock_unlock(&cmng->pool_update_sl);
+ /* Set the statistic memory to the new created pool. */
+ if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
+ goto set_alarm;
+ if (pool->raw_hw)
+ /* There is a pool query in progress. */
+ goto set_alarm;
+ pool->raw_hw =
+ LIST_FIRST(&sh->cmng.free_stat_raws);
+ if (!pool->raw_hw)
+ /* No free counter statistics raw memory. */
+ goto set_alarm;
+ /*
+ * Identify the counters released between query trigger and query
+ * handle more efficiently. The counter released in this gap period
+ * should wait for a new round of query as the new arrived packets
+ * will not be taken into account.
+ */
+ pool->query_gen++;
+ ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
+ MLX5_COUNTERS_PER_POOL,
+ NULL, NULL,
+ pool->raw_hw->mem_mng->dm->id,
+ (void *)(uintptr_t)
+ pool->raw_hw->data,
+ sh->devx_comp,
+ (uint64_t)(uintptr_t)pool);
+ if (ret) {
+ DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
+ " %d", pool->min_dcs->id);
+ pool->raw_hw = NULL;
+ goto set_alarm;
+ }
+ LIST_REMOVE(pool->raw_hw, next);
+ sh->cmng.pending_queries++;
+ pool_index++;
+ if (pool_index >= n_valid)
+ pool_index = 0;
+set_alarm:
+ sh->cmng.pool_index = pool_index;
+ mlx5_set_query_alarm(sh);
+}
+
+/**
+ * Check and callback event for new aged flow in the counter pool
+ *
+ * @param[in] sh
+ * Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] pool
+ * Pointer to Current counter pool.
+ */
+static void
+mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
+ struct mlx5_flow_counter_pool *pool)
+{
+ struct mlx5_priv *priv;
+ struct mlx5_flow_counter *cnt;
+ struct mlx5_age_info *age_info;
+ struct mlx5_age_param *age_param;
+ struct mlx5_counter_stats_raw *cur = pool->raw_hw;
+ struct mlx5_counter_stats_raw *prev = pool->raw;
+ const uint64_t curr_time = MLX5_CURR_TIME_SEC;
+ const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
+ uint16_t expected = AGE_CANDIDATE;
+ uint32_t i;
+
+ pool->time_of_last_age_check = curr_time;
+ for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+ cnt = MLX5_POOL_GET_CNT(pool, i);
+ age_param = MLX5_CNT_TO_AGE(cnt);
+ if (__atomic_load_n(&age_param->state,
+ __ATOMIC_RELAXED) != AGE_CANDIDATE)
+ continue;
+ if (cur->data[i].hits != prev->data[i].hits) {
+ __atomic_store_n(&age_param->sec_since_last_hit, 0,
+ __ATOMIC_RELAXED);
+ continue;
+ }
+ if (__atomic_add_fetch(&age_param->sec_since_last_hit,
+ time_delta,
+ __ATOMIC_RELAXED) <= age_param->timeout)
+ continue;
+ /**
+ * Hold the lock first, or if between the
+ * state AGE_TMOUT and tailq operation the
+ * release happened, the release procedure
+ * may delete a non-existent tailq node.
+ */
+ priv = rte_eth_devices[age_param->port_id].data->dev_private;
+ age_info = GET_PORT_AGE_INFO(priv);
+ rte_spinlock_lock(&age_info->aged_sl);
+ if (__atomic_compare_exchange_n(&age_param->state, &expected,
+ AGE_TMOUT, false,
+ __ATOMIC_RELAXED,
+ __ATOMIC_RELAXED)) {
+ TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
+ MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
+ }
+ rte_spinlock_unlock(&age_info->aged_sl);
+ }
+ mlx5_age_event_prepare(sh);
+}
+
+/**
+ * Handler for the HW respond about ready values from an asynchronous batch
+ * query. This function is probably called by the host thread.
+ *
+ * @param[in] sh
+ * The pointer to the shared device context.
+ * @param[in] async_id
+ * The Devx async ID.
+ * @param[in] status
+ * The status of the completion.
+ */
+void
+mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
+ uint64_t async_id, int status)
+{
+ struct mlx5_flow_counter_pool *pool =
+ (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
+ struct mlx5_counter_stats_raw *raw_to_free;
+ uint8_t query_gen = pool->query_gen ^ 1;
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ enum mlx5_counter_type cnt_type =
+ pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
+ MLX5_COUNTER_TYPE_ORIGIN;
+
+ if (unlikely(status)) {
+ raw_to_free = pool->raw_hw;
+ } else {
+ raw_to_free = pool->raw;
+ if (pool->is_aged)
+ mlx5_flow_aging_check(sh, pool);
+ rte_spinlock_lock(&pool->sl);
+ pool->raw = pool->raw_hw;
+ rte_spinlock_unlock(&pool->sl);
+ /* Be sure the new raw counters data is updated in memory. */
+ rte_io_wmb();
+ if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
+ rte_spinlock_lock(&cmng->csl[cnt_type]);
+ TAILQ_CONCAT(&cmng->counters[cnt_type],
+ &pool->counters[query_gen], next);
+ rte_spinlock_unlock(&cmng->csl[cnt_type]);
+ }
+ }
+ LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
+ pool->raw_hw = NULL;
+ sh->cmng.pending_queries--;
+}
+
+static int
+flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
+ const struct flow_grp_info *grp_info,
+ struct rte_flow_error *error)
+{
+ if (grp_info->transfer && grp_info->external &&
+ grp_info->fdb_def_rule) {
+ if (group == UINT32_MAX)
+ return rte_flow_error_set
+ (error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+ NULL,
+ "group index not supported");
+ *table = group + 1;
+ } else {
+ *table = group;
+ }
+ DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
+ return 0;
+}
+
+/**
+ * Translate the rte_flow group index to HW table value.
+ *
+ * If tunnel offload is disabled, all group ids converted to flow table
+ * id using the standard method.
+ * If tunnel offload is enabled, group id can be converted using the
+ * standard or tunnel conversion method. Group conversion method
+ * selection depends on flags in `grp_info` parameter:
+ * - Internal (grp_info.external == 0) groups conversion uses the
+ * standard method.
+ * - Group ids in JUMP action converted with the tunnel conversion.
+ * - Group id in rule attribute conversion depends on a rule type and
+ * group id value:
+ * ** non zero group attributes converted with the tunnel method
+ * ** zero group attribute in non-tunnel rule is converted using the
+ * standard method - there's only one root table
+ * ** zero group attribute in steer tunnel rule is converted with the
+ * standard method - single root table
+ * ** zero group attribute in match tunnel rule is a special OvS
+ * case: that value is used for portability reasons. That group
+ * id is converted with the tunnel conversion method.
+ *
+ * @param[in] dev
+ * Port device
+ * @param[in] tunnel
+ * PMD tunnel offload object
+ * @param[in] group
+ * rte_flow group index value.
+ * @param[out] table
+ * HW table value.
+ * @param[in] grp_info
+ * flags used for conversion
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_group_to_table(struct rte_eth_dev *dev,
+ const struct mlx5_flow_tunnel *tunnel,
+ uint32_t group, uint32_t *table,
+ const struct flow_grp_info *grp_info,
+ struct rte_flow_error *error)
+{
+ int ret;
+ bool standard_translation;
+
+ if (!grp_info->skip_scale && grp_info->external &&
+ group < MLX5_MAX_TABLES_EXTERNAL)
+ group *= MLX5_FLOW_TABLE_FACTOR;
+ if (is_tunnel_offload_active(dev)) {
+ standard_translation = !grp_info->external ||
+ grp_info->std_tbl_fix;
+ } else {
+ standard_translation = true;
+ }
+ DRV_LOG(DEBUG,
+ "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
+ dev->data->port_id, group, grp_info->transfer,
+ grp_info->external, grp_info->fdb_def_rule,
+ standard_translation ? "STANDARD" : "TUNNEL");
+ if (standard_translation)
+ ret = flow_group_to_table(dev->data->port_id, group, table,
+ grp_info, error);
+ else
+ ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
+ table, error);
+
+ return ret;
+}
+
+/**
+ * Discover availability of metadata reg_c's.
+ *
+ * Iteratively use test flows to check availability.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_config *config = &priv->config;
+ enum modify_reg idx;
+ int n = 0;
+
+ /* reg_c[0] and reg_c[1] are reserved. */
+ config->flow_mreg_c[n++] = REG_C_0;
+ config->flow_mreg_c[n++] = REG_C_1;
+ /* Discover availability of other reg_c's. */
+ for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
+ struct rte_flow_attr attr = {
+ .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
+ .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
+ .ingress = 1,
+ };
+ struct rte_flow_item items[] = {
+ [0] = {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ struct rte_flow_action actions[] = {
+ [0] = {
+ .type = (enum rte_flow_action_type)
+ MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
+ .conf = &(struct mlx5_flow_action_copy_mreg){
+ .src = REG_C_1,
+ .dst = idx,
+ },
+ },
+ [1] = {
+ .type = RTE_FLOW_ACTION_TYPE_JUMP,
+ .conf = &(struct rte_flow_action_jump){
+ .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
+ },
+ },
+ [2] = {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
+ uint32_t flow_idx;
+ struct rte_flow *flow;
+ struct rte_flow_error error;
+
+ if (!config->dv_flow_en)
+ break;
+ /* Create internal flow, validation skips copy action. */
+ flow_idx = flow_list_create(dev, NULL, &attr, items,
+ actions, false, &error);
+ flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
+ flow_idx);
+ if (!flow)
+ continue;
+ config->flow_mreg_c[n++] = idx;
+ flow_list_destroy(dev, NULL, flow_idx);
+ }
+ for (; n < MLX5_MREG_C_NUM; ++n)
+ config->flow_mreg_c[n] = REG_NON;
+ return 0;
+}
+
+/**
+ * Dump flow raw hw data to file
+ *
+ * @param[in] dev
+ * The pointer to Ethernet device.
+ * @param[in] file
+ * A pointer to a file for output.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. PMDs initialize this
+ * structure in case of error only.
+ * @return
+ * 0 on success, a nagative value otherwise.
+ */
+int
+mlx5_flow_dev_dump(struct rte_eth_dev *dev,
+ FILE *file,
+ struct rte_flow_error *error __rte_unused)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_ctx_shared *sh = priv->sh;
+
+ if (!priv->config.dv_flow_en) {
+ if (fputs("device dv flow disabled\n", file) <= 0)
+ return -errno;
+ return -ENOTSUP;
+ }
+ return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
+ sh->tx_domain, file);
+}
+
+/**
+ * Get aged-out flows.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] context
+ * The address of an array of pointers to the aged-out flows contexts.
+ * @param[in] nb_countexts
+ * The length of context array pointers.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL. Initialized in case of
+ * error only.
+ *
+ * @return
+ * how many contexts get in success, otherwise negative errno value.
+ * if nb_contexts is 0, return the amount of all aged contexts.
+ * if nb_contexts is not 0 , return the amount of aged flows reported
+ * in the context array.
+ */
+int
+mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
+ uint32_t nb_contexts, struct rte_flow_error *error)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ struct rte_flow_attr attr = { .transfer = 0 };