net/mlx5: support modify field flow action
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
index bf86aaa..0197a07 100644 (file)
@@ -12,7 +12,7 @@
 
 #include <rte_common.h>
 #include <rte_ether.h>
-#include <rte_ethdev_driver.h>
+#include <ethdev_driver.h>
 #include <rte_eal_paging.h>
 #include <rte_flow.h>
 #include <rte_cycles.h>
@@ -73,7 +73,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
 
 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
        [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
-#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
        [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
 #endif
        [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
@@ -696,11 +696,6 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = {
        },
 };
 
-/* Key of thread specific flow workspace data. */
-static pthread_key_t key_workspace;
-
-/* Thread specific flow workspace data once initialization data. */
-static pthread_once_t key_workspace_init;
 
 
 /**
@@ -798,7 +793,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
                start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
                            (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
                skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
-               if (id > (REG_C_7 - start_reg))
+               if (id > (uint32_t)(REG_C_7 - start_reg))
                        return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "invalid tag id");
@@ -814,7 +809,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
                 */
                if (skip_mtr_reg && config->flow_mreg_c
                    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
-                       if (id >= (REG_C_7 - start_reg))
+                       if (id >= (uint32_t)(REG_C_7 - start_reg))
                                return rte_flow_error_set(error, EINVAL,
                                                       RTE_FLOW_ERROR_TYPE_ITEM,
                                                        NULL, "invalid tag id");
@@ -860,6 +855,58 @@ mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
        return config->flow_mreg_c[2] != REG_NON;
 }
 
+/**
+ * Get the lowest priority value for a flow with the given attributes.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Pointer to device flow rule attributes.
+ *
+ * @return
+ *   flow_prio - 2 (group 0, non-transfer) or MLX5_NON_ROOT_FLOW_MAX_PRIO - 1.
+ */
+uint32_t
+mlx5_get_lowest_priority(struct rte_eth_dev *dev,
+                         const struct rte_flow_attr *attr)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+
+       if (!attr->group && !attr->transfer)
+               return priv->config.flow_prio - 2;
+       return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
+}
+
+/**
+ * Calculate matcher priority of the flow.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Pointer to flow rule attributes; group, transfer and priority are read.
+ * @param[in] subpriority
+ *   The priority based on the items.
+ * @return
+ *   priority * 3 + subpriority, or the mlx5_os_flow_adjust_priority() result.
+ */
+uint16_t
+mlx5_get_matcher_priority(struct rte_eth_dev *dev,
+                         const struct rte_flow_attr *attr,
+                         uint32_t subpriority)
+{
+       uint16_t priority = (uint16_t)attr->priority;
+       struct mlx5_priv *priv = dev->data->dev_private;
+
+       if (!attr->group && !attr->transfer) {
+               if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
+                       priority = priv->config.flow_prio - 1;
+               return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
+       }
+       if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
+               priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
+       return priority * 3 + subpriority;
+}
+
 /**
  * Verify the @p item specifications (spec, last, mask) are compatible with the
  * NIC capabilities.
@@ -1002,17 +1049,29 @@ flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
        struct mlx5_priv *priv = dev->data->dev_private;
        const int mark = dev_handle->mark;
        const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
-       struct mlx5_hrxq *hrxq;
+       struct mlx5_ind_table_obj *ind_tbl = NULL;
        unsigned int i;
 
-       if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
-               return;
-       hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
+       if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
+               struct mlx5_hrxq *hrxq;
+
+               hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
                              dev_handle->rix_hrxq);
-       if (!hrxq)
+               if (hrxq)
+                       ind_tbl = hrxq->ind_table;
+       } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
+               struct mlx5_shared_action_rss *shared_rss;
+
+               shared_rss = mlx5_ipool_get
+                       (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
+                        dev_handle->rix_srss);
+               if (shared_rss)
+                       ind_tbl = shared_rss->ind_tbl;
+       }
+       if (!ind_tbl)
                return;
-       for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
-               int idx = hrxq->ind_table->queues[i];
+       for (i = 0; i != ind_tbl->queues_n; ++i) {
+               int idx = ind_tbl->queues[i];
                struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[idx],
                                     struct mlx5_rxq_ctrl, rxq);
@@ -1084,18 +1143,30 @@ flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
        struct mlx5_priv *priv = dev->data->dev_private;
        const int mark = dev_handle->mark;
        const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
-       struct mlx5_hrxq *hrxq;
+       struct mlx5_ind_table_obj *ind_tbl = NULL;
        unsigned int i;
 
-       if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
-               return;
-       hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
+       if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
+               struct mlx5_hrxq *hrxq;
+
+               hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
                              dev_handle->rix_hrxq);
-       if (!hrxq)
+               if (hrxq)
+                       ind_tbl = hrxq->ind_table;
+       } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
+               struct mlx5_shared_action_rss *shared_rss;
+
+               shared_rss = mlx5_ipool_get
+                       (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
+                        dev_handle->rix_srss);
+               if (shared_rss)
+                       ind_tbl = shared_rss->ind_tbl;
+       }
+       if (!ind_tbl)
                return;
        MLX5_ASSERT(dev->data->dev_started);
-       for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
-               int idx = hrxq->ind_table->queues[i];
+       for (i = 0; i != ind_tbl->queues_n; ++i) {
+               int idx = ind_tbl->queues[i];
                struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[idx],
                                     struct mlx5_rxq_ctrl, rxq);
@@ -1650,7 +1721,7 @@ mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                          NULL, "groups is not supported");
-       if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
+       if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
            attributes->priority >= priority_max)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
@@ -2603,6 +2674,149 @@ mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
        return 0;
 }
 
+/**
+ * Validate Geneve TLV option item (every rejection is reported as ENOTSUP).
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] last_item
+ *   Flags of the previously validated item; must include the GENEVE layer.
+ * @param[in] geneve_item
+ *   Preceding GENEVE pattern item; NULL is rejected.
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
+                                  uint64_t last_item,
+                                  const struct rte_flow_item *geneve_item,
+                                  struct rte_eth_dev *dev,
+                                  struct rte_flow_error *error)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_dev_ctx_shared *sh = priv->sh;
+       struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
+       struct mlx5_hca_attr *hca_attr = &priv->config.hca_attr;
+       uint8_t data_max_supported =
+                       hca_attr->max_geneve_tlv_option_data_len * 4;
+       struct mlx5_dev_config *config = &priv->config;
+       const struct rte_flow_item_geneve *geneve_spec;
+       const struct rte_flow_item_geneve *geneve_mask;
+       const struct rte_flow_item_geneve_opt *spec = item->spec;
+       const struct rte_flow_item_geneve_opt *mask = item->mask;
+       unsigned int i;
+       unsigned int data_len;
+       uint8_t tlv_option_len;
+       uint16_t optlen_m, optlen_v;
+       const struct rte_flow_item_geneve_opt full_mask = {
+               .option_class = RTE_BE16(0xffff),
+               .option_type = 0xff,
+               .option_len = 0x1f,
+       };
+
+       if (!mask)
+               mask = &rte_flow_item_geneve_opt_mask;
+       if (!spec)
+               return rte_flow_error_set
+                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
+                       "Geneve TLV opt class/type/length must be specified");
+       if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
+               return rte_flow_error_set
+                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
+                       "Geneve TLV opt length exceeeds the limit (31)");
+       /* Check if class, type and length masks are full. */
+       if (full_mask.option_class != mask->option_class ||
+           full_mask.option_type != mask->option_type ||
+           full_mask.option_len != (mask->option_len & full_mask.option_len))
+               return rte_flow_error_set
+                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
+                       "Geneve TLV opt class/type/length masks must be full");
+       /* Check if length is within the limit reported in hca_attr. */
+       if ((uint32_t)spec->option_len >
+                       config->hca_attr.max_geneve_tlv_option_data_len)
+               return rte_flow_error_set
+                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
+                       "Geneve TLV opt length not supported");
+       if (config->hca_attr.max_geneve_tlv_options > 1)
+               DRV_LOG(DEBUG,
+                       "max_geneve_tlv_options supports more than 1 option");
+       /* Check that a GENEVE item precedes this item. */
+       if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
+               return rte_flow_error_set
+                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
+                       "Geneve opt item must be preceded with Geneve item");
+       geneve_spec = geneve_item->spec;
+       geneve_mask = geneve_item->mask ? geneve_item->mask :
+                                         &rte_flow_item_geneve_mask;
+       /* Masked GENEVE optlen must be greater than the TLV option length. */
+       if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
+                           geneve_spec->ver_opt_len_o_c_rsvd0)) {
+               tlv_option_len = spec->option_len & mask->option_len;
+               optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
+               optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
+               optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
+               optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
+               if ((optlen_v & optlen_m) <= tlv_option_len)
+                       return rte_flow_error_set
+                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                "GENEVE TLV option length exceeds optlen");
+       }
+       /* Reject a NULL data pointer or a zero option length. */
+       if (spec->data == NULL || spec->option_len == 0)
+               return rte_flow_error_set
+                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
+                       "Geneve TLV opt with zero data/length not supported");
+       /* Reject a match whose (masked) data bytes are all zero. */
+       data_len = spec->option_len * 4;
+       if (mask->data == NULL) {
+               for (i = 0; i < data_len; i++)
+                       if (spec->data[i])
+                               break;
+               if (i == data_len)
+                       return rte_flow_error_set(error, ENOTSUP,
+                               RTE_FLOW_ERROR_TYPE_ITEM, item,
+                               "Can't match on Geneve option data 0");
+       } else {
+               for (i = 0; i < data_len; i++)
+                       if (spec->data[i] & mask->data[i])
+                               break;
+               if (i == data_len)
+                       return rte_flow_error_set(error, ENOTSUP,
+                               RTE_FLOW_ERROR_TYPE_ITEM, item,
+                               "Can't match on Geneve option data and mask 0");
+               /* No mask bits are allowed beyond data_max_supported bytes. */
+               for (i = data_max_supported; i < data_len ; i++)
+                       if (mask->data[i])
+                               return rte_flow_error_set(error, ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                       "Data mask is of unsupported size");
+       }
+       /* Check GENEVE option is supported in NIC. */
+       if (!config->hca_attr.geneve_tlv_opt)
+               return rte_flow_error_set
+                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
+                       "Geneve TLV opt not supported");
+       /* Check for an existing option with different class/type/length. */
+       rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
+       geneve_opt_resource = sh->geneve_tlv_option_resource;
+       if (geneve_opt_resource != NULL)
+               if (geneve_opt_resource->option_class != spec->option_class ||
+                   geneve_opt_resource->option_type != spec->option_type ||
+                   geneve_opt_resource->length != spec->option_len) {
+                       rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
+                       return rte_flow_error_set(error, ENOTSUP,
+                               RTE_FLOW_ERROR_TYPE_ITEM, item,
+                               "Only one Geneve TLV option supported");
+               }
+       rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
+       return 0;
+}
+
 /**
  * Validate MPLS item.
  *
@@ -3150,16 +3364,28 @@ flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 static const struct rte_flow_action_rss*
 flow_get_rss_action(const struct rte_flow_action actions[])
 {
+       const struct rte_flow_action_rss *rss = NULL;
+
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
                switch (actions->type) {
                case RTE_FLOW_ACTION_TYPE_RSS:
-                       return (const struct rte_flow_action_rss *)
-                              actions->conf;
+                       rss = actions->conf; /* Keep scanning; last RSS wins. */
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SAMPLE: /* Scan sample sub-actions. */
+               {
+                       const struct rte_flow_action_sample *sample =
+                                                               actions->conf;
+                       const struct rte_flow_action *act = sample->actions;
+                       for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
+                               if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
+                                       rss = act->conf;
+                       break;
+               }
                default:
                        break;
                }
        }
-       return NULL;
+       return rss;
 }
 
 /**
@@ -3524,7 +3750,7 @@ flow_check_hairpin_split(struct rte_eth_dev *dev,
                        if (queue == NULL)
                                return 0;
                        conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
-                       if (conf != NULL && !!conf->tx_explicit)
+                       if (conf == NULL || conf->tx_explicit != 0)
                                return 0;
                        queue_action = 1;
                        action_n++;
@@ -3534,7 +3760,7 @@ flow_check_hairpin_split(struct rte_eth_dev *dev,
                        if (rss == NULL || rss->queue_num == 0)
                                return 0;
                        conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
-                       if (conf != NULL && !!conf->tx_explicit)
+                       if (conf == NULL || conf->tx_explicit != 0)
                                return 0;
                        queue_action = 1;
                        action_n++;
@@ -3663,7 +3889,7 @@ flow_dv_mreg_create_cb(struct mlx5_hlist *list, uint64_t key,
                };
        } else {
                /* Default rule, wildcard match. */
-               attr.priority = MLX5_FLOW_PRIO_RSVD;
+               attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
                items[0] = (struct rte_flow_item){
                        .type = RTE_FLOW_ITEM_TYPE_END,
                };
@@ -4466,6 +4692,8 @@ flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
  *   Pointer to the position of the matched action if exists, otherwise is -1.
  * @param[out] qrss_action_pos
  *   Pointer to the position of the Queue/RSS action if exists, otherwise is -1.
+ * @param[out] modify_after_mirror
+ *   Pointer to the flag of modify action after FDB mirroring.
  *
  * @return
  *   > 0 the total number of actions.
@@ -4475,14 +4703,15 @@ static int
 flow_check_match_action(const struct rte_flow_action actions[],
                        const struct rte_flow_attr *attr,
                        enum rte_flow_action_type action,
-                       int *match_action_pos, int *qrss_action_pos)
+                       int *match_action_pos, int *qrss_action_pos,
+                       int *modify_after_mirror)
 {
        const struct rte_flow_action_sample *sample;
        int actions_n = 0;
-       int jump_flag = 0;
        uint32_t ratio = 0;
        int sub_type = 0;
        int flag = 0;
+       int fdb_mirror = 0;
 
        *match_action_pos = -1;
        *qrss_action_pos = -1;
@@ -4491,27 +4720,53 @@ flow_check_match_action(const struct rte_flow_action actions[],
                        flag = 1;
                        *match_action_pos = actions_n;
                }
-               if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE ||
-                   actions->type == RTE_FLOW_ACTION_TYPE_RSS)
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_QUEUE:
+               case RTE_FLOW_ACTION_TYPE_RSS:
                        *qrss_action_pos = actions_n;
-               if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP)
-                       jump_flag = 1;
-               if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) {
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SAMPLE:
                        sample = actions->conf;
                        ratio = sample->ratio;
                        sub_type = ((const struct rte_flow_action *)
                                        (sample->actions))->type;
+                       if (ratio == 1 && attr->transfer)
+                               fdb_mirror = 1;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
+               case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
+               case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
+               case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
+               case RTE_FLOW_ACTION_TYPE_FLAG:
+               case RTE_FLOW_ACTION_TYPE_MARK:
+               case RTE_FLOW_ACTION_TYPE_SET_META:
+               case RTE_FLOW_ACTION_TYPE_SET_TAG:
+                       if (fdb_mirror)
+                               *modify_after_mirror = 1;
+                       break;
+               default:
+                       break;
                }
                actions_n++;
        }
-       if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) {
-               if (ratio == 1) {
-                       /* JUMP Action not support for Mirroring;
-                        * Mirroring support multi-destination;
-                        */
-                       if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END)
-                               flag = 0;
-               }
+       if (flag && fdb_mirror && !*modify_after_mirror) {
+               /* FDB mirroring is implemented with the destination array
+                * instead of the FLOW_SAMPLER object.
+                */
+               if (sub_type != RTE_FLOW_ACTION_TYPE_END)
+                       flag = 0;
        }
        /* Count RTE_FLOW_ACTION_TYPE_END. */
        return flag ? actions_n + 1 : 0;
@@ -4530,8 +4785,8 @@ flow_check_match_action(const struct rte_flow_action actions[],
  *
  * @param dev
  *   Pointer to Ethernet device.
- * @param[in] fdb_tx
- *   FDB egress flow flag.
+ * @param[in] add_tag
+ *   Add extra tag action flag.
  * @param[out] sfx_items
  *   Suffix flow match items (list terminated by the END pattern item).
  * @param[in] actions
@@ -4546,6 +4801,8 @@ flow_check_match_action(const struct rte_flow_action actions[],
  *   The sample action position.
  * @param[in] qrss_action_pos
  *   The Queue/RSS action position.
+ * @param[in] jump_table
+ *   Add extra jump action flag.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
@@ -4555,7 +4812,7 @@ flow_check_match_action(const struct rte_flow_action actions[],
  */
 static int
 flow_sample_split_prep(struct rte_eth_dev *dev,
-                      uint32_t fdb_tx,
+                      int add_tag,
                       struct rte_flow_item sfx_items[],
                       const struct rte_flow_action actions[],
                       struct rte_flow_action actions_sfx[],
@@ -4563,14 +4820,17 @@ flow_sample_split_prep(struct rte_eth_dev *dev,
                       int actions_n,
                       int sample_action_pos,
                       int qrss_action_pos,
+                      int jump_table,
                       struct rte_flow_error *error)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_rte_flow_action_set_tag *set_tag;
        struct mlx5_rte_flow_item_tag *tag_spec;
        struct mlx5_rte_flow_item_tag *tag_mask;
+       struct rte_flow_action_jump *jump_action;
        uint32_t tag_id = 0;
        int index;
+       int append_index = 0;
        int ret;
 
        if (sample_action_pos < 0)
@@ -4578,9 +4838,37 @@ flow_sample_split_prep(struct rte_eth_dev *dev,
                                          RTE_FLOW_ERROR_TYPE_ACTION,
                                          NULL, "invalid position of sample "
                                          "action in list");
-       if (!fdb_tx) {
+       /* Prepare the actions for prefix and suffix flow. */
+       if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
+               index = qrss_action_pos;
+               /* Put actions before the Queue/RSS action into prefix flow. */
+               if (index != 0)
+                       memcpy(actions_pre, actions,
+                              sizeof(struct rte_flow_action) * index);
+               /* Put others preceding the sample action into prefix flow. */
+               if (sample_action_pos > index + 1)
+                       memcpy(actions_pre + index, actions + index + 1,
+                              sizeof(struct rte_flow_action) *
+                              (sample_action_pos - index - 1));
+               index = sample_action_pos - 1;
+               /* Put Queue/RSS action into Suffix flow. */
+               memcpy(actions_sfx, actions + qrss_action_pos,
+                      sizeof(struct rte_flow_action));
+               actions_sfx++;
+       } else {
+               index = sample_action_pos;
+               if (index != 0)
+                       memcpy(actions_pre, actions,
+                              sizeof(struct rte_flow_action) * index);
+       }
+       /* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
+        * For CX6DX and above, metadata registers Cx preserve their value,
+        * add an extra tag action for NIC-RX and E-Switch Domain.
+        */
+       if (add_tag) {
                /* Prepare the prefix tag action. */
-               set_tag = (void *)(actions_pre + actions_n + 1);
+               append_index++;
+               set_tag = (void *)(actions_pre + actions_n + append_index);
                ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
                if (ret < 0)
                        return ret;
@@ -4605,32 +4893,7 @@ flow_sample_split_prep(struct rte_eth_dev *dev,
                        .type = (enum rte_flow_item_type)
                                RTE_FLOW_ITEM_TYPE_END,
                };
-       }
-       /* Prepare the actions for prefix and suffix flow. */
-       if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
-               index = qrss_action_pos;
-               /* Put the preceding the Queue/RSS action into prefix flow. */
-               if (index != 0)
-                       memcpy(actions_pre, actions,
-                              sizeof(struct rte_flow_action) * index);
-               /* Put others preceding the sample action into prefix flow. */
-               if (sample_action_pos > index + 1)
-                       memcpy(actions_pre + index, actions + index + 1,
-                              sizeof(struct rte_flow_action) *
-                              (sample_action_pos - index - 1));
-               index = sample_action_pos - 1;
-               /* Put Queue/RSS action into Suffix flow. */
-               memcpy(actions_sfx, actions + qrss_action_pos,
-                      sizeof(struct rte_flow_action));
-               actions_sfx++;
-       } else {
-               index = sample_action_pos;
-               if (index != 0)
-                       memcpy(actions_pre, actions,
-                              sizeof(struct rte_flow_action) * index);
-       }
-       /* Add the extra tag action for NIC-RX and E-Switch ingress. */
-       if (!fdb_tx) {
+               /* Prepare the tag action in prefix subflow. */
                actions_pre[index++] =
                        (struct rte_flow_action){
                        .type = (enum rte_flow_action_type)
@@ -4641,6 +4904,22 @@ flow_sample_split_prep(struct rte_eth_dev *dev,
        memcpy(actions_pre + index, actions + sample_action_pos,
               sizeof(struct rte_flow_action));
        index += 1;
+       /* For the modify action after the sample action in E-Switch mirroring,
+        * add the extra jump action in prefix subflow and jump into the next
+        * table, then do the modify action in the new table.
+        */
+       if (jump_table) {
+               /* Prepare the prefix jump action. */
+               append_index++;
+               jump_action = (void *)(actions_pre + actions_n + append_index);
+               jump_action->group = jump_table;
+               actions_pre[index++] =
+                       (struct rte_flow_action){
+                       .type = (enum rte_flow_action_type)
+                               RTE_FLOW_ACTION_TYPE_JUMP,
+                       .conf = jump_action,
+               };
+       }
        actions_pre[index] = (struct rte_flow_action){
                .type = (enum rte_flow_action_type)
                        RTE_FLOW_ACTION_TYPE_END,
@@ -5043,12 +5322,17 @@ flow_create_split_sample(struct rte_eth_dev *dev,
        int actions_n = 0;
        int sample_action_pos;
        int qrss_action_pos;
+       int add_tag = 0;
+       int modify_after_mirror = 0;
+       uint16_t jump_table = 0;
+       const uint32_t next_ft_step = 1;
        int ret = 0;
 
        if (priv->sampler_en)
                actions_n = flow_check_match_action(actions, attr,
                                        RTE_FLOW_ACTION_TYPE_SAMPLE,
-                                       &sample_action_pos, &qrss_action_pos);
+                                       &sample_action_pos, &qrss_action_pos,
+                                       &modify_after_mirror);
        if (actions_n) {
                /* The prefix actions must includes sample, tag, end. */
                act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
@@ -5064,19 +5348,31 @@ flow_create_split_sample(struct rte_eth_dev *dev,
                                                  "sample flow");
                /* The representor_id is -1 for uplink. */
                fdb_tx = (attr->transfer && priv->representor_id != -1);
-               if (!fdb_tx)
+               /*
+                * When reg_c_preserve is set, metadata registers Cx preserve
+                * their value even through packet duplication.
+                */
+               add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve);
+               if (add_tag)
                        sfx_items = (struct rte_flow_item *)((char *)sfx_actions
                                        + act_size);
+               if (modify_after_mirror)
+                       jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
+                                    next_ft_step;
                pre_actions = sfx_actions + actions_n;
-               tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items,
+               tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
                                                actions, sfx_actions,
                                                pre_actions, actions_n,
                                                sample_action_pos,
-                                               qrss_action_pos, error);
-               if (tag_id < 0 || (!fdb_tx && !tag_id)) {
+                                               qrss_action_pos, jump_table,
+                                               error);
+               if (tag_id < 0 || (add_tag && !tag_id)) {
                        ret = -rte_errno;
                        goto exit;
                }
+               if (modify_after_mirror)
+                       flow_split_info->skip_scale =
+                                       1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
                /* Add the prefix subflow. */
                ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
                                              items, pre_actions,
@@ -5087,23 +5383,30 @@ flow_create_split_sample(struct rte_eth_dev *dev,
                }
                dev_flow->handle->split_flow_id = tag_id;
 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
-               /* Set the sfx group attr. */
-               sample_res = (struct mlx5_flow_dv_sample_resource *)
-                                       dev_flow->dv.sample_res;
-               sfx_tbl = (struct mlx5_flow_tbl_resource *)
-                                       sample_res->normal_path_tbl;
-               sfx_tbl_data = container_of(sfx_tbl,
-                                       struct mlx5_flow_tbl_data_entry, tbl);
-               sfx_attr.group = sfx_attr.transfer ?
-                                       (sfx_tbl_data->table_id - 1) :
-                                        sfx_tbl_data->table_id;
+               if (!modify_after_mirror) {
+                       /* Set the sfx group attr. */
+                       sample_res = (struct mlx5_flow_dv_sample_resource *)
+                                               dev_flow->dv.sample_res;
+                       sfx_tbl = (struct mlx5_flow_tbl_resource *)
+                                               sample_res->normal_path_tbl;
+                       sfx_tbl_data = container_of(sfx_tbl,
+                                               struct mlx5_flow_tbl_data_entry,
+                                               tbl);
+                       sfx_attr.group = sfx_attr.transfer ?
+                                               (sfx_tbl_data->table_id - 1) :
+                                               sfx_tbl_data->table_id;
+               } else {
+                       MLX5_ASSERT(attr->transfer);
+                       sfx_attr.group = jump_table;
+               }
                flow_split_info->prefix_layers =
                                flow_get_prefix_layer_flags(dev_flow);
                flow_split_info->prefix_mark = dev_flow->handle->mark;
                /* Suffix group level already be scaled with factor, set
-                * skip_scale to 1 to avoid scale again in translation.
+                * MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to avoid
+                * scaling it again in translation.
                 */
-               flow_split_info->skip_scale = 1;
+               flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
 #endif
        }
        /* Add the suffix subflow. */
@@ -5254,7 +5557,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
        struct mlx5_priv *priv = dev->data->dev_private;
        struct rte_flow *flow = NULL;
        struct mlx5_flow *dev_flow;
-       const struct rte_flow_action_rss *rss;
+       const struct rte_flow_action_rss *rss = NULL;
        struct mlx5_translated_shared_action
                shared_actions[MLX5_MAX_SHARED_ACTIONS];
        int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
@@ -5332,7 +5635,9 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
        MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
                    flow->drv_type < MLX5_FLOW_TYPE_MAX);
        memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
-       rss = flow_get_rss_action(p_actions_rx);
+       /* The RSS action only works in the NIC Rx domain. */
+       if (attr->ingress && !attr->transfer)
+               rss = flow_get_rss_action(p_actions_rx);
        if (rss) {
                if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
                        return 0;
@@ -5432,7 +5737,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
         */
        if (external || dev->data->dev_started ||
            (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
-            attr->priority == MLX5_FLOW_PRIO_RSVD)) {
+            attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
                ret = flow_drv_apply(dev, flow, error);
                if (ret < 0)
                        goto error;
@@ -5698,7 +6003,7 @@ mlx5_flow_start_default(struct rte_eth_dev *dev)
 /**
  * Release key of thread specific flow workspace data.
  */
-static void
+void
 flow_release_workspace(void *data)
 {
        struct mlx5_flow_workspace *wks = data;
@@ -5712,16 +6017,6 @@ flow_release_workspace(void *data)
        }
 }
 
-/**
- * Initialize key of thread specific flow workspace data.
- */
-static void
-flow_alloc_workspace(void)
-{
-       if (pthread_key_create(&key_workspace, flow_release_workspace))
-               DRV_LOG(ERR, "Can't create flow workspace data thread key.");
-}
-
 /**
  * Get thread specific current flow workspace.
  *
@@ -5732,7 +6027,7 @@ mlx5_flow_get_thread_workspace(void)
 {
        struct mlx5_flow_workspace *data;
 
-       data = pthread_getspecific(key_workspace);
+       data = mlx5_flow_os_get_specific_workspace();
        MLX5_ASSERT(data && data->inuse);
        if (!data || !data->inuse)
                DRV_LOG(ERR, "flow workspace not initialized.");
@@ -5780,11 +6075,7 @@ mlx5_flow_push_thread_workspace(void)
        struct mlx5_flow_workspace *curr;
        struct mlx5_flow_workspace *data;
 
-       if (pthread_once(&key_workspace_init, flow_alloc_workspace)) {
-               DRV_LOG(ERR, "Failed to init flow workspace data thread key.");
-               return NULL;
-       }
-       curr = pthread_getspecific(key_workspace);
+       curr = mlx5_flow_os_get_specific_workspace();
        if (!curr) {
                data = flow_alloc_thread_workspace();
                if (!data)
@@ -5803,7 +6094,7 @@ mlx5_flow_push_thread_workspace(void)
        data->inuse = 1;
        data->flow_idx = 0;
        /* Set as current workspace */
-       if (pthread_setspecific(key_workspace, data))
+       if (mlx5_flow_os_set_specific_workspace(data))
                DRV_LOG(ERR, "Failed to set flow workspace to thread.");
        return data;
 }
@@ -5829,7 +6120,7 @@ mlx5_flow_pop_thread_workspace(void)
        data->inuse = 0;
        if (!data->prev)
                return;
-       if (pthread_setspecific(key_workspace, data->prev))
+       if (mlx5_flow_os_set_specific_workspace(data->prev))
                DRV_LOG(ERR, "Failed to set flow workspace to thread.");
 }
 
@@ -5946,7 +6237,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
        struct mlx5_priv *priv = dev->data->dev_private;
        const struct rte_flow_attr attr = {
                .ingress = 1,
-               .priority = MLX5_FLOW_PRIO_RSVD,
+               .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
        };
        struct rte_flow_item items[] = {
                {
@@ -6135,9 +6426,9 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
        }
        priv->isolated = !!enable;
        if (enable)
-               dev->dev_ops = &mlx5_os_dev_ops_isolate;
+               dev->dev_ops = &mlx5_dev_ops_isolate;
        else
-               dev->dev_ops = &mlx5_os_dev_ops;
+               dev->dev_ops = &mlx5_dev_ops;
 
        dev->rx_descriptor_status = mlx5_rx_descriptor_status;
        dev->tx_descriptor_status = mlx5_tx_descriptor_status;
@@ -6446,7 +6737,7 @@ mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
        }
        mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
        size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
-       mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size,
+       mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size,
                                                 IBV_ACCESS_LOCAL_WRITE);
        if (!mem_mng->umem) {
                rte_errno = errno;
@@ -6465,7 +6756,7 @@ mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
        mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
        mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
        if (!mem_mng->dm) {
-               mlx5_glue->devx_umem_dereg(mem_mng->umem);
+               mlx5_os_umem_dereg(mem_mng->umem);
                rte_errno = errno;
                mlx5_free(mem);
                return -rte_errno;
@@ -6831,7 +7122,7 @@ mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
        for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
                struct rte_flow_attr attr = {
                        .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
-                       .priority = MLX5_FLOW_PRIO_RSVD,
+                       .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
                        .ingress = 1,
                };
                struct rte_flow_item items[] = {