net/ring: release port upon close
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_dv.c
index 85e90ed..79fdf34 100644 (file)
@@ -8,16 +8,6 @@
 #include <string.h>
 #include <unistd.h>
 
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
 #include <rte_common.h>
 #include <rte_ether.h>
 #include <rte_ethdev_driver.h>
@@ -30,7 +20,9 @@
 #include <rte_vxlan.h>
 #include <rte_gtp.h>
 #include <rte_eal_paging.h>
+#include <rte_mpls.h>
 
+#include <mlx5_glue.h>
 #include <mlx5_devx_cmds.h>
 #include <mlx5_prm.h>
 #include <mlx5_malloc.h>
@@ -953,7 +945,7 @@ flow_dv_convert_action_modify_tcp_ack
 }
 
 static enum mlx5_modification_field reg_to_field[] = {
-       [REG_NONE] = MLX5_MODI_OUT_NONE,
+       [REG_NON] = MLX5_MODI_OUT_NONE,
        [REG_A] = MLX5_MODI_META_DATA_REG_A,
        [REG_B] = MLX5_MODI_META_DATA_REG_B,
        [REG_C_0] = MLX5_MODI_META_REG_C_0,
@@ -993,7 +985,7 @@ flow_dv_convert_action_set_reg
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "too many items to modify");
-       MLX5_ASSERT(conf->id != REG_NONE);
+       MLX5_ASSERT(conf->id != REG_NON);
        MLX5_ASSERT(conf->id < RTE_DIM(reg_to_field));
        actions[i] = (struct mlx5_modification_cmd) {
                .action_type = MLX5_MODIFICATION_TYPE_SET,
@@ -1043,7 +1035,7 @@ flow_dv_convert_action_set_tag
        ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, conf->index, error);
        if (ret < 0)
                return ret;
-       MLX5_ASSERT(ret != REG_NONE);
+       MLX5_ASSERT(ret != REG_NON);
        MLX5_ASSERT((unsigned int)ret < RTE_DIM(reg_to_field));
        reg_type = reg_to_field[ret];
        MLX5_ASSERT(reg_type > 0);
@@ -1160,8 +1152,7 @@ flow_dv_convert_action_mark(struct rte_eth_dev *dev,
                .mask = &mask,
        };
        struct field_modify_info reg_c_x[] = {
-               {4, 0, 0}, /* dynamic instead of MLX5_MODI_META_REG_C_1. */
-               {0, 0, 0},
+               [1] = {0, 0, 0},
        };
        int reg;
 
@@ -1181,7 +1172,7 @@ flow_dv_convert_action_mark(struct rte_eth_dev *dev,
                mask = rte_cpu_to_be_32(mask) & msk_c0;
                mask = rte_cpu_to_be_32(mask << shl_c0);
        }
-       reg_c_x[0].id = reg_to_field[reg];
+       reg_c_x[0] = (struct field_modify_info){4, 0, reg_to_field[reg]};
        return flow_dv_convert_modify_action(&item, reg_c_x, NULL, resource,
                                             MLX5_MODIFICATION_TYPE_SET, error);
 }
@@ -1567,7 +1558,7 @@ flow_dv_validate_item_tag(struct rte_eth_dev *dev,
        ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, spec->index, error);
        if (ret < 0)
                return ret;
-       MLX5_ASSERT(ret != REG_NONE);
+       MLX5_ASSERT(ret != REG_NON);
        return 0;
 }
 
@@ -1841,7 +1832,17 @@ flow_dv_validate_action_pop_vlan(struct rte_eth_dev *dev,
                                          RTE_FLOW_ERROR_TYPE_ACTION, action,
                                          "no support for multiple VLAN "
                                          "actions");
-       if (!(item_flags & MLX5_FLOW_LAYER_OUTER_VLAN))
+       /* Pop VLAN with preceding Decap requires inner header with VLAN. */
+       if ((action_flags & MLX5_FLOW_ACTION_DECAP) &&
+           !(item_flags & MLX5_FLOW_LAYER_INNER_VLAN))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "cannot pop vlan after decap without "
+                                         "match on inner vlan in the flow");
+       /* Pop VLAN without preceding Decap requires outer header with VLAN. */
+       if (!(action_flags & MLX5_FLOW_ACTION_DECAP) &&
+           !(item_flags & MLX5_FLOW_LAYER_OUTER_VLAN))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                          NULL,
@@ -1950,22 +1951,11 @@ flow_dv_validate_action_push_vlan(struct rte_eth_dev *dev,
        const struct rte_flow_action_of_push_vlan *push_vlan = action->conf;
        const struct mlx5_priv *priv = dev->data->dev_private;
 
-       if (!attr->transfer && attr->ingress)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
-                                         NULL,
-                                         "push VLAN action not supported for "
-                                         "ingress");
        if (push_vlan->ethertype != RTE_BE16(RTE_ETHER_TYPE_VLAN) &&
            push_vlan->ethertype != RTE_BE16(RTE_ETHER_TYPE_QINQ))
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, action,
                                          "invalid vlan ethertype");
-       if (action_flags & MLX5_FLOW_VLAN_ACTIONS)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION, action,
-                                         "no support for multiple VLAN "
-                                         "actions");
        if (action_flags & MLX5_FLOW_ACTION_PORT_ID)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, action,
@@ -2555,6 +2545,39 @@ flow_dv_validate_action_raw_encap_decap
        return 0;
 }
 
+/**
+ * Match callback comparing a cached encap/decap resource with a new one.
+ *
+ * @param entry
+ *   Pointer to the existing (cached) resource entry object.
+ * @param ctx
+ *   Pointer to the new encap_decap resource to compare against.
+ *
+ * @return
+ *   0 if key, types, flags, size and buffer all match, -1 otherwise.
+ */
+static int
+flow_dv_encap_decap_resource_match(struct mlx5_hlist_entry *entry, void *ctx)
+{
+       struct mlx5_flow_dv_encap_decap_resource *resource;
+       struct mlx5_flow_dv_encap_decap_resource *cache_resource;
+
+       resource = (struct mlx5_flow_dv_encap_decap_resource *)ctx;
+       cache_resource = container_of(entry,
+                                     struct mlx5_flow_dv_encap_decap_resource,
+                                     entry);
+       if (resource->entry.key == cache_resource->entry.key &&
+           resource->reformat_type == cache_resource->reformat_type &&
+           resource->ft_type == cache_resource->ft_type &&
+           resource->flags == cache_resource->flags &&
+           resource->size == cache_resource->size &&
+           !memcmp((const void *)resource->buf,
+                   (const void *)cache_resource->buf,
+                   resource->size))
+               return 0;
+       return -1;
+}
+
 /**
  * Find existing encap/decap resource or create and register a new one.
  *
@@ -2581,7 +2604,16 @@ flow_dv_encap_decap_resource_register
        struct mlx5_dev_ctx_shared *sh = priv->sh;
        struct mlx5_flow_dv_encap_decap_resource *cache_resource;
        struct mlx5dv_dr_domain *domain;
-       uint32_t idx = 0;
+       struct mlx5_hlist_entry *entry;
+       union mlx5_flow_encap_decap_key encap_decap_key = {
+               {
+                       .ft_type = resource->ft_type,
+                       .refmt_type = resource->reformat_type,
+                       .buf_size = resource->size,
+                       .table_level = !!dev_flow->dv.group,
+                       .cksum = 0,
+               }
+       };
        int ret;
 
        resource->flags = dev_flow->dv.group ? 0 : 1;
@@ -2591,24 +2623,23 @@ flow_dv_encap_decap_resource_register
                domain = sh->rx_domain;
        else
                domain = sh->tx_domain;
+       encap_decap_key.cksum = __rte_raw_cksum(resource->buf,
+                                               resource->size, 0);
+       resource->entry.key = encap_decap_key.v64;
        /* Lookup a matching resource from cache. */
-       ILIST_FOREACH(sh->ipool[MLX5_IPOOL_DECAP_ENCAP], sh->encaps_decaps, idx,
-                     cache_resource, next) {
-               if (resource->reformat_type == cache_resource->reformat_type &&
-                   resource->ft_type == cache_resource->ft_type &&
-                   resource->flags == cache_resource->flags &&
-                   resource->size == cache_resource->size &&
-                   !memcmp((const void *)resource->buf,
-                           (const void *)cache_resource->buf,
-                           resource->size)) {
-                       DRV_LOG(DEBUG, "encap/decap resource %p: refcnt %d++",
-                               (void *)cache_resource,
-                               rte_atomic32_read(&cache_resource->refcnt));
-                       rte_atomic32_inc(&cache_resource->refcnt);
-                       dev_flow->handle->dvh.rix_encap_decap = idx;
-                       dev_flow->dv.encap_decap = cache_resource;
-                       return 0;
-               }
+       entry = mlx5_hlist_lookup_ex(sh->encaps_decaps, resource->entry.key,
+                                    flow_dv_encap_decap_resource_match,
+                                    (void *)resource);
+       if (entry) {
+               cache_resource = container_of(entry,
+                       struct mlx5_flow_dv_encap_decap_resource, entry);
+               DRV_LOG(DEBUG, "encap/decap resource %p: refcnt %d++",
+                       (void *)cache_resource,
+                       rte_atomic32_read(&cache_resource->refcnt));
+               rte_atomic32_inc(&cache_resource->refcnt);
+               dev_flow->handle->dvh.rix_encap_decap = cache_resource->idx;
+               dev_flow->dv.encap_decap = cache_resource;
+               return 0;
        }
        /* Register new encap/decap resource. */
        cache_resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_DECAP_ENCAP],
@@ -2618,6 +2649,7 @@ flow_dv_encap_decap_resource_register
                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                                          "cannot allocate resource memory");
        *cache_resource = *resource;
+       cache_resource->idx = dev_flow->handle->dvh.rix_encap_decap;
        ret = mlx5_flow_os_create_flow_action_packet_reformat
                                        (sh->ctx, domain, cache_resource,
                                         &cache_resource->action);
@@ -2629,9 +2661,17 @@ flow_dv_encap_decap_resource_register
        }
        rte_atomic32_init(&cache_resource->refcnt);
        rte_atomic32_inc(&cache_resource->refcnt);
-       ILIST_INSERT(sh->ipool[MLX5_IPOOL_DECAP_ENCAP], &sh->encaps_decaps,
-                    dev_flow->handle->dvh.rix_encap_decap, cache_resource,
-                    next);
+       if (mlx5_hlist_insert_ex(sh->encaps_decaps, &cache_resource->entry,
+                                flow_dv_encap_decap_resource_match,
+                                (void *)cache_resource)) {
+               claim_zero(mlx5_flow_os_destroy_flow_action
+                                               (cache_resource->action));
+               mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_DECAP_ENCAP],
+                               cache_resource->idx);
+               return rte_flow_error_set(error, EEXIST,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, "action exist");
+       }
        dev_flow->dv.encap_decap = cache_resource;
        DRV_LOG(DEBUG, "new encap/decap resource %p: refcnt %d++",
                (void *)cache_resource,
@@ -2876,7 +2916,7 @@ flow_dv_push_vlan_action_resource_register
        return 0;
 }
 /**
- * Get the size of specific rte_flow_item_type
+ * Get the header size of a specific rte_flow_item_type
  *
  * @param[in] item_type
  *   Tested rte_flow_item_type.
@@ -2885,43 +2925,39 @@ flow_dv_push_vlan_action_resource_register
  *   sizeof struct item_type, 0 if void or irrelevant.
  */
 static size_t
-flow_dv_get_item_len(const enum rte_flow_item_type item_type)
+flow_dv_get_item_hdr_len(const enum rte_flow_item_type item_type)
 {
        size_t retval;
 
        switch (item_type) {
        case RTE_FLOW_ITEM_TYPE_ETH:
-               retval = sizeof(struct rte_flow_item_eth);
+               retval = sizeof(struct rte_ether_hdr);
                break;
        case RTE_FLOW_ITEM_TYPE_VLAN:
-               retval = sizeof(struct rte_flow_item_vlan);
+               retval = sizeof(struct rte_vlan_hdr);
                break;
        case RTE_FLOW_ITEM_TYPE_IPV4:
-               retval = sizeof(struct rte_flow_item_ipv4);
+               retval = sizeof(struct rte_ipv4_hdr);
                break;
        case RTE_FLOW_ITEM_TYPE_IPV6:
-               retval = sizeof(struct rte_flow_item_ipv6);
+               retval = sizeof(struct rte_ipv6_hdr);
                break;
        case RTE_FLOW_ITEM_TYPE_UDP:
-               retval = sizeof(struct rte_flow_item_udp);
+               retval = sizeof(struct rte_udp_hdr);
                break;
        case RTE_FLOW_ITEM_TYPE_TCP:
-               retval = sizeof(struct rte_flow_item_tcp);
+               retval = sizeof(struct rte_tcp_hdr);
                break;
        case RTE_FLOW_ITEM_TYPE_VXLAN:
-               retval = sizeof(struct rte_flow_item_vxlan);
+       case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+               retval = sizeof(struct rte_vxlan_hdr);
                break;
        case RTE_FLOW_ITEM_TYPE_GRE:
-               retval = sizeof(struct rte_flow_item_gre);
-               break;
        case RTE_FLOW_ITEM_TYPE_NVGRE:
-               retval = sizeof(struct rte_flow_item_nvgre);
-               break;
-       case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
-               retval = sizeof(struct rte_flow_item_vxlan_gpe);
+               retval = sizeof(struct rte_gre_hdr);
                break;
        case RTE_FLOW_ITEM_TYPE_MPLS:
-               retval = sizeof(struct rte_flow_item_mpls);
+               retval = sizeof(struct rte_mpls_hdr);
                break;
        case RTE_FLOW_ITEM_TYPE_VOID: /* Fall through. */
        default:
@@ -2974,7 +3010,7 @@ flow_dv_convert_encap_data(const struct rte_flow_item *items, uint8_t *buf,
                                          RTE_FLOW_ERROR_TYPE_ACTION,
                                          NULL, "invalid empty data");
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
-               len = flow_dv_get_item_len(items->type);
+               len = flow_dv_get_item_hdr_len(items->type);
                if (len + temp_size > MLX5_ENCAP_MAX_LEN)
                        return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ACTION,
@@ -3971,6 +4007,40 @@ flow_dv_validate_action_modify_ipv6_dscp(const uint64_t action_flags,
        return ret;
 }
 
+/**
+ * Match callback comparing a cached modify-header resource with a new one.
+ *
+ * @param entry
+ *   Pointer to the existing (cached) resource entry object.
+ * @param ctx
+ *   Pointer to the new modify-header resource to compare against.
+ *
+ * @return
+ *   0 if key, type, action count, flags and actions match, -1 otherwise.
+ */
+static int
+flow_dv_modify_hdr_resource_match(struct mlx5_hlist_entry *entry, void *ctx)
+{
+       struct mlx5_flow_dv_modify_hdr_resource *resource;
+       struct mlx5_flow_dv_modify_hdr_resource *cache_resource;
+       uint32_t actions_len;
+
+       resource = (struct mlx5_flow_dv_modify_hdr_resource *)ctx;
+       cache_resource = container_of(entry,
+                                     struct mlx5_flow_dv_modify_hdr_resource,
+                                     entry);
+       actions_len = resource->actions_num * sizeof(resource->actions[0]);
+       if (resource->entry.key == cache_resource->entry.key &&
+           resource->ft_type == cache_resource->ft_type &&
+           resource->actions_num == cache_resource->actions_num &&
+           resource->flags == cache_resource->flags &&
+           !memcmp((const void *)resource->actions,
+                   (const void *)cache_resource->actions,
+                   actions_len))
+               return 0;
+       return -1;
+}
+
 /**
  * Find existing modify-header resource or create and register a new one.
  *
@@ -3998,6 +4068,15 @@ flow_dv_modify_hdr_resource_register
        struct mlx5_flow_dv_modify_hdr_resource *cache_resource;
        struct mlx5dv_dr_domain *ns;
        uint32_t actions_len;
+       struct mlx5_hlist_entry *entry;
+       union mlx5_flow_modify_hdr_key hdr_mod_key = {
+               {
+                       .ft_type = resource->ft_type,
+                       .actions_num = resource->actions_num,
+                       .group = dev_flow->dv.group,
+                       .cksum = 0,
+               }
+       };
        int ret;
 
        resource->flags = dev_flow->dv.group ? 0 :
@@ -4015,20 +4094,22 @@ flow_dv_modify_hdr_resource_register
                ns = sh->rx_domain;
        /* Lookup a matching resource from cache. */
        actions_len = resource->actions_num * sizeof(resource->actions[0]);
-       LIST_FOREACH(cache_resource, &sh->modify_cmds, next) {
-               if (resource->ft_type == cache_resource->ft_type &&
-                   resource->actions_num == cache_resource->actions_num &&
-                   resource->flags == cache_resource->flags &&
-                   !memcmp((const void *)resource->actions,
-                           (const void *)cache_resource->actions,
-                           actions_len)) {
-                       DRV_LOG(DEBUG, "modify-header resource %p: refcnt %d++",
-                               (void *)cache_resource,
-                               rte_atomic32_read(&cache_resource->refcnt));
-                       rte_atomic32_inc(&cache_resource->refcnt);
-                       dev_flow->handle->dvh.modify_hdr = cache_resource;
-                       return 0;
-               }
+       hdr_mod_key.cksum = __rte_raw_cksum(resource->actions, actions_len, 0);
+       resource->entry.key = hdr_mod_key.v64;
+       entry = mlx5_hlist_lookup_ex(sh->modify_cmds, resource->entry.key,
+                                    flow_dv_modify_hdr_resource_match,
+                                    (void *)resource);
+       if (entry) {
+               cache_resource = container_of(entry,
+                                       struct mlx5_flow_dv_modify_hdr_resource,
+                                       entry);
+               DRV_LOG(DEBUG, "modify-header resource %p: refcnt %d++",
+                       (void *)cache_resource,
+                       rte_atomic32_read(&cache_resource->refcnt));
+               rte_atomic32_inc(&cache_resource->refcnt);
+               dev_flow->handle->dvh.modify_hdr = cache_resource;
+               return 0;
+
        }
        /* Register new modify-header resource. */
        cache_resource = mlx5_malloc(MLX5_MEM_ZERO,
@@ -4051,7 +4132,16 @@ flow_dv_modify_hdr_resource_register
        }
        rte_atomic32_init(&cache_resource->refcnt);
        rte_atomic32_inc(&cache_resource->refcnt);
-       LIST_INSERT_HEAD(&sh->modify_cmds, cache_resource, next);
+       if (mlx5_hlist_insert_ex(sh->modify_cmds, &cache_resource->entry,
+                                flow_dv_modify_hdr_resource_match,
+                                (void *)cache_resource)) {
+               claim_zero(mlx5_flow_os_destroy_flow_action
+                                               (cache_resource->action));
+               mlx5_free(cache_resource);
+               return rte_flow_error_set(error, EEXIST,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, "action exist");
+       }
        dev_flow->handle->dvh.modify_hdr = cache_resource;
        DRV_LOG(DEBUG, "new modify-header resource %p: refcnt %d++",
                (void *)cache_resource,
@@ -4412,44 +4502,71 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
                cont->last_pool_idx = pool->index;
        }
        /* Pool initialization must be updated before host thread access. */
-       rte_cio_wmb();
+       rte_io_wmb();
        rte_atomic16_add(&cont->n_valid, 1);
        return pool;
 }
 
 /**
- * Update the minimum dcs-id for aged or no-aged counter pool.
+ * Restore skipped counters in the pool.
+ *
+ * As counter pool query requires the first counter dcs
+ * ID to be aligned to 4, if the pool's min_dcs ID is
+ * not aligned to 4, the pool counters will be
+ * skipped.
+ * Once another min_dcs ID lower than these skipped
+ * counters' dcs IDs appears, the skipped counters will
+ * be safe to use.
+ * Should be called when min_dcs is updated.
  *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
  * @param[in] pool
  *   Current counter pool.
- * @param[in] batch
- *   Whether the pool is for counter that was allocated by batch command.
- * @param[in] age
- *   Whether the counter is for aging.
+ * @param[in] last_min_dcs
+ *   Last min_dcs.
  */
 static void
-flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev,
-                       struct mlx5_flow_counter_pool *pool,
-                       uint32_t batch, uint32_t age)
+flow_dv_counter_restore(struct mlx5_flow_counter_pool *pool,
+                       struct mlx5_devx_obj *last_min_dcs)
 {
-       struct mlx5_priv *priv = dev->data->dev_private;
-       struct mlx5_flow_counter_pool *other;
-       struct mlx5_pools_container *cont;
+       struct mlx5_flow_counter_ext *cnt_ext;
+       uint32_t offset, new_offset;
+       uint32_t skip_cnt = 0;
+       uint32_t i;
 
-       cont = MLX5_CNT_CONTAINER(priv->sh, batch, (age ^ 0x1));
-       other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id);
-       if (!other)
+       if (!pool->skip_cnt)
                return;
-       if (pool->min_dcs->id < other->min_dcs->id) {
-               rte_atomic64_set(&other->a64_dcs,
-                       rte_atomic64_read(&pool->a64_dcs));
-       } else {
-               rte_atomic64_set(&pool->a64_dcs,
-                       rte_atomic64_read(&other->a64_dcs));
+       /*
+        * If last min_dcs is not valid, skipped counters may even be after
+        * last min_dcs, so set the offset to the whole pool.
+        */
+       if (last_min_dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))
+               offset = MLX5_COUNTERS_PER_POOL;
+       else
+               offset = last_min_dcs->id % MLX5_COUNTERS_PER_POOL;
+       new_offset = pool->min_dcs->id % MLX5_COUNTERS_PER_POOL;
+       /*
+        * Check the counters from 1 to the last_min_dcs range. Skipped
+        * counters before new min_dcs mean the pool still has skipped
+        * counters; those after new min_dcs become ready to use.
+        * Offset 0 counter must be empty or min_dcs, so start from 1.
+        */
+       for (i = 1; i < offset; i++) {
+               cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
+               if (cnt_ext->skipped) {
+                       if (i > new_offset) {
+                               cnt_ext->skipped = 0;
+                               TAILQ_INSERT_TAIL
+                                       (&pool->counters[pool->query_gen],
+                                        MLX5_POOL_GET_CNT(pool, i), next);
+                       } else {
+                               skip_cnt++;
+                       }
+               }
        }
+       if (!skip_cnt)
+               pool->skip_cnt = 0;
 }
+
 /**
  * Prepare a new counter and/or a new counter pool.
  *
@@ -4475,33 +4592,84 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
        struct mlx5_pools_container *cont;
        struct mlx5_flow_counter_pool *pool;
        struct mlx5_counters tmp_tq;
+       struct mlx5_devx_obj *last_min_dcs;
        struct mlx5_devx_obj *dcs = NULL;
        struct mlx5_flow_counter *cnt;
+       uint32_t add2other;
        uint32_t i;
 
        cont = MLX5_CNT_CONTAINER(priv->sh, batch, age);
        if (!batch) {
+retry:
+               add2other = 0;
                /* bulk_bitmap must be 0 for single counter allocation. */
                dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
                if (!dcs)
                        return NULL;
                pool = flow_dv_find_pool_by_id(cont, dcs->id);
+               /* Check if counter belongs to an existing pool ID range. */
                if (!pool) {
-                       pool = flow_dv_pool_create(dev, dcs, batch, age);
-                       if (!pool) {
-                               mlx5_devx_cmd_destroy(dcs);
-                               return NULL;
+                       pool = flow_dv_find_pool_by_id
+                              (MLX5_CNT_CONTAINER
+                              (priv->sh, batch, (age ^ 0x1)), dcs->id);
+                       /*
+                        * Pool exists, counter will be added to the other
+                        * container, need to reallocate it later.
+                        */
+                       if (pool) {
+                               add2other = 1;
+                       } else {
+                               pool = flow_dv_pool_create(dev, dcs, batch,
+                                                          age);
+                               if (!pool) {
+                                       mlx5_devx_cmd_destroy(dcs);
+                                       return NULL;
+                               }
                        }
-               } else if (dcs->id < pool->min_dcs->id) {
+               }
+               if ((dcs->id < pool->min_dcs->id ||
+                   pool->min_dcs->id &
+                   (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) &&
+                   !(dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))) {
+                       /*
+                        * Update the pool min_dcs only if current dcs is
+                        * valid and existing min_dcs is not valid or greater
+                        * than new dcs.
+                        */
+                       last_min_dcs = pool->min_dcs;
                        rte_atomic64_set(&pool->a64_dcs,
                                         (int64_t)(uintptr_t)dcs);
+                       /*
+                        * Restore any skipped counters if the new min_dcs
+                        * ID is smaller or min_dcs is not valid.
+                        */
+                       if (dcs->id < last_min_dcs->id ||
+                           last_min_dcs->id &
+                           (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))
+                               flow_dv_counter_restore(pool, last_min_dcs);
                }
-               flow_dv_counter_update_min_dcs(dev,
-                                               pool, batch, age);
                i = dcs->id % MLX5_COUNTERS_PER_POOL;
                cnt = MLX5_POOL_GET_CNT(pool, i);
                cnt->pool = pool;
                MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs;
+               /*
+                * If min_dcs is not valid, it means the newly allocated dcs
+                * also failed to become the valid min_dcs, just skip it.
+                * Or if min_dcs is valid and the new dcs ID is smaller than
+                * min_dcs but did not become the min_dcs, also skip it.
+                */
+               if (pool->min_dcs->id &
+                   (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1) ||
+                   dcs->id < pool->min_dcs->id) {
+                       MLX5_GET_POOL_CNT_EXT(pool, i)->skipped = 1;
+                       pool->skip_cnt = 1;
+                       goto retry;
+               }
+               if (add2other) {
+                       TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen],
+                                         cnt, next);
+                       goto retry;
+               }
                *cnt_free = cnt;
                return pool;
        }
@@ -5690,21 +5858,38 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
                                                  actions,
                                                  "no fate action is found");
        }
-       /* Continue validation for Xcap actions.*/
-       if ((action_flags & MLX5_FLOW_XCAP_ACTIONS) && (queue_index == 0xFFFF ||
-           mlx5_rxq_get_type(dev, queue_index) != MLX5_RXQ_TYPE_HAIRPIN)) {
+       /* Continue validation for Xcap and VLAN actions. */
+       if ((action_flags & (MLX5_FLOW_XCAP_ACTIONS |
+                            MLX5_FLOW_VLAN_ACTIONS)) &&
+           (queue_index == 0xFFFF ||
+            mlx5_rxq_get_type(dev, queue_index) != MLX5_RXQ_TYPE_HAIRPIN)) {
                if ((action_flags & MLX5_FLOW_XCAP_ACTIONS) ==
                    MLX5_FLOW_XCAP_ACTIONS)
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ACTION,
                                                  NULL, "encap and decap "
                                                  "combination aren't supported");
-               if (!attr->transfer && attr->ingress && (action_flags &
-                                                       MLX5_FLOW_ACTION_ENCAP))
-                       return rte_flow_error_set(error, ENOTSUP,
-                                                 RTE_FLOW_ERROR_TYPE_ACTION,
-                                                 NULL, "encap is not supported"
-                                                 " for ingress traffic");
+               if (!attr->transfer && attr->ingress) {
+                       if (action_flags & MLX5_FLOW_ACTION_ENCAP)
+                               return rte_flow_error_set
+                                               (error, ENOTSUP,
+                                                RTE_FLOW_ERROR_TYPE_ACTION,
+                                                NULL, "encap is not supported"
+                                                " for ingress traffic");
+                       else if (action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN)
+                               return rte_flow_error_set
+                                               (error, ENOTSUP,
+                                                RTE_FLOW_ERROR_TYPE_ACTION,
+                                                NULL, "push VLAN action not "
+                                                "supported for ingress");
+                       else if ((action_flags & MLX5_FLOW_VLAN_ACTIONS) ==
+                                       MLX5_FLOW_VLAN_ACTIONS)
+                               return rte_flow_error_set
+                                               (error, ENOTSUP,
+                                                RTE_FLOW_ERROR_TYPE_ACTION,
+                                                NULL, "no support for "
+                                                "multiple VLAN actions");
+               }
        }
        /* Hairpin flow will add one more TAG action. */
        if (hairpin > 0)
@@ -6462,8 +6647,8 @@ flow_dv_translate_item_nvgre(void *matcher, void *key,
        const struct rte_flow_item_nvgre *nvgre_v = item->spec;
        void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
        void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
-       const char *tni_flow_id_m = (const char *)nvgre_m->tni;
-       const char *tni_flow_id_v = (const char *)nvgre_v->tni;
+       const char *tni_flow_id_m;
+       const char *tni_flow_id_v;
        char *gre_key_m;
        char *gre_key_v;
        int size;
@@ -6488,6 +6673,8 @@ flow_dv_translate_item_nvgre(void *matcher, void *key,
                return;
        if (!nvgre_m)
                nvgre_m = &rte_flow_item_nvgre_mask;
+       tni_flow_id_m = (const char *)nvgre_m->tni;
+       tni_flow_id_v = (const char *)nvgre_v->tni;
        size = sizeof(nvgre_m->tni) + sizeof(nvgre_m->flow_id);
        gre_key_m = MLX5_ADDR_OF(fte_match_set_misc, misc_m, gre_key_h);
        gre_key_v = MLX5_ADDR_OF(fte_match_set_misc, misc_v, gre_key_h);
@@ -8597,8 +8784,7 @@ __flow_dv_translate(struct rte_eth_dev *dev,
                case RTE_FLOW_ITEM_TYPE_GRE:
                        flow_dv_translate_item_gre(match_mask, match_value,
                                                   items, tunnel);
-                       matcher.priority = rss_desc->level >= 2 ?
-                                   MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
+                       matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        last_item = MLX5_FLOW_LAYER_GRE;
                        break;
                case RTE_FLOW_ITEM_TYPE_GRE_KEY:
@@ -8609,37 +8795,32 @@ __flow_dv_translate(struct rte_eth_dev *dev,
                case RTE_FLOW_ITEM_TYPE_NVGRE:
                        flow_dv_translate_item_nvgre(match_mask, match_value,
                                                     items, tunnel);
-                       matcher.priority = rss_desc->level >= 2 ?
-                                   MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
+                       matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        last_item = MLX5_FLOW_LAYER_GRE;
                        break;
                case RTE_FLOW_ITEM_TYPE_VXLAN:
                        flow_dv_translate_item_vxlan(match_mask, match_value,
                                                     items, tunnel);
-                       matcher.priority = rss_desc->level >= 2 ?
-                                   MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
+                       matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        last_item = MLX5_FLOW_LAYER_VXLAN;
                        break;
                case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
                        flow_dv_translate_item_vxlan_gpe(match_mask,
                                                         match_value, items,
                                                         tunnel);
-                       matcher.priority = rss_desc->level >= 2 ?
-                                   MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
+                       matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
                        break;
                case RTE_FLOW_ITEM_TYPE_GENEVE:
                        flow_dv_translate_item_geneve(match_mask, match_value,
                                                      items, tunnel);
-                       matcher.priority = rss_desc->level >= 2 ?
-                                   MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
+                       matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        last_item = MLX5_FLOW_LAYER_GENEVE;
                        break;
                case RTE_FLOW_ITEM_TYPE_MPLS:
                        flow_dv_translate_item_mpls(match_mask, match_value,
                                                    items, last_item, tunnel);
-                       matcher.priority = rss_desc->level >= 2 ?
-                                   MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
+                       matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        last_item = MLX5_FLOW_LAYER_MPLS;
                        break;
                case RTE_FLOW_ITEM_TYPE_MARK:
@@ -8681,8 +8862,7 @@ __flow_dv_translate(struct rte_eth_dev *dev,
                case RTE_FLOW_ITEM_TYPE_GTP:
                        flow_dv_translate_item_gtp(match_mask, match_value,
                                                   items, tunnel);
-                       matcher.priority = rss_desc->level >= 2 ?
-                                   MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
+                       matcher.priority = MLX5_TUNNEL_PRIO_GET(rss_desc);
                        last_item = MLX5_FLOW_LAYER_GTP;
                        break;
                case RTE_FLOW_ITEM_TYPE_ECPRI:
@@ -8787,7 +8967,7 @@ __flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                                dv->actions[n++] = priv->sh->esw_drop_action;
                        } else {
                                struct mlx5_hrxq *drop_hrxq;
-                               drop_hrxq = mlx5_hrxq_drop_new(dev);
+                               drop_hrxq = mlx5_drop_action_create(dev);
                                if (!drop_hrxq) {
                                        rte_flow_error_set
                                                (error, errno,
@@ -8798,7 +8978,7 @@ __flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                                }
                                /*
                                 * Drop queues will be released by the specify
-                                * mlx5_hrxq_drop_release() function. Assign
+                                * mlx5_drop_action_destroy() function. Assign
                                 * the special index to hrxq to mark the queue
                                 * has been allocated.
                                 */
@@ -8821,12 +9001,12 @@ __flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                        if (!hrxq_idx) {
                                hrxq_idx = mlx5_hrxq_new
                                                (dev, rss_desc->key,
-                                               MLX5_RSS_HASH_KEY_LEN,
-                                               dev_flow->hash_fields,
-                                               rss_desc->queue,
-                                               rss_desc->queue_num,
-                                               !!(dh->layers &
-                                               MLX5_FLOW_LAYER_TUNNEL));
+                                                MLX5_RSS_HASH_KEY_LEN,
+                                                dev_flow->hash_fields,
+                                                rss_desc->queue,
+                                                rss_desc->queue_num,
+                                                !!(dh->layers &
+                                                MLX5_FLOW_LAYER_TUNNEL));
                        }
                        hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
                                              hrxq_idx);
@@ -8883,7 +9063,7 @@ error_default_miss:
                /* hrxq is union, don't clear it if the flag is not set. */
                if (dh->rix_hrxq) {
                        if (dh->fate_action == MLX5_FLOW_FATE_DROP) {
-                               mlx5_hrxq_drop_release(dev);
+                               mlx5_drop_action_destroy(dev);
                                dh->rix_hrxq = 0;
                        } else if (dh->fate_action == MLX5_FLOW_FATE_QUEUE) {
                                mlx5_hrxq_release(dev, dh->rix_hrxq);
@@ -8962,9 +9142,8 @@ flow_dv_encap_decap_resource_release(struct rte_eth_dev *dev,
        if (rte_atomic32_dec_and_test(&cache_resource->refcnt)) {
                claim_zero(mlx5_flow_os_destroy_flow_action
                                                (cache_resource->action));
-               ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_DECAP_ENCAP],
-                            &priv->sh->encaps_decaps, idx,
-                            cache_resource, next);
+               mlx5_hlist_remove(priv->sh->encaps_decaps,
+                                 &cache_resource->entry);
                mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_DECAP_ENCAP], idx);
                DRV_LOG(DEBUG, "encap/decap resource %p: removed",
                        (void *)cache_resource);
@@ -9046,6 +9225,8 @@ flow_dv_default_miss_resource_release(struct rte_eth_dev *dev)
 /**
  * Release a modify-header resource.
  *
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param handle
  *   Pointer to mlx5_flow_handle.
  *
@@ -9053,8 +9234,10 @@ flow_dv_default_miss_resource_release(struct rte_eth_dev *dev)
  *   1 while a reference on it exists, 0 when freed.
  */
 static int
-flow_dv_modify_hdr_resource_release(struct mlx5_flow_handle *handle)
+flow_dv_modify_hdr_resource_release(struct rte_eth_dev *dev,
+                                   struct mlx5_flow_handle *handle)
 {
+       struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_dv_modify_hdr_resource *cache_resource =
                                                        handle->dvh.modify_hdr;
 
@@ -9065,7 +9248,8 @@ flow_dv_modify_hdr_resource_release(struct mlx5_flow_handle *handle)
        if (rte_atomic32_dec_and_test(&cache_resource->refcnt)) {
                claim_zero(mlx5_flow_os_destroy_flow_action
                                                (cache_resource->action));
-               LIST_REMOVE(cache_resource, next);
+               mlx5_hlist_remove(priv->sh->modify_cmds,
+                                 &cache_resource->entry);
                mlx5_free(cache_resource);
                DRV_LOG(DEBUG, "modify-header resource %p: removed",
                        (void *)cache_resource);
@@ -9172,7 +9356,7 @@ flow_dv_fate_resource_release(struct rte_eth_dev *dev,
                return;
        switch (handle->fate_action) {
        case MLX5_FLOW_FATE_DROP:
-               mlx5_hrxq_drop_release(dev);
+               mlx5_drop_action_destroy(dev);
                break;
        case MLX5_FLOW_FATE_QUEUE:
                mlx5_hrxq_release(dev, handle->rix_hrxq);
@@ -9275,7 +9459,7 @@ __flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
                if (dev_handle->dvh.rix_encap_decap)
                        flow_dv_encap_decap_resource_release(dev, dev_handle);
                if (dev_handle->dvh.modify_hdr)
-                       flow_dv_modify_hdr_resource_release(dev_handle);
+                       flow_dv_modify_hdr_resource_release(dev, dev_handle);
                if (dev_handle->dvh.rix_push_vlan)
                        flow_dv_push_vlan_action_resource_release(dev,
                                                                  dev_handle);
@@ -9985,3 +10169,4 @@ const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
+