net/mlx5: support shared action for RSS
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 90b98cc..66d81e9 100644
@@ -293,7 +293,7 @@ flow_dv_shared_lock(struct rte_eth_dev *dev)
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_ctx_shared *sh = priv->sh;
 
-       if (sh->dv_refcnt > 1) {
+       if (sh->refcnt > 1) {
                int ret;
 
                ret = pthread_mutex_lock(&sh->dv_mutex);
@@ -308,7 +308,7 @@ flow_dv_shared_unlock(struct rte_eth_dev *dev)
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_ctx_shared *sh = priv->sh;
 
-       if (sh->dv_refcnt > 1) {
+       if (sh->refcnt > 1) {
                int ret;
 
                ret = pthread_mutex_unlock(&sh->dv_mutex);
@@ -4170,7 +4170,7 @@ flow_dv_validate_action_age(uint64_t action_flags,
        struct mlx5_priv *priv = dev->data->dev_private;
        const struct rte_flow_action_age *age = action->conf;
 
-       if (!priv->config.devx || priv->counter_fallback)
+       if (!priv->config.devx || priv->sh->cmng.counter_fallback)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                          NULL,
@@ -4603,18 +4603,13 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
                           struct mlx5_flow_counter_pool **ppool)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
-       struct mlx5_pools_container *cont;
+       struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
        struct mlx5_flow_counter_pool *pool;
-       uint32_t batch = 0;
 
-       idx--;
-       if (idx >= MLX5_CNT_BATCH_OFFSET) {
-               idx -= MLX5_CNT_BATCH_OFFSET;
-               batch = 1;
-       }
-       cont = MLX5_CNT_CONTAINER(priv->sh, batch);
-       MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
-       pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
+       /* Convert to a 0-based index and clear the shared bit. */
+       idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
+       MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cmng->n);
+       pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL];
        MLX5_ASSERT(pool);
        if (ppool)
                *ppool = pool;
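
For reference, the counter index scheme this decode relies on, restated as a standalone sketch. The constants mirror the driver's headers (MLX5_COUNTERS_PER_POOL is assumed to be 512 here); the pool slot and offset values are illustrative.

#include <assert.h>
#include <stdint.h>

#define MLX5_COUNTERS_PER_POOL 512
#define MLX5_CNT_SHARED_OFFSET 0x80000000u
/* 1-based encode, mirroring the driver's MLX5_MAKE_CNT_IDX(). */
#define MAKE_CNT_IDX(pool, off) \
	((pool) * MLX5_COUNTERS_PER_POOL + (off) + 1)

int
main(void)
{
	uint32_t idx = MAKE_CNT_IDX(5, 7);	/* pool slot 5, offset 7 */
	uint32_t shared = idx | MLX5_CNT_SHARED_OFFSET;
	/* Decode exactly as above: back to 0-based, shared bit cleared. */
	uint32_t decoded = (shared - 1) & (MLX5_CNT_SHARED_OFFSET - 1);

	assert(decoded / MLX5_COUNTERS_PER_POOL == 5);
	assert(decoded % MLX5_COUNTERS_PER_POOL == 7);
	return 0;
}
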
@@ -4646,8 +4641,8 @@ flow_dv_is_counter_in_pool(struct mlx5_flow_counter_pool *pool, int id)
 /**
  * Get a pool by devx counter ID.
  *
- * @param[in] cont
- *   Pointer to the counter container.
+ * @param[in] cmng
+ *   Pointer to the counter management.
  * @param[in] id
  *   The counter devx ID.
  *
@@ -4655,107 +4650,38 @@ flow_dv_is_counter_in_pool(struct mlx5_flow_counter_pool *pool, int id)
  *   The counter pool pointer if it exists, NULL otherwise.
  */
 static struct mlx5_flow_counter_pool *
-flow_dv_find_pool_by_id(struct mlx5_pools_container *cont, int id)
+flow_dv_find_pool_by_id(struct mlx5_flow_counter_mng *cmng, int id)
 {
        uint32_t i;
+       struct mlx5_flow_counter_pool *pool = NULL;
 
+       rte_spinlock_lock(&cmng->pool_update_sl);
        /* Check last used pool. */
-       if (cont->last_pool_idx != POOL_IDX_INVALID &&
-           flow_dv_is_counter_in_pool(cont->pools[cont->last_pool_idx], id))
-               return cont->pools[cont->last_pool_idx];
+       if (cmng->last_pool_idx != POOL_IDX_INVALID &&
+           flow_dv_is_counter_in_pool(cmng->pools[cmng->last_pool_idx], id)) {
+               pool = cmng->pools[cmng->last_pool_idx];
+               goto out;
+       }
        /* ID out of range means no suitable pool in the container. */
-       if (id > cont->max_id || id < cont->min_id)
-               return NULL;
+       if (id > cmng->max_id || id < cmng->min_id)
+               goto out;
        /*
         * Find the pool from the end of the container, since counter IDs
         * are mostly increasing in sequence, so the last pool is likely
         * the needed one.
         */
-       i = rte_atomic16_read(&cont->n_valid);
+       i = cmng->n_valid;
        while (i--) {
-               struct mlx5_flow_counter_pool *pool = cont->pools[i];
-
-               if (flow_dv_is_counter_in_pool(pool, id))
-                       return pool;
-       }
-       return NULL;
-}
-
-/**
- * Allocate a new memory for the counter values wrapped by all the needed
- * management.
- *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
- * @param[in] raws_n
- *   The raw memory areas - each one for MLX5_COUNTERS_PER_POOL counters.
- *
- * @return
- *   The new memory management pointer on success, otherwise NULL and rte_errno
- *   is set.
- */
-static struct mlx5_counter_stats_mem_mng *
-flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
-{
-       struct mlx5_priv *priv = dev->data->dev_private;
-       struct mlx5_dev_ctx_shared *sh = priv->sh;
-       struct mlx5_devx_mkey_attr mkey_attr;
-       struct mlx5_counter_stats_mem_mng *mem_mng;
-       volatile struct flow_counter_stats *raw_data;
-       int size = (sizeof(struct flow_counter_stats) *
-                       MLX5_COUNTERS_PER_POOL +
-                       sizeof(struct mlx5_counter_stats_raw)) * raws_n +
-                       sizeof(struct mlx5_counter_stats_mem_mng);
-       size_t pgsize = rte_mem_page_size();
-       if (pgsize == (size_t)-1) {
-               DRV_LOG(ERR, "Failed to get mem page size");
-               rte_errno = ENOMEM;
-               return NULL;
-       }
-       uint8_t *mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize,
-                                 SOCKET_ID_ANY);
-       int i;
+               struct mlx5_flow_counter_pool *pool_tmp = cmng->pools[i];
 
-       if (!mem) {
-               rte_errno = ENOMEM;
-               return NULL;
-       }
-       mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
-       size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
-       mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size,
-                                                IBV_ACCESS_LOCAL_WRITE);
-       if (!mem_mng->umem) {
-               rte_errno = errno;
-               mlx5_free(mem);
-               return NULL;
-       }
-       mkey_attr.addr = (uintptr_t)mem;
-       mkey_attr.size = size;
-       mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
-       mkey_attr.pd = sh->pdn;
-       mkey_attr.log_entity_size = 0;
-       mkey_attr.pg_access = 0;
-       mkey_attr.klm_array = NULL;
-       mkey_attr.klm_num = 0;
-       if (priv->config.hca_attr.relaxed_ordering_write &&
-               priv->config.hca_attr.relaxed_ordering_read  &&
-               !haswell_broadwell_cpu)
-               mkey_attr.relaxed_ordering = 1;
-       mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
-       if (!mem_mng->dm) {
-               mlx5_glue->devx_umem_dereg(mem_mng->umem);
-               rte_errno = errno;
-               mlx5_free(mem);
-               return NULL;
-       }
-       mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
-       raw_data = (volatile struct flow_counter_stats *)mem;
-       for (i = 0; i < raws_n; ++i) {
-               mem_mng->raws[i].mem_mng = mem_mng;
-               mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
+               if (flow_dv_is_counter_in_pool(pool_tmp, id)) {
+                       pool = pool_tmp;
+                       break;
+               }
        }
-       LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
-       return mem_mng;
+out:
+       rte_spinlock_unlock(&cmng->pool_update_sl);
+       return pool;
 }
 
 /**
@@ -4763,20 +4689,17 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
- * @param[in] batch
- *   Whether the pool is for counter that was allocated by batch command.
  *
  * @return
  *   0 on success, otherwise negative errno value and rte_errno is set.
  */
 static int
-flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
+flow_dv_container_resize(struct rte_eth_dev *dev)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
-       struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch);
-       struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
-       void *old_pools = cont->pools;
-       uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
+       struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
+       void *old_pools = cmng->pools;
+       uint32_t resize = cmng->n + MLX5_CNT_CONTAINER_RESIZE;
        uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
        void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
 
@@ -4785,32 +4708,10 @@ flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
                return -ENOMEM;
        }
        if (old_pools)
-               memcpy(pools, old_pools, cont->n *
+               memcpy(pools, old_pools, cmng->n *
                                       sizeof(struct mlx5_flow_counter_pool *));
-       /*
-        * Fallback mode query the counter directly, no background query
-        * resources are needed.
-        */
-       if (!priv->counter_fallback) {
-               int i;
-
-               mem_mng = flow_dv_create_counter_stat_mem_mng(dev,
-                         MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES);
-               if (!mem_mng) {
-                       mlx5_free(pools);
-                       return -ENOMEM;
-               }
-               for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
-                       LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws,
-                                        mem_mng->raws +
-                                        MLX5_CNT_CONTAINER_RESIZE +
-                                        i, next);
-       }
-       rte_spinlock_lock(&cont->resize_sl);
-       cont->n = resize;
-       cont->mem_mng = mem_mng;
-       cont->pools = pools;
-       rte_spinlock_unlock(&cont->resize_sl);
+       cmng->n = resize;
+       cmng->pools = pools;
        if (old_pools)
                mlx5_free(old_pools);
        return 0;
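
The resize itself is a plain fixed-step grow-and-copy, now serialized by the caller under cmng->pool_update_sl (see flow_dv_pool_create() below). A minimal sketch of the pattern, using the driver's allocator helpers but otherwise illustrative names:

static int
ptr_array_grow(void ***arr, uint32_t *cap, uint32_t step)
{
	uint32_t new_cap = *cap + step;
	void **p = mlx5_malloc(MLX5_MEM_ZERO, new_cap * sizeof(void *),
			       0, SOCKET_ID_ANY);

	if (!p)
		return -ENOMEM;
	if (*arr) {
		/* Old entries keep their slots; only capacity grows. */
		memcpy(p, *arr, *cap * sizeof(void *));
		mlx5_free(*arr);
	}
	*arr = p;
	*cap = new_cap;
	return 0;
}
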
@@ -4838,25 +4739,15 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_counter_pool *pool = NULL;
        struct mlx5_flow_counter *cnt;
-       struct mlx5_flow_counter_ext *cnt_ext = NULL;
        int offset;
 
        cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
        MLX5_ASSERT(pool);
-       if (counter < MLX5_CNT_BATCH_OFFSET) {
-               cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
-               if (priv->counter_fallback)
-                       return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0,
+       if (priv->sh->cmng.counter_fallback)
+               return mlx5_devx_cmd_flow_counter_query(cnt->dcs_when_active, 0,
                                        0, pkts, bytes, 0, NULL, NULL, 0);
-       }
-
        rte_spinlock_lock(&pool->sl);
-       /*
-        * The single counters allocation may allocate smaller ID than the
-        * current allocated in parallel to the host reading.
-        * In this case the new counter values must be reported as 0.
-        */
-       if (unlikely(cnt_ext && cnt_ext->dcs->id < pool->raw->min_dcs_id)) {
+       if (!pool->raw) {
                *pkts = 0;
                *bytes = 0;
        } else {
@@ -4875,8 +4766,6 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
  *   Pointer to the Ethernet device structure.
  * @param[out] dcs
  *   The devX counter handle.
- * @param[in] batch
- *   Whether the pool is for counter that was allocated by batch command.
  * @param[in] age
  *   Whether the pool is for counter that was allocated for aging.
  * @param[in/out] cont_cur
@@ -4887,115 +4776,52 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
  */
 static struct mlx5_flow_counter_pool *
 flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
-                   uint32_t batch, uint32_t age)
+                   uint32_t age)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_counter_pool *pool;
-       struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch);
-       int16_t n_valid = rte_atomic16_read(&cont->n_valid);
+       struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
+       bool fallback = priv->sh->cmng.counter_fallback;
        uint32_t size = sizeof(*pool);
 
-       if (cont->n == n_valid && flow_dv_container_resize(dev, batch))
-               return NULL;
-       size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
-       size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
-       size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE);
+       size += MLX5_COUNTERS_PER_POOL * MLX5_CNT_SIZE;
+       size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * MLX5_AGE_SIZE);
        pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
        if (!pool) {
                rte_errno = ENOMEM;
                return NULL;
        }
-       pool->min_dcs = dcs;
-       if (!priv->counter_fallback)
-               pool->raw = cont->mem_mng->raws + n_valid %
-                                                     MLX5_CNT_CONTAINER_RESIZE;
-       pool->raw_hw = NULL;
-       pool->type = 0;
-       pool->type |= (batch ? 0 :  CNT_POOL_TYPE_EXT);
-       pool->type |= (!age ? 0 :  CNT_POOL_TYPE_AGE);
+       pool->raw = NULL;
+       pool->is_aged = !!age;
        pool->query_gen = 0;
+       pool->min_dcs = dcs;
        rte_spinlock_init(&pool->sl);
+       rte_spinlock_init(&pool->csl);
        TAILQ_INIT(&pool->counters[0]);
        TAILQ_INIT(&pool->counters[1]);
-       TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
-       pool->index = n_valid;
        pool->time_of_last_age_check = MLX5_CURR_TIME_SEC;
-       cont->pools[n_valid] = pool;
-       if (!batch) {
+       rte_spinlock_lock(&cmng->pool_update_sl);
+       pool->index = cmng->n_valid;
+       if (pool->index == cmng->n && flow_dv_container_resize(dev)) {
+               mlx5_free(pool);
+               rte_spinlock_unlock(&cmng->pool_update_sl);
+               return NULL;
+       }
+       cmng->pools[pool->index] = pool;
+       cmng->n_valid++;
+       if (unlikely(fallback)) {
                int base = RTE_ALIGN_FLOOR(dcs->id, MLX5_COUNTERS_PER_POOL);
 
-               if (base < cont->min_id)
-                       cont->min_id = base;
-               if (base > cont->max_id)
-                       cont->max_id = base + MLX5_COUNTERS_PER_POOL - 1;
-               cont->last_pool_idx = pool->index;
+               if (base < cmng->min_id)
+                       cmng->min_id = base;
+               if (base > cmng->max_id)
+                       cmng->max_id = base + MLX5_COUNTERS_PER_POOL - 1;
+               cmng->last_pool_idx = pool->index;
        }
-       /* Pool initialization must be updated before host thread access. */
-       rte_io_wmb();
-       rte_atomic16_add(&cont->n_valid, 1);
+       rte_spinlock_unlock(&cmng->pool_update_sl);
        return pool;
 }
 
-/**
- * Restore skipped counters in the pool.
- *
- * As counter pool query requires the first counter dcs
- * ID start with 4 alinged, if the pool counters with
- * min_dcs ID are not aligned with 4, the counters will
- * be skipped.
- * Once other min_dcs ID less than these skipped counter
- * dcs ID appears, the skipped counters will be safe to
- * use.
- * Should be called when min_dcs is updated.
- *
- * @param[in] pool
- *   Current counter pool.
- * @param[in] last_min_dcs
- *   Last min_dcs.
- */
-static void
-flow_dv_counter_restore(struct mlx5_flow_counter_pool *pool,
-                       struct mlx5_devx_obj *last_min_dcs)
-{
-       struct mlx5_flow_counter_ext *cnt_ext;
-       uint32_t offset, new_offset;
-       uint32_t skip_cnt = 0;
-       uint32_t i;
-
-       if (!pool->skip_cnt)
-               return;
-       /*
-        * If last min_dcs is not valid. The skipped counter may even after
-        * last min_dcs, set the offset to the whole pool.
-        */
-       if (last_min_dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))
-               offset = MLX5_COUNTERS_PER_POOL;
-       else
-               offset = last_min_dcs->id % MLX5_COUNTERS_PER_POOL;
-       new_offset = pool->min_dcs->id % MLX5_COUNTERS_PER_POOL;
-       /*
-        * Check the counters from 1 to the last_min_dcs range. Counters
-        * before new min_dcs indicates pool still has skipped counters.
-        * Counters be skipped after new min_dcs will be ready to use.
-        * Offset 0 counter must be empty or min_dcs, start from 1.
-        */
-       for (i = 1; i < offset; i++) {
-               cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
-               if (cnt_ext->skipped) {
-                       if (i > new_offset) {
-                               cnt_ext->skipped = 0;
-                               TAILQ_INSERT_TAIL
-                                       (&pool->counters[pool->query_gen],
-                                        MLX5_POOL_GET_CNT(pool, i), next);
-                       } else {
-                               skip_cnt++;
-                       }
-               }
-       }
-       if (!skip_cnt)
-               pool->skip_cnt = 0;
-}
-
 /**
  * Prepare a new counter and/or a new counter pool.
  *
@@ -5003,8 +4829,6 @@ flow_dv_counter_restore(struct mlx5_flow_counter_pool *pool,
  *   Pointer to the Ethernet device structure.
  * @param[out] cnt_free
  *   Where to put the pointer of a new counter.
- * @param[in] batch
- *   Whether the pool is for counter that was allocated by batch command.
  * @param[in] age
  *   Whether the pool is for counter that was allocated for aging.
  *
@@ -5015,98 +4839,45 @@ flow_dv_counter_restore(struct mlx5_flow_counter_pool *pool,
 static struct mlx5_flow_counter_pool *
 flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
                             struct mlx5_flow_counter **cnt_free,
-                            uint32_t batch, uint32_t age)
+                            uint32_t age)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
-       struct mlx5_pools_container *cont;
+       struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
        struct mlx5_flow_counter_pool *pool;
        struct mlx5_counters tmp_tq;
-       struct mlx5_devx_obj *last_min_dcs;
        struct mlx5_devx_obj *dcs = NULL;
        struct mlx5_flow_counter *cnt;
        enum mlx5_counter_type cnt_type =
                        age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN;
-       uint32_t add2other;
+       bool fallback = priv->sh->cmng.counter_fallback;
        uint32_t i;
 
-       cont = MLX5_CNT_CONTAINER(priv->sh, batch);
-       if (!batch) {
-retry:
-               add2other = 0;
+       if (fallback) {
                /* bulk_bitmap must be 0 for single counter allocation. */
                dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
                if (!dcs)
                        return NULL;
-               pool = flow_dv_find_pool_by_id(cont, dcs->id);
-               /*
-                * If pool eixsts but with other type, counter will be added
-                * to the other pool, need to reallocate new counter in the
-                * ragne with same type later.
-                */
+               pool = flow_dv_find_pool_by_id(cmng, dcs->id);
                if (!pool) {
-                       pool = flow_dv_pool_create(dev, dcs, batch,
-                                                  age);
+                       pool = flow_dv_pool_create(dev, dcs, age);
                        if (!pool) {
                                mlx5_devx_cmd_destroy(dcs);
                                return NULL;
                        }
-               } else if ((!!IS_AGE_POOL(pool)) != age) {
-                       add2other = 1;
-               }
-               if ((dcs->id < pool->min_dcs->id ||
-                   pool->min_dcs->id &
-                   (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) &&
-                   !(dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))) {
-                       /*
-                        * Update the pool min_dcs only if current dcs is
-                        * valid and exist min_dcs is not valid or greater
-                        * than new dcs.
-                        */
-                       last_min_dcs = pool->min_dcs;
-                       rte_atomic64_set(&pool->a64_dcs,
-                                        (int64_t)(uintptr_t)dcs);
-                       /*
-                        * Restore any skipped counters if the new min_dcs
-                        * ID is smaller or min_dcs is not valid.
-                        */
-                       if (dcs->id < last_min_dcs->id ||
-                           last_min_dcs->id &
-                           (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))
-                               flow_dv_counter_restore(pool, last_min_dcs);
                }
                i = dcs->id % MLX5_COUNTERS_PER_POOL;
                cnt = MLX5_POOL_GET_CNT(pool, i);
                cnt->pool = pool;
-               MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs;
-               /*
-                * If min_dcs is not valid, it means the new allocated dcs
-                * also fail to become the valid min_dcs, just skip it.
-                * Or if min_dcs is valid, and new dcs ID is smaller than
-                * min_dcs, but not become the min_dcs, also skip it.
-                */
-               if (pool->min_dcs->id &
-                   (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1) ||
-                   dcs->id < pool->min_dcs->id) {
-                       MLX5_GET_POOL_CNT_EXT(pool, i)->skipped = 1;
-                       pool->skip_cnt = 1;
-                       goto retry;
-               }
-               if (add2other) {
-                       TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen],
-                                         cnt, next);
-                       goto retry;
-               }
+               cnt->dcs_when_free = dcs;
                *cnt_free = cnt;
                return pool;
        }
-       /* bulk_bitmap is in 128 counters units. */
-       if (priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4)
-               dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0x4);
+       dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0x4);
        if (!dcs) {
                rte_errno = ENODATA;
                return NULL;
        }
-       pool = flow_dv_pool_create(dev, dcs, batch, age);
+       pool = flow_dv_pool_create(dev, dcs, age);
        if (!pool) {
                mlx5_devx_cmd_destroy(dcs);
                return NULL;
@@ -5117,57 +4888,19 @@ retry:
                cnt->pool = pool;
                TAILQ_INSERT_HEAD(&tmp_tq, cnt, next);
        }
-       rte_spinlock_lock(&cont->csl);
-       TAILQ_CONCAT(&cont->counters[cnt_type], &tmp_tq, next);
-       rte_spinlock_unlock(&cont->csl);
+       rte_spinlock_lock(&cmng->csl[cnt_type]);
+       TAILQ_CONCAT(&cmng->counters[cnt_type], &tmp_tq, next);
+       rte_spinlock_unlock(&cmng->csl[cnt_type]);
        *cnt_free = MLX5_POOL_GET_CNT(pool, 0);
        (*cnt_free)->pool = pool;
        return pool;
 }
 
-/**
- * Search for existed shared counter.
- *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
- * @param[in] id
- *   The shared counter ID to search.
- * @param[out] ppool
- *   mlx5 flow counter pool in the container,
- *
- * @return
- *   NULL if not existed, otherwise pointer to the shared extend counter.
- */
-static struct mlx5_flow_counter_ext *
-flow_dv_counter_shared_search(struct rte_eth_dev *dev, uint32_t id,
-                             struct mlx5_flow_counter_pool **ppool)
-{
-       struct mlx5_priv *priv = dev->data->dev_private;
-       union mlx5_l3t_data data;
-       uint32_t cnt_idx;
-
-       if (mlx5_l3t_get_entry(priv->sh->cnt_id_tbl, id, &data) || !data.dword)
-               return NULL;
-       cnt_idx = data.dword;
-       /*
-        * Shared counters don't have age info. The counter extend is after
-        * the counter datat structure.
-        */
-       return (struct mlx5_flow_counter_ext *)
-              ((flow_dv_counter_get_by_idx(dev, cnt_idx, ppool)) + 1);
-}
-
 /**
  * Allocate a flow counter.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
- * @param[in] shared
- *   Indicate if this counter is shared with other flows.
- * @param[in] id
- *   Counter identifier.
- * @param[in] group
- *   Counter flow group.
  * @param[in] age
  *   Whether the counter was allocated for aging.
  *
@@ -5175,23 +4908,13 @@ flow_dv_counter_shared_search(struct rte_eth_dev *dev, uint32_t id,
  *   Index to flow counter on success, 0 otherwise and rte_errno is set.
  */
 static uint32_t
-flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
-                     uint16_t group, uint32_t age)
+flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t age)
 {
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_counter_pool *pool = NULL;
        struct mlx5_flow_counter *cnt_free = NULL;
-       struct mlx5_flow_counter_ext *cnt_ext = NULL;
-       /*
-        * Currently group 0 flow counter cannot be assigned to a flow if it is
-        * not the first one in the batch counter allocation, so it is better
-        * to allocate counters one by one for these flows in a separate
-        * container.
-        * A counter can be shared between different groups so need to take
-        * shared counters from the single container.
-        */
-       uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0;
-       struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch);
+       bool fallback = priv->sh->cmng.counter_fallback;
+       struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
        enum mlx5_counter_type cnt_type =
                        age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN;
        uint32_t cnt_idx;
@@ -5200,44 +4923,29 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
                rte_errno = ENOTSUP;
                return 0;
        }
-       if (shared) {
-               cnt_ext = flow_dv_counter_shared_search(dev, id, &pool);
-               if (cnt_ext) {
-                       if (cnt_ext->ref_cnt + 1 == 0) {
-                               rte_errno = E2BIG;
-                               return 0;
-                       }
-                       cnt_ext->ref_cnt++;
-                       cnt_idx = pool->index * MLX5_COUNTERS_PER_POOL +
-                                 (cnt_ext->dcs->id % MLX5_COUNTERS_PER_POOL)
-                                 + 1;
-                       return cnt_idx;
-               }
-       }
        /* Get free counters from container. */
-       rte_spinlock_lock(&cont->csl);
-       cnt_free = TAILQ_FIRST(&cont->counters[cnt_type]);
+       rte_spinlock_lock(&cmng->csl[cnt_type]);
+       cnt_free = TAILQ_FIRST(&cmng->counters[cnt_type]);
        if (cnt_free)
-               TAILQ_REMOVE(&cont->counters[cnt_type], cnt_free, next);
-       rte_spinlock_unlock(&cont->csl);
-       if (!cnt_free && !flow_dv_counter_pool_prepare(dev, &cnt_free,
-                                                      batch, age))
+               TAILQ_REMOVE(&cmng->counters[cnt_type], cnt_free, next);
+       rte_spinlock_unlock(&cmng->csl[cnt_type]);
+       if (!cnt_free && !flow_dv_counter_pool_prepare(dev, &cnt_free, age))
                goto err;
        pool = cnt_free->pool;
-       if (!batch)
-               cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
+       if (fallback)
+               cnt_free->dcs_when_active = cnt_free->dcs_when_free;
        /* Create a DV counter action only in the first time usage. */
        if (!cnt_free->action) {
                uint16_t offset;
                struct mlx5_devx_obj *dcs;
                int ret;
 
-               if (batch) {
+               if (!fallback) {
                        offset = MLX5_CNT_ARRAY_IDX(pool, cnt_free);
                        dcs = pool->min_dcs;
                } else {
                        offset = 0;
-                       dcs = cnt_ext->dcs;
+                       dcs = cnt_free->dcs_when_free;
                }
                ret = mlx5_flow_os_create_flow_action_count(dcs->obj, offset,
                                                            &cnt_free->action);
@@ -5248,37 +4956,80 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
        }
        cnt_idx = MLX5_MAKE_CNT_IDX(pool->index,
                                MLX5_CNT_ARRAY_IDX(pool, cnt_free));
-       cnt_idx += batch * MLX5_CNT_BATCH_OFFSET;
        /* Update the counter reset values. */
        if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits,
                                 &cnt_free->bytes))
                goto err;
-       if (cnt_ext) {
-               cnt_ext->shared = shared;
-               cnt_ext->ref_cnt = 1;
-               cnt_ext->id = id;
-               if (shared) {
-                       union mlx5_l3t_data data;
-
-                       data.dword = cnt_idx;
-                       if (mlx5_l3t_set_entry(priv->sh->cnt_id_tbl, id, &data))
-                               return 0;
-               }
-       }
-       if (!priv->counter_fallback && !priv->sh->cmng.query_thread_on)
+       if (!fallback && !priv->sh->cmng.query_thread_on)
                /* Start the asynchronous batch query by the host thread. */
                mlx5_set_query_alarm(priv->sh);
        return cnt_idx;
 err:
        if (cnt_free) {
                cnt_free->pool = pool;
-               rte_spinlock_lock(&cont->csl);
-               TAILQ_INSERT_TAIL(&cont->counters[cnt_type], cnt_free, next);
-               rte_spinlock_unlock(&cont->csl);
+               if (fallback)
+                       cnt_free->dcs_when_free = cnt_free->dcs_when_active;
+               rte_spinlock_lock(&cmng->csl[cnt_type]);
+               TAILQ_INSERT_TAIL(&cmng->counters[cnt_type], cnt_free, next);
+               rte_spinlock_unlock(&cmng->csl[cnt_type]);
        }
        return 0;
 }
 
+/**
+ * Allocate a shared flow counter.
+ *
+ * @param[in] ctx
+ *   Pointer to the shared counter configuration.
+ * @param[out] data
+ *   Pointer to return the allocated counter index.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int32_t
+flow_dv_counter_alloc_shared_cb(void *ctx, union mlx5_l3t_data *data)
+{
+       struct mlx5_shared_counter_conf *conf = ctx;
+       struct rte_eth_dev *dev = conf->dev;
+       struct mlx5_flow_counter *cnt;
+
+       data->dword = flow_dv_counter_alloc(dev, 0);
+       if (!data->dword)
+               return -rte_errno;
+       data->dword |= MLX5_CNT_SHARED_OFFSET;
+       cnt = flow_dv_counter_get_by_idx(dev, data->dword, NULL);
+       cnt->shared_info.id = conf->id;
+       return 0;
+}
+
+/**
+ * Get a shared flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] id
+ *   Counter identifier.
+ *
+ * @return
+ *   Index to flow counter on success, 0 otherwise and rte_errno is set.
+ */
+static uint32_t
+flow_dv_counter_get_shared(struct rte_eth_dev *dev, uint32_t id)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_shared_counter_conf conf = {
+               .dev = dev,
+               .id = id,
+       };
+       union mlx5_l3t_data data = {
+               .dword = 0,
+       };
+
+       mlx5_l3t_prepare_entry(priv->sh->cnt_id_tbl, id, &data,
+                              flow_dv_counter_alloc_shared_cb, &conf);
+       return data.dword;
+}
+
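
flow_dv_counter_get_shared() leans on the L3 table's get-or-create contract: prepare returns the existing entry for *id* (taking a reference) or runs the callback once to populate it, while clear, used in the release path below, drops a reference and reports whether the entry is still in use. A toy model of that contract, not the mlx5_l3t implementation and ignoring its internal locking:

struct toy_entry {
	uint32_t val;
	uint32_t ref;
};

static uint32_t
toy_prepare(struct toy_entry *e, uint32_t (*cb)(void *), void *ctx)
{
	if (!e->ref)
		e->val = cb(ctx);	/* first user allocates */
	e->ref++;
	return e->val;
}

static uint32_t
toy_clear(struct toy_entry *e)
{
	return --e->ref;	/* nonzero: entry still referenced */
}
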
 /**
  * Get age param from counter index.
  *
@@ -5352,24 +5103,16 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter)
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flow_counter_pool *pool = NULL;
        struct mlx5_flow_counter *cnt;
-       struct mlx5_flow_counter_ext *cnt_ext = NULL;
        enum mlx5_counter_type cnt_type;
 
        if (!counter)
                return;
        cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
        MLX5_ASSERT(pool);
-       if (counter < MLX5_CNT_BATCH_OFFSET) {
-               cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
-               if (cnt_ext) {
-                       if (--cnt_ext->ref_cnt)
-                               return;
-                       if (cnt_ext->shared)
-                               mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl,
-                                                    cnt_ext->id);
-               }
-       }
-       if (IS_AGE_POOL(pool))
+       if (IS_SHARED_CNT(counter) &&
+           mlx5_l3t_clear_entry(priv->sh->cnt_id_tbl, cnt->shared_info.id))
+               return;
+       if (pool->is_aged)
                flow_dv_counter_remove_from_age(dev, counter, cnt);
        cnt->pool = pool;
        /*
@@ -5382,14 +5125,18 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter)
         * function both operate with the different list.
         *
         */
-       if (!priv->counter_fallback) {
+       if (!priv->sh->cmng.counter_fallback) {
+               rte_spinlock_lock(&pool->csl);
                TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen], cnt, next);
+               rte_spinlock_unlock(&pool->csl);
        } else {
-               cnt_type = IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE :
-                                              MLX5_COUNTER_TYPE_ORIGIN;
-               TAILQ_INSERT_TAIL(&((MLX5_CNT_CONTAINER
-                                 (priv->sh, 0))->counters[cnt_type]),
+               cnt->dcs_when_free = cnt->dcs_when_active;
+               cnt_type = pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
+                                          MLX5_COUNTER_TYPE_ORIGIN;
+               rte_spinlock_lock(&priv->sh->cmng.csl[cnt_type]);
+               TAILQ_INSERT_TAIL(&priv->sh->cmng.counters[cnt_type],
                                  cnt, next);
+               rte_spinlock_unlock(&priv->sh->cmng.csl[cnt_type]);
        }
 }
 
@@ -8521,10 +8268,10 @@ flow_dv_translate_create_counter(struct rte_eth_dev *dev,
        uint32_t counter;
        struct mlx5_age_param *age_param;
 
-       counter = flow_dv_counter_alloc(dev,
-                               count ? count->shared : 0,
-                               count ? count->id : 0,
-                               dev_flow->dv.group, !!age);
+       if (count && count->shared)
+               counter = flow_dv_counter_get_shared(dev, count->id);
+       else
+               counter = flow_dv_counter_alloc(dev, !!age);
        if (!counter || age == NULL)
                return counter;
        age_param  = flow_dv_counter_idx_get_age(dev, counter);
@@ -8678,20 +8425,16 @@ flow_dv_handle_rx_queue(struct rte_eth_dev *dev,
        struct mlx5_hrxq *hrxq;
 
        MLX5_ASSERT(rss_desc->queue_num);
-       *hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
-                                 MLX5_RSS_HASH_KEY_LEN,
+       *hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key, MLX5_RSS_HASH_KEY_LEN,
                                  dev_flow->hash_fields,
-                                 rss_desc->queue,
-                                 rss_desc->queue_num);
+                                 rss_desc->queue, rss_desc->queue_num);
        if (!*hrxq_idx) {
                *hrxq_idx = mlx5_hrxq_new
-                               (dev, rss_desc->key,
-                                MLX5_RSS_HASH_KEY_LEN,
+                               (dev, rss_desc->key, MLX5_RSS_HASH_KEY_LEN,
                                 dev_flow->hash_fields,
-                                rss_desc->queue,
-                                rss_desc->queue_num,
-                                !!(dh->layers &
-                                MLX5_FLOW_LAYER_TUNNEL));
+                                rss_desc->queue, rss_desc->queue_num,
+                                !!(dh->layers & MLX5_FLOW_LAYER_TUNNEL),
+                                false);
                if (!*hrxq_idx)
                        return NULL;
        }
@@ -10200,6 +9943,158 @@ __flow_dv_translate(struct rte_eth_dev *dev,
        return 0;
 }
 
+/**
+ * Set hash RX queue by hash fields (see enum ibv_rx_hash_fields)
+ * and tunnel.
+ *
+ * @param[in, out] action
+ *   Shared RSS action holding hash RX queue objects.
+ * @param[in] hash_fields
+ *   Defines combination of packet fields to participate in RX hash.
+ * @param[in] tunnel
+ *   Tunnel type.
+ * @param[in] hrxq_idx
+ *   Hash RX queue index to set.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_rss_hrxq_set(struct mlx5_shared_action_rss *action,
+                             const uint64_t hash_fields,
+                             const int tunnel,
+                             uint32_t hrxq_idx)
+{
+       uint32_t *hrxqs = tunnel ? action->hrxq : action->hrxq_tunnel;
+
+       switch (hash_fields & ~IBV_RX_HASH_INNER) {
+       case MLX5_RSS_HASH_IPV4:
+               hrxqs[0] = hrxq_idx;
+               return 0;
+       case MLX5_RSS_HASH_IPV4_TCP:
+               hrxqs[1] = hrxq_idx;
+               return 0;
+       case MLX5_RSS_HASH_IPV4_UDP:
+               hrxqs[2] = hrxq_idx;
+               return 0;
+       case MLX5_RSS_HASH_IPV6:
+               hrxqs[3] = hrxq_idx;
+               return 0;
+       case MLX5_RSS_HASH_IPV6_TCP:
+               hrxqs[4] = hrxq_idx;
+               return 0;
+       case MLX5_RSS_HASH_IPV6_UDP:
+               hrxqs[5] = hrxq_idx;
+               return 0;
+       case MLX5_RSS_HASH_NONE:
+               hrxqs[6] = hrxq_idx;
+               return 0;
+       default:
+               return -1;
+       }
+}
+
+/**
+ * Look up the hash RX queue by hash fields (see enum ibv_rx_hash_fields)
+ * and tunnel.
+ *
+ * @param[in] action
+ *   Shared RSS action holding hash RX queue objects.
+ * @param[in] hash_fields
+ *   Defines combination of packet fields to participate in RX hash.
+ * @param[in] tunnel
+ *   Tunnel type.
+ *
+ * @return
+ *   Valid hash RX queue index, otherwise 0.
+ */
+static uint32_t
+__flow_dv_action_rss_hrxq_lookup(const struct mlx5_shared_action_rss *action,
+                                const uint64_t hash_fields,
+                                const int tunnel)
+{
+       const uint32_t *hrxqs = tunnel ? action->hrxq : action->hrxq_tunnel;
+
+       switch (hash_fields & ~IBV_RX_HASH_INNER) {
+       case MLX5_RSS_HASH_IPV4:
+               return hrxqs[0];
+       case MLX5_RSS_HASH_IPV4_TCP:
+               return hrxqs[1];
+       case MLX5_RSS_HASH_IPV4_UDP:
+               return hrxqs[2];
+       case MLX5_RSS_HASH_IPV6:
+               return hrxqs[3];
+       case MLX5_RSS_HASH_IPV6_TCP:
+               return hrxqs[4];
+       case MLX5_RSS_HASH_IPV6_UDP:
+               return hrxqs[5];
+       case MLX5_RSS_HASH_NONE:
+               return hrxqs[6];
+       default:
+               return 0;
+       }
+}
+
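
Both switches above map the same seven hash-fields combinations to fixed slots. An equivalent table-driven form makes the set/lookup symmetry explicit; the slot order is taken from the switches, the helper name is illustrative:

static const uint64_t slot_key[] = {
	MLX5_RSS_HASH_IPV4,		/* hrxqs[0] */
	MLX5_RSS_HASH_IPV4_TCP,		/* hrxqs[1] */
	MLX5_RSS_HASH_IPV4_UDP,		/* hrxqs[2] */
	MLX5_RSS_HASH_IPV6,		/* hrxqs[3] */
	MLX5_RSS_HASH_IPV6_TCP,		/* hrxqs[4] */
	MLX5_RSS_HASH_IPV6_UDP,		/* hrxqs[5] */
	MLX5_RSS_HASH_NONE,		/* hrxqs[6] */
};

static int
rss_hrxq_slot(uint64_t hash_fields)
{
	unsigned int i;

	hash_fields &= ~IBV_RX_HASH_INNER;
	for (i = 0; i < RTE_DIM(slot_key); i++)
		if (hash_fields == slot_key[i])
			return i;
	return -1;	/* unsupported combination */
}
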
+/**
+ * Retrieve the hash RX queue suitable for the *flow*.
+ * If a shared RSS action is configured for the *flow*, the suitable hash
+ * RX queue is retrieved from the attached shared action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] flow
+ *   Pointer to the flow, possibly holding a shared RSS action.
+ * @param[in] dev_flow
+ *   Pointer to the sub flow.
+ * @param[out] hrxq
+ *   Pointer to retrieved hash RX queue object.
+ *
+ * @return
+ *   Valid hash RX queue index, otherwise 0 and rte_errno is set.
+ */
+static uint32_t
+__flow_dv_rss_get_hrxq(struct rte_eth_dev *dev, struct rte_flow *flow,
+                          struct mlx5_flow *dev_flow,
+                          struct mlx5_hrxq **hrxq)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       uint32_t hrxq_idx;
+
+       if (flow->shared_rss) {
+               hrxq_idx = __flow_dv_action_rss_hrxq_lookup
+                               (flow->shared_rss, dev_flow->hash_fields,
+                                !!(dev_flow->handle->layers &
+                                   MLX5_FLOW_LAYER_TUNNEL));
+               if (hrxq_idx) {
+                       *hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
+                                              hrxq_idx);
+                       rte_atomic32_inc(&(*hrxq)->refcnt);
+               }
+       } else {
+               struct mlx5_flow_rss_desc *rss_desc =
+                               &((struct mlx5_flow_rss_desc *)priv->rss_desc)
+                               [!!priv->flow_nested_idx];
+
+               MLX5_ASSERT(rss_desc->queue_num);
+               hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
+                                        MLX5_RSS_HASH_KEY_LEN,
+                                        dev_flow->hash_fields,
+                                        rss_desc->queue, rss_desc->queue_num);
+               if (!hrxq_idx) {
+                       hrxq_idx = mlx5_hrxq_new(dev,
+                                                rss_desc->key,
+                                                MLX5_RSS_HASH_KEY_LEN,
+                                                dev_flow->hash_fields,
+                                                rss_desc->queue,
+                                                rss_desc->queue_num,
+                                                !!(dev_flow->handle->layers &
+                                                MLX5_FLOW_LAYER_TUNNEL),
+                                                false);
+               }
+               *hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
+                                      hrxq_idx);
+       }
+       return hrxq_idx;
+}
+
 /**
  * Apply the flow to the NIC, lock free,
  * (mutex should be acquired by caller).
@@ -10259,30 +10154,10 @@ __flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
                        }
                } else if (dh->fate_action == MLX5_FLOW_FATE_QUEUE &&
                           !dv_h->rix_sample && !dv_h->rix_dest_array) {
-                       struct mlx5_hrxq *hrxq;
-                       uint32_t hrxq_idx;
-                       struct mlx5_flow_rss_desc *rss_desc =
-                               &((struct mlx5_flow_rss_desc *)priv->rss_desc)
-                               [!!priv->flow_nested_idx];
+                       struct mlx5_hrxq *hrxq = NULL;
+                       uint32_t hrxq_idx = __flow_dv_rss_get_hrxq
+                                               (dev, flow, dev_flow, &hrxq);
 
-                       MLX5_ASSERT(rss_desc->queue_num);
-                       hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
-                                                MLX5_RSS_HASH_KEY_LEN,
-                                                dev_flow->hash_fields,
-                                                rss_desc->queue,
-                                                rss_desc->queue_num);
-                       if (!hrxq_idx) {
-                               hrxq_idx = mlx5_hrxq_new
-                                               (dev, rss_desc->key,
-                                                MLX5_RSS_HASH_KEY_LEN,
-                                                dev_flow->hash_fields,
-                                                rss_desc->queue,
-                                                rss_desc->queue_num,
-                                                !!(dh->layers &
-                                                MLX5_FLOW_LAYER_TUNNEL));
-                       }
-                       hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
-                                             hrxq_idx);
                        if (!hrxq) {
                                rte_flow_error_set
                                        (error, rte_errno,
@@ -10835,12 +10710,16 @@ __flow_dv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 static void
 __flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
+       struct rte_flow_shared_action *shared;
        struct mlx5_flow_handle *dev_handle;
        struct mlx5_priv *priv = dev->data->dev_private;
 
        if (!flow)
                return;
        __flow_dv_remove(dev, flow);
+       shared = mlx5_flow_get_shared_rss(flow);
+       if (shared)
+               __atomic_sub_fetch(&shared->refcnt, 1, __ATOMIC_RELAXED);
        if (flow->counter) {
                flow_dv_counter_release(dev, flow->counter);
                flow->counter = 0;
@@ -10885,6 +10764,423 @@ __flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
        }
 }
 
+/**
+ * Release array of hash RX queue objects.
+ * Helper function.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] hrxqs
+ *   Array of hash RX queue objects.
+ *
+ * @return
+ *   Total number of references to hash RX queue objects in *hrxqs* array
+ *   after this operation.
+ */
+static int
+__flow_dv_hrxqs_release(struct rte_eth_dev *dev,
+                       uint32_t (*hrxqs)[MLX5_RSS_HASH_FIELDS_LEN])
+{
+       size_t i;
+       int remaining = 0;
+
+       for (i = 0; i < RTE_DIM(*hrxqs); i++) {
+               int ret = mlx5_hrxq_release(dev, (*hrxqs)[i]);
+
+               if (!ret)
+                       (*hrxqs)[i] = 0;
+               remaining += ret;
+       }
+       return remaining;
+}
+
+/**
+ * Release all hash RX queue objects representing shared RSS action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] action
+ *   Shared RSS action to remove hash RX queue objects from.
+ *
+ * @return
+ *   Total number of references to hash RX queue objects stored in *action*
+ *   after this operation.
+ *   Expected to be 0 if no external references held.
+ */
+static int
+__flow_dv_action_rss_hrxqs_release(struct rte_eth_dev *dev,
+                                struct mlx5_shared_action_rss *action)
+{
+       return __flow_dv_hrxqs_release(dev, &action->hrxq) +
+               __flow_dv_hrxqs_release(dev, &action->hrxq_tunnel);
+}
+
+/**
+ * Set up the shared RSS action.
+ * Prepare a set of hash RX queue objects sufficient to handle all valid
+ * hash_fields combinations (see enum ibv_rx_hash_fields).
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] action
+ *   Partially initialized shared RSS action.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_rss_setup(struct rte_eth_dev *dev,
+                       struct mlx5_shared_action_rss *action,
+                       struct rte_flow_error *error)
+{
+       size_t i;
+       int err;
+
+       for (i = 0; i < MLX5_RSS_HASH_FIELDS_LEN; i++) {
+               uint32_t hrxq_idx;
+               uint64_t hash_fields = mlx5_rss_hash_fields[i];
+               int tunnel;
+
+               for (tunnel = 0; tunnel < 2; tunnel++) {
+                       hrxq_idx = mlx5_hrxq_new(dev, action->origin.key,
+                                       MLX5_RSS_HASH_KEY_LEN,
+                                       hash_fields,
+                                       action->origin.queue,
+                                       action->origin.queue_num,
+                                       tunnel, true);
+                       if (!hrxq_idx) {
+                               rte_flow_error_set
+                                       (error, rte_errno,
+                                        RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                        "cannot get hash queue");
+                               goto error_hrxq_new;
+                       }
+                       err = __flow_dv_action_rss_hrxq_set
+                               (action, hash_fields, tunnel, hrxq_idx);
+                       MLX5_ASSERT(!err);
+               }
+       }
+       return 0;
+error_hrxq_new:
+       err = rte_errno;
+       __flow_dv_action_rss_hrxqs_release(dev, action);
+       rte_errno = err;
+       return -rte_errno;
+}
+
+/**
+ * Create shared RSS action.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] conf
+ *   Shared action configuration.
+ * @param[in] rss
+ *   RSS action specification used to create shared action.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   A valid shared action handle in case of success, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct rte_flow_shared_action *
+__flow_dv_action_rss_create(struct rte_eth_dev *dev,
+                           const struct rte_flow_shared_action_conf *conf,
+                           const struct rte_flow_action_rss *rss,
+                           struct rte_flow_error *error)
+{
+       struct rte_flow_shared_action *shared_action = NULL;
+       void *queue = NULL;
+       struct mlx5_shared_action_rss *shared_rss;
+       struct rte_flow_action_rss *origin;
+       const uint8_t *rss_key;
+       uint32_t queue_size = rss->queue_num * sizeof(uint16_t);
+
+       RTE_SET_USED(conf);
+       queue = mlx5_malloc(0, RTE_ALIGN_CEIL(queue_size, sizeof(void *)),
+                           0, SOCKET_ID_ANY);
+       shared_action = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*shared_action), 0,
+                                   SOCKET_ID_ANY);
+       if (!shared_action || !queue) {
+               rte_flow_error_set(error, ENOMEM,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                  "cannot allocate resource memory");
+               goto error_rss_init;
+       }
+       shared_rss = &shared_action->rss;
+       shared_rss->queue = queue;
+       origin = &shared_rss->origin;
+       origin->func = rss->func;
+       origin->level = rss->level;
+       /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
+       origin->types = !rss->types ? ETH_RSS_IP : rss->types;
+       /* NULL RSS key indicates default RSS key. */
+       rss_key = !rss->key ? rss_hash_default_key : rss->key;
+       memcpy(shared_rss->key, rss_key, MLX5_RSS_HASH_KEY_LEN);
+       origin->key = &shared_rss->key[0];
+       origin->key_len = MLX5_RSS_HASH_KEY_LEN;
+       memcpy(shared_rss->queue, rss->queue, queue_size);
+       origin->queue = shared_rss->queue;
+       origin->queue_num = rss->queue_num;
+       if (__flow_dv_action_rss_setup(dev, shared_rss, error))
+               goto error_rss_init;
+       shared_action->type = MLX5_RTE_FLOW_ACTION_TYPE_SHARED_RSS;
+       return shared_action;
+error_rss_init:
+       mlx5_free(shared_action);
+       mlx5_free(queue);
+       return NULL;
+}
+
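
For context, a hedged sketch of the application-facing usage this function serves, against the rte_flow shared-action API as introduced in DPDK 20.11; the port and queue values are illustrative:

#include <rte_common.h>
#include <rte_ethdev.h>
#include <rte_flow.h>

static struct rte_flow_shared_action *
setup_shared_rss(uint16_t port_id)
{
	static uint16_t queues[] = { 0, 1 };	/* illustrative */
	struct rte_flow_action_rss rss = {
		.types = ETH_RSS_IP,
		.queue = queues,
		.queue_num = RTE_DIM(queues),
	};
	struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_RSS,
		.conf = &rss,
	};
	struct rte_flow_error err;

	/* The returned handle can then be attached to any number of rules
	 * via RTE_FLOW_ACTION_TYPE_SHARED with conf = handle. */
	return rte_flow_shared_action_create(port_id, NULL, &action, &err);
}
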
+/**
+ * Destroy the shared RSS action.
+ * Release related hash RX queue objects.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] shared_rss
+ *   The shared RSS action object to be removed.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_rss_release(struct rte_eth_dev *dev,
+                        struct mlx5_shared_action_rss *shared_rss,
+                        struct rte_flow_error *error)
+{
+       struct rte_flow_shared_action *shared_action = NULL;
+       uint32_t old_refcnt = 1;
+       int remaining = __flow_dv_action_rss_hrxqs_release(dev, shared_rss);
+
+       if (remaining) {
+               return rte_flow_error_set(error, ETOOMANYREFS,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         NULL,
+                                         "shared rss hrxq has references");
+       }
+       shared_action = container_of(shared_rss,
+                                    struct rte_flow_shared_action, rss);
+       if (!__atomic_compare_exchange_n(&shared_action->refcnt, &old_refcnt,
+                                        0, 0,
+                                        __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
+               return rte_flow_error_set(error, ETOOMANYREFS,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         NULL,
+                                         "shared rss has references");
+       }
+       mlx5_free(shared_rss->queue);
+       return 0;
+}
+
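
The compare-exchange above is the usual last-reference gate: destruction proceeds only when the count is exactly one (the creation reference), so any concurrent holder makes the destroy fail with ETOOMANYREFS. The idiom in standalone form:

#include <stdbool.h>
#include <stdint.h>

static bool
try_release_last_ref(uint32_t *refcnt)
{
	uint32_t expected = 1;

	/* Succeeds only when no reference but the creator's is left. */
	return __atomic_compare_exchange_n(refcnt, &expected, 0,
					   false /* strong */,
					   __ATOMIC_ACQUIRE,
					   __ATOMIC_RELAXED);
}
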
+/**
+ * Create shared action, lock free,
+ * (mutex should be acquired by caller).
+ * Dispatcher for action type specific call.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] conf
+ *   Shared action configuration.
+ * @param[in] action
+ *   Action specification used to create shared action.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   A valid shared action handle in case of success, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct rte_flow_shared_action *
+__flow_dv_action_create(struct rte_eth_dev *dev,
+                       const struct rte_flow_shared_action_conf *conf,
+                       const struct rte_flow_action *action,
+                       struct rte_flow_error *error)
+{
+       struct rte_flow_shared_action *shared_action = NULL;
+       struct mlx5_priv *priv = dev->data->dev_private;
+
+       switch (action->type) {
+       case RTE_FLOW_ACTION_TYPE_RSS:
+               shared_action = __flow_dv_action_rss_create(dev, conf,
+                                                           action->conf,
+                                                           error);
+               break;
+       default:
+               rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+                                  NULL, "action type not supported");
+               break;
+       }
+       if (shared_action) {
+               __atomic_add_fetch(&shared_action->refcnt, 1,
+                                  __ATOMIC_RELAXED);
+               LIST_INSERT_HEAD(&priv->shared_actions, shared_action, next);
+       }
+       return shared_action;
+}
+
+/**
+ * Destroy the shared action.
+ * Release action related resources on the NIC and the memory.
+ * Lock free, (mutex should be acquired by caller).
+ * Dispatcher for action type specific call.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] action
+ *   The shared action object to be removed.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_destroy(struct rte_eth_dev *dev,
+                        struct rte_flow_shared_action *action,
+                        struct rte_flow_error *error)
+{
+       int ret;
+
+       switch (action->type) {
+       case MLX5_RTE_FLOW_ACTION_TYPE_SHARED_RSS:
+               ret = __flow_dv_action_rss_release(dev, &action->rss, error);
+               break;
+       default:
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         NULL,
+                                         "action type not supported");
+       }
+       if (ret)
+               return ret;
+       LIST_REMOVE(action, next);
+       mlx5_free(action);
+       return 0;
+}
+
+/**
+ * Update the shared RSS action configuration in place.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] shared_rss
+ *   The shared RSS action object to be updated.
+ * @param[in] action_conf
+ *   RSS action specification used to modify *shared_rss*.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ * @note Currently only the RSS queues update is supported.
+ */
+static int
+__flow_dv_action_rss_update(struct rte_eth_dev *dev,
+                           struct mlx5_shared_action_rss *shared_rss,
+                           const struct rte_flow_action_rss *action_conf,
+                           struct rte_flow_error *error)
+{
+       size_t i;
+       int ret;
+       void *queue = NULL;
+       const uint8_t *rss_key;
+       uint32_t rss_key_len;
+       uint32_t queue_size = action_conf->queue_num * sizeof(uint16_t);
+
+       queue = mlx5_malloc(MLX5_MEM_ZERO,
+                           RTE_ALIGN_CEIL(queue_size, sizeof(void *)),
+                           0, SOCKET_ID_ANY);
+       if (!queue)
+               return rte_flow_error_set(error, ENOMEM,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "cannot allocate resource memory");
+       if (action_conf->key) {
+               rss_key = action_conf->key;
+               rss_key_len = action_conf->key_len;
+       } else {
+               rss_key = rss_hash_default_key;
+               rss_key_len = MLX5_RSS_HASH_KEY_LEN;
+       }
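+       /*
+        * Push the (possibly unchanged) RSS key and the new queue list to
+        * every pre-created hash Rx queue object, covering all hash-field
+        * and tunnel combinations. The cached queue array is replaced only
+        * after every TIR was modified successfully.
+        */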
+       for (i = 0; i < MLX5_RSS_HASH_FIELDS_LEN; i++) {
+               uint32_t hrxq_idx;
+               uint64_t hash_fields = mlx5_rss_hash_fields[i];
+               int tunnel;
+
+               for (tunnel = 0; tunnel < 2; tunnel++) {
+                       hrxq_idx = __flow_dv_action_rss_hrxq_lookup
+                                       (shared_rss, hash_fields, tunnel);
+                       MLX5_ASSERT(hrxq_idx);
+                       ret = mlx5_hrxq_modify
+                               (dev, hrxq_idx,
+                                rss_key, rss_key_len,
+                                hash_fields,
+                                action_conf->queue, action_conf->queue_num);
+                       if (ret) {
+                               mlx5_free(queue);
+                               return rte_flow_error_set
+                                       (error, rte_errno,
+                                        RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                        "cannot update hash queue");
+                       }
+               }
+       }
+       mlx5_free(shared_rss->queue);
+       shared_rss->queue = queue;
+       memcpy(shared_rss->queue, action_conf->queue, queue_size);
+       shared_rss->origin.queue = shared_rss->queue;
+       shared_rss->origin.queue_num = action_conf->queue_num;
+       return 0;
+}
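+
+/*
+ * Illustrative usage sketch, not part of this patch: at the API level
+ * the update is expressed as a regular RSS action; per the note above,
+ * only the queue list is expected to change (new_queues is a
+ * placeholder).
+ *
+ *     uint16_t new_queues[] = { 4, 5 };
+ *     struct rte_flow_action_rss new_rss = {
+ *             .types = ETH_RSS_IP,
+ *             .queue = new_queues,
+ *             .queue_num = RTE_DIM(new_queues),
+ *     };
+ *     struct rte_flow_action update = {
+ *             .type = RTE_FLOW_ACTION_TYPE_RSS,
+ *             .conf = &new_rss,
+ *     };
+ *     struct rte_flow_error err;
+ *     int rc = rte_flow_shared_action_update(port_id, handle, &update,
+ *                                            &err);
+ */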
+
+/**
+ * Update a shared action configuration in place, lock free;
+ * the mutex should be acquired by the caller.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] action
+ *   The shared action object to be updated.
+ * @param[in] action_conf
+ *   Action specification used to modify *action*.
+ *   *action_conf* should be of type correlating with type of the *action*,
+ *   otherwise considered as invalid.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+__flow_dv_action_update(struct rte_eth_dev *dev,
+                       struct rte_flow_shared_action *action,
+                       const void *action_conf,
+                       struct rte_flow_error *error)
+{
+       switch (action->type) {
+       case MLX5_RTE_FLOW_ACTION_TYPE_SHARED_RSS:
+               return __flow_dv_action_rss_update(dev, &action->rss,
+                                                  action_conf, error);
+       default:
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         NULL,
+                                         "action type not supported");
+       }
+}
+
 /**
  * Query a dv flow rule for its statistics via devx.
  *
@@ -11434,6 +11730,103 @@ error:
        return -1;
 }
 
+/**
+ * Validate batch counter support on the root table.
+ *
+ * Create a simple flow with an invalid counter and a jump to another table
+ * on the root table to check whether a batch counter with offset is
+ * supported there.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev)
+{
+       struct mlx5_priv *priv = dev->data->dev_private;
+       struct mlx5_dev_ctx_shared *sh = priv->sh;
+       struct mlx5_flow_dv_match_params mask = {
+               .size = sizeof(mask.buf),
+       };
+       struct mlx5_flow_dv_match_params value = {
+               .size = sizeof(value.buf),
+       };
+       struct mlx5dv_flow_matcher_attr dv_attr = {
+               .type = IBV_FLOW_ATTR_NORMAL,
+               .priority = 0,
+               .match_criteria_enable = 0,
+               .match_mask = (void *)&mask,
+       };
+       void *actions[2] = { 0 };
+       struct mlx5_flow_tbl_resource *tbl = NULL, *dest_tbl = NULL;
+       struct mlx5_devx_obj *dcs = NULL;
+       void *matcher = NULL;
+       void *flow = NULL;
+       int i, ret = -1;
+
+       tbl = flow_dv_tbl_resource_get(dev, 0, 0, 0, NULL);
+       if (!tbl)
+               goto err;
+       dest_tbl = flow_dv_tbl_resource_get(dev, 1, 0, 0, NULL);
+       if (!dest_tbl)
+               goto err;
+       dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0x4);
+       if (!dcs)
+               goto err;
+       ret = mlx5_flow_os_create_flow_action_count(dcs->obj, UINT16_MAX,
+                                                   &actions[0]);
+       if (ret)
+               goto err;
+       ret = mlx5_flow_os_create_flow_action_dest_flow_tbl
+                               (dest_tbl->obj, &actions[1]);
+       if (ret)
+               goto err;
+       dv_attr.match_criteria_enable = flow_dv_matcher_enable(mask.buf);
+       ret = mlx5_flow_os_create_flow_matcher(sh->ctx, &dv_attr, tbl->obj,
+                                              &matcher);
+       if (ret)
+               goto err;
+       ret = mlx5_flow_os_create_flow(matcher, (void *)&value, 2,
+                                      actions, &flow);
+err:
+       /*
+        * If a batch counter with offset is not supported, the driver does
+        * not validate the invalid offset value and flow creation succeeds.
+        * In that case batch counters are not supported on the root table.
+        *
+        * Otherwise, if flow creation fails, the counter offset is supported.
+        */
+       if (flow) {
+               DRV_LOG(INFO, "Batch counter is not supported in root "
+                             "table. Switch to fallback mode.");
+               rte_errno = ENOTSUP;
+               ret = -rte_errno;
+               claim_zero(mlx5_flow_os_destroy_flow(flow));
+       } else {
+               /* Check matcher to make sure validate fail at flow create. */
+               if (!matcher || errno != EINVAL)
+                       DRV_LOG(ERR, "Unexpected error in counter offset "
+                                    "support detection");
+               ret = 0;
+       }
+       for (i = 0; i < 2; i++) {
+               if (actions[i])
+                       claim_zero(mlx5_flow_os_destroy_flow_action
+                                  (actions[i]));
+       }
+       if (matcher)
+               claim_zero(mlx5_flow_os_destroy_flow_matcher(matcher));
+       if (tbl)
+               flow_dv_tbl_resource_release(dev, tbl);
+       if (dest_tbl)
+               flow_dv_tbl_resource_release(dev, dest_tbl);
+       if (dcs)
+               claim_zero(mlx5_devx_cmd_destroy(dcs));
+       return ret;
+}
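+
+/*
+ * Sketch of the assumed caller-side handling, simplified: when the probe
+ * above fails with -ENOTSUP, the counter manager is expected to switch
+ * to fallback mode so that no batch counter with offset is used on the
+ * root table.
+ *
+ *     if (mlx5_flow_dv_discover_counter_offset_support(dev))
+ *             priv->sh->cmng.counter_fallback = 1;
+ */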
+
 /**
  * Query a devx counter.
  *
@@ -11596,7 +11989,7 @@ flow_dv_counter_allocate(struct rte_eth_dev *dev)
        uint32_t cnt;
 
        flow_dv_shared_lock(dev);
-       cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0);
+       cnt = flow_dv_counter_alloc(dev, 0);
        flow_dv_shared_unlock(dev);
        return cnt;
 }
@@ -11612,6 +12005,92 @@ flow_dv_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
        flow_dv_shared_unlock(dev);
 }
 
+/**
+ * Validate shared action.
+ * Dispatcher for action type specific validation.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] conf
+ *   Shared action configuration.
+ * @param[in] action
+ *   The shared action object to validate.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   0 on success, otherwise negative errno value.
+ */
+static int
+flow_dv_action_validate(struct rte_eth_dev *dev,
+                       const struct rte_flow_shared_action_conf *conf,
+                       const struct rte_flow_action *action,
+                       struct rte_flow_error *error)
+{
+       RTE_SET_USED(conf);
+       switch (action->type) {
+       case RTE_FLOW_ACTION_TYPE_RSS:
+               return mlx5_validate_action_rss(dev, action, error);
+       default:
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         NULL,
+                                         "action type not supported");
+       }
+}
+
+/*
+ * Mutex-protected thunk to the lock-free __flow_dv_action_create().
+ */
+static struct rte_flow_shared_action *
+flow_dv_action_create(struct rte_eth_dev *dev,
+                     const struct rte_flow_shared_action_conf *conf,
+                     const struct rte_flow_action *action,
+                     struct rte_flow_error *error)
+{
+       struct rte_flow_shared_action *shared_action = NULL;
+
+       flow_dv_shared_lock(dev);
+       shared_action = __flow_dv_action_create(dev, conf, action, error);
+       flow_dv_shared_unlock(dev);
+       return shared_action;
+}
+
+/*
+ * Mutex-protected thunk to the lock-free __flow_dv_action_destroy().
+ */
+static int
+flow_dv_action_destroy(struct rte_eth_dev *dev,
+                      struct rte_flow_shared_action *action,
+                      struct rte_flow_error *error)
+{
+       int ret;
+
+       flow_dv_shared_lock(dev);
+       ret = __flow_dv_action_destroy(dev, action, error);
+       flow_dv_shared_unlock(dev);
+       return ret;
+}
+
+/*
+ * Mutex-protected thunk to the lock-free __flow_dv_action_update().
+ */
+static int
+flow_dv_action_update(struct rte_eth_dev *dev,
+                     struct rte_flow_shared_action *action,
+                     const void *action_conf,
+                     struct rte_flow_error *error)
+{
+       int ret;
+
+       flow_dv_shared_lock(dev);
+       ret = __flow_dv_action_update(dev, action, action_conf, error);
+       flow_dv_shared_unlock(dev);
+       return ret;
+}
+
 const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
        .validate = flow_dv_validate,
        .prepare = flow_dv_prepare,
@@ -11628,6 +12107,10 @@ const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
        .counter_free = flow_dv_counter_free,
        .counter_query = flow_dv_counter_query,
        .get_aged_flows = flow_get_aged_flows,
+       .action_validate = flow_dv_action_validate,
+       .action_create = flow_dv_action_create,
+       .action_destroy = flow_dv_action_destroy,
+       .action_update = flow_dv_action_update,
 };
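+
+/*
+ * Illustrative dispatch path, assumed and simplified: the generic mlx5
+ * flow layer selects this ops table for the DV driver and forwards the
+ * shared-action calls to the thunks registered above, e.g.:
+ *
+ *     const struct mlx5_flow_driver_ops *fops =
+ *             flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
+ *     shared_action = fops->action_create(dev, conf, action, error);
+ */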
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */