+ * 0 on success otherwise -errno and errno is set.
+ */
+static int
+flow_dv_modify_hdr_resource_register
+ (struct rte_eth_dev *dev,
+ struct mlx5_flow_dv_modify_hdr_resource *resource,
+ struct mlx5_flow *dev_flow,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_ibv_shared *sh = priv->sh;
+ struct mlx5_flow_dv_modify_hdr_resource *cache_resource;
+ struct mlx5dv_dr_domain *ns;
+
+ if (resource->ft_type == MLX5DV_FLOW_TABLE_TYPE_FDB)
+ ns = sh->fdb_domain;
+ else if (resource->ft_type == MLX5DV_FLOW_TABLE_TYPE_NIC_TX)
+ ns = sh->tx_domain;
+ else
+ ns = sh->rx_domain;
+ resource->flags =
+ dev_flow->flow->group ? 0 : MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL;
+ /* Lookup a matching resource from cache. */
+ LIST_FOREACH(cache_resource, &sh->modify_cmds, next) {
+ if (resource->ft_type == cache_resource->ft_type &&
+ resource->actions_num == cache_resource->actions_num &&
+ resource->flags == cache_resource->flags &&
+ !memcmp((const void *)resource->actions,
+ (const void *)cache_resource->actions,
+ (resource->actions_num *
+ sizeof(resource->actions[0])))) {
+ DRV_LOG(DEBUG, "modify-header resource %p: refcnt %d++",
+ (void *)cache_resource,
+ rte_atomic32_read(&cache_resource->refcnt));
+ rte_atomic32_inc(&cache_resource->refcnt);
+ dev_flow->dv.modify_hdr = cache_resource;
+ return 0;
+ }
+ }
+ /* Register new modify-header resource. */
+ cache_resource = rte_calloc(__func__, 1, sizeof(*cache_resource), 0);
+ if (!cache_resource)
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "cannot allocate resource memory");
+ *cache_resource = *resource;
+ cache_resource->verbs_action =
+ mlx5_glue->dv_create_flow_action_modify_header
+ (sh->ctx, cache_resource->ft_type,
+ ns, cache_resource->flags,
+ cache_resource->actions_num *
+ sizeof(cache_resource->actions[0]),
+ (uint64_t *)cache_resource->actions);
+ if (!cache_resource->verbs_action) {
+ rte_free(cache_resource);
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "cannot create action");
+ }
+ rte_atomic32_init(&cache_resource->refcnt);
+ rte_atomic32_inc(&cache_resource->refcnt);
+ LIST_INSERT_HEAD(&sh->modify_cmds, cache_resource, next);
+ dev_flow->dv.modify_hdr = cache_resource;
+ DRV_LOG(DEBUG, "new modify-header resource %p: refcnt %d++",
+ (void *)cache_resource,
+ rte_atomic32_read(&cache_resource->refcnt));
+ return 0;
+}
+
+#define MLX5_CNT_CONTAINER_RESIZE 64
+
+/**
+ * Get or create a flow counter.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] shared
+ * Indicate if this counter is shared with other flows.
+ * @param[in] id
+ * Counter identifier.
+ *
+ * @return
+ * pointer to flow counter on success, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_dv_counter_alloc_fallback(struct rte_eth_dev *dev, uint32_t shared,
+ uint32_t id)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_flow_counter *cnt = NULL;
+ struct mlx5_devx_obj *dcs = NULL;
+
+ if (!priv->config.devx) {
+ rte_errno = ENOTSUP;
+ return NULL;
+ }
+ if (shared) {
+ TAILQ_FOREACH(cnt, &priv->sh->cmng.flow_counters, next) {
+ if (cnt->shared && cnt->id == id) {
+ cnt->ref_cnt++;
+ return cnt;
+ }
+ }
+ }
+ dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
+ if (!dcs)
+ return NULL;
+ cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+ if (!cnt) {
+ claim_zero(mlx5_devx_cmd_destroy(cnt->dcs));
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ struct mlx5_flow_counter tmpl = {
+ .shared = shared,
+ .ref_cnt = 1,
+ .id = id,
+ .dcs = dcs,
+ };
+ tmpl.action = mlx5_glue->dv_create_flow_action_counter(dcs->obj, 0);
+ if (!tmpl.action) {
+ claim_zero(mlx5_devx_cmd_destroy(cnt->dcs));
+ rte_errno = errno;
+ rte_free(cnt);
+ return NULL;
+ }
+ *cnt = tmpl;
+ TAILQ_INSERT_HEAD(&priv->sh->cmng.flow_counters, cnt, next);
+ return cnt;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] counter
+ * Pointer to the counter handler.
+ */
+static void
+flow_dv_counter_release_fallback(struct rte_eth_dev *dev,
+ struct mlx5_flow_counter *counter)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ if (!counter)
+ return;
+ if (--counter->ref_cnt == 0) {
+ TAILQ_REMOVE(&priv->sh->cmng.flow_counters, counter, next);
+ claim_zero(mlx5_devx_cmd_destroy(counter->dcs));
+ rte_free(counter);
+ }
+}
+
+/**
+ * Query a devx flow counter.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] cnt
+ * Pointer to the flow counter.
+ * @param[out] pkts
+ * The statistics value of packets.
+ * @param[out] bytes
+ * The statistics value of bytes.
+ *
+ * @return
+ * 0 on success, otherwise a negative errno value and rte_errno is set.
+ */
+static inline int
+_flow_dv_query_count_fallback(struct rte_eth_dev *dev __rte_unused,
+ struct mlx5_flow_counter *cnt, uint64_t *pkts,
+ uint64_t *bytes)
+{
+ return mlx5_devx_cmd_flow_counter_query(cnt->dcs, 0, 0, pkts, bytes,
+ 0, NULL, NULL, 0);
+}
+
+/**
+ * Get a pool by a counter.
+ *
+ * @param[in] cnt
+ * Pointer to the counter.
+ *
+ * @return
+ * The counter pool.
+ */
+static struct mlx5_flow_counter_pool *
+flow_dv_counter_pool_get(struct mlx5_flow_counter *cnt)
+{
+ if (!cnt->batch) {
+ cnt -= cnt->dcs->id % MLX5_COUNTERS_PER_POOL;
+ return (struct mlx5_flow_counter_pool *)cnt - 1;
+ }
+ return cnt->pool;
+}
+
+/**
+ * Get a pool by devx counter ID.
+ *
+ * @param[in] cont
+ * Pointer to the counter container.
+ * @param[in] id
+ * The counter devx ID.
+ *
+ * @return
+ * The counter pool pointer if exists, NULL otherwise,
+ */
+static struct mlx5_flow_counter_pool *
+flow_dv_find_pool_by_id(struct mlx5_pools_container *cont, int id)
+{
+ struct mlx5_flow_counter_pool *pool;
+
+ TAILQ_FOREACH(pool, &cont->pool_list, next) {
+ int base = (pool->min_dcs->id / MLX5_COUNTERS_PER_POOL) *
+ MLX5_COUNTERS_PER_POOL;
+
+ if (id >= base && id < base + MLX5_COUNTERS_PER_POOL)
+ return pool;
+ };
+ return NULL;
+}
+
+/**
+ * Allocate a new memory for the counter values wrapped by all the needed
+ * management.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] raws_n
+ * The raw memory areas - each one for MLX5_COUNTERS_PER_POOL counters.
+ *
+ * @return
+ * The new memory management pointer on success, otherwise NULL and rte_errno
+ * is set.
+ */
+static struct mlx5_counter_stats_mem_mng *
+flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
+{
+ struct mlx5_ibv_shared *sh = ((struct mlx5_priv *)
+ (dev->data->dev_private))->sh;
+ struct mlx5_devx_mkey_attr mkey_attr;
+ struct mlx5_counter_stats_mem_mng *mem_mng;
+ volatile struct flow_counter_stats *raw_data;
+ int size = (sizeof(struct flow_counter_stats) *
+ MLX5_COUNTERS_PER_POOL +
+ sizeof(struct mlx5_counter_stats_raw)) * raws_n +
+ sizeof(struct mlx5_counter_stats_mem_mng);
+ uint8_t *mem = rte_calloc(__func__, 1, size, sysconf(_SC_PAGESIZE));
+ int i;
+
+ if (!mem) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
+ size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
+ mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (!mem_mng->umem) {
+ rte_errno = errno;
+ rte_free(mem);
+ return NULL;
+ }
+ mkey_attr.addr = (uintptr_t)mem;
+ mkey_attr.size = size;
+ mkey_attr.umem_id = mem_mng->umem->umem_id;
+ mkey_attr.pd = sh->pdn;
+ mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
+ if (!mem_mng->dm) {
+ mlx5_glue->devx_umem_dereg(mem_mng->umem);
+ rte_errno = errno;
+ rte_free(mem);
+ return NULL;
+ }
+ mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
+ raw_data = (volatile struct flow_counter_stats *)mem;
+ for (i = 0; i < raws_n; ++i) {
+ mem_mng->raws[i].mem_mng = mem_mng;
+ mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
+ }
+ LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
+ return mem_mng;
+}
+
+/**
+ * Resize a counter container.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] batch
+ * Whether the pool is for counter that was allocated by batch command.
+ *
+ * @return
+ * The new container pointer on success, otherwise NULL and rte_errno is set.
+ */
+static struct mlx5_pools_container *
+flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_pools_container *cont =
+ MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+ struct mlx5_pools_container *new_cont =
+ MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0);
+ struct mlx5_counter_stats_mem_mng *mem_mng;
+ uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
+ uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
+ int i;
+
+ if (cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) {
+ /* The last resize still hasn't detected by the host thread. */
+ rte_errno = EAGAIN;
+ return NULL;
+ }
+ new_cont->pools = rte_calloc(__func__, 1, mem_size, 0);
+ if (!new_cont->pools) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ if (cont->n)
+ memcpy(new_cont->pools, cont->pools, cont->n *
+ sizeof(struct mlx5_flow_counter_pool *));
+ mem_mng = flow_dv_create_counter_stat_mem_mng(dev,
+ MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES);
+ if (!mem_mng) {
+ rte_free(new_cont->pools);
+ return NULL;
+ }
+ for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
+ LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws,
+ mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE +
+ i, next);
+ new_cont->n = resize;
+ rte_atomic16_set(&new_cont->n_valid, rte_atomic16_read(&cont->n_valid));
+ TAILQ_INIT(&new_cont->pool_list);
+ TAILQ_CONCAT(&new_cont->pool_list, &cont->pool_list, next);
+ new_cont->init_mem_mng = mem_mng;
+ rte_cio_wmb();
+ /* Flip the master container. */
+ priv->sh->cmng.mhi[batch] ^= (uint8_t)1;
+ return new_cont;
+}
+
+/**
+ * Query a devx flow counter.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] cnt
+ * Pointer to the flow counter.
+ * @param[out] pkts
+ * The statistics value of packets.
+ * @param[out] bytes
+ * The statistics value of bytes.
+ *
+ * @return
+ * 0 on success, otherwise a negative errno value and rte_errno is set.
+ */
+static inline int
+_flow_dv_query_count(struct rte_eth_dev *dev,
+ struct mlx5_flow_counter *cnt, uint64_t *pkts,
+ uint64_t *bytes)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_flow_counter_pool *pool =
+ flow_dv_counter_pool_get(cnt);
+ int offset = cnt - &pool->counters_raw[0];
+
+ if (priv->counter_fallback)
+ return _flow_dv_query_count_fallback(dev, cnt, pkts, bytes);
+
+ rte_spinlock_lock(&pool->sl);
+ /*
+ * The single counters allocation may allocate smaller ID than the
+ * current allocated in parallel to the host reading.
+ * In this case the new counter values must be reported as 0.
+ */
+ if (unlikely(!cnt->batch && cnt->dcs->id < pool->raw->min_dcs_id)) {
+ *pkts = 0;
+ *bytes = 0;
+ } else {
+ *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits);
+ *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes);
+ }
+ rte_spinlock_unlock(&pool->sl);
+ return 0;
+}
+
+/**
+ * Create and initialize a new counter pool.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[out] dcs
+ * The devX counter handle.
+ * @param[in] batch
+ * Whether the pool is for counter that was allocated by batch command.
+ *
+ * @return
+ * A new pool pointer on success, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter_pool *
+flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
+ uint32_t batch)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_flow_counter_pool *pool;
+ struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
+ 0);
+ int16_t n_valid = rte_atomic16_read(&cont->n_valid);
+ uint32_t size;
+
+ if (cont->n == n_valid) {
+ cont = flow_dv_container_resize(dev, batch);
+ if (!cont)
+ return NULL;
+ }
+ size = sizeof(*pool) + MLX5_COUNTERS_PER_POOL *
+ sizeof(struct mlx5_flow_counter);
+ pool = rte_calloc(__func__, 1, size, 0);
+ if (!pool) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ pool->min_dcs = dcs;
+ pool->raw = cont->init_mem_mng->raws + n_valid %
+ MLX5_CNT_CONTAINER_RESIZE;
+ pool->raw_hw = NULL;
+ rte_spinlock_init(&pool->sl);
+ /*
+ * The generation of the new allocated counters in this pool is 0, 2 in
+ * the pool generation makes all the counters valid for allocation.
+ */
+ rte_atomic64_set(&pool->query_gen, 0x2);
+ TAILQ_INIT(&pool->counters);
+ TAILQ_INSERT_TAIL(&cont->pool_list, pool, next);
+ cont->pools[n_valid] = pool;
+ /* Pool initialization must be updated before host thread access. */
+ rte_cio_wmb();
+ rte_atomic16_add(&cont->n_valid, 1);
+ return pool;
+}
+
+/**
+ * Prepare a new counter and/or a new counter pool.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[out] cnt_free
+ * Where to put the pointer of a new counter.
+ * @param[in] batch
+ * Whether the pool is for counter that was allocated by batch command.
+ *
+ * @return
+ * The free counter pool pointer and @p cnt_free is set on success,
+ * NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter_pool *
+flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
+ struct mlx5_flow_counter **cnt_free,
+ uint32_t batch)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_flow_counter_pool *pool;
+ struct mlx5_devx_obj *dcs = NULL;
+ struct mlx5_flow_counter *cnt;
+ uint32_t i;
+
+ if (!batch) {
+ /* bulk_bitmap must be 0 for single counter allocation. */
+ dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
+ if (!dcs)
+ return NULL;
+ pool = flow_dv_find_pool_by_id
+ (MLX5_CNT_CONTAINER(priv->sh, batch, 0), dcs->id);
+ if (!pool) {
+ pool = flow_dv_pool_create(dev, dcs, batch);
+ if (!pool) {
+ mlx5_devx_cmd_destroy(dcs);
+ return NULL;
+ }
+ } else if (dcs->id < pool->min_dcs->id) {
+ rte_atomic64_set(&pool->a64_dcs,
+ (int64_t)(uintptr_t)dcs);
+ }
+ cnt = &pool->counters_raw[dcs->id % MLX5_COUNTERS_PER_POOL];
+ TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+ cnt->dcs = dcs;
+ *cnt_free = cnt;
+ return pool;
+ }
+ /* bulk_bitmap is in 128 counters units. */
+ if (priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4)
+ dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0x4);
+ if (!dcs) {
+ rte_errno = ENODATA;
+ return NULL;
+ }
+ pool = flow_dv_pool_create(dev, dcs, batch);
+ if (!pool) {
+ mlx5_devx_cmd_destroy(dcs);
+ return NULL;
+ }
+ for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+ cnt = &pool->counters_raw[i];
+ cnt->pool = pool;
+ TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+ }
+ *cnt_free = &pool->counters_raw[0];
+ return pool;
+}
+
+/**
+ * Search for existed shared counter.
+ *
+ * @param[in] cont
+ * Pointer to the relevant counter pool container.
+ * @param[in] id
+ * The shared counter ID to search.
+ *
+ * @return
+ * NULL if not existed, otherwise pointer to the shared counter.