uint32_t attr;
};
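+/* LRO configuration types, passed to the Rx hash queue creation. */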
+#define MLX5_FLOW_IPV4_LRO (1 << 0)
+#define MLX5_FLOW_IPV6_LRO (1 << 1)
+
/**
* Initialize flow attributes structure according to flow items' types.
*
return 0;
}
-#define MLX5_CNT_CONTAINER_SIZE 64
-#define MLX5_CNT_CONTAINER(priv, batch) (&(priv)->sh->cmng.ccont[batch])
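+/* Number of pool slots added to a counter container on each resize. */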
+#define MLX5_CNT_CONTAINER_RESIZE 64
+
+/**
+ * Get or create a flow counter.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] shared
+ * Indicate if this counter is shared with other flows.
+ * @param[in] id
+ * Counter identifier.
+ *
+ * @return
+ * pointer to flow counter on success, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_dv_counter_alloc_fallback(struct rte_eth_dev *dev, uint32_t shared,
+ uint32_t id)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_flow_counter *cnt = NULL;
+ struct mlx5_devx_obj *dcs = NULL;
+
+ if (!priv->config.devx) {
+ rte_errno = ENOTSUP;
+ return NULL;
+ }
+ if (shared) {
+ TAILQ_FOREACH(cnt, &priv->sh->cmng.flow_counters, next) {
+ if (cnt->shared && cnt->id == id) {
+ cnt->ref_cnt++;
+ return cnt;
+ }
+ }
+ }
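+ /* Allocate a dedicated DevX counter for this flow. */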
+ dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
+ if (!dcs)
+ return NULL;
+ cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+ if (!cnt) {
+ claim_zero(mlx5_devx_cmd_destroy(dcs));
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ struct mlx5_flow_counter tmpl = {
+ .shared = shared,
+ .ref_cnt = 1,
+ .id = id,
+ .dcs = dcs,
+ };
+ tmpl.action = mlx5_glue->dv_create_flow_action_counter(dcs->obj, 0);
+ if (!tmpl.action) {
+ claim_zero(mlx5_devx_cmd_destroy(dcs));
+ rte_errno = errno;
+ rte_free(cnt);
+ return NULL;
+ }
+ *cnt = tmpl;
+ TAILQ_INSERT_HEAD(&priv->sh->cmng.flow_counters, cnt, next);
+ return cnt;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] counter
+ * Pointer to the counter handler.
+ */
+static void
+flow_dv_counter_release_fallback(struct rte_eth_dev *dev,
+ struct mlx5_flow_counter *counter)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ if (!counter)
+ return;
+ if (--counter->ref_cnt == 0) {
+ TAILQ_REMOVE(&priv->sh->cmng.flow_counters, counter, next);
+ claim_zero(mlx5_devx_cmd_destroy(counter->dcs));
+ rte_free(counter);
+ }
+}
+
+/**
+ * Query a devx flow counter.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] cnt
+ * Pointer to the flow counter.
+ * @param[out] pkts
+ * The statistics value of packets.
+ * @param[out] bytes
+ * The statistics value of bytes.
+ *
+ * @return
+ * 0 on success, otherwise a negative errno value and rte_errno is set.
+ */
+static inline int
+_flow_dv_query_count_fallback(struct rte_eth_dev *dev __rte_unused,
+ struct mlx5_flow_counter *cnt, uint64_t *pkts,
+ uint64_t *bytes)
+{
+ return mlx5_devx_cmd_flow_counter_query(cnt->dcs, 0, 0, pkts, bytes,
+ 0, NULL, NULL, 0);
+}
/**
* Get a pool by a counter.
{
struct mlx5_ibv_shared *sh = ((struct mlx5_priv *)
(dev->data->dev_private))->sh;
- struct mlx5dv_pd dv_pd;
- struct mlx5dv_obj dv_obj;
struct mlx5_devx_mkey_attr mkey_attr;
struct mlx5_counter_stats_mem_mng *mem_mng;
volatile struct flow_counter_stats *raw_data;
rte_free(mem);
return NULL;
}
- dv_obj.pd.in = sh->pd;
- dv_obj.pd.out = &dv_pd;
- mlx5_glue->dv_init_obj(&dv_obj, MLX5DV_OBJ_PD);
mkey_attr.addr = (uintptr_t)mem;
mkey_attr.size = size;
mkey_attr.umem_id = mem_mng->umem->umem_id;
- mkey_attr.pd = dv_pd.pdn;
+ mkey_attr.pd = sh->pdn;
mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
if (!mem_mng->dm) {
mlx5_glue->devx_umem_dereg(mem_mng->umem);
}
/**
- * Prepare a counter container.
+ * Resize a counter container.
*
* @param[in] dev
* Pointer to the Ethernet device structure.
* Whether the pool is for counter that was allocated by batch command.
*
* @return
- * The container pointer on success, otherwise NULL and rte_errno is set.
+ * The new container pointer on success, otherwise NULL and rte_errno is set.
*/
static struct mlx5_pools_container *
-flow_dv_container_prepare(struct rte_eth_dev *dev, uint32_t batch)
+flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
{
struct mlx5_priv *priv = dev->data->dev_private;
- struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv, batch);
+ struct mlx5_pools_container *cont =
+ MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+ struct mlx5_pools_container *new_cont =
+ MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0);
struct mlx5_counter_stats_mem_mng *mem_mng;
- uint32_t size = MLX5_CNT_CONTAINER_SIZE;
- uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * size;
+ uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
+ uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
+ int i;
- cont->pools = rte_calloc(__func__, 1, mem_size, 0);
- if (!cont->pools) {
+ if (cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) {
+ /* The last resize still hasn't been detected by the host thread. */
+ rte_errno = EAGAIN;
+ return NULL;
+ }
+ new_cont->pools = rte_calloc(__func__, 1, mem_size, 0);
+ if (!new_cont->pools) {
rte_errno = ENOMEM;
return NULL;
}
- mem_mng = flow_dv_create_counter_stat_mem_mng(dev, size);
+ if (cont->n)
+ memcpy(new_cont->pools, cont->pools, cont->n *
+ sizeof(struct mlx5_flow_counter_pool *));
+ mem_mng = flow_dv_create_counter_stat_mem_mng(dev,
+ MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES);
if (!mem_mng) {
- rte_free(cont->pools);
+ rte_free(new_cont->pools);
return NULL;
}
- cont->n = size;
- TAILQ_INIT(&cont->pool_list);
- cont->init_mem_mng = mem_mng;
- return cont;
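+ /* Keep the extra raws free for the pending asynchronous queries. */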
+ for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
+ LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws,
+ mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE +
+ i, next);
+ new_cont->n = resize;
+ rte_atomic16_set(&new_cont->n_valid, rte_atomic16_read(&cont->n_valid));
+ TAILQ_INIT(&new_cont->pool_list);
+ TAILQ_CONCAT(&new_cont->pool_list, &cont->pool_list, next);
+ new_cont->init_mem_mng = mem_mng;
+ rte_cio_wmb();
+ /* Flip the master container. */
+ priv->sh->cmng.mhi[batch] ^= (uint8_t)1;
+ return new_cont;
}
/**
* 0 on success, otherwise a negative errno value and rte_errno is set.
*/
static inline int
-_flow_dv_query_count(struct rte_eth_dev *dev __rte_unused,
+_flow_dv_query_count(struct rte_eth_dev *dev,
struct mlx5_flow_counter *cnt, uint64_t *pkts,
uint64_t *bytes)
{
+ struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_flow_counter_pool *pool =
flow_dv_counter_pool_get(cnt);
- uint16_t offset = pool->min_dcs->id % MLX5_COUNTERS_PER_POOL;
- int ret = mlx5_devx_cmd_flow_counter_query
- (pool->min_dcs, 0, MLX5_COUNTERS_PER_POOL - offset, NULL,
- NULL, pool->raw->mem_mng->dm->id,
- (void *)(uintptr_t)(pool->raw->data +
- offset));
-
- if (ret) {
- DRV_LOG(ERR, "Failed to trigger synchronous"
- " query for dcs ID %d\n",
- pool->min_dcs->id);
- return ret;
+ int offset = cnt - &pool->counters_raw[0];
+
+ if (priv->counter_fallback)
+ return _flow_dv_query_count_fallback(dev, cnt, pkts, bytes);
+
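+ /* The raw data may be updated in parallel by the asynchronous query. */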
+ rte_spinlock_lock(&pool->sl);
+ /*
+ * A single (non-batch) counter may be allocated, in parallel to the
+ * host thread read, with an ID smaller than the minimum dcs ID covered
+ * by the current raw data. In this case the new counter values must be
+ * reported as 0.
+ */
+ if (unlikely(!cnt->batch && cnt->dcs->id < pool->raw->min_dcs_id)) {
+ *pkts = 0;
+ *bytes = 0;
+ } else {
+ *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits);
+ *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes);
}
- offset = cnt - &pool->counters_raw[0];
- *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits);
- *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes);
+ rte_spinlock_unlock(&pool->sl);
return 0;
}
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_flow_counter_pool *pool;
- struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv, batch);
+ struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
+ 0);
+ int16_t n_valid = rte_atomic16_read(&cont->n_valid);
uint32_t size;
- if (!cont->n) {
- cont = flow_dv_container_prepare(dev, batch);
+ if (cont->n == n_valid) {
+ cont = flow_dv_container_resize(dev, batch);
if (!cont)
return NULL;
- } else if (cont->n == cont->n_valid) {
- DRV_LOG(ERR, "No space in container to allocate a new pool\n");
- rte_errno = ENOSPC;
- return NULL;
}
size = sizeof(*pool) + MLX5_COUNTERS_PER_POOL *
sizeof(struct mlx5_flow_counter);
return NULL;
}
pool->min_dcs = dcs;
- pool->raw = cont->init_mem_mng->raws + cont->n_valid;
+ pool->raw = cont->init_mem_mng->raws + n_valid %
+ MLX5_CNT_CONTAINER_RESIZE;
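+ /* No asynchronous query is in progress for the new pool. */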
+ pool->raw_hw = NULL;
+ rte_spinlock_init(&pool->sl);
+ /*
+ * The newly allocated counters in this pool have generation 0, so
+ * setting the pool query generation to 2 makes all of them valid for
+ * allocation right away.
+ */
+ rte_atomic64_set(&pool->query_gen, 0x2);
TAILQ_INIT(&pool->counters);
TAILQ_INSERT_TAIL(&cont->pool_list, pool, next);
- cont->pools[cont->n_valid] = pool;
- cont->n_valid++;
+ cont->pools[n_valid] = pool;
+ /* Pool initialization must be visible before the host thread accesses it. */
+ rte_cio_wmb();
+ rte_atomic16_add(&cont->n_valid, 1);
return pool;
}
dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
if (!dcs)
return NULL;
- pool = flow_dv_find_pool_by_id(MLX5_CNT_CONTAINER(priv, batch),
- dcs->id);
+ pool = flow_dv_find_pool_by_id
+ (MLX5_CNT_CONTAINER(priv->sh, batch, 0), dcs->id);
if (!pool) {
pool = flow_dv_pool_create(dev, dcs, batch);
if (!pool) {
return NULL;
}
} else if (dcs->id < pool->min_dcs->id) {
- pool->min_dcs->id = dcs->id;
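+ /* Update the minimum dcs atomically for the host thread. */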
+ rte_atomic64_set(&pool->a64_dcs,
+ (int64_t)(uintptr_t)dcs);
}
cnt = &pool->counters_raw[dcs->id % MLX5_COUNTERS_PER_POOL];
TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
* shared counters from the single container.
*/
uint32_t batch = (group && !shared) ? 1 : 0;
- struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv, batch);
+ struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
+ 0);
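+
+ /* Fallback: per-counter allocation and synchronous query. */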
+ if (priv->counter_fallback)
+ return flow_dv_counter_alloc_fallback(dev, shared, id);
if (!priv->config.devx) {
rte_errno = ENOTSUP;
return NULL;
}
}
- /* Pools which has a free counters are in the start. */
+ /* Pools with free counters are at the start of the list. */
- pool = TAILQ_FIRST(&cont->pool_list);
- if (pool)
+ TAILQ_FOREACH(pool, &cont->pool_list, next) {
+ /*
+ * The free counter reset values must be updated between the counter
+ * release and the counter allocation, so at least one query must be
+ * done in this time. Ensure it by saving the query generation at
+ * release time.
+ * The free list is sorted by generation, so if the first counter is
+ * not updated, none of the following ones are updated either.
+ */
cnt_free = TAILQ_FIRST(&pool->counters);
+ if (cnt_free && cnt_free->query_gen + 1 <
+ rte_atomic64_read(&pool->query_gen))
+ break;
+ cnt_free = NULL;
+ }
if (!cnt_free) {
pool = flow_dv_counter_pool_prepare(dev, &cnt_free, batch);
if (!pool)
cnt_free->shared = shared;
cnt_free->ref_cnt = 1;
cnt_free->id = id;
+ if (!priv->sh->cmng.query_thread_on)
+ /* Start the asynchronous batch query by the host thread. */
+ mlx5_set_query_alarm(priv->sh);
TAILQ_REMOVE(&pool->counters, cnt_free, next);
if (TAILQ_EMPTY(&pool->counters)) {
/* Move the pool to the end of the container pool list. */
* Pointer to the counter handler.
*/
static void
-flow_dv_counter_release(struct rte_eth_dev *dev __rte_unused,
+flow_dv_counter_release(struct rte_eth_dev *dev,
struct mlx5_flow_counter *counter)
{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
if (!counter)
return;
+ if (priv->counter_fallback) {
+ flow_dv_counter_release_fallback(dev, counter);
+ return;
+ }
if (--counter->ref_cnt == 0) {
struct mlx5_flow_counter_pool *pool =
flow_dv_counter_pool_get(counter);
- /* Put the counter in the end - the earliest one. */
+ /* Put the counter in the end - the last updated one. */
TAILQ_INSERT_TAIL(&pool->counters, counter, next);
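+ /* Save the generation to enforce a fresh query before reuse. */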
+ counter->query_gen = rte_atomic64_read(&pool->query_gen);
}
}
(dev, items, attr, item_flags, error);
if (ret < 0)
return ret;
- last_item |= MLX5_FLOW_ITEM_PORT_ID;
+ last_item = MLX5_FLOW_ITEM_PORT_ID;
break;
case RTE_FLOW_ITEM_TYPE_ETH:
ret = mlx5_flow_validate_item_eth(items, item_flags,
MLX5_FLOW_LAYER_OUTER_L4_UDP;
break;
case RTE_FLOW_ITEM_TYPE_GRE:
- case RTE_FLOW_ITEM_TYPE_NVGRE:
ret = mlx5_flow_validate_item_gre(items, item_flags,
next_protocol, error);
if (ret < 0)
gre_item = items;
last_item = MLX5_FLOW_LAYER_GRE;
break;
+ case RTE_FLOW_ITEM_TYPE_NVGRE:
+ ret = mlx5_flow_validate_item_nvgre(items, item_flags,
+ next_protocol,
+ error);
+ if (ret < 0)
+ return ret;
+ last_item = MLX5_FLOW_LAYER_NVGRE;
+ break;
case RTE_FLOW_ITEM_TYPE_GRE_KEY:
ret = mlx5_flow_validate_item_gre_key
(items, item_flags, gre_item, error);
if (ret < 0)
return ret;
- item_flags |= MLX5_FLOW_LAYER_GRE_KEY;
+ last_item = MLX5_FLOW_LAYER_GRE_KEY;
break;
case RTE_FLOW_ITEM_TYPE_VXLAN:
ret = mlx5_flow_validate_item_vxlan(items, item_flags,
error);
if (ret < 0)
return ret;
- item_flags |= MLX5_FLOW_LAYER_ICMP;
+ last_item = MLX5_FLOW_LAYER_ICMP;
break;
case RTE_FLOW_ITEM_TYPE_ICMP6:
ret = mlx5_flow_validate_item_icmp6(items, item_flags,
error);
if (ret < 0)
return ret;
- item_flags |= MLX5_FLOW_LAYER_ICMP6;
+ last_item = MLX5_FLOW_LAYER_ICMP6;
break;
default:
return rte_flow_error_set(error, ENOTSUP,
int size;
int i;
- flow_dv_translate_item_gre(matcher, key, item, inner);
+ /* For NVGRE, GRE header fields must be set with defined values. */
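+ /* Key bit set, protocol TEB; the mask matches the C/K/S bits exactly. */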
+ const struct rte_flow_item_gre gre_spec = {
+ .c_rsvd0_ver = RTE_BE16(0x2000),
+ .protocol = RTE_BE16(RTE_ETHER_TYPE_TEB)
+ };
+ const struct rte_flow_item_gre gre_mask = {
+ .c_rsvd0_ver = RTE_BE16(0xB000),
+ .protocol = RTE_BE16(UINT16_MAX),
+ };
+ const struct rte_flow_item gre_item = {
+ .spec = &gre_spec,
+ .mask = &gre_mask,
+ .last = NULL,
+ };
+ flow_dv_translate_item_gre(matcher, key, &gre_item, inner);
if (!nvgre_v)
return;
if (!nvgre_m)
case RTE_FLOW_ITEM_TYPE_GRE_KEY:
flow_dv_translate_item_gre_key(match_mask,
match_value, items);
- item_flags |= MLX5_FLOW_LAYER_GRE_KEY;
+ last_item = MLX5_FLOW_LAYER_GRE_KEY;
break;
case RTE_FLOW_ITEM_TYPE_NVGRE:
flow_dv_translate_item_nvgre(match_mask, match_value,
case RTE_FLOW_ITEM_TYPE_ICMP:
flow_dv_translate_item_icmp(match_mask, match_value,
items, tunnel);
- item_flags |= MLX5_FLOW_LAYER_ICMP;
+ last_item = MLX5_FLOW_LAYER_ICMP;
break;
case RTE_FLOW_ITEM_TYPE_ICMP6:
flow_dv_translate_item_icmp6(match_mask, match_value,
items, tunnel);
- item_flags |= MLX5_FLOW_LAYER_ICMP6;
+ last_item = MLX5_FLOW_LAYER_ICMP6;
break;
default:
break;
dv->hash_fields,
(*flow->queue),
flow->rss.queue_num);
- if (!hrxq)
+ if (!hrxq) {
+ int lro = 0;
+
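+ /* Choose the LRO type according to the flow layers. */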
+ if (mlx5_lro_on(dev)) {
+ if ((dev_flow->layers &
+ MLX5_FLOW_LAYER_IPV4_LRO)
+ == MLX5_FLOW_LAYER_IPV4_LRO)
+ lro = MLX5_FLOW_IPV4_LRO;
+ else if ((dev_flow->layers &
+ MLX5_FLOW_LAYER_IPV6_LRO)
+ == MLX5_FLOW_LAYER_IPV6_LRO)
+ lro = MLX5_FLOW_IPV6_LRO;
+ }
hrxq = mlx5_hrxq_new
(dev, flow->key, MLX5_RSS_HASH_KEY_LEN,
dv->hash_fields, (*flow->queue),
flow->rss.queue_num,
!!(dev_flow->layers &
- MLX5_FLOW_LAYER_TUNNEL));
+ MLX5_FLOW_LAYER_TUNNEL), lro);
+ }
+
if (!hrxq) {
rte_flow_error_set
(error, rte_errno,