#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
+#include <rte_eal_paging.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rxtx.h"
+#include "mlx5_common_os.h"
/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
* Bit-masks covering supported fields by the NIC to compare with user mask.
* @param[in] size
* Bit-masks size in bytes.
+ * @param[in] range_accepted
+ * True if range of values is accepted for specific fields, false otherwise.
* @param[out] error
* Pointer to error structure.
*
const uint8_t *mask,
const uint8_t *nic_mask,
unsigned int size,
+ bool range_accepted,
struct rte_flow_error *error)
{
unsigned int i;
RTE_FLOW_ERROR_TYPE_ITEM, item,
"mask/last without a spec is not"
" supported");
- if (item->spec && item->last) {
+ if (item->spec && item->last && !range_accepted) {
uint8_t spec[size];
uint8_t last[size];
unsigned int i;
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&rte_flow_item_icmp6_mask,
- sizeof(struct rte_flow_item_icmp6), error);
+ sizeof(struct rte_flow_item_icmp6),
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
if (ret < 0)
return ret;
return 0;
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&nic_mask,
- sizeof(struct rte_flow_item_icmp), error);
+ sizeof(struct rte_flow_item_icmp),
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
if (ret < 0)
return ret;
return 0;
ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
(const uint8_t *)&nic_mask,
sizeof(struct rte_flow_item_eth),
- error);
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
return ret;
}
ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
(const uint8_t *)&nic_mask,
sizeof(struct rte_flow_item_vlan),
- error);
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
if (ret)
return ret;
if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
* @param[in] acc_mask
* Acceptable mask, if NULL default internal default mask
* will be used to check whether item fields are supported.
+ * @param[in] range_accepted
+ * True if range of values is accepted for specific fields, false otherwise.
* @param[out] error
* Pointer to error structure.
*
uint64_t last_item,
uint16_t ether_type,
const struct rte_flow_item_ipv4 *acc_mask,
+ bool range_accepted,
struct rte_flow_error *error)
{
const struct rte_flow_item_ipv4 *mask = item->mask;
acc_mask ? (const uint8_t *)acc_mask
: (const uint8_t *)&nic_mask,
sizeof(struct rte_flow_item_ipv4),
- error);
+ range_accepted, error);
if (ret < 0)
return ret;
return 0;
RTE_FLOW_ERROR_TYPE_ITEM, item,
"IPv6 cannot follow L2/VLAN layer "
"which ether type is not IPv6");
+ if (mask && mask->hdr.proto == UINT8_MAX && spec)
+ next_proto = spec->hdr.proto;
if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
- if (mask && spec)
- next_proto = mask->hdr.proto & spec->hdr.proto;
if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM,
"multiple tunnel "
"not supported");
}
+ if (next_proto == IPPROTO_HOPOPTS ||
+ next_proto == IPPROTO_ROUTING ||
+ next_proto == IPPROTO_FRAGMENT ||
+ next_proto == IPPROTO_ESP ||
+ next_proto == IPPROTO_AH ||
+ next_proto == IPPROTO_DSTOPTS)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "IPv6 proto (next header) should "
+ "not be set as extension header");
if (item_flags & MLX5_FLOW_LAYER_IPIP)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM, item,
acc_mask ? (const uint8_t *)acc_mask
: (const uint8_t *)&nic_mask,
sizeof(struct rte_flow_item_ipv6),
- error);
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
if (ret < 0)
return ret;
return 0;
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&rte_flow_item_udp_mask,
- sizeof(struct rte_flow_item_udp), error);
+ sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
+ error);
if (ret < 0)
return ret;
return 0;
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)flow_mask,
- sizeof(struct rte_flow_item_tcp), error);
+ sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
+ error);
if (ret < 0)
return ret;
return 0;
(item, (const uint8_t *)mask,
(const uint8_t *)&rte_flow_item_vxlan_mask,
sizeof(struct rte_flow_item_vxlan),
- error);
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
if (ret < 0)
return ret;
if (spec) {
(item, (const uint8_t *)mask,
(const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
sizeof(struct rte_flow_item_vxlan_gpe),
- error);
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
if (ret < 0)
return ret;
if (spec) {
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&gre_key_default_mask,
- sizeof(rte_be32_t), error);
+ sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
return ret;
}
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&nic_mask,
- sizeof(struct rte_flow_item_gre), error);
+ sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
+ error);
if (ret < 0)
return ret;
#ifndef HAVE_MLX5DV_DR
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&nic_mask,
- sizeof(struct rte_flow_item_geneve), error);
+ sizeof(struct rte_flow_item_geneve),
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
if (ret)
return ret;
if (spec) {
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&rte_flow_item_mpls_mask,
- sizeof(struct rte_flow_item_mpls), error);
+ sizeof(struct rte_flow_item_mpls),
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
if (ret < 0)
return ret;
return 0;
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&rte_flow_item_nvgre_mask,
- sizeof(struct rte_flow_item_nvgre), error);
+ sizeof(struct rte_flow_item_nvgre),
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
if (ret < 0)
return ret;
return 0;
acc_mask ? (const uint8_t *)acc_mask
: (const uint8_t *)&nic_mask,
sizeof(struct rte_flow_item_ecpri),
- error);
+ MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
}
/* Allocate unique ID for the split Q/RSS subflows. */
*
* @param[in] actions
* Pointer to the list of actions.
+ * @param[in] attr
+ * Flow rule attributes.
* @param[in] action
* The action to be check if exist.
* @param[out] match_action_pos
*/
static int
flow_check_match_action(const struct rte_flow_action actions[],
+ const struct rte_flow_attr *attr,
enum rte_flow_action_type action,
int *match_action_pos, int *qrss_action_pos)
{
+ const struct rte_flow_action_sample *sample;
int actions_n = 0;
+ int jump_flag = 0;
+ uint32_t ratio = 0;
+ int sub_type = 0;
int flag = 0;
*match_action_pos = -1;
if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE ||
actions->type == RTE_FLOW_ACTION_TYPE_RSS)
*qrss_action_pos = actions_n;
+ if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP)
+ jump_flag = 1;
+ if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) {
+ sample = actions->conf;
+ ratio = sample->ratio;
+ sub_type = ((const struct rte_flow_action *)
+ (sample->actions))->type;
+ }
actions_n++;
}
+ if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) {
+ if (ratio == 1) {
+ /* JUMP Action not support for Mirroring;
+ * Mirroring support multi-destination;
+ */
+ if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END)
+ flag = 0;
+ }
+ }
/* Count RTE_FLOW_ACTION_TYPE_END. */
return flag ? actions_n + 1 : 0;
}
int ret = 0;
if (priv->sampler_en)
- actions_n = flow_check_match_action(actions,
+ actions_n = flow_check_match_action(actions, attr,
RTE_FLOW_ACTION_TYPE_SAMPLE,
&sample_action_pos, &qrss_action_pos);
if (actions_n) {
return -ENOTSUP;
}
-#define MLX5_POOL_QUERY_FREQ_US 1000000
-
/**
- * Get number of all validate pools.
+ * Allocate a new memory for the counter values wrapped by all the needed
+ * management.
*
* @param[in] sh
* Pointer to mlx5_dev_ctx_shared object.
*
* @return
- * The number of all validate pools.
+ * 0 on success, a negative errno value otherwise.
*/
-static uint32_t
-mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
+static int
+mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
{
+ struct mlx5_devx_mkey_attr mkey_attr;
+ struct mlx5_counter_stats_mem_mng *mem_mng;
+ volatile struct flow_counter_stats *raw_data;
+ int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
+ int size = (sizeof(struct flow_counter_stats) *
+ MLX5_COUNTERS_PER_POOL +
+ sizeof(struct mlx5_counter_stats_raw)) * raws_n +
+ sizeof(struct mlx5_counter_stats_mem_mng);
+ size_t pgsize = rte_mem_page_size();
+ uint8_t *mem;
int i;
- uint32_t pools_n = 0;
- for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
- pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
- return pools_n;
+ if (pgsize == (size_t)-1) {
+ DRV_LOG(ERR, "Failed to get mem page size");
+ rte_errno = ENOMEM;
+ return -ENOMEM;
+ }
+ mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
+ if (!mem) {
+ rte_errno = ENOMEM;
+ return -ENOMEM;
+ }
+ mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
+ size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
+ mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (!mem_mng->umem) {
+ rte_errno = errno;
+ mlx5_free(mem);
+ return -rte_errno;
+ }
+ mkey_attr.addr = (uintptr_t)mem;
+ mkey_attr.size = size;
+ mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
+ mkey_attr.pd = sh->pdn;
+ mkey_attr.log_entity_size = 0;
+ mkey_attr.pg_access = 0;
+ mkey_attr.klm_array = NULL;
+ mkey_attr.klm_num = 0;
+ mkey_attr.relaxed_ordering = sh->cmng.relaxed_ordering;
+ mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
+ if (!mem_mng->dm) {
+ mlx5_glue->devx_umem_dereg(mem_mng->umem);
+ rte_errno = errno;
+ mlx5_free(mem);
+ return -rte_errno;
+ }
+ mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
+ raw_data = (volatile struct flow_counter_stats *)mem;
+ for (i = 0; i < raws_n; ++i) {
+ mem_mng->raws[i].mem_mng = mem_mng;
+ mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
+ }
+ for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
+ LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
+ mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
+ next);
+ LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
+ sh->cmng.mem_mng = mem_mng;
+ return 0;
}
+/**
+ * Set the statistic memory to the new counter pool.
+ *
+ * @param[in] sh
+ * Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] pool
+ * Pointer to the pool to set the statistic memory.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise.
+ */
+static int
+mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
+ struct mlx5_flow_counter_pool *pool)
+{
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ /* Resize statistic memory once used out. */
+ if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
+ mlx5_flow_create_counter_stat_mem_mng(sh)) {
+ DRV_LOG(ERR, "Cannot resize counter stat mem.");
+ return -1;
+ }
+ rte_spinlock_lock(&pool->sl);
+ pool->raw = cmng->mem_mng->raws + pool->index %
+ MLX5_CNT_CONTAINER_RESIZE;
+ rte_spinlock_unlock(&pool->sl);
+ pool->raw_hw = NULL;
+ return 0;
+}
+
+#define MLX5_POOL_QUERY_FREQ_US 1000000
+
/**
* Set the periodic procedure for triggering asynchronous batch queries for all
* the counter pools.
{
uint32_t pools_n, us;
- pools_n = mlx5_get_all_valid_pool_count(sh);
+ pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
us = MLX5_POOL_QUERY_FREQ_US / pools_n;
DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
mlx5_flow_query_alarm(void *arg)
{
struct mlx5_dev_ctx_shared *sh = arg;
- struct mlx5_devx_obj *dcs;
- uint16_t offset;
int ret;
- uint8_t batch = sh->cmng.batch;
- uint8_t age = sh->cmng.age;
uint16_t pool_index = sh->cmng.pool_index;
- struct mlx5_pools_container *cont;
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
struct mlx5_flow_counter_pool *pool;
- int cont_loop = MLX5_CCONT_TYPE_MAX;
+ uint16_t n_valid;
if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
goto set_alarm;
-next_container:
- cont = MLX5_CNT_CONTAINER(sh, batch, age);
- rte_spinlock_lock(&cont->resize_sl);
- if (!cont->pools) {
- rte_spinlock_unlock(&cont->resize_sl);
- /* Check if all the containers are empty. */
- if (unlikely(--cont_loop == 0))
- goto set_alarm;
- batch ^= 0x1;
- pool_index = 0;
- if (batch == 0 && pool_index == 0) {
- age ^= 0x1;
- sh->cmng.batch = batch;
- sh->cmng.age = age;
- }
- goto next_container;
- }
- pool = cont->pools[pool_index];
- rte_spinlock_unlock(&cont->resize_sl);
+ rte_spinlock_lock(&cmng->pool_update_sl);
+ pool = cmng->pools[pool_index];
+ n_valid = cmng->n_valid;
+ rte_spinlock_unlock(&cmng->pool_update_sl);
+ /* Set the statistic memory to the new created pool. */
+ if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
+ goto set_alarm;
if (pool->raw_hw)
/* There is a pool query in progress. */
goto set_alarm;
if (!pool->raw_hw)
/* No free counter statistics raw memory. */
goto set_alarm;
- dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
- (&pool->a64_dcs);
- if (dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) {
- /* Pool without valid counter. */
- pool->raw_hw = NULL;
- goto next_pool;
- }
- offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
/*
* Identify the counters released between query trigger and query
- * handle more effiecntly. The counter released in this gap period
+ * handle more efficiently. The counter released in this gap period
* should wait for a new round of query as the new arrived packets
* will not be taken into account.
*/
pool->query_gen++;
- ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
- offset, NULL, NULL,
+ ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
+ MLX5_COUNTERS_PER_POOL,
+ NULL, NULL,
pool->raw_hw->mem_mng->dm->id,
(void *)(uintptr_t)
- (pool->raw_hw->data + offset),
+ pool->raw_hw->data,
sh->devx_comp,
(uint64_t)(uintptr_t)pool);
if (ret) {
pool->raw_hw = NULL;
goto set_alarm;
}
- pool->raw_hw->min_dcs_id = dcs->id;
LIST_REMOVE(pool->raw_hw, next);
sh->cmng.pending_queries++;
-next_pool:
pool_index++;
- if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
- batch ^= 0x1;
+ if (pool_index >= n_valid)
pool_index = 0;
- if (batch == 0 && pool_index == 0)
- age ^= 0x1;
- }
set_alarm:
- sh->cmng.batch = batch;
sh->cmng.pool_index = pool_index;
- sh->cmng.age = age;
mlx5_set_query_alarm(sh);
}
struct mlx5_age_param *age_param;
struct mlx5_counter_stats_raw *cur = pool->raw_hw;
struct mlx5_counter_stats_raw *prev = pool->raw;
- uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
+ const uint64_t curr_time = MLX5_CURR_TIME_SEC;
+ const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
+ uint16_t expected = AGE_CANDIDATE;
uint32_t i;
+ pool->time_of_last_age_check = curr_time;
for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
cnt = MLX5_POOL_GET_CNT(pool, i);
age_param = MLX5_CNT_TO_AGE(cnt);
- if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
+ if (__atomic_load_n(&age_param->state,
+ __ATOMIC_RELAXED) != AGE_CANDIDATE)
continue;
if (cur->data[i].hits != prev->data[i].hits) {
- age_param->expire = curr + age_param->timeout;
+ __atomic_store_n(&age_param->sec_since_last_hit, 0,
+ __ATOMIC_RELAXED);
continue;
}
- if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
+ if (__atomic_add_fetch(&age_param->sec_since_last_hit,
+ time_delta,
+ __ATOMIC_RELAXED) <= age_param->timeout)
continue;
/**
* Hold the lock first, or if between the
priv = rte_eth_devices[age_param->port_id].data->dev_private;
age_info = GET_PORT_AGE_INFO(priv);
rte_spinlock_lock(&age_info->aged_sl);
- /* If the cpmset fails, release happens. */
- if (rte_atomic16_cmpset((volatile uint16_t *)
- &age_param->state,
- AGE_CANDIDATE,
- AGE_TMOUT) ==
- AGE_CANDIDATE) {
+ if (__atomic_compare_exchange_n(&age_param->state, &expected,
+ AGE_TMOUT, false,
+ __ATOMIC_RELAXED,
+ __ATOMIC_RELAXED)) {
TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
}
struct mlx5_flow_counter_pool *pool =
(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
struct mlx5_counter_stats_raw *raw_to_free;
- uint8_t age = !!IS_AGE_POOL(pool);
uint8_t query_gen = pool->query_gen ^ 1;
- struct mlx5_pools_container *cont =
- MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);
+ struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+ enum mlx5_counter_type cnt_type =
+ pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
+ MLX5_COUNTER_TYPE_ORIGIN;
if (unlikely(status)) {
raw_to_free = pool->raw_hw;
} else {
raw_to_free = pool->raw;
- if (IS_AGE_POOL(pool))
+ if (pool->is_aged)
mlx5_flow_aging_check(sh, pool);
rte_spinlock_lock(&pool->sl);
pool->raw = pool->raw_hw;
/* Be sure the new raw counters data is updated in memory. */
rte_io_wmb();
if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
- rte_spinlock_lock(&cont->csl);
- TAILQ_CONCAT(&cont->counters,
+ rte_spinlock_lock(&cmng->csl[cnt_type]);
+ TAILQ_CONCAT(&cmng->counters[cnt_type],
&pool->counters[query_gen], next);
- rte_spinlock_unlock(&cont->csl);
+ rte_spinlock_unlock(&cmng->csl[cnt_type]);
}
}
LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);