X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_flow_dv.c;h=5bb252e6970e228aadbf9fafc961f31a245796a0;hb=eb10fe7fb150d3a9c1ef01134403f85890d5e06e;hp=e28f01da97db7209fb79ea49c1a986e47a6dbebe;hpb=488d13abdcaee1c75f852962918b5fef241aeabe;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index e28f01da97..5bb252e697 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -284,7 +285,7 @@ static void flow_dv_shared_lock(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; if (sh->dv_refcnt > 1) { int ret; @@ -299,7 +300,7 @@ static void flow_dv_shared_unlock(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; if (sh->dv_refcnt > 1) { int ret; @@ -1638,6 +1639,91 @@ flow_dv_validate_item_port_id(struct rte_eth_dev *dev, return 0; } +/** + * Validate VLAN item. + * + * @param[in] item + * Item specification. + * @param[in] item_flags + * Bit-fields that holds the items detected until now. + * @param[in] dev + * Ethernet device flow is being created on. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +flow_dv_validate_item_vlan(const struct rte_flow_item *item, + uint64_t item_flags, + struct rte_eth_dev *dev, + struct rte_flow_error *error) +{ + const struct rte_flow_item_vlan *mask = item->mask; + const struct rte_flow_item_vlan nic_mask = { + .tci = RTE_BE16(UINT16_MAX), + .inner_type = RTE_BE16(UINT16_MAX), + }; + const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + int ret; + const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 | + MLX5_FLOW_LAYER_INNER_L4) : + (MLX5_FLOW_LAYER_OUTER_L3 | + MLX5_FLOW_LAYER_OUTER_L4); + const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : + MLX5_FLOW_LAYER_OUTER_VLAN; + + if (item_flags & vlanm) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "multiple VLAN layers not supported"); + else if ((item_flags & l34m) != 0) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "VLAN cannot follow L3/L4 layer"); + if (!mask) + mask = &rte_flow_item_vlan_mask; + ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, + (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_vlan), + error); + if (ret) + return ret; + if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { + struct mlx5_priv *priv = dev->data->dev_private; + + if (priv->vmwa_context) { + /* + * Non-NULL context means we have a virtual machine + * and SR-IOV enabled, we have to create VLAN interface + * to make hypervisor to setup E-Switch vport + * context correctly. We avoid creating the multiple + * VLAN interfaces, so we cannot support VLAN tag mask. 
+ */ + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "VLAN tag mask is not" + " supported in virtual" + " environment"); + } + } + return 0; +} + +/* + * GTP flags are contained in 1 byte of the format: + * ------------------------------------------- + * | bit | 0 - 2 | 3 | 4 | 5 | 6 | 7 | + * |-----------------------------------------| + * | value | Version | PT | Res | E | S | PN | + * ------------------------------------------- + * + * Matching is supported only for GTP flags E, S, PN. + */ +#define MLX5_GTP_FLAGS_MASK 0x07 + /** * Validate GTP item. * @@ -1660,8 +1746,10 @@ flow_dv_validate_item_gtp(struct rte_eth_dev *dev, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; + const struct rte_flow_item_gtp *spec = item->spec; const struct rte_flow_item_gtp *mask = item->mask; const struct rte_flow_item_gtp nic_mask = { + .v_pt_rsv_flags = MLX5_GTP_FLAGS_MASK, .msg_type = 0xff, .teid = RTE_BE32(0xffffffff), }; @@ -1681,6 +1769,11 @@ flow_dv_validate_item_gtp(struct rte_eth_dev *dev, "no outer UDP layer found"); if (!mask) mask = &rte_flow_item_gtp_mask; + if (spec && spec->v_pt_rsv_flags & ~MLX5_GTP_FLAGS_MASK) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "Match is supported for GTP" + " flags only"); return mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, @@ -1788,6 +1881,9 @@ flow_dev_get_vlan_info_from_items(const struct rte_flow_item *items, const struct rte_flow_item_vlan *vlan_m = items->mask; const struct rte_flow_item_vlan *vlan_v = items->spec; + /* If VLAN item in pattern doesn't contain data, return here. */ + if (!vlan_v) + return; if (!vlan_m) vlan_m = &nic_mask; /* Only full match values are accepted */ @@ -1964,7 +2060,7 @@ flow_dv_validate_action_set_vlan_vid(uint64_t item_flags, const struct rte_flow_action *action = actions; const struct rte_flow_action_of_set_vlan_vid *conf = action->conf; - if (conf->vlan_vid > RTE_BE16(0xFFE)) + if (rte_be_to_cpu_16(conf->vlan_vid) > 0xFFE) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, action, "VLAN VID value is too big"); @@ -2464,7 +2560,7 @@ flow_dv_encap_decap_resource_register struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_flow_dv_encap_decap_resource *cache_resource; struct mlx5dv_dr_domain *domain; uint32_t idx = 0; @@ -2490,14 +2586,14 @@ flow_dv_encap_decap_resource_register (void *)cache_resource, rte_atomic32_read(&cache_resource->refcnt)); rte_atomic32_inc(&cache_resource->refcnt); - dev_flow->handle->dvh.encap_decap = idx; + dev_flow->handle->dvh.rix_encap_decap = idx; dev_flow->dv.encap_decap = cache_resource; return 0; } } /* Register new encap/decap resource. 
*/ cache_resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_DECAP_ENCAP], - &dev_flow->handle->dvh.encap_decap); + &dev_flow->handle->dvh.rix_encap_decap); if (!cache_resource) return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -2518,7 +2614,8 @@ flow_dv_encap_decap_resource_register rte_atomic32_init(&cache_resource->refcnt); rte_atomic32_inc(&cache_resource->refcnt); ILIST_INSERT(sh->ipool[MLX5_IPOOL_DECAP_ENCAP], &sh->encaps_decaps, - dev_flow->handle->dvh.encap_decap, cache_resource, next); + dev_flow->handle->dvh.rix_encap_decap, cache_resource, + next); dev_flow->dv.encap_decap = cache_resource; DRV_LOG(DEBUG, "new encap/decap resource %p: refcnt %d++", (void *)cache_resource, @@ -2572,7 +2669,7 @@ flow_dv_jump_tbl_resource_register (void *)&tbl_data->jump, cnt); } rte_atomic32_inc(&tbl_data->jump.refcnt); - dev_flow->handle->jump = tbl_data->idx; + dev_flow->handle->rix_jump = tbl_data->idx; dev_flow->dv.jump = &tbl_data->jump; return 0; } @@ -2600,7 +2697,7 @@ flow_dv_port_id_action_resource_register struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_flow_dv_port_id_action_resource *cache_resource; uint32_t idx = 0; @@ -2613,14 +2710,14 @@ flow_dv_port_id_action_resource_register (void *)cache_resource, rte_atomic32_read(&cache_resource->refcnt)); rte_atomic32_inc(&cache_resource->refcnt); - dev_flow->handle->port_id_action = idx; + dev_flow->handle->rix_port_id_action = idx; dev_flow->dv.port_id_action = cache_resource; return 0; } } /* Register new port id action resource. */ cache_resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_PORT_ID], - &dev_flow->handle->port_id_action); + &dev_flow->handle->rix_port_id_action); if (!cache_resource) return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -2628,7 +2725,7 @@ flow_dv_port_id_action_resource_register *cache_resource = *resource; /* * Depending on rdma_core version the glue routine calls - * either mlx5dv_dr_action_create_dest_ib_port(domain, ibv_port) + * either mlx5dv_dr_action_create_dest_ib_port(domain, dev_port) * or mlx5dv_dr_action_create_dest_vport(domain, vport_id). */ cache_resource->action = @@ -2643,7 +2740,8 @@ flow_dv_port_id_action_resource_register rte_atomic32_init(&cache_resource->refcnt); rte_atomic32_inc(&cache_resource->refcnt); ILIST_INSERT(sh->ipool[MLX5_IPOOL_PORT_ID], &sh->port_id_action_list, - dev_flow->handle->port_id_action, cache_resource, next); + dev_flow->handle->rix_port_id_action, cache_resource, + next); dev_flow->dv.port_id_action = cache_resource; DRV_LOG(DEBUG, "new port id action resource %p: refcnt %d++", (void *)cache_resource, @@ -2674,7 +2772,7 @@ flow_dv_push_vlan_action_resource_register struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_flow_dv_push_vlan_action_resource *cache_resource; struct mlx5dv_dr_domain *domain; uint32_t idx = 0; @@ -2689,14 +2787,14 @@ flow_dv_push_vlan_action_resource_register (void *)cache_resource, rte_atomic32_read(&cache_resource->refcnt)); rte_atomic32_inc(&cache_resource->refcnt); - dev_flow->handle->dvh.push_vlan_res = idx; + dev_flow->handle->dvh.rix_push_vlan = idx; dev_flow->dv.push_vlan_res = cache_resource; return 0; } } /* Register new push_vlan action resource. 
*/ cache_resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_PUSH_VLAN], - &dev_flow->handle->dvh.push_vlan_res); + &dev_flow->handle->dvh.rix_push_vlan); if (!cache_resource) return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -2721,7 +2819,7 @@ flow_dv_push_vlan_action_resource_register rte_atomic32_inc(&cache_resource->refcnt); ILIST_INSERT(sh->ipool[MLX5_IPOOL_PUSH_VLAN], &sh->push_vlan_action_list, - dev_flow->handle->dvh.push_vlan_res, + dev_flow->handle->dvh.rix_push_vlan, cache_resource, next); dev_flow->dv.push_vlan_res = cache_resource; DRV_LOG(DEBUG, "new push vlan action resource %p: refcnt %d++", @@ -3637,21 +3735,18 @@ flow_dv_validate_action_port_id(struct rte_eth_dev *dev, * @return * Max number of modify header actions device can support. */ -static unsigned int -flow_dv_modify_hdr_action_max(struct rte_eth_dev *dev, uint64_t flags) +static inline unsigned int +flow_dv_modify_hdr_action_max(struct rte_eth_dev *dev __rte_unused, + uint64_t flags) { /* - * There's no way to directly query the max cap. Although it has to be - * acquried by iterative trial, it is a safe assumption that more - * actions are supported by FW if extensive metadata register is - * supported. (Only in the root table) + * There's no way to directly query the max capacity from FW. + * The maximal value on root table should be assumed to be supported. */ if (!(flags & MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL)) return MLX5_MAX_MODIFY_NUM; else - return mlx5_flow_ext_mreg_supported(dev) ? - MLX5_ROOT_TBL_MODIFY_NUM : - MLX5_ROOT_TBL_MODIFY_NUM_NO_MREG; + return MLX5_ROOT_TBL_MODIFY_NUM; } /** @@ -3705,9 +3800,9 @@ mlx5_flow_validate_action_meter(struct rte_eth_dev *dev, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "Meter not found"); - if (fm->ref_cnt && (!(fm->attr.transfer == attr->transfer || - (!fm->attr.ingress && !attr->ingress && attr->egress) || - (!fm->attr.egress && !attr->egress && attr->ingress)))) + if (fm->ref_cnt && (!(fm->transfer == attr->transfer || + (!fm->ingress && !attr->ingress && attr->egress) || + (!fm->egress && !attr->egress && attr->ingress)))) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "Flow attributes are either invalid " @@ -3716,6 +3811,50 @@ mlx5_flow_validate_action_meter(struct rte_eth_dev *dev, return 0; } +/** + * Validate the age action. + * + * @param[in] action_flags + * Holds the actions detected until now. + * @param[in] action + * Pointer to the age action. + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */
+static int
+flow_dv_validate_action_age(uint64_t action_flags,
+ const struct rte_flow_action *action,
+ struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ const struct rte_flow_action_age *age = action->conf;
+
+ if (!priv->config.devx || priv->counter_fallback)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "age action not supported");
+ if (!(action->conf))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, action,
+ "configuration cannot be null");
+ if (age->timeout >= UINT16_MAX / 2 / 10)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION, action,
+ "Max age time: 3275 seconds");
+ if (action_flags & MLX5_FLOW_ACTION_AGE)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "Duplicate age actions set");
+ return 0;
+}
+
 /**
 * Validate the modify-header IPv4 DSCP actions.
 *
@@ -3807,7 +3946,7 @@ flow_dv_modify_hdr_resource_register
 struct rte_flow_error *error)
 {
 struct mlx5_priv *priv = dev->data->dev_private;
- struct mlx5_ibv_shared *sh = priv->sh;
+ struct mlx5_dev_ctx_shared *sh = priv->sh;
 struct mlx5_flow_dv_modify_hdr_resource *cache_resource;
 struct mlx5dv_dr_domain *ns;
 uint32_t actions_len;
@@ -3893,20 +4032,22 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
 struct mlx5_priv *priv = dev->data->dev_private;
 struct mlx5_pools_container *cont;
 struct mlx5_flow_counter_pool *pool;
- uint32_t batch = 0;
+ uint32_t batch = 0, age = 0;
 
 idx--;
+ age = MLX_CNT_IS_AGE(idx);
+ idx = age ? idx - MLX5_CNT_AGE_OFFSET : idx;
 if (idx >= MLX5_CNT_BATCH_OFFSET) {
 idx -= MLX5_CNT_BATCH_OFFSET;
 batch = 1;
 }
- cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+ cont = MLX5_CNT_CONTAINER(priv->sh, batch, age);
 MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
 pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
 MLX5_ASSERT(pool);
 if (ppool)
 *ppool = pool;
- return &pool->counters_raw[idx % MLX5_COUNTERS_PER_POOL];
+ return MLX5_POOL_GET_CNT(pool, idx % MLX5_COUNTERS_PER_POOL);
 }
 
 /**
@@ -3962,8 +4103,8 @@ flow_dv_find_pool_by_id(struct mlx5_pools_container *cont, int id)
 static struct mlx5_counter_stats_mem_mng *
 flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 {
- struct mlx5_ibv_shared *sh = ((struct mlx5_priv *)
- (dev->data->dev_private))->sh;
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_ctx_shared *sh = priv->sh;
 struct mlx5_devx_mkey_attr mkey_attr;
 struct mlx5_counter_stats_mem_mng *mem_mng;
 volatile struct flow_counter_stats *raw_data;
@@ -3989,13 +4130,16 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 }
 mkey_attr.addr = (uintptr_t)mem;
 mkey_attr.size = size;
- mkey_attr.umem_id = mem_mng->umem->umem_id;
+ mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
 mkey_attr.pd = sh->pdn;
 mkey_attr.log_entity_size = 0;
 mkey_attr.pg_access = 0;
 mkey_attr.klm_array = NULL;
 mkey_attr.klm_num = 0;
- mkey_attr.relaxed_ordering = 1;
+ if (priv->config.hca_attr.relaxed_ordering_write &&
+ priv->config.hca_attr.relaxed_ordering_read &&
+ !haswell_broadwell_cpu)
+ mkey_attr.relaxed_ordering = 1;
 mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
 if (!mem_mng->dm) {
 mlx5_glue->devx_umem_dereg(mem_mng->umem);
@@ -4020,70 +4164,59 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 * Pointer to the Ethernet device structure.
 * @param[in] batch
 * Whether the pool is for counter that was allocated by batch command.
+ * @param[in] age + * Whether the pool is for Aging counter. * * @return - * The new container pointer on success, otherwise NULL and rte_errno is set. + * 0 on success, otherwise negative errno value and rte_errno is set. */ -static struct mlx5_pools_container * -flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch) +static int +flow_dv_container_resize(struct rte_eth_dev *dev, + uint32_t batch, uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_pools_container *cont = - MLX5_CNT_CONTAINER(priv->sh, batch, 0); - struct mlx5_pools_container *new_cont = - MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0); + struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch, + age); struct mlx5_counter_stats_mem_mng *mem_mng = NULL; + void *old_pools = cont->pools; uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE; uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize; - int i; + void *pools = rte_calloc(__func__, 1, mem_size, 0); - /* Fallback mode has no background thread. Skip the check. */ - if (!priv->counter_fallback && - cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) { - /* The last resize still hasn't detected by the host thread. */ - rte_errno = EAGAIN; - return NULL; - } - new_cont->pools = rte_calloc(__func__, 1, mem_size, 0); - if (!new_cont->pools) { + if (!pools) { rte_errno = ENOMEM; - return NULL; + return -ENOMEM; } - if (cont->n) - memcpy(new_cont->pools, cont->pools, cont->n * - sizeof(struct mlx5_flow_counter_pool *)); + if (old_pools) + memcpy(pools, old_pools, cont->n * + sizeof(struct mlx5_flow_counter_pool *)); /* * Fallback mode query the counter directly, no background query * resources are needed. */ if (!priv->counter_fallback) { + int i; + mem_mng = flow_dv_create_counter_stat_mem_mng(dev, - MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES); + MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES); if (!mem_mng) { - rte_free(new_cont->pools); - return NULL; + rte_free(pools); + return -ENOMEM; } for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i) LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws, mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i, next); - } else { - /* - * Release the old container pools directly as no background - * thread helps that. - */ - rte_free(cont->pools); } - new_cont->n = resize; - rte_atomic16_set(&new_cont->n_valid, rte_atomic16_read(&cont->n_valid)); - TAILQ_INIT(&new_cont->pool_list); - TAILQ_CONCAT(&new_cont->pool_list, &cont->pool_list, next); - new_cont->init_mem_mng = mem_mng; - rte_cio_wmb(); - /* Flip the master container. */ - priv->sh->cmng.mhi[batch] ^= (uint8_t)1; - return new_cont; + rte_spinlock_lock(&cont->resize_sl); + cont->n = resize; + cont->mem_mng = mem_mng; + cont->pools = pools; + rte_spinlock_unlock(&cont->resize_sl); + if (old_pools) + rte_free(old_pools); + return 0; } /** @@ -4130,7 +4263,7 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts, *pkts = 0; *bytes = 0; } else { - offset = cnt - &pool->counters_raw[0]; + offset = MLX5_CNT_ARRAY_IDX(pool, cnt); *pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits); *bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes); } @@ -4147,32 +4280,30 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts, * The devX counter handle. * @param[in] batch * Whether the pool is for counter that was allocated by batch command. + * @param[in] age + * Whether the pool is for counter that was allocated for aging. 
* @param[in/out] cont_cur * Pointer to the container pointer, it will be update in pool resize. * * @return * The pool container pointer on success, NULL otherwise and rte_errno is set. */ -static struct mlx5_pools_container * +static struct mlx5_flow_counter_pool * flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, - uint32_t batch) + uint32_t batch, uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool; struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch, - 0); + age); int16_t n_valid = rte_atomic16_read(&cont->n_valid); - uint32_t size; + uint32_t size = sizeof(*pool); - if (cont->n == n_valid) { - cont = flow_dv_container_resize(dev, batch); - if (!cont) - return NULL; - } - size = sizeof(*pool); - if (!batch) - size += MLX5_COUNTERS_PER_POOL * - sizeof(struct mlx5_flow_counter_ext); + if (cont->n == n_valid && flow_dv_container_resize(dev, batch, age)) + return NULL; + size += MLX5_COUNTERS_PER_POOL * CNT_SIZE; + size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE); + size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE); pool = rte_calloc(__func__, 1, size, 0); if (!pool) { rte_errno = ENOMEM; @@ -4180,9 +4311,12 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, } pool->min_dcs = dcs; if (!priv->counter_fallback) - pool->raw = cont->init_mem_mng->raws + n_valid % - MLX5_CNT_CONTAINER_RESIZE; + pool->raw = cont->mem_mng->raws + n_valid % + MLX5_CNT_CONTAINER_RESIZE; pool->raw_hw = NULL; + pool->type = 0; + pool->type |= (batch ? 0 : CNT_POOL_TYPE_EXT); + pool->type |= (!age ? 0 : CNT_POOL_TYPE_AGE); rte_spinlock_init(&pool->sl); /* * The generation of the new allocated counters in this pool is 0, 2 in @@ -4206,9 +4340,42 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, /* Pool initialization must be updated before host thread access. */ rte_cio_wmb(); rte_atomic16_add(&cont->n_valid, 1); - return cont; + return pool; } +/** + * Update the minimum dcs-id for aged or no-aged counter pool. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[in] pool + * Current counter pool. + * @param[in] batch + * Whether the pool is for counter that was allocated by batch command. + * @param[in] age + * Whether the counter is for aging. + */ +static void +flow_dv_counter_update_min_dcs(struct rte_eth_dev *dev, + struct mlx5_flow_counter_pool *pool, + uint32_t batch, uint32_t age) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_flow_counter_pool *other; + struct mlx5_pools_container *cont; + + cont = MLX5_CNT_CONTAINER(priv->sh, batch, (age ^ 0x1)); + other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id); + if (!other) + return; + if (pool->min_dcs->id < other->min_dcs->id) { + rte_atomic64_set(&other->a64_dcs, + rte_atomic64_read(&pool->a64_dcs)); + } else { + rte_atomic64_set(&pool->a64_dcs, + rte_atomic64_read(&other->a64_dcs)); + } +} /** * Prepare a new counter and/or a new counter pool. * @@ -4218,15 +4385,17 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs, * Where to put the pointer of a new counter. * @param[in] batch * Whether the pool is for counter that was allocated by batch command. + * @param[in] age + * Whether the pool is for counter that was allocated for aging. * * @return - * The counter container pointer and @p cnt_free is set on success, + * The counter pool pointer and @p cnt_free is set on success, * NULL otherwise and rte_errno is set. 
*/ -static struct mlx5_pools_container * +static struct mlx5_flow_counter_pool * flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, struct mlx5_flow_counter **cnt_free, - uint32_t batch) + uint32_t batch, uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_pools_container *cont; @@ -4235,7 +4404,7 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, struct mlx5_flow_counter *cnt; uint32_t i; - cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0); + cont = MLX5_CNT_CONTAINER(priv->sh, batch, age); if (!batch) { /* bulk_bitmap must be 0 for single counter allocation. */ dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0); @@ -4243,22 +4412,23 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, return NULL; pool = flow_dv_find_pool_by_id(cont, dcs->id); if (!pool) { - cont = flow_dv_pool_create(dev, dcs, batch); - if (!cont) { + pool = flow_dv_pool_create(dev, dcs, batch, age); + if (!pool) { mlx5_devx_cmd_destroy(dcs); return NULL; } - pool = TAILQ_FIRST(&cont->pool_list); } else if (dcs->id < pool->min_dcs->id) { rte_atomic64_set(&pool->a64_dcs, (int64_t)(uintptr_t)dcs); } + flow_dv_counter_update_min_dcs(dev, + pool, batch, age); i = dcs->id % MLX5_COUNTERS_PER_POOL; - cnt = &pool->counters_raw[i]; + cnt = MLX5_POOL_GET_CNT(pool, i); TAILQ_INSERT_HEAD(&pool->counters, cnt, next); MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs; *cnt_free = cnt; - return cont; + return pool; } /* bulk_bitmap is in 128 counters units. */ if (priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4) @@ -4267,18 +4437,17 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev, rte_errno = ENODATA; return NULL; } - cont = flow_dv_pool_create(dev, dcs, batch); - if (!cont) { + pool = flow_dv_pool_create(dev, dcs, batch, age); + if (!pool) { mlx5_devx_cmd_destroy(dcs); return NULL; } - pool = TAILQ_FIRST(&cont->pool_list); for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { - cnt = &pool->counters_raw[i]; + cnt = MLX5_POOL_GET_CNT(pool, i); TAILQ_INSERT_HEAD(&pool->counters, cnt, next); } - *cnt_free = &pool->counters_raw[0]; - return cont; + *cnt_free = MLX5_POOL_GET_CNT(pool, 0); + return pool; } /** @@ -4298,15 +4467,15 @@ static struct mlx5_flow_counter_ext * flow_dv_counter_shared_search(struct mlx5_pools_container *cont, uint32_t id, struct mlx5_flow_counter_pool **ppool) { - static struct mlx5_flow_counter_ext *cnt; + struct mlx5_flow_counter_ext *cnt; struct mlx5_flow_counter_pool *pool; - uint32_t i; + uint32_t i, j; uint32_t n_valid = rte_atomic16_read(&cont->n_valid); for (i = 0; i < n_valid; i++) { pool = cont->pools[i]; - for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { - cnt = MLX5_GET_POOL_CNT_EXT(pool, i); + for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) { + cnt = MLX5_GET_POOL_CNT_EXT(pool, j); if (cnt->ref_cnt && cnt->shared && cnt->id == id) { if (ppool) *ppool = cont->pools[i]; @@ -4328,13 +4497,15 @@ flow_dv_counter_shared_search(struct mlx5_pools_container *cont, uint32_t id, * Counter identifier. * @param[in] group * Counter flow group. + * @param[in] age + * Whether the counter was allocated for aging. * * @return * Index to flow counter on success, 0 otherwise and rte_errno is set. 
*/ static uint32_t flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, - uint16_t group) + uint16_t group, uint32_t age) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_flow_counter_pool *pool = NULL; @@ -4350,7 +4521,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, */ uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0; struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch, - 0); + age); uint32_t cnt_idx; if (!priv->config.devx) { @@ -4389,10 +4560,9 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, cnt_free = NULL; } if (!cnt_free) { - cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch); - if (!cont) + pool = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age); + if (!pool) return 0; - pool = TAILQ_FIRST(&cont->pool_list); } if (!batch) cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free); @@ -4402,7 +4572,7 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, struct mlx5_devx_obj *dcs; if (batch) { - offset = cnt_free - &pool->counters_raw[0]; + offset = MLX5_CNT_ARRAY_IDX(pool, cnt_free); dcs = pool->min_dcs; } else { offset = 0; @@ -4416,8 +4586,9 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, } } cnt_idx = MLX5_MAKE_CNT_IDX(pool->index, - (cnt_free - pool->counters_raw)); + MLX5_CNT_ARRAY_IDX(pool, cnt_free)); cnt_idx += batch * MLX5_CNT_BATCH_OFFSET; + cnt_idx += age * MLX5_CNT_AGE_OFFSET; /* Update the counter reset values. */ if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits, &cnt_free->bytes)) @@ -4439,6 +4610,64 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id, return cnt_idx; } +/** + * Get age param from counter index. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[in] counter + * Index to the counter handler. + * + * @return + * The aging parameter specified for the counter index. + */ +static struct mlx5_age_param* +flow_dv_counter_idx_get_age(struct rte_eth_dev *dev, + uint32_t counter) +{ + struct mlx5_flow_counter *cnt; + struct mlx5_flow_counter_pool *pool = NULL; + + flow_dv_counter_get_by_idx(dev, counter, &pool); + counter = (counter - 1) % MLX5_COUNTERS_PER_POOL; + cnt = MLX5_POOL_GET_CNT(pool, counter); + return MLX5_CNT_TO_AGE(cnt); +} + +/** + * Remove a flow counter from aged counter list. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[in] counter + * Index to the counter handler. + * @param[in] cnt + * Pointer to the counter handler. + */ +static void +flow_dv_counter_remove_from_age(struct rte_eth_dev *dev, + uint32_t counter, struct mlx5_flow_counter *cnt) +{ + struct mlx5_age_info *age_info; + struct mlx5_age_param *age_param; + struct mlx5_priv *priv = dev->data->dev_private; + + age_info = GET_PORT_AGE_INFO(priv); + age_param = flow_dv_counter_idx_get_age(dev, counter); + if (rte_atomic16_cmpset((volatile uint16_t *) + &age_param->state, + AGE_CANDIDATE, AGE_FREE) + != AGE_CANDIDATE) { + /** + * We need the lock even it is age timeout, + * since counter may still in process. + */ + rte_spinlock_lock(&age_info->aged_sl); + TAILQ_REMOVE(&age_info->aged_counters, cnt, next); + rte_spinlock_unlock(&age_info->aged_sl); + } + rte_atomic16_set(&age_param->state, AGE_FREE); +} /** * Release a flow counter. 
* @@ -4463,6 +4692,8 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter) if (cnt_ext && --cnt_ext->ref_cnt) return; } + if (IS_AGE_POOL(pool)) + flow_dv_counter_remove_from_age(dev, counter, cnt); /* Put the counter in the end - the last updated one. */ TAILQ_INSERT_TAIL(&pool->counters, cnt, next); /* @@ -4488,7 +4719,9 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter) * Pointer to error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. + * - 0 on success and non root table. + * - 1 on success and root table. + * - a negative errno value otherwise and rte_errno is set. */ static int flow_dv_validate_attributes(struct rte_eth_dev *dev, @@ -4498,6 +4731,7 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, { struct mlx5_priv *priv = dev->data->dev_private; uint32_t priority_max = priv->config.flow_prio - 1; + int ret = 0; #ifndef HAVE_MLX5DV_DR if (attributes->group) @@ -4506,14 +4740,15 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, NULL, "groups are not supported"); #else - uint32_t table; - int ret; + uint32_t table = 0; ret = mlx5_flow_group_to_table(attributes, external, attributes->group, !!priv->fdb_def_rule, &table, error); if (ret) return ret; + if (!table) + ret = MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL; #endif if (attributes->priority != MLX5_FLOW_PRIO_RSVD && attributes->priority >= priority_max) @@ -4543,7 +4778,7 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, RTE_FLOW_ERROR_TYPE_ATTR, NULL, "must specify exactly one of " "ingress or egress"); - return 0; + return ret; } /** @@ -4559,6 +4794,8 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, * Pointer to the list of actions. * @param[in] external * This flow rule is created by request external to PMD. + * @param[in] hairpin + * Number of hairpin TX actions, 0 means classic flow. * @param[out] error * Pointer to the error structure. * @@ -4569,7 +4806,7 @@ static int flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, const struct rte_flow_item items[], const struct rte_flow_action actions[], - bool external, struct rte_flow_error *error) + bool external, int hairpin, struct rte_flow_error *error) { int ret; uint64_t action_flags = 0; @@ -4616,12 +4853,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, struct mlx5_dev_config *dev_conf = &priv->config; uint16_t queue_index = 0xFFFF; const struct rte_flow_item_vlan *vlan_m = NULL; + int16_t rw_act_num = 0; + uint64_t is_root; if (items == NULL) return -1; ret = flow_dv_validate_attributes(dev, attr, external, error); if (ret < 0) return ret; + is_root = (uint64_t)ret; for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); int type = items->type; @@ -4656,8 +4896,8 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, } break; case RTE_FLOW_ITEM_TYPE_VLAN: - ret = mlx5_flow_validate_item_vlan(items, item_flags, - dev, error); + ret = flow_dv_validate_item_vlan(items, item_flags, + dev, error); if (ret < 0) return ret; last_item = tunnel ? 
MLX5_FLOW_LAYER_INNER_VLAN : @@ -4898,6 +5138,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, action_flags |= MLX5_FLOW_ACTION_FLAG; ++actions_n; } + rw_act_num += MLX5_ACT_NUM_SET_MARK; break; case RTE_FLOW_ACTION_TYPE_MARK: ret = flow_dv_validate_action_mark(dev, actions, @@ -4916,6 +5157,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, action_flags |= MLX5_FLOW_ACTION_MARK; ++actions_n; } + rw_act_num += MLX5_ACT_NUM_SET_MARK; break; case RTE_FLOW_ACTION_TYPE_SET_META: ret = flow_dv_validate_action_set_meta(dev, actions, @@ -4927,6 +5169,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, if (!(action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS)) ++actions_n; action_flags |= MLX5_FLOW_ACTION_SET_META; + rw_act_num += MLX5_ACT_NUM_SET_META; break; case RTE_FLOW_ACTION_TYPE_SET_TAG: ret = flow_dv_validate_action_set_tag(dev, actions, @@ -4938,6 +5181,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, if (!(action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS)) ++actions_n; action_flags |= MLX5_FLOW_ACTION_SET_TAG; + rw_act_num += MLX5_ACT_NUM_SET_TAG; break; case RTE_FLOW_ACTION_TYPE_DROP: ret = mlx5_flow_validate_action_drop(action_flags, @@ -5015,6 +5259,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, return ret; /* Count VID with push_vlan command. */ action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID; + rw_act_num += MLX5_ACT_NUM_MDF_VID; break; case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: @@ -5076,8 +5321,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ? MLX5_FLOW_ACTION_SET_MAC_SRC : MLX5_FLOW_ACTION_SET_MAC_DST; + /* + * Even if the source and destination MAC addresses have + * overlap in the header with 4B alignment, the convert + * function will handle them separately and 4 SW actions + * will be created. And 2 actions will be added each + * time no matter how many bytes of address will be set. + */ + rw_act_num += MLX5_ACT_NUM_MDF_MAC; break; - case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC: case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST: ret = flow_dv_validate_action_modify_ipv4(action_flags, @@ -5093,6 +5345,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ? MLX5_FLOW_ACTION_SET_IPV4_SRC : MLX5_FLOW_ACTION_SET_IPV4_DST; + rw_act_num += MLX5_ACT_NUM_MDF_IPV4; break; case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC: case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST: @@ -5115,6 +5368,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ? MLX5_FLOW_ACTION_SET_IPV6_SRC : MLX5_FLOW_ACTION_SET_IPV6_DST; + rw_act_num += MLX5_ACT_NUM_MDF_IPV6; break; case RTE_FLOW_ACTION_TYPE_SET_TP_SRC: case RTE_FLOW_ACTION_TYPE_SET_TP_DST: @@ -5131,6 +5385,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, RTE_FLOW_ACTION_TYPE_SET_TP_SRC ? MLX5_FLOW_ACTION_SET_TP_SRC : MLX5_FLOW_ACTION_SET_TP_DST; + rw_act_num += MLX5_ACT_NUM_MDF_PORT; break; case RTE_FLOW_ACTION_TYPE_DEC_TTL: case RTE_FLOW_ACTION_TYPE_SET_TTL: @@ -5147,6 +5402,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, RTE_FLOW_ACTION_TYPE_SET_TTL ? 
MLX5_FLOW_ACTION_SET_TTL : MLX5_FLOW_ACTION_DEC_TTL; + rw_act_num += MLX5_ACT_NUM_MDF_TTL; break; case RTE_FLOW_ACTION_TYPE_JUMP: ret = flow_dv_validate_action_jump(actions, @@ -5174,6 +5430,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ ? MLX5_FLOW_ACTION_INC_TCP_SEQ : MLX5_FLOW_ACTION_DEC_TCP_SEQ; + rw_act_num += MLX5_ACT_NUM_MDF_TCPSEQ; break; case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK: case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK: @@ -5191,10 +5448,13 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, RTE_FLOW_ACTION_TYPE_INC_TCP_ACK ? MLX5_FLOW_ACTION_INC_TCP_ACK : MLX5_FLOW_ACTION_DEC_TCP_ACK; + rw_act_num += MLX5_ACT_NUM_MDF_TCPACK; break; - case MLX5_RTE_FLOW_ACTION_TYPE_TAG: case MLX5_RTE_FLOW_ACTION_TYPE_MARK: + break; + case MLX5_RTE_FLOW_ACTION_TYPE_TAG: case MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG: + rw_act_num += MLX5_ACT_NUM_SET_TAG; break; case RTE_FLOW_ACTION_TYPE_METER: ret = mlx5_flow_validate_action_meter(dev, @@ -5205,6 +5465,17 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, return ret; action_flags |= MLX5_FLOW_ACTION_METER; ++actions_n; + /* Meter action will add one more TAG action. */ + rw_act_num += MLX5_ACT_NUM_SET_TAG; + break; + case RTE_FLOW_ACTION_TYPE_AGE: + ret = flow_dv_validate_action_age(action_flags, + actions, dev, + error); + if (ret < 0) + return ret; + action_flags |= MLX5_FLOW_ACTION_AGE; + ++actions_n; break; case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP: ret = flow_dv_validate_action_modify_ipv4_dscp @@ -5218,6 +5489,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, if (!(action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS)) ++actions_n; action_flags |= MLX5_FLOW_ACTION_SET_IPV4_DSCP; + rw_act_num += MLX5_ACT_NUM_SET_DSCP; break; case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP: ret = flow_dv_validate_action_modify_ipv6_dscp @@ -5231,6 +5503,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, if (!(action_flags & MLX5_FLOW_MODIFY_HDR_ACTIONS)) ++actions_n; action_flags |= MLX5_FLOW_ACTION_SET_IPV6_DSCP; + rw_act_num += MLX5_ACT_NUM_SET_DSCP; break; default: return rte_flow_error_set(error, ENOTSUP, @@ -5303,6 +5576,21 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, NULL, "encap is not supported" " for ingress traffic"); } + /* Hairpin flow will add one more TAG action. */ + if (hairpin > 0) + rw_act_num += MLX5_ACT_NUM_SET_TAG; + /* extra metadata enabled: one more TAG action will be add. */ + if (dev_conf->dv_flow_en && + dev_conf->dv_xmeta_en != MLX5_XMETA_MODE_LEGACY && + mlx5_flow_ext_mreg_supported(dev)) + rw_act_num += MLX5_ACT_NUM_SET_TAG; + if ((uint32_t)rw_act_num > + flow_dv_modify_hdr_action_max(dev, is_root)) { + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "too many header modify" + " actions to support"); + } return 0; } @@ -5401,6 +5689,34 @@ flow_dv_check_valid_spec(void *match_mask, void *match_value) } #endif +/** + * Add match of ip_version. + * + * @param[in] group + * Flow group. + * @param[in] headers_v + * Values header pointer. + * @param[in] headers_m + * Masks header pointer. + * @param[in] ip_version + * The IP version to set. 
+ */ +static inline void +flow_dv_set_match_ip_version(uint32_t group, + void *headers_v, + void *headers_m, + uint8_t ip_version) +{ + if (group == 0) + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf); + else + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, + ip_version); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, ip_version); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, 0); + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype, 0); +} + /** * Add Ethernet item to matcher and to the value. * @@ -5415,7 +5731,8 @@ flow_dv_check_valid_spec(void *match_mask, void *match_value) */ static void flow_dv_translate_item_eth(void *matcher, void *key, - const struct rte_flow_item *item, int inner) + const struct rte_flow_item *item, int inner, + uint32_t group) { const struct rte_flow_item_eth *eth_m = item->mask; const struct rte_flow_item_eth *eth_v = item->spec; @@ -5470,11 +5787,22 @@ flow_dv_translate_item_eth(void *matcher, void *key, * HW supports match on one Ethertype, the Ethertype following the last * VLAN tag of the packet (see PRM). * Set match on ethertype only if ETH header is not followed by VLAN. + * HW is optimized for IPv4/IPv6. In such cases, avoid setting + * ethertype, and use ip_version field instead. */ - MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype, - rte_be_to_cpu_16(eth_m->type)); - l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, ethertype); - *(uint16_t *)(l24_v) = eth_m->type & eth_v->type; + if (eth_v->type == RTE_BE16(RTE_ETHER_TYPE_IPV4) && + eth_m->type == 0xFFFF) { + flow_dv_set_match_ip_version(group, headers_v, headers_m, 4); + } else if (eth_v->type == RTE_BE16(RTE_ETHER_TYPE_IPV6) && + eth_m->type == 0xFFFF) { + flow_dv_set_match_ip_version(group, headers_v, headers_m, 6); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype, + rte_be_to_cpu_16(eth_m->type)); + l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + ethertype); + *(uint16_t *)(l24_v) = eth_m->type & eth_v->type; + } } /** @@ -5495,7 +5823,7 @@ static void flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow, void *matcher, void *key, const struct rte_flow_item *item, - int inner) + int inner, uint32_t group) { const struct rte_flow_item_vlan *vlan_m = item->mask; const struct rte_flow_item_vlan *vlan_v = item->spec; @@ -5504,10 +5832,6 @@ flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow, uint16_t tci_m; uint16_t tci_v; - if (!vlan_v) - return; - if (!vlan_m) - vlan_m = &rte_flow_item_vlan_mask; if (inner) { headers_m = MLX5_ADDR_OF(fte_match_param, matcher, inner_headers); @@ -5520,23 +5844,45 @@ flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow, * This is workaround, masks are not supported, * and pre-validated. */ - dev_flow->handle->vf_vlan.tag = - rte_be_to_cpu_16(vlan_v->tci) & 0x0fff; + if (vlan_v) + dev_flow->handle->vf_vlan.tag = + rte_be_to_cpu_16(vlan_v->tci) & 0x0fff; } - tci_m = rte_be_to_cpu_16(vlan_m->tci); - tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci); + /* + * When VLAN item exists in flow, mark packet as tagged, + * even if TCI is not specified. 
+ */ MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); + if (!vlan_v) + return; + if (!vlan_m) + vlan_m = &rte_flow_item_vlan_mask; + tci_m = rte_be_to_cpu_16(vlan_m->tci); + tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci); MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_vid, tci_m); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, tci_v); MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_cfi, tci_m >> 12); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_cfi, tci_v >> 12); MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_prio, tci_m >> 13); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, tci_v >> 13); - MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype, - rte_be_to_cpu_16(vlan_m->inner_type)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - rte_be_to_cpu_16(vlan_m->inner_type & vlan_v->inner_type)); + /* + * HW is optimized for IPv4/IPv6. In such cases, avoid setting + * ethertype, and use ip_version field instead. + */ + if (vlan_v->inner_type == RTE_BE16(RTE_ETHER_TYPE_IPV4) && + vlan_m->inner_type == 0xFFFF) { + flow_dv_set_match_ip_version(group, headers_v, headers_m, 4); + } else if (vlan_v->inner_type == RTE_BE16(RTE_ETHER_TYPE_IPV6) && + vlan_m->inner_type == 0xFFFF) { + flow_dv_set_match_ip_version(group, headers_v, headers_m, 6); + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype, + rte_be_to_cpu_16(vlan_m->inner_type)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + rte_be_to_cpu_16(vlan_m->inner_type & + vlan_v->inner_type)); + } } /** @@ -5587,11 +5933,7 @@ flow_dv_translate_item_ipv4(void *matcher, void *key, outer_headers); headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); } - if (group == 0) - MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf); - else - MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0x4); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 4); + flow_dv_set_match_ip_version(group, headers_v, headers_m, 4); /* * On outer header (which must contains L2), or inner header with L2, * set cvlan_tag mask bit to mark this packet as untagged. @@ -5689,11 +6031,7 @@ flow_dv_translate_item_ipv6(void *matcher, void *key, outer_headers); headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); } - if (group == 0) - MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf); - else - MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0x6); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 6); + flow_dv_set_match_ip_version(group, headers_v, headers_m, 6); /* * On outer header (which must contains L2), or inner header with L2, * set cvlan_tag mask bit to mark this packet as untagged. 
@@ -6792,6 +7130,10 @@ flow_dv_translate_item_gtp(void *matcher, void *key, return; if (!gtp_m) gtp_m = &rte_flow_item_gtp_mask; + MLX5_SET(fte_match_set_misc3, misc3_m, gtpu_msg_flags, + gtp_m->v_pt_rsv_flags); + MLX5_SET(fte_match_set_misc3, misc3_v, gtpu_msg_flags, + gtp_v->v_pt_rsv_flags & gtp_m->v_pt_rsv_flags); MLX5_SET(fte_match_set_misc3, misc3_m, gtpu_msg_type, gtp_m->msg_type); MLX5_SET(fte_match_set_misc3, misc3_v, gtpu_msg_type, gtp_v->msg_type & gtp_m->msg_type); @@ -6864,7 +7206,7 @@ flow_dv_tbl_resource_get(struct rte_eth_dev *dev, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_flow_tbl_resource *tbl; union mlx5_flow_tbl_key table_key = { { @@ -6949,7 +7291,7 @@ flow_dv_tbl_resource_release(struct rte_eth_dev *dev, struct mlx5_flow_tbl_resource *tbl) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_flow_tbl_data_entry *tbl_data = container_of(tbl, struct mlx5_flow_tbl_data_entry, tbl); @@ -6994,7 +7336,7 @@ flow_dv_matcher_register(struct rte_eth_dev *dev, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_flow_dv_matcher *cache_matcher; struct mlx5dv_flow_matcher_attr dv_attr = { .type = IBV_FLOW_ATTR_NORMAL, @@ -7093,7 +7435,7 @@ flow_dv_tag_resource_register struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_flow_dv_tag_resource *cache_resource; struct mlx5_hlist_entry *entry; @@ -7103,7 +7445,7 @@ flow_dv_tag_resource_register cache_resource = container_of (entry, struct mlx5_flow_dv_tag_resource, entry); rte_atomic32_inc(&cache_resource->refcnt); - dev_flow->handle->dvh.tag_resource = cache_resource->idx; + dev_flow->handle->dvh.rix_tag = cache_resource->idx; dev_flow->dv.tag_resource = cache_resource; DRV_LOG(DEBUG, "cached tag resource %p: refcnt now %d++", (void *)cache_resource, @@ -7112,7 +7454,7 @@ flow_dv_tag_resource_register } /* Register new resource. */ cache_resource = mlx5_ipool_zmalloc(sh->ipool[MLX5_IPOOL_TAG], - &dev_flow->handle->dvh.tag_resource); + &dev_flow->handle->dvh.rix_tag); if (!cache_resource) return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -7157,7 +7499,7 @@ flow_dv_tag_release(struct rte_eth_dev *dev, uint32_t tag_idx) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; struct mlx5_flow_dv_tag_resource *tag; tag = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_TAG], tag_idx); @@ -7215,7 +7557,7 @@ flow_dv_translate_action_port_id(struct rte_eth_dev *dev, * This parameter is transferred to * mlx5dv_dr_action_create_dest_ib_port(). */ - *dst_port_id = priv->ibv_port; + *dst_port_id = priv->dev_port; #else /* * Legacy mode, no LAG configurations is supported. @@ -7227,6 +7569,53 @@ flow_dv_translate_action_port_id(struct rte_eth_dev *dev, return 0; } +/** + * Create a counter with aging configuration. + * + * @param[in] dev + * Pointer to rte_eth_dev structure. + * @param[out] count + * Pointer to the counter action configuration. + * @param[in] age + * Pointer to the aging action configuration. 
+ * + * @return + * Index to flow counter on success, 0 otherwise. + */ +static uint32_t +flow_dv_translate_create_counter(struct rte_eth_dev *dev, + struct mlx5_flow *dev_flow, + const struct rte_flow_action_count *count, + const struct rte_flow_action_age *age) +{ + uint32_t counter; + struct mlx5_age_param *age_param; + + counter = flow_dv_counter_alloc(dev, + count ? count->shared : 0, + count ? count->id : 0, + dev_flow->dv.group, !!age); + if (!counter || age == NULL) + return counter; + age_param = flow_dv_counter_idx_get_age(dev, counter); + /* + * The counter age accuracy may have a bit delay. Have 3/4 + * second bias on the timeount in order to let it age in time. + */ + age_param->context = age->context ? age->context : + (void *)(uintptr_t)(dev_flow->flow_idx); + /* + * The counter age accuracy may have a bit delay. Have 3/4 + * second bias on the timeount in order to let it age in time. + */ + age_param->timeout = age->timeout * 10 - MLX5_AGING_TIME_DELAY; + /* Set expire time in unit of 0.1 sec. */ + age_param->port_id = dev->data->port_id; + age_param->expire = age_param->timeout + + rte_rdtsc() / (rte_get_tsc_hz() / 10); + rte_atomic16_set(&age_param->state, AGE_CANDIDATE); + return counter; +} /** * Add Tx queue matcher * @@ -7277,18 +7666,20 @@ flow_dv_translate_item_tx_queue(struct rte_eth_dev *dev, * * @param[in] dev_flow * Pointer to the mlx5_flow. + * @param[in] rss_desc + * Pointer to the mlx5_flow_rss_desc. */ static void -flow_dv_hashfields_set(struct mlx5_flow *dev_flow) +flow_dv_hashfields_set(struct mlx5_flow *dev_flow, + struct mlx5_flow_rss_desc *rss_desc) { - struct rte_flow *flow = dev_flow->flow; uint64_t items = dev_flow->handle->layers; int rss_inner = 0; - uint64_t rss_types = rte_eth_rss_hf_refine(flow->rss.types); + uint64_t rss_types = rte_eth_rss_hf_refine(rss_desc->types); dev_flow->hash_fields = 0; #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT - if (flow->rss.level >= 2) { + if (rss_desc->level >= 2) { dev_flow->hash_fields |= IBV_RX_HASH_INNER; rss_inner = 1; } @@ -7373,6 +7764,9 @@ __flow_dv_translate(struct rte_eth_dev *dev, struct mlx5_dev_config *dev_conf = &priv->config; struct rte_flow *flow = dev_flow->flow; struct mlx5_flow_handle *handle = dev_flow->handle; + struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) + priv->rss_desc) + [!!priv->flow_nested_idx]; uint64_t item_flags = 0; uint64_t last_item = 0; uint64_t action_flags = 0; @@ -7391,6 +7785,8 @@ __flow_dv_translate(struct rte_eth_dev *dev, (MLX5_MAX_MODIFY_NUM + 1)]; } mhdr_dummy; struct mlx5_flow_dv_modify_hdr_resource *mhdr_res = &mhdr_dummy.res; + const struct rte_flow_action_count *count = NULL; + const struct rte_flow_action_age *age = NULL; union flow_dv_attr flow_attr = { .attr = 0 }; uint32_t tag_be; union mlx5_flow_tbl_key tbl_key; @@ -7419,7 +7815,6 @@ __flow_dv_translate(struct rte_eth_dev *dev, const struct rte_flow_action_queue *queue; const struct rte_flow_action_rss *rss; const struct rte_flow_action *action = actions; - const struct rte_flow_action_count *count = action->conf; const uint8_t *rss_key; const struct rte_flow_action_jump *jump_data; const struct rte_flow_action_meter *mtr; @@ -7428,6 +7823,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, struct mlx5_flow_dv_port_id_action_resource port_id_resource; int action_type = actions->type; const struct rte_flow_action *found_action = NULL; + struct mlx5_flow_meter *fm = NULL; switch (action_type) { case RTE_FLOW_ACTION_TYPE_VOID: @@ -7436,12 +7832,11 @@ __flow_dv_translate(struct rte_eth_dev *dev, if 
(flow_dv_translate_action_port_id(dev, action, &port_id, error)) return -rte_errno; - memset(&port_id_resource, 0, sizeof(port_id_resource)); port_id_resource.port_id = port_id; + MLX5_ASSERT(!handle->rix_port_id_action); if (flow_dv_port_id_action_resource_register (dev, &port_id_resource, dev_flow, error)) return -rte_errno; - MLX5_ASSERT(!handle->port_id_action); dev_flow->dv.actions[actions_n++] = dev_flow->dv.port_id_action->action; action_flags |= MLX5_FLOW_ACTION_PORT_ID; @@ -7468,7 +7863,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, * right now. So the pointer to the tag resource must be * zero before the register process. */ - MLX5_ASSERT(!handle->dvh.tag_resource); + MLX5_ASSERT(!handle->dvh.rix_tag); if (flow_dv_tag_resource_register(dev, tag_be, dev_flow, error)) return -rte_errno; @@ -7497,7 +7892,7 @@ __flow_dv_translate(struct rte_eth_dev *dev, tag_be = mlx5_flow_mark_set (((const struct rte_flow_action_mark *) (actions->conf))->id); - MLX5_ASSERT(!handle->dvh.tag_resource); + MLX5_ASSERT(!handle->dvh.rix_tag); if (flow_dv_tag_resource_register(dev, tag_be, dev_flow, error)) return -rte_errno; @@ -7526,23 +7921,20 @@ __flow_dv_translate(struct rte_eth_dev *dev, dev_flow->handle->fate_action = MLX5_FLOW_FATE_DROP; break; case RTE_FLOW_ACTION_TYPE_QUEUE: - MLX5_ASSERT(flow->rss.queue); queue = actions->conf; - flow->rss.queue_num = 1; - (*flow->rss.queue)[0] = queue->index; + rss_desc->queue_num = 1; + rss_desc->queue[0] = queue->index; action_flags |= MLX5_FLOW_ACTION_QUEUE; dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE; break; case RTE_FLOW_ACTION_TYPE_RSS: - MLX5_ASSERT(flow->rss.queue); rss = actions->conf; - if (flow->rss.queue) - memcpy((*flow->rss.queue), rss->queue, - rss->queue_num * sizeof(uint16_t)); - flow->rss.queue_num = rss->queue_num; + memcpy(rss_desc->queue, rss->queue, + rss->queue_num * sizeof(uint16_t)); + rss_desc->queue_num = rss->queue_num; /* NULL RSS key indicates default RSS key. */ rss_key = !rss->key ? rss_hash_default_key : rss->key; - memcpy(flow->rss.key, rss_key, MLX5_RSS_HASH_KEY_LEN); + memcpy(rss_desc->key, rss_key, MLX5_RSS_HASH_KEY_LEN); /* * rss->level and rss.types should be set in advance * when expanding items for RSS. @@ -7550,36 +7942,21 @@ __flow_dv_translate(struct rte_eth_dev *dev, action_flags |= MLX5_FLOW_ACTION_RSS; dev_flow->handle->fate_action = MLX5_FLOW_FATE_QUEUE; break; + case RTE_FLOW_ACTION_TYPE_AGE: case RTE_FLOW_ACTION_TYPE_COUNT: if (!dev_conf->devx) { - rte_errno = ENOTSUP; - goto cnt_err; - } - flow->counter = flow_dv_counter_alloc(dev, - count->shared, - count->id, - dev_flow->dv.group); - if (!flow->counter) - goto cnt_err; - dev_flow->dv.actions[actions_n++] = - (flow_dv_counter_get_by_idx(dev, - flow->counter, NULL))->action; - action_flags |= MLX5_FLOW_ACTION_COUNT; - break; -cnt_err: - if (rte_errno == ENOTSUP) return rte_flow_error_set (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "count action not supported"); + } + /* Save information first, will apply later. 
*/ + if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) + count = action->conf; else - return rte_flow_error_set - (error, rte_errno, - RTE_FLOW_ERROR_TYPE_ACTION, - action, - "cannot create counter" - " object."); + age = action->conf; + action_flags |= MLX5_FLOW_ACTION_COUNT; break; case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN: dev_flow->dv.actions[actions_n++] = @@ -7804,20 +8181,31 @@ cnt_err: case RTE_FLOW_ACTION_TYPE_METER: mtr = actions->conf; if (!flow->meter) { - flow->meter = mlx5_flow_meter_attach(priv, - mtr->mtr_id, attr, - error); - if (!flow->meter) + fm = mlx5_flow_meter_attach(priv, mtr->mtr_id, + attr, error); + if (!fm) return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "meter not found " "or invalid parameters"); + flow->meter = fm->idx; } /* Set the meter action. */ + if (!fm) { + fm = mlx5_ipool_get(priv->sh->ipool + [MLX5_IPOOL_MTR], flow->meter); + if (!fm) + return rte_flow_error_set(error, + rte_errno, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "meter not found " + "or invalid parameters"); + } dev_flow->dv.actions[actions_n++] = - flow->meter->mfts->meter_action; + fm->mfts->meter_action; action_flags |= MLX5_FLOW_ACTION_METER; break; case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP: @@ -7842,6 +8230,22 @@ cnt_err: dev_flow->dv.actions[modify_action_position] = handle->dvh.modify_hdr->verbs_action; } + if (action_flags & MLX5_FLOW_ACTION_COUNT) { + flow->counter = + flow_dv_translate_create_counter(dev, + dev_flow, count, age); + + if (!flow->counter) + return rte_flow_error_set + (error, rte_errno, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "cannot create counter" + " object."); + dev_flow->dv.actions[actions_n++] = + (flow_dv_counter_get_by_idx(dev, + flow->counter, NULL))->action; + } break; default: break; @@ -7864,7 +8268,8 @@ cnt_err: break; case RTE_FLOW_ITEM_TYPE_ETH: flow_dv_translate_item_eth(match_mask, match_value, - items, tunnel); + items, tunnel, + dev_flow->dv.group); matcher.priority = MLX5_PRIORITY_MAP_L2; last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : MLX5_FLOW_LAYER_OUTER_L2; @@ -7872,7 +8277,8 @@ cnt_err: case RTE_FLOW_ITEM_TYPE_VLAN: flow_dv_translate_item_vlan(dev_flow, match_mask, match_value, - items, tunnel); + items, tunnel, + dev_flow->dv.group); matcher.priority = MLX5_PRIORITY_MAP_L2; last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) : @@ -7942,7 +8348,7 @@ cnt_err: case RTE_FLOW_ITEM_TYPE_GRE: flow_dv_translate_item_gre(match_mask, match_value, items, tunnel); - matcher.priority = flow->rss.level >= 2 ? + matcher.priority = rss_desc->level >= 2 ? MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4; last_item = MLX5_FLOW_LAYER_GRE; break; @@ -7954,14 +8360,14 @@ cnt_err: case RTE_FLOW_ITEM_TYPE_NVGRE: flow_dv_translate_item_nvgre(match_mask, match_value, items, tunnel); - matcher.priority = flow->rss.level >= 2 ? + matcher.priority = rss_desc->level >= 2 ? MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4; last_item = MLX5_FLOW_LAYER_GRE; break; case RTE_FLOW_ITEM_TYPE_VXLAN: flow_dv_translate_item_vxlan(match_mask, match_value, items, tunnel); - matcher.priority = flow->rss.level >= 2 ? + matcher.priority = rss_desc->level >= 2 ? MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4; last_item = MLX5_FLOW_LAYER_VXLAN; break; @@ -7969,21 +8375,21 @@ cnt_err: flow_dv_translate_item_vxlan_gpe(match_mask, match_value, items, tunnel); - matcher.priority = flow->rss.level >= 2 ? + matcher.priority = rss_desc->level >= 2 ? 
@@ -7842,6 +8230,22 @@ cnt_err:
 				dev_flow->dv.actions[modify_action_position] =
 					handle->dvh.modify_hdr->verbs_action;
 			}
+			if (action_flags & MLX5_FLOW_ACTION_COUNT) {
+				flow->counter =
+					flow_dv_translate_create_counter(dev,
+						dev_flow, count, age);
+
+				if (!flow->counter)
+					return rte_flow_error_set
+						(error, rte_errno,
+						RTE_FLOW_ERROR_TYPE_ACTION,
+						NULL,
+						"cannot create counter"
+						" object.");
+				dev_flow->dv.actions[actions_n++] =
+					(flow_dv_counter_get_by_idx(dev,
+					flow->counter, NULL))->action;
+			}
 			break;
 		default:
 			break;
@@ -7864,7 +8268,8 @@ cnt_err:
 			break;
 		case RTE_FLOW_ITEM_TYPE_ETH:
 			flow_dv_translate_item_eth(match_mask, match_value,
-						   items, tunnel);
+						   items, tunnel,
+						   dev_flow->dv.group);
 			matcher.priority = MLX5_PRIORITY_MAP_L2;
 			last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
 					     MLX5_FLOW_LAYER_OUTER_L2;
@@ -7872,7 +8277,8 @@ cnt_err:
 		case RTE_FLOW_ITEM_TYPE_VLAN:
 			flow_dv_translate_item_vlan(dev_flow,
 						    match_mask, match_value,
-						    items, tunnel);
+						    items, tunnel,
+						    dev_flow->dv.group);
 			matcher.priority = MLX5_PRIORITY_MAP_L2;
 			last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 |
 					      MLX5_FLOW_LAYER_INNER_VLAN) :
@@ -7942,7 +8348,7 @@ cnt_err:
 		case RTE_FLOW_ITEM_TYPE_GRE:
 			flow_dv_translate_item_gre(match_mask, match_value,
 						   items, tunnel);
-			matcher.priority = flow->rss.level >= 2 ?
+			matcher.priority = rss_desc->level >= 2 ?
 				    MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
 			last_item = MLX5_FLOW_LAYER_GRE;
 			break;
@@ -7954,14 +8360,14 @@ cnt_err:
 		case RTE_FLOW_ITEM_TYPE_NVGRE:
 			flow_dv_translate_item_nvgre(match_mask, match_value,
 						     items, tunnel);
-			matcher.priority = flow->rss.level >= 2 ?
+			matcher.priority = rss_desc->level >= 2 ?
 				    MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
 			last_item = MLX5_FLOW_LAYER_GRE;
 			break;
 		case RTE_FLOW_ITEM_TYPE_VXLAN:
 			flow_dv_translate_item_vxlan(match_mask, match_value,
 						     items, tunnel);
-			matcher.priority = flow->rss.level >= 2 ?
+			matcher.priority = rss_desc->level >= 2 ?
 				    MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
 			last_item = MLX5_FLOW_LAYER_VXLAN;
 			break;
@@ -7969,21 +8375,21 @@ cnt_err:
 			flow_dv_translate_item_vxlan_gpe(match_mask,
 							 match_value, items,
 							 tunnel);
-			matcher.priority = flow->rss.level >= 2 ?
+			matcher.priority = rss_desc->level >= 2 ?
 				    MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
 			last_item = MLX5_FLOW_LAYER_VXLAN_GPE;
 			break;
 		case RTE_FLOW_ITEM_TYPE_GENEVE:
 			flow_dv_translate_item_geneve(match_mask, match_value,
 						      items, tunnel);
-			matcher.priority = flow->rss.level >= 2 ?
+			matcher.priority = rss_desc->level >= 2 ?
 				    MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
 			last_item = MLX5_FLOW_LAYER_GENEVE;
 			break;
 		case RTE_FLOW_ITEM_TYPE_MPLS:
 			flow_dv_translate_item_mpls(match_mask, match_value,
 						    items, last_item, tunnel);
-			matcher.priority = flow->rss.level >= 2 ?
+			matcher.priority = rss_desc->level >= 2 ?
 				    MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
 			last_item = MLX5_FLOW_LAYER_MPLS;
 			break;
@@ -8026,7 +8432,7 @@ cnt_err:
 		case RTE_FLOW_ITEM_TYPE_GTP:
 			flow_dv_translate_item_gtp(match_mask, match_value,
 						   items, tunnel);
-			matcher.priority = flow->rss.level >= 2 ?
+			matcher.priority = rss_desc->level >= 2 ?
 				    MLX5_PRIORITY_MAP_L2 : MLX5_PRIORITY_MAP_L4;
 			last_item = MLX5_FLOW_LAYER_GTP;
 			break;
@@ -8058,7 +8464,7 @@ cnt_err:
 	 */
 	handle->layers |= item_flags;
 	if (action_flags & MLX5_FLOW_ACTION_RSS)
-		flow_dv_hashfields_set(dev_flow);
+		flow_dv_hashfields_set(dev_flow, rss_desc);
 	/* Register matcher. */
 	matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf,
 				    matcher.mask.size);
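Every tunnel item above now takes the RSS level from the per-flow working descriptor (`rss_desc->level`); a level of 2 or higher means hashing is requested on the inner headers, so the matcher priority for the tunnel item itself falls back to L2. That level, together with the key and queue list copied into `rss_desc` by the RSS action case earlier, originates in the application's `struct rte_flow_action_rss`. A small, hypothetical configuration asking for inner-header RSS over two queues (all values are placeholders):

#include <rte_common.h>
#include <rte_ethdev.h>
#include <rte_flow.h>

static const uint16_t rss_queues[] = { 0, 1 };

static const struct rte_flow_action_rss rss_conf = {
	.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
	.level = 2,			/* hash on the inner (post-tunnel) headers */
	.types = ETH_RSS_IP | ETH_RSS_UDP,
	.key_len = 0,			/* NULL key: PMD falls back to its default RSS key */
	.key = NULL,
	.queue_num = RTE_DIM(rss_queues),
	.queue = rss_queues,
};

static const struct rte_flow_action rss_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss_conf },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};

The NULL-key fallback mirrors the "NULL RSS key indicates default RSS key" handling in the RSS action hunk above.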
@@ -8127,26 +8533,29 @@ __flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 				 * the special index to hrxq to mark the queue
 				 * has been allocated.
 				 */
-				dh->hrxq = UINT32_MAX;
+				dh->rix_hrxq = UINT32_MAX;
 				dv->actions[n++] = drop_hrxq->action;
 			}
 		} else if (dh->fate_action == MLX5_FLOW_FATE_QUEUE) {
 			struct mlx5_hrxq *hrxq;
 			uint32_t hrxq_idx;
+			struct mlx5_flow_rss_desc *rss_desc =
+				&((struct mlx5_flow_rss_desc *)priv->rss_desc)
+				[!!priv->flow_nested_idx];
 
-			MLX5_ASSERT(flow->rss.queue);
-			hrxq_idx = mlx5_hrxq_get(dev, flow->rss.key,
+			MLX5_ASSERT(rss_desc->queue_num);
+			hrxq_idx = mlx5_hrxq_get(dev, rss_desc->key,
 						 MLX5_RSS_HASH_KEY_LEN,
 						 dev_flow->hash_fields,
-						 (*flow->rss.queue),
-						 flow->rss.queue_num);
+						 rss_desc->queue,
+						 rss_desc->queue_num);
 			if (!hrxq_idx) {
 				hrxq_idx = mlx5_hrxq_new
-						(dev, flow->rss.key,
+						(dev, rss_desc->key,
 						 MLX5_RSS_HASH_KEY_LEN,
 						 dev_flow->hash_fields,
-						 (*flow->rss.queue),
-						 flow->rss.queue_num,
+						 rss_desc->queue,
+						 rss_desc->queue_num,
 						 !!(dh->layers &
 						    MLX5_FLOW_LAYER_TUNNEL));
 			}
@@ -8159,7 +8568,7 @@ __flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 					 "cannot get hash queue");
 				goto error;
 			}
-			dh->hrxq = hrxq_idx;
+			dh->rix_hrxq = hrxq_idx;
 			dv->actions[n++] = hrxq->action;
 		}
 		dh->ib_flow =
@@ -8190,13 +8599,13 @@ error:
 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
 		       handle_idx, dh, next) {
 		/* hrxq is union, don't clear it if the flag is not set. */
-		if (dh->hrxq) {
+		if (dh->rix_hrxq) {
 			if (dh->fate_action == MLX5_FLOW_FATE_DROP) {
 				mlx5_hrxq_drop_release(dev);
-				dh->hrxq = 0;
+				dh->rix_hrxq = 0;
 			} else if (dh->fate_action == MLX5_FLOW_FATE_QUEUE) {
-				mlx5_hrxq_release(dev, dh->hrxq);
-				dh->hrxq = 0;
+				mlx5_hrxq_release(dev, dh->rix_hrxq);
+				dh->rix_hrxq = 0;
 			}
 		}
 		if (dh->vf_vlan.tag && dh->vf_vlan.created)
@@ -8257,7 +8666,7 @@ flow_dv_encap_decap_resource_release(struct rte_eth_dev *dev,
 				     struct mlx5_flow_handle *handle)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	uint32_t idx = handle->dvh.encap_decap;
+	uint32_t idx = handle->dvh.rix_encap_decap;
 	struct mlx5_flow_dv_encap_decap_resource *cache_resource;
 
 	cache_resource = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_DECAP_ENCAP],
@@ -8302,7 +8711,7 @@ flow_dv_jump_tbl_resource_release(struct rte_eth_dev *dev,
 	struct mlx5_flow_tbl_data_entry *tbl_data;
 
 	tbl_data = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_JUMP],
-			     handle->jump);
+			     handle->rix_jump);
 	if (!tbl_data)
 		return 0;
 	cache_resource = &tbl_data->jump;
@@ -8370,7 +8779,7 @@ flow_dv_port_id_action_resource_release(struct rte_eth_dev *dev,
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_dv_port_id_action_resource *cache_resource;
-	uint32_t idx = handle->port_id_action;
+	uint32_t idx = handle->rix_port_id_action;
 
 	cache_resource = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_PORT_ID],
 					idx);
@@ -8410,7 +8819,7 @@ flow_dv_push_vlan_action_resource_release(struct rte_eth_dev *dev,
 					  struct mlx5_flow_handle *handle)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	uint32_t idx = handle->dvh.push_vlan_res;
+	uint32_t idx = handle->dvh.rix_push_vlan;
 	struct mlx5_flow_dv_push_vlan_action_resource *cache_resource;
 
 	cache_resource = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_PUSH_VLAN],
@@ -8447,19 +8856,19 @@ static void
 flow_dv_fate_resource_release(struct rte_eth_dev *dev,
 			       struct mlx5_flow_handle *handle)
 {
-	if (!handle->fate_idx)
+	if (!handle->rix_fate)
 		return;
 	if (handle->fate_action == MLX5_FLOW_FATE_DROP)
 		mlx5_hrxq_drop_release(dev);
 	else if (handle->fate_action == MLX5_FLOW_FATE_QUEUE)
-		mlx5_hrxq_release(dev, handle->hrxq);
+		mlx5_hrxq_release(dev, handle->rix_hrxq);
 	else if (handle->fate_action == MLX5_FLOW_FATE_JUMP)
 		flow_dv_jump_tbl_resource_release(dev, handle);
 	else if (handle->fate_action == MLX5_FLOW_FATE_PORT_ID)
 		flow_dv_port_id_action_resource_release(dev, handle);
 	else
 		DRV_LOG(DEBUG, "Incorrect fate action:%d", handle->fate_action);
-	handle->fate_idx = 0;
+	handle->rix_fate = 0;
 }
 
 /**
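The fate release helper above works because the handle keeps a single 32-bit resource index whose meaning is selected by `fate_action`: the same storage is read back as `rix_hrxq`, `rix_jump` or `rix_port_id_action` depending on the fate, while `rix_fate` is the untyped view used to test whether anything is attached at all. That is also why the error path earlier only clears the field when the matching fate flag is set. A simplified, self-contained sketch of that discriminated-union idea; the struct below is illustrative and not the real `mlx5_flow_handle` layout:

#include <stdint.h>

enum fate_action {
	FATE_NONE = 0,
	FATE_QUEUE,	/* rix_hrxq: index of a hash Rx queue object */
	FATE_JUMP,	/* rix_jump: index of a jump table entry */
	FATE_PORT_ID,	/* rix_port_id_action: index of a port-id action */
	FATE_DROP,
};

struct handle_like {
	enum fate_action fate_action;	/* discriminator */
	union {				/* one index, interpreted per fate */
		uint32_t rix_hrxq;
		uint32_t rix_jump;
		uint32_t rix_port_id_action;
		uint32_t rix_fate;	/* generic view: 0 means nothing held */
	};
};

static void
fate_release(struct handle_like *h)
{
	if (!h->rix_fate)
		return;			/* nothing was ever attached */
	/* ...dispatch on h->fate_action and release the matching resource... */
	h->rix_fate = 0;
}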
@@ -8522,8 +8931,13 @@ __flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 		flow->counter = 0;
 	}
 	if (flow->meter) {
-		mlx5_flow_meter_detach(flow->meter);
-		flow->meter = NULL;
+		struct mlx5_flow_meter *fm;
+
+		fm = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MTR],
+				    flow->meter);
+		if (fm)
+			mlx5_flow_meter_detach(fm);
+		flow->meter = 0;
 	}
 	while (flow->dev_handles) {
 		uint32_t tmp_idx = flow->dev_handles;
@@ -8535,16 +8949,16 @@ __flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 		flow->dev_handles = dev_handle->next.next;
 		if (dev_handle->dvh.matcher)
 			flow_dv_matcher_release(dev, dev_handle);
-		if (dev_handle->dvh.encap_decap)
+		if (dev_handle->dvh.rix_encap_decap)
 			flow_dv_encap_decap_resource_release(dev, dev_handle);
 		if (dev_handle->dvh.modify_hdr)
 			flow_dv_modify_hdr_resource_release(dev_handle);
-		if (dev_handle->dvh.push_vlan_res)
+		if (dev_handle->dvh.rix_push_vlan)
 			flow_dv_push_vlan_action_resource_release(dev,
 								  dev_handle);
-		if (dev_handle->dvh.tag_resource)
+		if (dev_handle->dvh.rix_tag)
 			flow_dv_tag_release(dev,
-					    dev_handle->dvh.tag_resource);
+					    dev_handle->dvh.rix_tag);
 		flow_dv_fate_resource_release(dev, dev_handle);
 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
 				tmp_idx);
@@ -8677,11 +9091,9 @@ flow_dv_destroy_mtr_tbl(struct rte_eth_dev *dev,
 		claim_zero(mlx5_glue->dv_destroy_flow_matcher
 			   (mtd->egress.any_matcher));
 	if (mtd->egress.tbl)
-		claim_zero(flow_dv_tbl_resource_release(dev,
-							mtd->egress.tbl));
+		flow_dv_tbl_resource_release(dev, mtd->egress.tbl);
 	if (mtd->egress.sfx_tbl)
-		claim_zero(flow_dv_tbl_resource_release(dev,
-							mtd->egress.sfx_tbl));
+		flow_dv_tbl_resource_release(dev, mtd->egress.sfx_tbl);
 	if (mtd->ingress.color_matcher)
 		claim_zero(mlx5_glue->dv_destroy_flow_matcher
 			   (mtd->ingress.color_matcher));
@@ -8689,11 +9101,9 @@ flow_dv_destroy_mtr_tbl(struct rte_eth_dev *dev,
 		claim_zero(mlx5_glue->dv_destroy_flow_matcher
 			   (mtd->ingress.any_matcher));
 	if (mtd->ingress.tbl)
-		claim_zero(flow_dv_tbl_resource_release(dev,
-							mtd->ingress.tbl));
+		flow_dv_tbl_resource_release(dev, mtd->ingress.tbl);
 	if (mtd->ingress.sfx_tbl)
-		claim_zero(flow_dv_tbl_resource_release(dev,
-							mtd->ingress.sfx_tbl));
+		flow_dv_tbl_resource_release(dev, mtd->ingress.sfx_tbl);
 	if (mtd->transfer.color_matcher)
 		claim_zero(mlx5_glue->dv_destroy_flow_matcher
 			   (mtd->transfer.color_matcher));
@@ -8701,11 +9111,9 @@ flow_dv_destroy_mtr_tbl(struct rte_eth_dev *dev,
 		claim_zero(mlx5_glue->dv_destroy_flow_matcher
 			   (mtd->transfer.any_matcher));
 	if (mtd->transfer.tbl)
-		claim_zero(flow_dv_tbl_resource_release(dev,
-							mtd->transfer.tbl));
+		flow_dv_tbl_resource_release(dev, mtd->transfer.tbl);
 	if (mtd->transfer.sfx_tbl)
-		claim_zero(flow_dv_tbl_resource_release(dev,
-							mtd->transfer.sfx_tbl));
+		flow_dv_tbl_resource_release(dev, mtd->transfer.sfx_tbl);
 	if (mtd->drop_actn)
 		claim_zero(mlx5_glue->destroy_flow_action(mtd->drop_actn));
 	rte_free(mtd);
@@ -8739,7 +9147,7 @@ flow_dv_prepare_mtr_tables(struct rte_eth_dev *dev,
 			   uint32_t color_reg_c_idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_ibv_shared *sh = priv->sh;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
 	struct mlx5_flow_dv_match_params mask = {
 		.size = sizeof(mask.buf),
 	};
@@ -8988,7 +9396,7 @@ flow_dv_create_policer_forward_rule(struct mlx5_flow_meter *fm,
 				   rte_col_2_mlx5_col(i), UINT8_MAX);
 		if (mtb->count_actns[i])
 			actions[j++] = mtb->count_actns[i];
-		if (fm->params.action[i] == MTR_POLICER_ACTION_DROP)
+		if (fm->action[i] == MTR_POLICER_ACTION_DROP)
 			actions[j++] = mtb->drop_actn;
 		else
 			actions[j++] = dtb->jump_actn;
@@ -9101,6 +9509,60 @@ flow_dv_counter_query(struct rte_eth_dev *dev, uint32_t counter, bool clear,
 	return 0;
 }
 
+/**
+ * Get aged-out flows.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] context
+ *   The address of an array of pointers to the aged-out flows contexts.
+ * @param[in] nb_contexts
+ *   The length of context array pointers.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   how many contexts are reported on success, otherwise negative errno value.
+ *   if nb_contexts is 0, return the amount of all aged contexts.
+ *   if nb_contexts is not 0, return the amount of aged flows reported
+ *   in the context array.
+ */
+static int
+flow_get_aged_flows(struct rte_eth_dev *dev,
+		    void **context,
+		    uint32_t nb_contexts,
+		    struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_age_info *age_info;
+	struct mlx5_age_param *age_param;
+	struct mlx5_flow_counter *counter;
+	int nb_flows = 0;
+
+	if (nb_contexts && !context)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "Should assign at least one flow or"
+					  " context to get if nb_contexts != 0");
+	age_info = GET_PORT_AGE_INFO(priv);
+	rte_spinlock_lock(&age_info->aged_sl);
+	TAILQ_FOREACH(counter, &age_info->aged_counters, next) {
+		nb_flows++;
+		if (nb_contexts) {
+			age_param = MLX5_CNT_TO_AGE(counter);
+			context[nb_flows - 1] = age_param->context;
+			if (!(--nb_contexts))
+				break;
+		}
+	}
+	rte_spinlock_unlock(&age_info->aged_sl);
+	MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
+	return nb_flows;
+}
+
 /*
  * Mutex-protected thunk to lock-free __flow_dv_translate().
  */
@@ -9167,7 +9629,7 @@ flow_dv_counter_allocate(struct rte_eth_dev *dev)
 	uint32_t cnt;
 
 	flow_dv_shared_lock(dev);
-	cnt = flow_dv_counter_alloc(dev, 0, 0, 1);
+	cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0);
 	flow_dv_shared_unlock(dev);
 	return cnt;
 }
@@ -9198,6 +9660,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
 	.counter_alloc = flow_dv_counter_allocate,
 	.counter_free = flow_dv_counter_free,
 	.counter_query = flow_dv_counter_query,
+	.get_aged_flows = flow_get_aged_flows,
 };
 
 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
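The new `.get_aged_flows` entry wires `flow_get_aged_flows()` into the generic `rte_flow_get_aged_flows()` API. A typical application reacts to the `RTE_ETH_EVENT_FLOW_AGED` event and drains the aged contexts in two steps: a first call with a NULL array to learn how many flows aged out, then a second call to fetch their contexts. A hedged sketch of that usage, assuming the rte_flow aging API introduced alongside this patch; error handling is trimmed:

#include <stdlib.h>
#include <rte_ethdev.h>
#include <rte_flow.h>

/* Callback registered for RTE_ETH_EVENT_FLOW_AGED. */
static int
aged_flow_event_cb(uint16_t port_id, enum rte_eth_event_type event,
		   void *cb_arg, void *ret_param)
{
	struct rte_flow_error error;
	void **contexts;
	int total, n;

	(void)event;
	(void)cb_arg;
	(void)ret_param;
	/* First call: a NULL array only returns the number of aged flows. */
	total = rte_flow_get_aged_flows(port_id, NULL, 0, &error);
	if (total <= 0)
		return 0;
	contexts = calloc(total, sizeof(*contexts));
	if (contexts == NULL)
		return 0;
	n = rte_flow_get_aged_flows(port_id, contexts, total, &error);
	for (int i = 0; i < n; i++) {
		/*
		 * contexts[i] is the rte_flow_action_age.context of an aged
		 * flow: look up the flow it identifies and destroy or refresh
		 * it here.
		 */
	}
	free(contexts);
	return 0;
}

/*
 * Registration, e.g. right after device configuration:
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_FLOW_AGED,
 *				      aged_flow_event_cb, NULL);
 */

Many applications defer the actual rte_flow teardown from the event callback to their own control thread; the callback then only records that aged flows are pending.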