diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index da16e48c55..41500bc109 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -143,36 +143,20 @@ struct field_modify_info modify_tcp[] = {
 };
 
 static void
-mlx5_flow_tunnel_ip_check(const struct rte_flow_item *item, uint64_t *flags)
+mlx5_flow_tunnel_ip_check(const struct rte_flow_item *item __rte_unused,
+			  uint8_t next_protocol, uint64_t *item_flags,
+			  int *tunnel)
 {
-	uint8_t next_protocol = 0xFF;
-
-	if (item->mask != NULL) {
-		switch (item->type) {
-		case RTE_FLOW_ITEM_TYPE_IPV4:
-			next_protocol =
-				((const struct rte_flow_item_ipv4 *)
-				 (item->spec))->hdr.next_proto_id;
-			next_protocol &=
-				((const struct rte_flow_item_ipv4 *)
-				 (item->mask))->hdr.next_proto_id;
-			break;
-		case RTE_FLOW_ITEM_TYPE_IPV6:
-			next_protocol =
-				((const struct rte_flow_item_ipv6 *)
-				 (item->spec))->hdr.proto;
-			next_protocol &=
-				((const struct rte_flow_item_ipv6 *)
-				 (item->mask))->hdr.proto;
-			break;
-		default:
-			break;
-		}
+	assert(item->type == RTE_FLOW_ITEM_TYPE_IPV4 ||
+	       item->type == RTE_FLOW_ITEM_TYPE_IPV6);
+	if (next_protocol == IPPROTO_IPIP) {
+		*item_flags |= MLX5_FLOW_LAYER_IPIP;
+		*tunnel = 1;
+	}
+	if (next_protocol == IPPROTO_IPV6) {
+		*item_flags |= MLX5_FLOW_LAYER_IPV6_ENCAP;
+		*tunnel = 1;
 	}
-	if (next_protocol == IPPROTO_IPIP)
-		*flags |= MLX5_FLOW_LAYER_IPIP;
-	if (next_protocol == IPPROTO_IPV6)
-		*flags |= MLX5_FLOW_LAYER_IPV6_ENCAP;
 }
 
 /**
@@ -965,6 +949,8 @@ flow_dv_validate_action_raw_encap(uint64_t action_flags,
 				  const struct rte_flow_attr *attr,
 				  struct rte_flow_error *error)
 {
+	const struct rte_flow_action_raw_encap *raw_encap =
+		(const struct rte_flow_action_raw_encap *)action->conf;
 	if (!(action->conf))
 		return rte_flow_error_set(error, EINVAL,
 					  RTE_FLOW_ERROR_TYPE_ACTION, action,
@@ -986,6 +972,10 @@ flow_dv_validate_action_raw_encap(uint64_t action_flags,
 					  NULL,
 					  "encap action not supported for "
 					  "ingress");
+	if (!raw_encap->size || !raw_encap->data)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ACTION, action,
+					  "raw encap data cannot be empty");
 	return 0;
 }
 
@@ -1504,9 +1494,9 @@ flow_dv_zero_encap_udp_csum(void *data, struct rte_flow_error *error)
 	/* VLAN skipping */
 	while (proto == RTE_ETHER_TYPE_VLAN || proto == RTE_ETHER_TYPE_QINQ) {
-		next_hdr += sizeof(struct rte_vlan_hdr);
 		vlan = (struct rte_vlan_hdr *)next_hdr;
 		proto = RTE_BE16(vlan->eth_proto);
+		next_hdr += sizeof(struct rte_vlan_hdr);
 	}
 
 	/* HW calculates IPv4 csum. no need to proceed */
@@ -2147,8 +2137,114 @@ flow_dv_modify_hdr_resource_register
 	return 0;
 }
 
-#define MLX5_CNT_CONTAINER_SIZE 64
-#define MLX5_CNT_CONTAINER(priv, batch) (&(priv)->sh->cmng.ccont[batch])
+#define MLX5_CNT_CONTAINER_RESIZE 64
+
+/**
+ * Get or create a flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] shared
+ *   Indicate if this counter is shared with other flows.
+ * @param[in] id
+ *   Counter identifier.
+ *
+ * @return
+ *   pointer to flow counter on success, NULL otherwise and rte_errno is set.
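+ *   This fallback path is taken only when priv->counter_fallback is set
+ *   (see flow_dv_counter_alloc() below).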
+ */
+static struct mlx5_flow_counter *
+flow_dv_counter_alloc_fallback(struct rte_eth_dev *dev, uint32_t shared,
+			       uint32_t id)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_flow_counter *cnt = NULL;
+	struct mlx5_devx_obj *dcs = NULL;
+
+	if (!priv->config.devx) {
+		rte_errno = ENOTSUP;
+		return NULL;
+	}
+	if (shared) {
+		TAILQ_FOREACH(cnt, &priv->sh->cmng.flow_counters, next) {
+			if (cnt->shared && cnt->id == id) {
+				cnt->ref_cnt++;
+				return cnt;
+			}
+		}
+	}
+	dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
+	if (!dcs)
+		return NULL;
+	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+	if (!cnt) {
+		claim_zero(mlx5_devx_cmd_destroy(dcs));
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	struct mlx5_flow_counter tmpl = {
+		.shared = shared,
+		.ref_cnt = 1,
+		.id = id,
+		.dcs = dcs,
+	};
+	tmpl.action = mlx5_glue->dv_create_flow_action_counter(dcs->obj, 0);
+	if (!tmpl.action) {
+		claim_zero(mlx5_devx_cmd_destroy(dcs));
+		rte_errno = errno;
+		rte_free(cnt);
+		return NULL;
+	}
+	*cnt = tmpl;
+	TAILQ_INSERT_HEAD(&priv->sh->cmng.flow_counters, cnt, next);
+	return cnt;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] counter
+ *   Pointer to the counter handler.
+ */
+static void
+flow_dv_counter_release_fallback(struct rte_eth_dev *dev,
+				 struct mlx5_flow_counter *counter)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	if (!counter)
+		return;
+	if (--counter->ref_cnt == 0) {
+		TAILQ_REMOVE(&priv->sh->cmng.flow_counters, counter, next);
+		claim_zero(mlx5_devx_cmd_destroy(counter->dcs));
+		rte_free(counter);
+	}
+}
+
+/**
+ * Query a devx flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] cnt
+ *   Pointer to the flow counter.
+ * @param[out] pkts
+ *   The statistics value of packets.
+ * @param[out] bytes
+ *   The statistics value of bytes.
+ *
+ * @return
+ *   0 on success, otherwise a negative errno value and rte_errno is set.
+ */
+static inline int
+_flow_dv_query_count_fallback(struct rte_eth_dev *dev __rte_unused,
+		     struct mlx5_flow_counter *cnt, uint64_t *pkts,
+		     uint64_t *bytes)
+{
+	return mlx5_devx_cmd_flow_counter_query(cnt->dcs, 0, 0, pkts, bytes,
+						0, NULL, NULL, 0);
+}
 
 /**
  * Get a pool by a counter.
@@ -2213,8 +2309,6 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 {
 	struct mlx5_ibv_shared *sh = ((struct mlx5_priv *)
 				      (dev->data->dev_private))->sh;
-	struct mlx5dv_pd dv_pd;
-	struct mlx5dv_obj dv_obj;
 	struct mlx5_devx_mkey_attr mkey_attr;
 	struct mlx5_counter_stats_mem_mng *mem_mng;
 	volatile struct flow_counter_stats *raw_data;
@@ -2238,13 +2332,10 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 		rte_free(mem);
 		return NULL;
 	}
-	dv_obj.pd.in = sh->pd;
-	dv_obj.pd.out = &dv_pd;
-	mlx5_glue->dv_init_obj(&dv_obj, MLX5DV_OBJ_PD);
 	mkey_attr.addr = (uintptr_t)mem;
 	mkey_attr.size = size;
 	mkey_attr.umem_id = mem_mng->umem->umem_id;
-	mkey_attr.pd = dv_pd.pdn;
+	mkey_attr.pd = sh->pdn;
 	mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
 	if (!mem_mng->dm) {
 		mlx5_glue->devx_umem_dereg(mem_mng->umem);
@@ -2263,7 +2354,7 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
 }
 
 /**
- * Prepare a counter container.
+ * Resize a counter container.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
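
The resize path in the next hunk follows a double-buffer scheme: the currently unused container is filled with an enlarged copy of the pool array, the copy is published with a write barrier, and then the master index (mhi) is flipped so the host query thread starts reading the new container. A minimal standalone sketch of that pattern - the names here are illustrative, not the driver's own - assuming GCC's __atomic builtins:

#include <stdlib.h>
#include <string.h>

struct container { void **pools; int n; };

struct cmng {
	struct container cont[2];
	unsigned char master;	/* index of the container readers use */
};

/* Grow the unused container, then flip it to become the master. */
static struct container *
resize(struct cmng *c, int grow)
{
	struct container *cur = &c->cont[c->master];
	struct container *new = &c->cont[c->master ^ 1];

	new->pools = calloc(cur->n + grow, sizeof(void *));
	if (new->pools == NULL)
		return NULL;
	if (cur->n)
		memcpy(new->pools, cur->pools, cur->n * sizeof(void *));
	new->n = cur->n + grow;
	/* Publish the new array before readers can observe the flip. */
	__atomic_thread_fence(__ATOMIC_RELEASE);
	c->master ^= 1;
	return new;
}
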
@@ -2271,31 +2362,53 @@ flow_dv_create_counter_stat_mem_mng(struct rte_eth_dev *dev, int raws_n)
  *   Whether the pool is for counter that was allocated by batch command.
  *
  * @return
- *   The container pointer on success, otherwise NULL and rte_errno is set.
+ *   The new container pointer on success, otherwise NULL and rte_errno is set.
  */
 static struct mlx5_pools_container *
-flow_dv_container_prepare(struct rte_eth_dev *dev, uint32_t batch)
+flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv, batch);
+	struct mlx5_pools_container *cont =
+			MLX5_CNT_CONTAINER(priv->sh, batch, 0);
+	struct mlx5_pools_container *new_cont =
+			MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0);
 	struct mlx5_counter_stats_mem_mng *mem_mng;
-	uint32_t size = MLX5_CNT_CONTAINER_SIZE;
-	uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * size;
+	uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
+	uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
+	int i;
 
-	cont->pools = rte_calloc(__func__, 1, mem_size, 0);
-	if (!cont->pools) {
+	if (cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1)) {
+		/* The last resize still hasn't been detected by the host thread. */
+		rte_errno = EAGAIN;
+		return NULL;
+	}
+	new_cont->pools = rte_calloc(__func__, 1, mem_size, 0);
+	if (!new_cont->pools) {
 		rte_errno = ENOMEM;
 		return NULL;
 	}
-	mem_mng = flow_dv_create_counter_stat_mem_mng(dev, size);
+	if (cont->n)
+		memcpy(new_cont->pools, cont->pools, cont->n *
+		       sizeof(struct mlx5_flow_counter_pool *));
+	mem_mng = flow_dv_create_counter_stat_mem_mng(dev,
+		MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES);
 	if (!mem_mng) {
-		rte_free(cont->pools);
+		rte_free(new_cont->pools);
 		return NULL;
 	}
-	cont->n = size;
-	TAILQ_INIT(&cont->pool_list);
-	cont->init_mem_mng = mem_mng;
-	return cont;
+	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
+		LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws,
+				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE +
+				 i, next);
+	new_cont->n = resize;
+	rte_atomic16_set(&new_cont->n_valid, rte_atomic16_read(&cont->n_valid));
+	TAILQ_INIT(&new_cont->pool_list);
+	TAILQ_CONCAT(&new_cont->pool_list, &cont->pool_list, next);
+	new_cont->init_mem_mng = mem_mng;
+	rte_cio_wmb();
+	 /* Flip the master container. */
+	priv->sh->cmng.mhi[batch] ^= (uint8_t)1;
+	return new_cont;
 }
 
 /**
@@ -2314,28 +2427,32 @@ flow_dv_container_prepare(struct rte_eth_dev *dev, uint32_t batch)
 *   0 on success, otherwise a negative errno value and rte_errno is set.
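+ *   On the non-fallback path the values are read, under pool->sl, from the
+ *   raw data of the last completed asynchronous batch query.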
 */
 static inline int
-_flow_dv_query_count(struct rte_eth_dev *dev __rte_unused,
+_flow_dv_query_count(struct rte_eth_dev *dev,
 		     struct mlx5_flow_counter *cnt, uint64_t *pkts,
 		     uint64_t *bytes)
 {
+	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool =
 			flow_dv_counter_pool_get(cnt);
-	uint16_t offset = pool->min_dcs->id % MLX5_COUNTERS_PER_POOL;
-	int ret = mlx5_devx_cmd_flow_counter_query
-		(pool->min_dcs, 0, MLX5_COUNTERS_PER_POOL - offset, NULL,
-		 NULL, pool->raw->mem_mng->dm->id,
-		 (void *)(uintptr_t)(pool->raw->data +
-		 offset));
-
-	if (ret) {
-		DRV_LOG(ERR, "Failed to trigger synchronous"
-			" query for dcs ID %d\n",
-			pool->min_dcs->id);
-		return ret;
+	int offset = cnt - &pool->counters_raw[0];
+
+	if (priv->counter_fallback)
+		return _flow_dv_query_count_fallback(dev, cnt, pkts, bytes);
+
+	rte_spinlock_lock(&pool->sl);
+	/*
+	 * The single counter allocation may allocate a smaller ID than the
+	 * one currently allocated in parallel with the host reading.
+	 * In this case the new counter values must be reported as 0.
+	 */
+	if (unlikely(!cnt->batch && cnt->dcs->id < pool->raw->min_dcs_id)) {
+		*pkts = 0;
+		*bytes = 0;
+	} else {
+		*pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits);
+		*bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes);
 	}
-	offset = cnt - &pool->counters_raw[0];
-	*pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits);
-	*bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes);
+	rte_spinlock_unlock(&pool->sl);
 	return 0;
 }
 
@@ -2358,17 +2475,15 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv, batch);
+	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
+							       0);
+	int16_t n_valid = rte_atomic16_read(&cont->n_valid);
 	uint32_t size;
 
-	if (!cont->n) {
-		cont = flow_dv_container_prepare(dev, batch);
+	if (cont->n == n_valid) {
+		cont = flow_dv_container_resize(dev, batch);
 		if (!cont)
 			return NULL;
-	} else if (cont->n == cont->n_valid) {
-		DRV_LOG(ERR, "No space in container to allocate a new pool\n");
-		rte_errno = ENOSPC;
-		return NULL;
 	}
 	size = sizeof(*pool) + MLX5_COUNTERS_PER_POOL *
 			sizeof(struct mlx5_flow_counter);
@@ -2378,11 +2493,21 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 		return NULL;
 	}
 	pool->min_dcs = dcs;
-	pool->raw = cont->init_mem_mng->raws + cont->n_valid;
+	pool->raw = cont->init_mem_mng->raws + n_valid %
+						MLX5_CNT_CONTAINER_RESIZE;
+	pool->raw_hw = NULL;
+	rte_spinlock_init(&pool->sl);
+	/*
+	 * The generation of the newly allocated counters in this pool is 0;
+	 * setting the pool generation to 2 makes all these counters valid
+	 * for allocation.
+	 */
+	rte_atomic64_set(&pool->query_gen, 0x2);
 	TAILQ_INIT(&pool->counters);
 	TAILQ_INSERT_TAIL(&cont->pool_list, pool, next);
-	cont->pools[cont->n_valid] = pool;
-	cont->n_valid++;
+	cont->pools[n_valid] = pool;
+	/* Pool initialization must complete before the host thread access.
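+	 * The rte_cio_wmb() below orders this store against the n_valid
+	 * increment, so the host query thread never reads an
+	 * uninitialized pool slot.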
	 */
+	rte_cio_wmb();
+	rte_atomic16_add(&cont->n_valid, 1);
 	return pool;
 }
 
@@ -2416,8 +2541,8 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
 	dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
 	if (!dcs)
 		return NULL;
-	pool = flow_dv_find_pool_by_id(MLX5_CNT_CONTAINER(priv, batch),
-				       dcs->id);
+	pool = flow_dv_find_pool_by_id
+			(MLX5_CNT_CONTAINER(priv->sh, batch, 0), dcs->id);
 	if (!pool) {
 		pool = flow_dv_pool_create(dev, dcs, batch);
 		if (!pool) {
@@ -2425,7 +2550,8 @@ flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
 			return NULL;
 		}
 	} else if (dcs->id < pool->min_dcs->id) {
-		pool->min_dcs->id = dcs->id;
+		rte_atomic64_set(&pool->a64_dcs,
+				 (int64_t)(uintptr_t)dcs);
 	}
 	cnt = &pool->counters_raw[dcs->id % MLX5_COUNTERS_PER_POOL];
 	TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
@@ -2514,8 +2640,11 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
 	 * shared counters from the single container.
 	 */
 	uint32_t batch = (group && !shared) ? 1 : 0;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv, batch);
+	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
+							       0);
 
+	if (priv->counter_fallback)
+		return flow_dv_counter_alloc_fallback(dev, shared, id);
 	if (!priv->config.devx) {
 		rte_errno = ENOTSUP;
 		return NULL;
@@ -2532,9 +2661,22 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
 		}
 	}
 	/* Pools which has a free counters are in the start. */
-	pool = TAILQ_FIRST(&cont->pool_list);
-	if (pool)
+	TAILQ_FOREACH(pool, &cont->pool_list, next) {
+		/*
+		 * A free counter's reset values must be updated between its
+		 * release and its reallocation, so at least one query must be
+		 * done in this time; ensure it by saving the query generation
+		 * at release time.
+		 * The free list is sorted by generation - so if the first
+		 * counter is not updated, neither are the others.
+		 */
 		cnt_free = TAILQ_FIRST(&pool->counters);
+		if (cnt_free && cnt_free->query_gen + 1 <
+		    rte_atomic64_read(&pool->query_gen))
+			break;
+		cnt_free = NULL;
+	}
 	if (!cnt_free) {
 		pool = flow_dv_counter_pool_prepare(dev, &cnt_free, batch);
 		if (!pool)
@@ -2567,6 +2709,9 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
 	cnt_free->shared = shared;
 	cnt_free->ref_cnt = 1;
 	cnt_free->id = id;
+	if (!priv->sh->cmng.query_thread_on)
+		/* Start the asynchronous batch query by the host thread. */
+		mlx5_set_query_alarm(priv->sh);
 	TAILQ_REMOVE(&pool->counters, cnt_free, next);
 	if (TAILQ_EMPTY(&pool->counters)) {
 		/* Move the pool to the end of the container pool list. */
@@ -2585,17 +2730,24 @@ flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
 * Pointer to the counter handler.
 */
 static void
-flow_dv_counter_release(struct rte_eth_dev *dev __rte_unused,
+flow_dv_counter_release(struct rte_eth_dev *dev,
 			struct mlx5_flow_counter *counter)
 {
+	struct mlx5_priv *priv = dev->data->dev_private;
+
 	if (!counter)
 		return;
+	if (priv->counter_fallback) {
+		flow_dv_counter_release_fallback(dev, counter);
+		return;
+	}
 	if (--counter->ref_cnt == 0) {
 		struct mlx5_flow_counter_pool *pool =
 				flow_dv_counter_pool_get(counter);
 
-		/* Put the counter in the end - the earliest one. */
+		/* Put the counter at the end - the most recently updated one.
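+		 * The pool query generation is saved below so the allocator
+		 * can tell whether a full query cycle has passed since the
+		 * release before it reuses the counter.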
*/ TAILQ_INSERT_TAIL(&pool->counters, counter, next); + counter->query_gen = rte_atomic64_read(&pool->query_gen); } } @@ -2655,7 +2807,7 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, (error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, NULL, "group must be smaller than " - RTE_STR(MLX5_MAX_FDB_TABLES)); + RTE_STR(MLX5_MAX_TABLES_FDB)); } if (!(attributes->egress ^ attributes->ingress)) return rte_flow_error_set(error, ENOTSUP, @@ -2718,7 +2870,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, (dev, items, attr, item_flags, error); if (ret < 0) return ret; - last_item |= MLX5_FLOW_ITEM_PORT_ID; + last_item = MLX5_FLOW_ITEM_PORT_ID; break; case RTE_FLOW_ITEM_TYPE_ETH: ret = mlx5_flow_validate_item_eth(items, item_flags, @@ -2730,13 +2882,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, break; case RTE_FLOW_ITEM_TYPE_VLAN: ret = mlx5_flow_validate_item_vlan(items, item_flags, - error); + dev, error); if (ret < 0) return ret; last_item = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : MLX5_FLOW_LAYER_OUTER_VLAN; break; case RTE_FLOW_ITEM_TYPE_IPV4: + mlx5_flow_tunnel_ip_check(items, next_protocol, + &item_flags, &tunnel); ret = mlx5_flow_validate_item_ipv4(items, item_flags, NULL, error); if (ret < 0) @@ -2756,9 +2910,10 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, /* Reset for inner layer. */ next_protocol = 0xff; } - mlx5_flow_tunnel_ip_check(items, &last_item); break; case RTE_FLOW_ITEM_TYPE_IPV6: + mlx5_flow_tunnel_ip_check(items, next_protocol, + &item_flags, &tunnel); ret = mlx5_flow_validate_item_ipv6(items, item_flags, NULL, error); if (ret < 0) @@ -2778,7 +2933,6 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, /* Reset for inner layer. 
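+			 * 0xff leaves the next protocol unconstrained,
+			 * so the following items are not checked against it.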
			 */
 			next_protocol = 0xff;
 		}
-		mlx5_flow_tunnel_ip_check(items, &last_item);
 		break;
 	case RTE_FLOW_ITEM_TYPE_TCP:
 		ret = mlx5_flow_validate_item_tcp
@@ -2801,7 +2955,6 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 					  MLX5_FLOW_LAYER_OUTER_L4_UDP;
 			break;
 		case RTE_FLOW_ITEM_TYPE_GRE:
-		case RTE_FLOW_ITEM_TYPE_NVGRE:
 			ret = mlx5_flow_validate_item_gre(items, item_flags,
 							  next_protocol, error);
 			if (ret < 0)
@@ -2809,12 +2962,20 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 			gre_item = items;
 			last_item = MLX5_FLOW_LAYER_GRE;
 			break;
+		case RTE_FLOW_ITEM_TYPE_NVGRE:
+			ret = mlx5_flow_validate_item_nvgre(items, item_flags,
+							    next_protocol,
+							    error);
+			if (ret < 0)
+				return ret;
+			last_item = MLX5_FLOW_LAYER_NVGRE;
+			break;
 		case RTE_FLOW_ITEM_TYPE_GRE_KEY:
 			ret = mlx5_flow_validate_item_gre_key
 				(items, item_flags, gre_item, error);
 			if (ret < 0)
 				return ret;
-			item_flags |= MLX5_FLOW_LAYER_GRE_KEY;
+			last_item = MLX5_FLOW_LAYER_GRE_KEY;
 			break;
 		case RTE_FLOW_ITEM_TYPE_VXLAN:
 			ret = mlx5_flow_validate_item_vxlan(items, item_flags,
@@ -2852,7 +3013,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 							   error);
 			if (ret < 0)
 				return ret;
-			item_flags |= MLX5_FLOW_LAYER_ICMP;
+			last_item = MLX5_FLOW_LAYER_ICMP;
 			break;
 		case RTE_FLOW_ITEM_TYPE_ICMP6:
 			ret = mlx5_flow_validate_item_icmp6(items, item_flags,
@@ -2860,7 +3021,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 							    error);
 			if (ret < 0)
 				return ret;
-			item_flags |= MLX5_FLOW_LAYER_ICMP6;
+			last_item = MLX5_FLOW_LAYER_ICMP6;
 			break;
 		default:
 			return rte_flow_error_set(error, ENOTSUP,
@@ -3281,6 +3442,8 @@ flow_dv_translate_item_eth(void *matcher, void *key,
 /**
  * Add VLAN item to matcher and to the value.
  *
+ * @param[in, out] dev_flow
+ *   Flow descriptor.
  * @param[in, out] matcher
  *   Flow matcher.
 * @param[in, out] key
@@ -3291,16 +3454,13 @@ flow_dv_translate_item_eth(void *matcher, void *key,
 *   Item is inner pattern.
 */
 static void
-flow_dv_translate_item_vlan(void *matcher, void *key,
+flow_dv_translate_item_vlan(struct mlx5_flow *dev_flow,
+			    void *matcher, void *key,
 			    const struct rte_flow_item *item,
 			    int inner)
 {
 	const struct rte_flow_item_vlan *vlan_m = item->mask;
 	const struct rte_flow_item_vlan *vlan_v = item->spec;
-	const struct rte_flow_item_vlan nic_mask = {
-		.tci = RTE_BE16(0x0fff),
-		.inner_type = RTE_BE16(0xffff),
-	};
 	void *headers_m;
 	void *headers_v;
 	uint16_t tci_m;
@@ -3309,7 +3469,7 @@ flow_dv_translate_item_vlan(void *matcher, void *key,
 	if (!vlan_v)
 		return;
 	if (!vlan_m)
-		vlan_m = &nic_mask;
+		vlan_m = &rte_flow_item_vlan_mask;
 	if (inner) {
 		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
 					 inner_headers);
@@ -3318,6 +3478,12 @@ flow_dv_translate_item_vlan(void *matcher, void *key,
 		headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
 					 outer_headers);
 		headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+		/*
+		 * This is a workaround: masks are not supported
+		 * and have been pre-validated.
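+		 * The VLAN TCI is saved here so that flow_dv_apply() can
+		 * create a VLAN interface for the VF through the vmwa
+		 * context (see mlx5_vlan_vmwa_acquire()).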
+ */ + dev_flow->dv.vf_vlan.tag = + rte_be_to_cpu_16(vlan_v->tci) & 0x0fff; } tci_m = rte_be_to_cpu_16(vlan_m->tci); tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci); @@ -3329,6 +3495,10 @@ flow_dv_translate_item_vlan(void *matcher, void *key, MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_cfi, tci_v >> 12); MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_prio, tci_m >> 13); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, tci_v >> 13); + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype, + rte_be_to_cpu_16(vlan_m->inner_type)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, + rte_be_to_cpu_16(vlan_m->inner_type & vlan_v->inner_type)); } /** @@ -3754,7 +3924,21 @@ flow_dv_translate_item_nvgre(void *matcher, void *key, int size; int i; - flow_dv_translate_item_gre(matcher, key, item, inner); + /* For NVGRE, GRE header fields must be set with defined values. */ + const struct rte_flow_item_gre gre_spec = { + .c_rsvd0_ver = RTE_BE16(0x2000), + .protocol = RTE_BE16(RTE_ETHER_TYPE_TEB) + }; + const struct rte_flow_item_gre gre_mask = { + .c_rsvd0_ver = RTE_BE16(0xB000), + .protocol = RTE_BE16(UINT16_MAX), + }; + const struct rte_flow_item gre_item = { + .spec = &gre_spec, + .mask = &gre_mask, + .last = NULL, + }; + flow_dv_translate_item_gre(matcher, key, &gre_item, inner); if (!nvgre_v) return; if (!nvgre_m) @@ -4132,11 +4316,9 @@ flow_dv_matcher_enable(uint32_t *match_criteria) match_criteria_enable |= (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT; -#ifdef HAVE_MLX5DV_DR match_criteria_enable |= (!HEADER_IS_ZERO(match_criteria, misc_parameters_3)) << MLX5_MATCH_CRITERIA_ENABLE_MISC3_BIT; -#endif return match_criteria_enable; } @@ -4500,6 +4682,7 @@ flow_dv_translate(struct rte_eth_dev *dev, uint32_t modify_action_position = UINT32_MAX; void *match_mask = matcher.mask.buf; void *match_value = dev_flow->dv.value.buf; + uint8_t next_protocol = 0xff; flow->group = attr->group; if (attr->transfer) @@ -4812,7 +4995,8 @@ cnt_err: MLX5_FLOW_LAYER_OUTER_L2; break; case RTE_FLOW_ITEM_TYPE_VLAN: - flow_dv_translate_item_vlan(match_mask, match_value, + flow_dv_translate_item_vlan(dev_flow, + match_mask, match_value, items, tunnel); matcher.priority = MLX5_PRIORITY_MAP_L2; last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | @@ -4821,6 +5005,8 @@ cnt_err: MLX5_FLOW_LAYER_OUTER_VLAN); break; case RTE_FLOW_ITEM_TYPE_IPV4: + mlx5_flow_tunnel_ip_check(items, next_protocol, + &item_flags, &tunnel); flow_dv_translate_item_ipv4(match_mask, match_value, items, tunnel, attr->group); matcher.priority = MLX5_PRIORITY_MAP_L3; @@ -4831,9 +5017,23 @@ cnt_err: MLX5_IPV4_IBV_RX_HASH); last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : MLX5_FLOW_LAYER_OUTER_L3_IPV4; - mlx5_flow_tunnel_ip_check(items, &last_item); + if (items->mask != NULL && + ((const struct rte_flow_item_ipv4 *) + items->mask)->hdr.next_proto_id) { + next_protocol = + ((const struct rte_flow_item_ipv4 *) + (items->spec))->hdr.next_proto_id; + next_protocol &= + ((const struct rte_flow_item_ipv4 *) + (items->mask))->hdr.next_proto_id; + } else { + /* Reset for inner layer. */ + next_protocol = 0xff; + } break; case RTE_FLOW_ITEM_TYPE_IPV6: + mlx5_flow_tunnel_ip_check(items, next_protocol, + &item_flags, &tunnel); flow_dv_translate_item_ipv6(match_mask, match_value, items, tunnel, attr->group); matcher.priority = MLX5_PRIORITY_MAP_L3; @@ -4844,7 +5044,19 @@ cnt_err: MLX5_IPV6_IBV_RX_HASH); last_item = tunnel ? 
					MLX5_FLOW_LAYER_INNER_L3_IPV6 :
 					MLX5_FLOW_LAYER_OUTER_L3_IPV6;
-			mlx5_flow_tunnel_ip_check(items, &last_item);
+			if (items->mask != NULL &&
+			    ((const struct rte_flow_item_ipv6 *)
+			     items->mask)->hdr.proto) {
+				next_protocol =
+					((const struct rte_flow_item_ipv6 *)
+					 items->spec)->hdr.proto;
+				next_protocol &=
+					((const struct rte_flow_item_ipv6 *)
+					 items->mask)->hdr.proto;
+			} else {
+				/* Reset for inner layer. */
+				next_protocol = 0xff;
+			}
 			break;
 		case RTE_FLOW_ITEM_TYPE_TCP:
 			flow_dv_translate_item_tcp(match_mask, match_value,
@@ -4878,7 +5090,7 @@ cnt_err:
 		case RTE_FLOW_ITEM_TYPE_GRE_KEY:
 			flow_dv_translate_item_gre_key(match_mask,
 						       match_value, items);
-			item_flags |= MLX5_FLOW_LAYER_GRE_KEY;
+			last_item = MLX5_FLOW_LAYER_GRE_KEY;
 			break;
 		case RTE_FLOW_ITEM_TYPE_NVGRE:
 			flow_dv_translate_item_nvgre(match_mask, match_value,
@@ -4908,12 +5120,12 @@ cnt_err:
 		case RTE_FLOW_ITEM_TYPE_ICMP:
 			flow_dv_translate_item_icmp(match_mask, match_value,
 						    items, tunnel);
-			item_flags |= MLX5_FLOW_LAYER_ICMP;
+			last_item = MLX5_FLOW_LAYER_ICMP;
 			break;
 		case RTE_FLOW_ITEM_TYPE_ICMP6:
 			flow_dv_translate_item_icmp6(match_mask, match_value,
 						     items, tunnel);
-			item_flags |= MLX5_FLOW_LAYER_ICMP6;
+			last_item = MLX5_FLOW_LAYER_ICMP6;
 			break;
 		default:
 			break;
@@ -4999,13 +5211,14 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 					     dv->hash_fields,
 					     (*flow->queue),
 					     flow->rss.queue_num);
-			if (!hrxq)
+			if (!hrxq) {
 				hrxq = mlx5_hrxq_new
 					(dev, flow->key, MLX5_RSS_HASH_KEY_LEN,
 					 dv->hash_fields, (*flow->queue),
 					 flow->rss.queue_num,
 					 !!(dev_flow->layers &
 					    MLX5_FLOW_LAYER_TUNNEL));
+			}
 			if (!hrxq) {
 				rte_flow_error_set
 					(error, rte_errno,
@@ -5027,6 +5240,17 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
 				 "hardware refuses to create flow");
 			goto error;
 		}
+		if (priv->vmwa_context &&
+		    dev_flow->dv.vf_vlan.tag &&
+		    !dev_flow->dv.vf_vlan.created) {
+			/*
+			 * The rule contains the VLAN pattern.
+			 * For a VF we create a VLAN interface to
+			 * make the hypervisor set the correct
+			 * e-Switch vport context.
+			 */
+			mlx5_vlan_vmwa_acquire(dev, &dev_flow->dv.vf_vlan);
+		}
 	}
 	return 0;
 error:
@@ -5040,6 +5264,9 @@ error:
 			mlx5_hrxq_release(dev, dv->hrxq);
 			dv->hrxq = NULL;
 		}
+		if (dev_flow->dv.vf_vlan.tag &&
+		    dev_flow->dv.vf_vlan.created)
+			mlx5_vlan_vmwa_release(dev, &dev_flow->dv.vf_vlan);
 	}
 	rte_errno = err; /* Restore rte_errno. */
 	return -rte_errno;
@@ -5240,6 +5467,9 @@ flow_dv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 			mlx5_hrxq_release(dev, dv->hrxq);
 			dv->hrxq = NULL;
 		}
+		if (dev_flow->dv.vf_vlan.tag &&
+		    dev_flow->dv.vf_vlan.created)
+			mlx5_vlan_vmwa_release(dev, &dev_flow->dv.vf_vlan);
 	}
 }
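
For context, the counters managed above back the rte_flow COUNT action. A minimal sketch of the application-level usage that exercises this path - the helper name is illustrative, and it assumes a configured and started port with at least one Rx queue (rte_flow API of this DPDK era):

#include <stdio.h>
#include <inttypes.h>
#include <rte_flow.h>

/* Count all ingress traffic on port_id and print the statistics. */
static int
count_all_ingress(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_count count_conf = { .shared = 0, .id = 0 };
	struct rte_flow_action_queue queue_conf = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &count_conf },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;
	struct rte_flow_query_count query = { .reset = 0 };
	struct rte_flow *flow;

	flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
	if (flow == NULL)
		return -1;
	/* ... traffic runs; later, read the counter back. */
	if (rte_flow_query(port_id, flow, &actions[0], &query, &err)) {
		rte_flow_destroy(port_id, flow, &err);
		return -1;
	}
	printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n", query.hits, query.bytes);
	return rte_flow_destroy(port_id, flow, &err);
}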