From: Gregory Etelson
Date: Sun, 25 Oct 2020 14:08:09 +0000 (+0200)
Subject: net/mlx5: implement tunnel offload
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=4ec6360de37dd92bf7411ec0f863cc13e5c0b0cc;p=dpdk.git

net/mlx5: implement tunnel offload

The tunnel offload API provides a hardware-independent, unified model
for offloading tunneled traffic. Key model elements are:
- matches are applied to both outer and inner packet headers during
  the entire offload procedure;
- the outer header of a partially offloaded packet is restored;
- the model is implemented as a set of helper functions.

Implementation details:
* the tunnel_offload PMD parameter must be set to 1 to enable the feature.
* the application cannot use MARK and META flow actions with tunnel offload.
* the offload JUMP action is restricted to the tunnel steering rule only.

Signed-off-by: Gregory Etelson
Acked-by: Viacheslav Ovsiienko
---

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 69bb4fca0f..7f0bfedefd 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -787,6 +787,9 @@ Driver options 24 bits. The actual supported width can be retrieved in runtime by series of rte_flow_validate() trials. + - 3, this engages tunnel offload mode. In E-Switch configuration, that + mode implicitly activates ``dv_xmeta_en=1``. + +------+-----------+-----------+-------------+-------------+ | Mode | ``MARK`` | ``META`` | ``META`` Tx | FDB/Through | +======+===========+===========+=============+=============+ diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 40f9446d43..ed3f020d82 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -291,6 +291,12 @@ mlx5_alloc_shared_dr(struct mlx5_priv *priv) sh->esw_drop_action = mlx5_glue->dr_create_flow_action_drop(); } #endif + if (!sh->tunnel_hub) + err = mlx5_alloc_tunnel_hub(sh); + if (err) { + DRV_LOG(ERR, "mlx5_alloc_tunnel_hub failed err=%d", err); + goto error; + } if (priv->config.reclaim_mode == MLX5_RCM_AGGR) { mlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1); mlx5_glue->dr_reclaim_domain_memory(sh->tx_domain, 1); @@ -335,6 +341,10 @@ error: mlx5_hlist_destroy(sh->tag_table, NULL, NULL); sh->tag_table = NULL; } + if (sh->tunnel_hub) { + mlx5_release_tunnel_hub(sh, priv->dev_port); + sh->tunnel_hub = NULL; + } mlx5_free_table_hash_list(priv); return err; } @@ -391,6 +401,10 @@ mlx5_os_free_shared_dr(struct mlx5_priv *priv) mlx5_hlist_destroy(sh->tag_table, NULL, NULL); sh->tag_table = NULL; } + if (sh->tunnel_hub) { + mlx5_release_tunnel_hub(sh, priv->dev_port); + sh->tunnel_hub = NULL; + } mlx5_free_table_hash_list(priv); } @@ -733,6 +747,10 @@ err_secondary: strerror(rte_errno)); goto error; } + if (config->dv_miss_info) { + if (switch_info->master || switch_info->representor) + config->dv_xmeta_en = MLX5_XMETA_MODE_META16; + } mlx5_malloc_mem_select(config->sys_mem_en); sh = mlx5_alloc_shared_dev_ctx(spawn, config); if (!sh) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 2484251b2f..6c422e8b4a 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1619,13 +1619,17 @@ mlx5_args_check(const char *key, const char *val, void *opaque) } else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) { if (tmp != MLX5_XMETA_MODE_LEGACY && tmp != MLX5_XMETA_MODE_META16 && - tmp != MLX5_XMETA_MODE_META32) { + tmp != MLX5_XMETA_MODE_META32 && + tmp != MLX5_XMETA_MODE_MISS_INFO) { DRV_LOG(ERR, "invalid extensive " "metadata parameter"); rte_errno = EINVAL; return -rte_errno; } - config->dv_xmeta_en = tmp; + if (tmp != 
MLX5_XMETA_MODE_MISS_INFO) + config->dv_xmeta_en = tmp; + else + config->dv_miss_info = 1; } else if (strcmp(MLX5_LACP_BY_USER, key) == 0) { config->lacp_by_user = !!tmp; } else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) { diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 61c194c49b..72dac7119f 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -208,6 +208,7 @@ struct mlx5_dev_config { unsigned int rt_timestamp:1; /* realtime timestamp format. */ unsigned int sys_mem_en:1; /* The default memory allocator. */ unsigned int decap_en:1; /* Whether decap will be used or not. */ + unsigned int dv_miss_info:1; /* restore packet after partial hw miss */ struct { unsigned int enabled:1; /* Whether MPRQ is enabled. */ unsigned int stride_num_n; /* Number of strides. */ @@ -644,6 +645,7 @@ struct mlx5_dev_ctx_shared { /* UAR same-page access control required in 32bit implementations. */ #endif struct mlx5_hlist *flow_tbls; + struct mlx5_flow_tunnel_hub *tunnel_hub; /* Direct Rules tables for FDB, NIC TX+RX */ void *esw_drop_action; /* Pointer to DR E-Switch drop action. */ void *pop_vlan_action; /* Pointer to DR pop VLAN action. */ diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index 22e41df1eb..42916ed7a7 100644 --- a/drivers/net/mlx5/mlx5_defs.h +++ b/drivers/net/mlx5/mlx5_defs.h @@ -165,6 +165,8 @@ #define MLX5_XMETA_MODE_LEGACY 0 #define MLX5_XMETA_MODE_META16 1 #define MLX5_XMETA_MODE_META32 2 +/* Provide info on patrial hw miss. Implies MLX5_XMETA_MODE_META16 */ +#define MLX5_XMETA_MODE_MISS_INFO 3 /* MLX5_TX_DB_NC supported values. */ #define MLX5_TXDB_CACHED 0 diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 6077685430..0c496eeab0 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -32,6 +32,18 @@ #include "mlx5_rxtx.h" #include "mlx5_common_os.h" +static struct mlx5_flow_tunnel * +mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id); +static void +mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel); +static const struct mlx5_flow_tbl_data_entry * +tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark); +static int +mlx5_get_flow_tunnel(struct rte_eth_dev *dev, + const struct rte_flow_tunnel *app_tunnel, + struct mlx5_flow_tunnel **tunnel); + + /** Device flow drivers. 
*/ extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops; @@ -567,6 +579,162 @@ static int mlx5_shared_action_query const struct rte_flow_shared_action *action, void *data, struct rte_flow_error *error); +static inline bool +mlx5_flow_tunnel_validate(struct rte_eth_dev *dev, + struct rte_flow_tunnel *tunnel, + const char *err_msg) +{ + err_msg = NULL; + if (!is_tunnel_offload_active(dev)) { + err_msg = "tunnel offload was not activated"; + goto out; + } else if (!tunnel) { + err_msg = "no application tunnel"; + goto out; + } + + switch (tunnel->type) { + default: + err_msg = "unsupported tunnel type"; + goto out; + case RTE_FLOW_ITEM_TYPE_VXLAN: + break; + } + +out: + return !err_msg; +} + + +static int +mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev, + struct rte_flow_tunnel *app_tunnel, + struct rte_flow_action **actions, + uint32_t *num_of_actions, + struct rte_flow_error *error) +{ + int ret; + struct mlx5_flow_tunnel *tunnel; + const char *err_msg = NULL; + bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, err_msg); + + if (!verdict) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, + err_msg); + ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel); + if (ret < 0) { + return rte_flow_error_set(error, ret, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, + "failed to initialize pmd tunnel"); + } + *actions = &tunnel->action; + *num_of_actions = 1; + return 0; +} + +static int +mlx5_flow_tunnel_match(struct rte_eth_dev *dev, + struct rte_flow_tunnel *app_tunnel, + struct rte_flow_item **items, + uint32_t *num_of_items, + struct rte_flow_error *error) +{ + int ret; + struct mlx5_flow_tunnel *tunnel; + const char *err_msg = NULL; + bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, err_msg); + + if (!verdict) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + err_msg); + ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel); + if (ret < 0) { + return rte_flow_error_set(error, ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "failed to initialize pmd tunnel"); + } + *items = &tunnel->item; + *num_of_items = 1; + return 0; +} + +static int +mlx5_flow_item_release(struct rte_eth_dev *dev, + struct rte_flow_item *pmd_items, + uint32_t num_items, struct rte_flow_error *err) +{ + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_flow_tunnel *tun; + + LIST_FOREACH(tun, &thub->tunnels, chain) { + if (&tun->item == pmd_items) + break; + } + if (!tun || num_items != 1) + return rte_flow_error_set(err, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "invalid argument"); + if (!__atomic_sub_fetch(&tun->refctn, 1, __ATOMIC_RELAXED)) + mlx5_flow_tunnel_free(dev, tun); + return 0; +} + +static int +mlx5_flow_action_release(struct rte_eth_dev *dev, + struct rte_flow_action *pmd_actions, + uint32_t num_actions, struct rte_flow_error *err) +{ + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_flow_tunnel *tun; + + LIST_FOREACH(tun, &thub->tunnels, chain) { + if (&tun->action == pmd_actions) + break; + } + if (!tun || num_actions != 1) + return rte_flow_error_set(err, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "invalid argument"); + if (!__atomic_sub_fetch(&tun->refctn, 1, __ATOMIC_RELAXED)) + mlx5_flow_tunnel_free(dev, tun); + + return 0; +} + +static int +mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev, + struct rte_mbuf *m, + struct rte_flow_restore_info *info, + struct rte_flow_error *err) +{ + uint64_t ol_flags = m->ol_flags; + const struct mlx5_flow_tbl_data_entry 
*tble; + const uint64_t mask = PKT_RX_FDIR | PKT_RX_FDIR_ID; + + if ((ol_flags & mask) != mask) + goto err; + tble = tunnel_mark_decode(dev, m->hash.fdir.hi); + if (!tble) { + DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x", + dev->data->port_id, m->hash.fdir.hi); + goto err; + } + MLX5_ASSERT(tble->tunnel); + memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel)); + info->group_id = tble->group_id; + info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL | + RTE_FLOW_RESTORE_INFO_GROUP_ID | + RTE_FLOW_RESTORE_INFO_ENCAPSULATED; + + return 0; + +err: + return rte_flow_error_set(err, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "failed to get restore info"); +} static const struct rte_flow_ops mlx5_flow_ops = { .validate = mlx5_flow_validate, @@ -581,6 +749,11 @@ static const struct rte_flow_ops mlx5_flow_ops = { .shared_action_destroy = mlx5_shared_action_destroy, .shared_action_update = mlx5_shared_action_update, .shared_action_query = mlx5_shared_action_query, + .tunnel_decap_set = mlx5_flow_tunnel_decap_set, + .tunnel_match = mlx5_flow_tunnel_match, + .tunnel_action_decap_release = mlx5_flow_action_release, + .tunnel_item_release = mlx5_flow_item_release, + .get_restore_info = mlx5_flow_tunnel_get_restore_info, }; /* Convert FDIR request to Generic flow. */ @@ -4065,6 +4238,142 @@ flow_hairpin_split(struct rte_eth_dev *dev, return 0; } +__extension__ +union tunnel_offload_mark { + uint32_t val; + struct { + uint32_t app_reserve:8; + uint32_t table_id:15; + uint32_t transfer:1; + uint32_t _unused_:8; + }; +}; + +struct tunnel_default_miss_ctx { + uint16_t *queue; + __extension__ + union { + struct rte_flow_action_rss action_rss; + struct rte_flow_action_queue miss_queue; + struct rte_flow_action_jump miss_jump; + uint8_t raw[0]; + }; +}; + +static int +flow_tunnel_add_default_miss(struct rte_eth_dev *dev, + struct rte_flow *flow, + const struct rte_flow_attr *attr, + const struct rte_flow_action *app_actions, + uint32_t flow_idx, + struct tunnel_default_miss_ctx *ctx, + struct rte_flow_error *error) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_flow *dev_flow; + struct rte_flow_attr miss_attr = *attr; + const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf; + const struct rte_flow_item miss_items[2] = { + { + .type = RTE_FLOW_ITEM_TYPE_ETH, + .spec = NULL, + .last = NULL, + .mask = NULL + }, + { + .type = RTE_FLOW_ITEM_TYPE_END, + .spec = NULL, + .last = NULL, + .mask = NULL + } + }; + union tunnel_offload_mark mark_id; + struct rte_flow_action_mark miss_mark; + struct rte_flow_action miss_actions[3] = { + [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark }, + [2] = { .type = RTE_FLOW_ACTION_TYPE_END, .conf = NULL } + }; + const struct rte_flow_action_jump *jump_data; + uint32_t i, flow_table = 0; /* prevent compilation warning */ + struct flow_grp_info grp_info = { + .external = 1, + .transfer = attr->transfer, + .fdb_def_rule = !!priv->fdb_def_rule, + .std_tbl_fix = 0, + }; + int ret; + + if (!attr->transfer) { + uint32_t q_size; + + miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS; + q_size = priv->reta_idx_n * sizeof(ctx->queue[0]); + ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size, + 0, SOCKET_ID_ANY); + if (!ctx->queue) + return rte_flow_error_set + (error, ENOMEM, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + NULL, "invalid default miss RSS"); + ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT, + ctx->action_rss.level = 0, + ctx->action_rss.types = priv->rss_conf.rss_hf, + ctx->action_rss.key_len = 
priv->rss_conf.rss_key_len, + ctx->action_rss.queue_num = priv->reta_idx_n, + ctx->action_rss.key = priv->rss_conf.rss_key, + ctx->action_rss.queue = ctx->queue; + if (!priv->reta_idx_n || !priv->rxqs_n) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + NULL, "invalid port configuration"); + if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) + ctx->action_rss.types = 0; + for (i = 0; i != priv->reta_idx_n; ++i) + ctx->queue[i] = (*priv->reta_idx)[i]; + } else { + miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP; + ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP; + } + miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw; + for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++); + jump_data = app_actions->conf; + miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY; + miss_attr.group = jump_data->group; + ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group, + &flow_table, grp_info, error); + if (ret) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + NULL, "invalid tunnel id"); + mark_id.app_reserve = 0; + mark_id.table_id = tunnel_flow_tbl_to_id(flow_table); + mark_id.transfer = !!attr->transfer; + mark_id._unused_ = 0; + miss_mark.id = mark_id.val; + dev_flow = flow_drv_prepare(dev, flow, &miss_attr, + miss_items, miss_actions, flow_idx, error); + if (!dev_flow) + return -rte_errno; + dev_flow->flow = flow; + dev_flow->external = true; + dev_flow->tunnel = tunnel; + /* Subflow object was created, we must include one in the list. */ + SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, + dev_flow->handle, next); + DRV_LOG(DEBUG, + "port %u tunnel type=%d id=%u miss rule priority=%u group=%u", + dev->data->port_id, tunnel->app_tunnel.type, + tunnel->tunnel_id, miss_attr.priority, miss_attr.group); + ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items, + miss_actions, error); + if (!ret) + ret = flow_mreg_update_copy_table(dev, flow, miss_actions, + error); + + return ret; +} + /** * The last stage of splitting chain, just creates the subflow * without any modification. @@ -5187,6 +5496,27 @@ flow_create_split_outer(struct rte_eth_dev *dev, return ret; } +static struct mlx5_flow_tunnel * +flow_tunnel_from_rule(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[]) +{ + struct mlx5_flow_tunnel *tunnel; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" + if (is_flow_tunnel_match_rule(dev, attr, items, actions)) + tunnel = (struct mlx5_flow_tunnel *)items[0].spec; + else if (is_flow_tunnel_steer_rule(dev, attr, items, actions)) + tunnel = (struct mlx5_flow_tunnel *)actions[0].conf; + else + tunnel = NULL; +#pragma GCC diagnostic pop + + return tunnel; +} + /** * Create a flow and add it to @p list. * @@ -5253,6 +5583,8 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, struct rte_flow_attr attr_factor = {0}; const struct rte_flow_action *actions; struct rte_flow_action *translated_actions = NULL; + struct mlx5_flow_tunnel *tunnel; + struct tunnel_default_miss_ctx default_miss_ctx = { 0, }; int ret = flow_shared_actions_translate(original_actions, shared_actions, &shared_actions_n, @@ -5264,8 +5596,6 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, } actions = translated_actions ? 
translated_actions : original_actions; memcpy((void *)&attr_factor, (const void *)attr, sizeof(*attr)); - if (external) - attr_factor.group *= MLX5_FLOW_TABLE_FACTOR; p_actions_rx = actions; hairpin_flow = flow_check_hairpin_split(dev, &attr_factor, actions); ret = flow_drv_validate(dev, &attr_factor, items, p_actions_rx, @@ -5340,6 +5670,19 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, error); if (ret < 0) goto error; + if (is_flow_tunnel_steer_rule(dev, attr, + buf->entry[i].pattern, + p_actions_rx)) { + ret = flow_tunnel_add_default_miss(dev, flow, attr, + p_actions_rx, + idx, + &default_miss_ctx, + error); + if (ret < 0) { + mlx5_free(default_miss_ctx.queue); + goto error; + } + } } /* Create the tx flow. */ if (hairpin_flow) { @@ -5395,6 +5738,13 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, priv->flow_idx = priv->flow_nested_idx; if (priv->flow_nested_idx) priv->flow_nested_idx = 0; + tunnel = flow_tunnel_from_rule(dev, attr, items, actions); + if (tunnel) { + flow->tunnel = 1; + flow->tunnel_id = tunnel->tunnel_id; + __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED); + mlx5_free(default_miss_ctx.queue); + } return idx; error: MLX5_ASSERT(flow); @@ -5530,6 +5880,7 @@ mlx5_flow_create(struct rte_eth_dev *dev, "port not started"); return NULL; } + return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, attr, items, actions, true, error); } @@ -5584,6 +5935,13 @@ flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, } } mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); + if (flow->tunnel) { + struct mlx5_flow_tunnel *tunnel; + tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id); + RTE_VERIFY(tunnel); + if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED)) + mlx5_flow_tunnel_free(dev, tunnel); + } } /** @@ -7118,19 +7476,122 @@ mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, sh->cmng.pending_queries--; } +static const struct mlx5_flow_tbl_data_entry * +tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_dev_ctx_shared *sh = priv->sh; + struct mlx5_hlist_entry *he; + union tunnel_offload_mark mbits = { .val = mark }; + union mlx5_flow_tbl_key table_key = { + { + .table_id = tunnel_id_to_flow_tbl(mbits.table_id), + .reserved = 0, + .domain = !!mbits.transfer, + .direction = 0, + } + }; + he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64); + return he ? + container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL; +} + +static uint32_t +tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev, + const struct mlx5_flow_tunnel *tunnel, + uint32_t group, uint32_t *table, + struct rte_flow_error *error) +{ + struct mlx5_hlist_entry *he; + struct tunnel_tbl_entry *tte; + union tunnel_tbl_key key = { + .tunnel_id = tunnel ? tunnel->tunnel_id : 0, + .group = group + }; + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_hlist *group_hash; + + group_hash = tunnel ? 
tunnel->groups : thub->groups; + he = mlx5_hlist_lookup(group_hash, key.val); + if (!he) { + int ret; + tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, + sizeof(*tte), 0, + SOCKET_ID_ANY); + if (!tte) + goto err; + tte->hash.key = key.val; + ret = mlx5_flow_id_get(thub->table_ids, &tte->flow_table); + if (ret) { + mlx5_free(tte); + goto err; + } + tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table); + mlx5_hlist_insert(group_hash, &tte->hash); + } else { + tte = container_of(he, typeof(*tte), hash); + } + *table = tte->flow_table; + DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x", + dev->data->port_id, key.tunnel_id, group, *table); + return 0; + +err: + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + NULL, "tunnel group index not supported"); +} + +static int +flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table, + struct flow_grp_info grp_info, struct rte_flow_error *error) +{ + if (grp_info.transfer && grp_info.external && grp_info.fdb_def_rule) { + if (group == UINT32_MAX) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + NULL, + "group index not supported"); + *table = group + 1; + } else { + *table = group; + } + DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table); + return 0; +} + /** * Translate the rte_flow group index to HW table value. * - * @param[in] attributes - * Pointer to flow attributes - * @param[in] external - * Value is part of flow rule created by request external to PMD. + * If tunnel offload is disabled, all group ids converted to flow table + * id using the standard method. + * If tunnel offload is enabled, group id can be converted using the + * standard or tunnel conversion method. Group conversion method + * selection depends on flags in `grp_info` parameter: + * - Internal (grp_info.external == 0) groups conversion uses the + * standard method. + * - Group ids in JUMP action converted with the tunnel conversion. + * - Group id in rule attribute conversion depends on a rule type and + * group id value: + * ** non zero group attributes converted with the tunnel method + * ** zero group attribute in non-tunnel rule is converted using the + * standard method - there's only one root table + * ** zero group attribute in steer tunnel rule is converted with the + * standard method - single root table + * ** zero group attribute in match tunnel rule is a special OvS + * case: that value is used for portability reasons. That group + * id is converted with the tunnel conversion method. + * + * @param[in] dev + * Port device + * @param[in] tunnel + * PMD tunnel offload object * @param[in] group * rte_flow group index value. - * @param[out] fdb_def_rule - * Whether fdb jump to table 1 is configured. * @param[out] table * HW table value. + * @param[in] grp_info + * flags used for conversion * @param[out] error * Pointer to error structure. * @@ -7138,22 +7599,36 @@ mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int -mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, - uint32_t group, bool fdb_def_rule, uint32_t *table, +mlx5_flow_group_to_table(struct rte_eth_dev *dev, + const struct mlx5_flow_tunnel *tunnel, + uint32_t group, uint32_t *table, + struct flow_grp_info grp_info, struct rte_flow_error *error) { - if (attributes->transfer && external && fdb_def_rule) { - if (group == UINT32_MAX) - return rte_flow_error_set - (error, EINVAL, - RTE_FLOW_ERROR_TYPE_ATTR_GROUP, - NULL, - "group index not supported"); - *table = group + 1; + int ret; + bool standard_translation; + + if (grp_info.external && group < MLX5_MAX_TABLES_EXTERNAL) + group *= MLX5_FLOW_TABLE_FACTOR; + if (is_tunnel_offload_active(dev)) { + standard_translation = !grp_info.external || + grp_info.std_tbl_fix; } else { - *table = group; + standard_translation = true; } - return 0; + DRV_LOG(DEBUG, + "port %u group=%#x transfer=%d external=%d fdb_def_rule=%d translate=%s", + dev->data->port_id, group, grp_info.transfer, + grp_info.external, grp_info.fdb_def_rule, + standard_translation ? "STANDARD" : "TUNNEL"); + if (standard_translation) + ret = flow_group_to_table(dev->data->port_id, group, table, + grp_info, error); + else + ret = tunnel_flow_group_to_flow_table(dev, tunnel, group, + table, error); + + return ret; } /** @@ -7524,3 +7999,168 @@ mlx5_shared_action_flush(struct rte_eth_dev *dev) } return ret; } + +static void +mlx5_flow_tunnel_free(struct rte_eth_dev *dev, + struct mlx5_flow_tunnel *tunnel) +{ + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_flow_id_pool *id_pool = thub->tunnel_ids; + + DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x", + dev->data->port_id, tunnel->tunnel_id); + RTE_VERIFY(!__atomic_load_n(&tunnel->refctn, __ATOMIC_RELAXED)); + LIST_REMOVE(tunnel, chain); + mlx5_flow_id_release(id_pool, tunnel->tunnel_id); + mlx5_hlist_destroy(tunnel->groups, NULL, NULL); + mlx5_free(tunnel); +} + +static struct mlx5_flow_tunnel * +mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id) +{ + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_flow_tunnel *tun; + + LIST_FOREACH(tun, &thub->tunnels, chain) { + if (tun->tunnel_id == id) + break; + } + + return tun; +} + +static struct mlx5_flow_tunnel * +mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev, + const struct rte_flow_tunnel *app_tunnel) +{ + int ret; + struct mlx5_flow_tunnel *tunnel; + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_flow_id_pool *id_pool = thub->tunnel_ids; + uint32_t id; + + ret = mlx5_flow_id_get(id_pool, &id); + if (ret) + return NULL; + /** + * mlx5 flow tunnel is an auxlilary data structure + * It's not part of IO. 
No need to allocate it from + * huge pages pools dedicated for IO + */ + tunnel = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*tunnel), + 0, SOCKET_ID_ANY); + if (!tunnel) { + mlx5_flow_id_pool_release(id_pool); + return NULL; + } + tunnel->groups = mlx5_hlist_create("tunnel groups", 1024); + if (!tunnel->groups) { + mlx5_flow_id_pool_release(id_pool); + mlx5_free(tunnel); + return NULL; + } + /* initiate new PMD tunnel */ + memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel)); + tunnel->tunnel_id = id; + tunnel->action.type = (typeof(tunnel->action.type)) + MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET; + tunnel->action.conf = tunnel; + tunnel->item.type = (typeof(tunnel->item.type)) + MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL; + tunnel->item.spec = tunnel; + tunnel->item.last = NULL; + tunnel->item.mask = NULL; + + DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x", + dev->data->port_id, tunnel->tunnel_id); + + return tunnel; +} + +static int +mlx5_get_flow_tunnel(struct rte_eth_dev *dev, + const struct rte_flow_tunnel *app_tunnel, + struct mlx5_flow_tunnel **tunnel) +{ + int ret; + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_flow_tunnel *tun; + + LIST_FOREACH(tun, &thub->tunnels, chain) { + if (!memcmp(app_tunnel, &tun->app_tunnel, + sizeof(*app_tunnel))) { + *tunnel = tun; + ret = 0; + break; + } + } + if (!tun) { + tun = mlx5_flow_tunnel_allocate(dev, app_tunnel); + if (tun) { + LIST_INSERT_HEAD(&thub->tunnels, tun, chain); + *tunnel = tun; + } else { + ret = -ENOMEM; + } + } + if (tun) + __atomic_add_fetch(&tun->refctn, 1, __ATOMIC_RELAXED); + + return ret; +} + +void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id) +{ + struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub; + + if (!thub) + return; + if (!LIST_EMPTY(&thub->tunnels)) + DRV_LOG(WARNING, "port %u tunnels present\n", port_id); + mlx5_flow_id_pool_release(thub->tunnel_ids); + mlx5_flow_id_pool_release(thub->table_ids); + mlx5_hlist_destroy(thub->groups, NULL, NULL); + mlx5_free(thub); +} + +int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh) +{ + int err; + struct mlx5_flow_tunnel_hub *thub; + + thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub), + 0, SOCKET_ID_ANY); + if (!thub) + return -ENOMEM; + LIST_INIT(&thub->tunnels); + thub->tunnel_ids = mlx5_flow_id_pool_alloc(MLX5_MAX_TUNNELS); + if (!thub->tunnel_ids) { + err = -rte_errno; + goto err; + } + thub->table_ids = mlx5_flow_id_pool_alloc(MLX5_MAX_TABLES); + if (!thub->table_ids) { + err = -rte_errno; + goto err; + } + thub->groups = mlx5_hlist_create("flow groups", MLX5_MAX_TABLES); + if (!thub->groups) { + err = -rte_errno; + goto err; + } + sh->tunnel_hub = thub; + + return 0; + +err: + if (thub->groups) + mlx5_hlist_destroy(thub->groups, NULL, NULL); + if (thub->table_ids) + mlx5_flow_id_pool_release(thub->table_ids); + if (thub->tunnel_ids) + mlx5_flow_id_pool_release(thub->tunnel_ids); + if (thub) + mlx5_free(thub); + return err; +} diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 7faab43fe6..507b4f9992 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -26,6 +26,7 @@ enum mlx5_rte_flow_item_type { MLX5_RTE_FLOW_ITEM_TYPE_TAG, MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE, MLX5_RTE_FLOW_ITEM_TYPE_VLAN, + MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL, }; /* Private (internal) rte flow actions. 
*/ @@ -36,6 +37,7 @@ enum mlx5_rte_flow_action_type { MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG, MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, MLX5_RTE_FLOW_ACTION_TYPE_SHARED_RSS, + MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET, }; /* Matches on selected register. */ @@ -74,7 +76,6 @@ enum mlx5_feature_name { MLX5_MTR_SFX, }; -/* Pattern outer Layer bits. */ #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0) #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1) #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2) @@ -202,6 +203,8 @@ enum mlx5_feature_name { #define MLX5_FLOW_ACTION_AGE (1ull << 34) #define MLX5_FLOW_ACTION_DEFAULT_MISS (1ull << 35) #define MLX5_FLOW_ACTION_SAMPLE (1ull << 36) +#define MLX5_FLOW_ACTION_TUNNEL_SET (1ull << 37) +#define MLX5_FLOW_ACTION_TUNNEL_MATCH (1ull << 38) #define MLX5_FLOW_FATE_ACTIONS \ (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | \ @@ -531,6 +534,10 @@ struct mlx5_flow_tbl_data_entry { struct mlx5_flow_dv_jump_tbl_resource jump; /**< jump resource, at most one for each table created. */ uint32_t idx; /**< index for the indexed mempool. */ + /**< tunnel offload */ + const struct mlx5_flow_tunnel *tunnel; + uint32_t group_id; + bool external; }; /* Sub rdma-core actions list. */ @@ -769,6 +776,7 @@ struct mlx5_flow { }; struct mlx5_flow_handle *handle; uint32_t handle_idx; /* Index of the mlx5 flow handle memory. */ + const struct mlx5_flow_tunnel *tunnel; }; /* Flow meter state. */ @@ -914,6 +922,112 @@ struct mlx5_fdir_flow { #define HAIRPIN_FLOW_ID_BITS 28 +#define MLX5_MAX_TUNNELS 256 +#define MLX5_TNL_MISS_RULE_PRIORITY 3 +#define MLX5_TNL_MISS_FDB_JUMP_GRP 0x1234faac + +/* + * When tunnel offload is active, all JUMP group ids are converted + * using the same method. That conversion is applied both to tunnel and + * regular rule types. + * Group ids used in tunnel rules are relative to it's tunnel (!). + * Application can create number of steer rules, using the same + * tunnel, with different group id in each rule. + * Each tunnel stores its groups internally in PMD tunnel object. + * Groups used in regular rules do not belong to any tunnel and are stored + * in tunnel hub. 
+ */ + +struct mlx5_flow_tunnel { + LIST_ENTRY(mlx5_flow_tunnel) chain; + struct rte_flow_tunnel app_tunnel; /** app tunnel copy */ + uint32_t tunnel_id; /** unique tunnel ID */ + uint32_t refctn; + struct rte_flow_action action; + struct rte_flow_item item; + struct mlx5_hlist *groups; /** tunnel groups */ +}; + +/** PMD tunnel related context */ +struct mlx5_flow_tunnel_hub { + LIST_HEAD(, mlx5_flow_tunnel) tunnels; + struct mlx5_flow_id_pool *tunnel_ids; + struct mlx5_flow_id_pool *table_ids; + struct mlx5_hlist *groups; /** non tunnel groups */ +}; + +/* convert jump group to flow table ID in tunnel rules */ +struct tunnel_tbl_entry { + struct mlx5_hlist_entry hash; + uint32_t flow_table; +}; + +static inline uint32_t +tunnel_id_to_flow_tbl(uint32_t id) +{ + return id | (1u << 16); +} + +static inline uint32_t +tunnel_flow_tbl_to_id(uint32_t flow_tbl) +{ + return flow_tbl & ~(1u << 16); +} + +union tunnel_tbl_key { + uint64_t val; + struct { + uint32_t tunnel_id; + uint32_t group; + }; +}; + +static inline struct mlx5_flow_tunnel_hub * +mlx5_tunnel_hub(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + return priv->sh->tunnel_hub; +} + +static inline bool +is_tunnel_offload_active(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + return !!priv->config.dv_miss_info; +} + +static inline bool +is_flow_tunnel_match_rule(__rte_unused struct rte_eth_dev *dev, + __rte_unused const struct rte_flow_attr *attr, + __rte_unused const struct rte_flow_item items[], + __rte_unused const struct rte_flow_action actions[]) +{ + return (items[0].type == (typeof(items[0].type)) + MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL); +} + +static inline bool +is_flow_tunnel_steer_rule(__rte_unused struct rte_eth_dev *dev, + __rte_unused const struct rte_flow_attr *attr, + __rte_unused const struct rte_flow_item items[], + __rte_unused const struct rte_flow_action actions[]) +{ + return (actions[0].type == (typeof(actions[0].type)) + MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET); +} + +static inline const struct mlx5_flow_tunnel * +flow_actions_to_tunnel(const struct rte_flow_action actions[]) +{ + return actions[0].conf; +} + +static inline const struct mlx5_flow_tunnel * +flow_items_to_tunnel(const struct rte_flow_item items[]) +{ + return items[0].spec; +} + /* Flow structure. */ struct rte_flow { ILIST_ENTRY(uint32_t)next; /**< Index to the next flow structure. */ @@ -922,12 +1036,14 @@ struct rte_flow { /**< Device flow handles that are part of the flow. */ uint32_t drv_type:2; /**< Driver type. */ uint32_t fdir:1; /**< Identifier of associated FDIR if any. */ + uint32_t tunnel:1; uint32_t hairpin_flow_id:HAIRPIN_FLOW_ID_BITS; /**< The flow id used for hairpin. */ uint32_t copy_applied:1; /**< The MARK copy Flow os applied. */ uint32_t rix_mreg_copy; /**< Index to metadata register copy table resource. */ uint32_t counter; /**< Holds flow counter. */ + uint32_t tunnel_id; /**< Tunnel id */ uint16_t meter; /**< Holds flow meter id. 
*/ } __rte_packed; @@ -1089,9 +1205,54 @@ void mlx5_flow_id_pool_release(struct mlx5_flow_id_pool *pool); uint32_t mlx5_flow_id_get(struct mlx5_flow_id_pool *pool, uint32_t *id); uint32_t mlx5_flow_id_release(struct mlx5_flow_id_pool *pool, uint32_t id); -int mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, - bool external, uint32_t group, bool fdb_def_rule, - uint32_t *table, struct rte_flow_error *error); +__extension__ +struct flow_grp_info { + uint64_t external:1; + uint64_t transfer:1; + uint64_t fdb_def_rule:1; + /* force standard group translation */ + uint64_t std_tbl_fix:1; +}; + +static inline bool +tunnel_use_standard_attr_group_translate + (struct rte_eth_dev *dev, + const struct mlx5_flow_tunnel *tunnel, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[]) +{ + bool verdict; + + if (!is_tunnel_offload_active(dev)) + /* no tunnel offload API */ + verdict = true; + else if (tunnel) { + /* + * OvS will use jump to group 0 in tunnel steer rule. + * If tunnel steer rule starts from group 0 (attr.group == 0) + * that 0 group must be translated with standard method. + * attr.group == 0 in tunnel match rule translated with tunnel + * method + */ + verdict = !attr->group && + is_flow_tunnel_steer_rule(dev, attr, items, actions); + } else { + /* + * non-tunnel group translation uses standard method for + * root group only: attr.group == 0 + */ + verdict = !attr->group; + } + + return verdict; +} + +int mlx5_flow_group_to_table(struct rte_eth_dev *dev, + const struct mlx5_flow_tunnel *tunnel, + uint32_t group, uint32_t *table, + struct flow_grp_info flags, + struct rte_flow_error *error); uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc, int tunnel, uint64_t layer_types, uint64_t hash_fields); @@ -1231,4 +1392,6 @@ int mlx5_flow_meter_flush(struct rte_eth_dev *dev, int mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev); struct rte_flow_shared_action *mlx5_flow_get_shared_rss(struct rte_flow *flow); int mlx5_shared_action_flush(struct rte_eth_dev *dev); +void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id); +int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh); #endif /* RTE_PMD_MLX5_FLOW_H_ */ diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 66d81e9598..504d842c09 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -3947,14 +3947,21 @@ flow_dv_validate_action_modify_ttl(const uint64_t action_flags, * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ static int -flow_dv_validate_action_jump(const struct rte_flow_action *action, +flow_dv_validate_action_jump(struct rte_eth_dev *dev, + const struct mlx5_flow_tunnel *tunnel, + const struct rte_flow_action *action, uint64_t action_flags, const struct rte_flow_attr *attributes, bool external, struct rte_flow_error *error) { uint32_t target_group, table; int ret = 0; - + struct flow_grp_info grp_info = { + .external = !!external, + .transfer = !!attributes->transfer, + .fdb_def_rule = 1, + .std_tbl_fix = 0 + }; if (action_flags & (MLX5_FLOW_FATE_ACTIONS | MLX5_FLOW_FATE_ESWITCH_ACTIONS)) return rte_flow_error_set(error, EINVAL, @@ -3977,11 +3984,13 @@ flow_dv_validate_action_jump(const struct rte_flow_action *action, NULL, "action configuration not set"); target_group = ((const struct rte_flow_action_jump *)action->conf)->group; - ret = mlx5_flow_group_to_table(attributes, external, target_group, - true, &table, error); + ret = mlx5_flow_group_to_table(dev, tunnel, target_group, &table, + grp_info, error); if (ret) return ret; - if (attributes->group == target_group) + if (attributes->group == target_group && + !(action_flags & (MLX5_FLOW_ACTION_TUNNEL_SET | + MLX5_FLOW_ACTION_TUNNEL_MATCH))) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, NULL, "target group must be other than" @@ -5160,8 +5169,9 @@ flow_dv_counter_release(struct rte_eth_dev *dev, uint32_t counter) */ static int flow_dv_validate_attributes(struct rte_eth_dev *dev, + const struct mlx5_flow_tunnel *tunnel, const struct rte_flow_attr *attributes, - bool external __rte_unused, + struct flow_grp_info grp_info, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; @@ -5169,6 +5179,8 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, int ret = 0; #ifndef HAVE_MLX5DV_DR + RTE_SET_USED(tunnel); + RTE_SET_USED(grp_info); if (attributes->group) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP, @@ -5177,9 +5189,8 @@ flow_dv_validate_attributes(struct rte_eth_dev *dev, #else uint32_t table = 0; - ret = mlx5_flow_group_to_table(attributes, external, - attributes->group, !!priv->fdb_def_rule, - &table, error); + ret = mlx5_flow_group_to_table(dev, tunnel, attributes->group, &table, + grp_info, error); if (ret) return ret; if (!table) @@ -5293,10 +5304,28 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, const struct rte_flow_item_vlan *vlan_m = NULL; int16_t rw_act_num = 0; uint64_t is_root; + const struct mlx5_flow_tunnel *tunnel; + struct flow_grp_info grp_info = { + .external = !!external, + .transfer = !!attr->transfer, + .fdb_def_rule = !!priv->fdb_def_rule, + }; if (items == NULL) return -1; - ret = flow_dv_validate_attributes(dev, attr, external, error); + if (is_flow_tunnel_match_rule(dev, attr, items, actions)) { + tunnel = flow_items_to_tunnel(items); + action_flags |= MLX5_FLOW_ACTION_TUNNEL_MATCH | + MLX5_FLOW_ACTION_DECAP; + } else if (is_flow_tunnel_steer_rule(dev, attr, items, actions)) { + tunnel = flow_actions_to_tunnel(actions); + action_flags |= MLX5_FLOW_ACTION_TUNNEL_SET; + } else { + tunnel = NULL; + } + grp_info.std_tbl_fix = tunnel_use_standard_attr_group_translate + (dev, tunnel, attr, items, actions); + ret = flow_dv_validate_attributes(dev, tunnel, attr, grp_info, error); if (ret < 0) return ret; is_root = (uint64_t)ret; @@ -5309,6 +5338,15 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "item not supported"); switch (type) { + case 
MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL: + if (items[0].type != (typeof(items[0].type)) + MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "MLX5 private items " + "must be the first"); + break; case RTE_FLOW_ITEM_TYPE_VOID: break; case RTE_FLOW_ITEM_TYPE_PORT_ID: @@ -5894,7 +5932,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, rw_act_num += MLX5_ACT_NUM_MDF_TTL; break; case RTE_FLOW_ACTION_TYPE_JUMP: - ret = flow_dv_validate_action_jump(actions, + ret = flow_dv_validate_action_jump(dev, tunnel, actions, action_flags, attr, external, error); @@ -6003,6 +6041,17 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, action_flags |= MLX5_FLOW_ACTION_SAMPLE; ++actions_n; break; + case MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET: + if (actions[0].type != (typeof(actions[0].type)) + MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "MLX5 private action " + "must be the first"); + + action_flags |= MLX5_FLOW_ACTION_TUNNEL_SET; + break; default: return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, @@ -6010,6 +6059,54 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, "action not supported"); } } + /* + * Validate actions in flow rules + * - Explicit decap action is prohibited by the tunnel offload API. + * - Drop action in tunnel steer rule is prohibited by the API. + * - Application cannot use MARK action because it's value can mask + * tunnel default miss nitification. + * - JUMP in tunnel match rule has no support in current PMD + * implementation. + * - TAG & META are reserved for future uses. + */ + if (action_flags & MLX5_FLOW_ACTION_TUNNEL_SET) { + uint64_t bad_actions_mask = MLX5_FLOW_ACTION_DECAP | + MLX5_FLOW_ACTION_MARK | + MLX5_FLOW_ACTION_SET_TAG | + MLX5_FLOW_ACTION_SET_META | + MLX5_FLOW_ACTION_DROP; + + if (action_flags & bad_actions_mask) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "Invalid RTE action in tunnel " + "set decap rule"); + if (!(action_flags & MLX5_FLOW_ACTION_JUMP)) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "tunnel set decap rule must terminate " + "with JUMP"); + if (!attr->ingress) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "tunnel flows for ingress traffic only"); + } + if (action_flags & MLX5_FLOW_ACTION_TUNNEL_MATCH) { + uint64_t bad_actions_mask = MLX5_FLOW_ACTION_JUMP | + MLX5_FLOW_ACTION_MARK | + MLX5_FLOW_ACTION_SET_TAG | + MLX5_FLOW_ACTION_SET_META; + + if (action_flags & bad_actions_mask) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "Invalid RTE action in tunnel " + "set match rule"); + } /* * Validate the drop action mutual exclusion with other actions. 
* Drop action is mutually-exclusive with any other action, except for @@ -7876,6 +7973,9 @@ static struct mlx5_flow_tbl_resource * flow_dv_tbl_resource_get(struct rte_eth_dev *dev, uint32_t table_id, uint8_t egress, uint8_t transfer, + bool external, + const struct mlx5_flow_tunnel *tunnel, + uint32_t group_id, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; @@ -7912,6 +8012,9 @@ flow_dv_tbl_resource_get(struct rte_eth_dev *dev, return NULL; } tbl_data->idx = idx; + tbl_data->tunnel = tunnel; + tbl_data->group_id = group_id; + tbl_data->external = external; tbl = &tbl_data->tbl; pos = &tbl_data->entry; if (transfer) @@ -7975,6 +8078,41 @@ flow_dv_tbl_resource_release(struct rte_eth_dev *dev, mlx5_flow_os_destroy_flow_tbl(tbl->obj); tbl->obj = NULL; + if (is_tunnel_offload_active(dev) && tbl_data->external) { + struct mlx5_hlist_entry *he; + struct mlx5_hlist *tunnel_grp_hash; + struct mlx5_flow_tunnel_hub *thub = + mlx5_tunnel_hub(dev); + union tunnel_tbl_key tunnel_key = { + .tunnel_id = tbl_data->tunnel ? + tbl_data->tunnel->tunnel_id : 0, + .group = tbl_data->group_id + }; + union mlx5_flow_tbl_key table_key = { + .v64 = pos->key + }; + uint32_t table_id = table_key.table_id; + + tunnel_grp_hash = tbl_data->tunnel ? + tbl_data->tunnel->groups : + thub->groups; + he = mlx5_hlist_lookup(tunnel_grp_hash, tunnel_key.val); + if (he) { + struct tunnel_tbl_entry *tte; + tte = container_of(he, typeof(*tte), hash); + MLX5_ASSERT(tte->flow_table == table_id); + mlx5_hlist_remove(tunnel_grp_hash, he); + mlx5_free(tte); + } + mlx5_flow_id_release(mlx5_tunnel_hub(dev)->table_ids, + tunnel_flow_tbl_to_id(table_id)); + DRV_LOG(DEBUG, + "port %u release table_id %#x tunnel %u group %u", + dev->data->port_id, table_id, + tbl_data->tunnel ? + tbl_data->tunnel->tunnel_id : 0, + tbl_data->group_id); + } /* remove the entry from the hash list and free memory. 
*/ mlx5_hlist_remove(sh->flow_tbls, pos); mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_JUMP], @@ -8020,7 +8158,7 @@ flow_dv_matcher_register(struct rte_eth_dev *dev, int ret; tbl = flow_dv_tbl_resource_get(dev, key->table_id, key->direction, - key->domain, error); + key->domain, false, NULL, 0, error); if (!tbl) return -rte_errno; /* No need to refill the error info */ tbl_data = container_of(tbl, struct mlx5_flow_tbl_data_entry, tbl); @@ -8511,7 +8649,8 @@ flow_dv_sample_resource_register(struct rte_eth_dev *dev, *cache_resource = *resource; /* Create normal path table level */ tbl = flow_dv_tbl_resource_get(dev, next_ft_id, - attr->egress, attr->transfer, error); + attr->egress, attr->transfer, + dev_flow->external, NULL, 0, error); if (!tbl) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, @@ -9123,6 +9262,12 @@ __flow_dv_translate(struct rte_eth_dev *dev, int tmp_actions_n = 0; uint32_t table; int ret = 0; + const struct mlx5_flow_tunnel *tunnel; + struct flow_grp_info grp_info = { + .external = !!dev_flow->external, + .transfer = !!attr->transfer, + .fdb_def_rule = !!priv->fdb_def_rule, + }; memset(&mdest_res, 0, sizeof(struct mlx5_flow_dv_dest_array_resource)); memset(&sample_res, 0, sizeof(struct mlx5_flow_dv_sample_resource)); @@ -9130,8 +9275,17 @@ __flow_dv_translate(struct rte_eth_dev *dev, MLX5DV_FLOW_TABLE_TYPE_NIC_RX; /* update normal path action resource into last index of array */ sample_act = &mdest_res.sample_act[MLX5_MAX_DEST_NUM - 1]; - ret = mlx5_flow_group_to_table(attr, dev_flow->external, attr->group, - !!priv->fdb_def_rule, &table, error); + tunnel = is_flow_tunnel_match_rule(dev, attr, items, actions) ? + flow_items_to_tunnel(items) : + is_flow_tunnel_steer_rule(dev, attr, items, actions) ? + flow_actions_to_tunnel(actions) : + dev_flow->tunnel ? dev_flow->tunnel : NULL; + mhdr_res->ft_type = attr->egress ? MLX5DV_FLOW_TABLE_TYPE_NIC_TX : + MLX5DV_FLOW_TABLE_TYPE_NIC_RX; + grp_info.std_tbl_fix = tunnel_use_standard_attr_group_translate + (dev, tunnel, attr, items, actions); + ret = mlx5_flow_group_to_table(dev, tunnel, attr->group, &table, + grp_info, error); if (ret) return ret; dev_flow->dv.group = table; @@ -9141,6 +9295,45 @@ __flow_dv_translate(struct rte_eth_dev *dev, priority = dev_conf->flow_prio - 1; /* number of actions must be set to 0 in case of dirty stack. */ mhdr_res->actions_num = 0; + if (is_flow_tunnel_match_rule(dev, attr, items, actions)) { + /* + * do not add decap action if match rule drops packet + * HW rejects rules with decap & drop + */ + bool add_decap = true; + const struct rte_flow_action *ptr = actions; + struct mlx5_flow_tbl_resource *tbl; + + for (; ptr->type != RTE_FLOW_ACTION_TYPE_END; ptr++) { + if (ptr->type == RTE_FLOW_ACTION_TYPE_DROP) { + add_decap = false; + break; + } + } + if (add_decap) { + if (flow_dv_create_action_l2_decap(dev, dev_flow, + attr->transfer, + error)) + return -rte_errno; + dev_flow->dv.actions[actions_n++] = + dev_flow->dv.encap_decap->action; + action_flags |= MLX5_FLOW_ACTION_DECAP; + } + /* + * bind table_id with for tunnel match rule. + * Tunnel set rule establishes that bind in JUMP action handler. + * Required for scenario when application creates tunnel match + * rule before tunnel set rule. 
+ */ + tbl = flow_dv_tbl_resource_get(dev, table, attr->egress, + attr->transfer, + !!dev_flow->external, tunnel, + attr->group, error); + if (!tbl) + return rte_flow_error_set + (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + actions, "cannot register tunnel group"); + } for (; !actions_end ; actions++) { const struct rte_flow_action_queue *queue; const struct rte_flow_action_rss *rss; @@ -9161,6 +9354,9 @@ __flow_dv_translate(struct rte_eth_dev *dev, actions, "action not supported"); switch (action_type) { + case MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET: + action_flags |= MLX5_FLOW_ACTION_TUNNEL_SET; + break; case RTE_FLOW_ACTION_TYPE_VOID: break; case RTE_FLOW_ACTION_TYPE_PORT_ID: @@ -9404,18 +9600,18 @@ __flow_dv_translate(struct rte_eth_dev *dev, case RTE_FLOW_ACTION_TYPE_JUMP: jump_group = ((const struct rte_flow_action_jump *) action->conf)->group; - if (dev_flow->external && jump_group < - MLX5_MAX_TABLES_EXTERNAL) - jump_group *= MLX5_FLOW_TABLE_FACTOR; - ret = mlx5_flow_group_to_table(attr, dev_flow->external, + grp_info.std_tbl_fix = 0; + ret = mlx5_flow_group_to_table(dev, tunnel, jump_group, - !!priv->fdb_def_rule, - &table, error); + &table, + grp_info, error); if (ret) return ret; - tbl = flow_dv_tbl_resource_get(dev, table, - attr->egress, - attr->transfer, error); + tbl = flow_dv_tbl_resource_get(dev, table, attr->egress, + attr->transfer, + !!dev_flow->external, + tunnel, jump_group, + error); if (!tbl) return rte_flow_error_set (error, errno, @@ -11439,7 +11635,8 @@ flow_dv_prepare_mtr_tables(struct rte_eth_dev *dev, dtb = &mtb->ingress; /* Create the meter table with METER level. */ dtb->tbl = flow_dv_tbl_resource_get(dev, MLX5_FLOW_TABLE_LEVEL_METER, - egress, transfer, &error); + egress, transfer, false, NULL, 0, + &error); if (!dtb->tbl) { DRV_LOG(ERR, "Failed to create meter policer table."); return -1; @@ -11447,7 +11644,8 @@ flow_dv_prepare_mtr_tables(struct rte_eth_dev *dev, /* Create the meter suffix table with SUFFIX level. */ dtb->sfx_tbl = flow_dv_tbl_resource_get(dev, MLX5_FLOW_TABLE_LEVEL_SUFFIX, - egress, transfer, &error); + egress, transfer, false, NULL, 0, + &error); if (!dtb->sfx_tbl) { DRV_LOG(ERR, "Failed to create meter suffix table."); return -1; @@ -11766,10 +11964,10 @@ mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev) void *flow = NULL; int i, ret = -1; - tbl = flow_dv_tbl_resource_get(dev, 0, 0, 0, NULL); + tbl = flow_dv_tbl_resource_get(dev, 0, 0, 0, false, NULL, 0, NULL); if (!tbl) goto err; - dest_tbl = flow_dv_tbl_resource_get(dev, 1, 0, 0, NULL); + dest_tbl = flow_dv_tbl_resource_get(dev, 1, 0, 0, false, NULL, 0, NULL); if (!dest_tbl) goto err; dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0x4);
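
The offload model implemented above is driven from the application side through the generic rte_flow tunnel offload helpers (tunnel_decap_set, tunnel_match, get_restore_info and the release calls wired into mlx5_flow_ops). A minimal application-side sketch of the expected call flow follows, assuming a VXLAN tunnel; the port id, tunnel id, jump group and queue index are illustrative, error handling is reduced to early returns, and a single PMD action/item is assumed (as this PMD returns).

#include <stdio.h>
#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_mbuf.h>

static int
offload_vxlan_example(uint16_t port_id, struct rte_mbuf *miss_pkt)
{
	struct rte_flow_error err;
	struct rte_flow_tunnel tunnel = {
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
		.tun_id = 42, /* illustrative tunnel id */
	};
	struct rte_flow_action *pmd_actions;
	struct rte_flow_item *pmd_items;
	uint32_t n_pmd_actions, n_pmd_items;

	/* 1. Tunnel steering rule: PMD decap-set action(s) + JUMP only. */
	if (rte_flow_tunnel_decap_set(port_id, &tunnel, &pmd_actions,
				      &n_pmd_actions, &err))
		return -1;
	struct rte_flow_attr steer_attr = { .ingress = 1, .group = 0 };
	struct rte_flow_item steer_items[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_jump jump = { .group = 1 }; /* illustrative */
	struct rte_flow_action steer_actions[] = {
		pmd_actions[0], /* PMD tunnel-set action */
		{ .type = RTE_FLOW_ACTION_TYPE_JUMP, .conf = &jump },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	rte_flow_create(port_id, &steer_attr, steer_items, steer_actions, &err);

	/* 2. Tunnel match rule in the jump group: PMD item + inner headers. */
	if (rte_flow_tunnel_match(port_id, &tunnel, &pmd_items,
				  &n_pmd_items, &err))
		return -1;
	struct rte_flow_attr match_attr = { .ingress = 1, .group = 1 };
	struct rte_flow_item match_items[] = {
		pmd_items[0], /* PMD tunnel item */
		{ .type = RTE_FLOW_ITEM_TYPE_ETH }, /* inner headers */
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action match_actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	rte_flow_create(port_id, &match_attr, match_items, match_actions, &err);

	/* 3. On a partial offload miss, recover the tunnel and group id. */
	struct rte_flow_restore_info info;
	if (!rte_flow_get_restore_info(port_id, miss_pkt, &info, &err) &&
	    (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL))
		printf("miss in group %u, tunnel type %d\n",
		       info.group_id, info.tunnel.type);

	/* 4. PMD actions/items must be released when no longer needed. */
	rte_flow_tunnel_action_decap_release(port_id, pmd_actions,
					     n_pmd_actions, &err);
	rte_flow_tunnel_item_release(port_id, pmd_items, n_pmd_items, &err);
	return 0;
}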
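The default-miss rule created by flow_tunnel_add_default_miss() encodes the target flow table in the MARK value, which mlx5_flow_tunnel_get_restore_info() later reads back from mbuf->hash.fdir.hi and decodes through tunnel_mark_decode(). A small stand-alone sketch of that encode/decode round trip, reusing the bit layout from the patch; the table id value is illustrative (in the PMD it comes from the tunnel-hub id pool, not a fixed number).

#include <stdint.h>
#include <stdio.h>

/* Same 32-bit layout as union tunnel_offload_mark in mlx5_flow.c. */
union tunnel_offload_mark {
	uint32_t val;
	struct {
		uint32_t app_reserve:8; /* left for application mark values */
		uint32_t table_id:15;   /* tunnel flow table id, bit 16 stripped */
		uint32_t transfer:1;    /* FDB (transfer) vs NIC domain */
		uint32_t _unused_:8;
	};
};

/* Tunnel flow tables are tagged with bit 16, as in mlx5_flow.h. */
static uint32_t tunnel_id_to_flow_tbl(uint32_t id) { return id | (1u << 16); }
static uint32_t tunnel_flow_tbl_to_id(uint32_t tbl) { return tbl & ~(1u << 16); }

int main(void)
{
	/* Illustrative pool id 5 -> flow table 0x10005. */
	uint32_t flow_table = tunnel_id_to_flow_tbl(5);
	union tunnel_offload_mark mark = {
		.app_reserve = 0,
		.table_id = tunnel_flow_tbl_to_id(flow_table),
		.transfer = 1,
		._unused_ = 0,
	};
	/*
	 * mark.val is what the default-miss rule programs as the MARK id;
	 * a partially offloaded packet returns it in mbuf->hash.fdir.hi.
	 */
	union tunnel_offload_mark decoded = { .val = mark.val };
	printf("mark=%#x -> flow table %#x (transfer=%u)\n", decoded.val,
	       tunnel_id_to_flow_tbl(decoded.table_id), decoded.transfer);
	return 0;
}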