X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fnet%2Fmlx5%2Fmlx5_flow.c;h=e2af7a9c35ba9e2121f6defdbf1f8d7f9181b5bd;hb=23f627e0ed28;hp=08c7cdf005f2ec6651ae96a893de3fbb2cecc8e6;hpb=c8f0abe7f89d61a347ba9e2d3bf337c89c0fb16f;p=dpdk.git diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 08c7cdf005..e2af7a9c35 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -10,19 +10,10 @@ #include #include -/* Verbs header. */ -/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ -#ifdef PEDANTIC -#pragma GCC diagnostic ignored "-Wpedantic" -#endif -#include -#ifdef PEDANTIC -#pragma GCC diagnostic error "-Wpedantic" -#endif - #include #include #include +#include #include #include #include @@ -32,20 +23,29 @@ #include #include #include +#include #include "mlx5_defs.h" #include "mlx5.h" #include "mlx5_flow.h" +#include "mlx5_flow_os.h" #include "mlx5_rxtx.h" +#include "mlx5_common_os.h" +#include "rte_pmd_mlx5.h" + +static struct mlx5_flow_tunnel * +mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id); +static void +mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel); +static const struct mlx5_flow_tbl_data_entry * +tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark); +static int +mlx5_get_flow_tunnel(struct rte_eth_dev *dev, + const struct rte_flow_tunnel *app_tunnel, + struct mlx5_flow_tunnel **tunnel); -/* Dev ops structure defined in mlx5.c */ -extern const struct eth_dev_ops mlx5_dev_ops; -extern const struct eth_dev_ops mlx5_dev_ops_isolate; /** Device flow drivers. */ -#ifdef HAVE_IBV_FLOW_DV_SUPPORT -extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops; -#endif extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops; const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops; @@ -59,6 +59,331 @@ const struct mlx5_flow_driver_ops *flow_drv_ops[] = { [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops }; +/** Helper macro to build input graph for mlx5_flow_expand_rss(). */ +#define MLX5_FLOW_EXPAND_RSS_NEXT(...) \ + (const int []){ \ + __VA_ARGS__, 0, \ + } + +/** Node object of input graph for mlx5_flow_expand_rss(). */ +struct mlx5_flow_expand_node { + const int *const next; + /**< + * List of next node indexes. Index 0 is interpreted as a terminator. + */ + const enum rte_flow_item_type type; + /**< Pattern item type of current node. */ + uint64_t rss_types; + /**< + * RSS types bit-field associated with this node + * (see ETH_RSS_* definitions). + */ +}; + +/** Object returned by mlx5_flow_expand_rss(). */ +struct mlx5_flow_expand_rss { + uint32_t entries; + /**< Number of entries @p patterns and @p priorities. */ + struct { + struct rte_flow_item *pattern; /**< Expanded pattern array. */ + uint32_t priority; /**< Priority offset for each expansion. 
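+	 * Computed from the pattern's depth in the expansion graph.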
*/ + } entry[]; +}; + +static enum rte_flow_item_type +mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item) +{ + enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID; + uint16_t ether_type = 0; + uint16_t ether_type_m; + uint8_t ip_next_proto = 0; + uint8_t ip_next_proto_m; + + if (item == NULL || item->spec == NULL) + return ret; + switch (item->type) { + case RTE_FLOW_ITEM_TYPE_ETH: + if (item->mask) + ether_type_m = ((const struct rte_flow_item_eth *) + (item->mask))->type; + else + ether_type_m = rte_flow_item_eth_mask.type; + if (ether_type_m != RTE_BE16(0xFFFF)) + break; + ether_type = ((const struct rte_flow_item_eth *) + (item->spec))->type; + if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) + ret = RTE_FLOW_ITEM_TYPE_IPV4; + else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) + ret = RTE_FLOW_ITEM_TYPE_IPV6; + else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) + ret = RTE_FLOW_ITEM_TYPE_VLAN; + else + ret = RTE_FLOW_ITEM_TYPE_END; + break; + case RTE_FLOW_ITEM_TYPE_VLAN: + if (item->mask) + ether_type_m = ((const struct rte_flow_item_vlan *) + (item->mask))->inner_type; + else + ether_type_m = rte_flow_item_vlan_mask.inner_type; + if (ether_type_m != RTE_BE16(0xFFFF)) + break; + ether_type = ((const struct rte_flow_item_vlan *) + (item->spec))->inner_type; + if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4) + ret = RTE_FLOW_ITEM_TYPE_IPV4; + else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6) + ret = RTE_FLOW_ITEM_TYPE_IPV6; + else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN) + ret = RTE_FLOW_ITEM_TYPE_VLAN; + else + ret = RTE_FLOW_ITEM_TYPE_END; + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + if (item->mask) + ip_next_proto_m = ((const struct rte_flow_item_ipv4 *) + (item->mask))->hdr.next_proto_id; + else + ip_next_proto_m = + rte_flow_item_ipv4_mask.hdr.next_proto_id; + if (ip_next_proto_m != 0xFF) + break; + ip_next_proto = ((const struct rte_flow_item_ipv4 *) + (item->spec))->hdr.next_proto_id; + if (ip_next_proto == IPPROTO_UDP) + ret = RTE_FLOW_ITEM_TYPE_UDP; + else if (ip_next_proto == IPPROTO_TCP) + ret = RTE_FLOW_ITEM_TYPE_TCP; + else if (ip_next_proto == IPPROTO_IP) + ret = RTE_FLOW_ITEM_TYPE_IPV4; + else if (ip_next_proto == IPPROTO_IPV6) + ret = RTE_FLOW_ITEM_TYPE_IPV6; + else + ret = RTE_FLOW_ITEM_TYPE_END; + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + if (item->mask) + ip_next_proto_m = ((const struct rte_flow_item_ipv6 *) + (item->mask))->hdr.proto; + else + ip_next_proto_m = + rte_flow_item_ipv6_mask.hdr.proto; + if (ip_next_proto_m != 0xFF) + break; + ip_next_proto = ((const struct rte_flow_item_ipv6 *) + (item->spec))->hdr.proto; + if (ip_next_proto == IPPROTO_UDP) + ret = RTE_FLOW_ITEM_TYPE_UDP; + else if (ip_next_proto == IPPROTO_TCP) + ret = RTE_FLOW_ITEM_TYPE_TCP; + else if (ip_next_proto == IPPROTO_IP) + ret = RTE_FLOW_ITEM_TYPE_IPV4; + else if (ip_next_proto == IPPROTO_IPV6) + ret = RTE_FLOW_ITEM_TYPE_IPV6; + else + ret = RTE_FLOW_ITEM_TYPE_END; + break; + default: + ret = RTE_FLOW_ITEM_TYPE_VOID; + break; + } + return ret; +} + +/** + * Expand RSS flows into several possible flows according to the RSS hash + * fields requested and the driver capabilities. + * + * @param[out] buf + * Buffer to store the result expansion. + * @param[in] size + * Buffer size in bytes. If 0, @p buf can be NULL. + * @param[in] pattern + * User flow pattern. + * @param[in] types + * RSS types to expand (see ETH_RSS_* definitions). + * @param[in] graph + * Input graph to expand @p pattern according to @p types. 
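+ *   Expansion walks this graph depth-first; each node lists the item
+ *   types that may follow it.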
+ * @param[in] graph_root_index + * Index of root node in @p graph, typically 0. + * + * @return + * A positive value representing the size of @p buf in bytes regardless of + * @p size on success, a negative errno value otherwise and rte_errno is + * set, the following errors are defined: + * + * -E2BIG: graph-depth @p graph is too deep. + */ +static int +mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size, + const struct rte_flow_item *pattern, uint64_t types, + const struct mlx5_flow_expand_node graph[], + int graph_root_index) +{ + const int elt_n = 8; + const struct rte_flow_item *item; + const struct mlx5_flow_expand_node *node = &graph[graph_root_index]; + const int *next_node; + const int *stack[elt_n]; + int stack_pos = 0; + struct rte_flow_item flow_items[elt_n]; + unsigned int i; + size_t lsize; + size_t user_pattern_size = 0; + void *addr = NULL; + const struct mlx5_flow_expand_node *next = NULL; + struct rte_flow_item missed_item; + int missed = 0; + int elt = 0; + const struct rte_flow_item *last_item = NULL; + + memset(&missed_item, 0, sizeof(missed_item)); + lsize = offsetof(struct mlx5_flow_expand_rss, entry) + + elt_n * sizeof(buf->entry[0]); + if (lsize <= size) { + buf->entry[0].priority = 0; + buf->entry[0].pattern = (void *)&buf->entry[elt_n]; + buf->entries = 0; + addr = buf->entry[0].pattern; + } + for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + if (item->type != RTE_FLOW_ITEM_TYPE_VOID) + last_item = item; + for (i = 0; node->next && node->next[i]; ++i) { + next = &graph[node->next[i]]; + if (next->type == item->type) + break; + } + if (next) + node = next; + user_pattern_size += sizeof(*item); + } + user_pattern_size += sizeof(*item); /* Handle END item. */ + lsize += user_pattern_size; + /* Copy the user pattern in the first entry of the buffer. */ + if (lsize <= size) { + rte_memcpy(addr, pattern, user_pattern_size); + addr = (void *)(((uintptr_t)addr) + user_pattern_size); + buf->entries = 1; + } + /* Start expanding. */ + memset(flow_items, 0, sizeof(flow_items)); + user_pattern_size -= sizeof(*item); + /* + * Check if the last valid item has spec set, need complete pattern, + * and the pattern can be used for expansion. + */ + missed_item.type = mlx5_flow_expand_rss_item_complete(last_item); + if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) { + /* Item type END indicates expansion is not required. */ + return lsize; + } + if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) { + next = NULL; + missed = 1; + for (i = 0; node->next && node->next[i]; ++i) { + next = &graph[node->next[i]]; + if (next->type == missed_item.type) { + flow_items[0].type = missed_item.type; + flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; + break; + } + next = NULL; + } + } + if (next && missed) { + elt = 2; /* missed item + item end. */ + node = next; + lsize += elt * sizeof(*item) + user_pattern_size; + if ((node->rss_types & types) && lsize <= size) { + buf->entry[buf->entries].priority = 1; + buf->entry[buf->entries].pattern = addr; + buf->entries++; + rte_memcpy(addr, buf->entry[0].pattern, + user_pattern_size); + addr = (void *)(((uintptr_t)addr) + user_pattern_size); + rte_memcpy(addr, flow_items, elt * sizeof(*item)); + addr = (void *)(((uintptr_t)addr) + + elt * sizeof(*item)); + } + } + memset(flow_items, 0, sizeof(flow_items)); + next_node = node->next; + stack[stack_pos] = next_node; + node = next_node ? 
&graph[*next_node] : NULL; + while (node) { + flow_items[stack_pos].type = node->type; + if (node->rss_types & types) { + /* + * compute the number of items to copy from the + * expansion and copy it. + * When the stack_pos is 0, there are 1 element in it, + * plus the addition END item. + */ + elt = stack_pos + 2; + flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END; + lsize += elt * sizeof(*item) + user_pattern_size; + if (lsize <= size) { + size_t n = elt * sizeof(*item); + + buf->entry[buf->entries].priority = + stack_pos + 1 + missed; + buf->entry[buf->entries].pattern = addr; + buf->entries++; + rte_memcpy(addr, buf->entry[0].pattern, + user_pattern_size); + addr = (void *)(((uintptr_t)addr) + + user_pattern_size); + rte_memcpy(addr, &missed_item, + missed * sizeof(*item)); + addr = (void *)(((uintptr_t)addr) + + missed * sizeof(*item)); + rte_memcpy(addr, flow_items, n); + addr = (void *)(((uintptr_t)addr) + n); + } + } + /* Go deeper. */ + if (node->next) { + next_node = node->next; + if (stack_pos++ == elt_n) { + rte_errno = E2BIG; + return -rte_errno; + } + stack[stack_pos] = next_node; + } else if (*(next_node + 1)) { + /* Follow up with the next possibility. */ + ++next_node; + } else { + /* Move to the next path. */ + if (stack_pos) + next_node = stack[--stack_pos]; + next_node++; + stack[stack_pos] = next_node; + } + node = *next_node ? &graph[*next_node] : NULL; + }; + /* no expanded flows but we have missed item, create one rule for it */ + if (buf->entries == 1 && missed != 0) { + elt = 2; + lsize += elt * sizeof(*item) + user_pattern_size; + if (lsize <= size) { + buf->entry[buf->entries].priority = 1; + buf->entry[buf->entries].pattern = addr; + buf->entries++; + flow_items[0].type = missed_item.type; + flow_items[1].type = RTE_FLOW_ITEM_TYPE_END; + rte_memcpy(addr, buf->entry[0].pattern, + user_pattern_size); + addr = (void *)(((uintptr_t)addr) + user_pattern_size); + rte_memcpy(addr, flow_items, elt * sizeof(*item)); + addr = (void *)(((uintptr_t)addr) + + elt * sizeof(*item)); + } + } + return lsize; +} + enum mlx5_expansion { MLX5_EXPANSION_ROOT, MLX5_EXPANSION_ROOT_OUTER, @@ -89,46 +414,47 @@ enum mlx5_expansion { }; /** Supported expansion of items. 
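 * Indexed by enum mlx5_expansion and built with MLX5_FLOW_EXPAND_RSS_NEXT.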
*/ -static const struct rte_flow_expand_node mlx5_support_expansion[] = { +static const struct mlx5_flow_expand_node mlx5_support_expansion[] = { [MLX5_EXPANSION_ROOT] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, - MLX5_EXPANSION_IPV4, - MLX5_EXPANSION_IPV6), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, + MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), .type = RTE_FLOW_ITEM_TYPE_END, }, [MLX5_EXPANSION_ROOT_OUTER] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, - MLX5_EXPANSION_OUTER_IPV4, - MLX5_EXPANSION_OUTER_IPV6), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH, + MLX5_EXPANSION_OUTER_IPV4, + MLX5_EXPANSION_OUTER_IPV6), .type = RTE_FLOW_ITEM_TYPE_END, }, [MLX5_EXPANSION_ROOT_ETH_VLAN] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN), .type = RTE_FLOW_ITEM_TYPE_END, }, [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN), + .next = MLX5_FLOW_EXPAND_RSS_NEXT + (MLX5_EXPANSION_OUTER_ETH_VLAN), .type = RTE_FLOW_ITEM_TYPE_END, }, [MLX5_EXPANSION_OUTER_ETH] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, - MLX5_EXPANSION_OUTER_IPV6, - MLX5_EXPANSION_MPLS), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, + MLX5_EXPANSION_OUTER_IPV6, + MLX5_EXPANSION_MPLS), .type = RTE_FLOW_ITEM_TYPE_ETH, .rss_types = 0, }, [MLX5_EXPANSION_OUTER_ETH_VLAN] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN), .type = RTE_FLOW_ITEM_TYPE_ETH, .rss_types = 0, }, [MLX5_EXPANSION_OUTER_VLAN] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, - MLX5_EXPANSION_OUTER_IPV6), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4, + MLX5_EXPANSION_OUTER_IPV6), .type = RTE_FLOW_ITEM_TYPE_VLAN, }, [MLX5_EXPANSION_OUTER_IPV4] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT + .next = MLX5_FLOW_EXPAND_RSS_NEXT (MLX5_EXPANSION_OUTER_IPV4_UDP, MLX5_EXPANSION_OUTER_IPV4_TCP, MLX5_EXPANSION_GRE, @@ -139,8 +465,8 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = { ETH_RSS_NONFRAG_IPV4_OTHER, }, [MLX5_EXPANSION_OUTER_IPV4_UDP] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, - MLX5_EXPANSION_VXLAN_GPE), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, + MLX5_EXPANSION_VXLAN_GPE), .type = RTE_FLOW_ITEM_TYPE_UDP, .rss_types = ETH_RSS_NONFRAG_IPV4_UDP, }, @@ -149,7 +475,7 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = { .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, }, [MLX5_EXPANSION_OUTER_IPV6] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT + .next = MLX5_FLOW_EXPAND_RSS_NEXT (MLX5_EXPANSION_OUTER_IPV6_UDP, MLX5_EXPANSION_OUTER_IPV6_TCP, MLX5_EXPANSION_IPV4, @@ -159,8 +485,8 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = { ETH_RSS_NONFRAG_IPV6_OTHER, }, [MLX5_EXPANSION_OUTER_IPV6_UDP] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, - MLX5_EXPANSION_VXLAN_GPE), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN, + MLX5_EXPANSION_VXLAN_GPE), .type = RTE_FLOW_ITEM_TYPE_UDP, .rss_types = ETH_RSS_NONFRAG_IPV6_UDP, }, @@ -169,43 +495,43 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = { .rss_types = ETH_RSS_NONFRAG_IPV6_TCP, }, [MLX5_EXPANSION_VXLAN] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, - MLX5_EXPANSION_IPV4, - MLX5_EXPANSION_IPV6), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, + 
MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), .type = RTE_FLOW_ITEM_TYPE_VXLAN, }, [MLX5_EXPANSION_VXLAN_GPE] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, - MLX5_EXPANSION_IPV4, - MLX5_EXPANSION_IPV6), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH, + MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, }, [MLX5_EXPANSION_GRE] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4), .type = RTE_FLOW_ITEM_TYPE_GRE, }, [MLX5_EXPANSION_MPLS] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, - MLX5_EXPANSION_IPV6), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), .type = RTE_FLOW_ITEM_TYPE_MPLS, }, [MLX5_EXPANSION_ETH] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, - MLX5_EXPANSION_IPV6), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), .type = RTE_FLOW_ITEM_TYPE_ETH, }, [MLX5_EXPANSION_ETH_VLAN] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN), .type = RTE_FLOW_ITEM_TYPE_ETH, }, [MLX5_EXPANSION_VLAN] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, - MLX5_EXPANSION_IPV6), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, + MLX5_EXPANSION_IPV6), .type = RTE_FLOW_ITEM_TYPE_VLAN, }, [MLX5_EXPANSION_IPV4] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, - MLX5_EXPANSION_IPV4_TCP), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP, + MLX5_EXPANSION_IPV4_TCP), .type = RTE_FLOW_ITEM_TYPE_IPV4, .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | ETH_RSS_NONFRAG_IPV4_OTHER, @@ -219,8 +545,8 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = { .rss_types = ETH_RSS_NONFRAG_IPV4_TCP, }, [MLX5_EXPANSION_IPV6] = { - .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, - MLX5_EXPANSION_IPV6_TCP), + .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP, + MLX5_EXPANSION_IPV6_TCP), .type = RTE_FLOW_ITEM_TYPE_IPV6, .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER, @@ -235,6 +561,182 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = { }, }; +static struct rte_flow_shared_action * +mlx5_shared_action_create(struct rte_eth_dev *dev, + const struct rte_flow_shared_action_conf *conf, + const struct rte_flow_action *action, + struct rte_flow_error *error); +static int mlx5_shared_action_destroy + (struct rte_eth_dev *dev, + struct rte_flow_shared_action *shared_action, + struct rte_flow_error *error); +static int mlx5_shared_action_update + (struct rte_eth_dev *dev, + struct rte_flow_shared_action *shared_action, + const struct rte_flow_action *action, + struct rte_flow_error *error); +static int mlx5_shared_action_query + (struct rte_eth_dev *dev, + const struct rte_flow_shared_action *action, + void *data, + struct rte_flow_error *error); +static inline bool +mlx5_flow_tunnel_validate(struct rte_eth_dev *dev, + struct rte_flow_tunnel *tunnel, + const char *err_msg) +{ + err_msg = NULL; + if (!is_tunnel_offload_active(dev)) { + err_msg = "tunnel offload was not activated"; + goto out; + } else if (!tunnel) { + err_msg = "no application tunnel"; + goto out; + } + + switch (tunnel->type) { + default: + err_msg = "unsupported tunnel type"; + goto out; + case RTE_FLOW_ITEM_TYPE_VXLAN: + break; + } + +out: + return !err_msg; +} + + +static int +mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev, + struct rte_flow_tunnel *app_tunnel, + struct 
rte_flow_action **actions, + uint32_t *num_of_actions, + struct rte_flow_error *error) +{ + int ret; + struct mlx5_flow_tunnel *tunnel; + const char *err_msg = NULL; + bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, err_msg); + + if (!verdict) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, + err_msg); + ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel); + if (ret < 0) { + return rte_flow_error_set(error, ret, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, + "failed to initialize pmd tunnel"); + } + *actions = &tunnel->action; + *num_of_actions = 1; + return 0; +} + +static int +mlx5_flow_tunnel_match(struct rte_eth_dev *dev, + struct rte_flow_tunnel *app_tunnel, + struct rte_flow_item **items, + uint32_t *num_of_items, + struct rte_flow_error *error) +{ + int ret; + struct mlx5_flow_tunnel *tunnel; + const char *err_msg = NULL; + bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, err_msg); + + if (!verdict) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + err_msg); + ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel); + if (ret < 0) { + return rte_flow_error_set(error, ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "failed to initialize pmd tunnel"); + } + *items = &tunnel->item; + *num_of_items = 1; + return 0; +} + +static int +mlx5_flow_item_release(struct rte_eth_dev *dev, + struct rte_flow_item *pmd_items, + uint32_t num_items, struct rte_flow_error *err) +{ + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_flow_tunnel *tun; + + LIST_FOREACH(tun, &thub->tunnels, chain) { + if (&tun->item == pmd_items) + break; + } + if (!tun || num_items != 1) + return rte_flow_error_set(err, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "invalid argument"); + if (!__atomic_sub_fetch(&tun->refctn, 1, __ATOMIC_RELAXED)) + mlx5_flow_tunnel_free(dev, tun); + return 0; +} + +static int +mlx5_flow_action_release(struct rte_eth_dev *dev, + struct rte_flow_action *pmd_actions, + uint32_t num_actions, struct rte_flow_error *err) +{ + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_flow_tunnel *tun; + + LIST_FOREACH(tun, &thub->tunnels, chain) { + if (&tun->action == pmd_actions) + break; + } + if (!tun || num_actions != 1) + return rte_flow_error_set(err, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "invalid argument"); + if (!__atomic_sub_fetch(&tun->refctn, 1, __ATOMIC_RELAXED)) + mlx5_flow_tunnel_free(dev, tun); + + return 0; +} + +static int +mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev, + struct rte_mbuf *m, + struct rte_flow_restore_info *info, + struct rte_flow_error *err) +{ + uint64_t ol_flags = m->ol_flags; + const struct mlx5_flow_tbl_data_entry *tble; + const uint64_t mask = PKT_RX_FDIR | PKT_RX_FDIR_ID; + + if ((ol_flags & mask) != mask) + goto err; + tble = tunnel_mark_decode(dev, m->hash.fdir.hi); + if (!tble) { + DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x", + dev->data->port_id, m->hash.fdir.hi); + goto err; + } + MLX5_ASSERT(tble->tunnel); + memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel)); + info->group_id = tble->group_id; + info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL | + RTE_FLOW_RESTORE_INFO_GROUP_ID | + RTE_FLOW_RESTORE_INFO_ENCAPSULATED; + + return 0; + +err: + return rte_flow_error_set(err, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "failed to get restore info"); +} + static const struct rte_flow_ops mlx5_flow_ops = { .validate = mlx5_flow_validate, .create = mlx5_flow_create, @@ -244,6 +746,15 @@ static 
const struct rte_flow_ops mlx5_flow_ops = { .query = mlx5_flow_query, .dev_dump = mlx5_flow_dev_dump, .get_aged_flows = mlx5_flow_get_aged_flows, + .shared_action_create = mlx5_shared_action_create, + .shared_action_destroy = mlx5_shared_action_destroy, + .shared_action_update = mlx5_shared_action_update, + .shared_action_query = mlx5_shared_action_query, + .tunnel_decap_set = mlx5_flow_tunnel_decap_set, + .tunnel_match = mlx5_flow_tunnel_match, + .tunnel_action_decap_release = mlx5_flow_action_release, + .tunnel_item_release = mlx5_flow_item_release, + .get_restore_info = mlx5_flow_tunnel_get_restore_info, }; /* Convert FDIR request to Generic flow. */ @@ -272,17 +783,6 @@ struct mlx5_fdir { struct rte_flow_action_queue queue; }; -/* Map of Verbs to Flow priority with 8 Verbs priorities. */ -static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = { - { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 }, -}; - -/* Map of Verbs to Flow priority with 16 Verbs priorities. */ -static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = { - { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 }, - { 9, 10, 11 }, { 12, 13, 14 }, -}; - /* Tunnel information. */ struct mlx5_flow_tunnel_info { uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */ @@ -379,7 +879,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, case MLX5_METADATA_FDB: switch (config->dv_xmeta_en) { case MLX5_XMETA_MODE_LEGACY: - return REG_NONE; + return REG_NON; case MLX5_XMETA_MODE_META16: return REG_C_0; case MLX5_XMETA_MODE_META32: @@ -389,7 +889,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, case MLX5_FLOW_MARK: switch (config->dv_xmeta_en) { case MLX5_XMETA_MODE_LEGACY: - return REG_NONE; + return REG_NON; case MLX5_XMETA_MODE_META16: return REG_C_1; case MLX5_XMETA_MODE_META32: @@ -407,7 +907,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3; case MLX5_MTR_COLOR: - MLX5_ASSERT(priv->mtr_color_reg != REG_NONE); + MLX5_ASSERT(priv->mtr_color_reg != REG_NON); return priv->mtr_color_reg; case MLX5_COPY_MARK: /* @@ -430,7 +930,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "invalid tag id"); - if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE) + if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "unsupported tag id"); @@ -447,7 +947,7 @@ mlx5_flow_get_reg_id(struct rte_eth_dev *dev, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "invalid tag id"); if (config->flow_mreg_c - [id + 1 + start_reg - REG_C_0] != REG_NONE) + [id + 1 + start_reg - REG_C_0] != REG_NON) return config->flow_mreg_c [id + 1 + start_reg - REG_C_0]; return rte_flow_error_set(error, ENOTSUP, @@ -485,107 +985,7 @@ mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev) * - reg_c's are preserved across different domain (FDB and NIC) on * packet loopback by flow lookup miss. */ - return config->flow_mreg_c[2] != REG_NONE; -} - -/** - * Discover the maximum number of priority available. - * - * @param[in] dev - * Pointer to the Ethernet device structure. - * - * @return - * number of supported flow priority on success, a negative errno - * value otherwise and rte_errno is set. 
- */ -int -mlx5_flow_discover_priorities(struct rte_eth_dev *dev) -{ - struct mlx5_priv *priv = dev->data->dev_private; - struct { - struct ibv_flow_attr attr; - struct ibv_flow_spec_eth eth; - struct ibv_flow_spec_action_drop drop; - } flow_attr = { - .attr = { - .num_of_specs = 2, - .port = (uint8_t)priv->ibv_port, - }, - .eth = { - .type = IBV_FLOW_SPEC_ETH, - .size = sizeof(struct ibv_flow_spec_eth), - }, - .drop = { - .size = sizeof(struct ibv_flow_spec_action_drop), - .type = IBV_FLOW_SPEC_ACTION_DROP, - }, - }; - struct ibv_flow *flow; - struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev); - uint16_t vprio[] = { 8, 16 }; - int i; - int priority = 0; - - if (!drop) { - rte_errno = ENOTSUP; - return -rte_errno; - } - for (i = 0; i != RTE_DIM(vprio); i++) { - flow_attr.attr.priority = vprio[i] - 1; - flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr); - if (!flow) - break; - claim_zero(mlx5_glue->destroy_flow(flow)); - priority = vprio[i]; - } - mlx5_hrxq_drop_release(dev); - switch (priority) { - case 8: - priority = RTE_DIM(priority_map_3); - break; - case 16: - priority = RTE_DIM(priority_map_5); - break; - default: - rte_errno = ENOTSUP; - DRV_LOG(ERR, - "port %u verbs maximum priority: %d expected 8/16", - dev->data->port_id, priority); - return -rte_errno; - } - DRV_LOG(INFO, "port %u flow maximum priority: %d", - dev->data->port_id, priority); - return priority; -} - -/** - * Adjust flow priority based on the highest layer and the request priority. - * - * @param[in] dev - * Pointer to the Ethernet device structure. - * @param[in] priority - * The rule base priority. - * @param[in] subpriority - * The priority based on the items. - * - * @return - * The new priority. - */ -uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority, - uint32_t subpriority) -{ - uint32_t res = 0; - struct mlx5_priv *priv = dev->data->dev_private; - - switch (priv->config.flow_prio) { - case RTE_DIM(priority_map_3): - res = priority_map_3[priority][subpriority]; - break; - case RTE_DIM(priority_map_5): - res = priority_map_5[priority][subpriority]; - break; - } - return res; + return config->flow_mreg_c[2] != REG_NON; } /** @@ -600,6 +1000,8 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority, * Bit-masks covering supported fields by the NIC to compare with user mask. * @param[in] size * Bit-masks size in bytes. + * @param[in] range_accepted + * True if range of values is accepted for specific fields, false otherwise. * @param[out] error * Pointer to error structure. * @@ -611,6 +1013,7 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item, const uint8_t *mask, const uint8_t *nic_mask, unsigned int size, + bool range_accepted, struct rte_flow_error *error) { unsigned int i; @@ -628,7 +1031,7 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item, RTE_FLOW_ERROR_TYPE_ITEM, item, "mask/last without a spec is not" " supported"); - if (item->spec && item->last) { + if (item->spec && item->last && !range_accepted) { uint8_t spec[size]; uint8_t last[size]; unsigned int i; @@ -1121,16 +1524,10 @@ mlx5_flow_validate_action_queue(const struct rte_flow_action *action, /* * Validate the rss action. * - * @param[in] action - * Pointer to the queue action. - * @param[in] action_flags - * Bit-fields that holds the actions detected until now. * @param[in] dev * Pointer to the Ethernet device structure. - * @param[in] attr - * Attributes of flow that includes this action. - * @param[in] item_flags - * Items that were detected. 
+ * @param[in] action + * Pointer to the queue action. * @param[out] error * Pointer to error structure. * @@ -1138,23 +1535,14 @@ mlx5_flow_validate_action_queue(const struct rte_flow_action *action, * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_flow_validate_action_rss(const struct rte_flow_action *action, - uint64_t action_flags, - struct rte_eth_dev *dev, - const struct rte_flow_attr *attr, - uint64_t item_flags, - struct rte_flow_error *error) +mlx5_validate_action_rss(struct rte_eth_dev *dev, + const struct rte_flow_action *action, + struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; const struct rte_flow_action_rss *rss = action->conf; - int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); unsigned int i; - if (action_flags & MLX5_FLOW_FATE_ACTIONS) - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION, NULL, - "can't have 2 fate actions" - " in same flow"); if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT && rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) return rte_flow_error_set(error, ENOTSUP, @@ -1228,26 +1616,22 @@ mlx5_flow_validate_action_rss(const struct rte_flow_action *action, (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF, &rss->queue[i], "queue is not configured"); } - if (attr->egress) - return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, - "rss action not supported for " - "egress"); - if (rss->level > 1 && !tunnel) - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, - "inner RSS is not supported for " - "non-tunnel flows"); return 0; } /* - * Validate the count action. + * Validate the rss action. * + * @param[in] action + * Pointer to the queue action. + * @param[in] action_flags + * Bit-fields that holds the actions detected until now. * @param[in] dev * Pointer to the Ethernet device structure. * @param[in] attr * Attributes of flow that includes this action. + * @param[in] item_flags + * Items that were detected. * @param[out] error * Pointer to error structure. * @@ -1255,20 +1639,109 @@ mlx5_flow_validate_action_rss(const struct rte_flow_action *action, * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int -mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused, - const struct rte_flow_attr *attr, - struct rte_flow_error *error) +mlx5_flow_validate_action_rss(const struct rte_flow_action *action, + uint64_t action_flags, + struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + uint64_t item_flags, + struct rte_flow_error *error) { + const struct rte_flow_action_rss *rss = action->conf; + int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + int ret; + + if (action_flags & MLX5_FLOW_FATE_ACTIONS) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can't have 2 fate actions" + " in same flow"); + ret = mlx5_validate_action_rss(dev, action, error); + if (ret) + return ret; if (attr->egress) return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, - "count action not supported for " + "rss action not supported for " "egress"); - return 0; -} - -/** - * Verify the @p attributes will be correctly understood by the NIC and store + if (rss->level > 1 && !tunnel) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, + "inner RSS is not supported for " + "non-tunnel flows"); + if ((item_flags & MLX5_FLOW_LAYER_ECPRI) && + !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL, + "RSS on eCPRI is not supported now"); + } + return 0; +} + +/* + * Validate the default miss action. + * + * @param[in] action_flags + * Bit-fields that holds the actions detected until now. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_flow_validate_action_default_miss(uint64_t action_flags, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + if (action_flags & MLX5_FLOW_FATE_ACTIONS) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, NULL, + "can't have 2 fate actions in" + " same flow"); + if (attr->egress) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, + "default miss action not supported " + "for egress"); + if (attr->group) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL, + "only group 0 is supported"); + if (attr->transfer) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, + NULL, "transfer is not supported"); + return 0; +} + +/* + * Validate the count action. + * + * @param[in] dev + * Pointer to the Ethernet device structure. + * @param[in] attr + * Attributes of flow that includes this action. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused, + const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + if (attr->egress) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL, + "count action not supported for " + "egress"); + return 0; +} + +/** + * Verify the @p attributes will be correctly understood by the NIC and store * them in the @p flow if everything is correct. * * @param[in] dev @@ -1321,6 +1794,8 @@ mlx5_flow_validate_attributes(struct rte_eth_dev *dev, * Item specification. * @param[in] item_flags * Bit-fields that holds the items detected until now. + * @param[in] ext_vlan_sup + * Whether extended VLAN features are supported or not. 
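+ *   When set, the has_vlan bit of the Ethernet item can also be matched.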
* @param[out] error * Pointer to error structure. * @@ -1360,7 +1835,8 @@ mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item, ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_icmp6_mask, - sizeof(struct rte_flow_item_icmp6), error); + sizeof(struct rte_flow_item_icmp6), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; @@ -1386,6 +1862,12 @@ mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, struct rte_flow_error *error) { const struct rte_flow_item_icmp *mask = item->mask; + const struct rte_flow_item_icmp nic_mask = { + .hdr.icmp_type = 0xff, + .hdr.icmp_code = 0xff, + .hdr.icmp_ident = RTE_BE16(0xffff), + .hdr.icmp_seq_nb = RTE_BE16(0xffff), + }; const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : MLX5_FLOW_LAYER_OUTER_L3_IPV4; @@ -1408,11 +1890,12 @@ mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, RTE_FLOW_ERROR_TYPE_ITEM, item, "multiple L4 layers not supported"); if (!mask) - mask = &rte_flow_item_icmp_mask; + mask = &nic_mask; ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, - (const uint8_t *)&rte_flow_item_icmp_mask, - sizeof(struct rte_flow_item_icmp), error); + (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_icmp), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; @@ -1433,7 +1916,7 @@ mlx5_flow_validate_item_icmp(const struct rte_flow_item *item, */ int mlx5_flow_validate_item_eth(const struct rte_flow_item *item, - uint64_t item_flags, + uint64_t item_flags, bool ext_vlan_sup, struct rte_flow_error *error) { const struct rte_flow_item_eth *mask = item->mask; @@ -1441,6 +1924,7 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item, .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", .type = RTE_BE16(0xffff), + .has_vlan = ext_vlan_sup ? 1 : 0, }; int ret; int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); @@ -1467,7 +1951,7 @@ mlx5_flow_validate_item_eth(const struct rte_flow_item *item, ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_eth), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); return ret; } @@ -1521,7 +2005,7 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_vlan), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret) return ret; if (!tunnel && mask->tci != RTE_BE16(0x0fff)) { @@ -1566,9 +2050,15 @@ mlx5_flow_validate_item_vlan(const struct rte_flow_item *item, * Item specification. * @param[in] item_flags * Bit-fields that holds the items detected until now. + * @param[in] last_item + * Previous validated item in the pattern items. + * @param[in] ether_type + * Type in the ethernet layer header (including dot1q). * @param[in] acc_mask * Acceptable mask, if NULL default internal default mask * will be used to check whether item fields are supported. + * @param[in] range_accepted + * True if range of values is accepted for specific fields, false otherwise. * @param[out] error * Pointer to error structure. 
* @@ -1581,6 +2071,7 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, uint64_t last_item, uint16_t ether_type, const struct rte_flow_item_ipv4 *acc_mask, + bool range_accepted, struct rte_flow_error *error) { const struct rte_flow_item_ipv4 *mask = item->mask; @@ -1651,7 +2142,7 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, acc_mask ? (const uint8_t *)acc_mask : (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_ipv4), - error); + range_accepted, error); if (ret < 0) return ret; return 0; @@ -1664,6 +2155,10 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, * Item specification. * @param[in] item_flags * Bit-fields that holds the items detected until now. + * @param[in] last_item + * Previous validated item in the pattern items. + * @param[in] ether_type + * Type in the ethernet layer header (including dot1q). * @param[in] acc_mask * Acceptable mask, if NULL default internal default mask * will be used to check whether item fields are supported. @@ -1712,9 +2207,9 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, RTE_FLOW_ERROR_TYPE_ITEM, item, "IPv6 cannot follow L2/VLAN layer " "which ether type is not IPv6"); + if (mask && mask->hdr.proto == UINT8_MAX && spec) + next_proto = spec->hdr.proto; if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) { - if (mask && spec) - next_proto = mask->hdr.proto & spec->hdr.proto; if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, @@ -1722,6 +2217,16 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, "multiple tunnel " "not supported"); } + if (next_proto == IPPROTO_HOPOPTS || + next_proto == IPPROTO_ROUTING || + next_proto == IPPROTO_FRAGMENT || + next_proto == IPPROTO_ESP || + next_proto == IPPROTO_AH || + next_proto == IPPROTO_DSTOPTS) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "IPv6 proto (next header) should " + "not be set as extension header"); if (item_flags & MLX5_FLOW_LAYER_IPIP) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, @@ -1746,7 +2251,7 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, acc_mask ? 
(const uint8_t *)acc_mask : (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_ipv6), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; @@ -1801,7 +2306,8 @@ mlx5_flow_validate_item_udp(const struct rte_flow_item *item, ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_udp_mask, - sizeof(struct rte_flow_item_udp), error); + sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED, + error); if (ret < 0) return ret; return 0; @@ -1856,7 +2362,8 @@ mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)flow_mask, - sizeof(struct rte_flow_item_tcp), error); + sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED, + error); if (ret < 0) return ret; return 0; @@ -1910,7 +2417,7 @@ mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item, (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_vxlan_mask, sizeof(struct rte_flow_item_vxlan), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; if (spec) { @@ -1981,7 +2488,7 @@ mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item, (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_vxlan_gpe_mask, sizeof(struct rte_flow_item_vxlan_gpe), - error); + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; if (spec) { @@ -2055,7 +2562,7 @@ mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item, ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&gre_key_default_mask, - sizeof(rte_be32_t), error); + sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error); return ret; } @@ -2107,7 +2614,8 @@ mlx5_flow_validate_item_gre(const struct rte_flow_item *item, ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, - sizeof(struct rte_flow_item_gre), error); + sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED, + error); if (ret < 0) return ret; #ifndef HAVE_MLX5DV_DR @@ -2182,7 +2690,8 @@ mlx5_flow_validate_item_geneve(const struct rte_flow_item *item, ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, - sizeof(struct rte_flow_item_geneve), error); + sizeof(struct rte_flow_item_geneve), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret) return ret; if (spec) { @@ -2265,15 +2774,17 @@ mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_mpls_mask, - sizeof(struct rte_flow_item_mpls), error); + sizeof(struct rte_flow_item_mpls), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; -#endif +#else return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item, "MPLS is not supported by Verbs, please" " update."); +#endif } /** @@ -2319,12 +2830,104 @@ mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item, ret = mlx5_flow_item_acceptable (item, (const uint8_t *)mask, (const uint8_t *)&rte_flow_item_nvgre_mask, - sizeof(struct rte_flow_item_nvgre), error); + sizeof(struct rte_flow_item_nvgre), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); if (ret < 0) return ret; return 0; } +/** + * Validate eCPRI item. + * + * @param[in] item + * Item specification. + * @param[in] item_flags + * Bit-fields that holds the items detected until now. + * @param[in] last_item + * Previous validated item in the pattern items. 
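+ *   Used to check the ether type when eCPRI follows an L2/VLAN layer.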
+ * @param[in] ether_type + * Type in the ethernet layer header (including dot1q). + * @param[in] acc_mask + * Acceptable mask, if NULL default internal default mask + * will be used to check whether item fields are supported. + * @param[out] error + * Pointer to error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item, + uint64_t item_flags, + uint64_t last_item, + uint16_t ether_type, + const struct rte_flow_item_ecpri *acc_mask, + struct rte_flow_error *error) +{ + const struct rte_flow_item_ecpri *mask = item->mask; + const struct rte_flow_item_ecpri nic_mask = { + .hdr = { + .common = { + .u32 = + RTE_BE32(((const struct rte_ecpri_common_hdr) { + .type = 0xFF, + }).u32), + }, + .dummy[0] = 0xFFFFFFFF, + }, + }; + const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 | + MLX5_FLOW_LAYER_OUTER_VLAN); + struct rte_flow_item_ecpri mask_lo; + + if ((last_item & outer_l2_vlan) && ether_type && + ether_type != RTE_ETHER_TYPE_ECPRI) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "eCPRI cannot follow L2/VLAN layer " + "which ether type is not 0xAEFE."); + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "eCPRI with tunnel is not supported " + "right now."); + if (item_flags & MLX5_FLOW_LAYER_OUTER_L3) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "multiple L3 layers not supported"); + else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "eCPRI cannot follow a TCP layer."); + /* In specification, eCPRI could be over UDP layer. */ + else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "eCPRI over UDP layer is not yet " + "supported right now."); + /* Mask for type field in common header could be zero. */ + if (!mask) + mask = &rte_flow_item_ecpri_mask; + mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32); + /* Input mask is in big-endian format. */ + if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "partial mask is not supported " + "for protocol"); + else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "message header mask must be after " + "a type mask"); + return mlx5_flow_item_acceptable(item, (const uint8_t *)mask, + acc_mask ? (const uint8_t *)acc_mask + : (const uint8_t *)&nic_mask, + sizeof(struct rte_flow_item_ecpri), + MLX5_ITEM_RANGE_NOT_ACCEPTED, error); +} + /* Allocate unique ID for the split Q/RSS subflows. */ static uint32_t flow_qrss_get_id(struct rte_eth_dev *dev) @@ -2440,6 +3043,14 @@ flow_null_query(struct rte_eth_dev *dev __rte_unused, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL); } +static int +flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused, + uint32_t domains __rte_unused, + uint32_t flags __rte_unused) +{ + return 0; +} + /* Void driver to protect from null pointer reference. 
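 * Each callback either sets rte_errno and fails or is a harmless no-op.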
*/ const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { .validate = flow_null_validate, @@ -2449,6 +3060,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = { .remove = flow_null_remove, .destroy = flow_null_destroy, .query = flow_null_query, + .sync_domain = flow_null_sync_domain, }; /** @@ -2467,8 +3079,12 @@ static enum mlx5_flow_drv_type flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr) { struct mlx5_priv *priv = dev->data->dev_private; - enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX; + /* The OS can determine first a specific flow type (DV, VERBS) */ + enum mlx5_flow_drv_type type = mlx5_flow_os_get_type(); + if (type != MLX5_FLOW_TYPE_MAX) + return type; + /* If no OS specific type - continue with DV/VERBS selection */ if (attr->transfer && priv->config.dv_esw_en) type = MLX5_FLOW_TYPE_DV; if (!attr->transfer) @@ -2705,6 +3321,138 @@ flow_get_rss_action(const struct rte_flow_action actions[]) return NULL; } +/* maps shared action to translated non shared in some actions array */ +struct mlx5_translated_shared_action { + struct rte_flow_shared_action *action; /**< Shared action */ + int index; /**< Index in related array of rte_flow_action */ +}; + +/** + * Translates actions of type RTE_FLOW_ACTION_TYPE_SHARED to related + * non shared action if translation possible. + * This functionality used to run same execution path for both shared & non + * shared actions on flow create. All necessary preparations for shared + * action handling should be preformed on *shared* actions list returned + * from this call. + * + * @param[in] actions + * List of actions to translate. + * @param[out] shared + * List to store translated shared actions. + * @param[in, out] shared_n + * Size of *shared* array. On return should be updated with number of shared + * actions retrieved from the *actions* list. + * @param[out] translated_actions + * List of actions where all shared actions were translated to non shared + * if possible. NULL if no translation took place. + * @param[out] error + * Pointer to the error structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
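+ * The returned *translated_actions* array is allocated with mlx5_malloc()
+ * and must be freed by the caller.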
+ */ +static int +flow_shared_actions_translate(const struct rte_flow_action actions[], + struct mlx5_translated_shared_action *shared, + int *shared_n, + struct rte_flow_action **translated_actions, + struct rte_flow_error *error) +{ + struct rte_flow_action *translated = NULL; + size_t actions_size; + int n; + int copied_n = 0; + struct mlx5_translated_shared_action *shared_end = NULL; + + for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) { + if (actions[n].type != RTE_FLOW_ACTION_TYPE_SHARED) + continue; + if (copied_n == *shared_n) { + return rte_flow_error_set + (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "too many shared actions"); + } + rte_memcpy(&shared[copied_n].action, &actions[n].conf, + sizeof(actions[n].conf)); + shared[copied_n].index = n; + copied_n++; + } + n++; + *shared_n = copied_n; + if (!copied_n) + return 0; + actions_size = sizeof(struct rte_flow_action) * n; + translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY); + if (!translated) { + rte_errno = ENOMEM; + return -ENOMEM; + } + memcpy(translated, actions, actions_size); + for (shared_end = shared + copied_n; shared < shared_end; shared++) { + const struct rte_flow_shared_action *shared_action; + + shared_action = shared->action; + switch (shared_action->type) { + case MLX5_RTE_FLOW_ACTION_TYPE_SHARED_RSS: + translated[shared->index].type = + RTE_FLOW_ACTION_TYPE_RSS; + translated[shared->index].conf = + &shared_action->rss.origin; + break; + default: + mlx5_free(translated); + return rte_flow_error_set + (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "invalid shared action type"); + } + } + *translated_actions = translated; + return 0; +} + +/** + * Get Shared RSS action from the action list. + * + * @param[in] shared + * Pointer to the list of actions. + * @param[in] shared_n + * Actions list length. + * + * @return + * Pointer to the MLX5 RSS action if exists, otherwise return NULL. + */ +static struct mlx5_shared_action_rss * +flow_get_shared_rss_action(struct mlx5_translated_shared_action *shared, + int shared_n) +{ + struct mlx5_translated_shared_action *shared_end; + + for (shared_end = shared + shared_n; shared < shared_end; shared++) { + struct rte_flow_shared_action *shared_action; + + shared_action = shared->action; + switch (shared_action->type) { + case MLX5_RTE_FLOW_ACTION_TYPE_SHARED_RSS: + __atomic_add_fetch(&shared_action->refcnt, 1, + __ATOMIC_RELAXED); + return &shared_action->rss; + default: + break; + } + } + return NULL; +} + +struct rte_flow_shared_action * +mlx5_flow_get_shared_rss(struct rte_flow *flow) +{ + if (flow->shared_rss) + return container_of(flow->shared_rss, + struct rte_flow_shared_action, rss); + else + return NULL; +} + static unsigned int find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level) { @@ -2855,10 +3603,10 @@ flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr) } /** - * Check if the flow should be splited due to hairpin. + * Check if the flow should be split due to hairpin. * The reason for the split is that in current HW we can't - * support encap on Rx, so if a flow have encap we move it - * to Tx. + * support encap and push-vlan on Rx, so if a flow contains + * these actions we move it to Tx. * * @param dev * Pointer to Ethernet device. 
@@ -2878,10 +3626,11 @@ flow_check_hairpin_split(struct rte_eth_dev *dev, { int queue_action = 0; int action_n = 0; - int encap = 0; + int split = 0; const struct rte_flow_action_queue *queue; const struct rte_flow_action_rss *rss; const struct rte_flow_action_raw_encap *raw_encap; + const struct rte_eth_hairpin_conf *conf; if (!attr->ingress) return 0; @@ -2891,8 +3640,8 @@ flow_check_hairpin_split(struct rte_eth_dev *dev, queue = actions->conf; if (queue == NULL) return 0; - if (mlx5_rxq_get_type(dev, queue->index) != - MLX5_RXQ_TYPE_HAIRPIN) + conf = mlx5_rxq_get_hairpin_conf(dev, queue->index); + if (conf != NULL && !!conf->tx_explicit) return 0; queue_action = 1; action_n++; @@ -2901,15 +3650,18 @@ flow_check_hairpin_split(struct rte_eth_dev *dev, rss = actions->conf; if (rss == NULL || rss->queue_num == 0) return 0; - if (mlx5_rxq_get_type(dev, rss->queue[0]) != - MLX5_RXQ_TYPE_HAIRPIN) + conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]); + if (conf != NULL && !!conf->tx_explicit) return 0; queue_action = 1; action_n++; break; case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: - encap = 1; + case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: + case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: + case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: + split++; action_n++; break; case RTE_FLOW_ACTION_TYPE_RAW_ENCAP: @@ -2917,7 +3669,7 @@ flow_check_hairpin_split(struct rte_eth_dev *dev, if (raw_encap->size > (sizeof(struct rte_flow_item_eth) + sizeof(struct rte_flow_item_ipv4))) - encap = 1; + split++; action_n++; break; default: @@ -2925,7 +3677,7 @@ flow_check_hairpin_split(struct rte_eth_dev *dev, break; } } - if (encap == 1 && queue_action) + if (split && queue_action) return action_n; return 0; } @@ -2987,7 +3739,7 @@ flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id, }; struct mlx5_flow_action_copy_mreg cp_mreg = { .dst = REG_B, - .src = 0, + .src = REG_NON, }; struct rte_flow_action_jump jump = { .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP, @@ -3367,7 +4119,8 @@ flow_mreg_update_copy_table(struct rte_eth_dev *dev, /** * Split the hairpin flow. - * Since HW can't support encap on Rx we move the encap to Tx. + * Since HW can't support encap and push-vlan on Rx, we move these + * actions to Tx. * If the count action is after the encap then we also * move the count action. in this case the count will also measure * the outer bytes. @@ -3411,6 +4164,9 @@ flow_hairpin_split(struct rte_eth_dev *dev, switch (actions->type) { case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP: case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP: + case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN: + case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID: + case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP: rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action)); actions_tx++; @@ -3471,7 +4227,7 @@ flow_hairpin_split(struct rte_eth_dev *dev, actions_rx++; set_tag = (void *)actions_rx; set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL); - MLX5_ASSERT(set_tag->id > REG_NONE); + MLX5_ASSERT(set_tag->id > REG_NON); set_tag->data = *flow_id; tag_action->conf = set_tag; /* Create Tx item list. 
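 * The Tx items match the unique tag that the Rx prefix flow sets above.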
*/ @@ -3483,20 +4239,155 @@ flow_hairpin_split(struct rte_eth_dev *dev, tag_item = (void *)addr; tag_item->data = *flow_id; tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL); - MLX5_ASSERT(set_tag->id > REG_NONE); + MLX5_ASSERT(set_tag->id > REG_NON); item->spec = tag_item; addr += sizeof(struct mlx5_rte_flow_item_tag); tag_item = (void *)addr; tag_item->data = UINT32_MAX; tag_item->id = UINT16_MAX; item->mask = tag_item; - addr += sizeof(struct mlx5_rte_flow_item_tag); item->last = NULL; item++; item->type = RTE_FLOW_ITEM_TYPE_END; return 0; } +__extension__ +union tunnel_offload_mark { + uint32_t val; + struct { + uint32_t app_reserve:8; + uint32_t table_id:15; + uint32_t transfer:1; + uint32_t _unused_:8; + }; +}; + +struct tunnel_default_miss_ctx { + uint16_t *queue; + __extension__ + union { + struct rte_flow_action_rss action_rss; + struct rte_flow_action_queue miss_queue; + struct rte_flow_action_jump miss_jump; + uint8_t raw[0]; + }; +}; + +static int +flow_tunnel_add_default_miss(struct rte_eth_dev *dev, + struct rte_flow *flow, + const struct rte_flow_attr *attr, + const struct rte_flow_action *app_actions, + uint32_t flow_idx, + struct tunnel_default_miss_ctx *ctx, + struct rte_flow_error *error) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_flow *dev_flow; + struct rte_flow_attr miss_attr = *attr; + const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf; + const struct rte_flow_item miss_items[2] = { + { + .type = RTE_FLOW_ITEM_TYPE_ETH, + .spec = NULL, + .last = NULL, + .mask = NULL + }, + { + .type = RTE_FLOW_ITEM_TYPE_END, + .spec = NULL, + .last = NULL, + .mask = NULL + } + }; + union tunnel_offload_mark mark_id; + struct rte_flow_action_mark miss_mark; + struct rte_flow_action miss_actions[3] = { + [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark }, + [2] = { .type = RTE_FLOW_ACTION_TYPE_END, .conf = NULL } + }; + const struct rte_flow_action_jump *jump_data; + uint32_t i, flow_table = 0; /* prevent compilation warning */ + struct flow_grp_info grp_info = { + .external = 1, + .transfer = attr->transfer, + .fdb_def_rule = !!priv->fdb_def_rule, + .std_tbl_fix = 0, + }; + int ret; + + if (!attr->transfer) { + uint32_t q_size; + + miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS; + q_size = priv->reta_idx_n * sizeof(ctx->queue[0]); + ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size, + 0, SOCKET_ID_ANY); + if (!ctx->queue) + return rte_flow_error_set + (error, ENOMEM, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + NULL, "invalid default miss RSS"); + ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT, + ctx->action_rss.level = 0, + ctx->action_rss.types = priv->rss_conf.rss_hf, + ctx->action_rss.key_len = priv->rss_conf.rss_key_len, + ctx->action_rss.queue_num = priv->reta_idx_n, + ctx->action_rss.key = priv->rss_conf.rss_key, + ctx->action_rss.queue = ctx->queue; + if (!priv->reta_idx_n || !priv->rxqs_n) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + NULL, "invalid port configuration"); + if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)) + ctx->action_rss.types = 0; + for (i = 0; i != priv->reta_idx_n; ++i) + ctx->queue[i] = (*priv->reta_idx)[i]; + } else { + miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP; + ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP; + } + miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw; + for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++); + jump_data = app_actions->conf; + miss_attr.priority = 
MLX5_TNL_MISS_RULE_PRIORITY; + miss_attr.group = jump_data->group; + ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group, + &flow_table, grp_info, error); + if (ret) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + NULL, "invalid tunnel id"); + mark_id.app_reserve = 0; + mark_id.table_id = tunnel_flow_tbl_to_id(flow_table); + mark_id.transfer = !!attr->transfer; + mark_id._unused_ = 0; + miss_mark.id = mark_id.val; + dev_flow = flow_drv_prepare(dev, flow, &miss_attr, + miss_items, miss_actions, flow_idx, error); + if (!dev_flow) + return -rte_errno; + dev_flow->flow = flow; + dev_flow->external = true; + dev_flow->tunnel = tunnel; + /* Subflow object was created, we must include one in the list. */ + SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx, + dev_flow->handle, next); + DRV_LOG(DEBUG, + "port %u tunnel type=%d id=%u miss rule priority=%u group=%u", + dev->data->port_id, tunnel->app_tunnel.type, + tunnel->tunnel_id, miss_attr.priority, miss_attr.group); + ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items, + miss_actions, error); + if (!ret) + ret = flow_mreg_update_copy_table(dev, flow, miss_actions, + error); + + return ret; +} + /** * The last stage of splitting chain, just creates the subflow * without any modification. @@ -3509,6 +4400,8 @@ flow_hairpin_split(struct rte_eth_dev *dev, * Pointer to return the created subflow, may be NULL. * @param[in] prefix_layers * Prefix subflow layers, may be 0. + * @param[in] prefix_mark + * Prefix subflow mark flag, may be 0. * @param[in] attr * Flow rule attributes. * @param[in] items @@ -3529,6 +4422,7 @@ flow_create_split_inner(struct rte_eth_dev *dev, struct rte_flow *flow, struct mlx5_flow **sub_flow, uint64_t prefix_layers, + uint32_t prefix_mark, const struct rte_flow_attr *attr, const struct rte_flow_item items[], const struct rte_flow_action actions[], @@ -3548,10 +4442,13 @@ flow_create_split_inner(struct rte_eth_dev *dev, dev_flow->handle, next); /* * If dev_flow is as one of the suffix flow, some actions in suffix - * flow may need some user defined item layer flags. + * flow may need some user defined item layer flags, and pass the + * Metadate rxq mark flag to suffix flow as well. */ if (prefix_layers) dev_flow->handle->layers = prefix_layers; + if (prefix_mark) + dev_flow->handle->mark = 1; if (sub_flow) *sub_flow = dev_flow; return flow_drv_translate(dev, dev_flow, attr, items, actions, error); @@ -3882,31 +4779,230 @@ flow_mreg_tx_copy_prep(struct rte_eth_dev *dev, } /** - * The splitting for metadata feature. - * - * - Q/RSS action on NIC Rx should be split in order to pass by - * the mreg copy table (RX_CP_TBL) and then it jumps to the - * action table (RX_ACT_TBL) which has the split Q/RSS action. - * - * - All the actions on NIC Tx should have a mreg copy action to - * copy reg_a from WQE to reg_c[0]. + * Check the match action from the action list. * - * @param dev - * Pointer to Ethernet device. - * @param[in] flow - * Parent flow structure pointer. - * @param[in] prefix_layers - * Prefix flow layer flags. + * @param[in] actions + * Pointer to the list of actions. * @param[in] attr * Flow rule attributes. - * @param[in] items - * Pattern specification (list terminated by the END pattern item). - * @param[in] actions - * Associated actions (list terminated by the END action). - * @param[in] external - * This flow rule is created by request external to PMD. - * @param[in] flow_idx - * This memory pool index to the flow. 
+ * @param[in] action + * The action to be check if exist. + * @param[out] match_action_pos + * Pointer to the position of the matched action if exists, otherwise is -1. + * @param[out] qrss_action_pos + * Pointer to the position of the Queue/RSS action if exists, otherwise is -1. + * + * @return + * > 0 the total number of actions. + * 0 if not found match action in action list. + */ +static int +flow_check_match_action(const struct rte_flow_action actions[], + const struct rte_flow_attr *attr, + enum rte_flow_action_type action, + int *match_action_pos, int *qrss_action_pos) +{ + const struct rte_flow_action_sample *sample; + int actions_n = 0; + int jump_flag = 0; + uint32_t ratio = 0; + int sub_type = 0; + int flag = 0; + + *match_action_pos = -1; + *qrss_action_pos = -1; + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + if (actions->type == action) { + flag = 1; + *match_action_pos = actions_n; + } + if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE || + actions->type == RTE_FLOW_ACTION_TYPE_RSS) + *qrss_action_pos = actions_n; + if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP) + jump_flag = 1; + if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) { + sample = actions->conf; + ratio = sample->ratio; + sub_type = ((const struct rte_flow_action *) + (sample->actions))->type; + } + actions_n++; + } + if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) { + if (ratio == 1) { + /* JUMP Action not support for Mirroring; + * Mirroring support multi-destination; + */ + if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END) + flag = 0; + } + } + /* Count RTE_FLOW_ACTION_TYPE_END. */ + return flag ? actions_n + 1 : 0; +} + +#define SAMPLE_SUFFIX_ITEM 2 + +/** + * Split the sample flow. + * + * As sample flow will split to two sub flow, sample flow with + * sample action, the other actions will move to new suffix flow. + * + * Also add unique tag id with tag action in the sample flow, + * the same tag id will be as match in the suffix flow. + * + * @param dev + * Pointer to Ethernet device. + * @param[in] fdb_tx + * FDB egress flow flag. + * @param[out] sfx_items + * Suffix flow match items (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] actions_sfx + * Suffix flow actions. + * @param[out] actions_pre + * Prefix flow actions. + * @param[in] actions_n + * The total number of actions. + * @param[in] sample_action_pos + * The sample action position. + * @param[in] qrss_action_pos + * The Queue/RSS action position. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, or unique flow_id, a negative errno value + * otherwise and rte_errno is set. + */ +static int +flow_sample_split_prep(struct rte_eth_dev *dev, + uint32_t fdb_tx, + struct rte_flow_item sfx_items[], + const struct rte_flow_action actions[], + struct rte_flow_action actions_sfx[], + struct rte_flow_action actions_pre[], + int actions_n, + int sample_action_pos, + int qrss_action_pos, + struct rte_flow_error *error) +{ + struct mlx5_rte_flow_action_set_tag *set_tag; + struct mlx5_rte_flow_item_tag *tag_spec; + struct mlx5_rte_flow_item_tag *tag_mask; + uint32_t tag_id = 0; + int index; + int ret; + + if (sample_action_pos < 0) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "invalid position of sample " + "action in list"); + if (!fdb_tx) { + /* Prepare the prefix tag action. 
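A unique flow id is allocated and written into the MLX5_APP_TAG register by the prefix flow; the suffix flow matches that id to pick the packets up again.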
*/ + set_tag = (void *)(actions_pre + actions_n + 1); + ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error); + if (ret < 0) + return ret; + set_tag->id = ret; + tag_id = flow_qrss_get_id(dev); + set_tag->data = tag_id; + /* Prepare the suffix subflow items. */ + tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM); + tag_spec->data = tag_id; + tag_spec->id = set_tag->id; + tag_mask = tag_spec + 1; + tag_mask->data = UINT32_MAX; + sfx_items[0] = (struct rte_flow_item){ + .type = (enum rte_flow_item_type) + MLX5_RTE_FLOW_ITEM_TYPE_TAG, + .spec = tag_spec, + .last = NULL, + .mask = tag_mask, + }; + sfx_items[1] = (struct rte_flow_item){ + .type = (enum rte_flow_item_type) + RTE_FLOW_ITEM_TYPE_END, + }; + } + /* Prepare the actions for prefix and suffix flow. */ + if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) { + index = qrss_action_pos; + /* Put the preceding the Queue/RSS action into prefix flow. */ + if (index != 0) + memcpy(actions_pre, actions, + sizeof(struct rte_flow_action) * index); + /* Put others preceding the sample action into prefix flow. */ + if (sample_action_pos > index + 1) + memcpy(actions_pre + index, actions + index + 1, + sizeof(struct rte_flow_action) * + (sample_action_pos - index - 1)); + index = sample_action_pos - 1; + /* Put Queue/RSS action into Suffix flow. */ + memcpy(actions_sfx, actions + qrss_action_pos, + sizeof(struct rte_flow_action)); + actions_sfx++; + } else { + index = sample_action_pos; + if (index != 0) + memcpy(actions_pre, actions, + sizeof(struct rte_flow_action) * index); + } + /* Add the extra tag action for NIC-RX and E-Switch ingress. */ + if (!fdb_tx) { + actions_pre[index++] = + (struct rte_flow_action){ + .type = (enum rte_flow_action_type) + MLX5_RTE_FLOW_ACTION_TYPE_TAG, + .conf = set_tag, + }; + } + memcpy(actions_pre + index, actions + sample_action_pos, + sizeof(struct rte_flow_action)); + index += 1; + actions_pre[index] = (struct rte_flow_action){ + .type = (enum rte_flow_action_type) + RTE_FLOW_ACTION_TYPE_END, + }; + /* Put the actions after sample into Suffix flow. */ + memcpy(actions_sfx, actions + sample_action_pos + 1, + sizeof(struct rte_flow_action) * + (actions_n - sample_action_pos - 1)); + return tag_id; +} + +/** + * The splitting for metadata feature. + * + * - Q/RSS action on NIC Rx should be split in order to pass by + * the mreg copy table (RX_CP_TBL) and then it jumps to the + * action table (RX_ACT_TBL) which has the split Q/RSS action. + * + * - All the actions on NIC Tx should have a mreg copy action to + * copy reg_a from WQE to reg_c[0]. + * + * @param dev + * Pointer to Ethernet device. + * @param[in] flow + * Parent flow structure pointer. + * @param[in] prefix_layers + * Prefix flow layer flags. + * @param[in] prefix_mark + * Prefix subflow mark flag, may be 0. + * @param[in] attr + * Flow rule attributes. + * @param[in] items + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[in] external + * This flow rule is created by request external to PMD. + * @param[in] flow_idx + * This memory pool index to the flow. * @param[out] error * Perform verbose error reporting if not NULL. 
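 *
 * Roughly (illustrative NIC Rx case): [MARK, RSS] is split into a prefix
 * flow [MARK, SET_TAG(unique id), JUMP] and a suffix flow that matches
 * the tag id and carries the RSS action, so the packet traverses
 * RX_CP_TBL before reaching the Q/RSS in RX_ACT_TBL.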
* @return @@ -3916,6 +5012,7 @@ static int flow_create_split_metadata(struct rte_eth_dev *dev, struct rte_flow *flow, uint64_t prefix_layers, + uint32_t prefix_mark, const struct rte_flow_attr *attr, const struct rte_flow_item items[], const struct rte_flow_action actions[], @@ -3939,8 +5036,9 @@ flow_create_split_metadata(struct rte_eth_dev *dev, config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY || !mlx5_flow_ext_mreg_supported(dev)) return flow_create_split_inner(dev, flow, NULL, prefix_layers, - attr, items, actions, external, - flow_idx, error); + prefix_mark, attr, items, + actions, external, flow_idx, + error); actions_n = flow_parse_metadata_split_actions_info(actions, &qrss, &encap_idx); if (qrss) { @@ -3974,7 +5072,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev, act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + sizeof(struct rte_flow_action_set_tag) + sizeof(struct rte_flow_action_jump); - ext_actions = rte_zmalloc(__func__, act_size, 0); + ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, + SOCKET_ID_ANY); if (!ext_actions) return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION, @@ -4010,7 +5109,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev, */ act_size = sizeof(struct rte_flow_action) * (actions_n + 1) + sizeof(struct mlx5_flow_action_copy_mreg); - ext_actions = rte_zmalloc(__func__, act_size, 0); + ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0, + SOCKET_ID_ANY); if (!ext_actions) return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION, @@ -4023,7 +5123,8 @@ flow_create_split_metadata(struct rte_eth_dev *dev, goto exit; } /* Add the unmodified original or prefix subflow. */ - ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr, + ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, + prefix_mark, attr, items, ext_actions ? ext_actions : actions, external, flow_idx, error); if (ret < 0) @@ -4037,7 +5138,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev, /* Internal PMD action to set register. */ struct mlx5_rte_flow_item_tag q_tag_spec = { .data = qrss_id, - .id = 0, + .id = REG_NON, }; struct rte_flow_item q_items[] = { { @@ -4086,7 +5187,7 @@ flow_create_split_metadata(struct rte_eth_dev *dev, } dev_flow = NULL; /* Add suffix subflow to execute Q/RSS. */ - ret = flow_create_split_inner(dev, flow, &dev_flow, layers, + ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 0, &q_attr, mtr_sfx ? items : q_items, q_actions, external, flow_idx, error); @@ -4104,7 +5205,7 @@ exit: * by flow_drv_destroy. */ flow_qrss_free_id(dev, qrss_id); - rte_free(ext_actions); + mlx5_free(ext_actions); return ret; } @@ -4122,6 +5223,10 @@ exit: * Pointer to Ethernet device. * @param[in] flow * Parent flow structure pointer. + * @param[in] prefix_layers + * Prefix subflow layers, may be 0. + * @param[in] prefix_mark + * Prefix subflow mark flag, may be 0. * @param[in] attr * Flow rule attributes. 
* @param[in] items @@ -4139,12 +5244,14 @@ exit: */ static int flow_create_split_meter(struct rte_eth_dev *dev, - struct rte_flow *flow, - const struct rte_flow_attr *attr, - const struct rte_flow_item items[], - const struct rte_flow_action actions[], - bool external, uint32_t flow_idx, - struct rte_flow_error *error) + struct rte_flow *flow, + uint64_t prefix_layers, + uint32_t prefix_mark, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + bool external, uint32_t flow_idx, + struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; struct rte_flow_action *sfx_actions = NULL; @@ -4169,7 +5276,8 @@ flow_create_split_meter(struct rte_eth_dev *dev, #define METER_SUFFIX_ITEM 4 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM + sizeof(struct mlx5_rte_flow_item_tag) * 2; - sfx_actions = rte_zmalloc(__func__, (act_size + item_size), 0); + sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size), + 0, SOCKET_ID_ANY); if (!sfx_actions) return rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ACTION, @@ -4186,8 +5294,10 @@ flow_create_split_meter(struct rte_eth_dev *dev, goto exit; } /* Add the prefix subflow. */ - ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr, - items, pre_actions, external, + ret = flow_create_split_inner(dev, flow, &dev_flow, + prefix_layers, 0, + attr, items, + pre_actions, external, flow_idx, error); if (ret) { ret = -rte_errno; @@ -4202,13 +5312,147 @@ flow_create_split_meter(struct rte_eth_dev *dev, /* Add the prefix subflow. */ ret = flow_create_split_metadata(dev, flow, dev_flow ? flow_get_prefix_layer_flags(dev_flow) : - 0, &sfx_attr, - sfx_items ? sfx_items : items, + prefix_layers, dev_flow ? + dev_flow->handle->mark : prefix_mark, + &sfx_attr, sfx_items ? + sfx_items : items, sfx_actions ? sfx_actions : actions, external, flow_idx, error); exit: if (sfx_actions) - rte_free(sfx_actions); + mlx5_free(sfx_actions); + return ret; +} + +/** + * The splitting for sample feature. + * + * Once Sample action is detected in the action list, the flow actions should + * be split into prefix sub flow and suffix sub flow. + * + * The original items remain in the prefix sub flow, all actions preceding the + * sample action and the sample action itself will be copied to the prefix + * sub flow, the actions following the sample action will be copied to the + * suffix sub flow, Queue action always be located in the suffix sub flow. + * + * In order to make the packet from prefix sub flow matches with suffix sub + * flow, an extra tag action be added into prefix sub flow, and the suffix sub + * flow uses tag item with the unique flow id. + * + * @param dev + * Pointer to Ethernet device. + * @param[in] flow + * Parent flow structure pointer. + * @param[in] attr + * Flow rule attributes. + * @param[in] items + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[in] external + * This flow rule is created by request external to PMD. + * @param[in] flow_idx + * This memory pool index to the flow. + * @param[out] error + * Perform verbose error reporting if not NULL. 
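+ *
+ * For example (illustrative NIC Rx case): [COUNT, SAMPLE, QUEUE, END]
+ * becomes a prefix flow [COUNT, TAG(unique id), SAMPLE, END] and a
+ * suffix flow matching TAG(unique id) with [QUEUE, END]; FDB egress
+ * (fdb_tx) skips the extra tag.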
+ * @return + * 0 on success, negative value otherwise + */ +static int +flow_create_split_sample(struct rte_eth_dev *dev, + struct rte_flow *flow, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + bool external, uint32_t flow_idx, + struct rte_flow_error *error) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct rte_flow_action *sfx_actions = NULL; + struct rte_flow_action *pre_actions = NULL; + struct rte_flow_item *sfx_items = NULL; + struct mlx5_flow *dev_flow = NULL; + struct rte_flow_attr sfx_attr = *attr; +#ifdef HAVE_IBV_FLOW_DV_SUPPORT + struct mlx5_flow_dv_sample_resource *sample_res; + struct mlx5_flow_tbl_data_entry *sfx_tbl_data; + struct mlx5_flow_tbl_resource *sfx_tbl; + union mlx5_flow_tbl_key sfx_table_key; +#endif + size_t act_size; + size_t item_size; + uint32_t fdb_tx = 0; + int32_t tag_id = 0; + int actions_n = 0; + int sample_action_pos; + int qrss_action_pos; + int ret = 0; + + if (priv->sampler_en) + actions_n = flow_check_match_action(actions, attr, + RTE_FLOW_ACTION_TYPE_SAMPLE, + &sample_action_pos, &qrss_action_pos); + if (actions_n) { + /* The prefix actions must includes sample, tag, end. */ + act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1) + + sizeof(struct mlx5_rte_flow_action_set_tag); + item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM + + sizeof(struct mlx5_rte_flow_item_tag) * 2; + sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + + item_size), 0, SOCKET_ID_ANY); + if (!sfx_actions) + return rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, "no memory to split " + "sample flow"); + /* The representor_id is -1 for uplink. */ + fdb_tx = (attr->transfer && priv->representor_id != -1); + if (!fdb_tx) + sfx_items = (struct rte_flow_item *)((char *)sfx_actions + + act_size); + pre_actions = sfx_actions + actions_n; + tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items, + actions, sfx_actions, + pre_actions, actions_n, + sample_action_pos, + qrss_action_pos, error); + if (tag_id < 0 || (!fdb_tx && !tag_id)) { + ret = -rte_errno; + goto exit; + } + /* Add the prefix subflow. */ + ret = flow_create_split_inner(dev, flow, &dev_flow, 0, 0, attr, + items, pre_actions, external, + flow_idx, error); + if (ret) { + ret = -rte_errno; + goto exit; + } + dev_flow->handle->split_flow_id = tag_id; +#ifdef HAVE_IBV_FLOW_DV_SUPPORT + /* Set the sfx group attr. */ + sample_res = (struct mlx5_flow_dv_sample_resource *) + dev_flow->dv.sample_res; + sfx_tbl = (struct mlx5_flow_tbl_resource *) + sample_res->normal_path_tbl; + sfx_tbl_data = container_of(sfx_tbl, + struct mlx5_flow_tbl_data_entry, tbl); + sfx_table_key.v64 = sfx_tbl_data->entry.key; + sfx_attr.group = sfx_attr.transfer ? + (sfx_table_key.table_id - 1) : + sfx_table_key.table_id; +#endif + } + /* Add the suffix subflow. */ + ret = flow_create_split_meter(dev, flow, dev_flow ? + flow_get_prefix_layer_flags(dev_flow) : 0, + dev_flow ? dev_flow->handle->mark : 0, + &sfx_attr, sfx_items ? sfx_items : items, + sfx_actions ? 
sfx_actions : actions, + external, flow_idx, error); +exit: + if (sfx_actions) + mlx5_free(sfx_actions); return ret; } @@ -4260,12 +5504,33 @@ flow_create_split_outer(struct rte_eth_dev *dev, { int ret; - ret = flow_create_split_meter(dev, flow, attr, items, - actions, external, flow_idx, error); + ret = flow_create_split_sample(dev, flow, attr, items, + actions, external, flow_idx, error); MLX5_ASSERT(ret <= 0); return ret; } +static struct mlx5_flow_tunnel * +flow_tunnel_from_rule(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[]) +{ + struct mlx5_flow_tunnel *tunnel; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" + if (is_flow_tunnel_match_rule(dev, attr, items, actions)) + tunnel = (struct mlx5_flow_tunnel *)items[0].spec; + else if (is_flow_tunnel_steer_rule(dev, attr, items, actions)) + tunnel = (struct mlx5_flow_tunnel *)actions[0].conf; + else + tunnel = NULL; +#pragma GCC diagnostic pop + + return tunnel; +} + /** * Create a flow and add it to @p list. * @@ -4294,15 +5559,18 @@ static uint32_t flow_list_create(struct rte_eth_dev *dev, uint32_t *list, const struct rte_flow_attr *attr, const struct rte_flow_item items[], - const struct rte_flow_action actions[], + const struct rte_flow_action original_actions[], bool external, struct rte_flow_error *error) { struct mlx5_priv *priv = dev->data->dev_private; struct rte_flow *flow = NULL; struct mlx5_flow *dev_flow; const struct rte_flow_action_rss *rss; + struct mlx5_translated_shared_action + shared_actions[MLX5_MAX_SHARED_ACTIONS]; + int shared_actions_n = MLX5_MAX_SHARED_ACTIONS; union { - struct rte_flow_expand_rss buf; + struct mlx5_flow_expand_rss buf; uint8_t buffer[2048]; } expand_buffer; union { @@ -4317,26 +5585,41 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS]; uint8_t buffer[2048]; } items_tx; - struct rte_flow_expand_rss *buf = &expand_buffer.buf; + struct mlx5_flow_expand_rss *buf = &expand_buffer.buf; struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *) priv->rss_desc)[!!priv->flow_idx]; - const struct rte_flow_action *p_actions_rx = actions; + const struct rte_flow_action *p_actions_rx; uint32_t i; uint32_t idx = 0; int hairpin_flow; uint32_t hairpin_id = 0; struct rte_flow_attr attr_tx = { .priority = 0 }; - int ret; - - hairpin_flow = flow_check_hairpin_split(dev, attr, actions); - ret = flow_drv_validate(dev, attr, items, p_actions_rx, + struct rte_flow_attr attr_factor = {0}; + const struct rte_flow_action *actions; + struct rte_flow_action *translated_actions = NULL; + struct mlx5_flow_tunnel *tunnel; + struct tunnel_default_miss_ctx default_miss_ctx = { 0, }; + int ret = flow_shared_actions_translate(original_actions, + shared_actions, + &shared_actions_n, + &translated_actions, error); + + if (ret < 0) { + MLX5_ASSERT(translated_actions == NULL); + return 0; + } + actions = translated_actions ? 
translated_actions : original_actions; + memcpy((void *)&attr_factor, (const void *)attr, sizeof(*attr)); + p_actions_rx = actions; + hairpin_flow = flow_check_hairpin_split(dev, &attr_factor, actions); + ret = flow_drv_validate(dev, &attr_factor, items, p_actions_rx, external, hairpin_flow, error); if (ret < 0) - return 0; + goto error_before_hairpin_split; if (hairpin_flow > 0) { if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) { rte_errno = EINVAL; - return 0; + goto error_before_hairpin_split; } flow_hairpin_split(dev, actions, actions_rx.actions, actions_hairpin_tx.actions, items_tx.items, @@ -4348,7 +5631,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, rte_errno = ENOMEM; goto error_before_flow; } - flow->drv_type = flow_get_drv_type(dev, attr); + flow->drv_type = flow_get_drv_type(dev, &attr_factor); if (hairpin_id != 0) flow->hairpin_flow_id = hairpin_id; MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN && @@ -4369,16 +5652,17 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, unsigned int graph_root; graph_root = find_graph_root(items, rss->level); - ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer), - items, rss->types, - mlx5_support_expansion, - graph_root); + ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer), + items, rss->types, + mlx5_support_expansion, graph_root); MLX5_ASSERT(ret > 0 && (unsigned int)ret < sizeof(expand_buffer.buffer)); } else { buf->entries = 1; buf->entry[0].pattern = (void *)(uintptr_t)items; } + flow->shared_rss = flow_get_shared_rss_action(shared_actions, + shared_actions_n); /* * Record the start index when there is a nested call. All sub-flows * need to be translated before another calling. @@ -4394,12 +5678,25 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, * depending on configuration. In the simplest * case it just creates unmodified original flow. */ - ret = flow_create_split_outer(dev, flow, attr, + ret = flow_create_split_outer(dev, flow, &attr_factor, buf->entry[i].pattern, p_actions_rx, external, idx, error); if (ret < 0) goto error; + if (is_flow_tunnel_steer_rule(dev, attr, + buf->entry[i].pattern, + p_actions_rx)) { + ret = flow_tunnel_add_default_miss(dev, flow, attr, + p_actions_rx, + idx, + &default_miss_ctx, + error); + if (ret < 0) { + mlx5_free(default_miss_ctx.queue); + goto error; + } + } } /* Create the tx flow. */ if (hairpin_flow) { @@ -4431,8 +5728,8 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, * the egress Flows belong to the different device and * copy table should be updated in peer NIC Rx domain. */ - if (attr->ingress && - (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { + if (attr_factor.ingress && + (external || attr_factor.group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) { ret = flow_mreg_update_copy_table(dev, flow, actions, error); if (ret) goto error; @@ -4450,10 +5747,18 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list, ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx, flow, next); flow_rxq_flags_set(dev, flow); + rte_free(translated_actions); /* Nested flow creation index recovery. 
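The per-port index into the preallocated intermediate dev_flow storage is rewound here (to the recorded nesting point, or to zero for a top-level rule) so the buffers can be reused by the next rule.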
*/ priv->flow_idx = priv->flow_nested_idx; if (priv->flow_nested_idx) priv->flow_nested_idx = 0; + tunnel = flow_tunnel_from_rule(dev, attr, items, actions); + if (tunnel) { + flow->tunnel = 1; + flow->tunnel_id = tunnel->tunnel_id; + __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED); + mlx5_free(default_miss_ctx.queue); + } return idx; error: MLX5_ASSERT(flow); @@ -4471,6 +5776,8 @@ error_before_flow: priv->flow_idx = priv->flow_nested_idx; if (priv->flow_nested_idx) priv->flow_nested_idx = 0; +error_before_hairpin_split: + rte_free(translated_actions); return 0; } @@ -4534,14 +5841,28 @@ int mlx5_flow_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, const struct rte_flow_item items[], - const struct rte_flow_action actions[], + const struct rte_flow_action original_actions[], struct rte_flow_error *error) { int hairpin_flow; + struct mlx5_translated_shared_action + shared_actions[MLX5_MAX_SHARED_ACTIONS]; + int shared_actions_n = MLX5_MAX_SHARED_ACTIONS; + const struct rte_flow_action *actions; + struct rte_flow_action *translated_actions = NULL; + int ret = flow_shared_actions_translate(original_actions, + shared_actions, + &shared_actions_n, + &translated_actions, error); + if (ret) + return ret; + actions = translated_actions ? translated_actions : original_actions; hairpin_flow = flow_check_hairpin_split(dev, attr, actions); - return flow_drv_validate(dev, attr, items, actions, + ret = flow_drv_validate(dev, attr, items, actions, true, hairpin_flow, error); + rte_free(translated_actions); + return ret; } /** @@ -4573,6 +5894,7 @@ mlx5_flow_create(struct rte_eth_dev *dev, "port not started"); return NULL; } + return (void *)(uintptr_t)flow_list_create(dev, &priv->flows, attr, items, actions, true, error); } @@ -4622,11 +5944,18 @@ flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list, } if (priv_fdir_flow) { LIST_REMOVE(priv_fdir_flow, next); - rte_free(priv_fdir_flow->fdir); - rte_free(priv_fdir_flow); + mlx5_free(priv_fdir_flow->fdir); + mlx5_free(priv_fdir_flow); } } mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx); + if (flow->tunnel) { + struct mlx5_flow_tunnel *tunnel; + tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id); + RTE_VERIFY(tunnel); + if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED)) + mlx5_flow_tunnel_free(dev, tunnel); + } } /** @@ -4763,11 +6092,12 @@ mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev) struct mlx5_priv *priv = dev->data->dev_private; if (!priv->inter_flows) { - priv->inter_flows = rte_calloc(__func__, 1, + priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO, MLX5_NUM_MAX_DEV_FLOWS * sizeof(struct mlx5_flow) + (sizeof(struct mlx5_flow_rss_desc) + - sizeof(uint16_t) * UINT16_MAX) * 2, 0); + sizeof(uint16_t) * UINT16_MAX) * 2, 0, + SOCKET_ID_ANY); if (!priv->inter_flows) { DRV_LOG(ERR, "can't allocate intermediate memory."); return; @@ -4791,7 +6121,7 @@ mlx5_flow_free_intermediate(struct rte_eth_dev *dev) { struct mlx5_priv *priv = dev->data->dev_private; - rte_free(priv->inter_flows); + mlx5_free(priv->inter_flows); priv->inter_flows = NULL; } @@ -4986,6 +6316,62 @@ mlx5_ctrl_flow(struct rte_eth_dev *dev, return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL); } +/** + * Create default miss flow rule matching lacp traffic + * + * @param dev + * Pointer to Ethernet device. + * @param eth_spec + * An Ethernet flow spec to apply. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
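+ *
+ * Intended usage (illustrative): called while starting a bonding member
+ * port, e.g.
+ *
+ *   if (mlx5_flow_lacp_miss(dev))
+ *       DRV_LOG(ERR, "port %u cannot create LACP miss rule",
+ *               dev->data->port_id);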
+ */ +int +mlx5_flow_lacp_miss(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + /* + * The LACP matching is done by only using ether type since using + * a multicast dst mac causes kernel to give low priority to this flow. + */ + static const struct rte_flow_item_eth lacp_spec = { + .type = RTE_BE16(0x8809), + }; + static const struct rte_flow_item_eth lacp_mask = { + .type = 0xffff, + }; + const struct rte_flow_attr attr = { + .ingress = 1, + }; + struct rte_flow_item items[] = { + { + .type = RTE_FLOW_ITEM_TYPE_ETH, + .spec = &lacp_spec, + .mask = &lacp_mask, + }, + { + .type = RTE_FLOW_ITEM_TYPE_END, + }, + }; + struct rte_flow_action actions[] = { + { + .type = (enum rte_flow_action_type) + MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS, + }, + { + .type = RTE_FLOW_ACTION_TYPE_END, + }, + }; + struct rte_flow_error error; + uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows, + &attr, items, actions, false, &error); + + if (!flow_idx) + return -rte_errno; + return 0; +} + /** * Destroy a flow. * @@ -5041,9 +6427,13 @@ mlx5_flow_isolate(struct rte_eth_dev *dev, } priv->isolated = !!enable; if (enable) - dev->dev_ops = &mlx5_dev_ops_isolate; + dev->dev_ops = &mlx5_os_dev_ops_isolate; else - dev->dev_ops = &mlx5_dev_ops; + dev->dev_ops = &mlx5_os_dev_ops; + + dev->rx_descriptor_status = mlx5_rx_descriptor_status; + dev->tx_descriptor_status = mlx5_tx_descriptor_status; + return 0; } @@ -5375,7 +6765,8 @@ flow_fdir_filter_add(struct rte_eth_dev *dev, uint32_t flow_idx; int ret; - fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0); + fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0, + SOCKET_ID_ANY); if (!fdir_flow) { rte_errno = ENOMEM; return -rte_errno; @@ -5388,8 +6779,9 @@ flow_fdir_filter_add(struct rte_eth_dev *dev, rte_errno = EEXIST; goto error; } - priv_fdir_flow = rte_zmalloc(__func__, sizeof(struct mlx5_fdir_flow), - 0); + priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, + sizeof(struct mlx5_fdir_flow), + 0, SOCKET_ID_ANY); if (!priv_fdir_flow) { rte_errno = ENOMEM; goto error; @@ -5408,8 +6800,8 @@ flow_fdir_filter_add(struct rte_eth_dev *dev, dev->data->port_id, (void *)flow); return 0; error: - rte_free(priv_fdir_flow); - rte_free(fdir_flow); + mlx5_free(priv_fdir_flow); + mlx5_free(fdir_flow); return -rte_errno; } @@ -5449,8 +6841,8 @@ flow_fdir_filter_delete(struct rte_eth_dev *dev, LIST_REMOVE(priv_fdir_flow, next); flow_idx = priv_fdir_flow->rix_flow; flow_list_destroy(dev, &priv->flows, flow_idx); - rte_free(priv_fdir_flow->fdir); - rte_free(priv_fdir_flow); + mlx5_free(priv_fdir_flow->fdir); + mlx5_free(priv_fdir_flow); DRV_LOG(DEBUG, "port %u deleted FDIR flow %u", dev->data->port_id, flow_idx); return 0; @@ -5495,8 +6887,8 @@ flow_fdir_filter_flush(struct rte_eth_dev *dev) priv_fdir_flow = LIST_FIRST(&priv->fdir_flows); LIST_REMOVE(priv_fdir_flow, next); flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow); - rte_free(priv_fdir_flow->fdir); - rte_free(priv_fdir_flow); + mlx5_free(priv_fdir_flow->fdir); + mlx5_free(priv_fdir_flow); } } @@ -5787,46 +7179,126 @@ mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt, return -ENOTSUP; } -#define MLX5_POOL_QUERY_FREQ_US 1000000 - /** - * Get number of all validate pools. + * Allocate a new memory for the counter values wrapped by all the needed + * management. * * @param[in] sh - * Pointer to mlx5_ibv_shared object. + * Pointer to mlx5_dev_ctx_shared object. * * @return - * The number of all validate pools. + * 0 on success, a negative errno value otherwise. 
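+ *
+ * The single allocation holds, in order: the raw counter records for
+ * raws_n query batches, the raws_n mlx5_counter_stats_raw descriptors,
+ * and the mlx5_counter_stats_mem_mng trailer. Only the leading data
+ * area is registered as a DevX umem/mkey, so firmware can DMA batch
+ * query results straight into it.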
*/ -static uint32_t -mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh) +static int +mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh) { - uint8_t age, i; - uint32_t pools_n = 0; - struct mlx5_pools_container *cont; + struct mlx5_devx_mkey_attr mkey_attr; + struct mlx5_counter_stats_mem_mng *mem_mng; + volatile struct flow_counter_stats *raw_data; + int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES; + int size = (sizeof(struct flow_counter_stats) * + MLX5_COUNTERS_PER_POOL + + sizeof(struct mlx5_counter_stats_raw)) * raws_n + + sizeof(struct mlx5_counter_stats_mem_mng); + size_t pgsize = rte_mem_page_size(); + uint8_t *mem; + int i; - for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) { - for (i = 0; i < 2 ; ++i) { - cont = MLX5_CNT_CONTAINER(sh, i, 0, age); - pools_n += rte_atomic16_read(&cont->n_valid); - } + if (pgsize == (size_t)-1) { + DRV_LOG(ERR, "Failed to get mem page size"); + rte_errno = ENOMEM; + return -ENOMEM; + } + mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY); + if (!mem) { + rte_errno = ENOMEM; + return -ENOMEM; + } + mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1; + size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n; + mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size, + IBV_ACCESS_LOCAL_WRITE); + if (!mem_mng->umem) { + rte_errno = errno; + mlx5_free(mem); + return -rte_errno; + } + mkey_attr.addr = (uintptr_t)mem; + mkey_attr.size = size; + mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem); + mkey_attr.pd = sh->pdn; + mkey_attr.log_entity_size = 0; + mkey_attr.pg_access = 0; + mkey_attr.klm_array = NULL; + mkey_attr.klm_num = 0; + mkey_attr.relaxed_ordering = sh->cmng.relaxed_ordering; + mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr); + if (!mem_mng->dm) { + mlx5_glue->devx_umem_dereg(mem_mng->umem); + rte_errno = errno; + mlx5_free(mem); + return -rte_errno; + } + mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size); + raw_data = (volatile struct flow_counter_stats *)mem; + for (i = 0; i < raws_n; ++i) { + mem_mng->raws[i].mem_mng = mem_mng; + mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL; + } + for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i) + LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, + mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i, + next); + LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next); + sh->cmng.mem_mng = mem_mng; + return 0; +} + +/** + * Set the statistic memory to the new counter pool. + * + * @param[in] sh + * Pointer to mlx5_dev_ctx_shared object. + * @param[in] pool + * Pointer to the pool to set the statistic memory. + * + * @return + * 0 on success, a negative errno value otherwise. + */ +static int +mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh, + struct mlx5_flow_counter_pool *pool) +{ + struct mlx5_flow_counter_mng *cmng = &sh->cmng; + /* Resize statistic memory once used out. */ + if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) && + mlx5_flow_create_counter_stat_mem_mng(sh)) { + DRV_LOG(ERR, "Cannot resize counter stat mem."); + return -1; } - return pools_n; + rte_spinlock_lock(&pool->sl); + pool->raw = cmng->mem_mng->raws + pool->index % + MLX5_CNT_CONTAINER_RESIZE; + rte_spinlock_unlock(&pool->sl); + pool->raw_hw = NULL; + return 0; } +#define MLX5_POOL_QUERY_FREQ_US 1000000 + /** * Set the periodic procedure for triggering asynchronous batch queries for all * the counter pools. * * @param[in] sh - * Pointer to mlx5_ibv_shared object. + * Pointer to mlx5_dev_ctx_shared object. 
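+ *
+ * The MLX5_POOL_QUERY_FREQ_US period is divided evenly among the valid
+ * pools, i.e. with N pools one pool query is triggered every
+ * 1000000 / N microseconds.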
*/ void -mlx5_set_query_alarm(struct mlx5_ibv_shared *sh) +mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh) { uint32_t pools_n, us; - pools_n = mlx5_get_all_valid_pool_count(sh); + pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED); us = MLX5_POOL_QUERY_FREQ_US / pools_n; DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us); if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) { @@ -5847,50 +7319,22 @@ mlx5_set_query_alarm(struct mlx5_ibv_shared *sh) void mlx5_flow_query_alarm(void *arg) { - struct mlx5_ibv_shared *sh = arg; - struct mlx5_devx_obj *dcs; - uint16_t offset; + struct mlx5_dev_ctx_shared *sh = arg; int ret; - uint8_t batch = sh->cmng.batch; - uint8_t age = sh->cmng.age; uint16_t pool_index = sh->cmng.pool_index; - struct mlx5_pools_container *cont; - struct mlx5_pools_container *mcont; + struct mlx5_flow_counter_mng *cmng = &sh->cmng; struct mlx5_flow_counter_pool *pool; + uint16_t n_valid; if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES) goto set_alarm; -next_container: - cont = MLX5_CNT_CONTAINER(sh, batch, 1, age); - mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age); - /* Check if resize was done and need to flip a container. */ - if (cont != mcont) { - if (cont->pools) { - /* Clean the old container. */ - rte_free(cont->pools); - memset(cont, 0, sizeof(*cont)); - } - rte_cio_wmb(); - /* Flip the host container. */ - sh->cmng.mhi[batch][age] ^= (uint8_t)2; - cont = mcont; - } - if (!cont->pools) { - /* 2 empty containers case is unexpected. */ - if (unlikely(batch != sh->cmng.batch) && - unlikely(age != sh->cmng.age)) { - goto set_alarm; - } - batch ^= 0x1; - pool_index = 0; - if (batch == 0 && pool_index == 0) { - age ^= 0x1; - sh->cmng.batch = batch; - sh->cmng.age = age; - } - goto next_container; - } - pool = cont->pools[pool_index]; + rte_spinlock_lock(&cmng->pool_update_sl); + pool = cmng->pools[pool_index]; + n_valid = cmng->n_valid; + rte_spinlock_unlock(&cmng->pool_update_sl); + /* Set the statistic memory to the new created pool. */ + if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool))) + goto set_alarm; if (pool->raw_hw) /* There is a pool query in progress. */ goto set_alarm; @@ -5899,44 +7343,34 @@ next_container: if (!pool->raw_hw) /* No free counter statistics raw memory. */ goto set_alarm; - dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read - (&pool->a64_dcs); - offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL; /* * Identify the counters released between query trigger and query - * handle more effiecntly. The counter released in this gap period + * handle more efficiently. The counter released in this gap period * should wait for a new round of query as the new arrived packets * will not be taken into account. 
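 * Counters released in this gap are parked on the pool's
 * counters[query_gen] list and rejoin the global free list only after
 * the query they missed has completed.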
*/ - rte_atomic64_add(&pool->start_query_gen, 1); - ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL - - offset, NULL, NULL, + pool->query_gen++; + ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0, + MLX5_COUNTERS_PER_POOL, + NULL, NULL, pool->raw_hw->mem_mng->dm->id, (void *)(uintptr_t) - (pool->raw_hw->data + offset), + pool->raw_hw->data, sh->devx_comp, (uint64_t)(uintptr_t)pool); if (ret) { - rte_atomic64_sub(&pool->start_query_gen, 1); DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID" " %d", pool->min_dcs->id); pool->raw_hw = NULL; goto set_alarm; } - pool->raw_hw->min_dcs_id = dcs->id; LIST_REMOVE(pool->raw_hw, next); sh->cmng.pending_queries++; pool_index++; - if (pool_index >= rte_atomic16_read(&cont->n_valid)) { - batch ^= 0x1; + if (pool_index >= n_valid) pool_index = 0; - if (batch == 0 && pool_index == 0) - age ^= 0x1; - } set_alarm: - sh->cmng.batch = batch; sh->cmng.pool_index = pool_index; - sh->cmng.age = age; mlx5_set_query_alarm(sh); } @@ -5944,12 +7378,12 @@ set_alarm: * Check and callback event for new aged flow in the counter pool * * @param[in] sh - * Pointer to mlx5_ibv_shared object. + * Pointer to mlx5_dev_ctx_shared object. * @param[in] pool * Pointer to Current counter pool. */ static void -mlx5_flow_aging_check(struct mlx5_ibv_shared *sh, +mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh, struct mlx5_flow_counter_pool *pool) { struct mlx5_priv *priv; @@ -5958,19 +7392,26 @@ mlx5_flow_aging_check(struct mlx5_ibv_shared *sh, struct mlx5_age_param *age_param; struct mlx5_counter_stats_raw *cur = pool->raw_hw; struct mlx5_counter_stats_raw *prev = pool->raw; - uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10); + const uint64_t curr_time = MLX5_CURR_TIME_SEC; + const uint32_t time_delta = curr_time - pool->time_of_last_age_check; + uint16_t expected = AGE_CANDIDATE; uint32_t i; + pool->time_of_last_age_check = curr_time; for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) { cnt = MLX5_POOL_GET_CNT(pool, i); age_param = MLX5_CNT_TO_AGE(cnt); - if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE) + if (__atomic_load_n(&age_param->state, + __ATOMIC_RELAXED) != AGE_CANDIDATE) continue; if (cur->data[i].hits != prev->data[i].hits) { - age_param->expire = curr + age_param->timeout; + __atomic_store_n(&age_param->sec_since_last_hit, 0, + __ATOMIC_RELAXED); continue; } - if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2)) + if (__atomic_add_fetch(&age_param->sec_since_last_hit, + time_delta, + __ATOMIC_RELAXED) <= age_param->timeout) continue; /** * Hold the lock first, or if between the @@ -5981,12 +7422,10 @@ mlx5_flow_aging_check(struct mlx5_ibv_shared *sh, priv = rte_eth_devices[age_param->port_id].data->dev_private; age_info = GET_PORT_AGE_INFO(priv); rte_spinlock_lock(&age_info->aged_sl); - /* If the cpmset fails, release happens. 
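The compare-and-swap flips the state from AGE_CANDIDATE to AGE_TMOUT at most once, so only a single thread queues the counter to the aged list.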
*/ - if (rte_atomic16_cmpset((volatile uint16_t *) - &age_param->state, - AGE_CANDIDATE, - AGE_TMOUT) == - AGE_CANDIDATE) { + if (__atomic_compare_exchange_n(&age_param->state, &expected, + AGE_TMOUT, false, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED)) { TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next); MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); } @@ -5997,7 +7436,7 @@ mlx5_flow_aging_check(struct mlx5_ibv_shared *sh, if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW)) continue; if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) - _rte_eth_dev_callback_process + rte_eth_dev_callback_process (&rte_eth_devices[sh->port[i].devx_ih_port_id], RTE_ETH_EVENT_FLOW_AGED, NULL); age_info->flags = 0; @@ -6009,55 +7448,164 @@ mlx5_flow_aging_check(struct mlx5_ibv_shared *sh, * query. This function is probably called by the host thread. * * @param[in] sh - * The pointer to the shared IB device context. + * The pointer to the shared device context. * @param[in] async_id * The Devx async ID. * @param[in] status * The status of the completion. */ void -mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh, +mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh, uint64_t async_id, int status) { struct mlx5_flow_counter_pool *pool = (struct mlx5_flow_counter_pool *)(uintptr_t)async_id; struct mlx5_counter_stats_raw *raw_to_free; + uint8_t query_gen = pool->query_gen ^ 1; + struct mlx5_flow_counter_mng *cmng = &sh->cmng; + enum mlx5_counter_type cnt_type = + pool->is_aged ? MLX5_COUNTER_TYPE_AGE : + MLX5_COUNTER_TYPE_ORIGIN; if (unlikely(status)) { - rte_atomic64_sub(&pool->start_query_gen, 1); raw_to_free = pool->raw_hw; } else { raw_to_free = pool->raw; - if (IS_AGE_POOL(pool)) + if (pool->is_aged) mlx5_flow_aging_check(sh, pool); rte_spinlock_lock(&pool->sl); pool->raw = pool->raw_hw; rte_spinlock_unlock(&pool->sl); - MLX5_ASSERT(rte_atomic64_read(&pool->end_query_gen) + 1 == - rte_atomic64_read(&pool->start_query_gen)); - rte_atomic64_set(&pool->end_query_gen, - rte_atomic64_read(&pool->start_query_gen)); /* Be sure the new raw counters data is updated in memory. */ - rte_cio_wmb(); + rte_io_wmb(); + if (!TAILQ_EMPTY(&pool->counters[query_gen])) { + rte_spinlock_lock(&cmng->csl[cnt_type]); + TAILQ_CONCAT(&cmng->counters[cnt_type], + &pool->counters[query_gen], next); + rte_spinlock_unlock(&cmng->csl[cnt_type]); + } } LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next); pool->raw_hw = NULL; sh->cmng.pending_queries--; } +static const struct mlx5_flow_tbl_data_entry * +tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_dev_ctx_shared *sh = priv->sh; + struct mlx5_hlist_entry *he; + union tunnel_offload_mark mbits = { .val = mark }; + union mlx5_flow_tbl_key table_key = { + { + .table_id = tunnel_id_to_flow_tbl(mbits.table_id), + .reserved = 0, + .domain = !!mbits.transfer, + .direction = 0, + } + }; + he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64); + return he ? + container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL; +} + +static uint32_t +tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev, + const struct mlx5_flow_tunnel *tunnel, + uint32_t group, uint32_t *table, + struct rte_flow_error *error) +{ + struct mlx5_hlist_entry *he; + struct tunnel_tbl_entry *tte; + union tunnel_tbl_key key = { + .tunnel_id = tunnel ? tunnel->tunnel_id : 0, + .group = group + }; + struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev); + struct mlx5_hlist *group_hash; + + group_hash = tunnel ? 
tunnel->groups : thub->groups; + he = mlx5_hlist_lookup(group_hash, key.val); + if (!he) { + int ret; + tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, + sizeof(*tte), 0, + SOCKET_ID_ANY); + if (!tte) + goto err; + tte->hash.key = key.val; + ret = mlx5_flow_id_get(thub->table_ids, &tte->flow_table); + if (ret) { + mlx5_free(tte); + goto err; + } + tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table); + mlx5_hlist_insert(group_hash, &tte->hash); + } else { + tte = container_of(he, typeof(*tte), hash); + } + *table = tte->flow_table; + DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x", + dev->data->port_id, key.tunnel_id, group, *table); + return 0; + +err: + return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + NULL, "tunnel group index not supported"); +} + +static int +flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table, + struct flow_grp_info grp_info, struct rte_flow_error *error) +{ + if (grp_info.transfer && grp_info.external && grp_info.fdb_def_rule) { + if (group == UINT32_MAX) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + NULL, + "group index not supported"); + *table = group + 1; + } else { + *table = group; + } + DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table); + return 0; +} + /** * Translate the rte_flow group index to HW table value. * - * @param[in] attributes - * Pointer to flow attributes - * @param[in] external - * Value is part of flow rule created by request external to PMD. + * If tunnel offload is disabled, all group ids converted to flow table + * id using the standard method. + * If tunnel offload is enabled, group id can be converted using the + * standard or tunnel conversion method. Group conversion method + * selection depends on flags in `grp_info` parameter: + * - Internal (grp_info.external == 0) groups conversion uses the + * standard method. + * - Group ids in JUMP action converted with the tunnel conversion. + * - Group id in rule attribute conversion depends on a rule type and + * group id value: + * ** non zero group attributes converted with the tunnel method + * ** zero group attribute in non-tunnel rule is converted using the + * standard method - there's only one root table + * ** zero group attribute in steer tunnel rule is converted with the + * standard method - single root table + * ** zero group attribute in match tunnel rule is a special OvS + * case: that value is used for portability reasons. That group + * id is converted with the tunnel conversion method. + * + * @param[in] dev + * Port device + * @param[in] tunnel + * PMD tunnel offload object * @param[in] group * rte_flow group index value. - * @param[out] fdb_def_rule - * Whether fdb jump to table 1 is configured. * @param[out] table * HW table value. + * @param[in] grp_info + * flags used for conversion * @param[out] error * Pointer to error structure. * @@ -6065,22 +7613,36 @@ mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh, * 0 on success, a negative errno value otherwise and rte_errno is set. 
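 *
 * E.g. with the standard method, an external transfer rule with
 * fdb_def_rule set maps group 3 to table 3 * MLX5_FLOW_TABLE_FACTOR + 1,
 * while an internal group maps to its table one to one.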
*/ int -mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external, - uint32_t group, bool fdb_def_rule, uint32_t *table, +mlx5_flow_group_to_table(struct rte_eth_dev *dev, + const struct mlx5_flow_tunnel *tunnel, + uint32_t group, uint32_t *table, + struct flow_grp_info grp_info, struct rte_flow_error *error) { - if (attributes->transfer && external && fdb_def_rule) { - if (group == UINT32_MAX) - return rte_flow_error_set - (error, EINVAL, - RTE_FLOW_ERROR_TYPE_ATTR_GROUP, - NULL, - "group index not supported"); - *table = group + 1; + int ret; + bool standard_translation; + + if (grp_info.external && group < MLX5_MAX_TABLES_EXTERNAL) + group *= MLX5_FLOW_TABLE_FACTOR; + if (is_tunnel_offload_active(dev)) { + standard_translation = !grp_info.external || + grp_info.std_tbl_fix; } else { - *table = group; + standard_translation = true; } - return 0; + DRV_LOG(DEBUG, + "port %u group=%#x transfer=%d external=%d fdb_def_rule=%d translate=%s", + dev->data->port_id, group, grp_info.transfer, + grp_info.external, grp_info.fdb_def_rule, + standard_translation ? "STANDARD" : "TUNNEL"); + if (standard_translation) + ret = flow_group_to_table(dev->data->port_id, group, table, + grp_info, error); + else + ret = tunnel_flow_group_to_flow_table(dev, tunnel, group, + table, error); + + return ret; } /** @@ -6154,7 +7716,7 @@ mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev) flow_list_destroy(dev, NULL, flow_idx); } for (; n < MLX5_MREG_C_NUM; ++n) - config->flow_mreg_c[n] = REG_NONE; + config->flow_mreg_c[n] = REG_NON; return 0; } @@ -6177,8 +7739,13 @@ mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow_error *error __rte_unused) { struct mlx5_priv *priv = dev->data->dev_private; - struct mlx5_ibv_shared *sh = priv->sh; + struct mlx5_dev_ctx_shared *sh = priv->sh; + if (!priv->config.dv_flow_en) { + if (fputs("device dv flow disabled\n", file) <= 0) + return -errno; + return -ENOTSUP; + } return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain, sh->tx_domain, file); } @@ -6219,3 +7786,416 @@ mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts, dev->data->port_id); return -ENOTSUP; } + +/* Wrapper for driver action_validate op callback */ +static int +flow_drv_action_validate(struct rte_eth_dev *dev, + const struct rte_flow_shared_action_conf *conf, + const struct rte_flow_action *action, + const struct mlx5_flow_driver_ops *fops, + struct rte_flow_error *error) +{ + static const char err_msg[] = "shared action validation unsupported"; + + if (!fops->action_validate) { + DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg); + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, + NULL, err_msg); + return -rte_errno; + } + return fops->action_validate(dev, conf, action, error); +} + +/** + * Destroys the shared action by handle. + * + * @param dev + * Pointer to Ethernet device structure. + * @param[in] action + * Handle for the shared action to be destroyed. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + * + * @note: wrapper for driver action_create op callback. 
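+ * (The driver callback dispatched below is fops->action_destroy.)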
+ */ +static int +mlx5_shared_action_destroy(struct rte_eth_dev *dev, + struct rte_flow_shared_action *action, + struct rte_flow_error *error) +{ + static const char err_msg[] = "shared action destruction unsupported"; + struct rte_flow_attr attr = { .transfer = 0 }; + const struct mlx5_flow_driver_ops *fops = + flow_get_drv_ops(flow_get_drv_type(dev, &attr)); + + if (!fops->action_destroy) { + DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg); + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, + NULL, err_msg); + return -rte_errno; + } + return fops->action_destroy(dev, action, error); +} + +/* Wrapper for driver action_destroy op callback */ +static int +flow_drv_action_update(struct rte_eth_dev *dev, + struct rte_flow_shared_action *action, + const void *action_conf, + const struct mlx5_flow_driver_ops *fops, + struct rte_flow_error *error) +{ + static const char err_msg[] = "shared action update unsupported"; + + if (!fops->action_update) { + DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg); + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, + NULL, err_msg); + return -rte_errno; + } + return fops->action_update(dev, action, action_conf, error); +} + +/** + * Create shared action for reuse in multiple flow rules. + * + * @param dev + * Pointer to Ethernet device structure. + * @param[in] action + * Action configuration for shared action creation. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * @return + * A valid handle in case of success, NULL otherwise and rte_errno is set. + */ +static struct rte_flow_shared_action * +mlx5_shared_action_create(struct rte_eth_dev *dev, + const struct rte_flow_shared_action_conf *conf, + const struct rte_flow_action *action, + struct rte_flow_error *error) +{ + static const char err_msg[] = "shared action creation unsupported"; + struct rte_flow_attr attr = { .transfer = 0 }; + const struct mlx5_flow_driver_ops *fops = + flow_get_drv_ops(flow_get_drv_type(dev, &attr)); + + if (flow_drv_action_validate(dev, conf, action, fops, error)) + return NULL; + if (!fops->action_create) { + DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg); + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, + NULL, err_msg); + return NULL; + } + return fops->action_create(dev, conf, action, error); +} + +/** + * Updates inplace the shared action configuration pointed by *action* handle + * with the configuration provided as *action* argument. + * The update of the shared action configuration effects all flow rules reusing + * the action via handle. + * + * @param dev + * Pointer to Ethernet device structure. + * @param[in] shared_action + * Handle for the shared action to be updated. + * @param[in] action + * Action specification used to modify the action pointed by handle. + * *action* should be of same type with the action pointed by the *action* + * handle argument, otherwise considered as invalid. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
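+ *
+ * Only MLX5_RTE_FLOW_ACTION_TYPE_SHARED_RSS handles can be updated at
+ * this point, and the replacement configuration must itself be an RSS
+ * action.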
+ */ +static int +mlx5_shared_action_update(struct rte_eth_dev *dev, + struct rte_flow_shared_action *shared_action, + const struct rte_flow_action *action, + struct rte_flow_error *error) +{ + struct rte_flow_attr attr = { .transfer = 0 }; + const struct mlx5_flow_driver_ops *fops = + flow_get_drv_ops(flow_get_drv_type(dev, &attr)); + int ret; + + switch (shared_action->type) { + case MLX5_RTE_FLOW_ACTION_TYPE_SHARED_RSS: + if (action->type != RTE_FLOW_ACTION_TYPE_RSS) { + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "update action type invalid"); + } + ret = flow_drv_action_validate(dev, NULL, action, fops, error); + if (ret) + return ret; + return flow_drv_action_update(dev, shared_action, action->conf, + fops, error); + default: + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "action type not supported"); + } +} + +/** + * Query the shared action by handle. + * + * This function allows retrieving action-specific data such as counters. + * Data is gathered by special action which may be present/referenced in + * more than one flow rule definition. + * + * \see RTE_FLOW_ACTION_TYPE_COUNT + * + * @param dev + * Pointer to Ethernet device structure. + * @param[in] action + * Handle for the shared action to query. + * @param[in, out] data + * Pointer to storage for the associated query data type. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx5_shared_action_query(struct rte_eth_dev *dev, + const struct rte_flow_shared_action *action, + void *data, + struct rte_flow_error *error) +{ + (void)dev; + switch (action->type) { + case MLX5_RTE_FLOW_ACTION_TYPE_SHARED_RSS: + __atomic_load(&action->refcnt, (uint32_t *)data, + __ATOMIC_RELAXED); + return 0; + default: + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "action type not supported"); + } +} + +/** + * Destroy all shared actions. + * + * @param dev + * Pointer to Ethernet device. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
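+ *
+ * Pops entries from priv->shared_actions until the list drains; the
+ * status of the last destroy attempt is returned.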
+/**
+ * Destroy all shared actions.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_shared_action_flush(struct rte_eth_dev *dev)
+{
+	struct rte_flow_error error;
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_flow_shared_action *action;
+	int ret = 0;
+
+	while (!LIST_EMPTY(&priv->shared_actions)) {
+		action = LIST_FIRST(&priv->shared_actions);
+		ret = mlx5_shared_action_destroy(dev, action, &error);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static void
+mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
+		      struct mlx5_flow_tunnel *tunnel)
+{
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct mlx5_flow_id_pool *id_pool = thub->tunnel_ids;
+
+	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
+		dev->data->port_id, tunnel->tunnel_id);
+	RTE_VERIFY(!__atomic_load_n(&tunnel->refctn, __ATOMIC_RELAXED));
+	LIST_REMOVE(tunnel, chain);
+	mlx5_flow_id_release(id_pool, tunnel->tunnel_id);
+	mlx5_hlist_destroy(tunnel->groups, NULL, NULL);
+	mlx5_free(tunnel);
+}
+
+static struct mlx5_flow_tunnel *
+mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
+{
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct mlx5_flow_tunnel *tun;
+
+	LIST_FOREACH(tun, &thub->tunnels, chain) {
+		if (tun->tunnel_id == id)
+			break;
+	}
+
+	return tun;
+}
+
+static struct mlx5_flow_tunnel *
+mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
+			  const struct rte_flow_tunnel *app_tunnel)
+{
+	int ret;
+	struct mlx5_flow_tunnel *tunnel;
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct mlx5_flow_id_pool *id_pool = thub->tunnel_ids;
+	uint32_t id;
+
+	ret = mlx5_flow_id_get(id_pool, &id);
+	if (ret)
+		return NULL;
+	/*
+	 * The mlx5 flow tunnel is an auxiliary data structure. It is not
+	 * part of the I/O path, so there is no need to allocate it from
+	 * the huge-page pools dedicated to I/O.
+	 */
+	tunnel = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*tunnel),
+			     0, SOCKET_ID_ANY);
+	if (!tunnel) {
+		mlx5_flow_id_release(id_pool, id);
+		return NULL;
+	}
+	tunnel->groups = mlx5_hlist_create("tunnel groups", 1024);
+	if (!tunnel->groups) {
+		mlx5_flow_id_release(id_pool, id);
+		mlx5_free(tunnel);
+		return NULL;
+	}
+	/* Initialize the new PMD tunnel. */
+	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
+	tunnel->tunnel_id = id;
+	tunnel->action.type = (typeof(tunnel->action.type))
+			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
+	tunnel->action.conf = tunnel;
+	tunnel->item.type = (typeof(tunnel->item.type))
+			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
+	tunnel->item.spec = tunnel;
+	tunnel->item.last = NULL;
+	tunnel->item.mask = NULL;
+
+	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
+		dev->data->port_id, tunnel->tunnel_id);
+
+	return tunnel;
+}
+
+static int
+mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
+		     const struct rte_flow_tunnel *app_tunnel,
+		     struct mlx5_flow_tunnel **tunnel)
+{
+	int ret = 0;
+	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
+	struct mlx5_flow_tunnel *tun;
+
+	LIST_FOREACH(tun, &thub->tunnels, chain) {
+		if (!memcmp(app_tunnel, &tun->app_tunnel,
+			    sizeof(*app_tunnel))) {
+			*tunnel = tun;
+			break;
+		}
+	}
+	if (!tun) {
+		tun = mlx5_flow_tunnel_allocate(dev, app_tunnel);
+		if (tun) {
+			LIST_INSERT_HEAD(&thub->tunnels, tun, chain);
+			*tunnel = tun;
+		} else {
+			ret = -ENOMEM;
+		}
+	}
+	if (tun)
+		__atomic_add_fetch(&tun->refctn, 1, __ATOMIC_RELAXED);
+
+	return ret;
+}
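mlx5_get_flow_tunnel() thus either returns an existing tunnel whose
app_tunnel matches, or allocates and links a new one, taking a reference
either way. The release side is the mirror image; a sketch assuming each
get is paired with exactly one put (the helper name is hypothetical):

/* Sketch: drop one tunnel reference and free on the last one. */
static void
tunnel_put_sketch(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel)
{
	if (__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED) == 0)
		mlx5_flow_tunnel_free(dev, tunnel);
}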
+
+void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
+{
+	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
+
+	if (!thub)
+		return;
+	if (!LIST_EMPTY(&thub->tunnels))
+		DRV_LOG(WARNING, "port %u tunnels present", port_id);
+	mlx5_flow_id_pool_release(thub->tunnel_ids);
+	mlx5_flow_id_pool_release(thub->table_ids);
+	mlx5_hlist_destroy(thub->groups, NULL, NULL);
+	mlx5_free(thub);
+}
+
+int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
+{
+	int err;
+	struct mlx5_flow_tunnel_hub *thub;
+
+	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
+			   0, SOCKET_ID_ANY);
+	if (!thub)
+		return -ENOMEM;
+	LIST_INIT(&thub->tunnels);
+	thub->tunnel_ids = mlx5_flow_id_pool_alloc(MLX5_MAX_TUNNELS);
+	if (!thub->tunnel_ids) {
+		err = -rte_errno;
+		goto err;
+	}
+	thub->table_ids = mlx5_flow_id_pool_alloc(MLX5_MAX_TABLES);
+	if (!thub->table_ids) {
+		err = -rte_errno;
+		goto err;
+	}
+	thub->groups = mlx5_hlist_create("flow groups", MLX5_MAX_TABLES);
+	if (!thub->groups) {
+		err = -rte_errno;
+		goto err;
+	}
+	sh->tunnel_hub = thub;
+
+	return 0;
+
+err:
+	if (thub->groups)
+		mlx5_hlist_destroy(thub->groups, NULL, NULL);
+	if (thub->table_ids)
+		mlx5_flow_id_pool_release(thub->table_ids);
+	if (thub->tunnel_ids)
+		mlx5_flow_id_pool_release(thub->tunnel_ids);
+	mlx5_free(thub);
+	return err;
+}
+
+#ifndef HAVE_MLX5DV_DR
+#define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
+#else
+#define MLX5_DOMAIN_SYNC_FLOW \
+	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
+#endif
+
+int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	const struct mlx5_flow_driver_ops *fops;
+	int ret;
+	struct rte_flow_attr attr = { .transfer = 0 };
+
+	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
+	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
+	if (ret > 0)
+		ret = -ret;
+	return ret;
+}
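Applications call rte_pmd_mlx5_sync_flow() directly through the
PMD-specific header. A sketch, assuming the MLX5_DOMAIN_BIT_* masks
declared alongside this API in rte_pmd_mlx5.h:

#include <rte_pmd_mlx5.h>

/* Sketch: push any cached NIC RX steering rules down to the hardware. */
static int
flush_rx_rules(uint16_t port_id)
{
	/* Returns 0 on success, a negative errno value otherwise. */
	return rte_pmd_mlx5_sync_flow(port_id, MLX5_DOMAIN_BIT_NIC_RX);
}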