1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35
36 struct tunnel_default_miss_ctx {
37         uint16_t *queue;
38         __extension__
39         union {
40                 struct rte_flow_action_rss action_rss;
41                 struct rte_flow_action_queue miss_queue;
42                 struct rte_flow_action_jump miss_jump;
43                 uint8_t raw[0];
44         };
45 };
46
47 static int
48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
49                              struct rte_flow *flow,
50                              const struct rte_flow_attr *attr,
51                              const struct rte_flow_action *app_actions,
52                              uint32_t flow_idx,
53                              const struct mlx5_flow_tunnel *tunnel,
54                              struct tunnel_default_miss_ctx *ctx,
55                              struct rte_flow_error *error);
56 static struct mlx5_flow_tunnel *
57 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
58 static void
59 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
60 static uint32_t
61 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
62                                 const struct mlx5_flow_tunnel *tunnel,
63                                 uint32_t group, uint32_t *table,
64                                 struct rte_flow_error *error);
65
66 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
67 static void mlx5_flow_pop_thread_workspace(void);
68
69
70 /** Device flow drivers. */
71 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
72
73 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
74
75 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
76         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
77 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
78         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
79 #endif
80         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
81         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
82 };
83
84 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
85 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
86         (const int []){ \
87                 __VA_ARGS__, 0, \
88         }
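/*
 * Illustrative use of the macro above (the same form appears in the
 * mlx5_support_expansion[] table further below): a node listing UDP and TCP
 * as possible successors is declared as
 *   .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
 *                                     MLX5_EXPANSION_IPV4_TCP),
 * the trailing 0 appended by the macro terminates the list.
 */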
89
90 /** Node object of input graph for mlx5_flow_expand_rss(). */
91 struct mlx5_flow_expand_node {
92         const int *const next;
93         /**<
94          * List of next node indexes. A zero value is interpreted as a terminator.
95          */
96         const enum rte_flow_item_type type;
97         /**< Pattern item type of current node. */
98         uint64_t rss_types;
99         /**<
100          * RSS types bit-field associated with this node
101          * (see RTE_ETH_RSS_* definitions).
102          */
103         uint64_t node_flags;
104         /**<
105          *  Bit-fields that define how the node is used in the expansion.
106          * (see MLX5_EXPANSION_NODE_* definitions).
107          */
108 };
109
110 /* Optional expansion node: the expansion algorithm will not go deeper. */
111 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
112
113 /* The node is not added implicitly as an expansion of the flow pattern.
114  * If the node type does not match the flow pattern item type, the
115  * expansion algorithm goes deeper into its next items.
116  * In the current implementation, the list of next node indexes can
117  * contain at most one node with this flag set, and it has to be the last
118  * node index (before the list terminator).
119  */
120 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
121
122 /** Object returned by mlx5_flow_expand_rss(). */
123 struct mlx5_flow_expand_rss {
124         uint32_t entries;
125         /**< Number of entries in the @p entry array (patterns and priorities). */
126         struct {
127                 struct rte_flow_item *pattern; /**< Expanded pattern array. */
128                 uint32_t priority; /**< Priority offset for each expansion. */
129         } entry[];
130 };
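/*
 * Buffer layout assumed by mlx5_flow_expand_rss() below (sketch): the entry[]
 * header is sized for MLX5_RSS_EXP_ELT_N slots and the expanded rte_flow_item
 * arrays are packed immediately after it, each entry[i].pattern pointing into
 * that trailing area:
 *
 *   | entries | entry[0 .. MLX5_RSS_EXP_ELT_N - 1] | pattern 0 | pattern 1 | ...
 */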
131
132 static void
133 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
134
135 static const struct mlx5_flow_expand_node *
136 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
137                 unsigned int item_idx,
138                 const struct mlx5_flow_expand_node graph[],
139                 const struct mlx5_flow_expand_node *node);
140
141 static bool
142 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
143 {
144         switch (item->type) {
145         case RTE_FLOW_ITEM_TYPE_ETH:
146         case RTE_FLOW_ITEM_TYPE_VLAN:
147         case RTE_FLOW_ITEM_TYPE_IPV4:
148         case RTE_FLOW_ITEM_TYPE_IPV6:
149         case RTE_FLOW_ITEM_TYPE_UDP:
150         case RTE_FLOW_ITEM_TYPE_TCP:
151         case RTE_FLOW_ITEM_TYPE_VXLAN:
152         case RTE_FLOW_ITEM_TYPE_NVGRE:
153         case RTE_FLOW_ITEM_TYPE_GRE:
154         case RTE_FLOW_ITEM_TYPE_GENEVE:
155         case RTE_FLOW_ITEM_TYPE_MPLS:
156         case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
157         case RTE_FLOW_ITEM_TYPE_GRE_KEY:
158         case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
159         case RTE_FLOW_ITEM_TYPE_GTP:
160                 return true;
161         default:
162                 break;
163         }
164         return false;
165 }
166
167 static enum rte_flow_item_type
168 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
169 {
170         enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
171         uint16_t ether_type = 0;
172         uint16_t ether_type_m;
173         uint8_t ip_next_proto = 0;
174         uint8_t ip_next_proto_m;
175
176         if (item == NULL || item->spec == NULL)
177                 return ret;
178         switch (item->type) {
179         case RTE_FLOW_ITEM_TYPE_ETH:
180                 if (item->mask)
181                         ether_type_m = ((const struct rte_flow_item_eth *)
182                                                 (item->mask))->type;
183                 else
184                         ether_type_m = rte_flow_item_eth_mask.type;
185                 if (ether_type_m != RTE_BE16(0xFFFF))
186                         break;
187                 ether_type = ((const struct rte_flow_item_eth *)
188                                 (item->spec))->type;
189                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
190                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
191                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
192                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
193                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
194                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
195                 else
196                         ret = RTE_FLOW_ITEM_TYPE_END;
197                 break;
198         case RTE_FLOW_ITEM_TYPE_VLAN:
199                 if (item->mask)
200                         ether_type_m = ((const struct rte_flow_item_vlan *)
201                                                 (item->mask))->inner_type;
202                 else
203                         ether_type_m = rte_flow_item_vlan_mask.inner_type;
204                 if (ether_type_m != RTE_BE16(0xFFFF))
205                         break;
206                 ether_type = ((const struct rte_flow_item_vlan *)
207                                 (item->spec))->inner_type;
208                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
209                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
210                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
211                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
212                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
213                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
214                 else
215                         ret = RTE_FLOW_ITEM_TYPE_END;
216                 break;
217         case RTE_FLOW_ITEM_TYPE_IPV4:
218                 if (item->mask)
219                         ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
220                                         (item->mask))->hdr.next_proto_id;
221                 else
222                         ip_next_proto_m =
223                                 rte_flow_item_ipv4_mask.hdr.next_proto_id;
224                 if (ip_next_proto_m != 0xFF)
225                         break;
226                 ip_next_proto = ((const struct rte_flow_item_ipv4 *)
227                                 (item->spec))->hdr.next_proto_id;
228                 if (ip_next_proto == IPPROTO_UDP)
229                         ret = RTE_FLOW_ITEM_TYPE_UDP;
230                 else if (ip_next_proto == IPPROTO_TCP)
231                         ret = RTE_FLOW_ITEM_TYPE_TCP;
232                 else if (ip_next_proto == IPPROTO_IP)
233                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
234                 else if (ip_next_proto == IPPROTO_IPV6)
235                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
236                 else
237                         ret = RTE_FLOW_ITEM_TYPE_END;
238                 break;
239         case RTE_FLOW_ITEM_TYPE_IPV6:
240                 if (item->mask)
241                         ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
242                                                 (item->mask))->hdr.proto;
243                 else
244                         ip_next_proto_m =
245                                 rte_flow_item_ipv6_mask.hdr.proto;
246                 if (ip_next_proto_m != 0xFF)
247                         break;
248                 ip_next_proto = ((const struct rte_flow_item_ipv6 *)
249                                 (item->spec))->hdr.proto;
250                 if (ip_next_proto == IPPROTO_UDP)
251                         ret = RTE_FLOW_ITEM_TYPE_UDP;
252                 else if (ip_next_proto == IPPROTO_TCP)
253                         ret = RTE_FLOW_ITEM_TYPE_TCP;
254                 else if (ip_next_proto == IPPROTO_IP)
255                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
256                 else if (ip_next_proto == IPPROTO_IPV6)
257                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
258                 else
259                         ret = RTE_FLOW_ITEM_TYPE_END;
260                 break;
261         default:
262                 ret = RTE_FLOW_ITEM_TYPE_VOID;
263                 break;
264         }
265         return ret;
266 }
267
268 static const int *
269 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
270                 const int *next_node)
271 {
272         const struct mlx5_flow_expand_node *node = NULL;
273         const int *next = next_node;
274
275         while (next && *next) {
276                 /*
277                  * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
278                  * flag set, because they were not found in the flow pattern.
279                  */
280                 node = &graph[*next];
281                 if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
282                         break;
283                 next = node->next;
284         }
285         return next;
286 }
287
288 #define MLX5_RSS_EXP_ELT_N 16
289
290 /**
291  * Expand RSS flows into several possible flows according to the RSS hash
292  * fields requested and the driver capabilities.
293  *
294  * @param[out] buf
295  *   Buffer to store the resulting expansion.
296  * @param[in] size
297  *   Buffer size in bytes. If 0, @p buf can be NULL.
298  * @param[in] pattern
299  *   User flow pattern.
300  * @param[in] types
301  *   RSS types to expand (see RTE_ETH_RSS_* definitions).
302  * @param[in] graph
303  *   Input graph to expand @p pattern according to @p types.
304  * @param[in] graph_root_index
305  *   Index of root node in @p graph, typically 0.
306  *
307  * @return
308  *   A positive value representing the size of @p buf in bytes regardless of
309  *   @p size on success, a negative errno value otherwise and rte_errno is
310  *   set; the following errors are defined:
311  *
312  *   -E2BIG: the depth of @p graph is too large.
313  *   -EINVAL: @p size is not large enough for the expanded pattern.
314  */
315 static int
316 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
317                      const struct rte_flow_item *pattern, uint64_t types,
318                      const struct mlx5_flow_expand_node graph[],
319                      int graph_root_index)
320 {
321         const struct rte_flow_item *item;
322         const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
323         const int *next_node;
324         const int *stack[MLX5_RSS_EXP_ELT_N];
325         int stack_pos = 0;
326         struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
327         unsigned int i, item_idx, last_expand_item_idx = 0;
328         size_t lsize;
329         size_t user_pattern_size = 0;
330         void *addr = NULL;
331         const struct mlx5_flow_expand_node *next = NULL;
332         struct rte_flow_item missed_item;
333         int missed = 0;
334         int elt = 0;
335         const struct rte_flow_item *last_expand_item = NULL;
336
337         memset(&missed_item, 0, sizeof(missed_item));
338         lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
339                 MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
340         if (lsize > size)
341                 return -EINVAL;
342         buf->entry[0].priority = 0;
343         buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
344         buf->entries = 0;
345         addr = buf->entry[0].pattern;
346         for (item = pattern, item_idx = 0;
347                         item->type != RTE_FLOW_ITEM_TYPE_END;
348                         item++, item_idx++) {
349                 if (!mlx5_flow_is_rss_expandable_item(item)) {
350                         user_pattern_size += sizeof(*item);
351                         continue;
352                 }
353                 last_expand_item = item;
354                 last_expand_item_idx = item_idx;
355                 i = 0;
356                 while (node->next && node->next[i]) {
357                         next = &graph[node->next[i]];
358                         if (next->type == item->type)
359                                 break;
360                         if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
361                                 node = next;
362                                 i = 0;
363                         } else {
364                                 ++i;
365                         }
366                 }
367                 if (next)
368                         node = next;
369                 user_pattern_size += sizeof(*item);
370         }
371         user_pattern_size += sizeof(*item); /* Handle END item. */
372         lsize += user_pattern_size;
373         if (lsize > size)
374                 return -EINVAL;
375         /* Copy the user pattern in the first entry of the buffer. */
376         rte_memcpy(addr, pattern, user_pattern_size);
377         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
378         buf->entries = 1;
379         /* Start expanding. */
380         memset(flow_items, 0, sizeof(flow_items));
381         user_pattern_size -= sizeof(*item);
382         /*
383          * If the last valid item has a spec set, deduce the following item
384          * type (complete the pattern) so it can be used for expansion.
385          */
386         missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
387         if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
388                 /* Item type END indicates expansion is not required. */
389                 return lsize;
390         }
391         if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
392                 next = NULL;
393                 missed = 1;
394                 for (i = 0; node->next && node->next[i]; ++i) {
395                         next = &graph[node->next[i]];
396                         if (next->type == missed_item.type) {
397                                 flow_items[0].type = missed_item.type;
398                                 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
399                                 break;
400                         }
401                         next = NULL;
402                 }
403         }
404         if (next && missed) {
405                 elt = 2; /* missed item + item end. */
406                 node = next;
407                 lsize += elt * sizeof(*item) + user_pattern_size;
408                 if (lsize > size)
409                         return -EINVAL;
410                 if (node->rss_types & types) {
411                         buf->entry[buf->entries].priority = 1;
412                         buf->entry[buf->entries].pattern = addr;
413                         buf->entries++;
414                         rte_memcpy(addr, buf->entry[0].pattern,
415                                    user_pattern_size);
416                         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
417                         rte_memcpy(addr, flow_items, elt * sizeof(*item));
418                         addr = (void *)(((uintptr_t)addr) +
419                                         elt * sizeof(*item));
420                 }
421         } else if (last_expand_item != NULL) {
422                 node = mlx5_flow_expand_rss_adjust_node(pattern,
423                                 last_expand_item_idx, graph, node);
424         }
425         memset(flow_items, 0, sizeof(flow_items));
426         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
427                         node->next);
428         stack[stack_pos] = next_node;
429         node = next_node ? &graph[*next_node] : NULL;
430         while (node) {
431                 flow_items[stack_pos].type = node->type;
432                 if (node->rss_types & types) {
433                         size_t n;
434                         /*
435                          * Compute the number of items to copy from the
436                          * expansion and copy them.
437                          * When stack_pos is 0, there is 1 element in it,
438                          * plus the additional END item.
439                          */
440                         elt = stack_pos + 2;
441                         flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
442                         lsize += elt * sizeof(*item) + user_pattern_size;
443                         if (lsize > size)
444                                 return -EINVAL;
445                         n = elt * sizeof(*item);
446                         buf->entry[buf->entries].priority =
447                                 stack_pos + 1 + missed;
448                         buf->entry[buf->entries].pattern = addr;
449                         buf->entries++;
450                         rte_memcpy(addr, buf->entry[0].pattern,
451                                    user_pattern_size);
452                         addr = (void *)(((uintptr_t)addr) +
453                                         user_pattern_size);
454                         rte_memcpy(addr, &missed_item,
455                                    missed * sizeof(*item));
456                         addr = (void *)(((uintptr_t)addr) +
457                                 missed * sizeof(*item));
458                         rte_memcpy(addr, flow_items, n);
459                         addr = (void *)(((uintptr_t)addr) + n);
460                 }
461                 /* Go deeper. */
462                 if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
463                                 node->next) {
464                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
465                                         node->next);
466                         if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
467                                 rte_errno = E2BIG;
468                                 return -rte_errno;
469                         }
470                         stack[stack_pos] = next_node;
471                 } else if (*(next_node + 1)) {
472                         /* Follow up with the next possibility. */
473                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
474                                         ++next_node);
475                 } else if (!stack_pos) {
476                         /*
477                          * Completing the traverse over the different paths.
478                          * The next_node is advanced to the terminator.
479                          */
480                         ++next_node;
481                 } else {
482                         /* Move to the next path. */
483                         while (stack_pos) {
484                                 next_node = stack[--stack_pos];
485                                 next_node++;
486                                 if (*next_node)
487                                         break;
488                         }
489                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
490                                         next_node);
491                         stack[stack_pos] = next_node;
492                 }
493                 node = next_node && *next_node ? &graph[*next_node] : NULL;
494         }
495         return lsize;
496 }
497
498 enum mlx5_expansion {
499         MLX5_EXPANSION_ROOT,
500         MLX5_EXPANSION_ROOT_OUTER,
501         MLX5_EXPANSION_OUTER_ETH,
502         MLX5_EXPANSION_OUTER_VLAN,
503         MLX5_EXPANSION_OUTER_IPV4,
504         MLX5_EXPANSION_OUTER_IPV4_UDP,
505         MLX5_EXPANSION_OUTER_IPV4_TCP,
506         MLX5_EXPANSION_OUTER_IPV6,
507         MLX5_EXPANSION_OUTER_IPV6_UDP,
508         MLX5_EXPANSION_OUTER_IPV6_TCP,
509         MLX5_EXPANSION_VXLAN,
510         MLX5_EXPANSION_STD_VXLAN,
511         MLX5_EXPANSION_L3_VXLAN,
512         MLX5_EXPANSION_VXLAN_GPE,
513         MLX5_EXPANSION_GRE,
514         MLX5_EXPANSION_NVGRE,
515         MLX5_EXPANSION_GRE_KEY,
516         MLX5_EXPANSION_MPLS,
517         MLX5_EXPANSION_ETH,
518         MLX5_EXPANSION_VLAN,
519         MLX5_EXPANSION_IPV4,
520         MLX5_EXPANSION_IPV4_UDP,
521         MLX5_EXPANSION_IPV4_TCP,
522         MLX5_EXPANSION_IPV6,
523         MLX5_EXPANSION_IPV6_UDP,
524         MLX5_EXPANSION_IPV6_TCP,
525         MLX5_EXPANSION_IPV6_FRAG_EXT,
526         MLX5_EXPANSION_GTP
527 };
528
529 /** Supported expansion of items. */
530 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
531         [MLX5_EXPANSION_ROOT] = {
532                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
533                                                   MLX5_EXPANSION_IPV4,
534                                                   MLX5_EXPANSION_IPV6),
535                 .type = RTE_FLOW_ITEM_TYPE_END,
536         },
537         [MLX5_EXPANSION_ROOT_OUTER] = {
538                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
539                                                   MLX5_EXPANSION_OUTER_IPV4,
540                                                   MLX5_EXPANSION_OUTER_IPV6),
541                 .type = RTE_FLOW_ITEM_TYPE_END,
542         },
543         [MLX5_EXPANSION_OUTER_ETH] = {
544                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
545                 .type = RTE_FLOW_ITEM_TYPE_ETH,
546                 .rss_types = 0,
547         },
548         [MLX5_EXPANSION_OUTER_VLAN] = {
549                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
550                                                   MLX5_EXPANSION_OUTER_IPV6),
551                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
552                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
553         },
554         [MLX5_EXPANSION_OUTER_IPV4] = {
555                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
556                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
557                          MLX5_EXPANSION_OUTER_IPV4_TCP,
558                          MLX5_EXPANSION_GRE,
559                          MLX5_EXPANSION_NVGRE,
560                          MLX5_EXPANSION_IPV4,
561                          MLX5_EXPANSION_IPV6),
562                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
563                 .rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
564                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
565         },
566         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
567                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
568                                                   MLX5_EXPANSION_VXLAN_GPE,
569                                                   MLX5_EXPANSION_MPLS,
570                                                   MLX5_EXPANSION_GTP),
571                 .type = RTE_FLOW_ITEM_TYPE_UDP,
572                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
573         },
574         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
575                 .type = RTE_FLOW_ITEM_TYPE_TCP,
576                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
577         },
578         [MLX5_EXPANSION_OUTER_IPV6] = {
579                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
580                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
581                          MLX5_EXPANSION_OUTER_IPV6_TCP,
582                          MLX5_EXPANSION_IPV4,
583                          MLX5_EXPANSION_IPV6,
584                          MLX5_EXPANSION_GRE,
585                          MLX5_EXPANSION_NVGRE),
586                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
587                 .rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
588                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
589         },
590         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
591                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
592                                                   MLX5_EXPANSION_VXLAN_GPE,
593                                                   MLX5_EXPANSION_MPLS,
594                                                   MLX5_EXPANSION_GTP),
595                 .type = RTE_FLOW_ITEM_TYPE_UDP,
596                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
597         },
598         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
599                 .type = RTE_FLOW_ITEM_TYPE_TCP,
600                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
601         },
602         [MLX5_EXPANSION_VXLAN] = {
603                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
604                                                   MLX5_EXPANSION_IPV4,
605                                                   MLX5_EXPANSION_IPV6),
606                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
607         },
608         [MLX5_EXPANSION_STD_VXLAN] = {
609                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
610                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
611         },
612         [MLX5_EXPANSION_L3_VXLAN] = {
613                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
614                                                   MLX5_EXPANSION_IPV6),
615                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
616         },
617         [MLX5_EXPANSION_VXLAN_GPE] = {
618                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
619                                                   MLX5_EXPANSION_IPV4,
620                                                   MLX5_EXPANSION_IPV6),
621                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
622         },
623         [MLX5_EXPANSION_GRE] = {
624                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
625                                                   MLX5_EXPANSION_IPV6,
626                                                   MLX5_EXPANSION_GRE_KEY,
627                                                   MLX5_EXPANSION_MPLS),
628                 .type = RTE_FLOW_ITEM_TYPE_GRE,
629         },
630         [MLX5_EXPANSION_GRE_KEY] = {
631                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
632                                                   MLX5_EXPANSION_IPV6,
633                                                   MLX5_EXPANSION_MPLS),
634                 .type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
635                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
636         },
637         [MLX5_EXPANSION_NVGRE] = {
638                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
639                 .type = RTE_FLOW_ITEM_TYPE_NVGRE,
640         },
641         [MLX5_EXPANSION_MPLS] = {
642                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
643                                                   MLX5_EXPANSION_IPV6,
644                                                   MLX5_EXPANSION_ETH),
645                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
646                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
647         },
648         [MLX5_EXPANSION_ETH] = {
649                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
650                 .type = RTE_FLOW_ITEM_TYPE_ETH,
651         },
652         [MLX5_EXPANSION_VLAN] = {
653                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
654                                                   MLX5_EXPANSION_IPV6),
655                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
656                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
657         },
658         [MLX5_EXPANSION_IPV4] = {
659                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
660                                                   MLX5_EXPANSION_IPV4_TCP),
661                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
662                 .rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
663                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
664         },
665         [MLX5_EXPANSION_IPV4_UDP] = {
666                 .type = RTE_FLOW_ITEM_TYPE_UDP,
667                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
668         },
669         [MLX5_EXPANSION_IPV4_TCP] = {
670                 .type = RTE_FLOW_ITEM_TYPE_TCP,
671                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
672         },
673         [MLX5_EXPANSION_IPV6] = {
674                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
675                                                   MLX5_EXPANSION_IPV6_TCP,
676                                                   MLX5_EXPANSION_IPV6_FRAG_EXT),
677                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
678                 .rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
679                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
680         },
681         [MLX5_EXPANSION_IPV6_UDP] = {
682                 .type = RTE_FLOW_ITEM_TYPE_UDP,
683                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
684         },
685         [MLX5_EXPANSION_IPV6_TCP] = {
686                 .type = RTE_FLOW_ITEM_TYPE_TCP,
687                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
688         },
689         [MLX5_EXPANSION_IPV6_FRAG_EXT] = {
690                 .type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
691         },
692         [MLX5_EXPANSION_GTP] = {
693                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
694                                                   MLX5_EXPANSION_IPV6),
695                 .type = RTE_FLOW_ITEM_TYPE_GTP,
696         },
697 };
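/*
 * Expansion example (illustrative, derived from the graph above): with
 * RTE_ETH_RSS_NONFRAG_IPV4_UDP requested, the user pattern
 * "eth / ipv4 / end" entered at MLX5_EXPANSION_ROOT is kept as the first
 * entry and an additional "eth / ipv4 / udp / end" pattern is generated
 * through MLX5_EXPANSION_IPV4 -> MLX5_EXPANSION_IPV4_UDP.
 */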
698
699 static struct rte_flow_action_handle *
700 mlx5_action_handle_create(struct rte_eth_dev *dev,
701                           const struct rte_flow_indir_action_conf *conf,
702                           const struct rte_flow_action *action,
703                           struct rte_flow_error *error);
704 static int mlx5_action_handle_destroy
705                                 (struct rte_eth_dev *dev,
706                                  struct rte_flow_action_handle *handle,
707                                  struct rte_flow_error *error);
708 static int mlx5_action_handle_update
709                                 (struct rte_eth_dev *dev,
710                                  struct rte_flow_action_handle *handle,
711                                  const void *update,
712                                  struct rte_flow_error *error);
713 static int mlx5_action_handle_query
714                                 (struct rte_eth_dev *dev,
715                                  const struct rte_flow_action_handle *handle,
716                                  void *data,
717                                  struct rte_flow_error *error);
718 static int
719 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
720                     struct rte_flow_tunnel *app_tunnel,
721                     struct rte_flow_action **actions,
722                     uint32_t *num_of_actions,
723                     struct rte_flow_error *error);
724 static int
725 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
726                        struct rte_flow_tunnel *app_tunnel,
727                        struct rte_flow_item **items,
728                        uint32_t *num_of_items,
729                        struct rte_flow_error *error);
730 static int
731 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
732                               struct rte_flow_item *pmd_items,
733                               uint32_t num_items, struct rte_flow_error *err);
734 static int
735 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
736                                 struct rte_flow_action *pmd_actions,
737                                 uint32_t num_actions,
738                                 struct rte_flow_error *err);
739 static int
740 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
741                                   struct rte_mbuf *m,
742                                   struct rte_flow_restore_info *info,
743                                   struct rte_flow_error *err);
744
745 static const struct rte_flow_ops mlx5_flow_ops = {
746         .validate = mlx5_flow_validate,
747         .create = mlx5_flow_create,
748         .destroy = mlx5_flow_destroy,
749         .flush = mlx5_flow_flush,
750         .isolate = mlx5_flow_isolate,
751         .query = mlx5_flow_query,
752         .dev_dump = mlx5_flow_dev_dump,
753         .get_aged_flows = mlx5_flow_get_aged_flows,
754         .action_handle_create = mlx5_action_handle_create,
755         .action_handle_destroy = mlx5_action_handle_destroy,
756         .action_handle_update = mlx5_action_handle_update,
757         .action_handle_query = mlx5_action_handle_query,
758         .tunnel_decap_set = mlx5_flow_tunnel_decap_set,
759         .tunnel_match = mlx5_flow_tunnel_match,
760         .tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
761         .tunnel_item_release = mlx5_flow_tunnel_item_release,
762         .get_restore_info = mlx5_flow_tunnel_get_restore_info,
763 };
764
765 /* Tunnel information. */
766 struct mlx5_flow_tunnel_info {
767         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
768         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
769 };
770
771 static struct mlx5_flow_tunnel_info tunnels_info[] = {
772         {
773                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
774                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
775         },
776         {
777                 .tunnel = MLX5_FLOW_LAYER_GENEVE,
778                 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
779         },
780         {
781                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
782                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
783         },
784         {
785                 .tunnel = MLX5_FLOW_LAYER_GRE,
786                 .ptype = RTE_PTYPE_TUNNEL_GRE,
787         },
788         {
789                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
790                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
791         },
792         {
793                 .tunnel = MLX5_FLOW_LAYER_MPLS,
794                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
795         },
796         {
797                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
798                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
799         },
800         {
801                 .tunnel = MLX5_FLOW_LAYER_IPIP,
802                 .ptype = RTE_PTYPE_TUNNEL_IP,
803         },
804         {
805                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
806                 .ptype = RTE_PTYPE_TUNNEL_IP,
807         },
808         {
809                 .tunnel = MLX5_FLOW_LAYER_GTP,
810                 .ptype = RTE_PTYPE_TUNNEL_GTPU,
811         },
812 };
813
814
815
816 /**
817  * Translate tag ID to register.
818  *
819  * @param[in] dev
820  *   Pointer to the Ethernet device structure.
821  * @param[in] feature
822  *   The feature that requests the register.
823  * @param[in] id
824  *   The requested register ID.
825  * @param[out] error
826  *   Error description in case of failure.
827  *
828  * @return
829  *   The requested register on success, a negative errno
830  *   value otherwise and rte_errno is set.
831  */
832 int
833 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
834                      enum mlx5_feature_name feature,
835                      uint32_t id,
836                      struct rte_flow_error *error)
837 {
838         struct mlx5_priv *priv = dev->data->dev_private;
839         struct mlx5_dev_config *config = &priv->config;
840         enum modify_reg start_reg;
841         bool skip_mtr_reg = false;
842
843         switch (feature) {
844         case MLX5_HAIRPIN_RX:
845                 return REG_B;
846         case MLX5_HAIRPIN_TX:
847                 return REG_A;
848         case MLX5_METADATA_RX:
849                 switch (config->dv_xmeta_en) {
850                 case MLX5_XMETA_MODE_LEGACY:
851                         return REG_B;
852                 case MLX5_XMETA_MODE_META16:
853                         return REG_C_0;
854                 case MLX5_XMETA_MODE_META32:
855                         return REG_C_1;
856                 }
857                 break;
858         case MLX5_METADATA_TX:
859                 return REG_A;
860         case MLX5_METADATA_FDB:
861                 switch (config->dv_xmeta_en) {
862                 case MLX5_XMETA_MODE_LEGACY:
863                         return REG_NON;
864                 case MLX5_XMETA_MODE_META16:
865                         return REG_C_0;
866                 case MLX5_XMETA_MODE_META32:
867                         return REG_C_1;
868                 }
869                 break;
870         case MLX5_FLOW_MARK:
871                 switch (config->dv_xmeta_en) {
872                 case MLX5_XMETA_MODE_LEGACY:
873                         return REG_NON;
874                 case MLX5_XMETA_MODE_META16:
875                         return REG_C_1;
876                 case MLX5_XMETA_MODE_META32:
877                         return REG_C_0;
878                 }
879                 break;
880         case MLX5_MTR_ID:
881                 /*
882                  * If meter color and meter id share one register, flow match
883                  * should use the meter color register for match.
884                  */
885                 if (priv->mtr_reg_share)
886                         return priv->mtr_color_reg;
887                 else
888                         return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
889                                REG_C_3;
890         case MLX5_MTR_COLOR:
891         case MLX5_ASO_FLOW_HIT:
892         case MLX5_ASO_CONNTRACK:
893                 /* All features use the same REG_C. */
894                 MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
895                 return priv->mtr_color_reg;
896         case MLX5_COPY_MARK:
897                 /*
898                  * The metadata COPY_MARK register is only used in the meter
899                  * suffix sub-flow, so it is safe to share the same register.
900                  */
901                 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
902         case MLX5_APP_TAG:
903                 /*
904                  * If a meter is enabled, it engages a register for color
905                  * match and flow match. If the meter color match is not using
906                  * REG_C_2, the REG_C_x used by the meter color match must be
907                  * skipped.
908                  * If no meter is enabled, all available registers can be used.
909                  */
910                 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
911                             (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
912                 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
913                 if (id > (uint32_t)(REG_C_7 - start_reg))
914                         return rte_flow_error_set(error, EINVAL,
915                                                   RTE_FLOW_ERROR_TYPE_ITEM,
916                                                   NULL, "invalid tag id");
917                 if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
918                         return rte_flow_error_set(error, ENOTSUP,
919                                                   RTE_FLOW_ERROR_TYPE_ITEM,
920                                                   NULL, "unsupported tag id");
921                 /*
922                  * This case means the meter is using a REG_C_x greater than 2.
923                  * Take care not to conflict with the meter color REG_C_x.
924                  * If the available index REG_C_y >= REG_C_x, skip the
925                  * color register.
926                  */
927                 if (skip_mtr_reg && priv->sh->flow_mreg_c
928                     [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
929                         if (id >= (uint32_t)(REG_C_7 - start_reg))
930                                 return rte_flow_error_set(error, EINVAL,
931                                                        RTE_FLOW_ERROR_TYPE_ITEM,
932                                                         NULL, "invalid tag id");
933                         if (priv->sh->flow_mreg_c
934                             [id + 1 + start_reg - REG_C_0] != REG_NON)
935                                 return priv->sh->flow_mreg_c
936                                                [id + 1 + start_reg - REG_C_0];
937                         return rte_flow_error_set(error, ENOTSUP,
938                                                   RTE_FLOW_ERROR_TYPE_ITEM,
939                                                   NULL, "unsupported tag id");
940                 }
941                 return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
942         }
943         MLX5_ASSERT(false);
944         return rte_flow_error_set(error, EINVAL,
945                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
946                                   NULL, "invalid feature name");
947 }
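/*
 * Example mapping (taken from the switch above): with
 * dv_xmeta_en == MLX5_XMETA_MODE_META16, MLX5_METADATA_RX and
 * MLX5_METADATA_FDB resolve to REG_C_0 while MLX5_FLOW_MARK resolves to
 * REG_C_1; MLX5_METADATA_TX always resolves to REG_A.
 */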
948
949 /**
950  * Check extensive flow metadata register support.
951  *
952  * @param dev
953  *   Pointer to rte_eth_dev structure.
954  *
955  * @return
956  *   True if device supports extensive flow metadata register, otherwise false.
957  */
958 bool
959 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
960 {
961         struct mlx5_priv *priv = dev->data->dev_private;
962
963         /*
964          * Having an available reg_c can be regarded inclusively as supporting
965          * extensive flow metadata registers, which could mean:
966          * - metadata register copy action by modify header.
967          * - 16 modify header actions are supported.
968          * - reg_c's are preserved across different domains (FDB and NIC) on
969          *   packet loopback by flow lookup miss.
970          */
971         return priv->sh->flow_mreg_c[2] != REG_NON;
972 }
973
974 /**
975  * Get the lowest priority.
976  *
977  * @param[in] dev
978  *   Pointer to the Ethernet device structure.
979  * @param[in] attributes
980  *   Pointer to device flow rule attributes.
981  *
982  * @return
983  *   The value of the lowest flow priority.
984  */
985 uint32_t
986 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
987                           const struct rte_flow_attr *attr)
988 {
989         struct mlx5_priv *priv = dev->data->dev_private;
990
991         if (!attr->group && !attr->transfer)
992                 return priv->sh->flow_max_priority - 2;
993         return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
994 }
995
996 /**
997  * Calculate matcher priority of the flow.
998  *
999  * @param[in] dev
1000  *   Pointer to the Ethernet device structure.
1001  * @param[in] attr
1002  *   Pointer to device flow rule attributes.
1003  * @param[in] subpriority
1004  *   The priority based on the items.
1005  * @param[in] external
1006  *   Flow is user flow.
1007  * @return
1008  *   The matcher priority of the flow.
1009  */
1010 uint16_t
1011 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
1012                           const struct rte_flow_attr *attr,
1013                           uint32_t subpriority, bool external)
1014 {
1015         uint16_t priority = (uint16_t)attr->priority;
1016         struct mlx5_priv *priv = dev->data->dev_private;
1017
1018         if (!attr->group && !attr->transfer) {
1019                 if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1020                         priority = priv->sh->flow_max_priority - 1;
1021                 return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
1022         } else if (!external && attr->transfer && attr->group == 0 &&
1023                    attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
1024                 return (priv->sh->flow_max_priority - 1) * 3;
1025         }
1026         if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1027                 priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1028         return priority * 3 + subpriority;
1029 }
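/*
 * Worked example of the arithmetic in the non-root branch above: a flow in a
 * non-root group with attr->priority == 2 and subpriority == 1 gets matcher
 * priority 2 * 3 + 1 = 7.
 */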
1030
1031 /**
1032  * Verify the @p item specifications (spec, last, mask) are compatible with the
1033  * NIC capabilities.
1034  *
1035  * @param[in] item
1036  *   Item specification.
1037  * @param[in] mask
1038  *   @p item->mask or flow default bit-masks.
1039  * @param[in] nic_mask
1040  *   Bit-masks covering supported fields by the NIC to compare with user mask.
1041  * @param[in] size
1042  *   Bit-masks size in bytes.
1043  * @param[in] range_accepted
1044  *   True if range of values is accepted for specific fields, false otherwise.
1045  * @param[out] error
1046  *   Pointer to error structure.
1047  *
1048  * @return
1049  *   0 on success, a negative errno value otherwise and rte_errno is set.
1050  */
1051 int
1052 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1053                           const uint8_t *mask,
1054                           const uint8_t *nic_mask,
1055                           unsigned int size,
1056                           bool range_accepted,
1057                           struct rte_flow_error *error)
1058 {
1059         unsigned int i;
1060
1061         MLX5_ASSERT(nic_mask);
1062         for (i = 0; i < size; ++i)
1063                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
1064                         return rte_flow_error_set(error, ENOTSUP,
1065                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1066                                                   item,
1067                                                   "mask enables non supported"
1068                                                   " bits");
1069         if (!item->spec && (item->mask || item->last))
1070                 return rte_flow_error_set(error, EINVAL,
1071                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1072                                           "mask/last without a spec is not"
1073                                           " supported");
1074         if (item->spec && item->last && !range_accepted) {
1075                 uint8_t spec[size];
1076                 uint8_t last[size];
1077                 unsigned int i;
1078                 int ret;
1079
1080                 for (i = 0; i < size; ++i) {
1081                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1082                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1083                 }
1084                 ret = memcmp(spec, last, size);
1085                 if (ret != 0)
1086                         return rte_flow_error_set(error, EINVAL,
1087                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1088                                                   item,
1089                                                   "range is not valid");
1090         }
1091         return 0;
1092 }
1093
1094 /**
1095  * Adjust the hash fields according to the @p flow information.
1096  *
1097  * @param[in] rss_desc
1098  *   Pointer to the mlx5_flow_rss_desc.
1099  * @param[in] tunnel
1100  *   1 when the hash field is for a tunnel item.
1101  * @param[in] layer_types
1102  *   RTE_ETH_RSS_* types.
1103  * @param[in] hash_fields
1104  *   Item hash fields.
1105  *
1106  * @return
1107  *   The hash fields that should be used.
1108  */
1109 uint64_t
1110 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1111                             int tunnel __rte_unused, uint64_t layer_types,
1112                             uint64_t hash_fields)
1113 {
1114 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1115         int rss_request_inner = rss_desc->level >= 2;
1116
1117         /* Check RSS hash level for tunnel. */
1118         if (tunnel && rss_request_inner)
1119                 hash_fields |= IBV_RX_HASH_INNER;
1120         else if (tunnel || rss_request_inner)
1121                 return 0;
1122 #endif
1123         /* Check if requested layer matches RSS hash fields. */
1124         if (!(rss_desc->types & layer_types))
1125                 return 0;
1126         return hash_fields;
1127 }
1128
1129 /**
1130  * Look up and set the tunnel ptype in the Rx queue data. Only a single
1131  * ptype can be used; if several tunnel rules are used on this queue, the
1132  * tunnel ptype will be cleared.
1133  *
1134  * @param rxq_ctrl
1135  *   Rx queue to update.
1136  */
1137 static void
1138 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1139 {
1140         unsigned int i;
1141         uint32_t tunnel_ptype = 0;
1142
1143         /* Look up for the ptype to use. */
1144         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1145                 if (!rxq_ctrl->flow_tunnels_n[i])
1146                         continue;
1147                 if (!tunnel_ptype) {
1148                         tunnel_ptype = tunnels_info[i].ptype;
1149                 } else {
1150                         tunnel_ptype = 0;
1151                         break;
1152                 }
1153         }
1154         rxq_ctrl->rxq.tunnel = tunnel_ptype;
1155 }
1156
1157 /**
1158  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1159  * flow.
1160  *
1161  * @param[in] dev
1162  *   Pointer to the Ethernet device structure.
1163  * @param[in] dev_handle
1164  *   Pointer to device flow handle structure.
1165  */
1166 void
1167 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1168                        struct mlx5_flow_handle *dev_handle)
1169 {
1170         struct mlx5_priv *priv = dev->data->dev_private;
1171         const int mark = dev_handle->mark;
1172         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1173         struct mlx5_ind_table_obj *ind_tbl = NULL;
1174         unsigned int i;
1175
1176         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1177                 struct mlx5_hrxq *hrxq;
1178
1179                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1180                               dev_handle->rix_hrxq);
1181                 if (hrxq)
1182                         ind_tbl = hrxq->ind_table;
1183         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1184                 struct mlx5_shared_action_rss *shared_rss;
1185
1186                 shared_rss = mlx5_ipool_get
1187                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1188                          dev_handle->rix_srss);
1189                 if (shared_rss)
1190                         ind_tbl = shared_rss->ind_tbl;
1191         }
1192         if (!ind_tbl)
1193                 return;
1194         for (i = 0; i != ind_tbl->queues_n; ++i) {
1195                 int idx = ind_tbl->queues[i];
1196                 struct mlx5_rxq_ctrl *rxq_ctrl =
1197                         container_of((*priv->rxqs)[idx],
1198                                      struct mlx5_rxq_ctrl, rxq);
1199
1200                 /*
1201                  * To support metadata register copy on Tx loopback,
1202                  * this must always be enabled (metadata may arrive
1203                  * from another port - not from local flows only).
1204                  */
1205                 if (priv->config.dv_flow_en &&
1206                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1207                     mlx5_flow_ext_mreg_supported(dev)) {
1208                         rxq_ctrl->rxq.mark = 1;
1209                         rxq_ctrl->flow_mark_n = 1;
1210                 } else if (mark) {
1211                         rxq_ctrl->rxq.mark = 1;
1212                         rxq_ctrl->flow_mark_n++;
1213                 }
1214                 if (tunnel) {
1215                         unsigned int j;
1216
1217                         /* Increase the counter matching the flow. */
1218                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1219                                 if ((tunnels_info[j].tunnel &
1220                                      dev_handle->layers) ==
1221                                     tunnels_info[j].tunnel) {
1222                                         rxq_ctrl->flow_tunnels_n[j]++;
1223                                         break;
1224                                 }
1225                         }
1226                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1227                 }
1228         }
1229 }
1230
1231 /**
1232  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1233  *
1234  * @param[in] dev
1235  *   Pointer to the Ethernet device structure.
1236  * @param[in] flow
1237  *   Pointer to flow structure.
1238  */
1239 static void
1240 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1241 {
1242         struct mlx5_priv *priv = dev->data->dev_private;
1243         uint32_t handle_idx;
1244         struct mlx5_flow_handle *dev_handle;
1245
1246         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1247                        handle_idx, dev_handle, next)
1248                 flow_drv_rxq_flags_set(dev, dev_handle);
1249 }
1250
1251 /**
1252  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1253  * device flow if no other flow uses it with the same kind of request.
1254  *
1255  * @param dev
1256  *   Pointer to Ethernet device.
1257  * @param[in] dev_handle
1258  *   Pointer to the device flow handle structure.
1259  */
1260 static void
1261 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1262                         struct mlx5_flow_handle *dev_handle)
1263 {
1264         struct mlx5_priv *priv = dev->data->dev_private;
1265         const int mark = dev_handle->mark;
1266         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1267         struct mlx5_ind_table_obj *ind_tbl = NULL;
1268         unsigned int i;
1269
1270         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1271                 struct mlx5_hrxq *hrxq;
1272
1273                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1274                               dev_handle->rix_hrxq);
1275                 if (hrxq)
1276                         ind_tbl = hrxq->ind_table;
1277         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1278                 struct mlx5_shared_action_rss *shared_rss;
1279
1280                 shared_rss = mlx5_ipool_get
1281                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1282                          dev_handle->rix_srss);
1283                 if (shared_rss)
1284                         ind_tbl = shared_rss->ind_tbl;
1285         }
1286         if (!ind_tbl)
1287                 return;
1288         MLX5_ASSERT(dev->data->dev_started);
1289         for (i = 0; i != ind_tbl->queues_n; ++i) {
1290                 int idx = ind_tbl->queues[i];
1291                 struct mlx5_rxq_ctrl *rxq_ctrl =
1292                         container_of((*priv->rxqs)[idx],
1293                                      struct mlx5_rxq_ctrl, rxq);
1294
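                     /*
                      * Keep the mark bit set while extended metadata register
                      * copy is active: the per-queue counter stays pinned at 1
                      * here instead of being decremented, mirroring
                      * flow_drv_rxq_flags_set().
                      */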
1295                 if (priv->config.dv_flow_en &&
1296                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1297                     mlx5_flow_ext_mreg_supported(dev)) {
1298                         rxq_ctrl->rxq.mark = 1;
1299                         rxq_ctrl->flow_mark_n = 1;
1300                 } else if (mark) {
1301                         rxq_ctrl->flow_mark_n--;
1302                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1303                 }
1304                 if (tunnel) {
1305                         unsigned int j;
1306
1307                         /* Decrease the counter matching the flow. */
1308                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1309                                 if ((tunnels_info[j].tunnel &
1310                                      dev_handle->layers) ==
1311                                     tunnels_info[j].tunnel) {
1312                                         rxq_ctrl->flow_tunnels_n[j]--;
1313                                         break;
1314                                 }
1315                         }
1316                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1317                 }
1318         }
1319 }
1320
1321 /**
1322  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1323  * @p flow if no other flow uses it with the same kind of request.
1324  *
1325  * @param dev
1326  *   Pointer to Ethernet device.
1327  * @param[in] flow
1328  *   Pointer to the flow.
1329  */
1330 static void
1331 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1332 {
1333         struct mlx5_priv *priv = dev->data->dev_private;
1334         uint32_t handle_idx;
1335         struct mlx5_flow_handle *dev_handle;
1336
1337         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1338                        handle_idx, dev_handle, next)
1339                 flow_drv_rxq_flags_trim(dev, dev_handle);
1340 }
1341
1342 /**
1343  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1344  *
1345  * @param dev
1346  *   Pointer to Ethernet device.
1347  */
1348 static void
1349 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1350 {
1351         struct mlx5_priv *priv = dev->data->dev_private;
1352         unsigned int i;
1353
1354         for (i = 0; i != priv->rxqs_n; ++i) {
1355                 struct mlx5_rxq_ctrl *rxq_ctrl;
1356                 unsigned int j;
1357
1358                 if (!(*priv->rxqs)[i])
1359                         continue;
1360                 rxq_ctrl = container_of((*priv->rxqs)[i],
1361                                         struct mlx5_rxq_ctrl, rxq);
1362                 rxq_ctrl->flow_mark_n = 0;
1363                 rxq_ctrl->rxq.mark = 0;
1364                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1365                         rxq_ctrl->flow_tunnels_n[j] = 0;
1366                 rxq_ctrl->rxq.tunnel = 0;
1367         }
1368 }
1369
1370 /**
1371  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1372  *
1373  * @param[in] dev
1374  *   Pointer to the Ethernet device structure.
1375  */
1376 void
1377 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1378 {
1379         struct mlx5_priv *priv = dev->data->dev_private;
1380         struct mlx5_rxq_data *data;
1381         unsigned int i;
1382
1383         for (i = 0; i != priv->rxqs_n; ++i) {
1384                 if (!(*priv->rxqs)[i])
1385                         continue;
1386                 data = (*priv->rxqs)[i];
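                     /*
                      * rte_flow_dynf_metadata_avail() reports whether the
                      * dynamic metadata mbuf field and flag were registered
                      * with rte_flow_dynf_metadata_register(); without that
                      * the queues cannot deliver flow metadata.
                      */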
1387                 if (!rte_flow_dynf_metadata_avail()) {
1388                         data->dynf_meta = 0;
1389                         data->flow_meta_mask = 0;
1390                         data->flow_meta_offset = -1;
1391                         data->flow_meta_port_mask = 0;
1392                 } else {
1393                         data->dynf_meta = 1;
1394                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1395                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1396                         data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1397                 }
1398         }
1399 }
1400
1401 /**
1402  * Return a pointer to the desired action in the list of actions.
1403  *
1404  * @param[in] actions
1405  *   The list of actions to search the action in.
1406  * @param[in] action
1407  *   The action to find.
1408  *
1409  * @return
1410  *   Pointer to the action in the list, if found. NULL otherwise.
1411  */
1412 const struct rte_flow_action *
1413 mlx5_flow_find_action(const struct rte_flow_action *actions,
1414                       enum rte_flow_action_type action)
1415 {
1416         if (actions == NULL)
1417                 return NULL;
1418         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1419                 if (actions->type == action)
1420                         return actions;
1421         return NULL;
1422 }
1423
1424 /**
1425  * Validate the flag action.
1426  *
1427  * @param[in] action_flags
1428  *   Bit-fields that hold the actions detected until now.
1429  * @param[in] attr
1430  *   Attributes of flow that includes this action.
1431  * @param[out] error
1432  *   Pointer to error structure.
1433  *
1434  * @return
1435  *   0 on success, a negative errno value otherwise and rte_errno is set.
1436  */
1437 int
1438 mlx5_flow_validate_action_flag(uint64_t action_flags,
1439                                const struct rte_flow_attr *attr,
1440                                struct rte_flow_error *error)
1441 {
1442         if (action_flags & MLX5_FLOW_ACTION_MARK)
1443                 return rte_flow_error_set(error, EINVAL,
1444                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1445                                           "can't mark and flag in same flow");
1446         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1447                 return rte_flow_error_set(error, EINVAL,
1448                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1449                                           "can't have 2 flag"
1450                                           " actions in same flow");
1451         if (attr->egress)
1452                 return rte_flow_error_set(error, ENOTSUP,
1453                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1454                                           "flag action not supported for "
1455                                           "egress");
1456         return 0;
1457 }
1458
1459 /**
1460  * Validate the mark action.
1461  *
1462  * @param[in] action
1463  *   Pointer to the mark action.
1464  * @param[in] action_flags
1465  *   Bit-fields that hold the actions detected until now.
1466  * @param[in] attr
1467  *   Attributes of flow that includes this action.
1468  * @param[out] error
1469  *   Pointer to error structure.
1470  *
1471  * @return
1472  *   0 on success, a negative errno value otherwise and rte_errno is set.
1473  */
1474 int
1475 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1476                                uint64_t action_flags,
1477                                const struct rte_flow_attr *attr,
1478                                struct rte_flow_error *error)
1479 {
1480         const struct rte_flow_action_mark *mark = action->conf;
1481
1482         if (!mark)
1483                 return rte_flow_error_set(error, EINVAL,
1484                                           RTE_FLOW_ERROR_TYPE_ACTION,
1485                                           action,
1486                                           "configuration cannot be null");
1487         if (mark->id >= MLX5_FLOW_MARK_MAX)
1488                 return rte_flow_error_set(error, EINVAL,
1489                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1490                                           &mark->id,
1491                                           "mark id must be in 0 <= id < "
1492                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1493         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1494                 return rte_flow_error_set(error, EINVAL,
1495                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1496                                           "can't flag and mark in same flow");
1497         if (action_flags & MLX5_FLOW_ACTION_MARK)
1498                 return rte_flow_error_set(error, EINVAL,
1499                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1500                                           "can't have 2 mark actions in same"
1501                                           " flow");
1502         if (attr->egress)
1503                 return rte_flow_error_set(error, ENOTSUP,
1504                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1505                                           "mark action not supported for "
1506                                           "egress");
1507         return 0;
1508 }
1509
1510 /**
1511  * Validate the drop action.
1512  *
1513  * @param[in] action_flags
1514  *   Bit-fields that hold the actions detected until now.
1515  * @param[in] attr
1516  *   Attributes of flow that includes this action.
1517  * @param[out] error
1518  *   Pointer to error structure.
1519  *
1520  * @return
1521  *   0 on success, a negative errno value otherwise and rte_errno is set.
1522  */
1523 int
1524 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1525                                const struct rte_flow_attr *attr,
1526                                struct rte_flow_error *error)
1527 {
1528         if (attr->egress)
1529                 return rte_flow_error_set(error, ENOTSUP,
1530                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1531                                           "drop action not supported for "
1532                                           "egress");
1533         return 0;
1534 }
1535
1536 /**
1537  * Validate the queue action.
1538  *
1539  * @param[in] action
1540  *   Pointer to the queue action.
1541  * @param[in] action_flags
1542  *   Bit-fields that hold the actions detected until now.
1543  * @param[in] dev
1544  *   Pointer to the Ethernet device structure.
1545  * @param[in] attr
1546  *   Attributes of flow that includes this action.
1547  * @param[out] error
1548  *   Pointer to error structure.
1549  *
1550  * @return
1551  *   0 on success, a negative errno value otherwise and rte_errno is set.
1552  */
1553 int
1554 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1555                                 uint64_t action_flags,
1556                                 struct rte_eth_dev *dev,
1557                                 const struct rte_flow_attr *attr,
1558                                 struct rte_flow_error *error)
1559 {
1560         struct mlx5_priv *priv = dev->data->dev_private;
1561         const struct rte_flow_action_queue *queue = action->conf;
1562
1563         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1564                 return rte_flow_error_set(error, EINVAL,
1565                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1566                                           "can't have 2 fate actions in"
1567                                           " same flow");
1568         if (!priv->rxqs_n)
1569                 return rte_flow_error_set(error, EINVAL,
1570                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1571                                           NULL, "No Rx queues configured");
1572         if (queue->index >= priv->rxqs_n)
1573                 return rte_flow_error_set(error, EINVAL,
1574                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1575                                           &queue->index,
1576                                           "queue index out of range");
1577         if (!(*priv->rxqs)[queue->index])
1578                 return rte_flow_error_set(error, EINVAL,
1579                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1580                                           &queue->index,
1581                                           "queue is not configured");
1582         if (attr->egress)
1583                 return rte_flow_error_set(error, ENOTSUP,
1584                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1585                                           "queue action not supported for "
1586                                           "egress");
1587         return 0;
1588 }
1589
1590 /**
1591  * Validate the RSS action.
1592  *
1593  * @param[in] dev
1594  *   Pointer to the Ethernet device structure.
1595  * @param[in] action
1596  *   Pointer to the RSS action.
1597  * @param[out] error
1598  *   Pointer to error structure.
1599  *
1600  * @return
1601  *   0 on success, a negative errno value otherwise and rte_errno is set.
1602  */
1603 int
1604 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1605                          const struct rte_flow_action *action,
1606                          struct rte_flow_error *error)
1607 {
1608         struct mlx5_priv *priv = dev->data->dev_private;
1609         const struct rte_flow_action_rss *rss = action->conf;
1610         enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
1611         unsigned int i;
1612
1613         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1614             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1615                 return rte_flow_error_set(error, ENOTSUP,
1616                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1617                                           &rss->func,
1618                                           "RSS hash function not supported");
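             /*
              * RSS levels 0 and 1 select the outer headers. Level 2 selects
              * the inner (tunneled) headers and is only accepted when the
              * Verbs library provides tunnel support
              * (HAVE_IBV_DEVICE_TUNNEL_SUPPORT).
              */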
1619 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1620         if (rss->level > 2)
1621 #else
1622         if (rss->level > 1)
1623 #endif
1624                 return rte_flow_error_set(error, ENOTSUP,
1625                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1626                                           &rss->level,
1627                                           "tunnel RSS is not supported");
1628         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1629         if (rss->key_len == 0 && rss->key != NULL)
1630                 return rte_flow_error_set(error, ENOTSUP,
1631                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1632                                           &rss->key_len,
1633                                           "RSS hash key length 0");
1634         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1635                 return rte_flow_error_set(error, ENOTSUP,
1636                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1637                                           &rss->key_len,
1638                                           "RSS hash key too small");
1639         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1640                 return rte_flow_error_set(error, ENOTSUP,
1641                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1642                                           &rss->key_len,
1643                                           "RSS hash key too large");
1644         if (rss->queue_num > priv->config.ind_table_max_size)
1645                 return rte_flow_error_set(error, ENOTSUP,
1646                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1647                                           &rss->queue_num,
1648                                           "number of queues too large");
1649         if (rss->types & MLX5_RSS_HF_MASK)
1650                 return rte_flow_error_set(error, ENOTSUP,
1651                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1652                                           &rss->types,
1653                                           "some RSS protocols are not"
1654                                           " supported");
1655         if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
1656             !(rss->types & RTE_ETH_RSS_IP))
1657                 return rte_flow_error_set(error, EINVAL,
1658                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1659                                           "L3 partial RSS requested but L3 RSS"
1660                                           " type not specified");
1661         if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
1662             !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
1663                 return rte_flow_error_set(error, EINVAL,
1664                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1665                                           "L4 partial RSS requested but L4 RSS"
1666                                           " type not specified");
1667         if (!priv->rxqs_n)
1668                 return rte_flow_error_set(error, EINVAL,
1669                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1670                                           NULL, "No Rx queues configured");
1671         if (!rss->queue_num)
1672                 return rte_flow_error_set(error, EINVAL,
1673                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1674                                           NULL, "No queues configured");
1675         for (i = 0; i != rss->queue_num; ++i) {
1676                 struct mlx5_rxq_ctrl *rxq_ctrl;
1677
1678                 if (rss->queue[i] >= priv->rxqs_n)
1679                         return rte_flow_error_set
1680                                 (error, EINVAL,
1681                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1682                                  &rss->queue[i], "queue index out of range");
1683                 if (!(*priv->rxqs)[rss->queue[i]])
1684                         return rte_flow_error_set
1685                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1686                                  &rss->queue[i], "queue is not configured");
1687                 rxq_ctrl = container_of((*priv->rxqs)[rss->queue[i]],
1688                                         struct mlx5_rxq_ctrl, rxq);
1689                 if (i == 0)
1690                         rxq_type = rxq_ctrl->type;
1691                 if (rxq_type != rxq_ctrl->type)
1692                         return rte_flow_error_set
1693                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1694                                  &rss->queue[i],
1695                                  "combining hairpin and regular RSS queues is not supported");
1696         }
1697         return 0;
1698 }
1699
1700 /**
1701  * Validate the RSS action.
1702  *
1703  * @param[in] action
1704  *   Pointer to the RSS action.
1705  * @param[in] action_flags
1706  *   Bit-fields that hold the actions detected until now.
1707  * @param[in] dev
1708  *   Pointer to the Ethernet device structure.
1709  * @param[in] attr
1710  *   Attributes of flow that includes this action.
1711  * @param[in] item_flags
1712  *   Items that were detected.
1713  * @param[out] error
1714  *   Pointer to error structure.
1715  *
1716  * @return
1717  *   0 on success, a negative errno value otherwise and rte_errno is set.
1718  */
1719 int
1720 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1721                               uint64_t action_flags,
1722                               struct rte_eth_dev *dev,
1723                               const struct rte_flow_attr *attr,
1724                               uint64_t item_flags,
1725                               struct rte_flow_error *error)
1726 {
1727         const struct rte_flow_action_rss *rss = action->conf;
1728         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1729         int ret;
1730
1731         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1732                 return rte_flow_error_set(error, EINVAL,
1733                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1734                                           "can't have 2 fate actions"
1735                                           " in same flow");
1736         ret = mlx5_validate_action_rss(dev, action, error);
1737         if (ret)
1738                 return ret;
1739         if (attr->egress)
1740                 return rte_flow_error_set(error, ENOTSUP,
1741                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1742                                           "rss action not supported for "
1743                                           "egress");
1744         if (rss->level > 1 && !tunnel)
1745                 return rte_flow_error_set(error, EINVAL,
1746                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1747                                           "inner RSS is not supported for "
1748                                           "non-tunnel flows");
1749         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1750             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1751                 return rte_flow_error_set(error, EINVAL,
1752                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1753                                           "RSS on eCPRI is not supported now");
1754         }
1755         if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
1756             !(item_flags &
1757               (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
1758             rss->level > 1)
1759                 return rte_flow_error_set(error, EINVAL,
1760                                           RTE_FLOW_ERROR_TYPE_ITEM, NULL,
1761                                           "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
1762         return 0;
1763 }
1764
1765 /**
1766  * Validate the default miss action.
1767  *
1768  * @param[in] action_flags
1769  *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
1770  * @param[out] error
1771  *   Pointer to error structure.
1772  *
1773  * @return
1774  *   0 on success, a negative errno value otherwise and rte_errno is set.
1775  */
1776 int
1777 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1778                                 const struct rte_flow_attr *attr,
1779                                 struct rte_flow_error *error)
1780 {
1781         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1782                 return rte_flow_error_set(error, EINVAL,
1783                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1784                                           "can't have 2 fate actions in"
1785                                           " same flow");
1786         if (attr->egress)
1787                 return rte_flow_error_set(error, ENOTSUP,
1788                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1789                                           "default miss action not supported "
1790                                           "for egress");
1791         if (attr->group)
1792                 return rte_flow_error_set(error, ENOTSUP,
1793                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1794                                           "only group 0 is supported");
1795         if (attr->transfer)
1796                 return rte_flow_error_set(error, ENOTSUP,
1797                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1798                                           NULL, "transfer is not supported");
1799         return 0;
1800 }
1801
1802 /**
1803  * Validate the count action.
1804  *
1805  * @param[in] dev
1806  *   Pointer to the Ethernet device structure.
1807  * @param[in] attr
1808  *   Attributes of flow that includes this action.
1809  * @param[out] error
1810  *   Pointer to error structure.
1811  *
1812  * @return
1813  *   0 on success, a negative errno value otherwise and rte_errno is set.
1814  */
1815 int
1816 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1817                                 const struct rte_flow_attr *attr,
1818                                 struct rte_flow_error *error)
1819 {
1820         if (attr->egress)
1821                 return rte_flow_error_set(error, ENOTSUP,
1822                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1823                                           "count action not supported for "
1824                                           "egress");
1825         return 0;
1826 }
1827
1828 /**
1829  * Validate the ASO CT action.
1830  *
1831  * @param[in] dev
1832  *   Pointer to the Ethernet device structure.
1833  * @param[in] conntrack
1834  *   Pointer to the CT action profile.
1835  * @param[out] error
1836  *   Pointer to error structure.
1837  *
1838  * @return
1839  *   0 on success, a negative errno value otherwise and rte_errno is set.
1840  */
1841 int
1842 mlx5_validate_action_ct(struct rte_eth_dev *dev,
1843                         const struct rte_flow_action_conntrack *conntrack,
1844                         struct rte_flow_error *error)
1845 {
1846         RTE_SET_USED(dev);
1847
1848         if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
1849                 return rte_flow_error_set(error, EINVAL,
1850                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1851                                           "Invalid CT state");
1852         if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
1853                 return rte_flow_error_set(error, EINVAL,
1854                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1855                                           "Invalid last TCP packet flag");
1856         return 0;
1857 }
1858
1859 /**
1860  * Verify that the @p attributes will be correctly understood by the NIC.
1862  *
1863  * @param[in] dev
1864  *   Pointer to the Ethernet device structure.
1865  * @param[in] attributes
1866  *   Pointer to flow attributes
1867  * @param[out] error
1868  *   Pointer to error structure.
1869  *
1870  * @return
1871  *   0 on success, a negative errno value otherwise and rte_errno is set.
1872  */
1873 int
1874 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1875                               const struct rte_flow_attr *attributes,
1876                               struct rte_flow_error *error)
1877 {
1878         struct mlx5_priv *priv = dev->data->dev_private;
1879         uint32_t priority_max = priv->sh->flow_max_priority - 1;
1880
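             /*
              * A priority equal to MLX5_FLOW_LOWEST_PRIO_INDICATOR requests
              * the lowest usable priority and is exempted from the range
              * check below.
              */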
1881         if (attributes->group)
1882                 return rte_flow_error_set(error, ENOTSUP,
1883                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1884                                           NULL, "groups are not supported");
1885         if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
1886             attributes->priority >= priority_max)
1887                 return rte_flow_error_set(error, ENOTSUP,
1888                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1889                                           NULL, "priority out of range");
1890         if (attributes->egress)
1891                 return rte_flow_error_set(error, ENOTSUP,
1892                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1893                                           "egress is not supported");
1894         if (attributes->transfer && !priv->config.dv_esw_en)
1895                 return rte_flow_error_set(error, ENOTSUP,
1896                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1897                                           NULL, "transfer is not supported");
1898         if (!attributes->ingress)
1899                 return rte_flow_error_set(error, EINVAL,
1900                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1901                                           NULL,
1902                                           "ingress attribute is mandatory");
1903         return 0;
1904 }
1905
1906 /**
1907  * Validate ICMP6 item.
1908  *
1909  * @param[in] item
1910  *   Item specification.
1911  * @param[in] item_flags
1912  *   Bit-fields that hold the items detected until now.
1913  * @param[in] target_protocol
1914  *   The next protocol in the previous item.
1915  * @param[out] error
1916  *   Pointer to error structure.
1917  *
1918  * @return
1919  *   0 on success, a negative errno value otherwise and rte_errno is set.
1920  */
1921 int
1922 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1923                                uint64_t item_flags,
1924                                uint8_t target_protocol,
1925                                struct rte_flow_error *error)
1926 {
1927         const struct rte_flow_item_icmp6 *mask = item->mask;
1928         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1929         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1930                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1931         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1932                                       MLX5_FLOW_LAYER_OUTER_L4;
1933         int ret;
1934
1935         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1936                 return rte_flow_error_set(error, EINVAL,
1937                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1938                                           "protocol filtering not compatible"
1939                                           " with ICMP6 layer");
1940         if (!(item_flags & l3m))
1941                 return rte_flow_error_set(error, EINVAL,
1942                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1943                                           "IPv6 is mandatory to filter on"
1944                                           " ICMP6");
1945         if (item_flags & l4m)
1946                 return rte_flow_error_set(error, EINVAL,
1947                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1948                                           "multiple L4 layers not supported");
1949         if (!mask)
1950                 mask = &rte_flow_item_icmp6_mask;
1951         ret = mlx5_flow_item_acceptable
1952                 (item, (const uint8_t *)mask,
1953                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1954                  sizeof(struct rte_flow_item_icmp6),
1955                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1956         if (ret < 0)
1957                 return ret;
1958         return 0;
1959 }
1960
1961 /**
1962  * Validate ICMP item.
1963  *
1964  * @param[in] item
1965  *   Item specification.
1966  * @param[in] item_flags
1967  *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
1968  * @param[out] error
1969  *   Pointer to error structure.
1970  *
1971  * @return
1972  *   0 on success, a negative errno value otherwise and rte_errno is set.
1973  */
1974 int
1975 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1976                              uint64_t item_flags,
1977                              uint8_t target_protocol,
1978                              struct rte_flow_error *error)
1979 {
1980         const struct rte_flow_item_icmp *mask = item->mask;
1981         const struct rte_flow_item_icmp nic_mask = {
1982                 .hdr.icmp_type = 0xff,
1983                 .hdr.icmp_code = 0xff,
1984                 .hdr.icmp_ident = RTE_BE16(0xffff),
1985                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
1986         };
1987         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1988         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1989                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1990         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1991                                       MLX5_FLOW_LAYER_OUTER_L4;
1992         int ret;
1993
1994         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1995                 return rte_flow_error_set(error, EINVAL,
1996                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1997                                           "protocol filtering not compatible"
1998                                           " with ICMP layer");
1999         if (!(item_flags & l3m))
2000                 return rte_flow_error_set(error, EINVAL,
2001                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2002                                           "IPv4 is mandatory to filter"
2003                                           " on ICMP");
2004         if (item_flags & l4m)
2005                 return rte_flow_error_set(error, EINVAL,
2006                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2007                                           "multiple L4 layers not supported");
2008         if (!mask)
2009                 mask = &nic_mask;
2010         ret = mlx5_flow_item_acceptable
2011                 (item, (const uint8_t *)mask,
2012                  (const uint8_t *)&nic_mask,
2013                  sizeof(struct rte_flow_item_icmp),
2014                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2015         if (ret < 0)
2016                 return ret;
2017         return 0;
2018 }
2019
2020 /**
2021  * Validate Ethernet item.
2022  *
2023  * @param[in] item
2024  *   Item specification.
2025  * @param[in] item_flags
2026  *   Bit-fields that hold the items detected until now.
 * @param[in] ext_vlan_sup
 *   Whether extended VLAN features are supported or not.
2027  * @param[out] error
2028  *   Pointer to error structure.
2029  *
2030  * @return
2031  *   0 on success, a negative errno value otherwise and rte_errno is set.
2032  */
2033 int
2034 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2035                             uint64_t item_flags, bool ext_vlan_sup,
2036                             struct rte_flow_error *error)
2037 {
2038         const struct rte_flow_item_eth *mask = item->mask;
2039         const struct rte_flow_item_eth nic_mask = {
2040                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2041                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2042                 .type = RTE_BE16(0xffff),
2043                 .has_vlan = ext_vlan_sup ? 1 : 0,
2044         };
2045         int ret;
2046         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2047         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
2048                                        MLX5_FLOW_LAYER_OUTER_L2;
2049
2050         if (item_flags & ethm)
2051                 return rte_flow_error_set(error, ENOTSUP,
2052                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2053                                           "multiple L2 layers not supported");
2054         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2055             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2056                 return rte_flow_error_set(error, EINVAL,
2057                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2058                                           "L2 layer should not follow "
2059                                           "L3 layers");
2060         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2061             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2062                 return rte_flow_error_set(error, EINVAL,
2063                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2064                                           "L2 layer should not follow VLAN");
2065         if (item_flags & MLX5_FLOW_LAYER_GTP)
2066                 return rte_flow_error_set(error, EINVAL,
2067                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2068                                           "L2 layer should not follow GTP");
2069         if (!mask)
2070                 mask = &rte_flow_item_eth_mask;
2071         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2072                                         (const uint8_t *)&nic_mask,
2073                                         sizeof(struct rte_flow_item_eth),
2074                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2075         return ret;
2076 }
2077
2078 /**
2079  * Validate VLAN item.
2080  *
2081  * @param[in] item
2082  *   Item specification.
2083  * @param[in] item_flags
2084  *   Bit-fields that hold the items detected until now.
2085  * @param[in] dev
2086  *   Ethernet device flow is being created on.
2087  * @param[out] error
2088  *   Pointer to error structure.
2089  *
2090  * @return
2091  *   0 on success, a negative errno value otherwise and rte_errno is set.
2092  */
2093 int
2094 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2095                              uint64_t item_flags,
2096                              struct rte_eth_dev *dev,
2097                              struct rte_flow_error *error)
2098 {
2099         const struct rte_flow_item_vlan *spec = item->spec;
2100         const struct rte_flow_item_vlan *mask = item->mask;
2101         const struct rte_flow_item_vlan nic_mask = {
2102                 .tci = RTE_BE16(UINT16_MAX),
2103                 .inner_type = RTE_BE16(UINT16_MAX),
2104         };
2105         uint16_t vlan_tag = 0;
2106         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2107         int ret;
2108         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2109                                         MLX5_FLOW_LAYER_INNER_L4) :
2110                                        (MLX5_FLOW_LAYER_OUTER_L3 |
2111                                         MLX5_FLOW_LAYER_OUTER_L4);
2112         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2113                                         MLX5_FLOW_LAYER_OUTER_VLAN;
2114
2115         if (item_flags & vlanm)
2116                 return rte_flow_error_set(error, EINVAL,
2117                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2118                                           "multiple VLAN layers not supported");
2119         else if ((item_flags & l34m) != 0)
2120                 return rte_flow_error_set(error, EINVAL,
2121                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2122                                           "VLAN cannot follow L3/L4 layer");
2123         if (!mask)
2124                 mask = &rte_flow_item_vlan_mask;
2125         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2126                                         (const uint8_t *)&nic_mask,
2127                                         sizeof(struct rte_flow_item_vlan),
2128                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2129         if (ret)
2130                 return ret;
2131         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2132                 struct mlx5_priv *priv = dev->data->dev_private;
2133
2134                 if (priv->vmwa_context) {
2135                         /*
2136                          * A non-NULL context means we run in a virtual machine
2137                          * with SR-IOV enabled and have to create a VLAN interface
2138                          * so that the hypervisor sets up the E-Switch vport
2139                          * context correctly. Creating multiple VLAN interfaces
2140                          * is avoided, hence a VLAN tag mask cannot be supported.
2141                          */
2142                         return rte_flow_error_set(error, EINVAL,
2143                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2144                                                   item,
2145                                                   "VLAN tag mask is not"
2146                                                   " supported in virtual"
2147                                                   " environment");
2148                 }
2149         }
2150         if (spec) {
2151                 vlan_tag = spec->tci;
2152                 vlan_tag &= mask->tci;
2153         }
2154         /*
2155          * From the Verbs perspective an empty VLAN match is
2156          * equivalent to a packet without a VLAN layer.
2157          */
2158         if (!vlan_tag)
2159                 return rte_flow_error_set(error, EINVAL,
2160                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2161                                           item->spec,
2162                                           "VLAN cannot be empty");
2163         return 0;
2164 }
2165
2166 /**
2167  * Validate IPV4 item.
2168  *
2169  * @param[in] item
2170  *   Item specification.
2171  * @param[in] item_flags
2172  *   Bit-fields that hold the items detected until now.
2173  * @param[in] last_item
2174  *   Previous validated item in the pattern items.
2175  * @param[in] ether_type
2176  *   Type in the ethernet layer header (including dot1q).
2177  * @param[in] acc_mask
2178  *   Acceptable mask, if NULL default internal default mask
2179  *   will be used to check whether item fields are supported.
2180  * @param[in] range_accepted
2181  *   True if range of values is accepted for specific fields, false otherwise.
2182  * @param[out] error
2183  *   Pointer to error structure.
2184  *
2185  * @return
2186  *   0 on success, a negative errno value otherwise and rte_errno is set.
2187  */
2188 int
2189 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2190                              uint64_t item_flags,
2191                              uint64_t last_item,
2192                              uint16_t ether_type,
2193                              const struct rte_flow_item_ipv4 *acc_mask,
2194                              bool range_accepted,
2195                              struct rte_flow_error *error)
2196 {
2197         const struct rte_flow_item_ipv4 *mask = item->mask;
2198         const struct rte_flow_item_ipv4 *spec = item->spec;
2199         const struct rte_flow_item_ipv4 nic_mask = {
2200                 .hdr = {
2201                         .src_addr = RTE_BE32(0xffffffff),
2202                         .dst_addr = RTE_BE32(0xffffffff),
2203                         .type_of_service = 0xff,
2204                         .next_proto_id = 0xff,
2205                 },
2206         };
2207         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2208         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2209                                       MLX5_FLOW_LAYER_OUTER_L3;
2210         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2211                                       MLX5_FLOW_LAYER_OUTER_L4;
2212         int ret;
2213         uint8_t next_proto = 0xFF;
2214         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2215                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2216                                   MLX5_FLOW_LAYER_INNER_VLAN);
2217
2218         if ((last_item & l2_vlan) && ether_type &&
2219             ether_type != RTE_ETHER_TYPE_IPV4)
2220                 return rte_flow_error_set(error, EINVAL,
2221                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2222                                           "IPv4 cannot follow L2/VLAN layer "
2223                                           "whose ether type is not IPv4");
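             /*
              * When this IPv4 item is already inside a tunnel, an IP-in-IP
              * next protocol would add a second tunnel level, which is not
              * supported; the effective protocol is derived from spec and
              * mask.
              */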
2224         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2225                 if (mask && spec)
2226                         next_proto = mask->hdr.next_proto_id &
2227                                      spec->hdr.next_proto_id;
2228                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2229                         return rte_flow_error_set(error, EINVAL,
2230                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2231                                                   item,
2232                                                   "multiple tunnel "
2233                                                   "not supported");
2234         }
2235         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2236                 return rte_flow_error_set(error, EINVAL,
2237                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2238                                           "wrong tunnel type - IPv6 specified "
2239                                           "but IPv4 item provided");
2240         if (item_flags & l3m)
2241                 return rte_flow_error_set(error, ENOTSUP,
2242                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2243                                           "multiple L3 layers not supported");
2244         else if (item_flags & l4m)
2245                 return rte_flow_error_set(error, EINVAL,
2246                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2247                                           "L3 cannot follow an L4 layer.");
2248         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2249                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2250                 return rte_flow_error_set(error, EINVAL,
2251                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2252                                           "L3 cannot follow an NVGRE layer.");
2253         if (!mask)
2254                 mask = &rte_flow_item_ipv4_mask;
2255         else if (mask->hdr.next_proto_id != 0 &&
2256                  mask->hdr.next_proto_id != 0xff)
2257                 return rte_flow_error_set(error, EINVAL,
2258                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2259                                           "partial mask is not supported"
2260                                           " for protocol");
2261         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2262                                         acc_mask ? (const uint8_t *)acc_mask
2263                                                  : (const uint8_t *)&nic_mask,
2264                                         sizeof(struct rte_flow_item_ipv4),
2265                                         range_accepted, error);
2266         if (ret < 0)
2267                 return ret;
2268         return 0;
2269 }
2270
2271 /**
2272  * Validate IPV6 item.
2273  *
2274  * @param[in] item
2275  *   Item specification.
2276  * @param[in] item_flags
2277  *   Bit-fields that hold the items detected until now.
2278  * @param[in] last_item
2279  *   Previous validated item in the pattern items.
2280  * @param[in] ether_type
2281  *   Type in the ethernet layer header (including dot1q).
2282  * @param[in] acc_mask
2283  *   Acceptable mask, if NULL default internal default mask
2284  *   will be used to check whether item fields are supported.
2285  * @param[out] error
2286  *   Pointer to error structure.
2287  *
2288  * @return
2289  *   0 on success, a negative errno value otherwise and rte_errno is set.
2290  */
2291 int
2292 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2293                              uint64_t item_flags,
2294                              uint64_t last_item,
2295                              uint16_t ether_type,
2296                              const struct rte_flow_item_ipv6 *acc_mask,
2297                              struct rte_flow_error *error)
2298 {
2299         const struct rte_flow_item_ipv6 *mask = item->mask;
2300         const struct rte_flow_item_ipv6 *spec = item->spec;
2301         const struct rte_flow_item_ipv6 nic_mask = {
2302                 .hdr = {
2303                         .src_addr =
2304                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2305                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2306                         .dst_addr =
2307                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2308                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2309                         .vtc_flow = RTE_BE32(0xffffffff),
2310                         .proto = 0xff,
2311                 },
2312         };
2313         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2314         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2315                                       MLX5_FLOW_LAYER_OUTER_L3;
2316         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2317                                       MLX5_FLOW_LAYER_OUTER_L4;
2318         int ret;
2319         uint8_t next_proto = 0xFF;
2320         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2321                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2322                                   MLX5_FLOW_LAYER_INNER_VLAN);
2323
2324         if ((last_item & l2_vlan) && ether_type &&
2325             ether_type != RTE_ETHER_TYPE_IPV6)
2326                 return rte_flow_error_set(error, EINVAL,
2327                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2328                                           "IPv6 cannot follow L2/VLAN layer "
2329                                           "whose ether type is not IPv6");
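             /*
              * The effective next header is only taken into account when
              * the mask covers the whole proto field; it is used below to
              * reject nested IP tunnels and IPv6 extension-header values.
              */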
2330         if (mask && mask->hdr.proto == UINT8_MAX && spec)
2331                 next_proto = spec->hdr.proto;
2332         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2333                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2334                         return rte_flow_error_set(error, EINVAL,
2335                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2336                                                   item,
2337                                                   "multiple tunnel "
2338                                                   "not supported");
2339         }
2340         if (next_proto == IPPROTO_HOPOPTS  ||
2341             next_proto == IPPROTO_ROUTING  ||
2342             next_proto == IPPROTO_FRAGMENT ||
2343             next_proto == IPPROTO_ESP      ||
2344             next_proto == IPPROTO_AH       ||
2345             next_proto == IPPROTO_DSTOPTS)
2346                 return rte_flow_error_set(error, EINVAL,
2347                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2348                                           "IPv6 proto (next header) should "
2349                                           "not be set as extension header");
2350         if (item_flags & MLX5_FLOW_LAYER_IPIP)
2351                 return rte_flow_error_set(error, EINVAL,
2352                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2353                                           "wrong tunnel type - IPv4 specified "
2354                                           "but IPv6 item provided");
2355         if (item_flags & l3m)
2356                 return rte_flow_error_set(error, ENOTSUP,
2357                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2358                                           "multiple L3 layers not supported");
2359         else if (item_flags & l4m)
2360                 return rte_flow_error_set(error, EINVAL,
2361                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2362                                           "L3 cannot follow an L4 layer.");
2363         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2364                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2365                 return rte_flow_error_set(error, EINVAL,
2366                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2367                                           "L3 cannot follow an NVGRE layer.");
2368         if (!mask)
2369                 mask = &rte_flow_item_ipv6_mask;
2370         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2371                                         acc_mask ? (const uint8_t *)acc_mask
2372                                                  : (const uint8_t *)&nic_mask,
2373                                         sizeof(struct rte_flow_item_ipv6),
2374                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2375         if (ret < 0)
2376                 return ret;
2377         return 0;
2378 }
2379
2380 /**
2381  * Validate UDP item.
2382  *
2383  * @param[in] item
2384  *   Item specification.
2385  * @param[in] item_flags
2386  *   Bit-fields that hold the items detected until now.
2387  * @param[in] target_protocol
2388  *   The next protocol in the previous item.
2391  * @param[out] error
2392  *   Pointer to error structure.
2393  *
2394  * @return
2395  *   0 on success, a negative errno value otherwise and rte_errno is set.
2396  */
2397 int
2398 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2399                             uint64_t item_flags,
2400                             uint8_t target_protocol,
2401                             struct rte_flow_error *error)
2402 {
2403         const struct rte_flow_item_udp *mask = item->mask;
2404         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2405         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2406                                       MLX5_FLOW_LAYER_OUTER_L3;
2407         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2408                                       MLX5_FLOW_LAYER_OUTER_L4;
2409         int ret;
2410
2411         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2412                 return rte_flow_error_set(error, EINVAL,
2413                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2414                                           "protocol filtering not compatible"
2415                                           " with UDP layer");
2416         if (!(item_flags & l3m))
2417                 return rte_flow_error_set(error, EINVAL,
2418                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2419                                           "L3 is mandatory to filter on L4");
2420         if (item_flags & l4m)
2421                 return rte_flow_error_set(error, EINVAL,
2422                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2423                                           "multiple L4 layers not supported");
2424         if (!mask)
2425                 mask = &rte_flow_item_udp_mask;
2426         ret = mlx5_flow_item_acceptable
2427                 (item, (const uint8_t *)mask,
2428                  (const uint8_t *)&rte_flow_item_udp_mask,
2429                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2430                  error);
2431         if (ret < 0)
2432                 return ret;
2433         return 0;
2434 }
2435
2436 /**
2437  * Validate TCP item.
2438  *
2439  * @param[in] item
2440  *   Item specification.
2441  * @param[in] item_flags
2442  *   Bit-fields that hold the items detected until now.
2443  * @param[in] target_protocol
2444  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2445  * @param[out] error
2446  *   Pointer to error structure.
2447  *
2448  * @return
2449  *   0 on success, a negative errno value otherwise and rte_errno is set.
2450  */
2451 int
2452 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2453                             uint64_t item_flags,
2454                             uint8_t target_protocol,
2455                             const struct rte_flow_item_tcp *flow_mask,
2456                             struct rte_flow_error *error)
2457 {
2458         const struct rte_flow_item_tcp *mask = item->mask;
2459         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2460         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2461                                       MLX5_FLOW_LAYER_OUTER_L3;
2462         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2463                                       MLX5_FLOW_LAYER_OUTER_L4;
2464         int ret;
2465
2466         MLX5_ASSERT(flow_mask);
2467         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2468                 return rte_flow_error_set(error, EINVAL,
2469                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2470                                           "protocol filtering not compatible"
2471                                           " with TCP layer");
2472         if (!(item_flags & l3m))
2473                 return rte_flow_error_set(error, EINVAL,
2474                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2475                                           "L3 is mandatory to filter on L4");
2476         if (item_flags & l4m)
2477                 return rte_flow_error_set(error, EINVAL,
2478                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2479                                           "multiple L4 layers not supported");
2480         if (!mask)
2481                 mask = &rte_flow_item_tcp_mask;
2482         ret = mlx5_flow_item_acceptable
2483                 (item, (const uint8_t *)mask,
2484                  (const uint8_t *)flow_mask,
2485                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2486                  error);
2487         if (ret < 0)
2488                 return ret;
2489         return 0;
2490 }
2491
2492 /**
2493  * Validate VXLAN item.
2494  *
2495  * @param[in] dev
2496  *   Pointer to the Ethernet device structure.
2497  * @param[in] udp_dport
2498  *   UDP destination port.
2499  * @param[in] item
2500  *   Item specification.
2501  * @param[in] item_flags
2502  *   Bit-fields that hold the items detected until now.
2503  * @param[in] attr
2504  *   Flow rule attributes.
2505  * @param[out] error
2506  *   Pointer to error structure.
2507  *
2508  * @return
2509  *   0 on success, a negative errno value otherwise and rte_errno is set.
2510  */
2511 int
2512 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2513                               uint16_t udp_dport,
2514                               const struct rte_flow_item *item,
2515                               uint64_t item_flags,
2516                               const struct rte_flow_attr *attr,
2517                               struct rte_flow_error *error)
2518 {
2519         const struct rte_flow_item_vxlan *spec = item->spec;
2520         const struct rte_flow_item_vxlan *mask = item->mask;
2521         int ret;
2522         struct mlx5_priv *priv = dev->data->dev_private;
2523         union vni {
2524                 uint32_t vlan_id;
2525                 uint8_t vni[4];
2526         } id = { .vlan_id = 0, };
2527         const struct rte_flow_item_vxlan nic_mask = {
2528                 .vni = "\xff\xff\xff",
2529                 .rsvd1 = 0xff,
2530         };
2531         const struct rte_flow_item_vxlan *valid_mask;
2532
2533         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2534                 return rte_flow_error_set(error, ENOTSUP,
2535                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2536                                           "multiple tunnel layers not"
2537                                           " supported");
2538         valid_mask = &rte_flow_item_vxlan_mask;
2539         /*
2540          * Verify only UDPv4 is present as defined in
2541          * https://tools.ietf.org/html/rfc7348
2542          */
2543         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2544                 return rte_flow_error_set(error, EINVAL,
2545                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2546                                           "no outer UDP layer found");
2547         if (!mask)
2548                 mask = &rte_flow_item_vxlan_mask;
2549
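        /*
         * On non-ConnectX-5 steering, or when the UDP destination port is
         * unspecified or the default VXLAN port, the extended mask (full VNI
         * plus the reserved byte) may be used, provided the misc5/tunnel_header
         * capabilities match the domain of the rule.
         */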
2550         if (priv->sh->steering_format_version !=
2551             MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2552             !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2553                 /* FDB domain & NIC domain non-zero group */
2554                 if ((attr->transfer || attr->group) && priv->sh->misc5_cap)
2555                         valid_mask = &nic_mask;
2556                 /* Group zero in NIC domain */
2557                 if (!attr->group && !attr->transfer &&
2558                     priv->sh->tunnel_header_0_1)
2559                         valid_mask = &nic_mask;
2560         }
2561         ret = mlx5_flow_item_acceptable
2562                 (item, (const uint8_t *)mask,
2563                  (const uint8_t *)valid_mask,
2564                  sizeof(struct rte_flow_item_vxlan),
2565                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2566         if (ret < 0)
2567                 return ret;
2568         if (spec) {
2569                 memcpy(&id.vni[1], spec->vni, 3);
2570                 memcpy(&id.vni[1], mask->vni, 3);
2571         }
2572         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2573                 return rte_flow_error_set(error, ENOTSUP,
2574                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2575                                           "VXLAN tunnel must be fully defined");
2576         return 0;
2577 }
2578
2579 /**
2580  * Validate VXLAN_GPE item.
2581  *
2582  * @param[in] item
2583  *   Item specification.
2584  * @param[in] item_flags
2585  *   Bit-fields that hold the items detected until now.
2586  * @param[in] dev
2587  *   Pointer to the rte_eth_dev structure.
2590  * @param[out] error
2591  *   Pointer to error structure.
2592  *
2593  * @return
2594  *   0 on success, a negative errno value otherwise and rte_errno is set.
2595  */
2596 int
2597 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2598                                   uint64_t item_flags,
2599                                   struct rte_eth_dev *dev,
2600                                   struct rte_flow_error *error)
2601 {
2602         struct mlx5_priv *priv = dev->data->dev_private;
2603         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2604         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2605         int ret;
2606         union vni {
2607                 uint32_t vlan_id;
2608                 uint8_t vni[4];
2609         } id = { .vlan_id = 0, };
2610
2611         if (!priv->config.l3_vxlan_en)
2612                 return rte_flow_error_set(error, ENOTSUP,
2613                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2614                                           "L3 VXLAN is not enabled by device"
2615                                           " parameter and/or not configured in"
2616                                           " firmware");
2617         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2618                 return rte_flow_error_set(error, ENOTSUP,
2619                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2620                                           "multiple tunnel layers not"
2621                                           " supported");
2622         /*
2623          * Verify only UDPv4 is present as defined in
2624          * https://tools.ietf.org/html/rfc7348
2625          */
2626         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2627                 return rte_flow_error_set(error, EINVAL,
2628                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2629                                           "no outer UDP layer found");
2630         if (!mask)
2631                 mask = &rte_flow_item_vxlan_gpe_mask;
2632         ret = mlx5_flow_item_acceptable
2633                 (item, (const uint8_t *)mask,
2634                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2635                  sizeof(struct rte_flow_item_vxlan_gpe),
2636                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2637         if (ret < 0)
2638                 return ret;
2639         if (spec) {
2640                 if (spec->protocol)
2641                         return rte_flow_error_set(error, ENOTSUP,
2642                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2643                                                   item,
2644                                                   "VxLAN-GPE protocol"
2645                                                   " not supported");
2646                 memcpy(&id.vni[1], spec->vni, 3);
2647                 memcpy(&id.vni[1], mask->vni, 3);
2648         }
2649         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2650                 return rte_flow_error_set(error, ENOTSUP,
2651                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2652                                           "VXLAN-GPE tunnel must be fully"
2653                                           " defined");
2654         return 0;
2655 }

2656 /**
2657  * Validate GRE Key item.
2658  *
2659  * @param[in] item
2660  *   Item specification.
2661  * @param[in] item_flags
2662  *   Bit flags to mark detected items.
2663  * @param[in] gre_item
2664  *   Pointer to gre_item
2665  * @param[out] error
2666  *   Pointer to error structure.
2667  *
2668  * @return
2669  *   0 on success, a negative errno value otherwise and rte_errno is set.
2670  */
2671 int
2672 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2673                                 uint64_t item_flags,
2674                                 const struct rte_flow_item *gre_item,
2675                                 struct rte_flow_error *error)
2676 {
2677         const rte_be32_t *mask = item->mask;
2678         int ret = 0;
2679         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2680         const struct rte_flow_item_gre *gre_spec;
2681         const struct rte_flow_item_gre *gre_mask;
2682
2683         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2684                 return rte_flow_error_set(error, ENOTSUP,
2685                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2686                                           "Multiple GRE keys not supported");
2687         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2688                 return rte_flow_error_set(error, ENOTSUP,
2689                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2690                                           "No preceding GRE header");
2691         if (item_flags & MLX5_FLOW_LAYER_INNER)
2692                 return rte_flow_error_set(error, ENOTSUP,
2693                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2694                                           "GRE key following a wrong item");
2695         gre_mask = gre_item->mask;
2696         if (!gre_mask)
2697                 gre_mask = &rte_flow_item_gre_mask;
2698         gre_spec = gre_item->spec;
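        /*
         * 0x2000 is the K (key present) bit of c_rsvd0_ver: if the preceding
         * GRE item matches on this bit, it must also be set in the spec.
         */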
2699         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2700                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2701                 return rte_flow_error_set(error, EINVAL,
2702                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2703                                           "Key bit must be on");
2704
2705         if (!mask)
2706                 mask = &gre_key_default_mask;
2707         ret = mlx5_flow_item_acceptable
2708                 (item, (const uint8_t *)mask,
2709                  (const uint8_t *)&gre_key_default_mask,
2710                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2711         return ret;
2712 }
2713
2714 /**
2715  * Validate GRE item.
2716  *
2717  * @param[in] item
2718  *   Item specification.
2719  * @param[in] item_flags
2720  *   Bit flags to mark detected items.
2721  * @param[in] target_protocol
2722  *   The next protocol in the previous item.
2723  * @param[out] error
2724  *   Pointer to error structure.
2725  *
2726  * @return
2727  *   0 on success, a negative errno value otherwise and rte_errno is set.
2728  */
2729 int
2730 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2731                             uint64_t item_flags,
2732                             uint8_t target_protocol,
2733                             struct rte_flow_error *error)
2734 {
2735         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2736         const struct rte_flow_item_gre *mask = item->mask;
2737         int ret;
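        /* The 0xB000 mask covers only the C, K and S flag bits of c_rsvd0_ver. */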
2738         const struct rte_flow_item_gre nic_mask = {
2739                 .c_rsvd0_ver = RTE_BE16(0xB000),
2740                 .protocol = RTE_BE16(UINT16_MAX),
2741         };
2742
2743         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2744                 return rte_flow_error_set(error, EINVAL,
2745                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2746                                           "protocol filtering not compatible"
2747                                           " with this GRE layer");
2748         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2749                 return rte_flow_error_set(error, ENOTSUP,
2750                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2751                                           "multiple tunnel layers not"
2752                                           " supported");
2753         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2754                 return rte_flow_error_set(error, ENOTSUP,
2755                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2756                                           "L3 Layer is missing");
2757         if (!mask)
2758                 mask = &rte_flow_item_gre_mask;
2759         ret = mlx5_flow_item_acceptable
2760                 (item, (const uint8_t *)mask,
2761                  (const uint8_t *)&nic_mask,
2762                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2763                  error);
2764         if (ret < 0)
2765                 return ret;
2766 #ifndef HAVE_MLX5DV_DR
2767 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2768         if (spec && (spec->protocol & mask->protocol))
2769                 return rte_flow_error_set(error, ENOTSUP,
2770                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2771                                           "without MPLS support the"
2772                                           " specification cannot be used for"
2773                                           " filtering");
2774 #endif
2775 #endif
2776         return 0;
2777 }
2778
2779 /**
2780  * Validate Geneve item.
2781  *
2782  * @param[in] item
2783  *   Item specification.
2784  * @param[in] item_flags
2785  *   Bit-fields that hold the items detected until now.
2786  * @param[in] dev
2787  *   Pointer to the rte_eth_dev structure.
2788  * @param[out] error
2789  *   Pointer to error structure.
2790  *
2791  * @return
2792  *   0 on success, a negative errno value otherwise and rte_errno is set.
2793  */
2794  */
2795 int
2796 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2797                                uint64_t item_flags,
2798                                struct rte_eth_dev *dev,
2799                                struct rte_flow_error *error)
2800 {
2801         struct mlx5_priv *priv = dev->data->dev_private;
2802         const struct rte_flow_item_geneve *spec = item->spec;
2803         const struct rte_flow_item_geneve *mask = item->mask;
2804         int ret;
2805         uint16_t gbhdr;
2806         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2807                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2808         const struct rte_flow_item_geneve nic_mask = {
2809                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2810                 .vni = "\xff\xff\xff",
2811                 .protocol = RTE_BE16(UINT16_MAX),
2812         };
2813
2814         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2815                 return rte_flow_error_set(error, ENOTSUP,
2816                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2817                                           "L3 Geneve is not enabled by device"
2818                                           " parameter and/or not configured in"
2819                                           " firmware");
2820         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2821                 return rte_flow_error_set(error, ENOTSUP,
2822                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2823                                           "multiple tunnel layers not"
2824                                           " supported");
2825         /*
2826          * Verify only UDPv4 is present as defined in
2827          * https://tools.ietf.org/html/rfc7348
2828          */
2829         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2830                 return rte_flow_error_set(error, EINVAL,
2831                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2832                                           "no outer UDP layer found");
2833         if (!mask)
2834                 mask = &rte_flow_item_geneve_mask;
2835         ret = mlx5_flow_item_acceptable
2836                                   (item, (const uint8_t *)mask,
2837                                    (const uint8_t *)&nic_mask,
2838                                    sizeof(struct rte_flow_item_geneve),
2839                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2840         if (ret)
2841                 return ret;
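        /*
         * The version, OAM/critical and reserved bits must be zero, and the
         * options length must not exceed the device limit.
         */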
2842         if (spec) {
2843                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2844                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2845                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2846                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2847                         return rte_flow_error_set(error, ENOTSUP,
2848                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2849                                                   item,
2850                                                   "Geneve protocol unsupported"
2851                                                   " fields are being used");
2852                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2853                         return rte_flow_error_set
2854                                         (error, ENOTSUP,
2855                                          RTE_FLOW_ERROR_TYPE_ITEM,
2856                                          item,
2857                                          "Unsupported Geneve options length");
2858         }
2859         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2860                 return rte_flow_error_set
2861                                     (error, ENOTSUP,
2862                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2863                                      "Geneve tunnel must be fully defined");
2864         return 0;
2865 }
2866
2867 /**
2868  * Validate Geneve TLV option item.
2869  *
2870  * @param[in] item
2871  *   Item specification.
2872  * @param[in] last_item
2873  *   Previous validated item in the pattern items.
2874  * @param[in] geneve_item
2875  *   Previous GENEVE item specification.
2876  * @param[in] dev
2877  *   Pointer to the rte_eth_dev structure.
2878  * @param[out] error
2879  *   Pointer to error structure.
2880  *
2881  * @return
2882  *   0 on success, a negative errno value otherwise and rte_errno is set.
2883  */
2884 int
2885 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
2886                                    uint64_t last_item,
2887                                    const struct rte_flow_item *geneve_item,
2888                                    struct rte_eth_dev *dev,
2889                                    struct rte_flow_error *error)
2890 {
2891         struct mlx5_priv *priv = dev->data->dev_private;
2892         struct mlx5_dev_ctx_shared *sh = priv->sh;
2893         struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
2894         struct mlx5_hca_attr *hca_attr = &priv->config.hca_attr;
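        /* The HCA reports Geneve TLV option data length in 4-byte words. */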
2895         uint8_t data_max_supported =
2896                         hca_attr->max_geneve_tlv_option_data_len * 4;
2897         struct mlx5_dev_config *config = &priv->config;
2898         const struct rte_flow_item_geneve *geneve_spec;
2899         const struct rte_flow_item_geneve *geneve_mask;
2900         const struct rte_flow_item_geneve_opt *spec = item->spec;
2901         const struct rte_flow_item_geneve_opt *mask = item->mask;
2902         unsigned int i;
2903         unsigned int data_len;
2904         uint8_t tlv_option_len;
2905         uint16_t optlen_m, optlen_v;
2906         const struct rte_flow_item_geneve_opt full_mask = {
2907                 .option_class = RTE_BE16(0xffff),
2908                 .option_type = 0xff,
2909                 .option_len = 0x1f,
2910         };
2911
2912         if (!mask)
2913                 mask = &rte_flow_item_geneve_opt_mask;
2914         if (!spec)
2915                 return rte_flow_error_set
2916                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2917                         "Geneve TLV opt class/type/length must be specified");
2918         if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
2919                 return rte_flow_error_set
2920                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2921                         "Geneve TLV opt length exceeds the limit (31)");
2922         /* Check if class type and length masks are full. */
2923         if (full_mask.option_class != mask->option_class ||
2924             full_mask.option_type != mask->option_type ||
2925             full_mask.option_len != (mask->option_len & full_mask.option_len))
2926                 return rte_flow_error_set
2927                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2928                         "Geneve TLV opt class/type/length masks must be full");
2929         /* Check if length is supported */
2930         if ((uint32_t)spec->option_len >
2931                         config->hca_attr.max_geneve_tlv_option_data_len)
2932                 return rte_flow_error_set
2933                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2934                         "Geneve TLV opt length not supported");
2935         if (config->hca_attr.max_geneve_tlv_options > 1)
2936                 DRV_LOG(DEBUG,
2937                         "max_geneve_tlv_options supports more than 1 option");
2938         /* Check GENEVE item preceding. */
2939         if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
2940                 return rte_flow_error_set
2941                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2942                         "Geneve opt item must be preceded by a Geneve item");
2943         geneve_spec = geneve_item->spec;
2944         geneve_mask = geneve_item->mask ? geneve_item->mask :
2945                                           &rte_flow_item_geneve_mask;
2946         /* Check if GENEVE TLV option size doesn't exceed option length */
2947         if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
2948                             geneve_spec->ver_opt_len_o_c_rsvd0)) {
2949                 tlv_option_len = spec->option_len & mask->option_len;
2950                 optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
2951                 optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
2952                 optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
2953                 optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
2954                 if ((optlen_v & optlen_m) <= tlv_option_len)
2955                         return rte_flow_error_set
2956                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2957                                  "GENEVE TLV option length exceeds optlen");
2958         }
2959         /* Check if length is 0 or data is 0. */
2960         if (spec->data == NULL || spec->option_len == 0)
2961                 return rte_flow_error_set
2962                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2963                         "Geneve TLV opt with zero data/length not supported");
2964         /* Check not all data & mask are 0. */
2965         data_len = spec->option_len * 4;
2966         if (mask->data == NULL) {
2967                 for (i = 0; i < data_len; i++)
2968                         if (spec->data[i])
2969                                 break;
2970                 if (i == data_len)
2971                         return rte_flow_error_set(error, ENOTSUP,
2972                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2973                                 "Can't match on Geneve option data 0");
2974         } else {
2975                 for (i = 0; i < data_len; i++)
2976                         if (spec->data[i] & mask->data[i])
2977                                 break;
2978                 if (i == data_len)
2979                         return rte_flow_error_set(error, ENOTSUP,
2980                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2981                                 "Can't match on Geneve option data and mask 0");
2982                 /* Check data mask supported. */
2983                 for (i = data_max_supported; i < data_len ; i++)
2984                         if (mask->data[i])
2985                                 return rte_flow_error_set(error, ENOTSUP,
2986                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
2987                                         "Data mask is of unsupported size");
2988         }
2989         /* Check GENEVE option is supported in NIC. */
2990         if (!config->hca_attr.geneve_tlv_opt)
2991                 return rte_flow_error_set
2992                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2993                         "Geneve TLV opt not supported");
2994         /* Check if we already have geneve option with different type/class. */
2995         rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
2996         geneve_opt_resource = sh->geneve_tlv_option_resource;
2997         if (geneve_opt_resource != NULL)
2998                 if (geneve_opt_resource->option_class != spec->option_class ||
2999                     geneve_opt_resource->option_type != spec->option_type ||
3000                     geneve_opt_resource->length != spec->option_len) {
3001                         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3002                         return rte_flow_error_set(error, ENOTSUP,
3003                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
3004                                 "Only one Geneve TLV option supported");
3005                 }
3006         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3007         return 0;
3008 }
3009
3010 /**
3011  * Validate MPLS item.
3012  *
3013  * @param[in] dev
3014  *   Pointer to the rte_eth_dev structure.
3015  * @param[in] item
3016  *   Item specification.
3017  * @param[in] item_flags
3018  *   Bit-fields that hold the items detected until now.
3019  * @param[in] prev_layer
3020  *   The protocol layer indicated in previous item.
3021  * @param[out] error
3022  *   Pointer to error structure.
3023  *
3024  * @return
3025  *   0 on success, a negative errno value otherwise and rte_errno is set.
3026  */
3027 int
3028 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3029                              const struct rte_flow_item *item __rte_unused,
3030                              uint64_t item_flags __rte_unused,
3031                              uint64_t prev_layer __rte_unused,
3032                              struct rte_flow_error *error)
3033 {
3034 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3035         const struct rte_flow_item_mpls *mask = item->mask;
3036         struct mlx5_priv *priv = dev->data->dev_private;
3037         int ret;
3038
3039         if (!priv->config.mpls_en)
3040                 return rte_flow_error_set(error, ENOTSUP,
3041                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3042                                           "MPLS not supported or"
3043                                           " disabled in firmware"
3044                                           " configuration.");
3045         /* MPLS over UDP or GRE is allowed. */
3046         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3047                             MLX5_FLOW_LAYER_GRE |
3048                             MLX5_FLOW_LAYER_GRE_KEY)))
3049                 return rte_flow_error_set(error, EINVAL,
3050                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3051                                           "protocol filtering not compatible"
3052                                           " with MPLS layer");
3053         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3054         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3055             !(item_flags & MLX5_FLOW_LAYER_GRE))
3056                 return rte_flow_error_set(error, ENOTSUP,
3057                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3058                                           "multiple tunnel layers not"
3059                                           " supported");
3060         if (!mask)
3061                 mask = &rte_flow_item_mpls_mask;
3062         ret = mlx5_flow_item_acceptable
3063                 (item, (const uint8_t *)mask,
3064                  (const uint8_t *)&rte_flow_item_mpls_mask,
3065                  sizeof(struct rte_flow_item_mpls),
3066                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3067         if (ret < 0)
3068                 return ret;
3069         return 0;
3070 #else
3071         return rte_flow_error_set(error, ENOTSUP,
3072                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
3073                                   "MPLS is not supported by Verbs, please"
3074                                   " update.");
3075 #endif
3076 }
3077
3078 /**
3079  * Validate NVGRE item.
3080  *
3081  * @param[in] item
3082  *   Item specification.
3083  * @param[in] item_flags
3084  *   Bit flags to mark detected items.
3085  * @param[in] target_protocol
3086  *   The next protocol in the previous item.
3087  * @param[out] error
3088  *   Pointer to error structure.
3089  *
3090  * @return
3091  *   0 on success, a negative errno value otherwise and rte_errno is set.
3092  */
3093 int
3094 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3095                               uint64_t item_flags,
3096                               uint8_t target_protocol,
3097                               struct rte_flow_error *error)
3098 {
3099         const struct rte_flow_item_nvgre *mask = item->mask;
3100         int ret;
3101
3102         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3103                 return rte_flow_error_set(error, EINVAL,
3104                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3105                                           "protocol filtering not compatible"
3106                                           " with this GRE layer");
3107         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3108                 return rte_flow_error_set(error, ENOTSUP,
3109                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3110                                           "multiple tunnel layers not"
3111                                           " supported");
3112         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3113                 return rte_flow_error_set(error, ENOTSUP,
3114                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3115                                           "L3 Layer is missing");
3116         if (!mask)
3117                 mask = &rte_flow_item_nvgre_mask;
3118         ret = mlx5_flow_item_acceptable
3119                 (item, (const uint8_t *)mask,
3120                  (const uint8_t *)&rte_flow_item_nvgre_mask,
3121                  sizeof(struct rte_flow_item_nvgre),
3122                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3123         if (ret < 0)
3124                 return ret;
3125         return 0;
3126 }
3127
3128 /**
3129  * Validate eCPRI item.
3130  *
3131  * @param[in] item
3132  *   Item specification.
3133  * @param[in] item_flags
3134  *   Bit-fields that hold the items detected until now.
3135  * @param[in] last_item
3136  *   Previous validated item in the pattern items.
3137  * @param[in] ether_type
3138  *   Type in the ethernet layer header (including dot1q).
3139  * @param[in] acc_mask
3140  *   Acceptable mask, if NULL the default internal mask
3141  *   will be used to check whether item fields are supported.
3142  * @param[out] error
3143  *   Pointer to error structure.
3144  *
3145  * @return
3146  *   0 on success, a negative errno value otherwise and rte_errno is set.
3147  */
3148 int
3149 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3150                               uint64_t item_flags,
3151                               uint64_t last_item,
3152                               uint16_t ether_type,
3153                               const struct rte_flow_item_ecpri *acc_mask,
3154                               struct rte_flow_error *error)
3155 {
3156         const struct rte_flow_item_ecpri *mask = item->mask;
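        /*
         * Default NIC mask: only the message type of the common header and the
         * first 4 bytes of the payload (dummy[0]) can be matched.
         */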
3157         const struct rte_flow_item_ecpri nic_mask = {
3158                 .hdr = {
3159                         .common = {
3160                                 .u32 =
3161                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
3162                                         .type = 0xFF,
3163                                         }).u32),
3164                         },
3165                         .dummy[0] = 0xFFFFFFFF,
3166                 },
3167         };
3168         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3169                                         MLX5_FLOW_LAYER_OUTER_VLAN);
3170         struct rte_flow_item_ecpri mask_lo;
3171
3172         if (!(last_item & outer_l2_vlan) &&
3173             last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3174                 return rte_flow_error_set(error, EINVAL,
3175                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3176                                           "eCPRI can only follow L2/VLAN layer or UDP layer");
3177         if ((last_item & outer_l2_vlan) && ether_type &&
3178             ether_type != RTE_ETHER_TYPE_ECPRI)
3179                 return rte_flow_error_set(error, EINVAL,
3180                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3181                                           "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
3182         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3183                 return rte_flow_error_set(error, EINVAL,
3184                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3185                                           "eCPRI with tunnel is not supported right now");
3186         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3187                 return rte_flow_error_set(error, ENOTSUP,
3188                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3189                                           "multiple L3 layers not supported");
3190         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3191                 return rte_flow_error_set(error, EINVAL,
3192                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3193                                           "eCPRI cannot coexist with a TCP layer");
3194         /* In specification, eCPRI could be over UDP layer. */
3195         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3196                 return rte_flow_error_set(error, EINVAL,
3197                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3198                                           "eCPRI over UDP layer is not supported right now");
3199         /* Mask for type field in common header could be zero. */
3200         if (!mask)
3201                 mask = &rte_flow_item_ecpri_mask;
3202         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3203         /* Input mask is in big-endian format. */
3204         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3205                 return rte_flow_error_set(error, EINVAL,
3206                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3207                                           "partial mask is not supported for protocol");
3208         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3209                 return rte_flow_error_set(error, EINVAL,
3210                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3211                                           "message header mask must be after a type mask");
3212         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3213                                          acc_mask ? (const uint8_t *)acc_mask
3214                                                   : (const uint8_t *)&nic_mask,
3215                                          sizeof(struct rte_flow_item_ecpri),
3216                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3217 }
3218
3219 static int
3220 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3221                    const struct rte_flow_attr *attr __rte_unused,
3222                    const struct rte_flow_item items[] __rte_unused,
3223                    const struct rte_flow_action actions[] __rte_unused,
3224                    bool external __rte_unused,
3225                    int hairpin __rte_unused,
3226                    struct rte_flow_error *error)
3227 {
3228         return rte_flow_error_set(error, ENOTSUP,
3229                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3230 }
3231
3232 static struct mlx5_flow *
3233 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3234                   const struct rte_flow_attr *attr __rte_unused,
3235                   const struct rte_flow_item items[] __rte_unused,
3236                   const struct rte_flow_action actions[] __rte_unused,
3237                   struct rte_flow_error *error)
3238 {
3239         rte_flow_error_set(error, ENOTSUP,
3240                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3241         return NULL;
3242 }
3243
3244 static int
3245 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3246                     struct mlx5_flow *dev_flow __rte_unused,
3247                     const struct rte_flow_attr *attr __rte_unused,
3248                     const struct rte_flow_item items[] __rte_unused,
3249                     const struct rte_flow_action actions[] __rte_unused,
3250                     struct rte_flow_error *error)
3251 {
3252         return rte_flow_error_set(error, ENOTSUP,
3253                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3254 }
3255
3256 static int
3257 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3258                 struct rte_flow *flow __rte_unused,
3259                 struct rte_flow_error *error)
3260 {
3261         return rte_flow_error_set(error, ENOTSUP,
3262                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3263 }
3264
3265 static void
3266 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3267                  struct rte_flow *flow __rte_unused)
3268 {
3269 }
3270
3271 static void
3272 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3273                   struct rte_flow *flow __rte_unused)
3274 {
3275 }
3276
3277 static int
3278 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3279                 struct rte_flow *flow __rte_unused,
3280                 const struct rte_flow_action *actions __rte_unused,
3281                 void *data __rte_unused,
3282                 struct rte_flow_error *error)
3283 {
3284         return rte_flow_error_set(error, ENOTSUP,
3285                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3286 }
3287
3288 static int
3289 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3290                       uint32_t domains __rte_unused,
3291                       uint32_t flags __rte_unused)
3292 {
3293         return 0;
3294 }
3295
3296 /* Void driver to protect from null pointer reference. */
3297 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3298         .validate = flow_null_validate,
3299         .prepare = flow_null_prepare,
3300         .translate = flow_null_translate,
3301         .apply = flow_null_apply,
3302         .remove = flow_null_remove,
3303         .destroy = flow_null_destroy,
3304         .query = flow_null_query,
3305         .sync_domain = flow_null_sync_domain,
3306 };
3307
3308 /**
3309  * Select flow driver type according to flow attributes and device
3310  * configuration.
3311  *
3312  * @param[in] dev
3313  *   Pointer to the dev structure.
3314  * @param[in] attr
3315  *   Pointer to the flow attributes.
3316  *
3317  * @return
3318  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3319  */
3320 static enum mlx5_flow_drv_type
3321 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3322 {
3323         struct mlx5_priv *priv = dev->data->dev_private;
3324         /* The OS can determine first a specific flow type (DV, VERBS) */
3325         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3326
3327         if (type != MLX5_FLOW_TYPE_MAX)
3328                 return type;
3329         /* If no OS specific type - continue with DV/VERBS selection */
3330         if (attr->transfer && priv->config.dv_esw_en)
3331                 type = MLX5_FLOW_TYPE_DV;
3332         if (!attr->transfer)
3333                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3334                                                  MLX5_FLOW_TYPE_VERBS;
3335         return type;
3336 }
3337
3338 #define flow_get_drv_ops(type) flow_drv_ops[type]
3339
3340 /**
3341  * Flow driver validation API. This abstracts calling driver specific functions.
3342  * The type of flow driver is determined according to flow attributes.
3343  *
3344  * @param[in] dev
3345  *   Pointer to the dev structure.
3346  * @param[in] attr
3347  *   Pointer to the flow attributes.
3348  * @param[in] items
3349  *   Pointer to the list of items.
3350  * @param[in] actions
3351  *   Pointer to the list of actions.
3352  * @param[in] external
3353  *   This flow rule is created by a request external to the PMD.
3354  * @param[in] hairpin
3355  *   Number of hairpin TX actions, 0 means classic flow.
3356  * @param[out] error
3357  *   Pointer to the error structure.
3358  *
3359  * @return
3360  *   0 on success, a negative errno value otherwise and rte_errno is set.
3361  */
3362 static inline int
3363 flow_drv_validate(struct rte_eth_dev *dev,
3364                   const struct rte_flow_attr *attr,
3365                   const struct rte_flow_item items[],
3366                   const struct rte_flow_action actions[],
3367                   bool external, int hairpin, struct rte_flow_error *error)
3368 {
3369         const struct mlx5_flow_driver_ops *fops;
3370         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3371
3372         fops = flow_get_drv_ops(type);
3373         return fops->validate(dev, attr, items, actions, external,
3374                               hairpin, error);
3375 }
3376
3377 /**
3378  * Flow driver preparation API. This abstracts calling driver specific
3379  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3380  * calculates the size of memory required for device flow, allocates the memory,
3381  * initializes the device flow and returns the pointer.
3382  *
3383  * @note
3384  *   This function initializes device flow structure such as dv or verbs in
3385  *   struct mlx5_flow. However, it is caller's responsibility to initialize the
3386  *   struct mlx5_flow. However, it is the caller's responsibility to initialize
3387  *   the rest. For example, adding the returned device flow to the flow->dev_flow
3388  *   list and setting the backward reference to the flow should be done outside
3389  *   of this function. The layers field is not filled either.
3390  * @param[in] dev
3391  *   Pointer to the dev structure.
3392  * @param[in] attr
3393  *   Pointer to the flow attributes.
3394  * @param[in] items
3395  *   Pointer to the list of items.
3396  * @param[in] actions
3397  *   Pointer to the list of actions.
3398  * @param[in] flow_idx
3399  *   Memory pool index of the flow.
3400  * @param[out] error
3401  *   Pointer to the error structure.
3402  *
3403  * @return
3404  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3405  */
3406 static inline struct mlx5_flow *
3407 flow_drv_prepare(struct rte_eth_dev *dev,
3408                  const struct rte_flow *flow,
3409                  const struct rte_flow_attr *attr,
3410                  const struct rte_flow_item items[],
3411                  const struct rte_flow_action actions[],
3412                  uint32_t flow_idx,
3413                  struct rte_flow_error *error)
3414 {
3415         const struct mlx5_flow_driver_ops *fops;
3416         enum mlx5_flow_drv_type type = flow->drv_type;
3417         struct mlx5_flow *mlx5_flow = NULL;
3418
3419         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3420         fops = flow_get_drv_ops(type);
3421         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3422         if (mlx5_flow)
3423                 mlx5_flow->flow_idx = flow_idx;
3424         return mlx5_flow;
3425 }
3426
3427 /**
3428  * Flow driver translation API. This abstracts calling driver specific
3429  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3430  * translates a generic flow into a driver flow. flow_drv_prepare() must
3431  * precede.
3432  *
3433  * @note
3434  *   dev_flow->layers could be filled as a result of parsing during translation
3435  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3436  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3437  *   flow->actions could be overwritten even though all the expanded dev_flows
3438  *   have the same actions.
3439  *
3440  * @param[in] dev
3441  *   Pointer to the rte dev structure.
3442  * @param[in, out] dev_flow
3443  *   Pointer to the mlx5 flow.
3444  * @param[in] attr
3445  *   Pointer to the flow attributes.
3446  * @param[in] items
3447  *   Pointer to the list of items.
3448  * @param[in] actions
3449  *   Pointer to the list of actions.
3450  * @param[out] error
3451  *   Pointer to the error structure.
3452  *
3453  * @return
3454  *   0 on success, a negative errno value otherwise and rte_errno is set.
3455  */
3456 static inline int
3457 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3458                    const struct rte_flow_attr *attr,
3459                    const struct rte_flow_item items[],
3460                    const struct rte_flow_action actions[],
3461                    struct rte_flow_error *error)
3462 {
3463         const struct mlx5_flow_driver_ops *fops;
3464         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3465
3466         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3467         fops = flow_get_drv_ops(type);
3468         return fops->translate(dev, dev_flow, attr, items, actions, error);
3469 }
3470
3471 /**
3472  * Flow driver apply API. This abstracts calling driver specific functions.
3473  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3474  * translated driver flows on to device. flow_drv_translate() must precede.
3475  *
3476  * @param[in] dev
3477  *   Pointer to Ethernet device structure.
3478  * @param[in, out] flow
3479  *   Pointer to flow structure.
3480  * @param[out] error
3481  *   Pointer to error structure.
3482  *
3483  * @return
3484  *   0 on success, a negative errno value otherwise and rte_errno is set.
3485  */
3486 static inline int
3487 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3488                struct rte_flow_error *error)
3489 {
3490         const struct mlx5_flow_driver_ops *fops;
3491         enum mlx5_flow_drv_type type = flow->drv_type;
3492
3493         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3494         fops = flow_get_drv_ops(type);
3495         return fops->apply(dev, flow, error);
3496 }
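
/*
 * Illustrative call sequence for the driver dispatch helpers above (a sketch
 * only, not used by the driver itself; the dev_flow/idx names are assumed and
 * error handling plus flow list management are omitted):
 *
 *   if (flow_drv_validate(dev, attr, items, actions, true, 0, error))
 *       return -rte_errno;
 *   dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, idx, error);
 *   if (!dev_flow)
 *       return -rte_errno;
 *   if (flow_drv_translate(dev, dev_flow, attr, items, actions, error))
 *       return -rte_errno;
 *   if (flow_drv_apply(dev, flow, error))
 *       return -rte_errno;
 */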
3497
3498 /**
3499  * Flow driver destroy API. This abstracts calling driver specific functions.
3500  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3501  * on device and releases resources of the flow.
3502  *
3503  * @param[in] dev
3504  *   Pointer to Ethernet device.
3505  * @param[in, out] flow
3506  *   Pointer to flow structure.
3507  */
3508 static inline void
3509 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3510 {
3511         const struct mlx5_flow_driver_ops *fops;
3512         enum mlx5_flow_drv_type type = flow->drv_type;
3513
3514         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3515         fops = flow_get_drv_ops(type);
3516         fops->destroy(dev, flow);
3517 }
3518
3519 /**
3520  * Flow driver find RSS policy tbl API. This abstracts calling driver
3521  * specific functions. Parent flow (rte_flow) should have driver
3522  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3523  *
3524  * @param[in] dev
3525  *   Pointer to Ethernet device.
3526  * @param[in, out] flow
3527  *   Pointer to flow structure.
3528  * @param[in] policy
3529  *   Pointer to meter policy table.
3530  * @param[in] rss_desc
3531  *   Pointer to rss_desc.
3532  */
3533 static struct mlx5_flow_meter_sub_policy *
3534 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3535                 struct rte_flow *flow,
3536                 struct mlx5_flow_meter_policy *policy,
3537                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3538 {
3539         const struct mlx5_flow_driver_ops *fops;
3540         enum mlx5_flow_drv_type type = flow->drv_type;
3541
3542         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3543         fops = flow_get_drv_ops(type);
3544         return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3545 }
3546
3547 /**
3548  * Flow driver color tag rule API. This abstracts calling driver
3549  * specific functions. Parent flow (rte_flow) should have driver
3550  * type (drv_type). It will create the color tag rules in the meter hierarchy.
3551  *
3552  * @param[in] dev
3553  *   Pointer to Ethernet device.
3554  * @param[in, out] flow
3555  *   Pointer to flow structure.
3556  * @param[in] fm
3557  *   Pointer to flow meter structure.
3558  * @param[in] src_port
3559  *   The src port this extra rule should use.
3560  * @param[in] item
3561  *   The src port id match item.
3562  * @param[out] error
3563  *   Pointer to error structure.
3564  */
3565 static int
3566 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
3567                 struct rte_flow *flow,
3568                 struct mlx5_flow_meter_info *fm,
3569                 int32_t src_port,
3570                 const struct rte_flow_item *item,
3571                 struct rte_flow_error *error)
3572 {
3573         const struct mlx5_flow_driver_ops *fops;
3574         enum mlx5_flow_drv_type type = flow->drv_type;
3575
3576         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3577         fops = flow_get_drv_ops(type);
3578         return fops->meter_hierarchy_rule_create(dev, fm,
3579                                                 src_port, item, error);
3580 }
3581
3582 /**
3583  * Get RSS action from the action list.
3584  *
3585  * @param[in] dev
3586  *   Pointer to Ethernet device.
3587  * @param[in] actions
3588  *   Pointer to the list of actions.
3589  * @param[in] flow
3590  *   Parent flow structure pointer.
3591  *
3592  * @return
3593  *   Pointer to the RSS action if it exists, NULL otherwise.
3594  */
3595 static const struct rte_flow_action_rss*
3596 flow_get_rss_action(struct rte_eth_dev *dev,
3597                     const struct rte_flow_action actions[])
3598 {
3599         struct mlx5_priv *priv = dev->data->dev_private;
3600         const struct rte_flow_action_rss *rss = NULL;
3601         struct mlx5_meter_policy_action_container *acg;
3602         struct mlx5_meter_policy_action_container *acy;
3603
3604         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3605                 switch (actions->type) {
3606                 case RTE_FLOW_ACTION_TYPE_RSS:
3607                         rss = actions->conf;
3608                         break;
3609                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
3610                 {
3611                         const struct rte_flow_action_sample *sample =
3612                                                                 actions->conf;
3613                         const struct rte_flow_action *act = sample->actions;
3614                         for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3615                                 if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3616                                         rss = act->conf;
3617                         break;
3618                 }
3619                 case RTE_FLOW_ACTION_TYPE_METER:
3620                 {
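                        /*
                         * For a meter with a non-default policy, resolve the
                         * final policy of a hierarchy and take the RSS
                         * configuration from the green or yellow color action
                         * that uses a shared RSS fate.
                         */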
3621                         uint32_t mtr_idx;
3622                         struct mlx5_flow_meter_info *fm;
3623                         struct mlx5_flow_meter_policy *policy;
3624                         const struct rte_flow_action_meter *mtr = actions->conf;
3625
3626                         fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
3627                         if (fm && !fm->def_policy) {
3628                                 policy = mlx5_flow_meter_policy_find(dev,
3629                                                 fm->policy_id, NULL);
3630                                 MLX5_ASSERT(policy);
3631                                 if (policy->is_hierarchy) {
3632                                         policy =
3633                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
3634                                                                         policy);
3635                                         if (!policy)
3636                                                 return NULL;
3637                                 }
3638                                 if (policy->is_rss) {
3639                                         acg =
3640                                         &policy->act_cnt[RTE_COLOR_GREEN];
3641                                         acy =
3642                                         &policy->act_cnt[RTE_COLOR_YELLOW];
3643                                         if (acg->fate_action ==
3644                                             MLX5_FLOW_FATE_SHARED_RSS)
3645                                                 rss = acg->rss->conf;
3646                                         else if (acy->fate_action ==
3647                                                  MLX5_FLOW_FATE_SHARED_RSS)
3648                                                 rss = acy->rss->conf;
3649                                 }
3650                         }
3651                         break;
3652                 }
3653                 default:
3654                         break;
3655                 }
3656         }
3657         return rss;
3658 }
3659
3660 /**
3661  * Get ASO age action by index.
3662  *
3663  * @param[in] dev
3664  *   Pointer to the Ethernet device structure.
3665  * @param[in] age_idx
3666  *   Index to the ASO age action.
3667  *
3668  * @return
3669  *   The specified ASO age action.
3670  */
3671 struct mlx5_aso_age_action*
3672 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
3673 {
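        /*
         * age_idx encodes the pool index in its lower 16 bits and the 1-based
         * offset of the action within that pool in its upper 16 bits.
         */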
3674         uint16_t pool_idx = age_idx & UINT16_MAX;
3675         uint16_t offset = (age_idx >> 16) & UINT16_MAX;
3676         struct mlx5_priv *priv = dev->data->dev_private;
3677         struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
3678         struct mlx5_aso_age_pool *pool;
3679
3680         rte_rwlock_read_lock(&mng->resize_rwl);
3681         pool = mng->pools[pool_idx];
3682         rte_rwlock_read_unlock(&mng->resize_rwl);
3683         return &pool->actions[offset - 1];
3684 }
3685
3686 /* Maps an indirect action to the translated direct action in an actions array. */
3687 struct mlx5_translated_action_handle {
3688         struct rte_flow_action_handle *action; /**< Indirect action handle. */
3689         int index; /**< Index in related array of rte_flow_action. */
3690 };
3691
3692 /**
3693  * Translate actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to the related
3694  * direct actions where translation is possible.
3695  * This allows running the same execution path for both direct and
3696  * indirect actions on flow create. All necessary preparations for indirect
3697  * action handling should be performed on the *handle* actions list returned
3698  * from this call.
3699  *
3700  * @param[in] dev
3701  *   Pointer to Ethernet device.
3702  * @param[in] actions
3703  *   List of actions to translate.
3704  * @param[out] handle
3705  *   List to store translated indirect action object handles.
3706  * @param[in, out] indir_n
3707  *   Size of the *handle* array. On return, updated with the number of
3708  *   indirect actions retrieved from the *actions* list.
3709  * @param[out] translated_actions
3710  *   List of actions where all indirect actions were translated to direct
3711  *   if possible. NULL if no translation took place.
3712  * @param[out] error
3713  *   Pointer to the error structure.
3714  *
3715  * @return
3716  *   0 on success, a negative errno value otherwise and rte_errno is set.
3717  */
3718 static int
3719 flow_action_handles_translate(struct rte_eth_dev *dev,
3720                               const struct rte_flow_action actions[],
3721                               struct mlx5_translated_action_handle *handle,
3722                               int *indir_n,
3723                               struct rte_flow_action **translated_actions,
3724                               struct rte_flow_error *error)
3725 {
3726         struct mlx5_priv *priv = dev->data->dev_private;
3727         struct rte_flow_action *translated = NULL;
3728         size_t actions_size;
3729         int n;
3730         int copied_n = 0;
3731         struct mlx5_translated_action_handle *handle_end = NULL;
3732
3733         for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
3734                 if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
3735                         continue;
3736                 if (copied_n == *indir_n) {
3737                         return rte_flow_error_set
3738                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
3739                                  NULL, "too many shared actions");
3740                 }
3741                 rte_memcpy(&handle[copied_n].action, &actions[n].conf,
3742                            sizeof(actions[n].conf));
3743                 handle[copied_n].index = n;
3744                 copied_n++;
3745         }
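        /* Account for the terminating RTE_FLOW_ACTION_TYPE_END action. */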
3746         n++;
3747         *indir_n = copied_n;
3748         if (!copied_n)
3749                 return 0;
3750         actions_size = sizeof(struct rte_flow_action) * n;
3751         translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
3752         if (!translated) {
3753                 rte_errno = ENOMEM;
3754                 return -ENOMEM;
3755         }
3756         memcpy(translated, actions, actions_size);
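        /*
         * Each indirect action handle packs the action type in the bits above
         * MLX5_INDIRECT_ACTION_TYPE_OFFSET and the ipool index in the bits
         * below it; decode both to rebuild the matching direct action.
         */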
3757         for (handle_end = handle + copied_n; handle < handle_end; handle++) {
3758                 struct mlx5_shared_action_rss *shared_rss;
3759                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3760                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3761                 uint32_t idx = act_idx &
3762                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3763
3764                 switch (type) {
3765                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3766                         shared_rss = mlx5_ipool_get
3767                           (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
3768                         translated[handle->index].type =
3769                                 RTE_FLOW_ACTION_TYPE_RSS;
3770                         translated[handle->index].conf =
3771                                 &shared_rss->origin;
3772                         break;
3773                 case MLX5_INDIRECT_ACTION_TYPE_COUNT:
3774                         translated[handle->index].type =
3775                                                 (enum rte_flow_action_type)
3776                                                 MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
3777                         translated[handle->index].conf = (void *)(uintptr_t)idx;
3778                         break;
3779                 case MLX5_INDIRECT_ACTION_TYPE_AGE:
3780                         if (priv->sh->flow_hit_aso_en) {
3781                                 translated[handle->index].type =
3782                                         (enum rte_flow_action_type)
3783                                         MLX5_RTE_FLOW_ACTION_TYPE_AGE;
3784                                 translated[handle->index].conf =
3785                                                          (void *)(uintptr_t)idx;
3786                                 break;
3787                         }
3788                         /* Fall-through */
3789                 case MLX5_INDIRECT_ACTION_TYPE_CT:
3790                         if (priv->sh->ct_aso_en) {
3791                                 translated[handle->index].type =
3792                                         RTE_FLOW_ACTION_TYPE_CONNTRACK;
3793                                 translated[handle->index].conf =
3794                                                          (void *)(uintptr_t)idx;
3795                                 break;
3796                         }
3797                         /* Fall-through */
3798                 default:
3799                         mlx5_free(translated);
3800                         return rte_flow_error_set
3801                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
3802                                  NULL, "invalid indirect action type");
3803                 }
3804         }
3805         *translated_actions = translated;
3806         return 0;
3807 }
3808
3809 /**
3810  * Get Shared RSS action from the action list.
3811  *
3812  * @param[in] dev
3813  *   Pointer to Ethernet device.
3814  * @param[in] handle
3815  *   Pointer to the list of translated action handles.
3816  * @param[in] shared_n
3817  *   Length of the handle list.
3818  *
3819  * @return
3820  *   The MLX5 RSS action ID if it exists, otherwise 0.
3821  */
3822 static uint32_t
3823 flow_get_shared_rss_action(struct rte_eth_dev *dev,
3824                            struct mlx5_translated_action_handle *handle,
3825                            int shared_n)
3826 {
3827         struct mlx5_translated_action_handle *handle_end;
3828         struct mlx5_priv *priv = dev->data->dev_private;
3829         struct mlx5_shared_action_rss *shared_rss;
3830
3831
3832         for (handle_end = handle + shared_n; handle < handle_end; handle++) {
3833                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3834                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3835                 uint32_t idx = act_idx &
3836                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3837                 switch (type) {
3838                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3839                         shared_rss = mlx5_ipool_get
3840                                 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
3841                                                                            idx);
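                        /* Take a reference on the shared RSS action. */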
3842                         __atomic_add_fetch(&shared_rss->refcnt, 1,
3843                                            __ATOMIC_RELAXED);
3844                         return idx;
3845                 default:
3846                         break;
3847                 }
3848         }
3849         return 0;
3850 }
3851
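/**
 * Select the RSS expansion graph root: MLX5_EXPANSION_ROOT is used for RSS
 * level 0 or 1 (outer RSS), MLX5_EXPANSION_ROOT_OUTER for level 2 and above
 * (inner RSS).
 */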
3852 static unsigned int
3853 find_graph_root(uint32_t rss_level)
3854 {
3855         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
3856                                MLX5_EXPANSION_ROOT_OUTER;
3857 }
3858
3859 /**
3860  *  Get layer flags from the prefix flow.
3861  *
3862  *  Some flows may be split into several subflows; the prefix subflow gets the
3863  *  match items and the suffix subflow gets the actions.
3864  *  Some actions need the user-defined match item flags to get the details for
3865  *  the action.
3866  *  This function helps the suffix flow to get the item layer flags from the
3867  *  prefix subflow.
3868  *
3869  * @param[in] dev_flow
3870  *   Pointer to the created prefix subflow.
3871  *
3872  * @return
3873  *   The layers obtained from the prefix subflow.
3874  */
3875 static inline uint64_t
3876 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
3877 {
3878         uint64_t layers = 0;
3879
3880         /*
3881          * The layer bits could be cached locally, but usually the compiler
3882          * optimizes this access well enough.
3883          * If no decap actions, use the layers directly.
3884          */
3885         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
3886                 return dev_flow->handle->layers;
3887         /* Convert L3 layers with decap action. */
3888         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
3889                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3890         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
3891                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3892         /* Convert L4 layers with decap action.  */
3893         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
3894                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
3895         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
3896                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
3897         return layers;
3898 }
3899
3900 /**
3901  * Get metadata split action information.
3902  *
3903  * @param[in] actions
3904  *   Pointer to the list of actions.
3905  * @param[out] qrss
3906  *   Pointer used to return the QUEUE/RSS action pointer; left unchanged
3907  *   if no QUEUE/RSS action is found in the list.
3910  * @param[out] encap_idx
3911  *   Pointer to the index of the encap action if it exists, otherwise the last
3912  *   action index.
3913  *
3914  * @return
3915  *   Total number of actions.
3916  */
3917 static int
3918 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
3919                                        const struct rte_flow_action **qrss,
3920                                        int *encap_idx)
3921 {
3922         const struct rte_flow_action_raw_encap *raw_encap;
3923         int actions_n = 0;
3924         int raw_decap_idx = -1;
3925
3926         *encap_idx = -1;
3927         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3928                 switch (actions->type) {
3929                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3930                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3931                         *encap_idx = actions_n;
3932                         break;
3933                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3934                         raw_decap_idx = actions_n;
3935                         break;
3936                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
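                        /*
                         * A raw encap larger than the decision size is treated
                         * as a tunnel encapsulation; when a raw decap precedes
                         * it, the encap index points at the decap position.
                         */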
3937                         raw_encap = actions->conf;
3938                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3939                                 *encap_idx = raw_decap_idx != -1 ?
3940                                                       raw_decap_idx : actions_n;
3941                         break;
3942                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3943                 case RTE_FLOW_ACTION_TYPE_RSS:
3944                         *qrss = actions;
3945                         break;
3946                 default:
3947                         break;
3948                 }
3949                 actions_n++;
3950         }
3951         if (*encap_idx == -1)
3952                 *encap_idx = actions_n;
3953         /* Count RTE_FLOW_ACTION_TYPE_END. */
3954         return actions_n + 1;
3955 }
3956
3957 /**
3958  * Check if the action will modify the packet.
3959  *
3960  * @param dev
3961  *   Pointer to Ethernet device.
3962  * @param[in] type
3963  *   Action type.
3964  *
3965  * @return
3966  *   true if the action will modify the packet, false otherwise.
3967  */
3968 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
3969                                           enum rte_flow_action_type type)
3970 {
3971         struct mlx5_priv *priv = dev->data->dev_private;
3972
3973         switch (type) {
3974         case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
3975         case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
3976         case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
3977         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
3978         case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
3979         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
3980         case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
3981         case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
3982         case RTE_FLOW_ACTION_TYPE_DEC_TTL:
3983         case RTE_FLOW_ACTION_TYPE_SET_TTL:
3984         case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
3985         case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
3986         case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
3987         case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
3988         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
3989         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
3990         case RTE_FLOW_ACTION_TYPE_SET_META:
3991         case RTE_FLOW_ACTION_TYPE_SET_TAG:
3992         case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
3993         case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3994         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3995         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3996         case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3997         case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
3998         case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3999         case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4000         case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4001         case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4002         case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4003                 return true;
4004         case RTE_FLOW_ACTION_TYPE_FLAG:
4005         case RTE_FLOW_ACTION_TYPE_MARK:
4006                 if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
4007                         return true;
4008                 else
4009                         return false;
4010         default:
4011                 return false;
4012         }
4013 }
4014
4015 /**
4016  * Check meter action from the action list.
4017  *
4018  * @param dev
4019  *   Pointer to Ethernet device.
4020  * @param[in] actions
4021  *   Pointer to the list of actions.
4022  * @param[out] has_mtr
4023  *   Pointer to the flag indicating whether a meter action exists.
4024  * @param[out] has_modify
4025  *   Pointer to the flag indicating whether a packet-modifying action exists.
4026  * @param[out] meter_id
4027  *   Pointer to the meter id.
4028  *
4029  * @return
4030  *   Total number of actions.
4031  */
4032 static int
4033 flow_check_meter_action(struct rte_eth_dev *dev,
4034                         const struct rte_flow_action actions[],
4035                         bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4036 {
4037         const struct rte_flow_action_meter *mtr = NULL;
4038         int actions_n = 0;
4039
4040         MLX5_ASSERT(has_mtr);
4041         *has_mtr = false;
4042         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4043                 switch (actions->type) {
4044                 case RTE_FLOW_ACTION_TYPE_METER:
4045                         mtr = actions->conf;
4046                         *meter_id = mtr->mtr_id;
4047                         *has_mtr = true;
4048                         break;
4049                 default:
4050                         break;
4051                 }
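                /* Only actions before the meter count as packet-modifying. */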
4052                 if (!*has_mtr)
4053                         *has_modify |= flow_check_modify_action_type(dev,
4054                                                                 actions->type);
4055                 actions_n++;
4056         }
4057         /* Count RTE_FLOW_ACTION_TYPE_END. */
4058         return actions_n + 1;
4059 }
4060
4061 /**
4062  * Check if the flow should be split due to hairpin.
4063  * The reason for the split is that current HW can't support encap and
4064  * push-vlan on Rx, so if a flow contains these actions we move them
4065  * to the Tx side.
4066  *
4067  * @param dev
4068  *   Pointer to Ethernet device.
4069  * @param[in] attr
4070  *   Flow rule attributes.
4071  * @param[in] actions
4072  *   Associated actions (list terminated by the END action).
4073  *
4074  * @return
4075  *   > 0 the number of actions and the flow should be split,
4076  *   0 when no split required.
4077  */
4078 static int
4079 flow_check_hairpin_split(struct rte_eth_dev *dev,
4080                          const struct rte_flow_attr *attr,
4081                          const struct rte_flow_action actions[])
4082 {
4083         int queue_action = 0;
4084         int action_n = 0;
4085         int split = 0;
4086         const struct rte_flow_action_queue *queue;
4087         const struct rte_flow_action_rss *rss;
4088         const struct rte_flow_action_raw_encap *raw_encap;
4089         const struct rte_eth_hairpin_conf *conf;
4090
4091         if (!attr->ingress)
4092                 return 0;
4093         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4094                 switch (actions->type) {
4095                 case RTE_FLOW_ACTION_TYPE_QUEUE:
4096                         queue = actions->conf;
4097                         if (queue == NULL)
4098                                 return 0;
4099                         conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4100                         if (conf == NULL || conf->tx_explicit != 0)
4101                                 return 0;
4102                         queue_action = 1;
4103                         action_n++;
4104                         break;
4105                 case RTE_FLOW_ACTION_TYPE_RSS:
4106                         rss = actions->conf;
4107                         if (rss == NULL || rss->queue_num == 0)
4108                                 return 0;
4109                         conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4110                         if (conf == NULL || conf->tx_explicit != 0)
4111                                 return 0;
4112                         queue_action = 1;
4113                         action_n++;
4114                         break;
4115                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4116                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4117                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4118                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4119                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4120                         split++;
4121                         action_n++;
4122                         break;
4123                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4124                         raw_encap = actions->conf;
4125                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4126                                 split++;
4127                         action_n++;
4128                         break;
4129                 default:
4130                         action_n++;
4131                         break;
4132                 }
4133         }
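        /*
         * Split only when the flow has both an encap/push-VLAN action and a
         * QUEUE/RSS action targeting an implicit-Tx hairpin queue.
         */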
4134         if (split && queue_action)
4135                 return action_n;
4136         return 0;
4137 }
4138
4139 /* Declare flow create/destroy prototype in advance. */
4140 static uint32_t
4141 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4142                  const struct rte_flow_attr *attr,
4143                  const struct rte_flow_item items[],
4144                  const struct rte_flow_action actions[],
4145                  bool external, struct rte_flow_error *error);
4146
4147 static void
4148 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4149                   uint32_t flow_idx);
4150
4151 int
4152 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4153                       struct mlx5_list_entry *entry, void *cb_ctx)
4154 {
4155         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4156         struct mlx5_flow_mreg_copy_resource *mcp_res =
4157                                container_of(entry, typeof(*mcp_res), hlist_ent);
4158
4159         return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4160 }
4161
4162 struct mlx5_list_entry *
4163 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4164 {
4165         struct rte_eth_dev *dev = tool_ctx;
4166         struct mlx5_priv *priv = dev->data->dev_private;
4167         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4168         struct mlx5_flow_mreg_copy_resource *mcp_res;
4169         struct rte_flow_error *error = ctx->error;
4170         uint32_t idx = 0;
4171         int ret;
4172         uint32_t mark_id = *(uint32_t *)(ctx->data);
4173         struct rte_flow_attr attr = {
4174                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4175                 .ingress = 1,
4176         };
4177         struct mlx5_rte_flow_item_tag tag_spec = {
4178                 .data = mark_id,
4179         };
4180         struct rte_flow_item items[] = {
4181                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4182         };
4183         struct rte_flow_action_mark ftag = {
4184                 .id = mark_id,
4185         };
4186         struct mlx5_flow_action_copy_mreg cp_mreg = {
4187                 .dst = REG_B,
4188                 .src = REG_NON,
4189         };
4190         struct rte_flow_action_jump jump = {
4191                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4192         };
4193         struct rte_flow_action actions[] = {
4194                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4195         };
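        /*
         * The items[] and actions[] arrays are sized by their designated END
         * initializers; the preceding slots are filled below depending on
         * whether this is the default copy flow.
         */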
4196
4197         /* Fill the register fields in the flow. */
4198         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4199         if (ret < 0)
4200                 return NULL;
4201         tag_spec.id = ret;
4202         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4203         if (ret < 0)
4204                 return NULL;
4205         cp_mreg.src = ret;
4206         /* Provide the full width of FLAG specific value. */
4207         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4208                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4209         /* Build a new flow. */
4210         if (mark_id != MLX5_DEFAULT_COPY_ID) {
4211                 items[0] = (struct rte_flow_item){
4212                         .type = (enum rte_flow_item_type)
4213                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4214                         .spec = &tag_spec,
4215                 };
4216                 items[1] = (struct rte_flow_item){
4217                         .type = RTE_FLOW_ITEM_TYPE_END,
4218                 };
4219                 actions[0] = (struct rte_flow_action){
4220                         .type = (enum rte_flow_action_type)
4221                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4222                         .conf = &ftag,
4223                 };
4224                 actions[1] = (struct rte_flow_action){
4225                         .type = (enum rte_flow_action_type)
4226                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4227                         .conf = &cp_mreg,
4228                 };
4229                 actions[2] = (struct rte_flow_action){
4230                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4231                         .conf = &jump,
4232                 };
4233                 actions[3] = (struct rte_flow_action){
4234                         .type = RTE_FLOW_ACTION_TYPE_END,
4235                 };
4236         } else {
4237                 /* Default rule, wildcard match. */
4238                 attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4239                 items[0] = (struct rte_flow_item){
4240                         .type = RTE_FLOW_ITEM_TYPE_END,
4241                 };
4242                 actions[0] = (struct rte_flow_action){
4243                         .type = (enum rte_flow_action_type)
4244                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4245                         .conf = &cp_mreg,
4246                 };
4247                 actions[1] = (struct rte_flow_action){
4248                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4249                         .conf = &jump,
4250                 };
4251                 actions[2] = (struct rte_flow_action){
4252                         .type = RTE_FLOW_ACTION_TYPE_END,
4253                 };
4254         }
4255         /* Build a new entry. */
4256         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4257         if (!mcp_res) {
4258                 rte_errno = ENOMEM;
4259                 return NULL;
4260         }
4261         mcp_res->idx = idx;
4262         mcp_res->mark_id = mark_id;
4263         /*
4264          * The copy flows are not included in any list. These
4265          * flows are referenced from other flows and cannot
4266          * be applied, removed, or deleted in arbitrary order
4267          * by list traversal.
4268          */
4269         mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4270                                         &attr, items, actions, false, error);
4271         if (!mcp_res->rix_flow) {
4272                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4273                 return NULL;
4274         }
4275         return &mcp_res->hlist_ent;
4276 }
4277
4278 struct mlx5_list_entry *
4279 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4280                       void *cb_ctx __rte_unused)
4281 {
4282         struct rte_eth_dev *dev = tool_ctx;
4283         struct mlx5_priv *priv = dev->data->dev_private;
4284         struct mlx5_flow_mreg_copy_resource *mcp_res;
4285         uint32_t idx = 0;
4286
4287         mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4288         if (!mcp_res) {
4289                 rte_errno = ENOMEM;
4290                 return NULL;
4291         }
4292         memcpy(mcp_res, oentry, sizeof(*mcp_res));
4293         mcp_res->idx = idx;
4294         return &mcp_res->hlist_ent;
4295 }
4296
4297 void
4298 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4299 {
4300         struct mlx5_flow_mreg_copy_resource *mcp_res =
4301                                container_of(entry, typeof(*mcp_res), hlist_ent);
4302         struct rte_eth_dev *dev = tool_ctx;
4303         struct mlx5_priv *priv = dev->data->dev_private;
4304
4305         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4306 }
4307
4308 /**
4309  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4310  *
4311  * As mark_id is unique, if there's already a registered flow for the mark_id,
4312  * return by increasing the reference counter of the resource. Otherwise, create
4313  * the resource (mcp_res) and flow.
4314  *
4315  * Flow looks like,
4316  *   - If ingress port is ANY and reg_c[1] is mark_id,
4317  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4318  *
4319  * For default flow (zero mark_id), flow is like,
4320  *   - If ingress port is ANY,
4321  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4322  *
4323  * @param dev
4324  *   Pointer to Ethernet device.
4325  * @param mark_id
4326  *   ID of MARK action, zero means default flow for META.
4327  * @param[out] error
4328  *   Perform verbose error reporting if not NULL.
4329  *
4330  * @return
4331  *   Associated resource on success, NULL otherwise and rte_errno is set.
4332  */
4333 static struct mlx5_flow_mreg_copy_resource *
4334 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4335                           struct rte_flow_error *error)
4336 {
4337         struct mlx5_priv *priv = dev->data->dev_private;
4338         struct mlx5_list_entry *entry;
4339         struct mlx5_flow_cb_ctx ctx = {
4340                 .dev = dev,
4341                 .error = error,
4342                 .data = &mark_id,
4343         };
4344
4345         /* Check if already registered. */
4346         MLX5_ASSERT(priv->mreg_cp_tbl);
4347         entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4348         if (!entry)
4349                 return NULL;
4350         return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4351                             hlist_ent);
4352 }
4353
4354 void
4355 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4356 {
4357         struct mlx5_flow_mreg_copy_resource *mcp_res =
4358                                container_of(entry, typeof(*mcp_res), hlist_ent);
4359         struct rte_eth_dev *dev = tool_ctx;
4360         struct mlx5_priv *priv = dev->data->dev_private;
4361
4362         MLX5_ASSERT(mcp_res->rix_flow);
4363         flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4364         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4365 }
4366
4367 /**
4368  * Release flow in RX_CP_TBL.
4369  *
4370  * @param dev
4371  *   Pointer to Ethernet device.
4372  * @param flow
4373  *   Parent flow for which copying is provided.
4374  */
4375 static void
4376 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4377                           struct rte_flow *flow)
4378 {
4379         struct mlx5_flow_mreg_copy_resource *mcp_res;
4380         struct mlx5_priv *priv = dev->data->dev_private;
4381
4382         if (!flow->rix_mreg_copy)
4383                 return;
4384         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4385                                  flow->rix_mreg_copy);
4386         if (!mcp_res || !priv->mreg_cp_tbl)
4387                 return;
4388         MLX5_ASSERT(mcp_res->rix_flow);
4389         mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4390         flow->rix_mreg_copy = 0;
4391 }
4392
4393 /**
4394  * Remove the default copy action from RX_CP_TBL.
4395  *
4396  * This function is called in mlx5_dev_start(). Thread safety is not
4397  * guaranteed.
4398  *
4399  * @param dev
4400  *   Pointer to Ethernet device.
4401  */
4402 static void
4403 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4404 {
4405         struct mlx5_list_entry *entry;
4406         struct mlx5_priv *priv = dev->data->dev_private;
4407         struct mlx5_flow_cb_ctx ctx;
4408         uint32_t mark_id;
4409
4410         /* Check if default flow is registered. */
4411         if (!priv->mreg_cp_tbl)
4412                 return;
4413         mark_id = MLX5_DEFAULT_COPY_ID;
4414         ctx.data = &mark_id;
4415         entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4416         if (!entry)
4417                 return;
4418         mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4419 }
4420
4421 /**
4422  * Add the default copy action in RX_CP_TBL.
4423  *
4424  * This function is called in mlx5_dev_start(). Thread safety is not
4425  * guaranteed.
4426  *
4427  * @param dev
4428  *   Pointer to Ethernet device.
4429  * @param[out] error
4430  *   Perform verbose error reporting if not NULL.
4431  *
4432  * @return
4433  *   0 for success, negative value otherwise and rte_errno is set.
4434  */
4435 static int
4436 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4437                                   struct rte_flow_error *error)
4438 {
4439         struct mlx5_priv *priv = dev->data->dev_private;
4440         struct mlx5_flow_mreg_copy_resource *mcp_res;
4441         struct mlx5_flow_cb_ctx ctx;
4442         uint32_t mark_id;
4443
4444         /* Check whether extensive metadata feature is engaged. */
4445         if (!priv->config.dv_flow_en ||
4446             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4447             !mlx5_flow_ext_mreg_supported(dev) ||
4448             !priv->sh->dv_regc0_mask)
4449                 return 0;
4450         /*
4451          * Adding the default mreg copy flow may be called multiple times,
4452          * but it is removed only once in stop. Avoid registering it twice.
4453          */
4454         mark_id = MLX5_DEFAULT_COPY_ID;
4455         ctx.data = &mark_id;
4456         if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4457                 return 0;
4458         mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4459         if (!mcp_res)
4460                 return -rte_errno;
4461         return 0;
4462 }
4463
4464 /**
4465  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4466  *
4467  * All flows having a Q/RSS action should be split by
4468  * flow_mreg_split_qrss_prep() to pass through RX_CP_TBL. A flow in the RX_CP_TBL
4469  * performs the following,
4470  *   - CQE->flow_tag := reg_c[1] (MARK)
4471  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4472  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4473  * but there should be a flow for each MARK ID set by the MARK action.
4474  *
4475  * For the aforementioned reason, if there's a MARK action in flow's action
4476  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4477  * the MARK ID to CQE's flow_tag like,
4478  *   - If reg_c[1] is mark_id,
4479  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4480  *
4481  * For SET_META action which stores value in reg_c[0], as the destination is
4482  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4483  * MARK ID means the default flow. The default flow looks like,
4484  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4485  *
4486  * @param dev
4487  *   Pointer to Ethernet device.
4488  * @param flow
4489  *   Pointer to flow structure.
4490  * @param[in] actions
4491  *   Pointer to the list of actions.
4492  * @param[out] error
4493  *   Perform verbose error reporting if not NULL.
4494  *
4495  * @return
4496  *   0 on success, negative value otherwise and rte_errno is set.
4497  */
4498 static int
4499 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4500                             struct rte_flow *flow,
4501                             const struct rte_flow_action *actions,
4502                             struct rte_flow_error *error)
4503 {
4504         struct mlx5_priv *priv = dev->data->dev_private;
4505         struct mlx5_dev_config *config = &priv->config;
4506         struct mlx5_flow_mreg_copy_resource *mcp_res;
4507         const struct rte_flow_action_mark *mark;
4508
4509         /* Check whether extensive metadata feature is engaged. */
4510         if (!config->dv_flow_en ||
4511             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4512             !mlx5_flow_ext_mreg_supported(dev) ||
4513             !priv->sh->dv_regc0_mask)
4514                 return 0;
4515         /* Find MARK action. */
4516         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4517                 switch (actions->type) {
4518                 case RTE_FLOW_ACTION_TYPE_FLAG:
4519                         mcp_res = flow_mreg_add_copy_action
4520                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
4521                         if (!mcp_res)
4522                                 return -rte_errno;
4523                         flow->rix_mreg_copy = mcp_res->idx;
4524                         return 0;
4525                 case RTE_FLOW_ACTION_TYPE_MARK:
4526                         mark = (const struct rte_flow_action_mark *)
4527                                 actions->conf;
4528                         mcp_res =
4529                                 flow_mreg_add_copy_action(dev, mark->id, error);
4530                         if (!mcp_res)
4531                                 return -rte_errno;
4532                         flow->rix_mreg_copy = mcp_res->idx;
4533                         return 0;
4534                 default:
4535                         break;
4536                 }
4537         }
4538         return 0;
4539 }
4540
4541 #define MLX5_MAX_SPLIT_ACTIONS 24
4542 #define MLX5_MAX_SPLIT_ITEMS 24
4543
4544 /**
4545  * Split the hairpin flow.
4546  * Since HW can't support encap and push-vlan on Rx, we move these
4547  * actions to Tx.
4548  * If the count action is after the encap then we also
4549  * move the count action. In this case the count will also measure
4550  * the outer bytes.
4551  *
4552  * @param dev
4553  *   Pointer to Ethernet device.
4554  * @param[in] actions
4555  *   Associated actions (list terminated by the END action).
4556  * @param[out] actions_rx
4557  *   Rx flow actions.
4558  * @param[out] actions_tx
4559  *   Tx flow actions.
4560  * @param[out] pattern_tx
4561  *   The pattern items for the Tx flow.
4562  * @param[out] flow_id
4563  *   The flow ID connected to this flow.
4564  *
4565  * @return
4566  *   0 on success.
4567  */
4568 static int
4569 flow_hairpin_split(struct rte_eth_dev *dev,
4570                    const struct rte_flow_action actions[],
4571                    struct rte_flow_action actions_rx[],
4572                    struct rte_flow_action actions_tx[],
4573                    struct rte_flow_item pattern_tx[],
4574                    uint32_t flow_id)
4575 {
4576         const struct rte_flow_action_raw_encap *raw_encap;
4577         const struct rte_flow_action_raw_decap *raw_decap;
4578         struct mlx5_rte_flow_action_set_tag *set_tag;
4579         struct rte_flow_action *tag_action;
4580         struct mlx5_rte_flow_item_tag *tag_item;
4581         struct rte_flow_item *item;
4582         char *addr;
4583         int encap = 0;
4584
4585         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4586                 switch (actions->type) {
4587                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4588                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4589                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4590                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4591                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4592                         rte_memcpy(actions_tx, actions,
4593                                sizeof(struct rte_flow_action));
4594                         actions_tx++;
4595                         break;
4596                 case RTE_FLOW_ACTION_TYPE_COUNT:
4597                         if (encap) {
4598                                 rte_memcpy(actions_tx, actions,
4599                                            sizeof(struct rte_flow_action));
4600                                 actions_tx++;
4601                         } else {
4602                                 rte_memcpy(actions_rx, actions,
4603                                            sizeof(struct rte_flow_action));
4604                                 actions_rx++;
4605                         }
4606                         break;
4607                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4608                         raw_encap = actions->conf;
4609                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4610                                 memcpy(actions_tx, actions,
4611                                        sizeof(struct rte_flow_action));
4612                                 actions_tx++;
4613                                 encap = 1;
4614                         } else {
4615                                 rte_memcpy(actions_rx, actions,
4616                                            sizeof(struct rte_flow_action));
4617                                 actions_rx++;
4618                         }
4619                         break;
4620                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4621                         raw_decap = actions->conf;
4622                         if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4623                                 memcpy(actions_tx, actions,
4624                                        sizeof(struct rte_flow_action));
4625                                 actions_tx++;
4626                         } else {
4627                                 rte_memcpy(actions_rx, actions,
4628                                            sizeof(struct rte_flow_action));
4629                                 actions_rx++;
4630                         }
4631                         break;
4632                 default:
4633                         rte_memcpy(actions_rx, actions,
4634                                    sizeof(struct rte_flow_action));
4635                         actions_rx++;
4636                         break;
4637                 }
4638         }
4639         /* Add set meta action and end action for the Rx flow. */
4640         tag_action = actions_rx;
4641         tag_action->type = (enum rte_flow_action_type)
4642                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4643         actions_rx++;
4644         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4645         actions_rx++;
4646         set_tag = (void *)actions_rx;
4647         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
4648                 .id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
4649                 .data = flow_id,
4650         };
4651         MLX5_ASSERT(set_tag->id > REG_NON);
4652         tag_action->conf = set_tag;
4653         /* Create Tx item list. */
4654         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
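        /*
         * The space following the two Tx pattern items is used as scratch
         * storage for the tag spec and mask structures referenced by the
         * first pattern item.
         */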
4655         addr = (void *)&pattern_tx[2];
4656         item = pattern_tx;
4657         item->type = (enum rte_flow_item_type)
4658                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4659         tag_item = (void *)addr;
4660         tag_item->data = flow_id;
4661         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
4662         MLX5_ASSERT(set_tag->id > REG_NON);
4663         item->spec = tag_item;
4664         addr += sizeof(struct mlx5_rte_flow_item_tag);
4665         tag_item = (void *)addr;
4666         tag_item->data = UINT32_MAX;
4667         tag_item->id = UINT16_MAX;
4668         item->mask = tag_item;
4669         item->last = NULL;
4670         item++;
4671         item->type = RTE_FLOW_ITEM_TYPE_END;
4672         return 0;
4673 }
4674
4675 /**
4676  * The last stage of splitting chain, just creates the subflow
4677  * without any modification.
4678  *
4679  * @param[in] dev
4680  *   Pointer to Ethernet device.
4681  * @param[in] flow
4682  *   Parent flow structure pointer.
4683  * @param[in, out] sub_flow
4684  *   Pointer to return the created subflow, may be NULL.
4685  * @param[in] attr
4686  *   Flow rule attributes.
4687  * @param[in] items
4688  *   Pattern specification (list terminated by the END pattern item).
4689  * @param[in] actions
4690  *   Associated actions (list terminated by the END action).
4691  * @param[in] flow_split_info
4692  *   Pointer to flow split info structure.
4693  * @param[out] error
4694  *   Perform verbose error reporting if not NULL.
4695  * @return
4696  *   0 on success, negative value otherwise
4697  */
4698 static int
4699 flow_create_split_inner(struct rte_eth_dev *dev,
4700                         struct rte_flow *flow,
4701                         struct mlx5_flow **sub_flow,
4702                         const struct rte_flow_attr *attr,
4703                         const struct rte_flow_item items[],
4704                         const struct rte_flow_action actions[],
4705                         struct mlx5_flow_split_info *flow_split_info,
4706                         struct rte_flow_error *error)
4707 {
4708         struct mlx5_flow *dev_flow;
4709
4710         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
4711                                     flow_split_info->flow_idx, error);
4712         if (!dev_flow)
4713                 return -rte_errno;
4714         dev_flow->flow = flow;
4715         dev_flow->external = flow_split_info->external;
4716         dev_flow->skip_scale = flow_split_info->skip_scale;
4717         /* Subflow object was created, we must include it in the list. */
4718         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4719                       dev_flow->handle, next);
4720         /*
4721          * If dev_flow is one of the suffix flows, some actions in the suffix
4722          * flow may need user-defined item layer flags; pass the metadata Rx
4723          * queue mark flag to the suffix flow as well.
4724          */
4725         if (flow_split_info->prefix_layers)
4726                 dev_flow->handle->layers = flow_split_info->prefix_layers;
4727         if (flow_split_info->prefix_mark)
4728                 dev_flow->handle->mark = 1;
4729         if (sub_flow)
4730                 *sub_flow = dev_flow;
4731 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4732         dev_flow->dv.table_id = flow_split_info->table_id;
4733 #endif
4734         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
4735 }
4736
4737 /**
4738  * Get the sub policy of a meter.
4739  *
4740  * @param[in] dev
4741  *   Pointer to Ethernet device.
4742  * @param[in] flow
4743  *   Parent flow structure pointer.
4744  * @param wks
4745  *   Pointer to thread flow work space.
4746  * @param[in] attr
4747  *   Flow rule attributes.
4748  * @param[in] items
4749  *   Pattern specification (list terminated by the END pattern item).
4750  * @param[out] error
4751  *   Perform verbose error reporting if not NULL.
4752  *
4753  * @return
4754  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
4755  */
4756 static struct mlx5_flow_meter_sub_policy *
4757 get_meter_sub_policy(struct rte_eth_dev *dev,
4758                      struct rte_flow *flow,
4759                      struct mlx5_flow_workspace *wks,
4760                      const struct rte_flow_attr *attr,
4761                      const struct rte_flow_item items[],
4762                      struct rte_flow_error *error)
4763 {
4764         struct mlx5_flow_meter_policy *policy;
4765         struct mlx5_flow_meter_policy *final_policy;
4766         struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
4767
4768         policy = wks->policy;
4769         final_policy = policy->is_hierarchy ? wks->final_policy : policy;
4770         if (final_policy->is_rss || final_policy->is_queue) {
4771                 struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
4772                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
4773                 uint32_t i;
4774
4775                 /*
4776                  * This is a tmp dev_flow,
4777                  * no need to register any matcher for it in translate.
4778                  */
4779                 wks->skip_matcher_reg = 1;
4780                 for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
4781                         struct mlx5_flow dev_flow = {0};
4782                         struct mlx5_flow_handle dev_handle = { {0} };
4783                         uint8_t fate = final_policy->act_cnt[i].fate_action;
4784
4785                         if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
4786                                 const struct rte_flow_action_rss *rss_act =
4787                                         final_policy->act_cnt[i].rss->conf;
4788                                 struct rte_flow_action rss_actions[2] = {
4789                                         [0] = {
4790                                         .type = RTE_FLOW_ACTION_TYPE_RSS,
4791                                         .conf = rss_act,
4792                                         },
4793                                         [1] = {
4794                                         .type = RTE_FLOW_ACTION_TYPE_END,
4795                                         .conf = NULL,
4796                                         }
4797                                 };
4798
4799                                 dev_flow.handle = &dev_handle;
4800                                 dev_flow.ingress = attr->ingress;
4801                                 dev_flow.flow = flow;
4802                                 dev_flow.external = 0;
4803 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4804                                 dev_flow.dv.transfer = attr->transfer;
4805 #endif
4806                                 /**
4807                                  * Translate RSS action to get rss hash fields.
4808                                  */
4809                                 if (flow_drv_translate(dev, &dev_flow, attr,
4810                                                 items, rss_actions, error))
4811                                         goto exit;
4812                                 rss_desc_v[i] = wks->rss_desc;
4813                                 rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
4814                                 rss_desc_v[i].hash_fields =
4815                                                 dev_flow.hash_fields;
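                                /* Without hash fields, fall back to a single queue. */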
4816                                 rss_desc_v[i].queue_num =
4817                                                 rss_desc_v[i].hash_fields ?
4818                                                 rss_desc_v[i].queue_num : 1;
4819                                 rss_desc_v[i].tunnel =
4820                                                 !!(dev_flow.handle->layers &
4821                                                    MLX5_FLOW_LAYER_TUNNEL);
4822                                 /* Use the RSS queues in the containers. */
4823                                 rss_desc_v[i].queue =
4824                                         (uint16_t *)(uintptr_t)rss_act->queue;
4825                                 rss_desc[i] = &rss_desc_v[i];
4826                         } else if (fate == MLX5_FLOW_FATE_QUEUE) {
4827                                 /* This is queue action. */
4828                                 rss_desc_v[i] = wks->rss_desc;
4829                                 rss_desc_v[i].key_len = 0;
4830                                 rss_desc_v[i].hash_fields = 0;
4831                                 rss_desc_v[i].queue =
4832                                         &final_policy->act_cnt[i].queue;
4833                                 rss_desc_v[i].queue_num = 1;
4834                                 rss_desc[i] = &rss_desc_v[i];
4835                         } else {
4836                                 rss_desc[i] = NULL;
4837                         }
4838                 }
4839                 sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
4840                                                 flow, policy, rss_desc);
4841         } else {
4842                 enum mlx5_meter_domain mtr_domain =
4843                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
4844                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
4845                                                 MLX5_MTR_DOMAIN_INGRESS);
4846                 sub_policy = policy->sub_policys[mtr_domain][0];
4847         }
4848         if (!sub_policy)
4849                 rte_flow_error_set(error, EINVAL,
4850                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4851                                    "Failed to get meter sub-policy.");
4852 exit:
4853         return sub_policy;
4854 }
4855
4856 /**
4857  * Split the meter flow.
4858  *
4859  * As the meter flow will be split into three subflows, the actions other
4860  * than the meter action only make sense when the meter accepts
4861  * the packet. If the packet is to be dropped, no additional
4862  * actions should be taken.
4863  *
4864  * One kind of special action which decapsulates the L3 tunnel
4865  * header will be put in the prefix subflow, so as not to take the
4866  * L3 tunnel header into account.
4867  *
4868  * @param[in] dev
4869  *   Pointer to Ethernet device.
4870  * @param[in] flow
4871  *   Parent flow structure pointer.
4872  * @param wks
4873  *   Pointer to thread flow work space.
4874  * @param[in] attr
4875  *   Flow rule attributes.
4876  * @param[in] items
4877  *   Pattern specification (list terminated by the END pattern item).
4878  * @param[out] sfx_items
4879  *   Suffix flow match items (list terminated by the END pattern item).
4880  * @param[in] actions
4881  *   Associated actions (list terminated by the END action).
4882  * @param[out] actions_sfx
4883  *   Suffix flow actions.
4884  * @param[out] actions_pre
4885  *   Prefix flow actions.
4886  * @param[out] mtr_flow_id
4887  *   Pointer to meter flow id.
4888  * @param[out] error
4889  *   Perform verbose error reporting if not NULL.
4890  *
4891  * @return
4892  *   0 on success, a negative errno value otherwise and rte_errno is set.
4893  */
4894 static int
4895 flow_meter_split_prep(struct rte_eth_dev *dev,
4896                       struct rte_flow *flow,
4897                       struct mlx5_flow_workspace *wks,
4898                       const struct rte_flow_attr *attr,
4899                       const struct rte_flow_item items[],
4900                       struct rte_flow_item sfx_items[],
4901                       const struct rte_flow_action actions[],
4902                       struct rte_flow_action actions_sfx[],
4903                       struct rte_flow_action actions_pre[],
4904                       uint32_t *mtr_flow_id,
4905                       struct rte_flow_error *error)
4906 {
4907         struct mlx5_priv *priv = dev->data->dev_private;
4908         struct mlx5_flow_meter_info *fm = wks->fm;
4909         struct rte_flow_action *tag_action = NULL;
4910         struct rte_flow_item *tag_item;
4911         struct mlx5_rte_flow_action_set_tag *set_tag;
4912         const struct rte_flow_action_raw_encap *raw_encap;
4913         const struct rte_flow_action_raw_decap *raw_decap;
4914         struct mlx5_rte_flow_item_tag *tag_item_spec;
4915         struct mlx5_rte_flow_item_tag *tag_item_mask;
4916         uint32_t tag_id = 0;
4917         struct rte_flow_item *vlan_item_dst = NULL;
4918         const struct rte_flow_item *vlan_item_src = NULL;
4919         struct rte_flow_action *hw_mtr_action;
4920         struct rte_flow_action *action_pre_head = NULL;
4921         int32_t flow_src_port = priv->representor_id;
4922         bool mtr_first;
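        /*
         * When the meter register is shared with the color, the meter flow ID
         * can use only the bits left beside the color bits; otherwise the
         * whole register width is available.
         */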
4923         uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
4924         uint8_t mtr_reg_bits = priv->mtr_reg_share ?
4925                                 MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
4926         uint32_t flow_id = 0;
4927         uint32_t flow_id_reversed = 0;
4928         uint8_t flow_id_bits = 0;
4929         int shift;
4930
4931         /* Prepare the suffix subflow items. */
4932         tag_item = sfx_items++;
4933         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
4934                 struct mlx5_priv *port_priv;
4935                 const struct rte_flow_item_port_id *pid_v;
4936                 int item_type = items->type;
4937
4938                 switch (item_type) {
4939                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
4940                         pid_v = items->spec;
4941                         MLX5_ASSERT(pid_v);
4942                         port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
4943                         if (!port_priv)
4944                                 return rte_flow_error_set(error,
4945                                                 rte_errno,
4946                                                 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
4947                                                 pid_v,
4948                                                 "Failed to get port info.");
4949                         flow_src_port = port_priv->representor_id;
4950                         if (!fm->def_policy && wks->policy->is_hierarchy &&
4951                             flow_src_port != priv->representor_id) {
4952                                 if (flow_drv_mtr_hierarchy_rule_create(dev,
4953                                                                 flow, fm,
4954                                                                 flow_src_port,
4955                                                                 items,
4956                                                                 error))
4957                                         return -rte_errno;
4958                         }
4959                         memcpy(sfx_items, items, sizeof(*sfx_items));
4960                         sfx_items++;
4961                         break;
4962                 case RTE_FLOW_ITEM_TYPE_VLAN:
4963                 /* Determine whether to copy the VLAN item below. */
4964                         vlan_item_src = items;
4965                         vlan_item_dst = sfx_items++;
4966                         vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
4967                         break;
4968                 default:
4969                         break;
4970                 }
4971         }
4972         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4973         sfx_items++;
4974         mtr_first = priv->sh->meter_aso_en &&
4975                 (attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
4976         /* For ASO meter, meter must be before tag in TX direction. */
4977         if (mtr_first) {
4978                 action_pre_head = actions_pre++;
4979                 /* Leave space for tag action. */
4980                 tag_action = actions_pre++;
4981         }
4982         /* Prepare the actions for prefix and suffix flow. */
4983         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4984                 struct rte_flow_action *action_cur = NULL;
4985
4986                 switch (actions->type) {
4987                 case RTE_FLOW_ACTION_TYPE_METER:
4988                         if (mtr_first) {
4989                                 action_cur = action_pre_head;
4990                         } else {
4991                                 /* Leave space for tag action. */
4992                                 tag_action = actions_pre++;
4993                                 action_cur = actions_pre++;
4994                         }
4995                         break;
4996                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4997                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4998                         action_cur = actions_pre++;
4999                         break;
5000                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5001                         raw_encap = actions->conf;
5002                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5003                                 action_cur = actions_pre++;
5004                         break;
5005                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5006                         raw_decap = actions->conf;
5007                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5008                                 action_cur = actions_pre++;
5009                         break;
5010                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5011                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5012                         if (vlan_item_dst && vlan_item_src) {
5013                                 memcpy(vlan_item_dst, vlan_item_src,
5014                                         sizeof(*vlan_item_dst));
5015                                 /*
5016                                  * Convert to internal match item, it is used
5017                                  * for vlan push and set vid.
5018                                  */
5019                                 vlan_item_dst->type = (enum rte_flow_item_type)
5020                                                 MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5021                         }
5022                         break;
5023                 default:
5024                         break;
5025                 }
5026                 if (!action_cur)
5027                         action_cur = (fm->def_policy) ?
5028                                         actions_sfx++ : actions_pre++;
5029                 memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5030         }
5031         /* Add the END action to the suffix actions. */
5032         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5033         if (priv->sh->meter_aso_en) {
5034                 /*
5035                  * For the ASO meter, an extra jump action needs to be added
5036                  * explicitly to jump from the meter table to the policer table.
5037                  */
5038                 struct mlx5_flow_meter_sub_policy *sub_policy;
5039                 struct mlx5_flow_tbl_data_entry *tbl_data;
5040
5041                 if (!fm->def_policy) {
5042                         sub_policy = get_meter_sub_policy(dev, flow, wks,
5043                                                           attr, items, error);
5044                         if (!sub_policy)
5045                                 return -rte_errno;
5046                 } else {
5047                         enum mlx5_meter_domain mtr_domain =
5048                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5049                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5050                                                 MLX5_MTR_DOMAIN_INGRESS);
5051
5052                         sub_policy =
5053                         &priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5054                 }
5055                 tbl_data = container_of(sub_policy->tbl_rsc,
5056                                         struct mlx5_flow_tbl_data_entry, tbl);
5057                 hw_mtr_action = actions_pre++;
5058                 hw_mtr_action->type = (enum rte_flow_action_type)
5059                                       MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5060                 hw_mtr_action->conf = tbl_data->jump.action;
5061         }
5062         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5063         actions_pre++;
5064         if (!tag_action)
5065                 return rte_flow_error_set(error, ENOMEM,
5066                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5067                                           NULL, "No tag action space.");
5068         if (!mtr_flow_id) {
5069                 tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5070                 goto exit;
5071         }
5072         /* Only default-policy Meter creates mtr flow id. */
5073         if (fm->def_policy) {
5074                 mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5075                 if (!tag_id)
5076                         return rte_flow_error_set(error, ENOMEM,
5077                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5078                                         "Failed to allocate meter flow id.");
5079                 flow_id = tag_id - 1;
5080                 flow_id_bits = (!flow_id) ? 1 :
5081                                 (MLX5_REG_BITS - __builtin_clz(flow_id));
5082                 if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5083                     mtr_reg_bits) {
5084                         mlx5_ipool_free(fm->flow_ipool, tag_id);
5085                         return rte_flow_error_set(error, EINVAL,
5086                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5087                                         "Meter flow id exceeds max limit.");
5088                 }
5089                 if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5090                         priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5091         }
5092         /* Build tag actions and items for meter_id/meter flow_id. */
5093         set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5094         tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5095         tag_item_mask = tag_item_spec + 1;
5096         /* Both flow_id and meter_id share the same register. */
5097         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5098                 .id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5099                                                             0, error),
5100                 .offset = mtr_id_offset,
5101                 .length = mtr_reg_bits,
5102                 .data = flow->meter,
5103         };
5104         /*
5105          * The color register bits used by flow_id grow from MSB to LSB,
5106          * so the flow_id value must be bit-reversed before it is stored in RegC.
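         * For example, a 3-bit flow_id of 0b110 becomes 0b011 and is then
         * shifted into the most significant bits of the meter register field.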
5107          */
5108         for (shift = 0; shift < flow_id_bits; shift++)
5109                 flow_id_reversed = (flow_id_reversed << 1) |
5110                                 ((flow_id >> shift) & 0x1);
5111         set_tag->data |=
5112                 flow_id_reversed << (mtr_reg_bits - flow_id_bits);
5113         tag_item_spec->id = set_tag->id;
5114         tag_item_spec->data = set_tag->data << mtr_id_offset;
5115         tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5116         tag_action->type = (enum rte_flow_action_type)
5117                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5118         tag_action->conf = set_tag;
5119         tag_item->type = (enum rte_flow_item_type)
5120                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5121         tag_item->spec = tag_item_spec;
5122         tag_item->last = NULL;
5123         tag_item->mask = tag_item_mask;
5124 exit:
5125         if (mtr_flow_id)
5126                 *mtr_flow_id = tag_id;
5127         return 0;
5128 }
5129
5130 /**
5131  * Split action list having QUEUE/RSS for metadata register copy.
5132  *
5133  * Once Q/RSS action is detected in user's action list, the flow action
5134  * should be split in order to copy metadata registers, which will happen in
5135  * RX_CP_TBL like,
5136  *   - CQE->flow_tag := reg_c[1] (MARK)
5137  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5138  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5139  * This is because the last action of each flow must be a terminal action
5140  * (QUEUE, RSS or DROP).
5141  *
5142  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5143  * stored and kept in the mlx5_flow structure for each sub_flow.
5144  *
5145  * The Q/RSS action is replaced with,
5146  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5147  * And the following JUMP action is added at the end,
5148  *   - JUMP, to RX_CP_TBL.
5149  *
5150  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
5151  * flow_create_split_metadata() routine. The flow will look like,
5152  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
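 *
 * For example, an original action list (QUEUE, END) is rewritten in
 * split_actions as (SET_TAG reg_c[2] := flow_id, JUMP to RX_CP_TBL, END),
 * while the QUEUE action itself is applied by the RX_ACT_TBL subflow.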
5153  *
5154  * @param dev
5155  *   Pointer to Ethernet device.
5156  * @param[out] split_actions
5157  *   Pointer to store split actions to jump to CP_TBL.
5158  * @param[in] actions
5159  *   Pointer to the list of original flow actions.
5160  * @param[in] qrss
5161  *   Pointer to the Q/RSS action.
5162  * @param[in] actions_n
5163  *   Number of original actions.
5164  * @param[out] error
5165  *   Perform verbose error reporting if not NULL.
5166  *
5167  * @return
5168  *   non-zero unique flow_id on success, otherwise 0 and
5169  *   error/rte_error are set.
5170  */
5171 static uint32_t
5172 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5173                           struct rte_flow_action *split_actions,
5174                           const struct rte_flow_action *actions,
5175                           const struct rte_flow_action *qrss,
5176                           int actions_n, struct rte_flow_error *error)
5177 {
5178         struct mlx5_priv *priv = dev->data->dev_private;
5179         struct mlx5_rte_flow_action_set_tag *set_tag;
5180         struct rte_flow_action_jump *jump;
5181         const int qrss_idx = qrss - actions;
5182         uint32_t flow_id = 0;
5183         int ret = 0;
5184
5185         /*
5186          * Given actions will be split
5187          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5188          * - Add jump to mreg CP_TBL.
5189          * As a result, there will be one more action.
5190          */
5191         ++actions_n;
5192         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5193         set_tag = (void *)(split_actions + actions_n);
5194         /*
5195          * If the tag action is not set to void (i.e. this is not the meter
5196          * suffix flow), add the tag action; the meter suffix flow already
5197          * has the tag added.
5198          */
5199         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
5200                 /*
5201                  * Allocate the new subflow ID. This one is unique within
5202                  * device and not shared with representors. Otherwise,
5203                  * we would have to resolve multi-thread access synch
5204                  * issue. Each flow on the shared device is appended
5205                  * with source vport identifier, so the resulting
5206                  * flows will be unique in the shared (by master and
5207                  * representors) domain even if they have coinciding
5208                  * IDs.
5209                  */
5210                 mlx5_ipool_malloc(priv->sh->ipool
5211                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5212                 if (!flow_id)
5213                         return rte_flow_error_set(error, ENOMEM,
5214                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5215                                                   NULL, "can't allocate id "
5216                                                   "for split Q/RSS subflow");
5217                 /* Internal SET_TAG action to set flow ID. */
5218                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
5219                         .data = flow_id,
5220                 };
5221                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5222                 if (ret < 0)
5223                         return ret;
5224                 set_tag->id = ret;
5225                 /* Construct new actions array. */
5226                 /* Replace QUEUE/RSS action. */
5227                 split_actions[qrss_idx] = (struct rte_flow_action){
5228                         .type = (enum rte_flow_action_type)
5229                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5230                         .conf = set_tag,
5231                 };
5232         }
5233         /* JUMP action to jump to mreg copy table (CP_TBL). */
5234         jump = (void *)(set_tag + 1);
5235         *jump = (struct rte_flow_action_jump){
5236                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5237         };
5238         split_actions[actions_n - 2] = (struct rte_flow_action){
5239                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
5240                 .conf = jump,
5241         };
5242         split_actions[actions_n - 1] = (struct rte_flow_action){
5243                 .type = RTE_FLOW_ACTION_TYPE_END,
5244         };
5245         return flow_id;
5246 }
5247
5248 /**
5249  * Extend the given action list for Tx metadata copy.
5250  *
5251  * Copy the given action list to the ext_actions and add flow metadata register
5252  * copy action in order to copy reg_a set by WQE to reg_c[0].
5253  *
5254  * @param[out] ext_actions
5255  *   Pointer to the extended action list.
5256  * @param[in] actions
5257  *   Pointer to the list of actions.
5258  * @param[in] actions_n
5259  *   Number of actions in the list.
5260  * @param[out] error
5261  *   Perform verbose error reporting if not NULL.
5262  * @param[in] encap_idx
5263  *   The encap action index.
5264  *
5265  * @return
5266  *   0 on success, negative value otherwise
5267  */
5268 static int
5269 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5270                        struct rte_flow_action *ext_actions,
5271                        const struct rte_flow_action *actions,
5272                        int actions_n, struct rte_flow_error *error,
5273                        int encap_idx)
5274 {
5275         struct mlx5_flow_action_copy_mreg *cp_mreg =
5276                 (struct mlx5_flow_action_copy_mreg *)
5277                         (ext_actions + actions_n + 1);
5278         int ret;
5279
5280         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5281         if (ret < 0)
5282                 return ret;
5283         cp_mreg->dst = ret;
5284         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5285         if (ret < 0)
5286                 return ret;
5287         cp_mreg->src = ret;
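        /*
         * The copy action is placed at encap_idx and the remaining actions
         * are shifted after it; when encap_idx is the last index, a new END
         * action is appended after the copy action instead.
         */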
5288         if (encap_idx != 0)
5289                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5290         if (encap_idx == actions_n - 1) {
5291                 ext_actions[actions_n - 1] = (struct rte_flow_action){
5292                         .type = (enum rte_flow_action_type)
5293                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5294                         .conf = cp_mreg,
5295                 };
5296                 ext_actions[actions_n] = (struct rte_flow_action){
5297                         .type = RTE_FLOW_ACTION_TYPE_END,
5298                 };
5299         } else {
5300                 ext_actions[encap_idx] = (struct rte_flow_action){
5301                         .type = (enum rte_flow_action_type)
5302                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5303                         .conf = cp_mreg,
5304                 };
5305                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5306                                 sizeof(*ext_actions) * (actions_n - encap_idx));
5307         }
5308         return 0;
5309 }
5310
5311 /**
5312  * Check the match action from the action list.
5313  *
5314  * @param[in] actions
5315  *   Pointer to the list of actions.
5316  * @param[in] attr
5317  *   Flow rule attributes.
5318  * @param[in] action
5319  *   The action type to look for in the list.
5320  * @param[out] match_action_pos
5321  *   Pointer to store the position of the matched action; -1 if not found.
5322  * @param[out] qrss_action_pos
5323  *   Pointer to store the position of the Queue/RSS action; -1 if not found.
5324  * @param[out] modify_after_mirror
5325  *   Pointer to the flag of modify action after FDB mirroring.
5326  *
5327  * @return
5328  *   > 0 the total number of actions.
5329  *   0 if the match action is not found in the action list.
5330  */
5331 static int
5332 flow_check_match_action(const struct rte_flow_action actions[],
5333                         const struct rte_flow_attr *attr,
5334                         enum rte_flow_action_type action,
5335                         int *match_action_pos, int *qrss_action_pos,
5336                         int *modify_after_mirror)
5337 {
5338         const struct rte_flow_action_sample *sample;
5339         const struct rte_flow_action_raw_decap *decap;
5340         int actions_n = 0;
5341         uint32_t ratio = 0;
5342         int sub_type = 0;
5343         int flag = 0;
5344         int fdb_mirror = 0;
5345
5346         *match_action_pos = -1;
5347         *qrss_action_pos = -1;
5348         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5349                 if (actions->type == action) {
5350                         flag = 1;
5351                         *match_action_pos = actions_n;
5352                 }
5353                 switch (actions->type) {
5354                 case RTE_FLOW_ACTION_TYPE_QUEUE:
5355                 case RTE_FLOW_ACTION_TYPE_RSS:
5356                         *qrss_action_pos = actions_n;
5357                         break;
5358                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
5359                         sample = actions->conf;
5360                         ratio = sample->ratio;
5361                         sub_type = ((const struct rte_flow_action *)
5362                                         (sample->actions))->type;
5363                         if (ratio == 1 && attr->transfer)
5364                                 fdb_mirror = 1;
5365                         break;
5366                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5367                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5368                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5369                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5370                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5371                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5372                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5373                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5374                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5375                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
5376                 case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5377                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5378                 case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5379                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5380                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5381                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5382                 case RTE_FLOW_ACTION_TYPE_FLAG:
5383                 case RTE_FLOW_ACTION_TYPE_MARK:
5384                 case RTE_FLOW_ACTION_TYPE_SET_META:
5385                 case RTE_FLOW_ACTION_TYPE_SET_TAG:
5386                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5387                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5388                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5389                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5390                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5391                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5392                 case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5393                 case RTE_FLOW_ACTION_TYPE_METER:
5394                         if (fdb_mirror)
5395                                 *modify_after_mirror = 1;
5396                         break;
5397                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5398                         decap = actions->conf;
5399                         while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5400                                 ;
5401                         actions_n++;
5402                         if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5403                                 const struct rte_flow_action_raw_encap *encap =
5404                                                                 actions->conf;
5405                                 if (decap->size <=
5406                                         MLX5_ENCAPSULATION_DECISION_SIZE &&
5407                                     encap->size >
5408                                         MLX5_ENCAPSULATION_DECISION_SIZE)
5409                                         /* L3 encap. */
5410                                         break;
5411                         }
5412                         if (fdb_mirror)
5413                                 *modify_after_mirror = 1;
5414                         break;
5415                 default:
5416                         break;
5417                 }
5418                 actions_n++;
5419         }
5420         if (flag && fdb_mirror && !*modify_after_mirror) {
5421                 /* FDB mirroring is implemented with the destination array
5422                  * instead of the FLOW_SAMPLER object.
5423                  */
5424                 if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5425                         flag = 0;
5426         }
5427         /* Count RTE_FLOW_ACTION_TYPE_END. */
5428         return flag ? actions_n + 1 : 0;
5429 }
5430
5431 #define SAMPLE_SUFFIX_ITEM 2
5432
5433 /**
5434  * Split the sample flow.
5435  *
5436  * The sample flow is split into two sub flows: the prefix sub flow keeps
5437  * the sample action, while the remaining actions move to a new suffix sub flow.
5438  *
5439  * A tag action with a unique tag id is also added to the sample (prefix) flow,
5440  * and the same tag id is used as a match in the suffix flow.
5441  *
5442  * @param dev
5443  *   Pointer to Ethernet device.
5444  * @param[in] add_tag
5445  *   Add extra tag action flag.
5446  * @param[out] sfx_items
5447  *   Suffix flow match items (list terminated by the END pattern item).
5448  * @param[in] actions
5449  *   Associated actions (list terminated by the END action).
5450  * @param[out] actions_sfx
5451  *   Suffix flow actions.
5452  * @param[out] actions_pre
5453  *   Prefix flow actions.
5454  * @param[in] actions_n
5455  *  The total number of actions.
5456  * @param[in] sample_action_pos
5457  *   The sample action position.
5458  * @param[in] qrss_action_pos
5459  *   The Queue/RSS action position.
5460  * @param[in] jump_table
5461  *   Add extra jump action flag.
5462  * @param[out] error
5463  *   Perform verbose error reporting if not NULL.
5464  *
5465  * @return
5466  *   0 or a unique flow_id on success, a negative errno value
5467  *   otherwise and rte_errno is set.
5468  */
5469 static int
5470 flow_sample_split_prep(struct rte_eth_dev *dev,
5471                        int add_tag,
5472                        struct rte_flow_item sfx_items[],
5473                        const struct rte_flow_action actions[],
5474                        struct rte_flow_action actions_sfx[],
5475                        struct rte_flow_action actions_pre[],
5476                        int actions_n,
5477                        int sample_action_pos,
5478                        int qrss_action_pos,
5479                        int jump_table,
5480                        struct rte_flow_error *error)
5481 {
5482         struct mlx5_priv *priv = dev->data->dev_private;
5483         struct mlx5_rte_flow_action_set_tag *set_tag;
5484         struct mlx5_rte_flow_item_tag *tag_spec;
5485         struct mlx5_rte_flow_item_tag *tag_mask;
5486         struct rte_flow_action_jump *jump_action;
5487         uint32_t tag_id = 0;
5488         int index;
5489         int append_index = 0;
5490         int ret;
5491
5492         if (sample_action_pos < 0)
5493                 return rte_flow_error_set(error, EINVAL,
5494                                           RTE_FLOW_ERROR_TYPE_ACTION,
5495                                           NULL, "invalid position of sample "
5496                                           "action in list");
5497         /* Prepare the actions for prefix and suffix flow. */
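        /*
         * A Queue/RSS action that precedes the sample action is moved to the
         * suffix flow; the other actions before the sample action stay in
         * the prefix flow.
         */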
5498         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5499                 index = qrss_action_pos;
5500                 /* Put the actions preceding the Queue/RSS action into the prefix flow. */
5501                 if (index != 0)
5502                         memcpy(actions_pre, actions,
5503                                sizeof(struct rte_flow_action) * index);
5504                 /* Put the other actions preceding the sample action into the prefix flow. */
5505                 if (sample_action_pos > index + 1)
5506                         memcpy(actions_pre + index, actions + index + 1,
5507                                sizeof(struct rte_flow_action) *
5508                                (sample_action_pos - index - 1));
5509                 index = sample_action_pos - 1;
5510                 /* Put Queue/RSS action into Suffix flow. */
5511                 memcpy(actions_sfx, actions + qrss_action_pos,
5512                        sizeof(struct rte_flow_action));
5513                 actions_sfx++;
5514         } else {
5515                 index = sample_action_pos;
5516                 if (index != 0)
5517                         memcpy(actions_pre, actions,
5518                                sizeof(struct rte_flow_action) * index);
5519         }
5520         /* On CX5, add an extra tag action for NIC-RX and E-Switch ingress.
5521          * On CX6DX and above, metadata registers Cx preserve their value,
5522          * so add an extra tag action for NIC-RX and the E-Switch domain.
5523          */
5524         if (add_tag) {
5525                 /* Prepare the prefix tag action. */
5526                 append_index++;
5527                 set_tag = (void *)(actions_pre + actions_n + append_index);
5528                 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
5529                 if (ret < 0)
5530                         return ret;
5531                 mlx5_ipool_malloc(priv->sh->ipool
5532                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
5533                 *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5534                         .id = ret,
5535                         .data = tag_id,
5536                 };
5537                 /* Prepare the suffix subflow items. */
5538                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
5539                 tag_spec->data = tag_id;
5540                 tag_spec->id = set_tag->id;
5541                 tag_mask = tag_spec + 1;
5542                 tag_mask->data = UINT32_MAX;
5543                 sfx_items[0] = (struct rte_flow_item){
5544                         .type = (enum rte_flow_item_type)
5545                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5546                         .spec = tag_spec,
5547                         .last = NULL,
5548                         .mask = tag_mask,
5549                 };
5550                 sfx_items[1] = (struct rte_flow_item){
5551                         .type = (enum rte_flow_item_type)
5552                                 RTE_FLOW_ITEM_TYPE_END,
5553                 };
5554                 /* Prepare the tag action in prefix subflow. */
5555                 actions_pre[index++] =
5556                         (struct rte_flow_action){
5557                         .type = (enum rte_flow_action_type)
5558                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5559                         .conf = set_tag,
5560                 };
5561         }
5562         memcpy(actions_pre + index, actions + sample_action_pos,
5563                sizeof(struct rte_flow_action));
5564         index += 1;
5565         /* For a modify action after the sample action in E-Switch mirroring,
5566          * add an extra jump action to the prefix subflow to jump into the
5567          * next table, then perform the modify action in the new table.
5568          */
5569         if (jump_table) {
5570                 /* Prepare the prefix jump action. */
5571                 append_index++;
5572                 jump_action = (void *)(actions_pre + actions_n + append_index);
5573                 jump_action->group = jump_table;
5574                 actions_pre[index++] =
5575                         (struct rte_flow_action){
5576                         .type = (enum rte_flow_action_type)
5577                                 RTE_FLOW_ACTION_TYPE_JUMP,
5578                         .conf = jump_action,
5579                 };
5580         }
5581         actions_pre[index] = (struct rte_flow_action){
5582                 .type = (enum rte_flow_action_type)
5583                         RTE_FLOW_ACTION_TYPE_END,
5584         };
5585         /* Put the actions after sample into Suffix flow. */
5586         memcpy(actions_sfx, actions + sample_action_pos + 1,
5587                sizeof(struct rte_flow_action) *
5588                (actions_n - sample_action_pos - 1));
5589         return tag_id;
5590 }
5591
5592 /**
5593  * The splitting for metadata feature.
5594  *
5595  * - Q/RSS action on NIC Rx should be split in order to pass by
5596  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
5597  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
5598  *
5599  * - All the actions on NIC Tx should have a mreg copy action to
5600  *   copy reg_a from WQE to reg_c[0].
5601  *
5602  * @param dev
5603  *   Pointer to Ethernet device.
5604  * @param[in] flow
5605  *   Parent flow structure pointer.
5606  * @param[in] attr
5607  *   Flow rule attributes.
5608  * @param[in] items
5609  *   Pattern specification (list terminated by the END pattern item).
5610  * @param[in] actions
5611  *   Associated actions (list terminated by the END action).
5612  * @param[in] flow_split_info
5613  *   Pointer to flow split info structure.
5614  * @param[out] error
5615  *   Perform verbose error reporting if not NULL.
5616  * @return
5617  *   0 on success, negative value otherwise
5618  */
5619 static int
5620 flow_create_split_metadata(struct rte_eth_dev *dev,
5621                            struct rte_flow *flow,
5622                            const struct rte_flow_attr *attr,
5623                            const struct rte_flow_item items[],
5624                            const struct rte_flow_action actions[],
5625                            struct mlx5_flow_split_info *flow_split_info,
5626                            struct rte_flow_error *error)
5627 {
5628         struct mlx5_priv *priv = dev->data->dev_private;
5629         struct mlx5_dev_config *config = &priv->config;
5630         const struct rte_flow_action *qrss = NULL;
5631         struct rte_flow_action *ext_actions = NULL;
5632         struct mlx5_flow *dev_flow = NULL;
5633         uint32_t qrss_id = 0;
5634         int mtr_sfx = 0;
5635         size_t act_size;
5636         int actions_n;
5637         int encap_idx;
5638         int ret;
5639
5640         /* Check whether extensive metadata feature is engaged. */
5641         if (!config->dv_flow_en ||
5642             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
5643             !mlx5_flow_ext_mreg_supported(dev))
5644                 return flow_create_split_inner(dev, flow, NULL, attr, items,
5645                                                actions, flow_split_info, error);
5646         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
5647                                                            &encap_idx);
5648         if (qrss) {
5649                 /* Exclude hairpin flows from splitting. */
5650                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
5651                         const struct rte_flow_action_queue *queue;
5652
5653                         queue = qrss->conf;
5654                         if (mlx5_rxq_get_type(dev, queue->index) ==
5655                             MLX5_RXQ_TYPE_HAIRPIN)
5656                                 qrss = NULL;
5657                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
5658                         const struct rte_flow_action_rss *rss;
5659
5660                         rss = qrss->conf;
5661                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
5662                             MLX5_RXQ_TYPE_HAIRPIN)
5663                                 qrss = NULL;
5664                 }
5665         }
5666         if (qrss) {
5667                 /* Check if it is in meter suffix table. */
5668                 mtr_sfx = attr->group == (attr->transfer ?
5669                           (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
5670                           MLX5_FLOW_TABLE_LEVEL_METER);
5671                 /*
5672                  * Q/RSS action on NIC Rx should be split in order to pass by
5673                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
5674                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
5675                  */
5676                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5677                            sizeof(struct rte_flow_action_set_tag) +
5678                            sizeof(struct rte_flow_action_jump);
5679                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5680                                           SOCKET_ID_ANY);
5681                 if (!ext_actions)
5682                         return rte_flow_error_set(error, ENOMEM,
5683                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5684                                                   NULL, "no memory to split "
5685                                                   "metadata flow");
5686                 /*
5687                  * If we are the suffix flow of a meter, the tag already exists.
5688                  * Set the tag action to void.
5689                  */
5690                 if (mtr_sfx)
5691                         ext_actions[qrss - actions].type =
5692                                                 RTE_FLOW_ACTION_TYPE_VOID;
5693                 else
5694                         ext_actions[qrss - actions].type =
5695                                                 (enum rte_flow_action_type)
5696                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5697                 /*
5698                  * Create the new actions list with removed Q/RSS action
5699                  * and appended set tag and jump to register copy table
5700                  * (RX_CP_TBL). We should preallocate unique tag ID here
5701                  * in advance, because it is needed for set tag action.
5702                  */
5703                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
5704                                                     qrss, actions_n, error);
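                /*
                 * The meter suffix flow already carries its tag, so a zero
                 * qrss_id is only treated as an error when this is not a
                 * meter suffix flow.
                 */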
5705                 if (!mtr_sfx && !qrss_id) {
5706                         ret = -rte_errno;
5707                         goto exit;
5708                 }
5709         } else if (attr->egress && !attr->transfer) {
5710                 /*
5711                  * All the actions on NIC Tx should have a metadata register
5712                  * copy action to copy reg_a from WQE to reg_c[meta]
5713                  */
5714                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5715                            sizeof(struct mlx5_flow_action_copy_mreg);
5716                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5717                                           SOCKET_ID_ANY);
5718                 if (!ext_actions)
5719                         return rte_flow_error_set(error, ENOMEM,
5720                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5721                                                   NULL, "no memory to split "
5722                                                   "metadata flow");
5723                 /* Create the action list appended with copy register. */
5724                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
5725                                              actions_n, error, encap_idx);
5726                 if (ret < 0)
5727                         goto exit;
5728         }
5729         /* Add the unmodified original or prefix subflow. */
5730         ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5731                                       items, ext_actions ? ext_actions :
5732                                       actions, flow_split_info, error);
5733         if (ret < 0)
5734                 goto exit;
5735         MLX5_ASSERT(dev_flow);
5736         if (qrss) {
5737                 const struct rte_flow_attr q_attr = {
5738                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5739                         .ingress = 1,
5740                 };
5741                 /* Internal PMD action to set register. */
5742                 struct mlx5_rte_flow_item_tag q_tag_spec = {
5743                         .data = qrss_id,
5744                         .id = REG_NON,
5745                 };
5746                 struct rte_flow_item q_items[] = {
5747                         {
5748                                 .type = (enum rte_flow_item_type)
5749                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5750                                 .spec = &q_tag_spec,
5751                                 .last = NULL,
5752                                 .mask = NULL,
5753                         },
5754                         {
5755                                 .type = RTE_FLOW_ITEM_TYPE_END,
5756                         },
5757                 };
5758                 struct rte_flow_action q_actions[] = {
5759                         {
5760                                 .type = qrss->type,
5761                                 .conf = qrss->conf,
5762                         },
5763                         {
5764                                 .type = RTE_FLOW_ACTION_TYPE_END,
5765                         },
5766                 };
5767                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
5768
5769                 /*
5770                  * Configure the tag item only if there is no meter subflow.
5771                  * Since the tag is already set in the meter suffix subflow,
5772                  * we can just use the meter suffix items as is.
5773                  */
5774                 if (qrss_id) {
5775                         /* Not meter subflow. */
5776                         MLX5_ASSERT(!mtr_sfx);
5777                         /*
5778                          * Put the unique id in the prefix flow, as it is
5779                          * destroyed after the suffix flow. The id is freed
5780                          * only once no actual flows use it, at which point
5781                          * identifier reallocation becomes possible (for
5782                          * example, for other flows in other threads).
5783                          */
5784                         dev_flow->handle->split_flow_id = qrss_id;
5785                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
5786                                                    error);
5787                         if (ret < 0)
5788                                 goto exit;
5789                         q_tag_spec.id = ret;
5790                 }
5791                 dev_flow = NULL;
5792                 /* Add suffix subflow to execute Q/RSS. */
5793                 flow_split_info->prefix_layers = layers;
5794                 flow_split_info->prefix_mark = 0;
5795                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5796                                               &q_attr, mtr_sfx ? items :
5797                                               q_items, q_actions,
5798                                               flow_split_info, error);
5799                 if (ret < 0)
5800                         goto exit;
5801                 /* The qrss ID is freed only on failure; clear it on success. */
5802                 qrss_id = 0;
5803                 MLX5_ASSERT(dev_flow);
5804         }
5805
5806 exit:
5807         /*
5808          * We do not destroy the partially created sub_flows in case of error.
5809          * They are included in the parent flow list and will be destroyed
5810          * by flow_drv_destroy.
5811          */
5812         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
5813                         qrss_id);
5814         mlx5_free(ext_actions);
5815         return ret;
5816 }
5817
5818 /**
5819  * Create meter internal drop flow with the original pattern.
5820  *
5821  * @param dev
5822  *   Pointer to Ethernet device.
5823  * @param[in] flow
5824  *   Parent flow structure pointer.
5825  * @param[in] attr
5826  *   Flow rule attributes.
5827  * @param[in] items
5828  *   Pattern specification (list terminated by the END pattern item).
5829  * @param[in] flow_split_info
5830  *   Pointer to flow split info structure.
5831  * @param[in] fm
5832  *   Pointer to flow meter structure.
5833  * @param[out] error
5834  *   Perform verbose error reporting if not NULL.
5835  * @return
5836  *   0 on success, negative value otherwise
5837  */
5838 static uint32_t
5839 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
5840                         struct rte_flow *flow,
5841                         const struct rte_flow_attr *attr,
5842                         const struct rte_flow_item items[],
5843                         struct mlx5_flow_split_info *flow_split_info,
5844                         struct mlx5_flow_meter_info *fm,
5845                         struct rte_flow_error *error)
5846 {
5847         struct mlx5_flow *dev_flow = NULL;
5848         struct rte_flow_attr drop_attr = *attr;
5849         struct rte_flow_action drop_actions[3];
5850         struct mlx5_flow_split_info drop_split_info = *flow_split_info;
5851
5852         MLX5_ASSERT(fm->drop_cnt);
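        /* Drop flow actions: internal counter (fm->drop_cnt), drop, end. */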
5853         drop_actions[0].type =
5854                 (enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
5855         drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
5856         drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
5857         drop_actions[1].conf = NULL;
5858         drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
5859         drop_actions[2].conf = NULL;
5860         drop_split_info.external = false;
5861         drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
5862         drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
5863         drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
5864         return flow_create_split_inner(dev, flow, &dev_flow,
5865                                 &drop_attr, items, drop_actions,
5866                                 &drop_split_info, error);
5867 }
5868
5869 /**
5870  * The splitting for meter feature.
5871  *
5872  * - The meter flow will be split into two flows: a prefix and a
5873  *   suffix flow. Packets only proceed to the suffix flow after
5874  *   passing the prefix meter action.
5875  *
5876  * - Reg_C_5 is used to match the packet between the prefix and
5877  *   suffix flows.
5878  *
5879  * @param dev
5880  *   Pointer to Ethernet device.
5881  * @param[in] flow
5882  *   Parent flow structure pointer.
5883  * @param[in] attr
5884  *   Flow rule attributes.
5885  * @param[in] items
5886  *   Pattern specification (list terminated by the END pattern item).
5887  * @param[in] actions
5888  *   Associated actions (list terminated by the END action).
5889  * @param[in] flow_split_info
5890  *   Pointer to flow split info structure.
5891  * @param[out] error
5892  *   Perform verbose error reporting if not NULL.
5893  * @return
5894  *   0 on success, negative value otherwise
5895  */
5896 static int
5897 flow_create_split_meter(struct rte_eth_dev *dev,
5898                         struct rte_flow *flow,
5899                         const struct rte_flow_attr *attr,
5900                         const struct rte_flow_item items[],
5901                         const struct rte_flow_action actions[],
5902                         struct mlx5_flow_split_info *flow_split_info,
5903                         struct rte_flow_error *error)
5904 {
5905         struct mlx5_priv *priv = dev->data->dev_private;
5906         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5907         struct rte_flow_action *sfx_actions = NULL;
5908         struct rte_flow_action *pre_actions = NULL;
5909         struct rte_flow_item *sfx_items = NULL;
5910         struct mlx5_flow *dev_flow = NULL;
5911         struct rte_flow_attr sfx_attr = *attr;
5912         struct mlx5_flow_meter_info *fm = NULL;
5913         uint8_t skip_scale_restore;
5914         bool has_mtr = false;
5915         bool has_modify = false;
5916         bool set_mtr_reg = true;
5917         bool is_mtr_hierarchy = false;
5918         uint32_t meter_id = 0;
5919         uint32_t mtr_idx = 0;
5920         uint32_t mtr_flow_id = 0;
5921         size_t act_size;
5922         size_t item_size;
5923         int actions_n = 0;
5924         int ret = 0;
5925
5926         if (priv->mtr_en)
5927                 actions_n = flow_check_meter_action(dev, actions, &has_mtr,
5928                                                     &has_modify, &meter_id);
5929         if (has_mtr) {
5930                 if (flow->meter) {
5931                         fm = flow_dv_meter_find_by_idx(priv, flow->meter);
5932                         if (!fm)
5933                                 return rte_flow_error_set(error, EINVAL,
5934                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5935                                                 NULL, "Meter not found.");
5936                 } else {
5937                         fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
5938                         if (!fm)
5939                                 return rte_flow_error_set(error, EINVAL,
5940                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5941                                                 NULL, "Meter not found.");
5942                         ret = mlx5_flow_meter_attach(priv, fm,
5943                                                      &sfx_attr, error);
5944                         if (ret)
5945                                 return -rte_errno;
5946                         flow->meter = mtr_idx;
5947                 }
5948                 MLX5_ASSERT(wks);
5949                 wks->fm = fm;
5950                 if (!fm->def_policy) {
5951                         wks->policy = mlx5_flow_meter_policy_find(dev,
5952                                                                   fm->policy_id,
5953                                                                   NULL);
5954                         MLX5_ASSERT(wks->policy);
5955                         if (wks->policy->is_hierarchy) {
5956                                 wks->final_policy =
5957                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
5958                                                                 wks->policy);
5959                                 if (!wks->final_policy)
5960                                         return rte_flow_error_set(error,
5961                                         EINVAL,
5962                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
5963                                 "Failed to find terminal policy of hierarchy.");
5964                                 is_mtr_hierarchy = true;
5965                         }
5966                 }
5967                 /*
5968                  * If it isn't a default-policy meter, it isn't a meter
5969                  * hierarchy, and either
5970                  * 1. there is no action in the flow that changes the
5971                  *    packet (modify/encap/decap etc.), or
5972                  * 2. no drop count is needed for this meter,
5973                  * then there is no need to use regC to save the meter id.
5974                  */
5975                 if (!fm->def_policy && !is_mtr_hierarchy &&
5976                     (!has_modify || !fm->drop_cnt))
5977                         set_mtr_reg = false;
5978                 /* Prefix actions: meter, decap, encap, tag, jump, end. */
5979                 act_size = sizeof(struct rte_flow_action) * (actions_n + 6) +
5980                            sizeof(struct mlx5_rte_flow_action_set_tag);
5981                 /* Suffix items: tag, vlan, port id, end. */
5982 #define METER_SUFFIX_ITEM 4
5983                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
5984                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
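                /*
                 * One allocation backs both action lists and the suffix
                 * items: suffix actions first, prefix actions within the
                 * same action area, and the items at offset act_size.
                 */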
5985                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
5986                                           0, SOCKET_ID_ANY);
5987                 if (!sfx_actions)
5988                         return rte_flow_error_set(error, ENOMEM,
5989                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5990                                                   NULL, "no memory to split "
5991                                                   "meter flow");
5992                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
5993                              act_size);
5994                 /* No suffix flow for a non-default-policy meter; only END occupies sfx_actions. */
5995                 if (!fm->def_policy)
5996                         pre_actions = sfx_actions + 1;
5997                 else
5998                         pre_actions = sfx_actions + actions_n;
5999                 ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6000                                             items, sfx_items, actions,
6001                                             sfx_actions, pre_actions,
6002                                             (set_mtr_reg ? &mtr_flow_id : NULL),
6003                                             error);
6004                 if (ret) {
6005                         ret = -rte_errno;
6006                         goto exit;
6007                 }
6008                 /* Add the prefix subflow. */
6009                 flow_split_info->prefix_mark = 0;
6010                 skip_scale_restore = flow_split_info->skip_scale;
6011                 flow_split_info->skip_scale |=
6012                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6013                 ret = flow_create_split_inner(dev, flow, &dev_flow,
6014                                               attr, items, pre_actions,
6015                                               flow_split_info, error);
6016                 flow_split_info->skip_scale = skip_scale_restore;
6017                 if (ret) {
6018                         if (mtr_flow_id)
6019                                 mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6020                         ret = -rte_errno;
6021                         goto exit;
6022                 }
6023                 if (mtr_flow_id) {
6024                         dev_flow->handle->split_flow_id = mtr_flow_id;
6025                         dev_flow->handle->is_meter_flow_id = 1;
6026                 }
6027                 if (!fm->def_policy) {
6028                         if (!set_mtr_reg && fm->drop_cnt)
6029                                 ret =
6030                         flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6031                                                         &sfx_attr, items,
6032                                                         flow_split_info,
6033                                                         fm, error);
6034                         goto exit;
6035                 }
6036                 /* Set the suffix group attribute. */
6037                 sfx_attr.group = sfx_attr.transfer ?
6038                                 (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6039                                  MLX5_FLOW_TABLE_LEVEL_METER;
6040                 flow_split_info->prefix_layers =
6041                                 flow_get_prefix_layer_flags(dev_flow);
6042                 flow_split_info->prefix_mark = dev_flow->handle->mark;
6043                 flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6044         }
6045         /* Add the suffix subflow, or the original flow when there is no meter. */
6046         ret = flow_create_split_metadata(dev, flow,
6047                                          &sfx_attr, sfx_items ?
6048                                          sfx_items : items,
6049                                          sfx_actions ? sfx_actions : actions,
6050                                          flow_split_info, error);
6051 exit:
6052         if (sfx_actions)
6053                 mlx5_free(sfx_actions);
6054         return ret;
6055 }
6056
6057 /**
6058  * The splitting for sample feature.
6059  *
6060  * Once Sample action is detected in the action list, the flow actions should
6061  * be split into prefix sub flow and suffix sub flow.
6062  *
6063  * The original items remain in the prefix sub flow; all actions preceding the
6064  * sample action and the sample action itself are copied to the prefix
6065  * sub flow, while the actions following the sample action are copied to the
6066  * suffix sub flow. The Queue action is always located in the suffix sub flow.
6067  *
6068  * In order to match the packet from the prefix sub flow in the suffix sub
6069  * flow, an extra tag action is added to the prefix sub flow, and the suffix
6070  * sub flow uses a tag item with the unique flow id.
6071  *
6072  * @param dev
6073  *   Pointer to Ethernet device.
6074  * @param[in] flow
6075  *   Parent flow structure pointer.
6076  * @param[in] attr
6077  *   Flow rule attributes.
6078  * @param[in] items
6079  *   Pattern specification (list terminated by the END pattern item).
6080  * @param[in] actions
6081  *   Associated actions (list terminated by the END action).
6082  * @param[in] flow_split_info
6083  *   Pointer to flow split info structure.
6084  * @param[out] error
6085  *   Perform verbose error reporting if not NULL.
6086  * @return
6087  *   0 on success, negative value otherwise
6088  */
6089 static int
6090 flow_create_split_sample(struct rte_eth_dev *dev,
6091                          struct rte_flow *flow,
6092                          const struct rte_flow_attr *attr,
6093                          const struct rte_flow_item items[],
6094                          const struct rte_flow_action actions[],
6095                          struct mlx5_flow_split_info *flow_split_info,
6096                          struct rte_flow_error *error)
6097 {
6098         struct mlx5_priv *priv = dev->data->dev_private;
6099         struct rte_flow_action *sfx_actions = NULL;
6100         struct rte_flow_action *pre_actions = NULL;
6101         struct rte_flow_item *sfx_items = NULL;
6102         struct mlx5_flow *dev_flow = NULL;
6103         struct rte_flow_attr sfx_attr = *attr;
6104 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6105         struct mlx5_flow_dv_sample_resource *sample_res;
6106         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6107         struct mlx5_flow_tbl_resource *sfx_tbl;
6108 #endif
6109         size_t act_size;
6110         size_t item_size;
6111         uint32_t fdb_tx = 0;
6112         int32_t tag_id = 0;
6113         int actions_n = 0;
6114         int sample_action_pos;
6115         int qrss_action_pos;
6116         int add_tag = 0;
6117         int modify_after_mirror = 0;
6118         uint16_t jump_table = 0;
6119         const uint32_t next_ft_step = 1;
6120         int ret = 0;
6121
6122         if (priv->sampler_en)
6123                 actions_n = flow_check_match_action(actions, attr,
6124                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
6125                                         &sample_action_pos, &qrss_action_pos,
6126                                         &modify_after_mirror);
6127         if (actions_n) {
6128                 /* The prefix actions must include sample, tag, end. */
6129                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6130                            + sizeof(struct mlx5_rte_flow_action_set_tag);
6131                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6132                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
6133                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6134                                           item_size), 0, SOCKET_ID_ANY);
6135                 if (!sfx_actions)
6136                         return rte_flow_error_set(error, ENOMEM,
6137                                                   RTE_FLOW_ERROR_TYPE_ACTION,
6138                                                   NULL, "no memory to split "
6139                                                   "sample flow");
6140                 /* The representor_id is UINT16_MAX for uplink. */
6141                 fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6142                 /*
6143                  * When reg_c_preserve is set, metadata registers Cx preserve
6144                  * their value even through packet duplication.
6145                  */
6146                 add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve);
6147                 if (add_tag)
6148                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6149                                         + act_size);
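                /*
                 * If actions after the mirror modify the packet, the suffix
                 * subflow is placed in the next flow table level: the current
                 * group scaled by MLX5_FLOW_TABLE_FACTOR plus one step.
                 */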
6150                 if (modify_after_mirror)
6151                         jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6152                                      next_ft_step;
6153                 pre_actions = sfx_actions + actions_n;
6154                 tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
6155                                                 actions, sfx_actions,
6156                                                 pre_actions, actions_n,
6157                                                 sample_action_pos,
6158                                                 qrss_action_pos, jump_table,
6159                                                 error);
6160                 if (tag_id < 0 || (add_tag && !tag_id)) {
6161                         ret = -rte_errno;
6162                         goto exit;
6163                 }
6164                 if (modify_after_mirror)
6165                         flow_split_info->skip_scale =
6166                                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6167                 /* Add the prefix subflow. */
6168                 ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6169                                               items, pre_actions,
6170                                               flow_split_info, error);
6171                 if (ret) {
6172                         ret = -rte_errno;
6173                         goto exit;
6174                 }
6175                 dev_flow->handle->split_flow_id = tag_id;
6176 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6177                 if (!modify_after_mirror) {
6178                         /* Set the sfx group attr. */
6179                         sample_res = (struct mlx5_flow_dv_sample_resource *)
6180                                                 dev_flow->dv.sample_res;
6181                         sfx_tbl = (struct mlx5_flow_tbl_resource *)
6182                                                 sample_res->normal_path_tbl;
6183                         sfx_tbl_data = container_of(sfx_tbl,
6184                                                 struct mlx5_flow_tbl_data_entry,
6185                                                 tbl);
6186                         sfx_attr.group = sfx_attr.transfer ?
6187                         (sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6188                 } else {
6189                         MLX5_ASSERT(attr->transfer);
6190                         sfx_attr.group = jump_table;
6191                 }
6192                 flow_split_info->prefix_layers =
6193                                 flow_get_prefix_layer_flags(dev_flow);
6194                 flow_split_info->prefix_mark = dev_flow->handle->mark;
6195                 /* The suffix group level has already been scaled with the
6196                  * factor; set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to
6197                  * avoid scaling it again during translation.
6198                  */
6199                 flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6200 #endif
6201         }
6202         /* Add the suffix subflow. */
6203         ret = flow_create_split_meter(dev, flow, &sfx_attr,
6204                                       sfx_items ? sfx_items : items,
6205                                       sfx_actions ? sfx_actions : actions,
6206                                       flow_split_info, error);
6207 exit:
6208         if (sfx_actions)
6209                 mlx5_free(sfx_actions);
6210         return ret;
6211 }
6212
6213 /**
6214  * Split the flow to subflow set. The splitters might be linked
6215  * in the chain, like this:
6216  * flow_create_split_outer() calls:
6217  *   flow_create_split_meter() calls:
6218  *     flow_create_split_metadata(meter_subflow_0) calls:
6219  *       flow_create_split_inner(metadata_subflow_0)
6220  *       flow_create_split_inner(metadata_subflow_1)
6221  *       flow_create_split_inner(metadata_subflow_2)
6222  *     flow_create_split_metadata(meter_subflow_1) calls:
6223  *       flow_create_split_inner(metadata_subflow_0)
6224  *       flow_create_split_inner(metadata_subflow_1)
6225  *       flow_create_split_inner(metadata_subflow_2)
6226  *
6227  * This provides a flexible way to add new levels of flow splitting.
6228  * All successfully created subflows are included in the parent flow
6229  * dev_flow list.
6230  *
6231  * @param dev
6232  *   Pointer to Ethernet device.
6233  * @param[in] flow
6234  *   Parent flow structure pointer.
6235  * @param[in] attr
6236  *   Flow rule attributes.
6237  * @param[in] items
6238  *   Pattern specification (list terminated by the END pattern item).
6239  * @param[in] actions
6240  *   Associated actions (list terminated by the END action).
6241  * @param[in] flow_split_info
6242  *   Pointer to flow split info structure.
6243  * @param[out] error
6244  *   Perform verbose error reporting if not NULL.
6245  * @return
6246  *   0 on success, negative value otherwise
6247  */
6248 static int
6249 flow_create_split_outer(struct rte_eth_dev *dev,
6250                         struct rte_flow *flow,
6251                         const struct rte_flow_attr *attr,
6252                         const struct rte_flow_item items[],
6253                         const struct rte_flow_action actions[],
6254                         struct mlx5_flow_split_info *flow_split_info,
6255                         struct rte_flow_error *error)
6256 {
6257         int ret;
6258
6259         ret = flow_create_split_sample(dev, flow, attr, items,
6260                                        actions, flow_split_info, error);
6261         MLX5_ASSERT(ret <= 0);
6262         return ret;
6263 }
6264
6265 static inline struct mlx5_flow_tunnel *
6266 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6267 {
6268         struct mlx5_flow_tunnel *tunnel;
6269
6270 #pragma GCC diagnostic push
6271 #pragma GCC diagnostic ignored "-Wcast-qual"
6272         tunnel = (typeof(tunnel))flow->tunnel;
6273 #pragma GCC diagnostic pop
6274
6275         return tunnel;
6276 }
6277
6278 /**
6279  * Adjust flow RSS workspace if needed.
6280  *
6281  * @param wks
6282  *   Pointer to thread flow work space.
6283  * @param rss_desc
6284  *   Pointer to RSS descriptor.
6285  * @param[in] nrssq_num
6286  *   New RSS queue number.
6287  *
6288  * @return
6289  *   0 on success, -1 otherwise and rte_errno is set.
6290  */
6291 static int
6292 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6293                           struct mlx5_flow_rss_desc *rss_desc,
6294                           uint32_t nrssq_num)
6295 {
6296         if (likely(nrssq_num <= wks->rssq_num))
6297                 return 0;
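        /*
         * Grow the RSS queue array to hold at least nrssq_num entries,
         * rounded up to an even count.
         */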
6298         rss_desc->queue = realloc(rss_desc->queue,
6299                           sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6300         if (!rss_desc->queue) {
6301                 rte_errno = ENOMEM;
6302                 return -1;
6303         }
6304         wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6305         return 0;
6306 }
6307
6308 /**
6309  * Create a flow and register it in the flow pool of the given type.
6310  *
6311  * @param dev
6312  *   Pointer to Ethernet device.
6313  * @param type
6314  *   Flow type to be created.
6318  * @param[in] attr
6319  *   Flow rule attributes.
6320  * @param[in] items
6321  *   Pattern specification (list terminated by the END pattern item).
6322  * @param[in] actions
6323  *   Associated actions (list terminated by the END action).
6324  * @param[in] external
6325  *   This flow rule is created by a request external to the PMD.
6326  * @param[out] error
6327  *   Perform verbose error reporting if not NULL.
6328  *
6329  * @return
6330  *   A flow index on success, 0 otherwise and rte_errno is set.
6331  */
6332 static uint32_t
6333 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6334                  const struct rte_flow_attr *attr,
6335                  const struct rte_flow_item items[],
6336                  const struct rte_flow_action original_actions[],
6337                  bool external, struct rte_flow_error *error)
6338 {
6339         struct mlx5_priv *priv = dev->data->dev_private;
6340         struct rte_flow *flow = NULL;
6341         struct mlx5_flow *dev_flow;
6342         const struct rte_flow_action_rss *rss = NULL;
6343         struct mlx5_translated_action_handle
6344                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6345         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6346         union {
6347                 struct mlx5_flow_expand_rss buf;
6348                 uint8_t buffer[4096];
6349         } expand_buffer;
6350         union {
6351                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6352                 uint8_t buffer[2048];
6353         } actions_rx;
6354         union {
6355                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6356                 uint8_t buffer[2048];
6357         } actions_hairpin_tx;
6358         union {
6359                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6360                 uint8_t buffer[2048];
6361         } items_tx;
6362         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6363         struct mlx5_flow_rss_desc *rss_desc;
6364         const struct rte_flow_action *p_actions_rx;
6365         uint32_t i;
6366         uint32_t idx = 0;
6367         int hairpin_flow;
6368         struct rte_flow_attr attr_tx = { .priority = 0 };
6369         const struct rte_flow_action *actions;
6370         struct rte_flow_action *translated_actions = NULL;
6371         struct mlx5_flow_tunnel *tunnel;
6372         struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6373         struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6374         struct mlx5_flow_split_info flow_split_info = {
6375                 .external = !!external,
6376                 .skip_scale = 0,
6377                 .flow_idx = 0,
6378                 .prefix_mark = 0,
6379                 .prefix_layers = 0,
6380                 .table_id = 0
6381         };
6382         int ret;
6383
6384         MLX5_ASSERT(wks);
6385         rss_desc = &wks->rss_desc;
6386         ret = flow_action_handles_translate(dev, original_actions,
6387                                             indir_actions,
6388                                             &indir_actions_n,
6389                                             &translated_actions, error);
6390         if (ret < 0) {
6391                 MLX5_ASSERT(translated_actions == NULL);
6392                 return 0;
6393         }
6394         actions = translated_actions ? translated_actions : original_actions;
6395         p_actions_rx = actions;
6396         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6397         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6398                                 external, hairpin_flow, error);
6399         if (ret < 0)
6400                 goto error_before_hairpin_split;
6401         flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6402         if (!flow) {
6403                 rte_errno = ENOMEM;
6404                 goto error_before_hairpin_split;
6405         }
6406         if (hairpin_flow > 0) {
6407                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6408                         rte_errno = EINVAL;
6409                         goto error_before_hairpin_split;
6410                 }
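                /*
                 * Split the actions into the Rx part and the hairpin Tx part;
                 * the Tx pattern and actions are built into the local buffers.
                 */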
6411                 flow_hairpin_split(dev, actions, actions_rx.actions,
6412                                    actions_hairpin_tx.actions, items_tx.items,
6413                                    idx);
6414                 p_actions_rx = actions_rx.actions;
6415         }
6416         flow_split_info.flow_idx = idx;
6417         flow->drv_type = flow_get_drv_type(dev, attr);
6418         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6419                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
6420         memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6421         /* RSS Action only works on NIC RX domain */
6422         if (attr->ingress && !attr->transfer)
6423                 rss = flow_get_rss_action(dev, p_actions_rx);
6424         if (rss) {
6425                 if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6426                         return 0;
6427                 /*
6428                  * The following information is required by
6429                  * mlx5_flow_hashfields_adjust() in advance.
6430                  */
6431                 rss_desc->level = rss->level;
6432                 /* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
6433                 rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
6434         }
6435         flow->dev_handles = 0;
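        /*
         * When RSS types are requested, expand the pattern into one entry per
         * supported protocol combination; otherwise keep the original pattern
         * as the single entry.
         */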
6436         if (rss && rss->types) {
6437                 unsigned int graph_root;
6438
6439                 graph_root = find_graph_root(rss->level);
6440                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6441                                            items, rss->types,
6442                                            mlx5_support_expansion, graph_root);
6443                 MLX5_ASSERT(ret > 0 &&
6444                        (unsigned int)ret < sizeof(expand_buffer.buffer));
6445                 if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6446                         for (i = 0; i < buf->entries; ++i)
6447                                 mlx5_dbg__print_pattern(buf->entry[i].pattern);
6448                 }
6449         } else {
6450                 buf->entries = 1;
6451                 buf->entry[0].pattern = (void *)(uintptr_t)items;
6452         }
6453         rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6454                                                       indir_actions_n);
6455         for (i = 0; i < buf->entries; ++i) {
6456                 /* Initialize flow split data. */
6457                 flow_split_info.prefix_layers = 0;
6458                 flow_split_info.prefix_mark = 0;
6459                 flow_split_info.skip_scale = 0;
6460                 /*
6461                  * The splitter may create multiple dev_flows,
6462                  * depending on configuration. In the simplest
6463                  * case it just creates unmodified original flow.
6464                  */
6465                 ret = flow_create_split_outer(dev, flow, attr,
6466                                               buf->entry[i].pattern,
6467                                               p_actions_rx, &flow_split_info,
6468                                               error);
6469                 if (ret < 0)
6470                         goto error;
6471                 if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
6472                         ret = flow_tunnel_add_default_miss(dev, flow, attr,
6473                                                            p_actions_rx,
6474                                                            idx,
6475                                                            wks->flows[0].tunnel,
6476                                                            &default_miss_ctx,
6477                                                            error);
6478                         if (ret < 0) {
6479                                 mlx5_free(default_miss_ctx.queue);
6480                                 goto error;
6481                         }
6482                 }
6483         }
6484         /* Create the tx flow. */
6485         if (hairpin_flow) {
6486                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
6487                 attr_tx.ingress = 0;
6488                 attr_tx.egress = 1;
6489                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
6490                                          actions_hairpin_tx.actions,
6491                                          idx, error);
6492                 if (!dev_flow)
6493                         goto error;
6494                 dev_flow->flow = flow;
6495                 dev_flow->external = 0;
6496                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
6497                               dev_flow->handle, next);
6498                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
6499                                          items_tx.items,
6500                                          actions_hairpin_tx.actions, error);
6501                 if (ret < 0)
6502                         goto error;
6503         }
6504         /*
6505          * Update the metadata register copy table. If extensive
6506          * metadata feature is enabled and registers are supported
6507          * we might create the extra rte_flow for each unique
6508          * MARK/FLAG action ID.
6509          *
6510          * The table is updated for ingress Flows only, because
6511          * the egress Flows belong to the different device and
6512          * copy table should be updated in peer NIC Rx domain.
6513          */
6514         if (attr->ingress &&
6515             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
6516                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
6517                 if (ret)
6518                         goto error;
6519         }
6520         /*
6521          * If the flow is external (from application) OR device is started,
6522          * OR mreg discover, then apply immediately.
6523          */
6524         if (external || dev->data->dev_started ||
6525             (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
6526              attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
6527                 ret = flow_drv_apply(dev, flow, error);
6528                 if (ret < 0)
6529                         goto error;
6530         }
6531         flow->type = type;
6532         flow_rxq_flags_set(dev, flow);
6533         rte_free(translated_actions);
6534         tunnel = flow_tunnel_from_rule(wks->flows);
6535         if (tunnel) {
6536                 flow->tunnel = 1;
6537                 flow->tunnel_id = tunnel->tunnel_id;
6538                 __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
6539                 mlx5_free(default_miss_ctx.queue);
6540         }
6541         mlx5_flow_pop_thread_workspace();
6542         return idx;
6543 error:
6544         MLX5_ASSERT(flow);
6545         ret = rte_errno; /* Save rte_errno before cleanup. */
6546         flow_mreg_del_copy_action(dev, flow);
6547         flow_drv_destroy(dev, flow);
6548         if (rss_desc->shared_rss)
6549                 __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
6550                         mlx5_ipool_get
6551                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
6552                         rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
6553         mlx5_ipool_free(priv->flows[type], idx);
6554         rte_errno = ret; /* Restore rte_errno. */
6557         mlx5_flow_pop_thread_workspace();
6558 error_before_hairpin_split:
6559         rte_free(translated_actions);
6560         return 0;
6561 }
6562
6563 /**
6564  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
6565  * incoming packets to table 1.
6566  *
6567  * Other flow rules, requested for group n, will be created in
6568  * e-switch table n+1.
6569  * Jump action to e-switch group n will be created to group n+1.
6570  *
6571  * Used when working in switchdev mode, to utilise advantages of table 1
6572  * and above.
6573  *
6574  * @param dev
6575  *   Pointer to Ethernet device.
6576  *
6577  * @return
6578  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
6579  */
6580 struct rte_flow *
6581 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
6582 {
6583         const struct rte_flow_attr attr = {
6584                 .group = 0,
6585                 .priority = 0,
6586                 .ingress = 1,
6587                 .egress = 0,
6588                 .transfer = 1,
6589         };
6590         const struct rte_flow_item pattern = {
6591                 .type = RTE_FLOW_ITEM_TYPE_END,
6592         };
6593         struct rte_flow_action_jump jump = {
6594                 .group = 1,
6595         };
6596         const struct rte_flow_action actions[] = {
6597                 {
6598                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
6599                         .conf = &jump,
6600                 },
6601                 {
6602                         .type = RTE_FLOW_ACTION_TYPE_END,
6603                 },
6604         };
6605         struct rte_flow_error error;
6606
6607         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
6608                                                    &attr, &pattern,
6609                                                    actions, false, &error);
6610 }
6611
6612 /**
6613  * Create a dedicated flow rule on e-switch table 1, matches ESW manager
6614  * Create a dedicated flow rule on e-switch table 1 that matches the ESW
6615  * manager and SQ number and directs all packets to the peer vport.
6616  * @param dev
6617  *   Pointer to Ethernet device.
6618  * @param txq
6619  *   Txq index.
6620  *
6621  * @return
6622  *   Flow ID on success, 0 otherwise and rte_errno is set.
6623  */
6624 uint32_t
6625 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
6626 {
6627         struct rte_flow_attr attr = {
6628                 .group = 0,
6629                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
6630                 .ingress = 1,
6631                 .egress = 0,
6632                 .transfer = 1,
6633         };
6634         struct rte_flow_item_port_id port_spec = {
6635                 .id = MLX5_PORT_ESW_MGR,
6636         };
6637         struct mlx5_rte_flow_item_tx_queue txq_spec = {
6638                 .queue = txq,
6639         };
6640         struct rte_flow_item pattern[] = {
6641                 {
6642                         .type = RTE_FLOW_ITEM_TYPE_PORT_ID,
6643                         .spec = &port_spec,
6644                 },
6645                 {
6646                         .type = (enum rte_flow_item_type)
6647                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
6648                         .spec = &txq_spec,
6649                 },
6650                 {
6651                         .type = RTE_FLOW_ITEM_TYPE_END,
6652                 },
6653         };
6654         struct rte_flow_action_jump jump = {
6655                 .group = 1,
6656         };
6657         struct rte_flow_action_port_id port = {
6658                 .id = dev->data->port_id,
6659         };
6660         struct rte_flow_action actions[] = {
6661                 {
6662                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
6663                         .conf = &jump,
6664                 },
6665                 {
6666                         .type = RTE_FLOW_ACTION_TYPE_END,
6667                 },
6668         };
6669         struct rte_flow_error error;
6670
6671         /*
6672          * Creates group 0, highest priority jump flow.
6673          * Matches txq to bypass kernel packets.
6674          */
6675         if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
6676                              false, &error) == 0)
6677                 return 0;
6678         /* Create group 1, lowest priority redirect flow for txq. */
6679         attr.group = 1;
6680         actions[0].conf = &port;
6681         actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
6682         return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
6683                                 actions, false, &error);
6684 }
6685
6686 /**
6687  * Validate a flow supported by the NIC.
6688  *
6689  * @see rte_flow_validate()
6690  * @see rte_flow_ops
6691  */
6692 int
6693 mlx5_flow_validate(struct rte_eth_dev *dev,
6694                    const struct rte_flow_attr *attr,
6695                    const struct rte_flow_item items[],
6696                    const struct rte_flow_action original_actions[],
6697                    struct rte_flow_error *error)
6698 {
6699         int hairpin_flow;
6700         struct mlx5_translated_action_handle
6701                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6702         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6703         const struct rte_flow_action *actions;
6704         struct rte_flow_action *translated_actions = NULL;
6705         int ret = flow_action_handles_translate(dev, original_actions,
6706                                                 indir_actions,
6707                                                 &indir_actions_n,
6708                                                 &translated_actions, error);
6709
6710         if (ret)
6711                 return ret;
6712         actions = translated_actions ? translated_actions : original_actions;
6713         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6714         ret = flow_drv_validate(dev, attr, items, actions,
6715                                 true, hairpin_flow, error);
6716         rte_free(translated_actions);
6717         return ret;
6718 }
6719
6720 /**
6721  * Create a flow.
6722  *
6723  * @see rte_flow_create()
6724  * @see rte_flow_ops
6725  */
6726 struct rte_flow *
6727 mlx5_flow_create(struct rte_eth_dev *dev,
6728                  const struct rte_flow_attr *attr,
6729                  const struct rte_flow_item items[],
6730                  const struct rte_flow_action actions[],
6731                  struct rte_flow_error *error)
6732 {
6733         /*
6734          * If the device is not started yet, the application is not allowed
6735          * to create a flow. PMD default flows and traffic control flows
6736          * are not affected.
6737          */
6738         if (unlikely(!dev->data->dev_started)) {
6739                 DRV_LOG(DEBUG, "port %u is not started when "
6740                         "inserting a flow", dev->data->port_id);
6741                 rte_flow_error_set(error, ENODEV,
6742                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6743                                    NULL,
6744                                    "port not started");
6745                 return NULL;
6746         }
6747
6748         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
6749                                                    attr, items, actions,
6750                                                    true, error);
6751 }
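
/*
 * Illustrative application-level usage of the rte_flow API served by this
 * entry point. This is a sketch only; the port_id, pattern and queue index
 * below are hypothetical and are not part of this driver:
 *
 *      struct rte_flow_attr attr = { .ingress = 1 };
 *      struct rte_flow_item pattern[] = {
 *              { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *              { .type = RTE_FLOW_ITEM_TYPE_END },
 *      };
 *      struct rte_flow_action_queue queue = { .index = 0 };
 *      struct rte_flow_action actions[] = {
 *              { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *              { .type = RTE_FLOW_ACTION_TYPE_END },
 *      };
 *      struct rte_flow_error err;
 *      struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *                                           actions, &err);
 *
 * The port must already be started, otherwise the request is rejected with
 * ENODEV as implemented above.
 */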
6752
6753 /**
6754  * Destroy a flow in a list.
6755  *
6756  * @param dev
6757  *   Pointer to Ethernet device.
6758  * @param[in] flow_idx
6759  *   Index of flow to destroy.
6760  */
6761 static void
6762 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6763                   uint32_t flow_idx)
6764 {
6765         struct mlx5_priv *priv = dev->data->dev_private;
6766         struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
6767
6768         if (!flow)
6769                 return;
6770         MLX5_ASSERT(flow->type == type);
6771         /*
6772          * Update RX queue flags only if port is started, otherwise it is
6773          * already clean.
6774          */
6775         if (dev->data->dev_started)
6776                 flow_rxq_flags_trim(dev, flow);
6777         flow_drv_destroy(dev, flow);
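        /*
         * Drop the tunnel reference taken when the flow was created and
         * release the tunnel once the last flow using it is destroyed.
         */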
6778         if (flow->tunnel) {
6779                 struct mlx5_flow_tunnel *tunnel;
6780
6781                 tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
6782                 RTE_VERIFY(tunnel);
6783                 if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
6784                         mlx5_flow_tunnel_free(dev, tunnel);
6785         }
6786         flow_mreg_del_copy_action(dev, flow);
6787         mlx5_ipool_free(priv->flows[type], flow_idx);
6788 }
6789
6790 /**
6791  * Destroy all flows.
6792  *
6793  * @param dev
6794  *   Pointer to Ethernet device.
6795  * @param type
6796  *   Flow type to be flushed.
6797  * @param active
6798  *   If flushing is called actively.
6799  */
6800 void
6801 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6802                      bool active)
6803 {
6804         struct mlx5_priv *priv = dev->data->dev_private;
6805         uint32_t num_flushed = 0, fidx = 1;
6806         struct rte_flow *flow;
6807
6808         MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
6809                 flow_list_destroy(dev, type, fidx);
6810                 num_flushed++;
6811         }
6812         if (active) {
6813                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
6814                         dev->data->port_id, num_flushed);
6815         }
6816 }
6817
6818 /**
6819  * Stop all default actions for flows.
6820  *
6821  * @param dev
6822  *   Pointer to Ethernet device.
6823  */
6824 void
6825 mlx5_flow_stop_default(struct rte_eth_dev *dev)
6826 {
6827         flow_mreg_del_default_copy_action(dev);
6828         flow_rxq_flags_clear(dev);
6829 }
6830
6831 /**
6832  * Start all default actions for flows.
6833  *
6834  * @param dev
6835  *   Pointer to Ethernet device.
6836  * @return
6837  *   0 on success, a negative errno value otherwise and rte_errno is set.
6838  */
6839 int
6840 mlx5_flow_start_default(struct rte_eth_dev *dev)
6841 {
6842         struct rte_flow_error error;
6843
6844         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
6845         return flow_mreg_add_default_copy_action(dev, &error);
6846 }
6847
6848 /**
6849  * Release key of thread specific flow workspace data.
6850  */
6851 void
6852  * Release thread specific flow workspace data (the per-thread workspace list).
6853 {
6854         struct mlx5_flow_workspace *wks = data;
6855         struct mlx5_flow_workspace *next;
6856
6857         while (wks) {
6858                 next = wks->next;
6859                 free(wks->rss_desc.queue);
6860                 free(wks);
6861                 wks = next;
6862         }
6863 }
6864
6865 /**
6866  * Get thread specific current flow workspace.
6867  *
6868  * @return pointer to thread specific flow workspace data, NULL on error.
6869  */
6870 struct mlx5_flow_workspace*
6871 mlx5_flow_get_thread_workspace(void)
6872 {
6873         struct mlx5_flow_workspace *data;
6874
6875         data = mlx5_flow_os_get_specific_workspace();
6876         MLX5_ASSERT(data && data->inuse);
6877         if (!data || !data->inuse)
6878                 DRV_LOG(ERR, "flow workspace not initialized.");
6879         return data;
6880 }
6881
6882 /**
6883  * Allocate and init new flow workspace.
6884  *
6885  * @return pointer to flow workspace data, NULL on error.
6886  */
6887 static struct mlx5_flow_workspace*
6888 flow_alloc_thread_workspace(void)
6889 {
6890         struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
6891
6892         if (!data) {
6893                 DRV_LOG(ERR, "Failed to allocate flow workspace "
6894                         "memory.");
6895                 return NULL;
6896         }
6897         data->rss_desc.queue = calloc(1,
6898                         sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
6899         if (!data->rss_desc.queue)
6900                 goto err;
6901         data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
6902         return data;
6903 err:
6904         if (data->rss_desc.queue)
6905                 free(data->rss_desc.queue);
6906         free(data);
6907         return NULL;
6908 }
6909
6910 /**
6911  * Get new thread specific flow workspace.
6912  *
6913  * If the current workspace is in use, create a new one and set it as current.
6914  *
6915  * @return pointer to thread specific flow workspace data, NULL on error.
6916  */
6917 static struct mlx5_flow_workspace*
6918 mlx5_flow_push_thread_workspace(void)
6919 {
6920         struct mlx5_flow_workspace *curr;
6921         struct mlx5_flow_workspace *data;
6922
6923         curr = mlx5_flow_os_get_specific_workspace();
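        /*
         * Reuse the current entry if it is free, otherwise advance to (or
         * allocate) the next entry in the per-thread workspace list.
         */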
6924         if (!curr) {
6925                 data = flow_alloc_thread_workspace();
6926                 if (!data)
6927                         return NULL;
6928         } else if (!curr->inuse) {
6929                 data = curr;
6930         } else if (curr->next) {
6931                 data = curr->next;
6932         } else {
6933                 data = flow_alloc_thread_workspace();
6934                 if (!data)
6935                         return NULL;
6936                 curr->next = data;
6937                 data->prev = curr;
6938         }
6939         data->inuse = 1;
6940         data->flow_idx = 0;
6941         /* Set as current workspace */
6942         if (mlx5_flow_os_set_specific_workspace(data))
6943                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6944         return data;
6945 }
6946
6947 /**
6948  * Close current thread specific flow workspace.
6949  *
6950  * If a previous workspace is available, set it as current.
6953  */
6954 static void
6955 mlx5_flow_pop_thread_workspace(void)
6956 {
6957         struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
6958
6959         if (!data)
6960                 return;
6961         if (!data->inuse) {
6962                 DRV_LOG(ERR, "Failed to close unused flow workspace.");
6963                 return;
6964         }
6965         data->inuse = 0;
6966         if (!data->prev)
6967                 return;
6968         if (mlx5_flow_os_set_specific_workspace(data->prev))
6969                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6970 }
6971
6972 /**
6973  * Verify the flow list is empty.
6974  *
6975  * @param dev
6976  *  Pointer to Ethernet device.
6977  *
6978  * @return the number of flows not released.
6979  */
6980 int
6981 mlx5_flow_verify(struct rte_eth_dev *dev)
6982 {
6983         struct mlx5_priv *priv = dev->data->dev_private;
6984         struct rte_flow *flow;
6985         uint32_t idx = 0;
6986         int ret = 0, i;
6987
6988         for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
6989                 MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
6990                         DRV_LOG(DEBUG, "port %u flow %p still referenced",
6991                                 dev->data->port_id, (void *)flow);
6992                         ret++;
6993                 }
6994         }
6995         return ret;
6996 }
6997
6998 /**
6999  * Enable default hairpin egress flow.
7000  *
7001  * @param dev
7002  *   Pointer to Ethernet device.
7003  * @param queue
7004  *   The queue index.
7005  *
7006  * @return
7007  *   0 on success, a negative errno value otherwise and rte_errno is set.
7008  */
7009 int
7010 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
7011                             uint32_t queue)
7012 {
7013         const struct rte_flow_attr attr = {
7014                 .egress = 1,
7015                 .priority = 0,
7016         };
7017         struct mlx5_rte_flow_item_tx_queue queue_spec = {
7018                 .queue = queue,
7019         };
7020         struct mlx5_rte_flow_item_tx_queue queue_mask = {
7021                 .queue = UINT32_MAX,
7022         };
7023         struct rte_flow_item items[] = {
7024                 {
7025                         .type = (enum rte_flow_item_type)
7026                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7027                         .spec = &queue_spec,
7028                         .last = NULL,
7029                         .mask = &queue_mask,
7030                 },
7031                 {
7032                         .type = RTE_FLOW_ITEM_TYPE_END,
7033                 },
7034         };
7035         struct rte_flow_action_jump jump = {
7036                 .group = MLX5_HAIRPIN_TX_TABLE,
7037         };
7038         struct rte_flow_action actions[2];
7039         uint32_t flow_idx;
7040         struct rte_flow_error error;
7041
7042         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
7043         actions[0].conf = &jump;
7044         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
7045         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7046                                     &attr, items, actions, false, &error);
7047         if (!flow_idx) {
7048                 DRV_LOG(DEBUG,
7049                         "Failed to create ctrl flow: rte_errno(%d),"
7050                         " type(%d), message(%s)",
7051                         rte_errno, error.type,
7052                         error.message ? error.message : " (no stated reason)");
7053                 return -rte_errno;
7054         }
7055         return 0;
7056 }
7057
7058 /**
7059  * Enable a control flow configured from the control plane.
7060  *
7061  * @param dev
7062  *   Pointer to Ethernet device.
7063  * @param eth_spec
7064  *   An Ethernet flow spec to apply.
7065  * @param eth_mask
7066  *   An Ethernet flow mask to apply.
7067  * @param vlan_spec
7068  *   A VLAN flow spec to apply.
7069  * @param vlan_mask
7070  *   A VLAN flow mask to apply.
7071  *
7072  * @return
7073  *   0 on success, a negative errno value otherwise and rte_errno is set.
7074  */
7075 int
7076 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
7077                     struct rte_flow_item_eth *eth_spec,
7078                     struct rte_flow_item_eth *eth_mask,
7079                     struct rte_flow_item_vlan *vlan_spec,
7080                     struct rte_flow_item_vlan *vlan_mask)
7081 {
7082         struct mlx5_priv *priv = dev->data->dev_private;
7083         const struct rte_flow_attr attr = {
7084                 .ingress = 1,
7085                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7086         };
7087         struct rte_flow_item items[] = {
7088                 {
7089                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7090                         .spec = eth_spec,
7091                         .last = NULL,
7092                         .mask = eth_mask,
7093                 },
7094                 {
7095                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
7096                                               RTE_FLOW_ITEM_TYPE_END,
7097                         .spec = vlan_spec,
7098                         .last = NULL,
7099                         .mask = vlan_mask,
7100                 },
7101                 {
7102                         .type = RTE_FLOW_ITEM_TYPE_END,
7103                 },
7104         };
7105         uint16_t queue[priv->reta_idx_n];
7106         struct rte_flow_action_rss action_rss = {
7107                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
7108                 .level = 0,
7109                 .types = priv->rss_conf.rss_hf,
7110                 .key_len = priv->rss_conf.rss_key_len,
7111                 .queue_num = priv->reta_idx_n,
7112                 .key = priv->rss_conf.rss_key,
7113                 .queue = queue,
7114         };
7115         struct rte_flow_action actions[] = {
7116                 {
7117                         .type = RTE_FLOW_ACTION_TYPE_RSS,
7118                         .conf = &action_rss,
7119                 },
7120                 {
7121                         .type = RTE_FLOW_ACTION_TYPE_END,
7122                 },
7123         };
7124         uint32_t flow_idx;
7125         struct rte_flow_error error;
7126         unsigned int i;
7127
7128         if (!priv->reta_idx_n || !priv->rxqs_n)
7129                 return 0;
7131         if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
7132                 action_rss.types = 0;
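        /* Fill the RSS queue list from the current RETA (indirection table). */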
7133         for (i = 0; i != priv->reta_idx_n; ++i)
7134                 queue[i] = (*priv->reta_idx)[i];
7135         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7136                                     &attr, items, actions, false, &error);
7137         if (!flow_idx)
7138                 return -rte_errno;
7139         return 0;
7140 }
7141
7142 /**
7143  * Enable a control flow configured from the control plane.
7144  *
7145  * @param dev
7146  *   Pointer to Ethernet device.
7147  * @param eth_spec
7148  *   An Ethernet flow spec to apply.
7149  * @param eth_mask
7150  *   An Ethernet flow mask to apply.
7151  *
7152  * @return
7153  *   0 on success, a negative errno value otherwise and rte_errno is set.
7154  */
7155 int
7156 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7157                struct rte_flow_item_eth *eth_spec,
7158                struct rte_flow_item_eth *eth_mask)
7159 {
7160         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7161 }
7162
7163 /**
7164  * Create a default miss flow rule matching LACP traffic.
7165  *
7166  * @param dev
7167  *   Pointer to Ethernet device.
7170  *
7171  * @return
7172  *   0 on success, a negative errno value otherwise and rte_errno is set.
7173  */
7174 int
7175 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7176 {
7177         /*
7178          * The LACP matching is done by only using ether type since using
7179          * a multicast dst mac causes kernel to give low priority to this flow.
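         * (Ethertype 0x8809 is the IEEE 802.3 Slow Protocols type carried by
         * LACP frames.)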
7180          */
7181         static const struct rte_flow_item_eth lacp_spec = {
7182                 .type = RTE_BE16(0x8809),
7183         };
7184         static const struct rte_flow_item_eth lacp_mask = {
7185                 .type = 0xffff,
7186         };
7187         const struct rte_flow_attr attr = {
7188                 .ingress = 1,
7189         };
7190         struct rte_flow_item items[] = {
7191                 {
7192                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7193                         .spec = &lacp_spec,
7194                         .mask = &lacp_mask,
7195                 },
7196                 {
7197                         .type = RTE_FLOW_ITEM_TYPE_END,
7198                 },
7199         };
7200         struct rte_flow_action actions[] = {
7201                 {
7202                         .type = (enum rte_flow_action_type)
7203                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7204                 },
7205                 {
7206                         .type = RTE_FLOW_ACTION_TYPE_END,
7207                 },
7208         };
7209         struct rte_flow_error error;
7210         uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7211                                         &attr, items, actions,
7212                                         false, &error);
7213
7214         if (!flow_idx)
7215                 return -rte_errno;
7216         return 0;
7217 }
7218
7219 /**
7220  * Destroy a flow.
7221  *
7222  * @see rte_flow_destroy()
7223  * @see rte_flow_ops
7224  */
7225 int
7226 mlx5_flow_destroy(struct rte_eth_dev *dev,
7227                   struct rte_flow *flow,
7228                   struct rte_flow_error *error __rte_unused)
7229 {
7230         flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7231                                 (uintptr_t)(void *)flow);
7232         return 0;
7233 }
7234
7235 /**
7236  * Destroy all flows.
7237  *
7238  * @see rte_flow_flush()
7239  * @see rte_flow_ops
7240  */
7241 int
7242 mlx5_flow_flush(struct rte_eth_dev *dev,
7243                 struct rte_flow_error *error __rte_unused)
7244 {
7245         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7246         return 0;
7247 }
7248
7249 /**
7250  * Isolated mode.
7251  *
7252  * @see rte_flow_isolate()
7253  * @see rte_flow_ops
7254  */
7255 int
7256 mlx5_flow_isolate(struct rte_eth_dev *dev,
7257                   int enable,
7258                   struct rte_flow_error *error)
7259 {
7260         struct mlx5_priv *priv = dev->data->dev_private;
7261
7262         if (dev->data->dev_started) {
7263                 rte_flow_error_set(error, EBUSY,
7264                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7265                                    NULL,
7266                                    "port must be stopped first");
7267                 return -rte_errno;
7268         }
7269         priv->isolated = !!enable;
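        /* Switch the device ops table to match the isolation state. */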
7270         if (enable)
7271                 dev->dev_ops = &mlx5_dev_ops_isolate;
7272         else
7273                 dev->dev_ops = &mlx5_dev_ops;
7274
7275         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7276         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7277
7278         return 0;
7279 }
7280
7281 /**
7282  * Query a flow.
7283  *
7284  * @see rte_flow_query()
7285  * @see rte_flow_ops
7286  */
7287 static int
7288 flow_drv_query(struct rte_eth_dev *dev,
7289                uint32_t flow_idx,
7290                const struct rte_flow_action *actions,
7291                void *data,
7292                struct rte_flow_error *error)
7293 {
7294         struct mlx5_priv *priv = dev->data->dev_private;
7295         const struct mlx5_flow_driver_ops *fops;
7296         struct rte_flow *flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7297                                                flow_idx);
7298         enum mlx5_flow_drv_type ftype;
7299
7300         if (!flow) {
7301                 return rte_flow_error_set(error, ENOENT,
7302                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7303                           NULL,
7304                           "invalid flow handle");
7305         }
7306         ftype = flow->drv_type;
7307         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7308         fops = flow_get_drv_ops(ftype);
7309
7310         return fops->query(dev, flow, actions, data, error);
7311 }
7312
7313 /**
7314  * Query a flow.
7315  *
7316  * @see rte_flow_query()
7317  * @see rte_flow_ops
7318  */
7319 int
7320 mlx5_flow_query(struct rte_eth_dev *dev,
7321                 struct rte_flow *flow,
7322                 const struct rte_flow_action *actions,
7323                 void *data,
7324                 struct rte_flow_error *error)
7325 {
7326         int ret;
7327
7328         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
7329                              error);
7330         if (ret < 0)
7331                 return ret;
7332         return 0;
7333 }
7334
7335 /**
7336  * Get rte_flow callbacks.
7337  *
7338  * @param dev
7339  *   Pointer to Ethernet device structure.
7340  * @param ops
7341  *   Pointer to operation-specific structure.
7342  *
7343  * @return 0
7344  */
7345 int
7346 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7347                   const struct rte_flow_ops **ops)
7348 {
7349         *ops = &mlx5_flow_ops;
7350         return 0;
7351 }
7352
7353 /**
7354  * Validate meter policy actions.
7355  * Dispatcher for action type specific validation.
7356  *
7357  * @param[in] dev
7358  *   Pointer to the Ethernet device structure.
7359  * @param[in] action
7360  *   The meter policy action object to validate.
7361  * @param[in] attr
7362  *   Attributes of flow to determine steering domain.
7363  * @param[out] is_rss
7364  *   Is RSS or not.
7365  * @param[out] domain_bitmap
7366  *   Domain bitmap.
7367  * @param[out] is_def_policy
7368  *   Is default policy or not.
7369  * @param[out] error
7370  *   Perform verbose error reporting if not NULL. Initialized in case of
7371  *   error only.
7372  *
7373  * @return
7374  *   0 on success, otherwise negative errno value.
7375  */
7376 int
7377 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7378                         const struct rte_flow_action *actions[RTE_COLORS],
7379                         struct rte_flow_attr *attr,
7380                         bool *is_rss,
7381                         uint8_t *domain_bitmap,
7382                         uint8_t *policy_mode,
7383                         struct rte_mtr_error *error)
7384 {
7385         const struct mlx5_flow_driver_ops *fops;
7386
7387         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7388         return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7389                                        domain_bitmap, policy_mode, error);
7390 }
7391
7392 /**
7393  * Destroy the meter table set.
7394  *
7395  * @param[in] dev
7396  *   Pointer to Ethernet device.
7397  * @param[in] mtr_policy
7398  *   Meter policy struct.
7399  */
7400 void
7401 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7402                       struct mlx5_flow_meter_policy *mtr_policy)
7403 {
7404         const struct mlx5_flow_driver_ops *fops;
7405
7406         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7407         fops->destroy_mtr_acts(dev, mtr_policy);
7408 }
7409
7410 /**
7411  * Create policy action, lock free,
7412  * (mutex should be acquired by caller).
7413  * Dispatcher for action type specific call.
7414  *
7415  * @param[in] dev
7416  *   Pointer to the Ethernet device structure.
7417  * @param[in] mtr_policy
7418  *   Meter policy struct.
7419  * @param[in] action
7420  *   Action specification used to create meter actions.
7421  * @param[out] error
7422  *   Perform verbose error reporting if not NULL. Initialized in case of
7423  *   error only.
7424  *
7425  * @return
7426  *   0 on success, otherwise negative errno value.
7427  */
7428 int
7429 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7430                       struct mlx5_flow_meter_policy *mtr_policy,
7431                       const struct rte_flow_action *actions[RTE_COLORS],
7432                       struct rte_mtr_error *error)
7433 {
7434         const struct mlx5_flow_driver_ops *fops;
7435
7436         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7437         return fops->create_mtr_acts(dev, mtr_policy, actions, error);
7438 }
7439
7440 /**
7441  * Create policy rules, lock free,
7442  * (mutex should be acquired by caller).
7443  * Dispatcher for action type specific call.
7444  *
7445  * @param[in] dev
7446  *   Pointer to the Ethernet device structure.
7447  * @param[in] mtr_policy
7448  *   Meter policy struct.
7449  *
7450  * @return
7451  *   0 on success, -1 otherwise.
7452  */
7453 int
7454 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
7455                              struct mlx5_flow_meter_policy *mtr_policy)
7456 {
7457         const struct mlx5_flow_driver_ops *fops;
7458
7459         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7460         return fops->create_policy_rules(dev, mtr_policy);
7461 }
7462
7463 /**
7464  * Destroy policy rules, lock free,
7465  * (mutex should be acquired by caller).
7466  * Dispatcher for action type specific call.
7467  *
7468  * @param[in] dev
7469  *   Pointer to the Ethernet device structure.
7470  * @param[in] mtr_policy
7471  *   Meter policy struct.
7472  */
7473 void
7474 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
7475                              struct mlx5_flow_meter_policy *mtr_policy)
7476 {
7477         const struct mlx5_flow_driver_ops *fops;
7478
7479         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7480         fops->destroy_policy_rules(dev, mtr_policy);
7481 }
7482
7483 /**
7484  * Destroy the default policy table set.
7485  *
7486  * @param[in] dev
7487  *   Pointer to Ethernet device.
7488  */
7489 void
7490 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
7491 {
7492         const struct mlx5_flow_driver_ops *fops;
7493
7494         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7495         fops->destroy_def_policy(dev);
7496 }
7497
7498 /**
7499  * Create the default policy table set.
7500  *
7501  * @param[in] dev
7502  *   Pointer to Ethernet device.
7503  *
7504  * @return
7505  *   0 on success, -1 otherwise.
7506  */
7507 int
7508 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
7509 {
7510         const struct mlx5_flow_driver_ops *fops;
7511
7512         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7513         return fops->create_def_policy(dev);
7514 }
7515
7516 /**
7517  * Create the needed meter and suffix tables.
7518  *
7519  * @param[in] dev
7520  *   Pointer to Ethernet device.
7521  *
7522  * @return
7523  *   0 on success, -1 otherwise.
7524  */
7525 int
7526 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
7527                         struct mlx5_flow_meter_info *fm,
7528                         uint32_t mtr_idx,
7529                         uint8_t domain_bitmap)
7530 {
7531         const struct mlx5_flow_driver_ops *fops;
7532
7533         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7534         return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
7535 }
7536
7537 /**
7538  * Destroy the meter table set.
7539  *
7540  * @param[in] dev
7541  *   Pointer to Ethernet device.
7542  * @param[in] fm
7543  *   Pointer to the flow meter.
7544  */
7545 void
7546 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
7547                            struct mlx5_flow_meter_info *fm)
7548 {
7549         const struct mlx5_flow_driver_ops *fops;
7550
7551         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7552         fops->destroy_mtr_tbls(dev, fm);
7553 }
7554
7555 /**
7556  * Destroy the global meter drop table.
7557  *
7558  * @param[in] dev
7559  *   Pointer to Ethernet device.
7560  */
7561 void
7562 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
7563 {
7564         const struct mlx5_flow_driver_ops *fops;
7565
7566         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7567         fops->destroy_mtr_drop_tbls(dev);
7568 }
7569
7570 /**
7571  * Destroy the sub policy table with RX queue.
7572  *
7573  * @param[in] dev
7574  *   Pointer to Ethernet device.
7575  * @param[in] mtr_policy
7576  *   Pointer to meter policy table.
7577  */
7578 void
7579 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
7580                 struct mlx5_flow_meter_policy *mtr_policy)
7581 {
7582         const struct mlx5_flow_driver_ops *fops;
7583
7584         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7585         fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
7586 }
7587
7588 /**
7589  * Allocate an ASO flow meter id.
7590  *
7591  * @param[in] dev
7592  *   Pointer to Ethernet device.
7593  *
7594  * @return
7595  *   Index to the ASO flow meter on success, 0 otherwise.
7596  */
7597 uint32_t
7598 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
7599 {
7600         const struct mlx5_flow_driver_ops *fops;
7601
7602         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7603         return fops->create_meter(dev);
7604 }
7605
7606 /**
7607  * Free the ASO flow meter id.
7608  *
7609  * @param[in] dev
7610  *   Pointer to Ethernet device.
7611  * @param[in] mtr_idx
7612  *   Index of the ASO flow meter to be freed.
7616  */
7617 void
7618 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
7619 {
7620         const struct mlx5_flow_driver_ops *fops;
7621
7622         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7623         fops->free_meter(dev, mtr_idx);
7624 }
7625
7626 /**
7627  * Allocate a counter.
7628  *
7629  * @param[in] dev
7630  *   Pointer to Ethernet device structure.
7631  *
7632  * @return
7633  *   Index to allocated counter on success, 0 otherwise.
7634  */
7635 uint32_t
7636 mlx5_counter_alloc(struct rte_eth_dev *dev)
7637 {
7638         const struct mlx5_flow_driver_ops *fops;
7639         struct rte_flow_attr attr = { .transfer = 0 };
7640
7641         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7642                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7643                 return fops->counter_alloc(dev);
7644         }
7645         DRV_LOG(ERR,
7646                 "port %u counter allocate is not supported.",
7647                  dev->data->port_id);
7648         return 0;
7649 }
7650
7651 /**
7652  * Free a counter.
7653  *
7654  * @param[in] dev
7655  *   Pointer to Ethernet device structure.
7656  * @param[in] cnt
7657  *   Index of the counter to be freed.
7658  */
7659 void
7660 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
7661 {
7662         const struct mlx5_flow_driver_ops *fops;
7663         struct rte_flow_attr attr = { .transfer = 0 };
7664
7665         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7666                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7667                 fops->counter_free(dev, cnt);
7668                 return;
7669         }
7670         DRV_LOG(ERR,
7671                 "port %u counter free is not supported.",
7672                  dev->data->port_id);
7673 }
7674
7675 /**
7676  * Query counter statistics.
7677  *
7678  * @param[in] dev
7679  *   Pointer to Ethernet device structure.
7680  * @param[in] cnt
7681  *   Index to counter to query.
7682  * @param[in] clear
7683  *   Set to clear counter statistics.
7684  * @param[out] pkts
7685  *   Location to store the number of packets that hit the counter.
7686  * @param[out] bytes
7687  *   Location to store the number of bytes that hit the counter.
7688  *
7689  * @return
7690  *   0 on success, a negative errno value otherwise.
7691  */
7692 int
7693 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
7694                    bool clear, uint64_t *pkts, uint64_t *bytes)
7695 {
7696         const struct mlx5_flow_driver_ops *fops;
7697         struct rte_flow_attr attr = { .transfer = 0 };
7698
7699         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7700                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7701                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
7702         }
7703         DRV_LOG(ERR,
7704                 "port %u counter query is not supported.",
7705                  dev->data->port_id);
7706         return -ENOTSUP;
7707 }
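
/*
 * Internal usage sketch (illustrative only, not part of the driver): the
 * three wrappers above are meant to be used together from other PMD code
 * paths, with the 0/negative conventions they document.
 *
 *	uint64_t pkts, bytes;
 *	uint32_t cnt = mlx5_counter_alloc(dev); (0 means allocation failed)
 *
 *	if (cnt) {
 *		mlx5_counter_query(dev, cnt, false, &pkts, &bytes);
 *		mlx5_counter_free(dev, cnt);
 *	}
 *
 * All three calls dispatch to the DV driver ops and only log an error when
 * the DV flow engine is not active for the port.
 */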
7708
7709 /**
7710  * Allocate a new memory for the counter values wrapped by all the needed
7711  * management.
7712  *
7713  * @param[in] sh
7714  *   Pointer to mlx5_dev_ctx_shared object.
7715  *
7716  * @return
7717  *   0 on success, a negative errno value otherwise.
7718  */
7719 static int
7720 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
7721 {
7722         struct mlx5_devx_mkey_attr mkey_attr;
7723         struct mlx5_counter_stats_mem_mng *mem_mng;
7724         volatile struct flow_counter_stats *raw_data;
7725         int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
7726         int size = (sizeof(struct flow_counter_stats) *
7727                         MLX5_COUNTERS_PER_POOL +
7728                         sizeof(struct mlx5_counter_stats_raw)) * raws_n +
7729                         sizeof(struct mlx5_counter_stats_mem_mng);
7730         size_t pgsize = rte_mem_page_size();
7731         uint8_t *mem;
7732         int i;
7733
7734         if (pgsize == (size_t)-1) {
7735                 DRV_LOG(ERR, "Failed to get mem page size");
7736                 rte_errno = ENOMEM;
7737                 return -ENOMEM;
7738         }
7739         mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
7740         if (!mem) {
7741                 rte_errno = ENOMEM;
7742                 return -ENOMEM;
7743         }
7744         mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
7745         size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
7746         mem_mng->umem = mlx5_os_umem_reg(sh->cdev->ctx, mem, size,
7747                                                  IBV_ACCESS_LOCAL_WRITE);
7748         if (!mem_mng->umem) {
7749                 rte_errno = errno;
7750                 mlx5_free(mem);
7751                 return -rte_errno;
7752         }
7753         memset(&mkey_attr, 0, sizeof(mkey_attr));
7754         mkey_attr.addr = (uintptr_t)mem;
7755         mkey_attr.size = size;
7756         mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
7757         mkey_attr.pd = sh->cdev->pdn;
7758         mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
7759         mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
7760         mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->cdev->ctx, &mkey_attr);
7761         if (!mem_mng->dm) {
7762                 mlx5_os_umem_dereg(mem_mng->umem);
7763                 rte_errno = errno;
7764                 mlx5_free(mem);
7765                 return -rte_errno;
7766         }
7767         mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
7768         raw_data = (volatile struct flow_counter_stats *)mem;
7769         for (i = 0; i < raws_n; ++i) {
7770                 mem_mng->raws[i].mem_mng = mem_mng;
7771                 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
7772         }
7773         for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
7774                 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
7775                                  mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
7776                                  next);
7777         LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
7778         sh->cmng.mem_mng = mem_mng;
7779         return 0;
7780 }
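
/*
 * Memory layout of the single mlx5_malloc() block allocated above (sketch):
 *
 *	[ raw counter data: raws_n * MLX5_COUNTERS_PER_POOL entries ]
 *	[ raws[]: raws_n descriptors pointing into the data region  ]
 *	[ struct mlx5_counter_stats_mem_mng placed at the very end  ]
 *
 * Only the leading raw counter data region is registered as UMEM and
 * covered by the mkey for DevX queries; the last MLX5_MAX_PENDING_QUERIES
 * descriptors are put on the free_stat_raws list to serve in-flight
 * asynchronous queries.
 */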
7781
7782 /**
7783  * Set the statistic memory to the new counter pool.
7784  *
7785  * @param[in] sh
7786  *   Pointer to mlx5_dev_ctx_shared object.
7787  * @param[in] pool
7788  *   Pointer to the pool to set the statistic memory.
7789  *
7790  * @return
7791  *   0 on success, a negative errno value otherwise.
7792  */
7793 static int
7794 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
7795                                struct mlx5_flow_counter_pool *pool)
7796 {
7797         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7798         /* Resize statistic memory once used out. */
7799         if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
7800             mlx5_flow_create_counter_stat_mem_mng(sh)) {
7801                 DRV_LOG(ERR, "Cannot resize counter stat mem.");
7802                 return -1;
7803         }
7804         rte_spinlock_lock(&pool->sl);
7805         pool->raw = cmng->mem_mng->raws + pool->index %
7806                     MLX5_CNT_CONTAINER_RESIZE;
7807         rte_spinlock_unlock(&pool->sl);
7808         pool->raw_hw = NULL;
7809         return 0;
7810 }
7811
7812 #define MLX5_POOL_QUERY_FREQ_US 1000000
7813
7814 /**
7815  * Set the periodic procedure for triggering asynchronous batch queries for all
7816  * the counter pools.
7817  *
7818  * @param[in] sh
7819  *   Pointer to mlx5_dev_ctx_shared object.
7820  */
7821 void
7822 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
7823 {
7824         uint32_t pools_n, us;
7825
7826         pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
7827         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
7828         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
7829         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
7830                 sh->cmng.query_thread_on = 0;
7831                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
7832         } else {
7833                 sh->cmng.query_thread_on = 1;
7834         }
7835 }
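
/*
 * Period arithmetic (sketch): with MLX5_POOL_QUERY_FREQ_US = 1000000 the
 * alarm interval is 1000000 / n_valid microseconds. For example, 4 valid
 * pools give one alarm every 250000 us, so each pool is still queried
 * roughly once per second regardless of the pool count.
 */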
7836
7837 /**
7838  * The periodic procedure for triggering asynchronous batch queries for all the
7839  * counter pools. This function is expected to be called from the host thread.
7840  *
7841  * @param[in] arg
7842  *   The parameter for the alarm process.
7843  */
7844 void
7845 mlx5_flow_query_alarm(void *arg)
7846 {
7847         struct mlx5_dev_ctx_shared *sh = arg;
7848         int ret;
7849         uint16_t pool_index = sh->cmng.pool_index;
7850         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7851         struct mlx5_flow_counter_pool *pool;
7852         uint16_t n_valid;
7853
7854         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
7855                 goto set_alarm;
7856         rte_spinlock_lock(&cmng->pool_update_sl);
7857         pool = cmng->pools[pool_index];
7858         n_valid = cmng->n_valid;
7859         rte_spinlock_unlock(&cmng->pool_update_sl);
7860         /* Set the statistic memory to the new created pool. */
7861         if (!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool))
7862                 goto set_alarm;
7863         if (pool->raw_hw)
7864                 /* There is a pool query in progress. */
7865                 goto set_alarm;
7866         pool->raw_hw =
7867                 LIST_FIRST(&sh->cmng.free_stat_raws);
7868         if (!pool->raw_hw)
7869                 /* No free counter statistics raw memory. */
7870                 goto set_alarm;
7871         /*
7872          * Identify the counters released between query trigger and query
7873          * handling more efficiently. Counters released in this gap period
7874          * should wait for a new round of query, as the newly arrived packets
7875          * are not yet taken into account.
7876          */
7877         pool->query_gen++;
7878         ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
7879                                                MLX5_COUNTERS_PER_POOL,
7880                                                NULL, NULL,
7881                                                pool->raw_hw->mem_mng->dm->id,
7882                                                (void *)(uintptr_t)
7883                                                pool->raw_hw->data,
7884                                                sh->devx_comp,
7885                                                (uint64_t)(uintptr_t)pool);
7886         if (ret) {
7887                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
7888                         " %d", pool->min_dcs->id);
7889                 pool->raw_hw = NULL;
7890                 goto set_alarm;
7891         }
7892         LIST_REMOVE(pool->raw_hw, next);
7893         sh->cmng.pending_queries++;
7894         pool_index++;
7895         if (pool_index >= n_valid)
7896                 pool_index = 0;
7897 set_alarm:
7898         sh->cmng.pool_index = pool_index;
7899         mlx5_set_query_alarm(sh);
7900 }
7901
7902 /**
7903  * Check and callback event for new aged flow in the counter pool
7904  *
7905  * @param[in] sh
7906  *   Pointer to mlx5_dev_ctx_shared object.
7907  * @param[in] pool
7908  *   Pointer to Current counter pool.
7909  */
7910 static void
7911 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
7912                    struct mlx5_flow_counter_pool *pool)
7913 {
7914         struct mlx5_priv *priv;
7915         struct mlx5_flow_counter *cnt;
7916         struct mlx5_age_info *age_info;
7917         struct mlx5_age_param *age_param;
7918         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
7919         struct mlx5_counter_stats_raw *prev = pool->raw;
7920         const uint64_t curr_time = MLX5_CURR_TIME_SEC;
7921         const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
7922         uint16_t expected = AGE_CANDIDATE;
7923         uint32_t i;
7924
7925         pool->time_of_last_age_check = curr_time;
7926         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
7927                 cnt = MLX5_POOL_GET_CNT(pool, i);
7928                 age_param = MLX5_CNT_TO_AGE(cnt);
7929                 if (__atomic_load_n(&age_param->state,
7930                                     __ATOMIC_RELAXED) != AGE_CANDIDATE)
7931                         continue;
7932                 if (cur->data[i].hits != prev->data[i].hits) {
7933                         __atomic_store_n(&age_param->sec_since_last_hit, 0,
7934                                          __ATOMIC_RELAXED);
7935                         continue;
7936                 }
7937                 if (__atomic_add_fetch(&age_param->sec_since_last_hit,
7938                                        time_delta,
7939                                        __ATOMIC_RELAXED) <= age_param->timeout)
7940                         continue;
7941                 /*
7942                  * Hold the lock first; otherwise, if the counter is
7943                  * released between setting the AGE_TMOUT state and the
7944                  * tailq operation, the release procedure may delete a
7945                  * non-existent tailq node.
7946                  */
7947                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
7948                 age_info = GET_PORT_AGE_INFO(priv);
7949                 rte_spinlock_lock(&age_info->aged_sl);
7950                 if (__atomic_compare_exchange_n(&age_param->state, &expected,
7951                                                 AGE_TMOUT, false,
7952                                                 __ATOMIC_RELAXED,
7953                                                 __ATOMIC_RELAXED)) {
7954                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
7955                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
7956                 }
7957                 rte_spinlock_unlock(&age_info->aged_sl);
7958         }
7959         mlx5_age_event_prepare(sh);
7960 }
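
/*
 * Aging arithmetic (sketch): each invocation adds the seconds elapsed since
 * the previous check to sec_since_last_hit of every AGE_CANDIDATE counter
 * whose hit count did not change, and resets it to zero when new hits are
 * seen. Once the accumulated value exceeds the configured timeout the
 * counter is queued on aged_counters and MLX5_AGE_EVENT_NEW is signalled.
 * For example, with timeout = 10 and the default one-second query period an
 * idle flow is reported as aged after roughly ten consecutive checks.
 */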
7961
7962 /**
7963  * Handler for the HW response carrying ready values from an asynchronous
7964  * batch query. This function is expected to be called from the host thread.
7965  *
7966  * @param[in] sh
7967  *   The pointer to the shared device context.
7968  * @param[in] async_id
7969  *   The Devx async ID.
7970  * @param[in] status
7971  *   The status of the completion.
7972  */
7973 void
7974 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
7975                                   uint64_t async_id, int status)
7976 {
7977         struct mlx5_flow_counter_pool *pool =
7978                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
7979         struct mlx5_counter_stats_raw *raw_to_free;
7980         uint8_t query_gen = pool->query_gen ^ 1;
7981         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7982         enum mlx5_counter_type cnt_type =
7983                 pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
7984                                 MLX5_COUNTER_TYPE_ORIGIN;
7985
7986         if (unlikely(status)) {
7987                 raw_to_free = pool->raw_hw;
7988         } else {
7989                 raw_to_free = pool->raw;
7990                 if (pool->is_aged)
7991                         mlx5_flow_aging_check(sh, pool);
7992                 rte_spinlock_lock(&pool->sl);
7993                 pool->raw = pool->raw_hw;
7994                 rte_spinlock_unlock(&pool->sl);
7995                 /* Be sure the new raw counters data is updated in memory. */
7996                 rte_io_wmb();
7997                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
7998                         rte_spinlock_lock(&cmng->csl[cnt_type]);
7999                         TAILQ_CONCAT(&cmng->counters[cnt_type],
8000                                      &pool->counters[query_gen], next);
8001                         rte_spinlock_unlock(&cmng->csl[cnt_type]);
8002                 }
8003         }
8004         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
8005         pool->raw_hw = NULL;
8006         sh->cmng.pending_queries--;
8007 }
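
/*
 * Completion handling (sketch): pool->raw always holds the last completed
 * statistics snapshot, while pool->raw_hw receives the in-flight DevX query
 * results. On success the two are swapped, the previous snapshot returns to
 * free_stat_raws, and the counters released during the finished query
 * generation (query_gen ^ 1) are moved back to the per-type free list only
 * now, once their final hits/bytes values are visible in memory.
 */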
8008
8009 static int
8010 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
8011                     const struct flow_grp_info *grp_info,
8012                     struct rte_flow_error *error)
8013 {
8014         if (grp_info->transfer && grp_info->external &&
8015             grp_info->fdb_def_rule) {
8016                 if (group == UINT32_MAX)
8017                         return rte_flow_error_set
8018                                                 (error, EINVAL,
8019                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
8020                                                  NULL,
8021                                                  "group index not supported");
8022                 *table = group + 1;
8023         } else {
8024                 *table = group;
8025         }
8026         DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
8027         return 0;
8028 }
8029
8030 /**
8031  * Translate the rte_flow group index to HW table value.
8032  *
8033  * If tunnel offload is disabled, all group ids are converted to flow table
8034  * ids using the standard method.
8035  * If tunnel offload is enabled, a group id can be converted using either the
8036  * standard or the tunnel conversion method. The conversion method is
8037  * selected based on the flags in the `grp_info` parameter:
8038  * - Internal (grp_info.external == 0) groups are converted with the
8039  *   standard method.
8040  * - Group ids in JUMP actions are converted with the tunnel method.
8041  * - Conversion of the group id in the rule attributes depends on the rule
8042  *   type and the group id value:
8043  *   ** non-zero group attributes are converted with the tunnel method
8044  *   ** a zero group attribute in a non-tunnel rule is converted using the
8045  *      standard method - there is only one root table
8046  *   ** a zero group attribute in a steer tunnel rule is converted with the
8047  *      standard method - single root table
8048  *   ** a zero group attribute in a match tunnel rule is a special OvS
8049  *      case: that value is used for portability reasons. That group
8050  *      id is converted with the tunnel conversion method.
8051  *
8052  * @param[in] dev
8053  *   Port device
8054  * @param[in] tunnel
8055  *   PMD tunnel offload object
8056  * @param[in] group
8057  *   rte_flow group index value.
8058  * @param[out] table
8059  *   HW table value.
8060  * @param[in] grp_info
8061  *   flags used for conversion
8062  * @param[out] error
8063  *   Pointer to error structure.
8064  *
8065  * @return
8066  *   0 on success, a negative errno value otherwise and rte_errno is set.
8067  */
8068 int
8069 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
8070                          const struct mlx5_flow_tunnel *tunnel,
8071                          uint32_t group, uint32_t *table,
8072                          const struct flow_grp_info *grp_info,
8073                          struct rte_flow_error *error)
8074 {
8075         int ret;
8076         bool standard_translation;
8077
8078         if (!grp_info->skip_scale && grp_info->external &&
8079             group < MLX5_MAX_TABLES_EXTERNAL)
8080                 group *= MLX5_FLOW_TABLE_FACTOR;
8081         if (is_tunnel_offload_active(dev)) {
8082                 standard_translation = !grp_info->external ||
8083                                         grp_info->std_tbl_fix;
8084         } else {
8085                 standard_translation = true;
8086         }
8087         DRV_LOG(DEBUG,
8088                 "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
8089                 dev->data->port_id, group, grp_info->transfer,
8090                 grp_info->external, grp_info->fdb_def_rule,
8091                 standard_translation ? "STANDARD" : "TUNNEL");
8092         if (standard_translation)
8093                 ret = flow_group_to_table(dev->data->port_id, group, table,
8094                                           grp_info, error);
8095         else
8096                 ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
8097                                                       table, error);
8098
8099         return ret;
8100 }
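
/*
 * Worked example (standard translation, assuming the group is below
 * MLX5_MAX_TABLES_EXTERNAL and skip_scale is not set): an external transfer
 * rule in group 3 with the FDB default rule enabled is first scaled to
 * group 3 * MLX5_FLOW_TABLE_FACTOR and then mapped by flow_group_to_table()
 * to table 3 * MLX5_FLOW_TABLE_FACTOR + 1, so that HW table 0 stays
 * reserved for the default FDB rule. Non-transfer external groups map to
 * the scaled value without the +1 offset, and internal groups are not
 * scaled at all.
 */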
8101
8102 /**
8103  * Discover availability of metadata reg_c's.
8104  *
8105  * Iteratively use test flows to check availability.
8106  *
8107  * @param[in] dev
8108  *   Pointer to the Ethernet device structure.
8109  *
8110  * @return
8111  *   0 on success, a negative errno value otherwise and rte_errno is set.
8112  */
8113 int
8114 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
8115 {
8116         struct mlx5_priv *priv = dev->data->dev_private;
8117         enum modify_reg idx;
8118         int n = 0;
8119
8120         /* reg_c[0] and reg_c[1] are reserved. */
8121         priv->sh->flow_mreg_c[n++] = REG_C_0;
8122         priv->sh->flow_mreg_c[n++] = REG_C_1;
8123         /* Discover availability of other reg_c's. */
8124         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
8125                 struct rte_flow_attr attr = {
8126                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
8127                         .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
8128                         .ingress = 1,
8129                 };
8130                 struct rte_flow_item items[] = {
8131                         [0] = {
8132                                 .type = RTE_FLOW_ITEM_TYPE_END,
8133                         },
8134                 };
8135                 struct rte_flow_action actions[] = {
8136                         [0] = {
8137                                 .type = (enum rte_flow_action_type)
8138                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
8139                                 .conf = &(struct mlx5_flow_action_copy_mreg){
8140                                         .src = REG_C_1,
8141                                         .dst = idx,
8142                                 },
8143                         },
8144                         [1] = {
8145                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
8146                                 .conf = &(struct rte_flow_action_jump){
8147                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
8148                                 },
8149                         },
8150                         [2] = {
8151                                 .type = RTE_FLOW_ACTION_TYPE_END,
8152                         },
8153                 };
8154                 uint32_t flow_idx;
8155                 struct rte_flow *flow;
8156                 struct rte_flow_error error;
8157
8158                 if (!priv->config.dv_flow_en)
8159                         break;
8160                 /* Create internal flow, validation skips copy action. */
8161                 flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
8162                                         items, actions, false, &error);
8163                 flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8164                                       flow_idx);
8165                 if (!flow)
8166                         continue;
8167                 priv->sh->flow_mreg_c[n++] = idx;
8168                 flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
8169         }
8170         for (; n < MLX5_MREG_C_NUM; ++n)
8171                 priv->sh->flow_mreg_c[n] = REG_NON;
8172         priv->sh->metadata_regc_check_flag = 1;
8173         return 0;
8174 }
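
/*
 * Illustrative sketch (assumed to be used from other PMD code paths): after
 * discovery the available registers can be checked through the shared
 * context, e.g.
 *
 *	int i, avail = 0;
 *
 *	for (i = 0; i < MLX5_MREG_C_NUM; i++)
 *		if (priv->sh->flow_mreg_c[i] != REG_NON)
 *			avail++;
 *
 * reg_c[0] and reg_c[1] are always reported; the remaining registers are
 * reported only if the test flow above could be created for them.
 */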
8175
8176 int
8177 save_dump_file(const uint8_t *data, uint32_t size,
8178         uint32_t type, uint64_t id, void *arg, FILE *file)
8179 {
8180         char line[BUF_SIZE];
8181         uint32_t out = 0;
8182         uint32_t k;
8183         uint32_t actions_num;
8184         struct rte_flow_query_count *count;
8185
8186         memset(line, 0, BUF_SIZE);
8187         switch (type) {
8188         case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
8189                 actions_num = *(uint32_t *)(arg);
8190                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
8191                                 type, id, actions_num);
8192                 break;
8193         case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
8194                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
8195                                 type, id);
8196                 break;
8197         case DR_DUMP_REC_TYPE_PMD_COUNTER:
8198                 count = (struct rte_flow_query_count *)arg;
8199                 fprintf(file,
8200                         "%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
8201                         type, id, count->hits, count->bytes);
8202                 return 0;
8203         default:
8204                 return -1;
8205         }
8206
8207         for (k = 0; k < size; k++) {
8208                 /* Make sure we do not overrun the line buffer length. */
8209                 if (out >= BUF_SIZE - 4) {
8210                         line[out] = '\0';
8211                         break;
8212                 }
8213                 out += snprintf(line + out, BUF_SIZE - out, "%02x",
8214                                 (data[k]) & 0xff);
8215         }
8216         fprintf(file, "%s\n", line);
8217         return 0;
8218 }
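
/*
 * Record formats produced above, one text line per record (field values are
 * placeholders only):
 *	modify header  : "<type>,0x<rule id>,<actions_num>,<hex action data>"
 *	packet reformat: "<type>,0x<rule id>,<hex reformat buffer>"
 *	counter        : "<type>,0x<counter id>,<hits>,<bytes>"
 * The hex payload is truncated so a line never exceeds BUF_SIZE.
 */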
8219
8220 int
8221 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
8222         struct rte_flow_query_count *count, struct rte_flow_error *error)
8223 {
8224         struct rte_flow_action action[2];
8225         enum mlx5_flow_drv_type ftype;
8226         const struct mlx5_flow_driver_ops *fops;
8227
8228         if (!flow) {
8229                 return rte_flow_error_set(error, ENOENT,
8230                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8231                                 NULL,
8232                                 "invalid flow handle");
8233         }
8234         action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
8235         action[1].type = RTE_FLOW_ACTION_TYPE_END;
8236         if (flow->counter) {
8237                 memset(count, 0, sizeof(struct rte_flow_query_count));
8238                 ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
8239                 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
8240                                                 ftype < MLX5_FLOW_TYPE_MAX);
8241                 fops = flow_get_drv_ops(ftype);
8242                 return fops->query(dev, flow, action, count, error);
8243         }
8244         return -1;
8245 }
8246
8247 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8248 /**
8249  * Dump flow ipool data to file
8250  *
8251  * @param[in] dev
8252  *   The pointer to Ethernet device.
8253  * @param[in] file
8254  *   A pointer to a file for output.
8255  * @param[out] error
8256  *   Perform verbose error reporting if not NULL. PMDs initialize this
8257  *   structure in case of error only.
8258  * @return
8259  *   0 on success, a negative value otherwise.
8260  */
8261 int
8262 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
8263         struct rte_flow *flow, FILE *file,
8264         struct rte_flow_error *error)
8265 {
8266         struct mlx5_priv *priv = dev->data->dev_private;
8267         struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
8268         struct mlx5_flow_dv_encap_decap_resource *encap_decap;
8269         uint32_t handle_idx;
8270         struct mlx5_flow_handle *dh;
8271         struct rte_flow_query_count count;
8272         uint32_t actions_num;
8273         const uint8_t *data;
8274         size_t size;
8275         uint64_t id;
8276         uint32_t type;
8277         void *action = NULL;
8278
8279         if (!flow) {
8280                 return rte_flow_error_set(error, ENOENT,
8281                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8282                                 NULL,
8283                                 "invalid flow handle");
8284         }
8285         handle_idx = flow->dev_handles;
8286         while (handle_idx) {
8287                 dh = mlx5_ipool_get(priv->sh->ipool
8288                                 [MLX5_IPOOL_MLX5_FLOW], handle_idx);
8289                 if (!dh)
8290                         break; /* Stale handle, stop to avoid an endless loop. */
8291                 handle_idx = dh->next.next;
8292
8293                 /* query counter */
8294                 type = DR_DUMP_REC_TYPE_PMD_COUNTER;
8295                 flow_dv_query_count_ptr(dev, flow->counter,
8296                                                 &action, error);
8297                 if (action) {
8298                         id = (uint64_t)(uintptr_t)action;
8299                         if (!mlx5_flow_query_counter(dev, flow, &count, error))
8300                                 save_dump_file(NULL, 0, type,
8301                                                 id, (void *)&count, file);
8302                 }
8303                 /* Get modify_hdr and encap_decap buf from ipools. */
8304                 encap_decap = NULL;
8305                 modify_hdr = dh->dvh.modify_hdr;
8306
8307                 if (dh->dvh.rix_encap_decap) {
8308                         encap_decap = mlx5_ipool_get(priv->sh->ipool
8309                                                 [MLX5_IPOOL_DECAP_ENCAP],
8310                                                 dh->dvh.rix_encap_decap);
8311                 }
8312                 if (modify_hdr) {
8313                         data = (const uint8_t *)modify_hdr->actions;
8314                         size = (size_t)(modify_hdr->actions_num) * 8;
8315                         id = (uint64_t)(uintptr_t)modify_hdr->action;
8316                         actions_num = modify_hdr->actions_num;
8317                         type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
8318                         save_dump_file(data, size, type, id,
8319                                                 (void *)(&actions_num), file);
8320                 }
8321                 if (encap_decap) {
8322                         data = encap_decap->buf;
8323                         size = encap_decap->size;
8324                         id = (uint64_t)(uintptr_t)encap_decap->action;
8325                         type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
8326                         save_dump_file(data, size, type,
8327                                                 id, NULL, file);
8328                 }
8329         }
8330         return 0;
8331 }
8332
8333 /**
8334  * Dump all flow's encap_decap/modify_hdr/counter data to file
8335  *
8336  * @param[in] dev
8337  *   The pointer to Ethernet device.
8338  * @param[in] file
8339  *   A pointer to a file for output.
8340  * @param[out] error
8341  *   Perform verbose error reporting if not NULL. PMDs initialize this
8342  *   structure in case of error only.
8343  * @return
8344  *   0 on success, a negative value otherwise.
8345  */
8346 static int
8347 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
8348         FILE *file, struct rte_flow_error *error)
8349 {
8350         struct mlx5_priv *priv = dev->data->dev_private;
8351         struct mlx5_dev_ctx_shared *sh = priv->sh;
8352         struct mlx5_hlist *h;
8353         struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
8354         struct mlx5_flow_dv_encap_decap_resource *encap_decap;
8355         struct rte_flow_query_count count;
8356         uint32_t actions_num;
8357         const uint8_t *data;
8358         size_t size;
8359         uint64_t id;
8360         uint32_t type;
8361         uint32_t i;
8362         uint32_t j;
8363         struct mlx5_list_inconst *l_inconst;
8364         struct mlx5_list_entry *e;
8365         int lcore_index;
8366         struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
8367         uint32_t max;
8368         void *action;
8369
8370         /* encap_decap hlist is lcore_share, get global core cache. */
8371         i = MLX5_LIST_GLOBAL;
8372         h = sh->encaps_decaps;
8373         if (h) {
8374                 for (j = 0; j <= h->mask; j++) {
8375                         l_inconst = &h->buckets[j].l;
8376                         if (!l_inconst || !l_inconst->cache[i])
8377                                 continue;
8378
8379                         e = LIST_FIRST(&l_inconst->cache[i]->h);
8380                         while (e) {
8381                                 encap_decap =
8382                                 (struct mlx5_flow_dv_encap_decap_resource *)e;
8383                                 data = encap_decap->buf;
8384                                 size = encap_decap->size;
8385                                 id = (uint64_t)(uintptr_t)encap_decap->action;
8386                                 type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
8387                                 save_dump_file(data, size, type,
8388                                         id, NULL, file);
8389                                 e = LIST_NEXT(e, next);
8390                         }
8391                 }
8392         }
8393
8394         /* get modify_hdr */
8395         h = sh->modify_cmds;
8396         if (h) {
8397                 lcore_index = rte_lcore_index(rte_lcore_id());
8398                 if (unlikely(lcore_index == -1)) {
8399                         lcore_index = MLX5_LIST_NLCORE;
8400                         rte_spinlock_lock(&h->l_const.lcore_lock);
8401                 }
8402                 i = lcore_index;
8403
8404                 for (j = 0; j <= h->mask; j++) {
8405                         l_inconst = &h->buckets[j].l;
8406                         if (!l_inconst || !l_inconst->cache[i])
8407                                 continue;
8408
8409                         e = LIST_FIRST(&l_inconst->cache[i]->h);
8410                         while (e) {
8411                                 modify_hdr =
8412                                 (struct mlx5_flow_dv_modify_hdr_resource *)e;
8413                                 data = (const uint8_t *)modify_hdr->actions;
8414                                 size = (size_t)(modify_hdr->actions_num) * 8;
8415                                 actions_num = modify_hdr->actions_num;
8416                                 id = (uint64_t)(uintptr_t)modify_hdr->action;
8417                                 type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
8418                                 save_dump_file(data, size, type, id,
8419                                                 (void *)(&actions_num), file);
8420                                 e = LIST_NEXT(e, next);
8421                         }
8422                 }
8423
8424                 if (unlikely(lcore_index == MLX5_LIST_NLCORE))
8425                         rte_spinlock_unlock(&h->l_const.lcore_lock);
8426         }
8427
8428         /* get counter */
8429         MLX5_ASSERT(cmng->n_valid <= cmng->n);
8430         max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
8431         for (j = 1; j <= max; j++) {
8432                 action = NULL;
8433                 flow_dv_query_count_ptr(dev, j, &action, error);
8434                 if (action) {
8435                         if (!flow_dv_query_count(dev, j, &count, error)) {
8436                                 type = DR_DUMP_REC_TYPE_PMD_COUNTER;
8437                                 id = (uint64_t)(uintptr_t)action;
8438                                 save_dump_file(NULL, 0, type,
8439                                                 id, (void *)&count, file);
8440                         }
8441                 }
8442         }
8443         return 0;
8444 }
8445 #endif
8446
8447 /**
8448  * Dump flow raw hw data to file
8449  *
8450  * @param[in] dev
8451  *    The pointer to Ethernet device.
8452  * @param[in] file
8453  *   A pointer to a file for output.
8454  * @param[out] error
8455  *   Perform verbose error reporting if not NULL. PMDs initialize this
8456  *   structure in case of error only.
8457  * @return
8458  *   0 on success, a negative value otherwise.
8459  */
8460 int
8461 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
8462                    FILE *file,
8463                    struct rte_flow_error *error __rte_unused)
8464 {
8465         struct mlx5_priv *priv = dev->data->dev_private;
8466         struct mlx5_dev_ctx_shared *sh = priv->sh;
8467         uint32_t handle_idx;
8468         int ret;
8469         struct mlx5_flow_handle *dh;
8470         struct rte_flow *flow;
8471
8472         if (!priv->config.dv_flow_en) {
8473                 if (fputs("device dv flow disabled\n", file) <= 0)
8474                         return -errno;
8475                 return -ENOTSUP;
8476         }
8477
8478         /* dump all */
8479         if (!flow_idx) {
8480 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8481                 if (mlx5_flow_dev_dump_sh_all(dev, file, error))
8482                         return -EINVAL;
8483 #endif
8484                 return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
8485                                         sh->rx_domain,
8486                                         sh->tx_domain, file);
8487         }
8488         /* dump one */
8489         flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8490                         (uintptr_t)(void *)flow_idx);
8491         if (!flow)
8492                 return -EINVAL;
8493
8494 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8495         mlx5_flow_dev_dump_ipool(dev, flow, file, error);
8496 #endif
8497         handle_idx = flow->dev_handles;
8498         while (handle_idx) {
8499                 dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
8500                                 handle_idx);
8501                 if (!dh)
8502                         return -ENOENT;
8503                 if (dh->drv_flow) {
8504                         ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
8505                                         file);
8506                         if (ret)
8507                                 return -ENOENT;
8508                 }
8509                 handle_idx = dh->next.next;
8510         }
8511         return 0;
8512 }
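
/*
 * Usage sketch (illustrative, application side): the dump is normally
 * reached through the generic rte_flow API rather than called directly.
 * The output file name below is hypothetical.
 *
 *	struct rte_flow_error err;
 *	FILE *f = fopen("/tmp/mlx5_flow_dump.txt", "w");
 *
 *	if (f != NULL) {
 *		rte_flow_dev_dump(port_id, NULL, f, &err);
 *		fclose(f);
 *	}
 *
 * Passing a specific rte_flow pointer instead of NULL dumps only that rule
 * together with its modify-header/encap/counter resources.
 */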
8513
8514 /**
8515  * Get aged-out flows.
8516  *
8517  * @param[in] dev
8518  *   Pointer to the Ethernet device structure.
8519  * @param[in] contexts
8520  *   The address of an array of pointers to the aged-out flow contexts.
8521  * @param[in] nb_contexts
8522  *   The length of the context array.
8523  * @param[out] error
8524  *   Perform verbose error reporting if not NULL. Initialized in case of
8525  *   error only.
8526  *
8527  * @return
8528  *   The number of contexts retrieved on success, a negative errno value
8529  *   otherwise. If nb_contexts is 0, the total number of aged contexts is
8530  *   returned. If nb_contexts is not 0, the number of aged flows reported
8531  *   in the context array is returned.
8532  */
8533 int
8534 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
8535                         uint32_t nb_contexts, struct rte_flow_error *error)
8536 {
8537         const struct mlx5_flow_driver_ops *fops;
8538         struct rte_flow_attr attr = { .transfer = 0 };
8539
8540         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8541                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8542                 return fops->get_aged_flows(dev, contexts, nb_contexts,
8543                                                     error);
8544         }
8545         DRV_LOG(ERR,
8546                 "port %u get aged flows is not supported.",
8547                  dev->data->port_id);
8548         return -ENOTSUP;
8549 }
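
/*
 * Usage sketch (illustrative, application side): aged-out flows are
 * retrieved through the generic API, which dispatches to the DV callback
 * above. The array size of 64 is arbitrary.
 *
 *	void *contexts[64];
 *	struct rte_flow_error err;
 *	int n = rte_flow_get_aged_flows(port_id, contexts, 64, &err);
 *
 * A negative return value is an error; on success the array holds the AGE
 * action contexts of up to n aged flows. Calling with nb_contexts == 0 only
 * returns the total number of aged flows.
 */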
8550
8551 /* Wrapper for driver action_validate op callback */
8552 static int
8553 flow_drv_action_validate(struct rte_eth_dev *dev,
8554                          const struct rte_flow_indir_action_conf *conf,
8555                          const struct rte_flow_action *action,
8556                          const struct mlx5_flow_driver_ops *fops,
8557                          struct rte_flow_error *error)
8558 {
8559         static const char err_msg[] = "indirect action validation unsupported";
8560
8561         if (!fops->action_validate) {
8562                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8563                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8564                                    NULL, err_msg);
8565                 return -rte_errno;
8566         }
8567         return fops->action_validate(dev, conf, action, error);
8568 }
8569
8570 /**
8571  * Destroys the shared action by handle.
8572  *
8573  * @param dev
8574  *   Pointer to Ethernet device structure.
8575  * @param[in] handle
8576  *   Handle for the indirect action object to be destroyed.
8577  * @param[out] error
8578  *   Perform verbose error reporting if not NULL. PMDs initialize this
8579  *   structure in case of error only.
8580  *
8581  * @return
8582  *   0 on success, a negative errno value otherwise and rte_errno is set.
8583  *
8584  * @note: wrapper for driver action_destroy op callback.
8585  */
8586 static int
8587 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
8588                            struct rte_flow_action_handle *handle,
8589                            struct rte_flow_error *error)
8590 {
8591         static const char err_msg[] = "indirect action destruction unsupported";
8592         struct rte_flow_attr attr = { .transfer = 0 };
8593         const struct mlx5_flow_driver_ops *fops =
8594                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8595
8596         if (!fops->action_destroy) {
8597                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8598                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8599                                    NULL, err_msg);
8600                 return -rte_errno;
8601         }
8602         return fops->action_destroy(dev, handle, error);
8603 }
8604
8605 /* Wrapper for driver action_update op callback */
8606 static int
8607 flow_drv_action_update(struct rte_eth_dev *dev,
8608                        struct rte_flow_action_handle *handle,
8609                        const void *update,
8610                        const struct mlx5_flow_driver_ops *fops,
8611                        struct rte_flow_error *error)
8612 {
8613         static const char err_msg[] = "indirect action update unsupported";
8614
8615         if (!fops->action_update) {
8616                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8617                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8618                                    NULL, err_msg);
8619                 return -rte_errno;
8620         }
8621         return fops->action_update(dev, handle, update, error);
8622 }
8623
8624 /* Wrapper for driver action_query op callback */
8625 static int
8626 flow_drv_action_query(struct rte_eth_dev *dev,
8627                       const struct rte_flow_action_handle *handle,
8628                       void *data,
8629                       const struct mlx5_flow_driver_ops *fops,
8630                       struct rte_flow_error *error)
8631 {
8632         static const char err_msg[] = "indirect action query unsupported";
8633
8634         if (!fops->action_query) {
8635                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8636                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8637                                    NULL, err_msg);
8638                 return -rte_errno;
8639         }
8640         return fops->action_query(dev, handle, data, error);
8641 }
8642
8643 /**
8644  * Create indirect action for reuse in multiple flow rules.
8645  *
8646  * @param dev
8647  *   Pointer to Ethernet device structure.
8648  * @param conf
8649  *   Pointer to indirect action object configuration.
8650  * @param[in] action
8651  *   Action configuration for indirect action object creation.
8652  * @param[out] error
8653  *   Perform verbose error reporting if not NULL. PMDs initialize this
8654  *   structure in case of error only.
8655  * @return
8656  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
8657  */
8658 static struct rte_flow_action_handle *
8659 mlx5_action_handle_create(struct rte_eth_dev *dev,
8660                           const struct rte_flow_indir_action_conf *conf,
8661                           const struct rte_flow_action *action,
8662                           struct rte_flow_error *error)
8663 {
8664         static const char err_msg[] = "indirect action creation unsupported";
8665         struct rte_flow_attr attr = { .transfer = 0 };
8666         const struct mlx5_flow_driver_ops *fops =
8667                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8668
8669         if (flow_drv_action_validate(dev, conf, action, fops, error))
8670                 return NULL;
8671         if (!fops->action_create) {
8672                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8673                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8674                                    NULL, err_msg);
8675                 return NULL;
8676         }
8677         return fops->action_create(dev, conf, action, error);
8678 }
8679
8680 /**
8681  * Updates in place the indirect action configuration pointed to by *handle*
8682  * with the configuration provided as the *update* argument.
8683  * Updating the indirect action configuration affects all flow rules reusing
8684  * the action via the handle.
8685  *
8686  * @param dev
8687  *   Pointer to Ethernet device structure.
8688  * @param[in] handle
8689  *   Handle for the indirect action to be updated.
8690  * @param[in] update
8691  *   Action specification used to modify the action pointed to by handle.
8692  *   *update* could be of the same type as the action pointed to by the
8693  *   *handle* argument, or some other structure such as a wrapper, depending
8694  *   on the indirect action type.
8695  * @param[out] error
8696  *   Perform verbose error reporting if not NULL. PMDs initialize this
8697  *   structure in case of error only.
8698  *
8699  * @return
8700  *   0 on success, a negative errno value otherwise and rte_errno is set.
8701  */
8702 static int
8703 mlx5_action_handle_update(struct rte_eth_dev *dev,
8704                 struct rte_flow_action_handle *handle,
8705                 const void *update,
8706                 struct rte_flow_error *error)
8707 {
8708         struct rte_flow_attr attr = { .transfer = 0 };
8709         const struct mlx5_flow_driver_ops *fops =
8710                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8711         int ret;
8712
8713         ret = flow_drv_action_validate(dev, NULL,
8714                         (const struct rte_flow_action *)update, fops, error);
8715         if (ret)
8716                 return ret;
8717         return flow_drv_action_update(dev, handle, update, fops,
8718                                       error);
8719 }
8720
8721 /**
8722  * Query the indirect action by handle.
8723  *
8724  * This function allows retrieving action-specific data such as counters.
8725  * Data is gathered by a special action which may be present/referenced in
8726  * more than one flow rule definition.
8727  *
8728  * see @RTE_FLOW_ACTION_TYPE_COUNT
8729  *
8730  * @param dev
8731  *   Pointer to Ethernet device structure.
8732  * @param[in] handle
8733  *   Handle for the indirect action to query.
8734  * @param[in, out] data
8735  *   Pointer to storage for the associated query data type.
8736  * @param[out] error
8737  *   Perform verbose error reporting if not NULL. PMDs initialize this
8738  *   structure in case of error only.
8739  *
8740  * @return
8741  *   0 on success, a negative errno value otherwise and rte_errno is set.
8742  */
8743 static int
8744 mlx5_action_handle_query(struct rte_eth_dev *dev,
8745                          const struct rte_flow_action_handle *handle,
8746                          void *data,
8747                          struct rte_flow_error *error)
8748 {
8749         struct rte_flow_attr attr = { .transfer = 0 };
8750         const struct mlx5_flow_driver_ops *fops =
8751                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8752
8753         return flow_drv_action_query(dev, handle, data, fops, error);
8754 }
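
/*
 * Usage sketch (illustrative, application side): the wrappers above back
 * the generic indirect action API. A shared counter, for example, can be
 * created once and referenced by several rules; the names below are only
 * an example.
 *
 *	struct rte_flow_error err;
 *	struct rte_flow_indir_action_conf conf = { .ingress = 1 };
 *	struct rte_flow_action_count cnt_conf = { 0 };
 *	const struct rte_flow_action act = {
 *		.type = RTE_FLOW_ACTION_TYPE_COUNT,
 *		.conf = &cnt_conf,
 *	};
 *	struct rte_flow_action_handle *h =
 *		rte_flow_action_handle_create(port_id, &conf, &act, &err);
 *	struct rte_flow_query_count out = { .reset = 0 };
 *
 *	if (h != NULL) {
 *		rte_flow_action_handle_query(port_id, h, &out, &err);
 *		rte_flow_action_handle_destroy(port_id, h, &err);
 *	}
 */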
8755
8756 /**
8757  * Destroy all indirect actions (shared RSS).
8758  *
8759  * @param dev
8760  *   Pointer to Ethernet device.
8761  *
8762  * @return
8763  *   0 on success, a negative errno value otherwise and rte_errno is set.
8764  */
8765 int
8766 mlx5_action_handle_flush(struct rte_eth_dev *dev)
8767 {
8768         struct rte_flow_error error;
8769         struct mlx5_priv *priv = dev->data->dev_private;
8770         struct mlx5_shared_action_rss *shared_rss;
8771         int ret = 0;
8772         uint32_t idx;
8773
8774         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
8775                       priv->rss_shared_actions, idx, shared_rss, next) {
8776                 ret |= mlx5_action_handle_destroy(dev,
8777                        (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
8778         }
8779         return ret;
8780 }
8781
8782 #ifndef HAVE_MLX5DV_DR
8783 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
8784 #else
8785 #define MLX5_DOMAIN_SYNC_FLOW \
8786         (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
8787 #endif
8788
8789 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
8790 {
8791         struct rte_eth_dev *dev = &rte_eth_devices[port_id];
8792         const struct mlx5_flow_driver_ops *fops;
8793         int ret;
8794         struct rte_flow_attr attr = { .transfer = 0 };
8795
8796         fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8797         ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
8798         if (ret > 0)
8799                 ret = -ret;
8800         return ret;
8801 }
8802
8803 const struct mlx5_flow_tunnel *
8804 mlx5_get_tof(const struct rte_flow_item *item,
8805              const struct rte_flow_action *action,
8806              enum mlx5_tof_rule_type *rule_type)
8807 {
8808         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
8809                 if (item->type == (typeof(item->type))
8810                                   MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
8811                         *rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
8812                         return flow_items_to_tunnel(item);
8813                 }
8814         }
8815         for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
8816                 if (action->type == (typeof(action->type))
8817                                     MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
8818                         *rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
8819                         return flow_actions_to_tunnel(action);
8820                 }
8821         }
8822         return NULL;
8823 }
8824
8825 /**
8826  * Tunnel offload functionality is defined for the DV environment only.
8827  */
8828 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8829 __extension__
8830 union tunnel_offload_mark {
8831         uint32_t val;
8832         struct {
8833                 uint32_t app_reserve:8;
8834                 uint32_t table_id:15;
8835                 uint32_t transfer:1;
8836                 uint32_t _unused_:8;
8837         };
8838 };
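
/*
 * MARK value layout used by tunnel offload (bit-field order as laid out
 * above, low bits first on a little-endian target): bits 0-7 are reserved
 * for the application, bits 8-22 carry the tunnel flow table id, bit 23
 * flags an FDB (transfer) table and the top 8 bits are unused.
 * tunnel_mark_decode() below reverses this encoding to locate the table.
 */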
8839
8840 static bool
8841 mlx5_access_tunnel_offload_db
8842         (struct rte_eth_dev *dev,
8843          bool (*match)(struct rte_eth_dev *,
8844                        struct mlx5_flow_tunnel *, const void *),
8845          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
8846          void (*miss)(struct rte_eth_dev *, void *),
8847          void *ctx, bool lock_op);
8848
8849 static int
8850 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
8851                              struct rte_flow *flow,
8852                              const struct rte_flow_attr *attr,
8853                              const struct rte_flow_action *app_actions,
8854                              uint32_t flow_idx,
8855                              const struct mlx5_flow_tunnel *tunnel,
8856                              struct tunnel_default_miss_ctx *ctx,
8857                              struct rte_flow_error *error)
8858 {
8859         struct mlx5_priv *priv = dev->data->dev_private;
8860         struct mlx5_flow *dev_flow;
8861         struct rte_flow_attr miss_attr = *attr;
8862         const struct rte_flow_item miss_items[2] = {
8863                 {
8864                         .type = RTE_FLOW_ITEM_TYPE_ETH,
8865                         .spec = NULL,
8866                         .last = NULL,
8867                         .mask = NULL
8868                 },
8869                 {
8870                         .type = RTE_FLOW_ITEM_TYPE_END,
8871                         .spec = NULL,
8872                         .last = NULL,
8873                         .mask = NULL
8874                 }
8875         };
8876         union tunnel_offload_mark mark_id;
8877         struct rte_flow_action_mark miss_mark;
8878         struct rte_flow_action miss_actions[3] = {
8879                 [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
8880                 [2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
8881         };
8882         const struct rte_flow_action_jump *jump_data;
8883         uint32_t i, flow_table = 0; /* prevent compilation warning */
8884         struct flow_grp_info grp_info = {
8885                 .external = 1,
8886                 .transfer = attr->transfer,
8887                 .fdb_def_rule = !!priv->fdb_def_rule,
8888                 .std_tbl_fix = 0,
8889         };
8890         int ret;
8891
8892         if (!attr->transfer) {
8893                 uint32_t q_size;
8894
8895                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
8896                 q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
8897                 ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
8898                                          0, SOCKET_ID_ANY);
8899                 if (!ctx->queue)
8900                         return rte_flow_error_set
8901                                 (error, ENOMEM,
8902                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8903                                 NULL, "invalid default miss RSS");
8904                 ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
8905                 ctx->action_rss.level = 0;
8906                 ctx->action_rss.types = priv->rss_conf.rss_hf;
8907                 ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
8908                 ctx->action_rss.queue_num = priv->reta_idx_n;
8909                 ctx->action_rss.key = priv->rss_conf.rss_key;
8910                 ctx->action_rss.queue = ctx->queue;
8911                 if (!priv->reta_idx_n || !priv->rxqs_n)
8912                         return rte_flow_error_set
8913                                 (error, EINVAL,
8914                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8915                                 NULL, "invalid port configuration");
8916                 if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
8917                         ctx->action_rss.types = 0;
8918                 for (i = 0; i != priv->reta_idx_n; ++i)
8919                         ctx->queue[i] = (*priv->reta_idx)[i];
8920         } else {
8921                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
8922                 ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
8923         }
8924         miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
8925         for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
8926         jump_data = app_actions->conf;
8927         miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
8928         miss_attr.group = jump_data->group;
8929         ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
8930                                        &flow_table, &grp_info, error);
8931         if (ret)
8932                 return rte_flow_error_set(error, EINVAL,
8933                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8934                                           NULL, "invalid tunnel id");
8935         mark_id.app_reserve = 0;
8936         mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
8937         mark_id.transfer = !!attr->transfer;
8938         mark_id._unused_ = 0;
8939         miss_mark.id = mark_id.val;
8940         dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
8941                                     miss_items, miss_actions, flow_idx, error);
8942         if (!dev_flow)
8943                 return -rte_errno;
8944         dev_flow->flow = flow;
8945         dev_flow->external = true;
8946         dev_flow->tunnel = tunnel;
8947         dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
8948         /* Subflow object was created, we must include it in the list. */
8949         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
8950                       dev_flow->handle, next);
8951         DRV_LOG(DEBUG,
8952                 "port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
8953                 dev->data->port_id, tunnel->app_tunnel.type,
8954                 tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
8955         ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
8956                                   miss_actions, error);
8957         if (!ret)
8958                 ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
8959                                                   error);
8960
8961         return ret;
8962 }
8963
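     /**
      * Decode a tunnel offload MARK value carried by a missed packet back
      * into the flow table entry it refers to, or return NULL when the mark
      * does not match any registered flow table.
      */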
8964 static const struct mlx5_flow_tbl_data_entry  *
8965 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
8966 {
8967         struct mlx5_priv *priv = dev->data->dev_private;
8968         struct mlx5_dev_ctx_shared *sh = priv->sh;
8969         struct mlx5_list_entry *he;
8970         union tunnel_offload_mark mbits = { .val = mark };
8971         union mlx5_flow_tbl_key table_key = {
8972                 {
8973                         .level = tunnel_id_to_flow_tbl(mbits.table_id),
8974                         .id = 0,
8975                         .reserved = 0,
8976                         .dummy = 0,
8977                         .is_fdb = !!mbits.transfer,
8978                         .is_egress = 0,
8979                 }
8980         };
8981         struct mlx5_flow_cb_ctx ctx = {
8982                 .data = &table_key.v64,
8983         };
8984
8985         he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
8986         return he ?
8987                container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
8988 }
8989
8990 static void
8991 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
8992                                    struct mlx5_list_entry *entry)
8993 {
8994         struct mlx5_dev_ctx_shared *sh = tool_ctx;
8995         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
8996
8997         mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
8998                         tunnel_flow_tbl_to_id(tte->flow_table));
8999         mlx5_free(tte);
9000 }
9001
9002 static int
9003 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
9004                                   struct mlx5_list_entry *entry, void *cb_ctx)
9005 {
9006         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
9007         union tunnel_tbl_key tbl = {
9008                 .val = *(uint64_t *)(ctx->data),
9009         };
9010         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
9011
9012         return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
9013 }
9014
9015 static struct mlx5_list_entry *
9016 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
9017 {
9018         struct mlx5_dev_ctx_shared *sh = tool_ctx;
9019         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
9020         struct tunnel_tbl_entry *tte;
9021         union tunnel_tbl_key tbl = {
9022                 .val = *(uint64_t *)(ctx->data),
9023         };
9024
9025         tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
9026                           sizeof(*tte), 0,
9027                           SOCKET_ID_ANY);
9028         if (!tte)
9029                 goto err;
9030         mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
9031                           &tte->flow_table);
9032         if (tte->flow_table >= MLX5_MAX_TABLES) {
9033                 DRV_LOG(ERR, "Tunnel table ID %u exceeds the maximum limit.",
9034                         tte->flow_table);
9035                 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
9036                                 tte->flow_table);
9037                 goto err;
9038         } else if (!tte->flow_table) {
9039                 goto err;
9040         }
9041         tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
9042         tte->tunnel_id = tbl.tunnel_id;
9043         tte->group = tbl.group;
9044         return &tte->hash;
9045 err:
9046         if (tte)
9047                 mlx5_free(tte);
9048         return NULL;
9049 }
9050
9051 static struct mlx5_list_entry *
9052 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
9053                                   struct mlx5_list_entry *oentry,
9054                                   void *cb_ctx __rte_unused)
9055 {
9056         struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
9057                                                    0, SOCKET_ID_ANY);
9058
9059         if (!tte)
9060                 return NULL;
9061         memcpy(tte, oentry, sizeof(*tte));
9062         return &tte->hash;
9063 }
9064
9065 static void
9066 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
9067                                        struct mlx5_list_entry *entry)
9068 {
9069         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
9070
9071         mlx5_free(tte);
9072 }
9073
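     /**
      * Translate a tunnel flow group into the PMD flow table id, registering
      * a new (tunnel id, group) mapping in the group hash list on first use.
      *
      * @return
      *   0 on success, a non-zero value otherwise with @p error filled in.
      */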
9074 static uint32_t
9075 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
9076                                 const struct mlx5_flow_tunnel *tunnel,
9077                                 uint32_t group, uint32_t *table,
9078                                 struct rte_flow_error *error)
9079 {
9080         struct mlx5_list_entry *he;
9081         struct tunnel_tbl_entry *tte;
9082         union tunnel_tbl_key key = {
9083                 .tunnel_id = tunnel ? tunnel->tunnel_id : 0,
9084                 .group = group
9085         };
9086         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9087         struct mlx5_hlist *group_hash;
9088         struct mlx5_flow_cb_ctx ctx = {
9089                 .data = &key.val,
9090         };
9091
9092         group_hash = tunnel ? tunnel->groups : thub->groups;
9093         he = mlx5_hlist_register(group_hash, key.val, &ctx);
9094         if (!he)
9095                 return rte_flow_error_set(error, EINVAL,
9096                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9097                                           NULL,
9098                                           "tunnel group index not supported");
9099         tte = container_of(he, typeof(*tte), hash);
9100         *table = tte->flow_table;
9101         DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
9102                 dev->data->port_id, key.tunnel_id, group, *table);
9103         return 0;
9104 }
9105
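     /**
      * Release a PMD tunnel object: remove it from the tunnel hub list,
      * destroy its group hash list and return its id to the indexed pool.
      */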
9106 static void
9107 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
9108                       struct mlx5_flow_tunnel *tunnel)
9109 {
9110         struct mlx5_priv *priv = dev->data->dev_private;
9111         struct mlx5_indexed_pool *ipool;
9112
9113         DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
9114                 dev->data->port_id, tunnel->tunnel_id);
9115         LIST_REMOVE(tunnel, chain);
9116         mlx5_hlist_destroy(tunnel->groups);
9117         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
9118         mlx5_ipool_free(ipool, tunnel->tunnel_id);
9119 }
9120
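     /**
      * Walk the tunnel offload database under the hub spinlock and call
      * @p hit on the first tunnel accepted by @p match, or @p miss if no
      * tunnel matches. With @p lock_op set the callbacks run while the
      * spinlock is still held, otherwise the lock is released first.
      *
      * @return
      *   true when a matching tunnel was found.
      */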
9121 static bool
9122 mlx5_access_tunnel_offload_db
9123         (struct rte_eth_dev *dev,
9124          bool (*match)(struct rte_eth_dev *,
9125                        struct mlx5_flow_tunnel *, const void *),
9126          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
9127          void (*miss)(struct rte_eth_dev *, void *),
9128          void *ctx, bool lock_op)
9129 {
9130         bool verdict = false;
9131         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9132         struct mlx5_flow_tunnel *tunnel;
9133
9134         rte_spinlock_lock(&thub->sl);
9135         LIST_FOREACH(tunnel, &thub->tunnels, chain) {
9136                 verdict = match(dev, tunnel, (const void *)ctx);
9137                 if (verdict)
9138                         break;
9139         }
9140         if (!lock_op)
9141                 rte_spinlock_unlock(&thub->sl);
9142         if (verdict && hit)
9143                 hit(dev, tunnel, ctx);
9144         if (!verdict && miss)
9145                 miss(dev, ctx);
9146         if (lock_op)
9147                 rte_spinlock_unlock(&thub->sl);
9148
9149         return verdict;
9150 }
9151
9152 struct tunnel_db_find_tunnel_id_ctx {
9153         uint32_t tunnel_id;
9154         struct mlx5_flow_tunnel *tunnel;
9155 };
9156
9157 static bool
9158 find_tunnel_id_match(struct rte_eth_dev *dev,
9159                      struct mlx5_flow_tunnel *tunnel, const void *x)
9160 {
9161         const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
9162
9163         RTE_SET_USED(dev);
9164         return tunnel->tunnel_id == ctx->tunnel_id;
9165 }
9166
9167 static void
9168 find_tunnel_id_hit(struct rte_eth_dev *dev,
9169                    struct mlx5_flow_tunnel *tunnel, void *x)
9170 {
9171         struct tunnel_db_find_tunnel_id_ctx *ctx = x;
9172         RTE_SET_USED(dev);
9173         ctx->tunnel = tunnel;
9174 }
9175
9176 static struct mlx5_flow_tunnel *
9177 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
9178 {
9179         struct tunnel_db_find_tunnel_id_ctx ctx = {
9180                 .tunnel_id = id,
9181         };
9182
9183         mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
9184                                       find_tunnel_id_hit, NULL, &ctx, true);
9185
9186         return ctx.tunnel;
9187 }
9188
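     /**
      * Allocate and initialize a PMD tunnel object for an application tunnel
      * description. Returns NULL when the tunnel id pool is exhausted or the
      * per-tunnel group hash list cannot be created.
      */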
9189 static struct mlx5_flow_tunnel *
9190 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
9191                           const struct rte_flow_tunnel *app_tunnel)
9192 {
9193         struct mlx5_priv *priv = dev->data->dev_private;
9194         struct mlx5_indexed_pool *ipool;
9195         struct mlx5_flow_tunnel *tunnel;
9196         uint32_t id;
9197
9198         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
9199         tunnel = mlx5_ipool_zmalloc(ipool, &id);
9200         if (!tunnel)
9201                 return NULL;
9202         if (id >= MLX5_MAX_TUNNELS) {
9203                 mlx5_ipool_free(ipool, id);
9204                 DRV_LOG(ERR, "Tunnel ID %u exceeds the maximum limit.", id);
9205                 return NULL;
9206         }
9207         tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
9208                                            priv->sh,
9209                                            mlx5_flow_tunnel_grp2tbl_create_cb,
9210                                            mlx5_flow_tunnel_grp2tbl_match_cb,
9211                                            mlx5_flow_tunnel_grp2tbl_remove_cb,
9212                                            mlx5_flow_tunnel_grp2tbl_clone_cb,
9213                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
9214         if (!tunnel->groups) {
9215                 mlx5_ipool_free(ipool, id);
9216                 return NULL;
9217         }
9218         /* Initialize the new PMD tunnel. */
9219         memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
9220         tunnel->tunnel_id = id;
9221         tunnel->action.type = (typeof(tunnel->action.type))
9222                               MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
9223         tunnel->action.conf = tunnel;
9224         tunnel->item.type = (typeof(tunnel->item.type))
9225                             MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
9226         tunnel->item.spec = tunnel;
9227         tunnel->item.last = NULL;
9228         tunnel->item.mask = NULL;
9229
9230         DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
9231                 dev->data->port_id, tunnel->tunnel_id);
9232
9233         return tunnel;
9234 }
9235
9236 struct tunnel_db_get_tunnel_ctx {
9237         const struct rte_flow_tunnel *app_tunnel;
9238         struct mlx5_flow_tunnel *tunnel;
9239 };
9240
9241 static bool get_tunnel_match(struct rte_eth_dev *dev,
9242                              struct mlx5_flow_tunnel *tunnel, const void *x)
9243 {
9244         const struct tunnel_db_get_tunnel_ctx *ctx = x;
9245
9246         RTE_SET_USED(dev);
9247         return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
9248                        sizeof(*ctx->app_tunnel));
9249 }
9250
9251 static void get_tunnel_hit(struct rte_eth_dev *dev,
9252                            struct mlx5_flow_tunnel *tunnel, void *x)
9253 {
9254         /* called under tunnel spinlock protection */
9255         struct tunnel_db_get_tunnel_ctx *ctx = x;
9256
9257         RTE_SET_USED(dev);
9258         tunnel->refctn++;
9259         ctx->tunnel = tunnel;
9260 }
9261
9262 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
9263 {
9264         /* called under tunnel spinlock protection */
9265         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9266         struct tunnel_db_get_tunnel_ctx *ctx = x;
9267
9268         rte_spinlock_unlock(&thub->sl);
9269         ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
9270         rte_spinlock_lock(&thub->sl);
9271         if (ctx->tunnel) {
9272                 ctx->tunnel->refctn = 1;
9273                 LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
9274         }
9275 }
9276
9277
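     /**
      * Look up the PMD tunnel matching @p app_tunnel or allocate a new one.
      * A reference is taken on the returned tunnel.
      *
      * @return
      *   0 on success, -ENOMEM otherwise.
      */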
9278 static int
9279 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
9280                      const struct rte_flow_tunnel *app_tunnel,
9281                      struct mlx5_flow_tunnel **tunnel)
9282 {
9283         struct tunnel_db_get_tunnel_ctx ctx = {
9284                 .app_tunnel = app_tunnel,
9285         };
9286
9287         mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
9288                                       get_tunnel_miss, &ctx, true);
9289         *tunnel = ctx.tunnel;
9290         return ctx.tunnel ? 0 : -ENOMEM;
9291 }
9292
9293 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
9294 {
9295         struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
9296
9297         if (!thub)
9298                 return;
9299         if (!LIST_EMPTY(&thub->tunnels))
9300                 DRV_LOG(WARNING, "port %u tunnels present", port_id);
9301         mlx5_hlist_destroy(thub->groups);
9302         mlx5_free(thub);
9303 }
9304
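     /**
      * Create the shared tunnel offload hub: the tunnel list, its spinlock
      * and the group-to-table hash list.
      *
      * @return
      *   0 on success, a negative errno value otherwise.
      */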
9305 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
9306 {
9307         int err;
9308         struct mlx5_flow_tunnel_hub *thub;
9309
9310         thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
9311                            0, SOCKET_ID_ANY);
9312         if (!thub)
9313                 return -ENOMEM;
9314         LIST_INIT(&thub->tunnels);
9315         rte_spinlock_init(&thub->sl);
9316         thub->groups = mlx5_hlist_create("flow groups", 64,
9317                                          false, true, sh,
9318                                          mlx5_flow_tunnel_grp2tbl_create_cb,
9319                                          mlx5_flow_tunnel_grp2tbl_match_cb,
9320                                          mlx5_flow_tunnel_grp2tbl_remove_cb,
9321                                          mlx5_flow_tunnel_grp2tbl_clone_cb,
9322                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
9323         if (!thub->groups) {
9324                 err = -rte_errno;
9325                 goto err;
9326         }
9327         sh->tunnel_hub = thub;
9328
9329         return 0;
9330
9331 err:
9332         if (thub->groups)
9333                 mlx5_hlist_destroy(thub->groups);
9334         if (thub)
9335                 mlx5_free(thub);
9336         return err;
9337 }
9338
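     /**
      * Check whether an application tunnel can be offloaded. On failure
      * @p err_msg is set to point to a static description of the problem.
      *
      * @return
      *   true when tunnel offload is active and the tunnel type is supported.
      */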
9339 static inline bool
9340 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
9341                           struct rte_flow_tunnel *tunnel,
9342                           const char **err_msg)
9343 {
9344         *err_msg = NULL;
9345         if (!is_tunnel_offload_active(dev)) {
9346                 *err_msg = "tunnel offload was not activated";
9347                 goto out;
9348         } else if (!tunnel) {
9349                 *err_msg = "no application tunnel";
9350                 goto out;
9351         }
9352
9353         switch (tunnel->type) {
9354         default:
9355                 *err_msg = "unsupported tunnel type";
9356                 goto out;
9357         case RTE_FLOW_ITEM_TYPE_VXLAN:
9358         case RTE_FLOW_ITEM_TYPE_GRE:
9359         case RTE_FLOW_ITEM_TYPE_NVGRE:
9360         case RTE_FLOW_ITEM_TYPE_GENEVE:
9361                 break;
9362         }
9363
9364 out:
9365         return !*err_msg;
9366 }
9367
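     /**
      * Provide the single PMD action implementing rte_flow_tunnel_decap_set()
      * for @p app_tunnel.
      */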
9368 static int
9369 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
9370                     struct rte_flow_tunnel *app_tunnel,
9371                     struct rte_flow_action **actions,
9372                     uint32_t *num_of_actions,
9373                     struct rte_flow_error *error)
9374 {
9375         int ret;
9376         struct mlx5_flow_tunnel *tunnel;
9377         const char *err_msg = NULL;
9378         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
9379
9380         if (!verdict)
9381                 return rte_flow_error_set(error, EINVAL,
9382                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9383                                           err_msg);
9384         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
9385         if (ret < 0) {
9386                 return rte_flow_error_set(error, ret,
9387                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9388                                           "failed to initialize pmd tunnel");
9389         }
9390         *actions = &tunnel->action;
9391         *num_of_actions = 1;
9392         return 0;
9393 }
9394
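     /**
      * Provide the single PMD pattern item implementing rte_flow_tunnel_match()
      * for @p app_tunnel.
      */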
9395 static int
9396 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
9397                        struct rte_flow_tunnel *app_tunnel,
9398                        struct rte_flow_item **items,
9399                        uint32_t *num_of_items,
9400                        struct rte_flow_error *error)
9401 {
9402         int ret;
9403         struct mlx5_flow_tunnel *tunnel;
9404         const char *err_msg = NULL;
9405         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
9406
9407         if (!verdict)
9408                 return rte_flow_error_set(error, EINVAL,
9409                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9410                                           err_msg);
9411         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
9412         if (ret < 0) {
9413                 return rte_flow_error_set(error, ret,
9414                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9415                                           "failed to initialize pmd tunnel");
9416         }
9417         *items = &tunnel->item;
9418         *num_of_items = 1;
9419         return 0;
9420 }
9421
9422 struct tunnel_db_element_release_ctx {
9423         struct rte_flow_item *items;
9424         struct rte_flow_action *actions;
9425         uint32_t num_elements;
9426         struct rte_flow_error *error;
9427         int ret;
9428 };
9429
9430 static bool
9431 tunnel_element_release_match(struct rte_eth_dev *dev,
9432                              struct mlx5_flow_tunnel *tunnel, const void *x)
9433 {
9434         const struct tunnel_db_element_release_ctx *ctx = x;
9435
9436         RTE_SET_USED(dev);
9437         if (ctx->num_elements != 1)
9438                 return false;
9439         else if (ctx->items)
9440                 return ctx->items == &tunnel->item;
9441         else if (ctx->actions)
9442                 return ctx->actions == &tunnel->action;
9443
9444         return false;
9445 }
9446
9447 static void
9448 tunnel_element_release_hit(struct rte_eth_dev *dev,
9449                            struct mlx5_flow_tunnel *tunnel, void *x)
9450 {
9451         struct tunnel_db_element_release_ctx *ctx = x;
9452         ctx->ret = 0;
9453         if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
9454                 mlx5_flow_tunnel_free(dev, tunnel);
9455 }
9456
9457 static void
9458 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
9459 {
9460         struct tunnel_db_element_release_ctx *ctx = x;
9461         RTE_SET_USED(dev);
9462         ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
9463                                       RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9464                                       "invalid argument");
9465 }
9466
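     /**
      * Release the PMD tunnel item handed out by mlx5_flow_tunnel_match().
      * The tunnel object is freed once its reference count drops to zero.
      */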
9467 static int
9468 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
9469                        struct rte_flow_item *pmd_items,
9470                        uint32_t num_items, struct rte_flow_error *err)
9471 {
9472         struct tunnel_db_element_release_ctx ctx = {
9473                 .items = pmd_items,
9474                 .actions = NULL,
9475                 .num_elements = num_items,
9476                 .error = err,
9477         };
9478
9479         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
9480                                       tunnel_element_release_hit,
9481                                       tunnel_element_release_miss, &ctx, false);
9482
9483         return ctx.ret;
9484 }
9485
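     /**
      * Release the PMD tunnel action handed out by mlx5_flow_tunnel_decap_set().
      * The tunnel object is freed once its reference count drops to zero.
      */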
9486 static int
9487 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
9488                          struct rte_flow_action *pmd_actions,
9489                          uint32_t num_actions, struct rte_flow_error *err)
9490 {
9491         struct tunnel_db_element_release_ctx ctx = {
9492                 .items = NULL,
9493                 .actions = pmd_actions,
9494                 .num_elements = num_actions,
9495                 .error = err,
9496         };
9497
9498         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
9499                                       tunnel_element_release_hit,
9500                                       tunnel_element_release_miss, &ctx, false);
9501
9502         return ctx.ret;
9503 }
9504
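     /**
      * Decode the FDIR mark of a packet that missed in the tunnel offload
      * tables and restore the application tunnel and group id it came from.
      */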
9505 static int
9506 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
9507                                   struct rte_mbuf *m,
9508                                   struct rte_flow_restore_info *info,
9509                                   struct rte_flow_error *err)
9510 {
9511         uint64_t ol_flags = m->ol_flags;
9512         const struct mlx5_flow_tbl_data_entry *tble;
9513         const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
9514
9515         if (!is_tunnel_offload_active(dev)) {
9516                 info->flags = 0;
9517                 return 0;
9518         }
9519
9520         if ((ol_flags & mask) != mask)
9521                 goto err;
9522         tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
9523         if (!tble) {
9524                 DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
9525                         dev->data->port_id, m->hash.fdir.hi);
9526                 goto err;
9527         }
9528         MLX5_ASSERT(tble->tunnel);
9529         memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
9530         info->group_id = tble->group_id;
9531         info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
9532                       RTE_FLOW_RESTORE_INFO_GROUP_ID |
9533                       RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
9534
9535         return 0;
9536
9537 err:
9538         return rte_flow_error_set(err, EINVAL,
9539                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
9540                                   "failed to get restore info");
9541 }
9542
9543 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
9544 static int
9545 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
9546                            __rte_unused struct rte_flow_tunnel *app_tunnel,
9547                            __rte_unused struct rte_flow_action **actions,
9548                            __rte_unused uint32_t *num_of_actions,
9549                            __rte_unused struct rte_flow_error *error)
9550 {
9551         return -ENOTSUP;
9552 }
9553
9554 static int
9555 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
9556                        __rte_unused struct rte_flow_tunnel *app_tunnel,
9557                        __rte_unused struct rte_flow_item **items,
9558                        __rte_unused uint32_t *num_of_items,
9559                        __rte_unused struct rte_flow_error *error)
9560 {
9561         return -ENOTSUP;
9562 }
9563
9564 static int
9565 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
9566                               __rte_unused struct rte_flow_item *pmd_items,
9567                               __rte_unused uint32_t num_items,
9568                               __rte_unused struct rte_flow_error *err)
9569 {
9570         return -ENOTSUP;
9571 }
9572
9573 static int
9574 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
9575                                 __rte_unused struct rte_flow_action *pmd_action,
9576                                 __rte_unused uint32_t num_actions,
9577                                 __rte_unused struct rte_flow_error *err)
9578 {
9579         return -ENOTSUP;
9580 }
9581
9582 static int
9583 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
9584                                   __rte_unused struct rte_mbuf *m,
9585                                   __rte_unused struct rte_flow_restore_info *i,
9586                                   __rte_unused struct rte_flow_error *err)
9587 {
9588         return -ENOTSUP;
9589 }
9590
9591 static int
9592 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
9593                              __rte_unused struct rte_flow *flow,
9594                              __rte_unused const struct rte_flow_attr *attr,
9595                              __rte_unused const struct rte_flow_action *actions,
9596                              __rte_unused uint32_t flow_idx,
9597                              __rte_unused const struct mlx5_flow_tunnel *tunnel,
9598                              __rte_unused struct tunnel_default_miss_ctx *ctx,
9599                              __rte_unused struct rte_flow_error *error)
9600 {
9601         return -ENOTSUP;
9602 }
9603
9604 static struct mlx5_flow_tunnel *
9605 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
9606                     __rte_unused uint32_t id)
9607 {
9608         return NULL;
9609 }
9610
9611 static void
9612 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
9613                       __rte_unused struct mlx5_flow_tunnel *tunnel)
9614 {
9615 }
9616
9617 static uint32_t
9618 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
9619                                 __rte_unused const struct mlx5_flow_tunnel *t,
9620                                 __rte_unused uint32_t group,
9621                                 __rte_unused uint32_t *table,
9622                                 struct rte_flow_error *error)
9623 {
9624         return rte_flow_error_set(error, ENOTSUP,
9625                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
9626                                   "tunnel offload requires DV support");
9627 }
9628
9629 void
9630 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
9631                         __rte_unused  uint16_t port_id)
9632 {
9633 }
9634 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
9635
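     /** Debug helper: print the item names of a flow pattern to stdout. */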
9636 static void
9637 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
9638 {
9639         int ret;
9640         struct rte_flow_error error;
9641
9642         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
9643                 char *item_name;
9644                 ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
9645                                     sizeof(item_name),
9646                                     (void *)(uintptr_t)item->type, &error);
9647                 if (ret > 0)
9648                         printf("%s ", item_name);
9649                 else
9650                         printf("%d\n", (int)item->type);
9651         }
9652         printf("END\n");
9653 }
9654
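     /**
      * Check whether a UDP item leaves the destination port unspecified or
      * matches the standard VXLAN port.
      */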
9655 static int
9656 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
9657 {
9658         const struct rte_flow_item_udp *spec = udp_item->spec;
9659         const struct rte_flow_item_udp *mask = udp_item->mask;
9660         uint16_t udp_dport = 0;
9661
9662         if (spec != NULL) {
9663                 if (!mask)
9664                         mask = &rte_flow_item_udp_mask;
9665                 udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
9666                                 mask->hdr.dst_port);
9667         }
9668         return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
9669 }
9670
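     /**
      * For VXLAN pattern items select the RSS expansion node according to the
      * preceding UDP destination port: the standard port keeps
      * MLX5_EXPANSION_STD_VXLAN, any other port selects MLX5_EXPANSION_L3_VXLAN.
      * Other item types keep the original node.
      */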
9671 static const struct mlx5_flow_expand_node *
9672 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
9673                 unsigned int item_idx,
9674                 const struct mlx5_flow_expand_node graph[],
9675                 const struct mlx5_flow_expand_node *node)
9676 {
9677         const struct rte_flow_item *item = pattern + item_idx, *prev_item;
9678         switch (item->type) {
9679         case RTE_FLOW_ITEM_TYPE_VXLAN:
9680                 MLX5_ASSERT(item_idx > 0);
9681                 prev_item = pattern + item_idx - 1;
9682                 MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
9683                 if (mlx5_flow_is_std_vxlan_port(prev_item))
9684                         return &graph[MLX5_EXPANSION_STD_VXLAN];
9685                 else
9686                         return &graph[MLX5_EXPANSION_L3_VXLAN];
9687                 break;
9688         default:
9689                 return node;
9690         }
9691 }