[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35
36 struct tunnel_default_miss_ctx {
37         uint16_t *queue;
38         __extension__
39         union {
40                 struct rte_flow_action_rss action_rss;
41                 struct rte_flow_action_queue miss_queue;
42                 struct rte_flow_action_jump miss_jump;
43                 uint8_t raw[0];
44         };
45 };
46
47 static int
48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
49                              struct rte_flow *flow,
50                              const struct rte_flow_attr *attr,
51                              const struct rte_flow_action *app_actions,
52                              uint32_t flow_idx,
53                              const struct mlx5_flow_tunnel *tunnel,
54                              struct tunnel_default_miss_ctx *ctx,
55                              struct rte_flow_error *error);
56 static struct mlx5_flow_tunnel *
57 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
58 static void
59 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
60 static uint32_t
61 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
62                                 const struct mlx5_flow_tunnel *tunnel,
63                                 uint32_t group, uint32_t *table,
64                                 struct rte_flow_error *error);
65
66 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
67 static void mlx5_flow_pop_thread_workspace(void);
68
69
70 /** Device flow drivers. */
71 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
72
73 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
74
75 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
76         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
77 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
78         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
79 #endif
80         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
81         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
82 };
83
84 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
85 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
86         (const int []){ \
87                 __VA_ARGS__, 0, \
88         }
89
90 /** Node object of input graph for mlx5_flow_expand_rss(). */
91 struct mlx5_flow_expand_node {
92         const int *const next;
93         /**<
94          * List of next node indexes. A zero value is interpreted as a terminator.
95          */
96         const enum rte_flow_item_type type;
97         /**< Pattern item type of current node. */
98         uint64_t rss_types;
99         /**<
100          * RSS types bit-field associated with this node
101          * (see ETH_RSS_* definitions).
102          */
103         uint64_t node_flags;
104         /**<
105          *  Bit-fields that define how the node is used in the expansion.
106          * (see MLX5_EXPANSION_NODE_* definitions).
107          */
108 };
109
110 /* Optional expansion node. The expansion algorithm will not go deeper. */
111 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
112
113 /* The node is not added implicitly as expansion to the flow pattern.
114  * If the node type does not match the flow pattern item type, the
115  * expansion algorithm goes deeper to its next items.
116  * In the current implementation, the list of next node indexes can
117  * have at most one node with this flag set and it has to be the last
118  * node index (before the list terminator).
119  */
120 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
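
/*
 * For example (see mlx5_support_expansion below): the VLAN nodes carry the
 * EXPLICIT flag, so a VLAN layer is only expanded when the user pattern
 * already contains a VLAN item, while the GRE_KEY and MPLS nodes carry the
 * OPTIONAL flag, so the expansion does not go deeper below them.
 */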
121
122 /** Object returned by mlx5_flow_expand_rss(). */
123 struct mlx5_flow_expand_rss {
124         uint32_t entries;
125         /**< Number of pattern/priority entries in @p entry. */
126         struct {
127                 struct rte_flow_item *pattern; /**< Expanded pattern array. */
128                 uint32_t priority; /**< Priority offset for each expansion. */
129         } entry[];
130 };
131
132 static void
133 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
134
135 static bool
136 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
137 {
138         switch (item->type) {
139         case RTE_FLOW_ITEM_TYPE_ETH:
140         case RTE_FLOW_ITEM_TYPE_VLAN:
141         case RTE_FLOW_ITEM_TYPE_IPV4:
142         case RTE_FLOW_ITEM_TYPE_IPV6:
143         case RTE_FLOW_ITEM_TYPE_UDP:
144         case RTE_FLOW_ITEM_TYPE_TCP:
145         case RTE_FLOW_ITEM_TYPE_VXLAN:
146         case RTE_FLOW_ITEM_TYPE_NVGRE:
147         case RTE_FLOW_ITEM_TYPE_GRE:
148         case RTE_FLOW_ITEM_TYPE_GENEVE:
149         case RTE_FLOW_ITEM_TYPE_MPLS:
150         case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
151         case RTE_FLOW_ITEM_TYPE_GRE_KEY:
152         case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
153         case RTE_FLOW_ITEM_TYPE_GTP:
154                 return true;
155         default:
156                 break;
157         }
158         return false;
159 }
160
161 static enum rte_flow_item_type
162 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
163 {
164         enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
165         uint16_t ether_type = 0;
166         uint16_t ether_type_m;
167         uint8_t ip_next_proto = 0;
168         uint8_t ip_next_proto_m;
169
170         if (item == NULL || item->spec == NULL)
171                 return ret;
172         switch (item->type) {
173         case RTE_FLOW_ITEM_TYPE_ETH:
174                 if (item->mask)
175                         ether_type_m = ((const struct rte_flow_item_eth *)
176                                                 (item->mask))->type;
177                 else
178                         ether_type_m = rte_flow_item_eth_mask.type;
179                 if (ether_type_m != RTE_BE16(0xFFFF))
180                         break;
181                 ether_type = ((const struct rte_flow_item_eth *)
182                                 (item->spec))->type;
183                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
184                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
185                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
186                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
187                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
188                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
189                 else
190                         ret = RTE_FLOW_ITEM_TYPE_END;
191                 break;
192         case RTE_FLOW_ITEM_TYPE_VLAN:
193                 if (item->mask)
194                         ether_type_m = ((const struct rte_flow_item_vlan *)
195                                                 (item->mask))->inner_type;
196                 else
197                         ether_type_m = rte_flow_item_vlan_mask.inner_type;
198                 if (ether_type_m != RTE_BE16(0xFFFF))
199                         break;
200                 ether_type = ((const struct rte_flow_item_vlan *)
201                                 (item->spec))->inner_type;
202                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
203                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
204                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
205                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
206                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
207                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
208                 else
209                         ret = RTE_FLOW_ITEM_TYPE_END;
210                 break;
211         case RTE_FLOW_ITEM_TYPE_IPV4:
212                 if (item->mask)
213                         ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
214                                         (item->mask))->hdr.next_proto_id;
215                 else
216                         ip_next_proto_m =
217                                 rte_flow_item_ipv4_mask.hdr.next_proto_id;
218                 if (ip_next_proto_m != 0xFF)
219                         break;
220                 ip_next_proto = ((const struct rte_flow_item_ipv4 *)
221                                 (item->spec))->hdr.next_proto_id;
222                 if (ip_next_proto == IPPROTO_UDP)
223                         ret = RTE_FLOW_ITEM_TYPE_UDP;
224                 else if (ip_next_proto == IPPROTO_TCP)
225                         ret = RTE_FLOW_ITEM_TYPE_TCP;
226                 else if (ip_next_proto == IPPROTO_IP)
227                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
228                 else if (ip_next_proto == IPPROTO_IPV6)
229                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
230                 else
231                         ret = RTE_FLOW_ITEM_TYPE_END;
232                 break;
233         case RTE_FLOW_ITEM_TYPE_IPV6:
234                 if (item->mask)
235                         ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
236                                                 (item->mask))->hdr.proto;
237                 else
238                         ip_next_proto_m =
239                                 rte_flow_item_ipv6_mask.hdr.proto;
240                 if (ip_next_proto_m != 0xFF)
241                         break;
242                 ip_next_proto = ((const struct rte_flow_item_ipv6 *)
243                                 (item->spec))->hdr.proto;
244                 if (ip_next_proto == IPPROTO_UDP)
245                         ret = RTE_FLOW_ITEM_TYPE_UDP;
246                 else if (ip_next_proto == IPPROTO_TCP)
247                         ret = RTE_FLOW_ITEM_TYPE_TCP;
248                 else if (ip_next_proto == IPPROTO_IP)
249                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
250                 else if (ip_next_proto == IPPROTO_IPV6)
251                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
252                 else
253                         ret = RTE_FLOW_ITEM_TYPE_END;
254                 break;
255         default:
256                 ret = RTE_FLOW_ITEM_TYPE_VOID;
257                 break;
258         }
259         return ret;
260 }
261
262 static const int *
263 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
264                 const int *next_node)
265 {
266         const struct mlx5_flow_expand_node *node = NULL;
267         const int *next = next_node;
268
269         while (next && *next) {
270                 /*
271                  * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
272                  * flag set, because they were not found in the flow pattern.
273                  */
274                 node = &graph[*next];
275                 if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
276                         break;
277                 next = node->next;
278         }
279         return next;
280 }
281
282 #define MLX5_RSS_EXP_ELT_N 16
283
284 /**
285  * Expand RSS flows into several possible flows according to the RSS hash
286  * fields requested and the driver capabilities.
287  *
288  * @param[out] buf
289  *   Buffer to store the result expansion.
290  * @param[in] size
291  *   Buffer size in bytes. If 0, @p buf can be NULL.
292  * @param[in] pattern
293  *   User flow pattern.
294  * @param[in] types
295  *   RSS types to expand (see ETH_RSS_* definitions).
296  * @param[in] graph
297  *   Input graph to expand @p pattern according to @p types.
298  * @param[in] graph_root_index
299  *   Index of root node in @p graph, typically 0.
300  *
301  * @return
302  *   A positive value representing the size of @p buf in bytes regardless of
303  *   @p size on success, a negative errno value otherwise and rte_errno is
304  *   set. The following errors are defined:
305  *
306  *   -E2BIG: the depth of @p graph is too large.
307  *   -EINVAL: @p size does not provide enough space for the expanded pattern.
308  */
309 static int
310 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
311                      const struct rte_flow_item *pattern, uint64_t types,
312                      const struct mlx5_flow_expand_node graph[],
313                      int graph_root_index)
314 {
315         const struct rte_flow_item *item;
316         const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
317         const int *next_node;
318         const int *stack[MLX5_RSS_EXP_ELT_N];
319         int stack_pos = 0;
320         struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
321         unsigned int i;
322         size_t lsize;
323         size_t user_pattern_size = 0;
324         void *addr = NULL;
325         const struct mlx5_flow_expand_node *next = NULL;
326         struct rte_flow_item missed_item;
327         int missed = 0;
328         int elt = 0;
329         const struct rte_flow_item *last_item = NULL;
330
331         memset(&missed_item, 0, sizeof(missed_item));
332         lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
333                 MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
334         if (lsize > size)
335                 return -EINVAL;
336         buf->entry[0].priority = 0;
337         buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
338         buf->entries = 0;
339         addr = buf->entry[0].pattern;
340         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
341                 if (!mlx5_flow_is_rss_expandable_item(item)) {
342                         user_pattern_size += sizeof(*item);
343                         continue;
344                 }
345                 last_item = item;
346                 i = 0;
347                 while (node->next && node->next[i]) {
348                         next = &graph[node->next[i]];
349                         if (next->type == item->type)
350                                 break;
351                         if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
352                                 node = next;
353                                 i = 0;
354                         } else {
355                                 ++i;
356                         }
357                 }
358                 if (next)
359                         node = next;
360                 user_pattern_size += sizeof(*item);
361         }
362         user_pattern_size += sizeof(*item); /* Handle END item. */
363         lsize += user_pattern_size;
364         if (lsize > size)
365                 return -EINVAL;
366         /* Copy the user pattern in the first entry of the buffer. */
367         rte_memcpy(addr, pattern, user_pattern_size);
368         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
369         buf->entries = 1;
370         /* Start expanding. */
371         memset(flow_items, 0, sizeof(flow_items));
372         user_pattern_size -= sizeof(*item);
373         /*
374          * Check if the last valid item has its spec set; if so, complete the
375          * pattern so that it can be used for expansion.
376          */
377         missed_item.type = mlx5_flow_expand_rss_item_complete(last_item);
378         if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
379                 /* Item type END indicates expansion is not required. */
380                 return lsize;
381         }
382         if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
383                 next = NULL;
384                 missed = 1;
385                 for (i = 0; node->next && node->next[i]; ++i) {
386                         next = &graph[node->next[i]];
387                         if (next->type == missed_item.type) {
388                                 flow_items[0].type = missed_item.type;
389                                 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
390                                 break;
391                         }
392                         next = NULL;
393                 }
394         }
395         if (next && missed) {
396                 elt = 2; /* missed item + item end. */
397                 node = next;
398                 lsize += elt * sizeof(*item) + user_pattern_size;
399                 if (lsize > size)
400                         return -EINVAL;
401                 if (node->rss_types & types) {
402                         buf->entry[buf->entries].priority = 1;
403                         buf->entry[buf->entries].pattern = addr;
404                         buf->entries++;
405                         rte_memcpy(addr, buf->entry[0].pattern,
406                                    user_pattern_size);
407                         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
408                         rte_memcpy(addr, flow_items, elt * sizeof(*item));
409                         addr = (void *)(((uintptr_t)addr) +
410                                         elt * sizeof(*item));
411                 }
412         }
413         memset(flow_items, 0, sizeof(flow_items));
414         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
415                         node->next);
416         stack[stack_pos] = next_node;
417         node = next_node ? &graph[*next_node] : NULL;
418         while (node) {
419                 flow_items[stack_pos].type = node->type;
420                 if (node->rss_types & types) {
421                         size_t n;
422                         /*
423                          * Compute the number of items to copy from the
424                          * expansion and copy it.
425                          * When stack_pos is 0, there is one element in it,
426                          * plus the additional END item.
427                          */
428                         elt = stack_pos + 2;
429                         flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
430                         lsize += elt * sizeof(*item) + user_pattern_size;
431                         if (lsize > size)
432                                 return -EINVAL;
433                         n = elt * sizeof(*item);
434                         buf->entry[buf->entries].priority =
435                                 stack_pos + 1 + missed;
436                         buf->entry[buf->entries].pattern = addr;
437                         buf->entries++;
438                         rte_memcpy(addr, buf->entry[0].pattern,
439                                    user_pattern_size);
440                         addr = (void *)(((uintptr_t)addr) +
441                                         user_pattern_size);
442                         rte_memcpy(addr, &missed_item,
443                                    missed * sizeof(*item));
444                         addr = (void *)(((uintptr_t)addr) +
445                                 missed * sizeof(*item));
446                         rte_memcpy(addr, flow_items, n);
447                         addr = (void *)(((uintptr_t)addr) + n);
448                 }
449                 /* Go deeper. */
450                 if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
451                                 node->next) {
452                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
453                                         node->next);
454                         if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
455                                 rte_errno = E2BIG;
456                                 return -rte_errno;
457                         }
458                         stack[stack_pos] = next_node;
459                 } else if (*(next_node + 1)) {
460                         /* Follow up with the next possibility. */
461                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
462                                         ++next_node);
463                 } else if (!stack_pos) {
464                         /*
465                          * Completing the traversal over the different paths.
466                          * The next_node is advanced to the terminator.
467                          */
468                         ++next_node;
469                 } else {
470                         /* Move to the next path. */
471                         while (stack_pos) {
472                                 next_node = stack[--stack_pos];
473                                 next_node++;
474                                 if (*next_node)
475                                         break;
476                         }
477                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
478                                         next_node);
479                         stack[stack_pos] = next_node;
480                 }
481                 node = next_node && *next_node ? &graph[*next_node] : NULL;
482         };
483         return lsize;
484 }
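
/*
 * Illustrative sketch of the expansion above (not an exhaustive list): for a
 * user pattern "eth / ipv4 / end" with @p types containing
 * ETH_RSS_NONFRAG_IPV4_UDP and ETH_RSS_NONFRAG_IPV4_TCP, the buffer keeps the
 * original pattern as entry 0 (priority offset 0) and typically adds expanded
 * entries such as "eth / ipv4 / udp" and "eth / ipv4 / tcp" at higher
 * priority offsets; deeper tunnel expansions may be appended as well,
 * depending on the graph in use.
 */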
485
486 enum mlx5_expansion {
487         MLX5_EXPANSION_ROOT,
488         MLX5_EXPANSION_ROOT_OUTER,
489         MLX5_EXPANSION_OUTER_ETH,
490         MLX5_EXPANSION_OUTER_VLAN,
491         MLX5_EXPANSION_OUTER_IPV4,
492         MLX5_EXPANSION_OUTER_IPV4_UDP,
493         MLX5_EXPANSION_OUTER_IPV4_TCP,
494         MLX5_EXPANSION_OUTER_IPV6,
495         MLX5_EXPANSION_OUTER_IPV6_UDP,
496         MLX5_EXPANSION_OUTER_IPV6_TCP,
497         MLX5_EXPANSION_VXLAN,
498         MLX5_EXPANSION_VXLAN_GPE,
499         MLX5_EXPANSION_GRE,
500         MLX5_EXPANSION_NVGRE,
501         MLX5_EXPANSION_GRE_KEY,
502         MLX5_EXPANSION_MPLS,
503         MLX5_EXPANSION_ETH,
504         MLX5_EXPANSION_VLAN,
505         MLX5_EXPANSION_IPV4,
506         MLX5_EXPANSION_IPV4_UDP,
507         MLX5_EXPANSION_IPV4_TCP,
508         MLX5_EXPANSION_IPV6,
509         MLX5_EXPANSION_IPV6_UDP,
510         MLX5_EXPANSION_IPV6_TCP,
511         MLX5_EXPANSION_IPV6_FRAG_EXT,
512         MLX5_EXPANSION_GTP
513 };
514
515 /** Supported expansion of items. */
516 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
517         [MLX5_EXPANSION_ROOT] = {
518                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
519                                                   MLX5_EXPANSION_IPV4,
520                                                   MLX5_EXPANSION_IPV6),
521                 .type = RTE_FLOW_ITEM_TYPE_END,
522         },
523         [MLX5_EXPANSION_ROOT_OUTER] = {
524                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
525                                                   MLX5_EXPANSION_OUTER_IPV4,
526                                                   MLX5_EXPANSION_OUTER_IPV6),
527                 .type = RTE_FLOW_ITEM_TYPE_END,
528         },
529         [MLX5_EXPANSION_OUTER_ETH] = {
530                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
531                 .type = RTE_FLOW_ITEM_TYPE_ETH,
532                 .rss_types = 0,
533         },
534         [MLX5_EXPANSION_OUTER_VLAN] = {
535                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
536                                                   MLX5_EXPANSION_OUTER_IPV6),
537                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
538                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
539         },
540         [MLX5_EXPANSION_OUTER_IPV4] = {
541                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
542                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
543                          MLX5_EXPANSION_OUTER_IPV4_TCP,
544                          MLX5_EXPANSION_GRE,
545                          MLX5_EXPANSION_NVGRE,
546                          MLX5_EXPANSION_IPV4,
547                          MLX5_EXPANSION_IPV6),
548                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
549                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
550                         ETH_RSS_NONFRAG_IPV4_OTHER,
551         },
552         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
553                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
554                                                   MLX5_EXPANSION_VXLAN_GPE,
555                                                   MLX5_EXPANSION_MPLS,
556                                                   MLX5_EXPANSION_GTP),
557                 .type = RTE_FLOW_ITEM_TYPE_UDP,
558                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
559         },
560         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
561                 .type = RTE_FLOW_ITEM_TYPE_TCP,
562                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
563         },
564         [MLX5_EXPANSION_OUTER_IPV6] = {
565                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
566                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
567                          MLX5_EXPANSION_OUTER_IPV6_TCP,
568                          MLX5_EXPANSION_IPV4,
569                          MLX5_EXPANSION_IPV6,
570                          MLX5_EXPANSION_GRE,
571                          MLX5_EXPANSION_NVGRE),
572                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
573                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
574                         ETH_RSS_NONFRAG_IPV6_OTHER,
575         },
576         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
577                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
578                                                   MLX5_EXPANSION_VXLAN_GPE,
579                                                   MLX5_EXPANSION_MPLS,
580                                                   MLX5_EXPANSION_GTP),
581                 .type = RTE_FLOW_ITEM_TYPE_UDP,
582                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
583         },
584         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
585                 .type = RTE_FLOW_ITEM_TYPE_TCP,
586                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
587         },
588         [MLX5_EXPANSION_VXLAN] = {
589                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
590                                                   MLX5_EXPANSION_IPV4,
591                                                   MLX5_EXPANSION_IPV6),
592                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
593         },
594         [MLX5_EXPANSION_VXLAN_GPE] = {
595                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
596                                                   MLX5_EXPANSION_IPV4,
597                                                   MLX5_EXPANSION_IPV6),
598                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
599         },
600         [MLX5_EXPANSION_GRE] = {
601                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
602                                                   MLX5_EXPANSION_IPV6,
603                                                   MLX5_EXPANSION_GRE_KEY,
604                                                   MLX5_EXPANSION_MPLS),
605                 .type = RTE_FLOW_ITEM_TYPE_GRE,
606         },
607         [MLX5_EXPANSION_GRE_KEY] = {
608                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
609                                                   MLX5_EXPANSION_IPV6,
610                                                   MLX5_EXPANSION_MPLS),
611                 .type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
612                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
613         },
614         [MLX5_EXPANSION_NVGRE] = {
615                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
616                 .type = RTE_FLOW_ITEM_TYPE_NVGRE,
617         },
618         [MLX5_EXPANSION_MPLS] = {
619                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
620                                                   MLX5_EXPANSION_IPV6,
621                                                   MLX5_EXPANSION_ETH),
622                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
623                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
624         },
625         [MLX5_EXPANSION_ETH] = {
626                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
627                 .type = RTE_FLOW_ITEM_TYPE_ETH,
628         },
629         [MLX5_EXPANSION_VLAN] = {
630                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
631                                                   MLX5_EXPANSION_IPV6),
632                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
633                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
634         },
635         [MLX5_EXPANSION_IPV4] = {
636                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
637                                                   MLX5_EXPANSION_IPV4_TCP),
638                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
639                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
640                         ETH_RSS_NONFRAG_IPV4_OTHER,
641         },
642         [MLX5_EXPANSION_IPV4_UDP] = {
643                 .type = RTE_FLOW_ITEM_TYPE_UDP,
644                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
645         },
646         [MLX5_EXPANSION_IPV4_TCP] = {
647                 .type = RTE_FLOW_ITEM_TYPE_TCP,
648                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
649         },
650         [MLX5_EXPANSION_IPV6] = {
651                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
652                                                   MLX5_EXPANSION_IPV6_TCP,
653                                                   MLX5_EXPANSION_IPV6_FRAG_EXT),
654                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
655                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
656                         ETH_RSS_NONFRAG_IPV6_OTHER,
657         },
658         [MLX5_EXPANSION_IPV6_UDP] = {
659                 .type = RTE_FLOW_ITEM_TYPE_UDP,
660                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
661         },
662         [MLX5_EXPANSION_IPV6_TCP] = {
663                 .type = RTE_FLOW_ITEM_TYPE_TCP,
664                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
665         },
666         [MLX5_EXPANSION_IPV6_FRAG_EXT] = {
667                 .type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
668         },
669         [MLX5_EXPANSION_GTP] = {
670                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
671                                                   MLX5_EXPANSION_IPV6),
672                 .type = RTE_FLOW_ITEM_TYPE_GTP,
673         },
674 };
675
676 static struct rte_flow_action_handle *
677 mlx5_action_handle_create(struct rte_eth_dev *dev,
678                           const struct rte_flow_indir_action_conf *conf,
679                           const struct rte_flow_action *action,
680                           struct rte_flow_error *error);
681 static int mlx5_action_handle_destroy
682                                 (struct rte_eth_dev *dev,
683                                  struct rte_flow_action_handle *handle,
684                                  struct rte_flow_error *error);
685 static int mlx5_action_handle_update
686                                 (struct rte_eth_dev *dev,
687                                  struct rte_flow_action_handle *handle,
688                                  const void *update,
689                                  struct rte_flow_error *error);
690 static int mlx5_action_handle_query
691                                 (struct rte_eth_dev *dev,
692                                  const struct rte_flow_action_handle *handle,
693                                  void *data,
694                                  struct rte_flow_error *error);
695 static int
696 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
697                     struct rte_flow_tunnel *app_tunnel,
698                     struct rte_flow_action **actions,
699                     uint32_t *num_of_actions,
700                     struct rte_flow_error *error);
701 static int
702 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
703                        struct rte_flow_tunnel *app_tunnel,
704                        struct rte_flow_item **items,
705                        uint32_t *num_of_items,
706                        struct rte_flow_error *error);
707 static int
708 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
709                               struct rte_flow_item *pmd_items,
710                               uint32_t num_items, struct rte_flow_error *err);
711 static int
712 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
713                                 struct rte_flow_action *pmd_actions,
714                                 uint32_t num_actions,
715                                 struct rte_flow_error *err);
716 static int
717 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
718                                   struct rte_mbuf *m,
719                                   struct rte_flow_restore_info *info,
720                                   struct rte_flow_error *err);
721
722 static const struct rte_flow_ops mlx5_flow_ops = {
723         .validate = mlx5_flow_validate,
724         .create = mlx5_flow_create,
725         .destroy = mlx5_flow_destroy,
726         .flush = mlx5_flow_flush,
727         .isolate = mlx5_flow_isolate,
728         .query = mlx5_flow_query,
729         .dev_dump = mlx5_flow_dev_dump,
730         .get_aged_flows = mlx5_flow_get_aged_flows,
731         .action_handle_create = mlx5_action_handle_create,
732         .action_handle_destroy = mlx5_action_handle_destroy,
733         .action_handle_update = mlx5_action_handle_update,
734         .action_handle_query = mlx5_action_handle_query,
735         .tunnel_decap_set = mlx5_flow_tunnel_decap_set,
736         .tunnel_match = mlx5_flow_tunnel_match,
737         .tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
738         .tunnel_item_release = mlx5_flow_tunnel_item_release,
739         .get_restore_info = mlx5_flow_tunnel_get_restore_info,
740 };
741
742 /* Tunnel information. */
743 struct mlx5_flow_tunnel_info {
744         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
745         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
746 };
747
748 static struct mlx5_flow_tunnel_info tunnels_info[] = {
749         {
750                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
751                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
752         },
753         {
754                 .tunnel = MLX5_FLOW_LAYER_GENEVE,
755                 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
756         },
757         {
758                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
759                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
760         },
761         {
762                 .tunnel = MLX5_FLOW_LAYER_GRE,
763                 .ptype = RTE_PTYPE_TUNNEL_GRE,
764         },
765         {
766                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
767                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
768         },
769         {
770                 .tunnel = MLX5_FLOW_LAYER_MPLS,
771                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
772         },
773         {
774                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
775                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
776         },
777         {
778                 .tunnel = MLX5_FLOW_LAYER_IPIP,
779                 .ptype = RTE_PTYPE_TUNNEL_IP,
780         },
781         {
782                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
783                 .ptype = RTE_PTYPE_TUNNEL_IP,
784         },
785         {
786                 .tunnel = MLX5_FLOW_LAYER_GTP,
787                 .ptype = RTE_PTYPE_TUNNEL_GTPU,
788         },
789 };
790
791
792
793 /**
794  * Translate tag ID to register.
795  *
796  * @param[in] dev
797  *   Pointer to the Ethernet device structure.
798  * @param[in] feature
799  *   The feature that requests the register.
800  * @param[in] id
801  *   The requested register ID.
802  * @param[out] error
803  *   Error description in case of failure.
804  *
805  * @return
806  *   The requested register on success, a negative errno
807  *   value otherwise and rte_errno is set.
808  */
809 int
810 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
811                      enum mlx5_feature_name feature,
812                      uint32_t id,
813                      struct rte_flow_error *error)
814 {
815         struct mlx5_priv *priv = dev->data->dev_private;
816         struct mlx5_dev_config *config = &priv->config;
817         enum modify_reg start_reg;
818         bool skip_mtr_reg = false;
819
820         switch (feature) {
821         case MLX5_HAIRPIN_RX:
822                 return REG_B;
823         case MLX5_HAIRPIN_TX:
824                 return REG_A;
825         case MLX5_METADATA_RX:
826                 switch (config->dv_xmeta_en) {
827                 case MLX5_XMETA_MODE_LEGACY:
828                         return REG_B;
829                 case MLX5_XMETA_MODE_META16:
830                         return REG_C_0;
831                 case MLX5_XMETA_MODE_META32:
832                         return REG_C_1;
833                 }
834                 break;
835         case MLX5_METADATA_TX:
836                 return REG_A;
837         case MLX5_METADATA_FDB:
838                 switch (config->dv_xmeta_en) {
839                 case MLX5_XMETA_MODE_LEGACY:
840                         return REG_NON;
841                 case MLX5_XMETA_MODE_META16:
842                         return REG_C_0;
843                 case MLX5_XMETA_MODE_META32:
844                         return REG_C_1;
845                 }
846                 break;
847         case MLX5_FLOW_MARK:
848                 switch (config->dv_xmeta_en) {
849                 case MLX5_XMETA_MODE_LEGACY:
850                         return REG_NON;
851                 case MLX5_XMETA_MODE_META16:
852                         return REG_C_1;
853                 case MLX5_XMETA_MODE_META32:
854                         return REG_C_0;
855                 }
856                 break;
857         case MLX5_MTR_ID:
858                 /*
859                  * If meter color and meter id share one register, flow match
860                  * should use the meter color register for match.
861                  */
862                 if (priv->mtr_reg_share)
863                         return priv->mtr_color_reg;
864                 else
865                         return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
866                                REG_C_3;
867         case MLX5_MTR_COLOR:
868         case MLX5_ASO_FLOW_HIT:
869         case MLX5_ASO_CONNTRACK:
870                 /* All features use the same REG_C. */
871                 MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
872                 return priv->mtr_color_reg;
873         case MLX5_COPY_MARK:
874                 /*
875                  * The metadata COPY_MARK register is used in the meter suffix
876                  * sub-flow when a meter is present. It is safe to share the same register.
877                  */
878                 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
879         case MLX5_APP_TAG:
880                 /*
881                  * If the meter is enabled, it engages the register for both
882                  * color match and flow match. If the meter color match is not
883                  * using REG_C_2, the REG_C_x used by the meter color match must
884                  * be skipped.
885                  * If the meter is disabled, all available registers can be used.
886                  */
887                 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
888                             (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
889                 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
890                 if (id > (uint32_t)(REG_C_7 - start_reg))
891                         return rte_flow_error_set(error, EINVAL,
892                                                   RTE_FLOW_ERROR_TYPE_ITEM,
893                                                   NULL, "invalid tag id");
894                 if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
895                         return rte_flow_error_set(error, ENOTSUP,
896                                                   RTE_FLOW_ERROR_TYPE_ITEM,
897                                                   NULL, "unsupported tag id");
898                 /*
899                  * This case means the meter is using a REG_C_x greater than REG_C_2.
900                  * Take care not to conflict with meter color REG_C_x.
901                  * If the available index REG_C_y >= REG_C_x, skip the
902                  * color register.
903                  */
904                 if (skip_mtr_reg && config->flow_mreg_c
905                     [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
906                         if (id >= (uint32_t)(REG_C_7 - start_reg))
907                                 return rte_flow_error_set(error, EINVAL,
908                                                        RTE_FLOW_ERROR_TYPE_ITEM,
909                                                         NULL, "invalid tag id");
910                         if (config->flow_mreg_c
911                             [id + 1 + start_reg - REG_C_0] != REG_NON)
912                                 return config->flow_mreg_c
913                                                [id + 1 + start_reg - REG_C_0];
914                         return rte_flow_error_set(error, ENOTSUP,
915                                                   RTE_FLOW_ERROR_TYPE_ITEM,
916                                                   NULL, "unsupported tag id");
917                 }
918                 return config->flow_mreg_c[id + start_reg - REG_C_0];
919         }
920         MLX5_ASSERT(false);
921         return rte_flow_error_set(error, EINVAL,
922                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
923                                   NULL, "invalid feature name");
924 }
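
/*
 * A rough summary of the mapping above, assuming MLX5_XMETA_MODE_META16:
 * Rx and FDB metadata map to REG_C_0, the flow MARK maps to REG_C_1, Tx
 * metadata and hairpin Tx use REG_A, hairpin Rx uses REG_B, and application
 * tags are allocated from REG_C_2 upward, skipping any register reserved for
 * the meter color.
 */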
925
926 /**
927  * Check extensive flow metadata register support.
928  *
929  * @param dev
930  *   Pointer to rte_eth_dev structure.
931  *
932  * @return
933  *   True if device supports extensive flow metadata register, otherwise false.
934  */
935 bool
936 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
937 {
938         struct mlx5_priv *priv = dev->data->dev_private;
939         struct mlx5_dev_config *config = &priv->config;
940
941         /*
942          * Having an available reg_c can be regarded as supporting extensive
943          * flow metadata registers, which implies:
944          * - metadata register copy action by modify header.
945          * - 16 modify header actions are supported.
946          * - reg_c's are preserved across different domains (FDB and NIC) on
947          *   packet loopback by flow lookup miss.
948          */
949         return config->flow_mreg_c[2] != REG_NON;
950 }
951
952 /**
953  * Get the lowest priority.
954  *
955  * @param[in] dev
956  *   Pointer to the Ethernet device structure.
957  * @param[in] attributes
958  *   Pointer to device flow rule attributes.
959  *
960  * @return
961  *   The value of the lowest flow priority.
962  */
963 uint32_t
964 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
965                           const struct rte_flow_attr *attr)
966 {
967         struct mlx5_priv *priv = dev->data->dev_private;
968
969         if (!attr->group && !attr->transfer)
970                 return priv->config.flow_prio - 2;
971         return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
972 }
973
974 /**
975  * Calculate matcher priority of the flow.
976  *
977  * @param[in] dev
978  *   Pointer to the Ethernet device structure.
979  * @param[in] attr
980  *   Pointer to device flow rule attributes.
981  * @param[in] subpriority
982  *   The priority based on the items.
983  * @return
984  *   The matcher priority of the flow.
985  */
986 uint16_t
987 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
988                           const struct rte_flow_attr *attr,
989                           uint32_t subpriority)
990 {
991         uint16_t priority = (uint16_t)attr->priority;
992         struct mlx5_priv *priv = dev->data->dev_private;
993
994         if (!attr->group && !attr->transfer) {
995                 if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
996                         priority = priv->config.flow_prio - 1;
997                 return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
998         }
999         if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1000                 priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1001         return priority * 3 + subpriority;
1002 }
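
/*
 * Worked example for the non-root branch above: with attr->priority == 2 and
 * subpriority == 1 the returned matcher priority is 2 * 3 + 1 == 7; a flow
 * requesting MLX5_FLOW_LOWEST_PRIO_INDICATOR is first remapped to
 * MLX5_NON_ROOT_FLOW_MAX_PRIO before the same computation.
 */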
1003
1004 /**
1005  * Verify the @p item specifications (spec, last, mask) are compatible with the
1006  * NIC capabilities.
1007  *
1008  * @param[in] item
1009  *   Item specification.
1010  * @param[in] mask
1011  *   @p item->mask or flow default bit-masks.
1012  * @param[in] nic_mask
1013  *   Bit-masks covering the fields supported by the NIC, compared with the user mask.
1014  * @param[in] size
1015  *   Bit-mask size in bytes.
1016  * @param[in] range_accepted
1017  *   True if range of values is accepted for specific fields, false otherwise.
1018  * @param[out] error
1019  *   Pointer to error structure.
1020  *
1021  * @return
1022  *   0 on success, a negative errno value otherwise and rte_errno is set.
1023  */
1024 int
1025 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1026                           const uint8_t *mask,
1027                           const uint8_t *nic_mask,
1028                           unsigned int size,
1029                           bool range_accepted,
1030                           struct rte_flow_error *error)
1031 {
1032         unsigned int i;
1033
1034         MLX5_ASSERT(nic_mask);
1035         for (i = 0; i < size; ++i)
1036                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
1037                         return rte_flow_error_set(error, ENOTSUP,
1038                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1039                                                   item,
1040                                                   "mask enables non supported"
1041                                                   " bits");
1042         if (!item->spec && (item->mask || item->last))
1043                 return rte_flow_error_set(error, EINVAL,
1044                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1045                                           "mask/last without a spec is not"
1046                                           " supported");
1047         if (item->spec && item->last && !range_accepted) {
1048                 uint8_t spec[size];
1049                 uint8_t last[size];
1050                 unsigned int i;
1051                 int ret;
1052
1053                 for (i = 0; i < size; ++i) {
1054                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1055                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1056                 }
1057                 ret = memcmp(spec, last, size);
1058                 if (ret != 0)
1059                         return rte_flow_error_set(error, EINVAL,
1060                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1061                                                   item,
1062                                                   "range is not valid");
1063         }
1064         return 0;
1065 }
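
/*
 * For instance, a user mask enabling a bit that is cleared in @p nic_mask is
 * rejected with ENOTSUP, and a spec/last pair whose masked bytes differ is
 * rejected with EINVAL unless @p range_accepted is true.
 */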
1066
1067 /**
1068  * Adjust the hash fields according to the @p flow information.
1069  *
1070  * @param[in] rss_desc
1071  *   Pointer to the mlx5_flow RSS descriptor.
1072  * @param[in] tunnel
1073  *   1 when the hash field is for a tunnel item.
1074  * @param[in] layer_types
1075  *   ETH_RSS_* types.
1076  * @param[in] hash_fields
1077  *   Item hash fields.
1078  *
1079  * @return
1080  *   The hash fields that should be used.
1081  */
1082 uint64_t
1083 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1084                             int tunnel __rte_unused, uint64_t layer_types,
1085                             uint64_t hash_fields)
1086 {
1087 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1088         int rss_request_inner = rss_desc->level >= 2;
1089
1090         /* Check RSS hash level for tunnel. */
1091         if (tunnel && rss_request_inner)
1092                 hash_fields |= IBV_RX_HASH_INNER;
1093         else if (tunnel || rss_request_inner)
1094                 return 0;
1095 #endif
1096         /* Check if requested layer matches RSS hash fields. */
1097         if (!(rss_desc->types & layer_types))
1098                 return 0;
1099         return hash_fields;
1100 }
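
/*
 * For example, with tunnel support compiled in, an inner RSS request
 * (rss_desc->level >= 2) on a tunnel item adds IBV_RX_HASH_INNER to the hash
 * fields, while a layer whose types were not requested in rss_desc->types
 * yields 0 (no hashing on that layer).
 */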
1101
1102 /**
1103  * Look up and set the tunnel ptype in the Rx queue data. Only a single ptype
1104  * can be used; if several tunnel rules are used on this queue, the tunnel
1105  * ptype will be cleared.
1106  *
1107  * @param rxq_ctrl
1108  *   Rx queue to update.
1109  */
1110 static void
1111 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1112 {
1113         unsigned int i;
1114         uint32_t tunnel_ptype = 0;
1115
1116         /* Look up the ptype to use. */
1117         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1118                 if (!rxq_ctrl->flow_tunnels_n[i])
1119                         continue;
1120                 if (!tunnel_ptype) {
1121                         tunnel_ptype = tunnels_info[i].ptype;
1122                 } else {
1123                         tunnel_ptype = 0;
1124                         break;
1125                 }
1126         }
1127         rxq_ctrl->rxq.tunnel = tunnel_ptype;
1128 }
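
/*
 * For example, if only VXLAN flows reference this Rx queue, rxq.tunnel is set
 * to RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP; once a flow of a different
 * tunnel type (e.g. GRE) is added on the same queue, the field is cleared.
 */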
1129
1130 /**
1131  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1132  * flow.
1133  *
1134  * @param[in] dev
1135  *   Pointer to the Ethernet device structure.
1136  * @param[in] dev_handle
1137  *   Pointer to device flow handle structure.
1138  */
1139 void
1140 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1141                        struct mlx5_flow_handle *dev_handle)
1142 {
1143         struct mlx5_priv *priv = dev->data->dev_private;
1144         const int mark = dev_handle->mark;
1145         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1146         struct mlx5_ind_table_obj *ind_tbl = NULL;
1147         unsigned int i;
1148
1149         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1150                 struct mlx5_hrxq *hrxq;
1151
1152                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1153                               dev_handle->rix_hrxq);
1154                 if (hrxq)
1155                         ind_tbl = hrxq->ind_table;
1156         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1157                 struct mlx5_shared_action_rss *shared_rss;
1158
1159                 shared_rss = mlx5_ipool_get
1160                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1161                          dev_handle->rix_srss);
1162                 if (shared_rss)
1163                         ind_tbl = shared_rss->ind_tbl;
1164         }
1165         if (!ind_tbl)
1166                 return;
1167         for (i = 0; i != ind_tbl->queues_n; ++i) {
1168                 int idx = ind_tbl->queues[i];
1169                 struct mlx5_rxq_ctrl *rxq_ctrl =
1170                         container_of((*priv->rxqs)[idx],
1171                                      struct mlx5_rxq_ctrl, rxq);
1172
1173                 /*
1174                  * To support metadata register copy on Tx loopback,
1175                  * this must always be enabled (metadata may arrive
1176                  * from another port, not from local flows only).
1177                  */
1178                 if (priv->config.dv_flow_en &&
1179                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1180                     mlx5_flow_ext_mreg_supported(dev)) {
1181                         rxq_ctrl->rxq.mark = 1;
1182                         rxq_ctrl->flow_mark_n = 1;
1183                 } else if (mark) {
1184                         rxq_ctrl->rxq.mark = 1;
1185                         rxq_ctrl->flow_mark_n++;
1186                 }
1187                 if (tunnel) {
1188                         unsigned int j;
1189
1190                         /* Increase the counter matching the flow. */
1191                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1192                                 if ((tunnels_info[j].tunnel &
1193                                      dev_handle->layers) ==
1194                                     tunnels_info[j].tunnel) {
1195                                         rxq_ctrl->flow_tunnels_n[j]++;
1196                                         break;
1197                                 }
1198                         }
1199                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1200                 }
1201         }
1202 }
1203
1204 /**
1205  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
1206  *
1207  * @param[in] dev
1208  *   Pointer to the Ethernet device structure.
1209  * @param[in] flow
1210  *   Pointer to flow structure.
1211  */
1212 static void
1213 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1214 {
1215         struct mlx5_priv *priv = dev->data->dev_private;
1216         uint32_t handle_idx;
1217         struct mlx5_flow_handle *dev_handle;
1218
1219         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1220                        handle_idx, dev_handle, next)
1221                 flow_drv_rxq_flags_set(dev, dev_handle);
1222 }
1223
1224 /**
1225  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1226  * device flow if no other flow uses it with the same kind of request.
1227  *
1228  * @param dev
1229  *   Pointer to Ethernet device.
1230  * @param[in] dev_handle
1231  *   Pointer to the device flow handle structure.
1232  */
1233 static void
1234 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1235                         struct mlx5_flow_handle *dev_handle)
1236 {
1237         struct mlx5_priv *priv = dev->data->dev_private;
1238         const int mark = dev_handle->mark;
1239         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1240         struct mlx5_ind_table_obj *ind_tbl = NULL;
1241         unsigned int i;
1242
1243         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1244                 struct mlx5_hrxq *hrxq;
1245
1246                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1247                               dev_handle->rix_hrxq);
1248                 if (hrxq)
1249                         ind_tbl = hrxq->ind_table;
1250         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1251                 struct mlx5_shared_action_rss *shared_rss;
1252
1253                 shared_rss = mlx5_ipool_get
1254                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1255                          dev_handle->rix_srss);
1256                 if (shared_rss)
1257                         ind_tbl = shared_rss->ind_tbl;
1258         }
1259         if (!ind_tbl)
1260                 return;
1261         MLX5_ASSERT(dev->data->dev_started);
1262         for (i = 0; i != ind_tbl->queues_n; ++i) {
1263                 int idx = ind_tbl->queues[i];
1264                 struct mlx5_rxq_ctrl *rxq_ctrl =
1265                         container_of((*priv->rxqs)[idx],
1266                                      struct mlx5_rxq_ctrl, rxq);
1267
1268                 if (priv->config.dv_flow_en &&
1269                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1270                     mlx5_flow_ext_mreg_supported(dev)) {
1271                         rxq_ctrl->rxq.mark = 1;
1272                         rxq_ctrl->flow_mark_n = 1;
1273                 } else if (mark) {
1274                         rxq_ctrl->flow_mark_n--;
1275                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1276                 }
1277                 if (tunnel) {
1278                         unsigned int j;
1279
1280                         /* Decrease the counter matching the flow. */
1281                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1282                                 if ((tunnels_info[j].tunnel &
1283                                      dev_handle->layers) ==
1284                                     tunnels_info[j].tunnel) {
1285                                         rxq_ctrl->flow_tunnels_n[j]--;
1286                                         break;
1287                                 }
1288                         }
1289                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1290                 }
1291         }
1292 }
1293
1294 /**
1295  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1296  * @p flow if no other flow uses it with the same kind of request.
1297  *
1298  * @param dev
1299  *   Pointer to Ethernet device.
1300  * @param[in] flow
1301  *   Pointer to the flow.
1302  */
1303 static void
1304 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1305 {
1306         struct mlx5_priv *priv = dev->data->dev_private;
1307         uint32_t handle_idx;
1308         struct mlx5_flow_handle *dev_handle;
1309
1310         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1311                        handle_idx, dev_handle, next)
1312                 flow_drv_rxq_flags_trim(dev, dev_handle);
1313 }
1314
1315 /**
1316  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1317  *
1318  * @param dev
1319  *   Pointer to Ethernet device.
1320  */
1321 static void
1322 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1323 {
1324         struct mlx5_priv *priv = dev->data->dev_private;
1325         unsigned int i;
1326
1327         for (i = 0; i != priv->rxqs_n; ++i) {
1328                 struct mlx5_rxq_ctrl *rxq_ctrl;
1329                 unsigned int j;
1330
1331                 if (!(*priv->rxqs)[i])
1332                         continue;
1333                 rxq_ctrl = container_of((*priv->rxqs)[i],
1334                                         struct mlx5_rxq_ctrl, rxq);
1335                 rxq_ctrl->flow_mark_n = 0;
1336                 rxq_ctrl->rxq.mark = 0;
1337                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1338                         rxq_ctrl->flow_tunnels_n[j] = 0;
1339                 rxq_ctrl->rxq.tunnel = 0;
1340         }
1341 }
1342
1343 /**
1344  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1345  *
1346  * @param[in] dev
1347  *   Pointer to the Ethernet device structure.
1348  */
1349 void
1350 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1351 {
1352         struct mlx5_priv *priv = dev->data->dev_private;
1353         struct mlx5_rxq_data *data;
1354         unsigned int i;
1355
1356         for (i = 0; i != priv->rxqs_n; ++i) {
1357                 if (!(*priv->rxqs)[i])
1358                         continue;
1359                 data = (*priv->rxqs)[i];
1360                 if (!rte_flow_dynf_metadata_avail()) {
1361                         data->dynf_meta = 0;
1362                         data->flow_meta_mask = 0;
1363                         data->flow_meta_offset = -1;
1364                         data->flow_meta_port_mask = 0;
1365                 } else {
1366                         data->dynf_meta = 1;
1367                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1368                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1369                         data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1370                 }
1371         }
1372 }
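
/*
 * Usage sketch (illustrative, application side): the per-queue metadata
 * fields set above only become valid once the application has registered
 * the metadata dynamic field before starting the port, e.g.:
 *
 *     if (rte_flow_dynf_metadata_register() < 0)
 *             rte_panic("cannot register rte_flow metadata dynfield\n");
 *
 * After the port is started, rte_flow_dynf_metadata_avail() reports the
 * field and the mask/offset are copied into every Rx queue here.
 */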
1373
1374 /*
1375  * Return a pointer to the desired action in the list of actions.
1376  *
1377  * @param[in] actions
1378  *   The list of actions to search the action in.
1379  * @param[in] action
1380  *   The action to find.
1381  *
1382  * @return
1383  *   Pointer to the action in the list, if found. NULL otherwise.
1384  */
1385 const struct rte_flow_action *
1386 mlx5_flow_find_action(const struct rte_flow_action *actions,
1387                       enum rte_flow_action_type action)
1388 {
1389         if (actions == NULL)
1390                 return NULL;
1391         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1392                 if (actions->type == action)
1393                         return actions;
1394         return NULL;
1395 }
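
/*
 * Usage sketch (illustrative, assuming "actions" is the application-provided
 * action array): locate an RSS action in the list and read its
 * configuration:
 *
 *     const struct rte_flow_action *act =
 *             mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *     const struct rte_flow_action_rss *rss = act ? act->conf : NULL;
 */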
1396
1397 /*
1398  * Validate the flag action.
1399  *
1400  * @param[in] action_flags
1401  *   Bit-fields that hold the actions detected until now.
1402  * @param[in] attr
1403  *   Attributes of flow that includes this action.
1404  * @param[out] error
1405  *   Pointer to error structure.
1406  *
1407  * @return
1408  *   0 on success, a negative errno value otherwise and rte_errno is set.
1409  */
1410 int
1411 mlx5_flow_validate_action_flag(uint64_t action_flags,
1412                                const struct rte_flow_attr *attr,
1413                                struct rte_flow_error *error)
1414 {
1415         if (action_flags & MLX5_FLOW_ACTION_MARK)
1416                 return rte_flow_error_set(error, EINVAL,
1417                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1418                                           "can't mark and flag in same flow");
1419         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1420                 return rte_flow_error_set(error, EINVAL,
1421                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1422                                           "can't have 2 flag"
1423                                           " actions in same flow");
1424         if (attr->egress)
1425                 return rte_flow_error_set(error, ENOTSUP,
1426                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1427                                           "flag action not supported for "
1428                                           "egress");
1429         return 0;
1430 }
1431
1432 /*
1433  * Validate the mark action.
1434  *
1435  * @param[in] action
1436  *   Pointer to the mark action.
1437  * @param[in] action_flags
1438  *   Bit-fields that hold the actions detected until now.
1439  * @param[in] attr
1440  *   Attributes of flow that includes this action.
1441  * @param[out] error
1442  *   Pointer to error structure.
1443  *
1444  * @return
1445  *   0 on success, a negative errno value otherwise and rte_errno is set.
1446  */
1447 int
1448 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1449                                uint64_t action_flags,
1450                                const struct rte_flow_attr *attr,
1451                                struct rte_flow_error *error)
1452 {
1453         const struct rte_flow_action_mark *mark = action->conf;
1454
1455         if (!mark)
1456                 return rte_flow_error_set(error, EINVAL,
1457                                           RTE_FLOW_ERROR_TYPE_ACTION,
1458                                           action,
1459                                           "configuration cannot be null");
1460         if (mark->id >= MLX5_FLOW_MARK_MAX)
1461                 return rte_flow_error_set(error, EINVAL,
1462                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1463                                           &mark->id,
1464                                           "mark id must be in 0 <= id < "
1465                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1466         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1467                 return rte_flow_error_set(error, EINVAL,
1468                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1469                                           "can't flag and mark in same flow");
1470         if (action_flags & MLX5_FLOW_ACTION_MARK)
1471                 return rte_flow_error_set(error, EINVAL,
1472                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1473                                           "can't have 2 mark actions in same"
1474                                           " flow");
1475         if (attr->egress)
1476                 return rte_flow_error_set(error, ENOTSUP,
1477                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1478                                           "mark action not supported for "
1479                                           "egress");
1480         return 0;
1481 }
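
/*
 * Example (illustrative) of a mark action configuration accepted by the
 * checks above, with an id below MLX5_FLOW_MARK_MAX:
 *
 *     static const struct rte_flow_action_mark mark_conf = { .id = 0xbef };
 *     const struct rte_flow_action mark_action = {
 *             .type = RTE_FLOW_ACTION_TYPE_MARK,
 *             .conf = &mark_conf,
 *     };
 */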
1482
1483 /*
1484  * Validate the drop action.
1485  *
1486  * @param[in] action_flags
1487  *   Bit-fields that hold the actions detected until now.
1488  * @param[in] attr
1489  *   Attributes of flow that includes this action.
1490  * @param[out] error
1491  *   Pointer to error structure.
1492  *
1493  * @return
1494  *   0 on success, a negative errno value otherwise and rte_errno is set.
1495  */
1496 int
1497 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1498                                const struct rte_flow_attr *attr,
1499                                struct rte_flow_error *error)
1500 {
1501         if (attr->egress)
1502                 return rte_flow_error_set(error, ENOTSUP,
1503                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1504                                           "drop action not supported for "
1505                                           "egress");
1506         return 0;
1507 }
1508
1509 /*
1510  * Validate the queue action.
1511  *
1512  * @param[in] action
1513  *   Pointer to the queue action.
1514  * @param[in] action_flags
1515  *   Bit-fields that hold the actions detected until now.
1516  * @param[in] dev
1517  *   Pointer to the Ethernet device structure.
1518  * @param[in] attr
1519  *   Attributes of flow that includes this action.
1520  * @param[out] error
1521  *   Pointer to error structure.
1522  *
1523  * @return
1524  *   0 on success, a negative errno value otherwise and rte_errno is set.
1525  */
1526 int
1527 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1528                                 uint64_t action_flags,
1529                                 struct rte_eth_dev *dev,
1530                                 const struct rte_flow_attr *attr,
1531                                 struct rte_flow_error *error)
1532 {
1533         struct mlx5_priv *priv = dev->data->dev_private;
1534         const struct rte_flow_action_queue *queue = action->conf;
1535
1536         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1537                 return rte_flow_error_set(error, EINVAL,
1538                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1539                                           "can't have 2 fate actions in"
1540                                           " same flow");
1541         if (!priv->rxqs_n)
1542                 return rte_flow_error_set(error, EINVAL,
1543                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1544                                           NULL, "No Rx queues configured");
1545         if (queue->index >= priv->rxqs_n)
1546                 return rte_flow_error_set(error, EINVAL,
1547                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1548                                           &queue->index,
1549                                           "queue index out of range");
1550         if (!(*priv->rxqs)[queue->index])
1551                 return rte_flow_error_set(error, EINVAL,
1552                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1553                                           &queue->index,
1554                                           "queue is not configured");
1555         if (attr->egress)
1556                 return rte_flow_error_set(error, ENOTSUP,
1557                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1558                                           "queue action not supported for "
1559                                           "egress");
1560         return 0;
1561 }
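
/*
 * Example (illustrative): a queue action directing matched packets to Rx
 * queue 0. Only one fate action (QUEUE/RSS/DROP/...) may appear in a
 * single flow, as checked above:
 *
 *     static const struct rte_flow_action_queue queue_conf = { .index = 0 };
 *     const struct rte_flow_action actions[] = {
 *             { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *             { .type = RTE_FLOW_ACTION_TYPE_END },
 *     };
 */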
1562
1563 /*
1564  * Validate the RSS action.
1565  *
1566  * @param[in] dev
1567  *   Pointer to the Ethernet device structure.
1568  * @param[in] action
1569  *   Pointer to the RSS action.
1570  * @param[out] error
1571  *   Pointer to error structure.
1572  *
1573  * @return
1574  *   0 on success, a negative errno value otherwise and rte_errno is set.
1575  */
1576 int
1577 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1578                          const struct rte_flow_action *action,
1579                          struct rte_flow_error *error)
1580 {
1581         struct mlx5_priv *priv = dev->data->dev_private;
1582         const struct rte_flow_action_rss *rss = action->conf;
1583         enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
1584         unsigned int i;
1585
1586         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1587             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1588                 return rte_flow_error_set(error, ENOTSUP,
1589                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1590                                           &rss->func,
1591                                           "RSS hash function not supported");
1592 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1593         if (rss->level > 2)
1594 #else
1595         if (rss->level > 1)
1596 #endif
1597                 return rte_flow_error_set(error, ENOTSUP,
1598                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1599                                           &rss->level,
1600                                           "tunnel RSS is not supported");
1601         /* Allow RSS key_len 0 in case of NULL (default) RSS key. */
1602         if (rss->key_len == 0 && rss->key != NULL)
1603                 return rte_flow_error_set(error, ENOTSUP,
1604                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1605                                           &rss->key_len,
1606                                           "RSS hash key length 0");
1607         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1608                 return rte_flow_error_set(error, ENOTSUP,
1609                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1610                                           &rss->key_len,
1611                                           "RSS hash key too small");
1612         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1613                 return rte_flow_error_set(error, ENOTSUP,
1614                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1615                                           &rss->key_len,
1616                                           "RSS hash key too large");
1617         if (rss->queue_num > priv->config.ind_table_max_size)
1618                 return rte_flow_error_set(error, ENOTSUP,
1619                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1620                                           &rss->queue_num,
1621                                           "number of queues too large");
1622         if (rss->types & MLX5_RSS_HF_MASK)
1623                 return rte_flow_error_set(error, ENOTSUP,
1624                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1625                                           &rss->types,
1626                                           "some RSS protocols are not"
1627                                           " supported");
1628         if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1629             !(rss->types & ETH_RSS_IP))
1630                 return rte_flow_error_set(error, EINVAL,
1631                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1632                                           "L3 partial RSS requested but L3 RSS"
1633                                           " type not specified");
1634         if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1635             !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1636                 return rte_flow_error_set(error, EINVAL,
1637                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1638                                           "L4 partial RSS requested but L4 RSS"
1639                                           " type not specified");
1640         if (!priv->rxqs_n)
1641                 return rte_flow_error_set(error, EINVAL,
1642                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1643                                           NULL, "No Rx queues configured");
1644         if (!rss->queue_num)
1645                 return rte_flow_error_set(error, EINVAL,
1646                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1647                                           NULL, "No queues configured");
1648         for (i = 0; i != rss->queue_num; ++i) {
1649                 struct mlx5_rxq_ctrl *rxq_ctrl;
1650
1651                 if (rss->queue[i] >= priv->rxqs_n)
1652                         return rte_flow_error_set
1653                                 (error, EINVAL,
1654                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1655                                  &rss->queue[i], "queue index out of range");
1656                 if (!(*priv->rxqs)[rss->queue[i]])
1657                         return rte_flow_error_set
1658                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1659                                  &rss->queue[i], "queue is not configured");
1660                 rxq_ctrl = container_of((*priv->rxqs)[rss->queue[i]],
1661                                         struct mlx5_rxq_ctrl, rxq);
1662                 if (i == 0)
1663                         rxq_type = rxq_ctrl->type;
1664                 if (rxq_type != rxq_ctrl->type)
1665                         return rte_flow_error_set
1666                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1667                                  &rss->queue[i],
1668                                  "combining hairpin and regular RSS queues is not supported");
1669         }
1670         return 0;
1671 }
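
/*
 * Example (illustrative, assuming a port with at least 4 Rx queues of the
 * same type) of an RSS action configuration that satisfies the checks
 * above: Toeplitz hash, outer level, a full MLX5_RSS_HASH_KEY_LEN key
 * (the all-zero key is only a placeholder) and supported RSS types:
 *
 *     static const uint8_t rss_key[MLX5_RSS_HASH_KEY_LEN] = { 0 };
 *     static const uint16_t rss_queues[] = { 0, 1, 2, 3 };
 *     const struct rte_flow_action_rss rss_conf = {
 *             .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *             .level = 0,
 *             .types = ETH_RSS_IP | ETH_RSS_UDP,
 *             .key_len = MLX5_RSS_HASH_KEY_LEN,
 *             .key = rss_key,
 *             .queue_num = RTE_DIM(rss_queues),
 *             .queue = rss_queues,
 *     };
 */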
1672
1673 /*
1674  * Validate the RSS action.
1675  *
1676  * @param[in] action
1677  *   Pointer to the RSS action.
1678  * @param[in] action_flags
1679  *   Bit-fields that hold the actions detected until now.
1680  * @param[in] dev
1681  *   Pointer to the Ethernet device structure.
1682  * @param[in] attr
1683  *   Attributes of flow that includes this action.
1684  * @param[in] item_flags
1685  *   Items that were detected.
1686  * @param[out] error
1687  *   Pointer to error structure.
1688  *
1689  * @return
1690  *   0 on success, a negative errno value otherwise and rte_errno is set.
1691  */
1692 int
1693 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1694                               uint64_t action_flags,
1695                               struct rte_eth_dev *dev,
1696                               const struct rte_flow_attr *attr,
1697                               uint64_t item_flags,
1698                               struct rte_flow_error *error)
1699 {
1700         const struct rte_flow_action_rss *rss = action->conf;
1701         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1702         int ret;
1703
1704         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1705                 return rte_flow_error_set(error, EINVAL,
1706                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1707                                           "can't have 2 fate actions"
1708                                           " in same flow");
1709         ret = mlx5_validate_action_rss(dev, action, error);
1710         if (ret)
1711                 return ret;
1712         if (attr->egress)
1713                 return rte_flow_error_set(error, ENOTSUP,
1714                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1715                                           "rss action not supported for "
1716                                           "egress");
1717         if (rss->level > 1 && !tunnel)
1718                 return rte_flow_error_set(error, EINVAL,
1719                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1720                                           "inner RSS is not supported for "
1721                                           "non-tunnel flows");
1722         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1723             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1724                 return rte_flow_error_set(error, EINVAL,
1725                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1726                                           "RSS on eCPRI is not supported now");
1727         }
1728         if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
1729             !(item_flags &
1730               (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
1731             rss->level > 1)
1732                 return rte_flow_error_set(error, EINVAL,
1733                                           RTE_FLOW_ERROR_TYPE_ITEM, NULL,
1734                                           "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
1735         return 0;
1736 }
1737
1738 /*
1739  * Validate the default miss action.
1740  *
1741  * @param[in] action_flags
1742  *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
1743  * @param[out] error
1744  *   Pointer to error structure.
1745  *
1746  * @return
1747  *   0 on success, a negative errno value otherwise and rte_errno is set.
1748  */
1749 int
1750 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1751                                 const struct rte_flow_attr *attr,
1752                                 struct rte_flow_error *error)
1753 {
1754         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1755                 return rte_flow_error_set(error, EINVAL,
1756                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1757                                           "can't have 2 fate actions in"
1758                                           " same flow");
1759         if (attr->egress)
1760                 return rte_flow_error_set(error, ENOTSUP,
1761                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1762                                           "default miss action not supported "
1763                                           "for egress");
1764         if (attr->group)
1765                 return rte_flow_error_set(error, ENOTSUP,
1766                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1767                                           "only group 0 is supported");
1768         if (attr->transfer)
1769                 return rte_flow_error_set(error, ENOTSUP,
1770                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1771                                           NULL, "transfer is not supported");
1772         return 0;
1773 }
1774
1775 /*
1776  * Validate the count action.
1777  *
1778  * @param[in] dev
1779  *   Pointer to the Ethernet device structure.
1780  * @param[in] attr
1781  *   Attributes of flow that includes this action.
1782  * @param[out] error
1783  *   Pointer to error structure.
1784  *
1785  * @return
1786  *   0 on success, a negative errno value otherwise and rte_errno is set.
1787  */
1788 int
1789 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1790                                 const struct rte_flow_attr *attr,
1791                                 struct rte_flow_error *error)
1792 {
1793         if (attr->egress)
1794                 return rte_flow_error_set(error, ENOTSUP,
1795                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1796                                           "count action not supported for "
1797                                           "egress");
1798         return 0;
1799 }
1800
1801 /*
1802  * Validate the ASO CT action.
1803  *
1804  * @param[in] dev
1805  *   Pointer to the Ethernet device structure.
1806  * @param[in] conntrack
1807  *   Pointer to the CT action profile.
1808  * @param[out] error
1809  *   Pointer to error structure.
1810  *
1811  * @return
1812  *   0 on success, a negative errno value otherwise and rte_errno is set.
1813  */
1814 int
1815 mlx5_validate_action_ct(struct rte_eth_dev *dev,
1816                         const struct rte_flow_action_conntrack *conntrack,
1817                         struct rte_flow_error *error)
1818 {
1819         RTE_SET_USED(dev);
1820
1821         if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
1822                 return rte_flow_error_set(error, EINVAL,
1823                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1824                                           "Invalid CT state");
1825         if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
1826                 return rte_flow_error_set(error, EINVAL,
1827                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1828                                           "Invalid last TCP packet flag");
1829         return 0;
1830 }
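
/*
 * Example (illustrative, other profile fields left at their defaults) of a
 * conntrack action configuration passing the two range checks above:
 *
 *     const struct rte_flow_action_conntrack ct_conf = {
 *             .state = RTE_FLOW_CONNTRACK_STATE_ESTABLISHED,
 *             .last_index = RTE_FLOW_CONNTRACK_FLAG_ACK,
 *     };
 */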
1831
1832 /**
1833  * Verify the @p attributes will be correctly understood by the NIC.
1835  *
1836  * @param[in] dev
1837  *   Pointer to the Ethernet device structure.
1838  * @param[in] attributes
1839  *   Pointer to flow attributes
1840  * @param[out] error
1841  *   Pointer to error structure.
1842  *
1843  * @return
1844  *   0 on success, a negative errno value otherwise and rte_errno is set.
1845  */
1846 int
1847 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1848                               const struct rte_flow_attr *attributes,
1849                               struct rte_flow_error *error)
1850 {
1851         struct mlx5_priv *priv = dev->data->dev_private;
1852         uint32_t priority_max = priv->config.flow_prio - 1;
1853
1854         if (attributes->group)
1855                 return rte_flow_error_set(error, ENOTSUP,
1856                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1857                                           NULL, "groups are not supported");
1858         if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
1859             attributes->priority >= priority_max)
1860                 return rte_flow_error_set(error, ENOTSUP,
1861                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1862                                           NULL, "priority out of range");
1863         if (attributes->egress)
1864                 return rte_flow_error_set(error, ENOTSUP,
1865                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1866                                           "egress is not supported");
1867         if (attributes->transfer && !priv->config.dv_esw_en)
1868                 return rte_flow_error_set(error, ENOTSUP,
1869                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1870                                           NULL, "transfer is not supported");
1871         if (!attributes->ingress)
1872                 return rte_flow_error_set(error, EINVAL,
1873                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1874                                           NULL,
1875                                           "ingress attribute is mandatory");
1876         return 0;
1877 }
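
/*
 * Example (illustrative) of flow attributes accepted by this check when
 * E-Switch (transfer) offload is not enabled: ingress only, group 0 and a
 * priority below the configured flow_prio:
 *
 *     const struct rte_flow_attr attr = {
 *             .group = 0,
 *             .priority = 0,
 *             .ingress = 1,
 *     };
 */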
1878
1879 /**
1880  * Validate ICMP6 item.
1881  *
1882  * @param[in] item
1883  *   Item specification.
1884  * @param[in] item_flags
1885  *   Bit-fields that hold the items detected until now.
1886  * @param[in] target_protocol
1887  *   The next protocol in the previous item.
1888  * @param[out] error
1889  *   Pointer to error structure.
1890  *
1891  * @return
1892  *   0 on success, a negative errno value otherwise and rte_errno is set.
1893  */
1894 int
1895 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1896                                uint64_t item_flags,
1897                                uint8_t target_protocol,
1898                                struct rte_flow_error *error)
1899 {
1900         const struct rte_flow_item_icmp6 *mask = item->mask;
1901         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1902         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1903                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1904         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1905                                       MLX5_FLOW_LAYER_OUTER_L4;
1906         int ret;
1907
1908         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1909                 return rte_flow_error_set(error, EINVAL,
1910                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1911                                           "protocol filtering not compatible"
1912                                           " with ICMP6 layer");
1913         if (!(item_flags & l3m))
1914                 return rte_flow_error_set(error, EINVAL,
1915                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1916                                           "IPv6 is mandatory to filter on"
1917                                           " ICMP6");
1918         if (item_flags & l4m)
1919                 return rte_flow_error_set(error, EINVAL,
1920                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1921                                           "multiple L4 layers not supported");
1922         if (!mask)
1923                 mask = &rte_flow_item_icmp6_mask;
1924         ret = mlx5_flow_item_acceptable
1925                 (item, (const uint8_t *)mask,
1926                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1927                  sizeof(struct rte_flow_item_icmp6),
1928                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1929         if (ret < 0)
1930                 return ret;
1931         return 0;
1932 }
1933
1934 /**
1935  * Validate ICMP item.
1936  *
1937  * @param[in] item
1938  *   Item specification.
1939  * @param[in] item_flags
1940  *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
1941  * @param[out] error
1942  *   Pointer to error structure.
1943  *
1944  * @return
1945  *   0 on success, a negative errno value otherwise and rte_errno is set.
1946  */
1947 int
1948 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1949                              uint64_t item_flags,
1950                              uint8_t target_protocol,
1951                              struct rte_flow_error *error)
1952 {
1953         const struct rte_flow_item_icmp *mask = item->mask;
1954         const struct rte_flow_item_icmp nic_mask = {
1955                 .hdr.icmp_type = 0xff,
1956                 .hdr.icmp_code = 0xff,
1957                 .hdr.icmp_ident = RTE_BE16(0xffff),
1958                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
1959         };
1960         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1961         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1962                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1963         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1964                                       MLX5_FLOW_LAYER_OUTER_L4;
1965         int ret;
1966
1967         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1968                 return rte_flow_error_set(error, EINVAL,
1969                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1970                                           "protocol filtering not compatible"
1971                                           " with ICMP layer");
1972         if (!(item_flags & l3m))
1973                 return rte_flow_error_set(error, EINVAL,
1974                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1975                                           "IPv4 is mandatory to filter"
1976                                           " on ICMP");
1977         if (item_flags & l4m)
1978                 return rte_flow_error_set(error, EINVAL,
1979                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1980                                           "multiple L4 layers not supported");
1981         if (!mask)
1982                 mask = &nic_mask;
1983         ret = mlx5_flow_item_acceptable
1984                 (item, (const uint8_t *)mask,
1985                  (const uint8_t *)&nic_mask,
1986                  sizeof(struct rte_flow_item_icmp),
1987                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1988         if (ret < 0)
1989                 return ret;
1990         return 0;
1991 }
1992
1993 /**
1994  * Validate Ethernet item.
1995  *
1996  * @param[in] item
1997  *   Item specification.
1998  * @param[in] item_flags
1999  *   Bit-fields that hold the items detected until now.
 * @param[in] ext_vlan_sup
 *   Whether extended VLAN features are supported or not.
2000  * @param[out] error
2001  *   Pointer to error structure.
2002  *
2003  * @return
2004  *   0 on success, a negative errno value otherwise and rte_errno is set.
2005  */
2006 int
2007 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2008                             uint64_t item_flags, bool ext_vlan_sup,
2009                             struct rte_flow_error *error)
2010 {
2011         const struct rte_flow_item_eth *mask = item->mask;
2012         const struct rte_flow_item_eth nic_mask = {
2013                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2014                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2015                 .type = RTE_BE16(0xffff),
2016                 .has_vlan = ext_vlan_sup ? 1 : 0,
2017         };
2018         int ret;
2019         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2020         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
2021                                        MLX5_FLOW_LAYER_OUTER_L2;
2022
2023         if (item_flags & ethm)
2024                 return rte_flow_error_set(error, ENOTSUP,
2025                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2026                                           "multiple L2 layers not supported");
2027         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2028             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2029                 return rte_flow_error_set(error, EINVAL,
2030                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2031                                           "L2 layer should not follow "
2032                                           "L3 layers");
2033         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2034             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2035                 return rte_flow_error_set(error, EINVAL,
2036                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2037                                           "L2 layer should not follow VLAN");
2038         if (item_flags & MLX5_FLOW_LAYER_GTP)
2039                 return rte_flow_error_set(error, EINVAL,
2040                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2041                                           "L2 layer should not follow GTP");
2042         if (!mask)
2043                 mask = &rte_flow_item_eth_mask;
2044         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2045                                         (const uint8_t *)&nic_mask,
2046                                         sizeof(struct rte_flow_item_eth),
2047                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2048         return ret;
2049 }
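
/*
 * Example (illustrative): an Ethernet item matching a destination MAC and
 * ether type with masks covered by the NIC mask above; it must precede any
 * VLAN/L3 item for the ordering checks to pass:
 *
 *     static const struct rte_flow_item_eth eth_spec = {
 *             .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *             .type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *     };
 *     static const struct rte_flow_item_eth eth_mask = {
 *             .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *             .type = RTE_BE16(0xffff),
 *     };
 *     const struct rte_flow_item eth_item = {
 *             .type = RTE_FLOW_ITEM_TYPE_ETH,
 *             .spec = &eth_spec,
 *             .mask = &eth_mask,
 *     };
 */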
2050
2051 /**
2052  * Validate VLAN item.
2053  *
2054  * @param[in] item
2055  *   Item specification.
2056  * @param[in] item_flags
2057  *   Bit-fields that hold the items detected until now.
2058  * @param[in] dev
2059  *   Ethernet device flow is being created on.
2060  * @param[out] error
2061  *   Pointer to error structure.
2062  *
2063  * @return
2064  *   0 on success, a negative errno value otherwise and rte_errno is set.
2065  */
2066 int
2067 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2068                              uint64_t item_flags,
2069                              struct rte_eth_dev *dev,
2070                              struct rte_flow_error *error)
2071 {
2072         const struct rte_flow_item_vlan *spec = item->spec;
2073         const struct rte_flow_item_vlan *mask = item->mask;
2074         const struct rte_flow_item_vlan nic_mask = {
2075                 .tci = RTE_BE16(UINT16_MAX),
2076                 .inner_type = RTE_BE16(UINT16_MAX),
2077         };
2078         uint16_t vlan_tag = 0;
2079         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2080         int ret;
2081         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2082                                         MLX5_FLOW_LAYER_INNER_L4) :
2083                                        (MLX5_FLOW_LAYER_OUTER_L3 |
2084                                         MLX5_FLOW_LAYER_OUTER_L4);
2085         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2086                                         MLX5_FLOW_LAYER_OUTER_VLAN;
2087
2088         if (item_flags & vlanm)
2089                 return rte_flow_error_set(error, EINVAL,
2090                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2091                                           "multiple VLAN layers not supported");
2092         else if ((item_flags & l34m) != 0)
2093                 return rte_flow_error_set(error, EINVAL,
2094                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2095                                           "VLAN cannot follow L3/L4 layer");
2096         if (!mask)
2097                 mask = &rte_flow_item_vlan_mask;
2098         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2099                                         (const uint8_t *)&nic_mask,
2100                                         sizeof(struct rte_flow_item_vlan),
2101                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2102         if (ret)
2103                 return ret;
2104         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2105                 struct mlx5_priv *priv = dev->data->dev_private;
2106
2107                 if (priv->vmwa_context) {
2108                         /*
2109                          * A non-NULL context means we run in a virtual
2110                          * machine with SR-IOV enabled and have to create a
2111                          * VLAN interface to make the hypervisor set up the
2112                          * E-Switch vport context correctly. We avoid creating
2113                          * multiple VLAN interfaces, so a VLAN tag mask is not
2114                          * supported.
2114                          */
2115                         return rte_flow_error_set(error, EINVAL,
2116                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2117                                                   item,
2118                                                   "VLAN tag mask is not"
2119                                                   " supported in virtual"
2120                                                   " environment");
2121                 }
2122         }
2123         if (spec) {
2124                 vlan_tag = spec->tci;
2125                 vlan_tag &= mask->tci;
2126         }
2127         /*
2128          * From the Verbs perspective an empty VLAN is equivalent
2129          * to a packet without a VLAN layer.
2130          */
2131         if (!vlan_tag)
2132                 return rte_flow_error_set(error, EINVAL,
2133                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2134                                           item->spec,
2135                                           "VLAN cannot be empty");
2136         return 0;
2137 }
2138
2139 /**
2140  * Validate IPV4 item.
2141  *
2142  * @param[in] item
2143  *   Item specification.
2144  * @param[in] item_flags
2145  *   Bit-fields that hold the items detected until now.
2146  * @param[in] last_item
2147  *   Previously validated item in the pattern.
2148  * @param[in] ether_type
2149  *   Type in the ethernet layer header (including dot1q).
2150  * @param[in] acc_mask
2151  *   Acceptable mask, if NULL default internal default mask
2152  *   will be used to check whether item fields are supported.
2153  * @param[in] range_accepted
2154  *   True if range of values is accepted for specific fields, false otherwise.
2155  * @param[out] error
2156  *   Pointer to error structure.
2157  *
2158  * @return
2159  *   0 on success, a negative errno value otherwise and rte_errno is set.
2160  */
2161 int
2162 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2163                              uint64_t item_flags,
2164                              uint64_t last_item,
2165                              uint16_t ether_type,
2166                              const struct rte_flow_item_ipv4 *acc_mask,
2167                              bool range_accepted,
2168                              struct rte_flow_error *error)
2169 {
2170         const struct rte_flow_item_ipv4 *mask = item->mask;
2171         const struct rte_flow_item_ipv4 *spec = item->spec;
2172         const struct rte_flow_item_ipv4 nic_mask = {
2173                 .hdr = {
2174                         .src_addr = RTE_BE32(0xffffffff),
2175                         .dst_addr = RTE_BE32(0xffffffff),
2176                         .type_of_service = 0xff,
2177                         .next_proto_id = 0xff,
2178                 },
2179         };
2180         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2181         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2182                                       MLX5_FLOW_LAYER_OUTER_L3;
2183         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2184                                       MLX5_FLOW_LAYER_OUTER_L4;
2185         int ret;
2186         uint8_t next_proto = 0xFF;
2187         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2188                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2189                                   MLX5_FLOW_LAYER_INNER_VLAN);
2190
2191         if ((last_item & l2_vlan) && ether_type &&
2192             ether_type != RTE_ETHER_TYPE_IPV4)
2193                 return rte_flow_error_set(error, EINVAL,
2194                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2195                                           "IPv4 cannot follow L2/VLAN layer "
2196                                           "whose ether type is not IPv4");
2197         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2198                 if (mask && spec)
2199                         next_proto = mask->hdr.next_proto_id &
2200                                      spec->hdr.next_proto_id;
2201                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2202                         return rte_flow_error_set(error, EINVAL,
2203                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2204                                                   item,
2205                                                   "multiple tunnel "
2206                                                   "not supported");
2207         }
2208         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2209                 return rte_flow_error_set(error, EINVAL,
2210                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2211                                           "wrong tunnel type - IPv6 specified "
2212                                           "but IPv4 item provided");
2213         if (item_flags & l3m)
2214                 return rte_flow_error_set(error, ENOTSUP,
2215                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2216                                           "multiple L3 layers not supported");
2217         else if (item_flags & l4m)
2218                 return rte_flow_error_set(error, EINVAL,
2219                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2220                                           "L3 cannot follow an L4 layer.");
2221         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2222                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2223                 return rte_flow_error_set(error, EINVAL,
2224                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2225                                           "L3 cannot follow an NVGRE layer.");
2226         if (!mask)
2227                 mask = &rte_flow_item_ipv4_mask;
2228         else if (mask->hdr.next_proto_id != 0 &&
2229                  mask->hdr.next_proto_id != 0xff)
2230                 return rte_flow_error_set(error, EINVAL,
2231                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2232                                           "partial mask is not supported"
2233                                           " for protocol");
2234         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2235                                         acc_mask ? (const uint8_t *)acc_mask
2236                                                  : (const uint8_t *)&nic_mask,
2237                                         sizeof(struct rte_flow_item_ipv4),
2238                                         range_accepted, error);
2239         if (ret < 0)
2240                 return ret;
2241         return 0;
2242 }
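
/*
 * Example (illustrative): an IPv4 item matching the next protocol; as
 * enforced above, the protocol field may only be matched with a full 0xff
 * mask, e.g. to select UDP packets:
 *
 *     static const struct rte_flow_item_ipv4 ipv4_spec = {
 *             .hdr = { .next_proto_id = IPPROTO_UDP },
 *     };
 *     static const struct rte_flow_item_ipv4 ipv4_mask = {
 *             .hdr = { .next_proto_id = 0xff },
 *     };
 */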
2243
2244 /**
2245  * Validate IPV6 item.
2246  *
2247  * @param[in] item
2248  *   Item specification.
2249  * @param[in] item_flags
2250  *   Bit-fields that hold the items detected until now.
2251  * @param[in] last_item
2252  *   Previously validated item in the pattern.
2253  * @param[in] ether_type
2254  *   Type in the ethernet layer header (including dot1q).
2255  * @param[in] acc_mask
2256  *   Acceptable mask, if NULL default internal default mask
2257  *   will be used to check whether item fields are supported.
2258  * @param[out] error
2259  *   Pointer to error structure.
2260  *
2261  * @return
2262  *   0 on success, a negative errno value otherwise and rte_errno is set.
2263  */
2264 int
2265 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2266                              uint64_t item_flags,
2267                              uint64_t last_item,
2268                              uint16_t ether_type,
2269                              const struct rte_flow_item_ipv6 *acc_mask,
2270                              struct rte_flow_error *error)
2271 {
2272         const struct rte_flow_item_ipv6 *mask = item->mask;
2273         const struct rte_flow_item_ipv6 *spec = item->spec;
2274         const struct rte_flow_item_ipv6 nic_mask = {
2275                 .hdr = {
2276                         .src_addr =
2277                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2278                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2279                         .dst_addr =
2280                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2281                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2282                         .vtc_flow = RTE_BE32(0xffffffff),
2283                         .proto = 0xff,
2284                 },
2285         };
2286         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2287         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2288                                       MLX5_FLOW_LAYER_OUTER_L3;
2289         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2290                                       MLX5_FLOW_LAYER_OUTER_L4;
2291         int ret;
2292         uint8_t next_proto = 0xFF;
2293         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2294                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2295                                   MLX5_FLOW_LAYER_INNER_VLAN);
2296
2297         if ((last_item & l2_vlan) && ether_type &&
2298             ether_type != RTE_ETHER_TYPE_IPV6)
2299                 return rte_flow_error_set(error, EINVAL,
2300                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2301                                           "IPv6 cannot follow L2/VLAN layer "
2302                                           "whose ether type is not IPv6");
2303         if (mask && mask->hdr.proto == UINT8_MAX && spec)
2304                 next_proto = spec->hdr.proto;
2305         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2306                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2307                         return rte_flow_error_set(error, EINVAL,
2308                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2309                                                   item,
2310                                                   "multiple tunnel "
2311                                                   "not supported");
2312         }
2313         if (next_proto == IPPROTO_HOPOPTS  ||
2314             next_proto == IPPROTO_ROUTING  ||
2315             next_proto == IPPROTO_FRAGMENT ||
2316             next_proto == IPPROTO_ESP      ||
2317             next_proto == IPPROTO_AH       ||
2318             next_proto == IPPROTO_DSTOPTS)
2319                 return rte_flow_error_set(error, EINVAL,
2320                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2321                                           "IPv6 proto (next header) should "
2322                                           "not be set as extension header");
2323         if (item_flags & MLX5_FLOW_LAYER_IPIP)
2324                 return rte_flow_error_set(error, EINVAL,
2325                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2326                                           "wrong tunnel type - IPv4 specified "
2327                                           "but IPv6 item provided");
2328         if (item_flags & l3m)
2329                 return rte_flow_error_set(error, ENOTSUP,
2330                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2331                                           "multiple L3 layers not supported");
2332         else if (item_flags & l4m)
2333                 return rte_flow_error_set(error, EINVAL,
2334                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2335                                           "L3 cannot follow an L4 layer.");
2336         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2337                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2338                 return rte_flow_error_set(error, EINVAL,
2339                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2340                                           "L3 cannot follow an NVGRE layer.");
2341         if (!mask)
2342                 mask = &rte_flow_item_ipv6_mask;
2343         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2344                                         acc_mask ? (const uint8_t *)acc_mask
2345                                                  : (const uint8_t *)&nic_mask,
2346                                         sizeof(struct rte_flow_item_ipv6),
2347                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2348         if (ret < 0)
2349                 return ret;
2350         return 0;
2351 }
2352
2353 /**
2354  * Validate UDP item.
2355  *
2356  * @param[in] item
2357  *   Item specification.
2358  * @param[in] item_flags
2359  *   Bit-fields that hold the items detected until now.
2360  * @param[in] target_protocol
2361  *   The next protocol in the previous item.
2364  * @param[out] error
2365  *   Pointer to error structure.
2366  *
2367  * @return
2368  *   0 on success, a negative errno value otherwise and rte_errno is set.
2369  */
2370 int
2371 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2372                             uint64_t item_flags,
2373                             uint8_t target_protocol,
2374                             struct rte_flow_error *error)
2375 {
2376         const struct rte_flow_item_udp *mask = item->mask;
2377         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2378         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2379                                       MLX5_FLOW_LAYER_OUTER_L3;
2380         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2381                                       MLX5_FLOW_LAYER_OUTER_L4;
2382         int ret;
2383
2384         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2385                 return rte_flow_error_set(error, EINVAL,
2386                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2387                                           "protocol filtering not compatible"
2388                                           " with UDP layer");
2389         if (!(item_flags & l3m))
2390                 return rte_flow_error_set(error, EINVAL,
2391                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2392                                           "L3 is mandatory to filter on L4");
2393         if (item_flags & l4m)
2394                 return rte_flow_error_set(error, EINVAL,
2395                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2396                                           "multiple L4 layers not supported");
2397         if (!mask)
2398                 mask = &rte_flow_item_udp_mask;
2399         ret = mlx5_flow_item_acceptable
2400                 (item, (const uint8_t *)mask,
2401                  (const uint8_t *)&rte_flow_item_udp_mask,
2402                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2403                  error);
2404         if (ret < 0)
2405                 return ret;
2406         return 0;
2407 }
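
/*
 * Example (illustrative): the checks above require an L3 item before UDP
 * and at most one L4 item, e.g. an ETH / IPV4 / UDP / END pattern matching
 * the VXLAN destination port:
 *
 *     static const struct rte_flow_item_udp udp_spec = {
 *             .hdr = { .dst_port = RTE_BE16(4789) },
 *     };
 *     static const struct rte_flow_item_udp udp_mask = {
 *             .hdr = { .dst_port = RTE_BE16(0xffff) },
 *     };
 *     const struct rte_flow_item pattern[] = {
 *             { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *             { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *             { .type = RTE_FLOW_ITEM_TYPE_UDP,
 *               .spec = &udp_spec, .mask = &udp_mask },
 *             { .type = RTE_FLOW_ITEM_TYPE_END },
 *     };
 */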
2408
2409 /**
2410  * Validate TCP item.
2411  *
2412  * @param[in] item
2413  *   Item specification.
2414  * @param[in] item_flags
2415  *   Bit-fields that hold the items detected until now.
2416  * @param[in] target_protocol
2417  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2418  * @param[out] error
2419  *   Pointer to error structure.
2420  *
2421  * @return
2422  *   0 on success, a negative errno value otherwise and rte_errno is set.
2423  */
2424 int
2425 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2426                             uint64_t item_flags,
2427                             uint8_t target_protocol,
2428                             const struct rte_flow_item_tcp *flow_mask,
2429                             struct rte_flow_error *error)
2430 {
2431         const struct rte_flow_item_tcp *mask = item->mask;
2432         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2433         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2434                                       MLX5_FLOW_LAYER_OUTER_L3;
2435         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2436                                       MLX5_FLOW_LAYER_OUTER_L4;
2437         int ret;
2438
2439         MLX5_ASSERT(flow_mask);
2440         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2441                 return rte_flow_error_set(error, EINVAL,
2442                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2443                                           "protocol filtering not compatible"
2444                                           " with TCP layer");
2445         if (!(item_flags & l3m))
2446                 return rte_flow_error_set(error, EINVAL,
2447                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2448                                           "L3 is mandatory to filter on L4");
2449         if (item_flags & l4m)
2450                 return rte_flow_error_set(error, EINVAL,
2451                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2452                                           "multiple L4 layers not supported");
2453         if (!mask)
2454                 mask = &rte_flow_item_tcp_mask;
2455         ret = mlx5_flow_item_acceptable
2456                 (item, (const uint8_t *)mask,
2457                  (const uint8_t *)flow_mask,
2458                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2459                  error);
2460         if (ret < 0)
2461                 return ret;
2462         return 0;
2463 }
2464
2465 /**
2466  * Validate VXLAN item.
2467  *
2468  * @param[in] dev
2469  *   Pointer to the Ethernet device structure.
2470  * @param[in] udp_dport
2471  *   UDP destination port
2472  * @param[in] item
2473  *   Item specification.
2474  * @param[in] item_flags
2475  *   Bit-fields that holds the items detected until now.
2476  * @param[in] attr
2477  *   Flow rule attributes.
2478  * @param[out] error
2479  *   Pointer to error structure.
2480  *
2481  * @return
2482  *   0 on success, a negative errno value otherwise and rte_errno is set.
2483  */
2484 int
2485 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2486                               uint16_t udp_dport,
2487                               const struct rte_flow_item *item,
2488                               uint64_t item_flags,
2489                               const struct rte_flow_attr *attr,
2490                               struct rte_flow_error *error)
2491 {
2492         const struct rte_flow_item_vxlan *spec = item->spec;
2493         const struct rte_flow_item_vxlan *mask = item->mask;
2494         int ret;
2495         struct mlx5_priv *priv = dev->data->dev_private;
2496         union vni {
2497                 uint32_t vlan_id;
2498                 uint8_t vni[4];
2499         } id = { .vlan_id = 0, };
2500         const struct rte_flow_item_vxlan nic_mask = {
2501                 .vni = "\xff\xff\xff",
2502                 .rsvd1 = 0xff,
2503         };
2504         const struct rte_flow_item_vxlan *valid_mask;
2505
2506         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2507                 return rte_flow_error_set(error, ENOTSUP,
2508                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2509                                           "multiple tunnel layers not"
2510                                           " supported");
2511         valid_mask = &rte_flow_item_vxlan_mask;
2512         /*
2513          * Verify only UDPv4 is present as defined in
2514          * https://tools.ietf.org/html/rfc7348
2515          */
2516         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2517                 return rte_flow_error_set(error, EINVAL,
2518                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2519                                           "no outer UDP layer found");
2520         if (!mask)
2521                 mask = &rte_flow_item_vxlan_mask;
2522
2523         if (priv->sh->steering_format_version !=
2524             MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2525             !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2526                 /* FDB domain & NIC domain non-zero group */
2527                 if ((attr->transfer || attr->group) && priv->sh->misc5_cap)
2528                         valid_mask = &nic_mask;
2529                 /* Group zero in NIC domain */
2530                 if (!attr->group && !attr->transfer &&
2531                     priv->sh->tunnel_header_0_1)
2532                         valid_mask = &nic_mask;
2533         }
2534         ret = mlx5_flow_item_acceptable
2535                 (item, (const uint8_t *)mask,
2536                  (const uint8_t *)valid_mask,
2537                  sizeof(struct rte_flow_item_vxlan),
2538                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2539         if (ret < 0)
2540                 return ret;
2541         if (spec) {
2542                 memcpy(&id.vni[1], spec->vni, 3);
2543                 memcpy(&id.vni[1], mask->vni, 3);
2544         }
2545         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2546                 return rte_flow_error_set(error, ENOTSUP,
2547                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2548                                           "VXLAN tunnel must be fully defined");
2549         return 0;
2550 }
2551
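/*
 * Illustrative sketch (not part of the driver's logic): a VXLAN pattern
 * that passes the validation above - the tunnel follows an outer UDP item
 * and, unless the device capabilities widen the mask, only the 24-bit VNI
 * is matched. UDP destination port 4789 is the IANA VXLAN port; the
 * function name and VNI value are hypothetical.
 */
static __rte_unused int
example_validate_vxlan_flow(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_udp udp_spec = {
		.hdr = { .dst_port = RTE_BE16(4789) },
	};
	struct rte_flow_item_vxlan vxlan_spec = { .vni = "\x00\x00\x2a" };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &err);
}
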
2552 /**
2553  * Validate VXLAN_GPE item.
2554  *
2555  * @param[in] item
2556  *   Item specification.
2557  * @param[in] item_flags
2558  *   Bit-fields that hold the items detected until now.
2559  * @param[in] dev
2560  *   Pointer to the Ethernet device structure.
2563  * @param[out] error
2564  *   Pointer to error structure.
2565  *
2566  * @return
2567  *   0 on success, a negative errno value otherwise and rte_errno is set.
2568  */
2569 int
2570 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2571                                   uint64_t item_flags,
2572                                   struct rte_eth_dev *dev,
2573                                   struct rte_flow_error *error)
2574 {
2575         struct mlx5_priv *priv = dev->data->dev_private;
2576         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2577         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2578         int ret;
2579         union vni {
2580                 uint32_t vlan_id;
2581                 uint8_t vni[4];
2582         } id = { .vlan_id = 0, };
2583
2584         if (!priv->config.l3_vxlan_en)
2585                 return rte_flow_error_set(error, ENOTSUP,
2586                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2587                                           "L3 VXLAN is not enabled by device"
2588                                           " parameter and/or not configured in"
2589                                           " firmware");
2590         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2591                 return rte_flow_error_set(error, ENOTSUP,
2592                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2593                                           "multiple tunnel layers not"
2594                                           " supported");
2595         /*
2596          * Verify only UDPv4 is present as defined in
2597          * https://tools.ietf.org/html/rfc7348
2598          */
2599         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2600                 return rte_flow_error_set(error, EINVAL,
2601                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2602                                           "no outer UDP layer found");
2603         if (!mask)
2604                 mask = &rte_flow_item_vxlan_gpe_mask;
2605         ret = mlx5_flow_item_acceptable
2606                 (item, (const uint8_t *)mask,
2607                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2608                  sizeof(struct rte_flow_item_vxlan_gpe),
2609                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2610         if (ret < 0)
2611                 return ret;
2612         if (spec) {
2613                 if (spec->protocol)
2614                         return rte_flow_error_set(error, ENOTSUP,
2615                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2616                                                   item,
2617                                                   "VxLAN-GPE protocol"
2618                                                   " not supported");
2619                 memcpy(&id.vni[1], spec->vni, 3);
2620                 memcpy(&id.vni[1], mask->vni, 3);
2621         }
2622         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2623                 return rte_flow_error_set(error, ENOTSUP,
2624                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2625                                           "VXLAN-GPE tunnel must be fully"
2626                                           " defined");
2627         return 0;
2628 }
2629 /**
2630  * Validate GRE Key item.
2631  *
2632  * @param[in] item
2633  *   Item specification.
2634  * @param[in] item_flags
2635  *   Bit flags to mark detected items.
2636  * @param[in] gre_item
2637  *   Pointer to gre_item
2638  * @param[out] error
2639  *   Pointer to error structure.
2640  *
2641  * @return
2642  *   0 on success, a negative errno value otherwise and rte_errno is set.
2643  */
2644 int
2645 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2646                                 uint64_t item_flags,
2647                                 const struct rte_flow_item *gre_item,
2648                                 struct rte_flow_error *error)
2649 {
2650         const rte_be32_t *mask = item->mask;
2651         int ret = 0;
2652         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2653         const struct rte_flow_item_gre *gre_spec;
2654         const struct rte_flow_item_gre *gre_mask;
2655
2656         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2657                 return rte_flow_error_set(error, ENOTSUP,
2658                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2659                                           "Multiple GRE keys not supported");
2660         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2661                 return rte_flow_error_set(error, ENOTSUP,
2662                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2663                                           "No preceding GRE header");
2664         if (item_flags & MLX5_FLOW_LAYER_INNER)
2665                 return rte_flow_error_set(error, ENOTSUP,
2666                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2667                                           "GRE key following a wrong item");
2668         gre_mask = gre_item->mask;
2669         if (!gre_mask)
2670                 gre_mask = &rte_flow_item_gre_mask;
2671         gre_spec = gre_item->spec;
2672         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2673                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2674                 return rte_flow_error_set(error, EINVAL,
2675                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2676                                           "Key bit must be on");
2677
2678         if (!mask)
2679                 mask = &gre_key_default_mask;
2680         ret = mlx5_flow_item_acceptable
2681                 (item, (const uint8_t *)mask,
2682                  (const uint8_t *)&gre_key_default_mask,
2683                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2684         return ret;
2685 }
2686
2687 /**
2688  * Validate GRE item.
2689  *
2690  * @param[in] item
2691  *   Item specification.
2692  * @param[in] item_flags
2693  *   Bit flags to mark detected items.
2694  * @param[in] target_protocol
2695  *   The next protocol in the previous item.
2696  * @param[out] error
2697  *   Pointer to error structure.
2698  *
2699  * @return
2700  *   0 on success, a negative errno value otherwise and rte_errno is set.
2701  */
2702 int
2703 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2704                             uint64_t item_flags,
2705                             uint8_t target_protocol,
2706                             struct rte_flow_error *error)
2707 {
2708         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2709         const struct rte_flow_item_gre *mask = item->mask;
2710         int ret;
2711         const struct rte_flow_item_gre nic_mask = {
2712                 .c_rsvd0_ver = RTE_BE16(0xB000),
2713                 .protocol = RTE_BE16(UINT16_MAX),
2714         };
2715
2716         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2717                 return rte_flow_error_set(error, EINVAL,
2718                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2719                                           "protocol filtering not compatible"
2720                                           " with this GRE layer");
2721         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2722                 return rte_flow_error_set(error, ENOTSUP,
2723                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2724                                           "multiple tunnel layers not"
2725                                           " supported");
2726         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2727                 return rte_flow_error_set(error, ENOTSUP,
2728                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2729                                           "L3 Layer is missing");
2730         if (!mask)
2731                 mask = &rte_flow_item_gre_mask;
2732         ret = mlx5_flow_item_acceptable
2733                 (item, (const uint8_t *)mask,
2734                  (const uint8_t *)&nic_mask,
2735                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2736                  error);
2737         if (ret < 0)
2738                 return ret;
2739 #ifndef HAVE_MLX5DV_DR
2740 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2741         if (spec && (spec->protocol & mask->protocol))
2742                 return rte_flow_error_set(error, ENOTSUP,
2743                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2744                                           "without MPLS support the"
2745                                           " specification cannot be used for"
2746                                           " filtering");
2747 #endif
2748 #endif
2749         return 0;
2750 }
2751
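/*
 * Illustrative sketch (not part of the driver's logic): matching a GRE
 * key. Per the checks above, GRE must follow an outer L3 item and, when a
 * GRE_KEY item is used, the GRE spec must have the K bit (0x2000 in
 * c_rsvd0_ver) set. The key value and function name are hypothetical.
 */
static __rte_unused int
example_validate_gre_key_flow(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_gre gre_spec = {
		.c_rsvd0_ver = RTE_BE16(0x2000), /* K bit set. */
	};
	struct rte_flow_item_gre gre_mask = {
		.c_rsvd0_ver = RTE_BE16(0x2000),
	};
	rte_be32_t gre_key = RTE_BE32(0x1234);
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_GRE,
		  .spec = &gre_spec, .mask = &gre_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, .spec = &gre_key },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &err);
}
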
2752 /**
2753  * Validate Geneve item.
2754  *
2755  * @param[in] item
2756  *   Item specification.
2757  * @param[in] item_flags
2758  *   Bit-fields that hold the items detected until now.
2759  * @param[in] dev
2760  *   Pointer to the Ethernet device structure.
2761  * @param[out] error
2762  *   Pointer to error structure.
2763  *
2764  * @return
2765  *   0 on success, a negative errno value otherwise and rte_errno is set.
2766  */
2767
2768 int
2769 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2770                                uint64_t item_flags,
2771                                struct rte_eth_dev *dev,
2772                                struct rte_flow_error *error)
2773 {
2774         struct mlx5_priv *priv = dev->data->dev_private;
2775         const struct rte_flow_item_geneve *spec = item->spec;
2776         const struct rte_flow_item_geneve *mask = item->mask;
2777         int ret;
2778         uint16_t gbhdr;
2779         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2780                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2781         const struct rte_flow_item_geneve nic_mask = {
2782                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2783                 .vni = "\xff\xff\xff",
2784                 .protocol = RTE_BE16(UINT16_MAX),
2785         };
2786
2787         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2788                 return rte_flow_error_set(error, ENOTSUP,
2789                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2790                                           "L3 Geneve is not enabled by device"
2791                                           " parameter and/or not configured in"
2792                                           " firmware");
2793         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2794                 return rte_flow_error_set(error, ENOTSUP,
2795                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2796                                           "multiple tunnel layers not"
2797                                           " supported");
2798         /*
2799          * Verify an outer UDP layer is present, as required for Geneve
2800          * encapsulation (https://tools.ietf.org/html/rfc8926).
2801          */
2802         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2803                 return rte_flow_error_set(error, EINVAL,
2804                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2805                                           "no outer UDP layer found");
2806         if (!mask)
2807                 mask = &rte_flow_item_geneve_mask;
2808         ret = mlx5_flow_item_acceptable
2809                                   (item, (const uint8_t *)mask,
2810                                    (const uint8_t *)&nic_mask,
2811                                    sizeof(struct rte_flow_item_geneve),
2812                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2813         if (ret)
2814                 return ret;
2815         if (spec) {
2816                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2817                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2818                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2819                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2820                         return rte_flow_error_set(error, ENOTSUP,
2821                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2822                                                   item,
2823                                                   "Geneve protocol unsupported"
2824                                                   " fields are being used");
2825                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2826                         return rte_flow_error_set
2827                                         (error, ENOTSUP,
2828                                          RTE_FLOW_ERROR_TYPE_ITEM,
2829                                          item,
2830                                          "Unsupported Geneve options length");
2831         }
2832         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2833                 return rte_flow_error_set
2834                                     (error, ENOTSUP,
2835                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2836                                      "Geneve tunnel must be fully defined");
2837         return 0;
2838 }
2839
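/*
 * Illustrative sketch (not part of the driver's logic): a Geneve pattern
 * matching on the VNI. As checked above, the item must follow an outer UDP
 * item (6081 is the IANA Geneve port) and requires the
 * tunnel_stateless_geneve_rx device capability. The function name and VNI
 * value are hypothetical.
 */
static __rte_unused int
example_validate_geneve_flow(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_udp udp_spec = {
		.hdr = { .dst_port = RTE_BE16(6081) },
	};
	struct rte_flow_item_geneve geneve_spec = { .vni = "\x00\x00\x2a" };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_GENEVE, .spec = &geneve_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &err);
}
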
2840 /**
2841  * Validate Geneve TLV option item.
2842  *
2843  * @param[in] item
2844  *   Item specification.
2845  * @param[in] last_item
2846  *   Previously validated item in the pattern items.
2847  * @param[in] geneve_item
2848  *   Previous GENEVE item specification.
2849  * @param[in] dev
2850  *   Pointer to the rte_eth_dev structure.
2851  * @param[out] error
2852  *   Pointer to error structure.
2853  *
2854  * @return
2855  *   0 on success, a negative errno value otherwise and rte_errno is set.
2856  */
2857 int
2858 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
2859                                    uint64_t last_item,
2860                                    const struct rte_flow_item *geneve_item,
2861                                    struct rte_eth_dev *dev,
2862                                    struct rte_flow_error *error)
2863 {
2864         struct mlx5_priv *priv = dev->data->dev_private;
2865         struct mlx5_dev_ctx_shared *sh = priv->sh;
2866         struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
2867         struct mlx5_hca_attr *hca_attr = &priv->config.hca_attr;
2868         uint8_t data_max_supported =
2869                         hca_attr->max_geneve_tlv_option_data_len * 4;
2870         struct mlx5_dev_config *config = &priv->config;
2871         const struct rte_flow_item_geneve *geneve_spec;
2872         const struct rte_flow_item_geneve *geneve_mask;
2873         const struct rte_flow_item_geneve_opt *spec = item->spec;
2874         const struct rte_flow_item_geneve_opt *mask = item->mask;
2875         unsigned int i;
2876         unsigned int data_len;
2877         uint8_t tlv_option_len;
2878         uint16_t optlen_m, optlen_v;
2879         const struct rte_flow_item_geneve_opt full_mask = {
2880                 .option_class = RTE_BE16(0xffff),
2881                 .option_type = 0xff,
2882                 .option_len = 0x1f,
2883         };
2884
2885         if (!mask)
2886                 mask = &rte_flow_item_geneve_opt_mask;
2887         if (!spec)
2888                 return rte_flow_error_set
2889                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2890                         "Geneve TLV opt class/type/length must be specified");
2891         if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
2892                 return rte_flow_error_set
2893                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2894                         "Geneve TLV opt length exceeds the limit (31)");
2895         /* Check if class type and length masks are full. */
2896         if (full_mask.option_class != mask->option_class ||
2897             full_mask.option_type != mask->option_type ||
2898             full_mask.option_len != (mask->option_len & full_mask.option_len))
2899                 return rte_flow_error_set
2900                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2901                         "Geneve TLV opt class/type/length masks must be full");
2902         /* Check if length is supported */
2903         if ((uint32_t)spec->option_len >
2904                         config->hca_attr.max_geneve_tlv_option_data_len)
2905                 return rte_flow_error_set
2906                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2907                         "Geneve TLV opt length not supported");
2908         if (config->hca_attr.max_geneve_tlv_options > 1)
2909                 DRV_LOG(DEBUG,
2910                         "max_geneve_tlv_options supports more than 1 option");
2911         /* Check GENEVE item preceding. */
2912         if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
2913                 return rte_flow_error_set
2914                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2915                         "Geneve opt item must be preceded with Geneve item");
2916         geneve_spec = geneve_item->spec;
2917         geneve_mask = geneve_item->mask ? geneve_item->mask :
2918                                           &rte_flow_item_geneve_mask;
2919         /* Check if GENEVE TLV option size doesn't exceed option length */
2920         if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
2921                             geneve_spec->ver_opt_len_o_c_rsvd0)) {
2922                 tlv_option_len = spec->option_len & mask->option_len;
2923                 optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
2924                 optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
2925                 optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
2926                 optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
2927                 if ((optlen_v & optlen_m) <= tlv_option_len)
2928                         return rte_flow_error_set
2929                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2930                                  "GENEVE TLV option length exceeds optlen");
2931         }
2932         /* Check if length is 0 or data is 0. */
2933         if (spec->data == NULL || spec->option_len == 0)
2934                 return rte_flow_error_set
2935                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2936                         "Geneve TLV opt with zero data/length not supported");
2937         /* Check not all data & mask are 0. */
2938         data_len = spec->option_len * 4;
2939         if (mask->data == NULL) {
2940                 for (i = 0; i < data_len; i++)
2941                         if (spec->data[i])
2942                                 break;
2943                 if (i == data_len)
2944                         return rte_flow_error_set(error, ENOTSUP,
2945                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2946                                 "Can't match on Geneve option data 0");
2947         } else {
2948                 for (i = 0; i < data_len; i++)
2949                         if (spec->data[i] & mask->data[i])
2950                                 break;
2951                 if (i == data_len)
2952                         return rte_flow_error_set(error, ENOTSUP,
2953                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2954                                 "Can't match on Geneve option data and mask 0");
2955                 /* Check data mask supported. */
2956                 for (i = data_max_supported; i < data_len ; i++)
2957                         if (mask->data[i])
2958                                 return rte_flow_error_set(error, ENOTSUP,
2959                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
2960                                         "Data mask is of unsupported size");
2961         }
2962         /* Check GENEVE option is supported in NIC. */
2963         if (!config->hca_attr.geneve_tlv_opt)
2964                 return rte_flow_error_set
2965                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2966                         "Geneve TLV opt not supported");
2967         /* Check if we already have geneve option with different type/class. */
2968         rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
2969         geneve_opt_resource = sh->geneve_tlv_option_resource;
2970         if (geneve_opt_resource != NULL)
2971                 if (geneve_opt_resource->option_class != spec->option_class ||
2972                     geneve_opt_resource->option_type != spec->option_type ||
2973                     geneve_opt_resource->length != spec->option_len) {
2974                         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
2975                         return rte_flow_error_set(error, ENOTSUP,
2976                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2977                                 "Only one Geneve TLV option supported");
2978                 }
2979         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
2980         return 0;
2981 }
2982
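/*
 * Illustrative sketch (not part of the driver's logic): matching a Geneve
 * TLV option. As required by the checks above, the GENEVE_OPT item follows
 * a GENEVE item, the class/type/length masks are full, and non-zero option
 * data with a mask is supplied. Only one option class/type/length can be
 * used per port and the geneve_tlv_opt device capability is required. All
 * numeric values and the function name are hypothetical.
 */
static __rte_unused int
example_validate_geneve_opt_flow(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_udp udp_spec = {
		.hdr = { .dst_port = RTE_BE16(6081) },
	};
	struct rte_flow_item_geneve geneve_spec = { .vni = "\x00\x00\x2a" };
	/* One 4-byte word of option data (option_len is in 4-byte words). */
	uint32_t opt_data[1] = { RTE_BE32(0x00000001) };
	uint32_t opt_data_mask[1] = { RTE_BE32(0xffffffff) };
	struct rte_flow_item_geneve_opt opt_spec = {
		.option_class = RTE_BE16(0x0102),
		.option_type = 0x3,
		.option_len = 1,
		.data = opt_data,
	};
	struct rte_flow_item_geneve_opt opt_mask = {
		.option_class = RTE_BE16(0xffff),
		.option_type = 0xff,
		.option_len = 0x1f,
		.data = opt_data_mask,
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_GENEVE, .spec = &geneve_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_GENEVE_OPT,
		  .spec = &opt_spec, .mask = &opt_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &err);
}
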
2983 /**
2984  * Validate MPLS item.
2985  *
2986  * @param[in] dev
2987  *   Pointer to the rte_eth_dev structure.
2988  * @param[in] item
2989  *   Item specification.
2990  * @param[in] item_flags
2991  *   Bit-fields that holds the items detected until now.
2992  * @param[in] prev_layer
2993  *   The protocol layer indicated in previous item.
2994  * @param[out] error
2995  *   Pointer to error structure.
2996  *
2997  * @return
2998  *   0 on success, a negative errno value otherwise and rte_errno is set.
2999  */
3000 int
3001 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3002                              const struct rte_flow_item *item __rte_unused,
3003                              uint64_t item_flags __rte_unused,
3004                              uint64_t prev_layer __rte_unused,
3005                              struct rte_flow_error *error)
3006 {
3007 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3008         const struct rte_flow_item_mpls *mask = item->mask;
3009         struct mlx5_priv *priv = dev->data->dev_private;
3010         int ret;
3011
3012         if (!priv->config.mpls_en)
3013                 return rte_flow_error_set(error, ENOTSUP,
3014                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3015                                           "MPLS not supported or"
3016                                           " disabled in firmware"
3017                                           " configuration.");
3018         /* MPLS over UDP, GRE is allowed */
3019         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3020                             MLX5_FLOW_LAYER_GRE |
3021                             MLX5_FLOW_LAYER_GRE_KEY)))
3022                 return rte_flow_error_set(error, EINVAL,
3023                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3024                                           "protocol filtering not compatible"
3025                                           " with MPLS layer");
3026         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3027         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3028             !(item_flags & MLX5_FLOW_LAYER_GRE))
3029                 return rte_flow_error_set(error, ENOTSUP,
3030                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3031                                           "multiple tunnel layers not"
3032                                           " supported");
3033         if (!mask)
3034                 mask = &rte_flow_item_mpls_mask;
3035         ret = mlx5_flow_item_acceptable
3036                 (item, (const uint8_t *)mask,
3037                  (const uint8_t *)&rte_flow_item_mpls_mask,
3038                  sizeof(struct rte_flow_item_mpls),
3039                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3040         if (ret < 0)
3041                 return ret;
3042         return 0;
3043 #else
3044         return rte_flow_error_set(error, ENOTSUP,
3045                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
3046                                   "MPLS is not supported by Verbs, please"
3047                                   " update.");
3048 #endif
3049 }
3050
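/*
 * Illustrative sketch (not part of the driver's logic): MPLS over GRE, one
 * of the encapsulations accepted above (MPLS must follow UDP, GRE or
 * GRE_KEY, and needs the mpls_en device parameter). The label value and
 * the function name are hypothetical.
 */
static __rte_unused int
example_validate_mpls_over_gre_flow(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	/* Label 100 with S bit set: 20-bit label, 3-bit TC, S packed in 3 bytes. */
	struct rte_flow_item_mpls mpls_spec = { .label_tc_s = "\x00\x06\x41" };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_GRE },
		{ .type = RTE_FLOW_ITEM_TYPE_MPLS, .spec = &mpls_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &err);
}
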
3051 /**
3052  * Validate NVGRE item.
3053  *
3054  * @param[in] item
3055  *   Item specification.
3056  * @param[in] item_flags
3057  *   Bit flags to mark detected items.
3058  * @param[in] target_protocol
3059  *   The next protocol in the previous item.
3060  * @param[out] error
3061  *   Pointer to error structure.
3062  *
3063  * @return
3064  *   0 on success, a negative errno value otherwise and rte_errno is set.
3065  */
3066 int
3067 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3068                               uint64_t item_flags,
3069                               uint8_t target_protocol,
3070                               struct rte_flow_error *error)
3071 {
3072         const struct rte_flow_item_nvgre *mask = item->mask;
3073         int ret;
3074
3075         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3076                 return rte_flow_error_set(error, EINVAL,
3077                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3078                                           "protocol filtering not compatible"
3079                                           " with this GRE layer");
3080         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3081                 return rte_flow_error_set(error, ENOTSUP,
3082                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3083                                           "multiple tunnel layers not"
3084                                           " supported");
3085         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3086                 return rte_flow_error_set(error, ENOTSUP,
3087                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3088                                           "L3 Layer is missing");
3089         if (!mask)
3090                 mask = &rte_flow_item_nvgre_mask;
3091         ret = mlx5_flow_item_acceptable
3092                 (item, (const uint8_t *)mask,
3093                  (const uint8_t *)&rte_flow_item_nvgre_mask,
3094                  sizeof(struct rte_flow_item_nvgre),
3095                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3096         if (ret < 0)
3097                 return ret;
3098         return 0;
3099 }
3100
3101 /**
3102  * Validate eCPRI item.
3103  *
3104  * @param[in] item
3105  *   Item specification.
3106  * @param[in] item_flags
3107  *   Bit-fields that holds the items detected until now.
3108  * @param[in] last_item
3109  *   Previously validated item in the pattern items.
3110  * @param[in] ether_type
3111  *   Type in the ethernet layer header (including dot1q).
3112  * @param[in] acc_mask
3113  *   Acceptable mask; if NULL, the default internal mask
3114  *   will be used to check whether item fields are supported.
3115  * @param[out] error
3116  *   Pointer to error structure.
3117  *
3118  * @return
3119  *   0 on success, a negative errno value otherwise and rte_errno is set.
3120  */
3121 int
3122 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3123                               uint64_t item_flags,
3124                               uint64_t last_item,
3125                               uint16_t ether_type,
3126                               const struct rte_flow_item_ecpri *acc_mask,
3127                               struct rte_flow_error *error)
3128 {
3129         const struct rte_flow_item_ecpri *mask = item->mask;
3130         const struct rte_flow_item_ecpri nic_mask = {
3131                 .hdr = {
3132                         .common = {
3133                                 .u32 =
3134                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
3135                                         .type = 0xFF,
3136                                         }).u32),
3137                         },
3138                         .dummy[0] = 0xFFFFFFFF,
3139                 },
3140         };
3141         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3142                                         MLX5_FLOW_LAYER_OUTER_VLAN);
3143         struct rte_flow_item_ecpri mask_lo;
3144
3145         if (!(last_item & outer_l2_vlan) &&
3146             last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3147                 return rte_flow_error_set(error, EINVAL,
3148                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3149                                           "eCPRI can only follow L2/VLAN layer or UDP layer");
3150         if ((last_item & outer_l2_vlan) && ether_type &&
3151             ether_type != RTE_ETHER_TYPE_ECPRI)
3152                 return rte_flow_error_set(error, EINVAL,
3153                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3154                                           "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
3155         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3156                 return rte_flow_error_set(error, EINVAL,
3157                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3158                                           "eCPRI with tunnel is not supported right now");
3159         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3160                 return rte_flow_error_set(error, ENOTSUP,
3161                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3162                                           "multiple L3 layers not supported");
3163         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3164                 return rte_flow_error_set(error, EINVAL,
3165                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3166                                           "eCPRI cannot coexist with a TCP layer");
3167         /* In specification, eCPRI could be over UDP layer. */
3168         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3169                 return rte_flow_error_set(error, EINVAL,
3170                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3171                                           "eCPRI over UDP layer is not supported yet");
3172         /* Mask for type field in common header could be zero. */
3173         if (!mask)
3174                 mask = &rte_flow_item_ecpri_mask;
3175         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3176         /* Input mask is in big-endian format. */
3177         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3178                 return rte_flow_error_set(error, EINVAL,
3179                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3180                                           "partial mask is not supported for protocol");
3181         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3182                 return rte_flow_error_set(error, EINVAL,
3183                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3184                                           "message header mask must be after a type mask");
3185         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3186                                          acc_mask ? (const uint8_t *)acc_mask
3187                                                   : (const uint8_t *)&nic_mask,
3188                                          sizeof(struct rte_flow_item_ecpri),
3189                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3190 }
3191
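/*
 * Illustrative sketch (not part of the driver's logic): eCPRI over
 * Ethernet. Per the checks above, the preceding L2 item must carry the
 * eCPRI ether type (0xAEFE) and the common-header type mask must be either
 * empty or full. The message type and function name are hypothetical.
 */
static __rte_unused int
example_validate_ecpri_flow(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_eth eth_spec = {
		.type = RTE_BE16(RTE_ETHER_TYPE_ECPRI),
	};
	struct rte_flow_item_ecpri ecpri_spec = {
		.hdr = {
			.common = {
				.u32 =
				RTE_BE32(((const struct rte_ecpri_common_hdr) {
					.type = RTE_ECPRI_MSG_TYPE_RTC_CTRL,
					}).u32),
			},
		},
	};
	struct rte_flow_item_ecpri ecpri_mask = {
		.hdr = {
			.common = {
				.u32 =
				RTE_BE32(((const struct rte_ecpri_common_hdr) {
					.type = 0xFF,
					}).u32),
			},
		},
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &eth_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_ECPRI,
		  .spec = &ecpri_spec, .mask = &ecpri_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &err);
}
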
3192 static int
3193 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3194                    const struct rte_flow_attr *attr __rte_unused,
3195                    const struct rte_flow_item items[] __rte_unused,
3196                    const struct rte_flow_action actions[] __rte_unused,
3197                    bool external __rte_unused,
3198                    int hairpin __rte_unused,
3199                    struct rte_flow_error *error)
3200 {
3201         return rte_flow_error_set(error, ENOTSUP,
3202                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3203 }
3204
3205 static struct mlx5_flow *
3206 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3207                   const struct rte_flow_attr *attr __rte_unused,
3208                   const struct rte_flow_item items[] __rte_unused,
3209                   const struct rte_flow_action actions[] __rte_unused,
3210                   struct rte_flow_error *error)
3211 {
3212         rte_flow_error_set(error, ENOTSUP,
3213                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3214         return NULL;
3215 }
3216
3217 static int
3218 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3219                     struct mlx5_flow *dev_flow __rte_unused,
3220                     const struct rte_flow_attr *attr __rte_unused,
3221                     const struct rte_flow_item items[] __rte_unused,
3222                     const struct rte_flow_action actions[] __rte_unused,
3223                     struct rte_flow_error *error)
3224 {
3225         return rte_flow_error_set(error, ENOTSUP,
3226                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3227 }
3228
3229 static int
3230 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3231                 struct rte_flow *flow __rte_unused,
3232                 struct rte_flow_error *error)
3233 {
3234         return rte_flow_error_set(error, ENOTSUP,
3235                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3236 }
3237
3238 static void
3239 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3240                  struct rte_flow *flow __rte_unused)
3241 {
3242 }
3243
3244 static void
3245 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3246                   struct rte_flow *flow __rte_unused)
3247 {
3248 }
3249
3250 static int
3251 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3252                 struct rte_flow *flow __rte_unused,
3253                 const struct rte_flow_action *actions __rte_unused,
3254                 void *data __rte_unused,
3255                 struct rte_flow_error *error)
3256 {
3257         return rte_flow_error_set(error, ENOTSUP,
3258                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3259 }
3260
3261 static int
3262 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3263                       uint32_t domains __rte_unused,
3264                       uint32_t flags __rte_unused)
3265 {
3266         return 0;
3267 }
3268
3269 /* Void driver to protect from null pointer reference. */
3270 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3271         .validate = flow_null_validate,
3272         .prepare = flow_null_prepare,
3273         .translate = flow_null_translate,
3274         .apply = flow_null_apply,
3275         .remove = flow_null_remove,
3276         .destroy = flow_null_destroy,
3277         .query = flow_null_query,
3278         .sync_domain = flow_null_sync_domain,
3279 };
3280
3281 /**
3282  * Select flow driver type according to flow attributes and device
3283  * configuration.
3284  *
3285  * @param[in] dev
3286  *   Pointer to the dev structure.
3287  * @param[in] attr
3288  *   Pointer to the flow attributes.
3289  *
3290  * @return
3291  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3292  */
3293 static enum mlx5_flow_drv_type
3294 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3295 {
3296         struct mlx5_priv *priv = dev->data->dev_private;
3297         /* The OS may determine a specific flow type (DV, VERBS) first. */
3298         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3299
3300         if (type != MLX5_FLOW_TYPE_MAX)
3301                 return type;
3302         /* If no OS specific type - continue with DV/VERBS selection */
3303         if (attr->transfer && priv->config.dv_esw_en)
3304                 type = MLX5_FLOW_TYPE_DV;
3305         if (!attr->transfer)
3306                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3307                                                  MLX5_FLOW_TYPE_VERBS;
3308         return type;
3309 }
3310
3311 #define flow_get_drv_ops(type) flow_drv_ops[type]
3312
3313 /**
3314  * Flow driver validation API. This abstracts calling driver specific functions.
3315  * The type of flow driver is determined according to flow attributes.
3316  *
3317  * @param[in] dev
3318  *   Pointer to the dev structure.
3319  * @param[in] attr
3320  *   Pointer to the flow attributes.
3321  * @param[in] items
3322  *   Pointer to the list of items.
3323  * @param[in] actions
3324  *   Pointer to the list of actions.
3325  * @param[in] external
3326  *   This flow rule is created by a request external to the PMD.
3327  * @param[in] hairpin
3328  *   Number of hairpin TX actions, 0 means classic flow.
3329  * @param[out] error
3330  *   Pointer to the error structure.
3331  *
3332  * @return
3333  *   0 on success, a negative errno value otherwise and rte_errno is set.
3334  */
3335 static inline int
3336 flow_drv_validate(struct rte_eth_dev *dev,
3337                   const struct rte_flow_attr *attr,
3338                   const struct rte_flow_item items[],
3339                   const struct rte_flow_action actions[],
3340                   bool external, int hairpin, struct rte_flow_error *error)
3341 {
3342         const struct mlx5_flow_driver_ops *fops;
3343         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3344
3345         fops = flow_get_drv_ops(type);
3346         return fops->validate(dev, attr, items, actions, external,
3347                               hairpin, error);
3348 }
3349
3350 /**
3351  * Flow driver preparation API. This abstracts calling driver specific
3352  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3353  * calculates the size of memory required for device flow, allocates the memory,
3354  * initializes the device flow and returns the pointer.
3355  *
3356  * @note
3357  *   This function initializes device flow structure such as dv or verbs in
3358  *   struct mlx5_flow. However, it is caller's responsibility to initialize the
3359  *   rest. For example, adding the returned device flow to the flow->dev_flow
3360  *   list and setting the backward reference to the flow should be done outside
3361  *   of this function. The layers field is not filled either.
3362  *
3363  * @param[in] dev
3364  *   Pointer to the dev structure.
3365  * @param[in] attr
3366  *   Pointer to the flow attributes.
3367  * @param[in] items
3368  *   Pointer to the list of items.
3369  * @param[in] actions
3370  *   Pointer to the list of actions.
3371  * @param[in] flow_idx
3372  *   Memory pool index of this flow.
3373  * @param[out] error
3374  *   Pointer to the error structure.
3375  *
3376  * @return
3377  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3378  */
3379 static inline struct mlx5_flow *
3380 flow_drv_prepare(struct rte_eth_dev *dev,
3381                  const struct rte_flow *flow,
3382                  const struct rte_flow_attr *attr,
3383                  const struct rte_flow_item items[],
3384                  const struct rte_flow_action actions[],
3385                  uint32_t flow_idx,
3386                  struct rte_flow_error *error)
3387 {
3388         const struct mlx5_flow_driver_ops *fops;
3389         enum mlx5_flow_drv_type type = flow->drv_type;
3390         struct mlx5_flow *mlx5_flow = NULL;
3391
3392         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3393         fops = flow_get_drv_ops(type);
3394         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3395         if (mlx5_flow)
3396                 mlx5_flow->flow_idx = flow_idx;
3397         return mlx5_flow;
3398 }
3399
3400 /**
3401  * Flow driver translation API. This abstracts calling driver specific
3402  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3403  * translates a generic flow into a driver flow. flow_drv_prepare() must
3404  * precede.
3405  *
3406  * @note
3407  *   dev_flow->layers could be filled as a result of parsing during translation
3408  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3409  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3410  *   flow->actions could be overwritten even though all the expanded dev_flows
3411  *   have the same actions.
3412  *
3413  * @param[in] dev
3414  *   Pointer to the rte dev structure.
3415  * @param[in, out] dev_flow
3416  *   Pointer to the mlx5 flow.
3417  * @param[in] attr
3418  *   Pointer to the flow attributes.
3419  * @param[in] items
3420  *   Pointer to the list of items.
3421  * @param[in] actions
3422  *   Pointer to the list of actions.
3423  * @param[out] error
3424  *   Pointer to the error structure.
3425  *
3426  * @return
3427  *   0 on success, a negative errno value otherwise and rte_errno is set.
3428  */
3429 static inline int
3430 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3431                    const struct rte_flow_attr *attr,
3432                    const struct rte_flow_item items[],
3433                    const struct rte_flow_action actions[],
3434                    struct rte_flow_error *error)
3435 {
3436         const struct mlx5_flow_driver_ops *fops;
3437         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3438
3439         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3440         fops = flow_get_drv_ops(type);
3441         return fops->translate(dev, dev_flow, attr, items, actions, error);
3442 }
3443
3444 /**
3445  * Flow driver apply API. This abstracts calling driver specific functions.
3446  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3447  * translated driver flows on to device. flow_drv_translate() must precede.
3448  *
3449  * @param[in] dev
3450  *   Pointer to Ethernet device structure.
3451  * @param[in, out] flow
3452  *   Pointer to flow structure.
3453  * @param[out] error
3454  *   Pointer to error structure.
3455  *
3456  * @return
3457  *   0 on success, a negative errno value otherwise and rte_errno is set.
3458  */
3459 static inline int
3460 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3461                struct rte_flow_error *error)
3462 {
3463         const struct mlx5_flow_driver_ops *fops;
3464         enum mlx5_flow_drv_type type = flow->drv_type;
3465
3466         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3467         fops = flow_get_drv_ops(type);
3468         return fops->apply(dev, flow, error);
3469 }
3470
3471 /**
3472  * Flow driver destroy API. This abstracts calling driver specific functions.
3473  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3474  * on device and releases resources of the flow.
3475  *
3476  * @param[in] dev
3477  *   Pointer to Ethernet device.
3478  * @param[in, out] flow
3479  *   Pointer to flow structure.
3480  */
3481 static inline void
3482 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3483 {
3484         const struct mlx5_flow_driver_ops *fops;
3485         enum mlx5_flow_drv_type type = flow->drv_type;
3486
3487         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3488         fops = flow_get_drv_ops(type);
3489         fops->destroy(dev, flow);
3490 }
3491
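/*
 * Illustrative call order only (simplified): the actual flow creation path
 * in this file also handles RSS expansion, multiple device flows per
 * rte_flow and rollback on failure.
 *
 *   flow_drv_validate(dev, attr, items, actions, true, 0, &error);
 *   dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, idx, &error);
 *   flow_drv_translate(dev, dev_flow, attr, items, actions, &error);
 *   flow_drv_apply(dev, flow, &error);
 *   ...
 *   flow_drv_destroy(dev, flow);
 */
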
3492 /**
3493  * Flow driver find RSS policy tbl API. This abstracts calling driver
3494  * specific functions. Parent flow (rte_flow) should have driver
3495  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3496  *
3497  * @param[in] dev
3498  *   Pointer to Ethernet device.
3499  * @param[in, out] flow
3500  *   Pointer to flow structure.
3501  * @param[in] policy
3502  *   Pointer to meter policy table.
3503  * @param[in] rss_desc
3504  *   Pointer to the rss_desc array, one entry per meter color.
 *
 * @return
 *   Pointer to the meter sub policy on success, NULL otherwise.
3505  */
3506 static struct mlx5_flow_meter_sub_policy *
3507 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3508                 struct rte_flow *flow,
3509                 struct mlx5_flow_meter_policy *policy,
3510                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3511 {
3512         const struct mlx5_flow_driver_ops *fops;
3513         enum mlx5_flow_drv_type type = flow->drv_type;
3514
3515         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3516         fops = flow_get_drv_ops(type);
3517         return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3518 }
3519
3520 /**
3521  * Flow driver color tag rule API. This abstracts calling driver
3522  * specific functions. Parent flow (rte_flow) should have driver
3523  * type (drv_type). It will create the color tag rules in the meter hierarchy.
3524  *
3525  * @param[in] dev
3526  *   Pointer to Ethernet device.
3527  * @param[in, out] flow
3528  *   Pointer to flow structure.
3529  * @param[in] fm
3530  *   Pointer to flow meter structure.
3531  * @param[in] src_port
3532  *   The src port this extra rule should use.
3533  * @param[in] item
3534  *   The src port id match item.
3535  * @param[out] error
3536  *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
3537  */
3538 static int
3539 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
3540                 struct rte_flow *flow,
3541                 struct mlx5_flow_meter_info *fm,
3542                 int32_t src_port,
3543                 const struct rte_flow_item *item,
3544                 struct rte_flow_error *error)
3545 {
3546         const struct mlx5_flow_driver_ops *fops;
3547         enum mlx5_flow_drv_type type = flow->drv_type;
3548
3549         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3550         fops = flow_get_drv_ops(type);
3551         return fops->meter_hierarchy_rule_create(dev, fm,
3552                                                 src_port, item, error);
3553 }
3554
3555 /**
3556  * Get RSS action from the action list.
3557  *
3558  * @param[in] dev
3559  *   Pointer to Ethernet device.
3560  * @param[in] actions
3561  *   Pointer to the list of actions.
3564  *
3565  * @return
3566  *   Pointer to the RSS action if it exists, NULL otherwise.
3567  */
3568 static const struct rte_flow_action_rss*
3569 flow_get_rss_action(struct rte_eth_dev *dev,
3570                     const struct rte_flow_action actions[])
3571 {
3572         struct mlx5_priv *priv = dev->data->dev_private;
3573         const struct rte_flow_action_rss *rss = NULL;
3574
3575         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3576                 switch (actions->type) {
3577                 case RTE_FLOW_ACTION_TYPE_RSS:
3578                         rss = actions->conf;
3579                         break;
3580                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
3581                 {
3582                         const struct rte_flow_action_sample *sample =
3583                                                                 actions->conf;
3584                         const struct rte_flow_action *act = sample->actions;
3585                         for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3586                                 if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3587                                         rss = act->conf;
3588                         break;
3589                 }
3590                 case RTE_FLOW_ACTION_TYPE_METER:
3591                 {
3592                         uint32_t mtr_idx;
3593                         struct mlx5_flow_meter_info *fm;
3594                         struct mlx5_flow_meter_policy *policy;
3595                         const struct rte_flow_action_meter *mtr = actions->conf;
3596
3597                         fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
3598                         if (fm && !fm->def_policy) {
3599                                 policy = mlx5_flow_meter_policy_find(dev,
3600                                                 fm->policy_id, NULL);
3601                                 MLX5_ASSERT(policy);
3602                                 if (policy->is_hierarchy) {
3603                                         policy =
3604                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
3605                                                                         policy);
3606                                         if (!policy)
3607                                                 return NULL;
3608                                 }
3609                                 if (policy->is_rss)
3610                                         rss =
3611                                 policy->act_cnt[RTE_COLOR_GREEN].rss->conf;
3612                         }
3613                         break;
3614                 }
3615                 default:
3616                         break;
3617                 }
3618         }
3619         return rss;
3620 }
3621
3622 /**
3623  * Get ASO age action by index.
3624  *
3625  * @param[in] dev
3626  *   Pointer to the Ethernet device structure.
3627  * @param[in] age_idx
3628  *   Index to the ASO age action.
3629  *
3630  * @return
3631  *   The specified ASO age action.
3632  */
3633 struct mlx5_aso_age_action*
3634 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
3635 {
3636         uint16_t pool_idx = age_idx & UINT16_MAX;
3637         uint16_t offset = (age_idx >> 16) & UINT16_MAX;
3638         struct mlx5_priv *priv = dev->data->dev_private;
3639         struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
3640         struct mlx5_aso_age_pool *pool = mng->pools[pool_idx];
3641
3642         return &pool->actions[offset - 1];
3643 }
3644
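/*
 * Index layout used above, for illustration: the lower 16 bits select the
 * pool and the upper 16 bits hold the 1-based offset of the action within
 * that pool. E.g. age_idx 0x00030002 resolves to pool 2, actions[2]
 * (offset 3 minus 1).
 */
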
3645 /* Maps an indirect action to its translated direct action in the actions array. */
3646 struct mlx5_translated_action_handle {
3647         struct rte_flow_action_handle *action; /**< Indirect action handle. */
3648         int index; /**< Index in related array of rte_flow_action. */
3649 };
3650
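/*
 * Illustrative sketch (not part of the driver's logic): how an application
 * ends up with an RTE_FLOW_ACTION_TYPE_INDIRECT entry that the translation
 * below turns back into a direct RSS action. Queue list, RSS types and the
 * function name are hypothetical.
 */
static __rte_unused struct rte_flow_action_handle *
example_create_indirect_rss(uint16_t port_id, const uint16_t queues[],
			    uint32_t n_queues)
{
	struct rte_flow_indir_action_conf conf = { .ingress = 1 };
	struct rte_flow_action_rss rss = {
		.types = ETH_RSS_IP,
		.queue_num = n_queues,
		.queue = queues,
	};
	struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_RSS,
		.conf = &rss,
	};
	struct rte_flow_error err;

	/*
	 * The returned handle is later referenced from a flow rule as:
	 *   { .type = RTE_FLOW_ACTION_TYPE_INDIRECT, .conf = handle }
	 */
	return rte_flow_action_handle_create(port_id, &conf, &action, &err);
}
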
3651 /**
3652  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to related
3653  * direct action if translation possible.
3654  * This functionality used to run same execution path for both direct and
3655  * indirect actions on flow create. All necessary preparations for indirect
3656  * action handling should be performed on *handle* actions list returned
3657  * from this call.
3658  *
3659  * @param[in] dev
3660  *   Pointer to Ethernet device.
3661  * @param[in] actions
3662  *   List of actions to translate.
3663  * @param[out] handle
3664  *   List to store translated indirect action object handles.
3665  * @param[in, out] indir_n
3666  *   Size of *handle* array. On return should be updated with number of
3667  *   indirect actions retrieved from the *actions* list.
3668  * @param[out] translated_actions
3669  *   List of actions where all indirect actions were translated to direct
3670  *   if possible. NULL if no translation took place.
3671  * @param[out] error
3672  *   Pointer to the error structure.
3673  *
3674  * @return
3675  *   0 on success, a negative errno value otherwise and rte_errno is set.
3676  */
3677 static int
3678 flow_action_handles_translate(struct rte_eth_dev *dev,
3679                               const struct rte_flow_action actions[],
3680                               struct mlx5_translated_action_handle *handle,
3681                               int *indir_n,
3682                               struct rte_flow_action **translated_actions,
3683                               struct rte_flow_error *error)
3684 {
3685         struct mlx5_priv *priv = dev->data->dev_private;
3686         struct rte_flow_action *translated = NULL;
3687         size_t actions_size;
3688         int n;
3689         int copied_n = 0;
3690         struct mlx5_translated_action_handle *handle_end = NULL;
3691
3692         for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
3693                 if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
3694                         continue;
3695                 if (copied_n == *indir_n) {
3696                         return rte_flow_error_set
3697                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
3698                                  NULL, "too many shared actions");
3699                 }
3700                 rte_memcpy(&handle[copied_n].action, &actions[n].conf,
3701                            sizeof(actions[n].conf));
3702                 handle[copied_n].index = n;
3703                 copied_n++;
3704         }
3705         n++;
3706         *indir_n = copied_n;
3707         if (!copied_n)
3708                 return 0;
3709         actions_size = sizeof(struct rte_flow_action) * n;
3710         translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
3711         if (!translated) {
3712                 rte_errno = ENOMEM;
3713                 return -ENOMEM;
3714         }
3715         memcpy(translated, actions, actions_size);
3716         for (handle_end = handle + copied_n; handle < handle_end; handle++) {
3717                 struct mlx5_shared_action_rss *shared_rss;
3718                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3719                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3720                 uint32_t idx = act_idx &
3721                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3722
3723                 switch (type) {
3724                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3725                         shared_rss = mlx5_ipool_get
3726                           (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
3727                         translated[handle->index].type =
3728                                 RTE_FLOW_ACTION_TYPE_RSS;
3729                         translated[handle->index].conf =
3730                                 &shared_rss->origin;
3731                         break;
3732                 case MLX5_INDIRECT_ACTION_TYPE_COUNT:
3733                         translated[handle->index].type =
3734                                                 (enum rte_flow_action_type)
3735                                                 MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
3736                         translated[handle->index].conf = (void *)(uintptr_t)idx;
3737                         break;
3738                 case MLX5_INDIRECT_ACTION_TYPE_AGE:
3739                         if (priv->sh->flow_hit_aso_en) {
3740                                 translated[handle->index].type =
3741                                         (enum rte_flow_action_type)
3742                                         MLX5_RTE_FLOW_ACTION_TYPE_AGE;
3743                                 translated[handle->index].conf =
3744                                                          (void *)(uintptr_t)idx;
3745                                 break;
3746                         }
3747                         /* Fall-through */
3748                 case MLX5_INDIRECT_ACTION_TYPE_CT:
3749                         if (priv->sh->ct_aso_en) {
3750                                 translated[handle->index].type =
3751                                         RTE_FLOW_ACTION_TYPE_CONNTRACK;
3752                                 translated[handle->index].conf =
3753                                                          (void *)(uintptr_t)idx;
3754                                 break;
3755                         }
3756                         /* Fall-through */
3757                 default:
3758                         mlx5_free(translated);
3759                         return rte_flow_error_set
3760                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
3761                                  NULL, "invalid indirect action type");
3762                 }
3763         }
3764         *translated_actions = translated;
3765         return 0;
3766 }
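
/*
 * Illustrative sketch (not part of the original code): decoding an indirect
 * action handle into its type and pool index, mirroring what
 * flow_action_handles_translate() does above. The handle pointer itself
 * carries the encoded value.
 */
static __rte_unused void
flow_indirect_handle_decode_sketch(const struct rte_flow_action_handle *handle,
                                   uint32_t *type, uint32_t *idx)
{
        uint32_t act_idx = (uint32_t)(uintptr_t)handle;

        *type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
        *idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
}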
3767
3768 /**
3769  * Get Shared RSS action from the action list.
3770  *
3771  * @param[in] dev
3772  *   Pointer to Ethernet device.
3773  * @param[in] handle
3774  *   Pointer to the list of translated action handles.
3775  * @param[in] shared_n
3776  *   Number of handles in the list.
3777  *
3778  * @return
3779  *   The MLX5 RSS action ID if it exists, otherwise 0.
3780  */
3781 static uint32_t
3782 flow_get_shared_rss_action(struct rte_eth_dev *dev,
3783                            struct mlx5_translated_action_handle *handle,
3784                            int shared_n)
3785 {
3786         struct mlx5_translated_action_handle *handle_end;
3787         struct mlx5_priv *priv = dev->data->dev_private;
3788         struct mlx5_shared_action_rss *shared_rss;
3789
3790
3791         for (handle_end = handle + shared_n; handle < handle_end; handle++) {
3792                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3793                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3794                 uint32_t idx = act_idx &
3795                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3796                 switch (type) {
3797                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3798                         shared_rss = mlx5_ipool_get
3799                                 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
3800                                                                            idx);
3801                         __atomic_add_fetch(&shared_rss->refcnt, 1,
3802                                            __ATOMIC_RELAXED);
3803                         return idx;
3804                 default:
3805                         break;
3806                 }
3807         }
3808         return 0;
3809 }
3810
3811 static unsigned int
3812 find_graph_root(uint32_t rss_level)
3813 {
3814         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
3815                                MLX5_EXPANSION_ROOT_OUTER;
3816 }
3817
3818 /**
3819  *  Get layer flags from the prefix flow.
3820  *
3821  *  Some flows may be split into several subflows: the prefix subflow gets
3822  *  the match items and the suffix subflow gets the actions.
3823  *  Some actions need the user-defined match item flags to get the details
3824  *  for the action.
3825  *  This function helps the suffix flow to get the item layer flags from the
3826  *  prefix subflow.
3827  *
3828  * @param[in] dev_flow
3829  *   Pointer to the created prefix subflow.
3830  *
3831  * @return
3832  *   The layer flags obtained from the prefix subflow.
3833  */
3834 static inline uint64_t
3835 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
3836 {
3837         uint64_t layers = 0;
3838
3839         /*
3840          * The layer bits could be cached in a local variable, but usually
3841          * the compiler will do that optimization on its own.
3842          * If there is no decap action, use the layers directly.
3843          */
3844         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
3845                 return dev_flow->handle->layers;
3846         /* Convert L3 layers with decap action. */
3847         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
3848                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3849         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
3850                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3851         /* Convert L4 layers with decap action.  */
3852         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
3853                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
3854         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
3855                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
3856         return layers;
3857 }
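
/*
 * Illustrative sketch (assumption, for documentation only): with a decap
 * action present, inner IPv4/UDP layer flags from the prefix subflow are
 * reported as outer flags for the suffix subflow.
 */
static __rte_unused uint64_t
flow_prefix_layer_flags_sketch(void)
{
        struct mlx5_flow dev_flow = {0};
        struct mlx5_flow_handle dev_handle = { {0} };

        dev_flow.handle = &dev_handle;
        dev_flow.act_flags = MLX5_FLOW_ACTION_DECAP;
        dev_handle.layers = MLX5_FLOW_LAYER_INNER_L3_IPV4 |
                            MLX5_FLOW_LAYER_INNER_L4_UDP;
        /*
         * Returns MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
         * MLX5_FLOW_LAYER_OUTER_L4_UDP.
         */
        return flow_get_prefix_layer_flags(&dev_flow);
}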
3858
3859 /**
3860  * Get metadata split action information.
3861  *
3862  * @param[in] actions
3863  *   Pointer to the list of actions.
3864  * @param[out] qrss
3865  *   Location to store a pointer to the QUEUE/RSS action; left untouched if
3866  *   no QUEUE/RSS action is found.
3867  * @param[out] encap_idx
3868  *   Pointer to the index of the encap action if it exists, otherwise the
3869  *   index of the END action.
3872  *
3873  * @return
3874  *   Total number of actions.
3875  */
3876 static int
3877 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
3878                                        const struct rte_flow_action **qrss,
3879                                        int *encap_idx)
3880 {
3881         const struct rte_flow_action_raw_encap *raw_encap;
3882         int actions_n = 0;
3883         int raw_decap_idx = -1;
3884
3885         *encap_idx = -1;
3886         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3887                 switch (actions->type) {
3888                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3889                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3890                         *encap_idx = actions_n;
3891                         break;
3892                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3893                         raw_decap_idx = actions_n;
3894                         break;
3895                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3896                         raw_encap = actions->conf;
3897                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3898                                 *encap_idx = raw_decap_idx != -1 ?
3899                                                       raw_decap_idx : actions_n;
3900                         break;
3901                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3902                 case RTE_FLOW_ACTION_TYPE_RSS:
3903                         *qrss = actions;
3904                         break;
3905                 default:
3906                         break;
3907                 }
3908                 actions_n++;
3909         }
3910         if (*encap_idx == -1)
3911                 *encap_idx = actions_n;
3912         /* Count RTE_FLOW_ACTION_TYPE_END. */
3913         return actions_n + 1;
3914 }
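
/*
 * Illustrative usage sketch (assumption, not called by the driver): for a
 * simple QUEUE + END action list the helper above reports the QUEUE action
 * through *qrss, sets encap_idx to the END index and returns the total
 * action count including the END terminator.
 */
static __rte_unused int
flow_metadata_split_info_sketch(void)
{
        const struct rte_flow_action_queue queue = { .index = 0 };
        const struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                { .type = RTE_FLOW_ACTION_TYPE_END, },
        };
        const struct rte_flow_action *qrss = NULL;
        int encap_idx = -1;

        /* Returns 2; qrss points at actions[0], encap_idx is set to 1. */
        return flow_parse_metadata_split_actions_info(actions, &qrss,
                                                      &encap_idx);
}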
3915
3916 /**
3917  * Check if the action will change packet.
3918  *
3919  * @param dev
3920  *   Pointer to Ethernet device.
3921  * @param[in] type
3922  *   Action type.
3923  *
3924  * @return
3925  *   true if the action will change the packet, false otherwise.
3926  */
3927 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
3928                                           enum rte_flow_action_type type)
3929 {
3930         struct mlx5_priv *priv = dev->data->dev_private;
3931
3932         switch (type) {
3933         case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
3934         case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
3935         case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
3936         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
3937         case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
3938         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
3939         case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
3940         case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
3941         case RTE_FLOW_ACTION_TYPE_DEC_TTL:
3942         case RTE_FLOW_ACTION_TYPE_SET_TTL:
3943         case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
3944         case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
3945         case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
3946         case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
3947         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
3948         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
3949         case RTE_FLOW_ACTION_TYPE_SET_META:
3950         case RTE_FLOW_ACTION_TYPE_SET_TAG:
3951         case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
3952         case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3953         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3954         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3955         case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3956         case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
3957         case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3958         case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
3959         case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3960         case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3961         case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
3962                 return true;
3963         case RTE_FLOW_ACTION_TYPE_FLAG:
3964         case RTE_FLOW_ACTION_TYPE_MARK:
3965                 if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
3966                         return true;
3967                 else
3968                         return false;
3969         default:
3970                 return false;
3971         }
3972 }
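
/*
 * Illustrative sketch (assumption): header-rewrite actions such as DEC_TTL
 * are always reported as packet-modifying, while pure fate actions such as
 * QUEUE are not; FLAG/MARK depend on the dv_xmeta_en configuration.
 */
static __rte_unused bool
flow_modify_action_type_sketch(struct rte_eth_dev *dev)
{
        /* Returns true; RTE_FLOW_ACTION_TYPE_QUEUE would return false. */
        return flow_check_modify_action_type(dev, RTE_FLOW_ACTION_TYPE_DEC_TTL);
}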
3973
3974 /**
3975  * Check meter action from the action list.
3976  *
3977  * @param dev
3978  *   Pointer to Ethernet device.
3979  * @param[in] actions
3980  *   Pointer to the list of actions.
3981  * @param[out] has_mtr
3982  *   Pointer to the meter exist flag.
3983  * @param[out] has_modify
3984  *   Pointer to the flag set when a packet-modifying action precedes the meter.
3985  * @param[out] meter_id
3986  *   Pointer to the meter id.
3987  *
3988  * @return
3989  *   Total number of actions.
3990  */
3991 static int
3992 flow_check_meter_action(struct rte_eth_dev *dev,
3993                         const struct rte_flow_action actions[],
3994                         bool *has_mtr, bool *has_modify, uint32_t *meter_id)
3995 {
3996         const struct rte_flow_action_meter *mtr = NULL;
3997         int actions_n = 0;
3998
3999         MLX5_ASSERT(has_mtr);
4000         *has_mtr = false;
4001         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4002                 switch (actions->type) {
4003                 case RTE_FLOW_ACTION_TYPE_METER:
4004                         mtr = actions->conf;
4005                         *meter_id = mtr->mtr_id;
4006                         *has_mtr = true;
4007                         break;
4008                 default:
4009                         break;
4010                 }
4011                 if (!*has_mtr)
4012                         *has_modify |= flow_check_modify_action_type(dev,
4013                                                                 actions->type);
4014                 actions_n++;
4015         }
4016         /* Count RTE_FLOW_ACTION_TYPE_END. */
4017         return actions_n + 1;
4018 }
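
/*
 * Illustrative usage sketch (assumption, not called by the driver): a METER
 * action followed by SET_TTL. The meter is found first, so *has_modify stays
 * false because only actions preceding the meter are checked for packet
 * changes.
 */
static __rte_unused int
flow_check_meter_action_sketch(struct rte_eth_dev *dev)
{
        const struct rte_flow_action_meter meter = { .mtr_id = 1 };
        const struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_METER, .conf = &meter },
                { .type = RTE_FLOW_ACTION_TYPE_SET_TTL, },
                { .type = RTE_FLOW_ACTION_TYPE_END, },
        };
        bool has_mtr = false;
        bool has_modify = false;
        uint32_t meter_id = 0;

        /* Returns 3; has_mtr becomes true and meter_id is set to 1. */
        return flow_check_meter_action(dev, actions, &has_mtr, &has_modify,
                                       &meter_id);
}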
4019
4020 /**
4021  * Check if the flow should be split due to hairpin.
4022  * The reason for the split is that the current HW can't
4023  * support encap and push-vlan on Rx, so if a flow contains
4024  * these actions we move them to Tx.
4025  *
4026  * @param dev
4027  *   Pointer to Ethernet device.
4028  * @param[in] attr
4029  *   Flow rule attributes.
4030  * @param[in] actions
4031  *   Associated actions (list terminated by the END action).
4032  *
4033  * @return
4034  *   > 0 when the flow should be split, the value is the number of actions,
4035  *   0 when no split is required.
4036  */
4037 static int
4038 flow_check_hairpin_split(struct rte_eth_dev *dev,
4039                          const struct rte_flow_attr *attr,
4040                          const struct rte_flow_action actions[])
4041 {
4042         int queue_action = 0;
4043         int action_n = 0;
4044         int split = 0;
4045         const struct rte_flow_action_queue *queue;
4046         const struct rte_flow_action_rss *rss;
4047         const struct rte_flow_action_raw_encap *raw_encap;
4048         const struct rte_eth_hairpin_conf *conf;
4049
4050         if (!attr->ingress)
4051                 return 0;
4052         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4053                 switch (actions->type) {
4054                 case RTE_FLOW_ACTION_TYPE_QUEUE:
4055                         queue = actions->conf;
4056                         if (queue == NULL)
4057                                 return 0;
4058                         conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4059                         if (conf == NULL || conf->tx_explicit != 0)
4060                                 return 0;
4061                         queue_action = 1;
4062                         action_n++;
4063                         break;
4064                 case RTE_FLOW_ACTION_TYPE_RSS:
4065                         rss = actions->conf;
4066                         if (rss == NULL || rss->queue_num == 0)
4067                                 return 0;
4068                         conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4069                         if (conf == NULL || conf->tx_explicit != 0)
4070                                 return 0;
4071                         queue_action = 1;
4072                         action_n++;
4073                         break;
4074                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4075                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4076                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4077                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4078                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4079                         split++;
4080                         action_n++;
4081                         break;
4082                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4083                         raw_encap = actions->conf;
4084                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4085                                 split++;
4086                         action_n++;
4087                         break;
4088                 default:
4089                         action_n++;
4090                         break;
4091                 }
4092         }
4093         if (split && queue_action)
4094                 return action_n;
4095         return 0;
4096 }
4097
4098 /* Declare flow create/destroy prototype in advance. */
4099 static uint32_t
4100 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4101                  const struct rte_flow_attr *attr,
4102                  const struct rte_flow_item items[],
4103                  const struct rte_flow_action actions[],
4104                  bool external, struct rte_flow_error *error);
4105
4106 static void
4107 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4108                   uint32_t flow_idx);
4109
4110 int
4111 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4112                       struct mlx5_list_entry *entry, void *cb_ctx)
4113 {
4114         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4115         struct mlx5_flow_mreg_copy_resource *mcp_res =
4116                                container_of(entry, typeof(*mcp_res), hlist_ent);
4117
4118         return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4119 }
4120
4121 struct mlx5_list_entry *
4122 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4123 {
4124         struct rte_eth_dev *dev = tool_ctx;
4125         struct mlx5_priv *priv = dev->data->dev_private;
4126         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4127         struct mlx5_flow_mreg_copy_resource *mcp_res;
4128         struct rte_flow_error *error = ctx->error;
4129         uint32_t idx = 0;
4130         int ret;
4131         uint32_t mark_id = *(uint32_t *)(ctx->data);
4132         struct rte_flow_attr attr = {
4133                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4134                 .ingress = 1,
4135         };
4136         struct mlx5_rte_flow_item_tag tag_spec = {
4137                 .data = mark_id,
4138         };
4139         struct rte_flow_item items[] = {
4140                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4141         };
4142         struct rte_flow_action_mark ftag = {
4143                 .id = mark_id,
4144         };
4145         struct mlx5_flow_action_copy_mreg cp_mreg = {
4146                 .dst = REG_B,
4147                 .src = REG_NON,
4148         };
4149         struct rte_flow_action_jump jump = {
4150                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4151         };
4152         struct rte_flow_action actions[] = {
4153                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4154         };
4155
4156         /* Fill the register fields in the flow. */
4157         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4158         if (ret < 0)
4159                 return NULL;
4160         tag_spec.id = ret;
4161         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4162         if (ret < 0)
4163                 return NULL;
4164         cp_mreg.src = ret;
4165         /* Provide the full width of FLAG specific value. */
4166         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4167                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4168         /* Build a new flow. */
4169         if (mark_id != MLX5_DEFAULT_COPY_ID) {
4170                 items[0] = (struct rte_flow_item){
4171                         .type = (enum rte_flow_item_type)
4172                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4173                         .spec = &tag_spec,
4174                 };
4175                 items[1] = (struct rte_flow_item){
4176                         .type = RTE_FLOW_ITEM_TYPE_END,
4177                 };
4178                 actions[0] = (struct rte_flow_action){
4179                         .type = (enum rte_flow_action_type)
4180                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4181                         .conf = &ftag,
4182                 };
4183                 actions[1] = (struct rte_flow_action){
4184                         .type = (enum rte_flow_action_type)
4185                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4186                         .conf = &cp_mreg,
4187                 };
4188                 actions[2] = (struct rte_flow_action){
4189                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4190                         .conf = &jump,
4191                 };
4192                 actions[3] = (struct rte_flow_action){
4193                         .type = RTE_FLOW_ACTION_TYPE_END,
4194                 };
4195         } else {
4196                 /* Default rule, wildcard match. */
4197                 attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4198                 items[0] = (struct rte_flow_item){
4199                         .type = RTE_FLOW_ITEM_TYPE_END,
4200                 };
4201                 actions[0] = (struct rte_flow_action){
4202                         .type = (enum rte_flow_action_type)
4203                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4204                         .conf = &cp_mreg,
4205                 };
4206                 actions[1] = (struct rte_flow_action){
4207                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4208                         .conf = &jump,
4209                 };
4210                 actions[2] = (struct rte_flow_action){
4211                         .type = RTE_FLOW_ACTION_TYPE_END,
4212                 };
4213         }
4214         /* Build a new entry. */
4215         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4216         if (!mcp_res) {
4217                 rte_errno = ENOMEM;
4218                 return NULL;
4219         }
4220         mcp_res->idx = idx;
4221         mcp_res->mark_id = mark_id;
4222         /*
4223          * The copy flows are not included in any list. These
4224          * ones are referenced from other flows and cannot
4225          * be applied, removed or deleted in arbitrary order
4226          * by list traversal.
4227          */
4228         mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4229                                         &attr, items, actions, false, error);
4230         if (!mcp_res->rix_flow) {
4231                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4232                 return NULL;
4233         }
4234         return &mcp_res->hlist_ent;
4235 }
4236
4237 struct mlx5_list_entry *
4238 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4239                       void *cb_ctx __rte_unused)
4240 {
4241         struct rte_eth_dev *dev = tool_ctx;
4242         struct mlx5_priv *priv = dev->data->dev_private;
4243         struct mlx5_flow_mreg_copy_resource *mcp_res;
4244         uint32_t idx = 0;
4245
4246         mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4247         if (!mcp_res) {
4248                 rte_errno = ENOMEM;
4249                 return NULL;
4250         }
4251         memcpy(mcp_res, oentry, sizeof(*mcp_res));
4252         mcp_res->idx = idx;
4253         return &mcp_res->hlist_ent;
4254 }
4255
4256 void
4257 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4258 {
4259         struct mlx5_flow_mreg_copy_resource *mcp_res =
4260                                container_of(entry, typeof(*mcp_res), hlist_ent);
4261         struct rte_eth_dev *dev = tool_ctx;
4262         struct mlx5_priv *priv = dev->data->dev_private;
4263
4264         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4265 }
4266
4267 /**
4268  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4269  *
4270  * As mark_id is unique, if there's already a registered flow for the mark_id,
4271  * return by increasing the reference counter of the resource. Otherwise, create
4272  * the resource (mcp_res) and flow.
4273  *
4274  * Flow looks like,
4275  *   - If ingress port is ANY and reg_c[1] is mark_id,
4276  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4277  *
4278  * For default flow (zero mark_id), flow is like,
4279  *   - If ingress port is ANY,
4280  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4281  *
4282  * @param dev
4283  *   Pointer to Ethernet device.
4284  * @param mark_id
4285  *   ID of MARK action, zero means default flow for META.
4286  * @param[out] error
4287  *   Perform verbose error reporting if not NULL.
4288  *
4289  * @return
4290  *   Associated resource on success, NULL otherwise and rte_errno is set.
4291  */
4292 static struct mlx5_flow_mreg_copy_resource *
4293 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4294                           struct rte_flow_error *error)
4295 {
4296         struct mlx5_priv *priv = dev->data->dev_private;
4297         struct mlx5_list_entry *entry;
4298         struct mlx5_flow_cb_ctx ctx = {
4299                 .dev = dev,
4300                 .error = error,
4301                 .data = &mark_id,
4302         };
4303
4304         /* Check if already registered. */
4305         MLX5_ASSERT(priv->mreg_cp_tbl);
4306         entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4307         if (!entry)
4308                 return NULL;
4309         return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4310                             hlist_ent);
4311 }
4312
4313 void
4314 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4315 {
4316         struct mlx5_flow_mreg_copy_resource *mcp_res =
4317                                container_of(entry, typeof(*mcp_res), hlist_ent);
4318         struct rte_eth_dev *dev = tool_ctx;
4319         struct mlx5_priv *priv = dev->data->dev_private;
4320
4321         MLX5_ASSERT(mcp_res->rix_flow);
4322         flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4323         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4324 }
4325
4326 /**
4327  * Release flow in RX_CP_TBL.
4328  *
4329  * @param dev
4330  *   Pointer to Ethernet device.
4331  * @param flow
4332  *   Parent flow for which copying is provided.
4333  */
4334 static void
4335 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4336                           struct rte_flow *flow)
4337 {
4338         struct mlx5_flow_mreg_copy_resource *mcp_res;
4339         struct mlx5_priv *priv = dev->data->dev_private;
4340
4341         if (!flow->rix_mreg_copy)
4342                 return;
4343         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4344                                  flow->rix_mreg_copy);
4345         if (!mcp_res || !priv->mreg_cp_tbl)
4346                 return;
4347         MLX5_ASSERT(mcp_res->rix_flow);
4348         mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4349         flow->rix_mreg_copy = 0;
4350 }
4351
4352 /**
4353  * Remove the default copy action from RX_CP_TBL.
4354  *
4355  * This function is called in mlx5_dev_start(). Thread safety is not
4356  * guaranteed.
4357  *
4358  * @param dev
4359  *   Pointer to Ethernet device.
4360  */
4361 static void
4362 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4363 {
4364         struct mlx5_list_entry *entry;
4365         struct mlx5_priv *priv = dev->data->dev_private;
4366         struct mlx5_flow_cb_ctx ctx;
4367         uint32_t mark_id;
4368
4369         /* Check if default flow is registered. */
4370         if (!priv->mreg_cp_tbl)
4371                 return;
4372         mark_id = MLX5_DEFAULT_COPY_ID;
4373         ctx.data = &mark_id;
4374         entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4375         if (!entry)
4376                 return;
4377         mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4378 }
4379
4380 /**
4381  * Add the default copy action in RX_CP_TBL.
4382  *
4383  * This function is called in mlx5_dev_start(). Thread safety is not
4384  * guaranteed.
4385  *
4386  * @param dev
4387  *   Pointer to Ethernet device.
4388  * @param[out] error
4389  *   Perform verbose error reporting if not NULL.
4390  *
4391  * @return
4392  *   0 for success, negative value otherwise and rte_errno is set.
4393  */
4394 static int
4395 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4396                                   struct rte_flow_error *error)
4397 {
4398         struct mlx5_priv *priv = dev->data->dev_private;
4399         struct mlx5_flow_mreg_copy_resource *mcp_res;
4400         struct mlx5_flow_cb_ctx ctx;
4401         uint32_t mark_id;
4402
4403         /* Check whether extensive metadata feature is engaged. */
4404         if (!priv->config.dv_flow_en ||
4405             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4406             !mlx5_flow_ext_mreg_supported(dev) ||
4407             !priv->sh->dv_regc0_mask)
4408                 return 0;
4409         /*
4410          * Adding the default mreg copy flow may be called multiple times,
4411          * but it is only removed once in stop. Avoid registering it twice.
4412          */
4413         mark_id = MLX5_DEFAULT_COPY_ID;
4414         ctx.data = &mark_id;
4415         if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4416                 return 0;
4417         mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4418         if (!mcp_res)
4419                 return -rte_errno;
4420         return 0;
4421 }
4422
4423 /**
4424  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4425  *
4426  * All flows having a Q/RSS action should be split by
4427  * flow_mreg_split_qrss_prep() to pass through RX_CP_TBL. A flow in RX_CP_TBL
4428  * performs the following,
4429  *   - CQE->flow_tag := reg_c[1] (MARK)
4430  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4431  * As the CQE's flow_tag is not a register, it can't simply be copied from
4432  * reg_c[1]; there must be one such flow per MARK ID set by the MARK action.
4433  *
4434  * For the aforementioned reason, if there's a MARK action in flow's action
4435  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4436  * the MARK ID to CQE's flow_tag like,
4437  *   - If reg_c[1] is mark_id,
4438  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4439  *
4440  * For SET_META action which stores value in reg_c[0], as the destination is
4441  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4442  * MARK ID means the default flow. The default flow looks like,
4443  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4444  *
4445  * @param dev
4446  *   Pointer to Ethernet device.
4447  * @param flow
4448  *   Pointer to flow structure.
4449  * @param[in] actions
4450  *   Pointer to the list of actions.
4451  * @param[out] error
4452  *   Perform verbose error reporting if not NULL.
4453  *
4454  * @return
4455  *   0 on success, negative value otherwise and rte_errno is set.
4456  */
4457 static int
4458 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4459                             struct rte_flow *flow,
4460                             const struct rte_flow_action *actions,
4461                             struct rte_flow_error *error)
4462 {
4463         struct mlx5_priv *priv = dev->data->dev_private;
4464         struct mlx5_dev_config *config = &priv->config;
4465         struct mlx5_flow_mreg_copy_resource *mcp_res;
4466         const struct rte_flow_action_mark *mark;
4467
4468         /* Check whether extensive metadata feature is engaged. */
4469         if (!config->dv_flow_en ||
4470             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4471             !mlx5_flow_ext_mreg_supported(dev) ||
4472             !priv->sh->dv_regc0_mask)
4473                 return 0;
4474         /* Find MARK action. */
4475         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4476                 switch (actions->type) {
4477                 case RTE_FLOW_ACTION_TYPE_FLAG:
4478                         mcp_res = flow_mreg_add_copy_action
4479                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
4480                         if (!mcp_res)
4481                                 return -rte_errno;
4482                         flow->rix_mreg_copy = mcp_res->idx;
4483                         return 0;
4484                 case RTE_FLOW_ACTION_TYPE_MARK:
4485                         mark = (const struct rte_flow_action_mark *)
4486                                 actions->conf;
4487                         mcp_res =
4488                                 flow_mreg_add_copy_action(dev, mark->id, error);
4489                         if (!mcp_res)
4490                                 return -rte_errno;
4491                         flow->rix_mreg_copy = mcp_res->idx;
4492                         return 0;
4493                 default:
4494                         break;
4495                 }
4496         }
4497         return 0;
4498 }
4499
4500 #define MLX5_MAX_SPLIT_ACTIONS 24
4501 #define MLX5_MAX_SPLIT_ITEMS 24
4502
4503 /**
4504  * Split the hairpin flow.
4505  * Since HW can't support encap and push-vlan on Rx, we move these
4506  * actions to Tx.
4507  * If the count action comes after the encap, we also
4508  * move the count action; in this case the count will also measure
4509  * the outer bytes.
4510  *
4511  * @param dev
4512  *   Pointer to Ethernet device.
4513  * @param[in] actions
4514  *   Associated actions (list terminated by the END action).
4515  * @param[out] actions_rx
4516  *   Rx flow actions.
4517  * @param[out] actions_tx
4518  *   Tx flow actions.
4519  * @param[out] pattern_tx
4520  *   The pattern items for the Tx flow.
4521  * @param[out] flow_id
4522  *   The flow ID connected to this flow.
4523  *
4524  * @return
4525  *   0 on success.
4526  */
4527 static int
4528 flow_hairpin_split(struct rte_eth_dev *dev,
4529                    const struct rte_flow_action actions[],
4530                    struct rte_flow_action actions_rx[],
4531                    struct rte_flow_action actions_tx[],
4532                    struct rte_flow_item pattern_tx[],
4533                    uint32_t flow_id)
4534 {
4535         const struct rte_flow_action_raw_encap *raw_encap;
4536         const struct rte_flow_action_raw_decap *raw_decap;
4537         struct mlx5_rte_flow_action_set_tag *set_tag;
4538         struct rte_flow_action *tag_action;
4539         struct mlx5_rte_flow_item_tag *tag_item;
4540         struct rte_flow_item *item;
4541         char *addr;
4542         int encap = 0;
4543
4544         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4545                 switch (actions->type) {
4546                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4547                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4548                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4549                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4550                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4551                         rte_memcpy(actions_tx, actions,
4552                                sizeof(struct rte_flow_action));
4553                         actions_tx++;
4554                         break;
4555                 case RTE_FLOW_ACTION_TYPE_COUNT:
4556                         if (encap) {
4557                                 rte_memcpy(actions_tx, actions,
4558                                            sizeof(struct rte_flow_action));
4559                                 actions_tx++;
4560                         } else {
4561                                 rte_memcpy(actions_rx, actions,
4562                                            sizeof(struct rte_flow_action));
4563                                 actions_rx++;
4564                         }
4565                         break;
4566                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4567                         raw_encap = actions->conf;
4568                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4569                                 memcpy(actions_tx, actions,
4570                                        sizeof(struct rte_flow_action));
4571                                 actions_tx++;
4572                                 encap = 1;
4573                         } else {
4574                                 rte_memcpy(actions_rx, actions,
4575                                            sizeof(struct rte_flow_action));
4576                                 actions_rx++;
4577                         }
4578                         break;
4579                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4580                         raw_decap = actions->conf;
4581                         if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4582                                 memcpy(actions_tx, actions,
4583                                        sizeof(struct rte_flow_action));
4584                                 actions_tx++;
4585                         } else {
4586                                 rte_memcpy(actions_rx, actions,
4587                                            sizeof(struct rte_flow_action));
4588                                 actions_rx++;
4589                         }
4590                         break;
4591                 default:
4592                         rte_memcpy(actions_rx, actions,
4593                                    sizeof(struct rte_flow_action));
4594                         actions_rx++;
4595                         break;
4596                 }
4597         }
4598         /* Add set meta action and end action for the Rx flow. */
4599         tag_action = actions_rx;
4600         tag_action->type = (enum rte_flow_action_type)
4601                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4602         actions_rx++;
4603         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4604         actions_rx++;
4605         set_tag = (void *)actions_rx;
4606         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
4607                 .id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
4608                 .data = flow_id,
4609         };
4610         MLX5_ASSERT(set_tag->id > REG_NON);
4611         tag_action->conf = set_tag;
4612         /* Create Tx item list. */
4613         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
4614         addr = (void *)&pattern_tx[2];
4615         item = pattern_tx;
4616         item->type = (enum rte_flow_item_type)
4617                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4618         tag_item = (void *)addr;
4619         tag_item->data = flow_id;
4620         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
4621         MLX5_ASSERT(tag_item->id > REG_NON);
4622         item->spec = tag_item;
4623         addr += sizeof(struct mlx5_rte_flow_item_tag);
4624         tag_item = (void *)addr;
4625         tag_item->data = UINT32_MAX;
4626         tag_item->id = UINT16_MAX;
4627         item->mask = tag_item;
4628         item->last = NULL;
4629         item++;
4630         item->type = RTE_FLOW_ITEM_TYPE_END;
4631         return 0;
4632 }
4633
4634 /**
4635  * The last stage of the splitting chain; it just creates the subflow
4636  * without any modification.
4637  *
4638  * @param[in] dev
4639  *   Pointer to Ethernet device.
4640  * @param[in] flow
4641  *   Parent flow structure pointer.
4642  * @param[in, out] sub_flow
4643  *   Pointer to return the created subflow, may be NULL.
4644  * @param[in] attr
4645  *   Flow rule attributes.
4646  * @param[in] items
4647  *   Pattern specification (list terminated by the END pattern item).
4648  * @param[in] actions
4649  *   Associated actions (list terminated by the END action).
4650  * @param[in] flow_split_info
4651  *   Pointer to flow split info structure.
4652  * @param[out] error
4653  *   Perform verbose error reporting if not NULL.
4654  * @return
4655  *   0 on success, negative value otherwise
4656  */
4657 static int
4658 flow_create_split_inner(struct rte_eth_dev *dev,
4659                         struct rte_flow *flow,
4660                         struct mlx5_flow **sub_flow,
4661                         const struct rte_flow_attr *attr,
4662                         const struct rte_flow_item items[],
4663                         const struct rte_flow_action actions[],
4664                         struct mlx5_flow_split_info *flow_split_info,
4665                         struct rte_flow_error *error)
4666 {
4667         struct mlx5_flow *dev_flow;
4668
4669         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
4670                                     flow_split_info->flow_idx, error);
4671         if (!dev_flow)
4672                 return -rte_errno;
4673         dev_flow->flow = flow;
4674         dev_flow->external = flow_split_info->external;
4675         dev_flow->skip_scale = flow_split_info->skip_scale;
4676         /* Subflow object was created, we must include one in the list. */
4677         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4678                       dev_flow->handle, next);
4679         /*
4680          * If dev_flow is one of the suffix flows, some actions in the suffix
4681          * flow may need the user-defined item layer flags; pass the
4682          * metadata Rx queue mark flag to the suffix flow as well.
4683          */
4684         if (flow_split_info->prefix_layers)
4685                 dev_flow->handle->layers = flow_split_info->prefix_layers;
4686         if (flow_split_info->prefix_mark)
4687                 dev_flow->handle->mark = 1;
4688         if (sub_flow)
4689                 *sub_flow = dev_flow;
4690 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4691         dev_flow->dv.table_id = flow_split_info->table_id;
4692 #endif
4693         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
4694 }
4695
4696 /**
4697  * Get the sub policy of a meter.
4698  *
4699  * @param[in] dev
4700  *   Pointer to Ethernet device.
4701  * @param[in] flow
4702  *   Parent flow structure pointer.
4703  * @param wks
4704  *   Pointer to thread flow work space.
4705  * @param[in] attr
4706  *   Flow rule attributes.
4707  * @param[in] items
4708  *   Pattern specification (list terminated by the END pattern item).
4709  * @param[out] error
4710  *   Perform verbose error reporting if not NULL.
4711  *
4712  * @return
4713  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
4714  */
4715 static struct mlx5_flow_meter_sub_policy *
4716 get_meter_sub_policy(struct rte_eth_dev *dev,
4717                      struct rte_flow *flow,
4718                      struct mlx5_flow_workspace *wks,
4719                      const struct rte_flow_attr *attr,
4720                      const struct rte_flow_item items[],
4721                      struct rte_flow_error *error)
4722 {
4723         struct mlx5_flow_meter_policy *policy;
4724         struct mlx5_flow_meter_policy *final_policy;
4725         struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
4726
4727         policy = wks->policy;
4728         final_policy = policy->is_hierarchy ? wks->final_policy : policy;
4729         if (final_policy->is_rss || final_policy->is_queue) {
4730                 struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
4731                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
4732                 uint32_t i;
4733
4734                 /*
4735                  * This is a tmp dev_flow,
4736                  * no need to register any matcher for it in translate.
4737                  */
4738                 wks->skip_matcher_reg = 1;
4739                 for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
4740                         struct mlx5_flow dev_flow = {0};
4741                         struct mlx5_flow_handle dev_handle = { {0} };
4742                         uint8_t fate = final_policy->act_cnt[i].fate_action;
4743
4744                         if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
4745                                 const struct rte_flow_action_rss *rss_act =
4746                                         final_policy->act_cnt[i].rss->conf;
4747                                 struct rte_flow_action rss_actions[2] = {
4748                                         [0] = {
4749                                         .type = RTE_FLOW_ACTION_TYPE_RSS,
4750                                         .conf = rss_act,
4751                                         },
4752                                         [1] = {
4753                                         .type = RTE_FLOW_ACTION_TYPE_END,
4754                                         .conf = NULL,
4755                                         }
4756                                 };
4757
4758                                 dev_flow.handle = &dev_handle;
4759                                 dev_flow.ingress = attr->ingress;
4760                                 dev_flow.flow = flow;
4761                                 dev_flow.external = 0;
4762 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4763                                 dev_flow.dv.transfer = attr->transfer;
4764 #endif
4765                                 /**
4766                                  * Translate RSS action to get rss hash fields.
4767                                  */
4768                                 if (flow_drv_translate(dev, &dev_flow, attr,
4769                                                 items, rss_actions, error))
4770                                         goto exit;
4771                                 rss_desc_v[i] = wks->rss_desc;
4772                                 rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
4773                                 rss_desc_v[i].hash_fields =
4774                                                 dev_flow.hash_fields;
4775                                 rss_desc_v[i].queue_num =
4776                                                 rss_desc_v[i].hash_fields ?
4777                                                 rss_desc_v[i].queue_num : 1;
4778                                 rss_desc_v[i].tunnel =
4779                                                 !!(dev_flow.handle->layers &
4780                                                    MLX5_FLOW_LAYER_TUNNEL);
4781                                 /* Use the RSS queues in the containers. */
4782                                 rss_desc_v[i].queue =
4783                                         (uint16_t *)(uintptr_t)rss_act->queue;
4784                                 rss_desc[i] = &rss_desc_v[i];
4785                         } else if (fate == MLX5_FLOW_FATE_QUEUE) {
4786                                 /* This is queue action. */
4787                                 rss_desc_v[i] = wks->rss_desc;
4788                                 rss_desc_v[i].key_len = 0;
4789                                 rss_desc_v[i].hash_fields = 0;
4790                                 rss_desc_v[i].queue =
4791                                         &final_policy->act_cnt[i].queue;
4792                                 rss_desc_v[i].queue_num = 1;
4793                                 rss_desc[i] = &rss_desc_v[i];
4794                         } else {
4795                                 rss_desc[i] = NULL;
4796                         }
4797                 }
4798                 sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
4799                                                 flow, policy, rss_desc);
4800         } else {
4801                 enum mlx5_meter_domain mtr_domain =
4802                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
4803                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
4804                                                 MLX5_MTR_DOMAIN_INGRESS);
4805                 sub_policy = policy->sub_policys[mtr_domain][0];
4806         }
4807         if (!sub_policy)
4808                 rte_flow_error_set(error, EINVAL,
4809                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4810                                    "Failed to get meter sub-policy.");
4811 exit:
4812         return sub_policy;
4813 }
4814
4815 /**
4816  * Split the meter flow.
4817  *
4818  * As the meter flow will be split into three subflows, the actions
4819  * other than the meter action only make sense when the meter accepts
4820  * the packet. If it needs to be dropped, no additional
4821  * actions should be taken.
4822  *
4823  * One special kind of action, which decapsulates the L3 tunnel
4824  * header, is put in the prefix subflow, so as not to take the
4825  * L3 tunnel header into account.
4826  *
4827  * @param[in] dev
4828  *   Pointer to Ethernet device.
4829  * @param[in] flow
4830  *   Parent flow structure pointer.
4831  * @param wks
4832  *   Pointer to thread flow work space.
4833  * @param[in] attr
4834  *   Flow rule attributes.
4835  * @param[in] items
4836  *   Pattern specification (list terminated by the END pattern item).
4837  * @param[out] sfx_items
4838  *   Suffix flow match items (list terminated by the END pattern item).
4839  * @param[in] actions
4840  *   Associated actions (list terminated by the END action).
4841  * @param[out] actions_sfx
4842  *   Suffix flow actions.
4843  * @param[out] actions_pre
4844  *   Prefix flow actions.
4845  * @param[out] mtr_flow_id
4846  *   Pointer to meter flow id.
4847  * @param[out] error
4848  *   Perform verbose error reporting if not NULL.
4849  *
4850  * @return
4851  *   0 on success, a negative errno value otherwise and rte_errno is set.
4852  */
4853 static int
4854 flow_meter_split_prep(struct rte_eth_dev *dev,
4855                       struct rte_flow *flow,
4856                       struct mlx5_flow_workspace *wks,
4857                       const struct rte_flow_attr *attr,
4858                       const struct rte_flow_item items[],
4859                       struct rte_flow_item sfx_items[],
4860                       const struct rte_flow_action actions[],
4861                       struct rte_flow_action actions_sfx[],
4862                       struct rte_flow_action actions_pre[],
4863                       uint32_t *mtr_flow_id,
4864                       struct rte_flow_error *error)
4865 {
4866         struct mlx5_priv *priv = dev->data->dev_private;
4867         struct mlx5_flow_meter_info *fm = wks->fm;
4868         struct rte_flow_action *tag_action = NULL;
4869         struct rte_flow_item *tag_item;
4870         struct mlx5_rte_flow_action_set_tag *set_tag;
4871         const struct rte_flow_action_raw_encap *raw_encap;
4872         const struct rte_flow_action_raw_decap *raw_decap;
4873         struct mlx5_rte_flow_item_tag *tag_item_spec;
4874         struct mlx5_rte_flow_item_tag *tag_item_mask;
4875         uint32_t tag_id = 0;
4876         struct rte_flow_item *vlan_item_dst = NULL;
4877         const struct rte_flow_item *vlan_item_src = NULL;
4878         struct rte_flow_action *hw_mtr_action;
4879         struct rte_flow_action *action_pre_head = NULL;
4880         int32_t flow_src_port = priv->representor_id;
4881         bool mtr_first;
4882         uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
4883         uint8_t mtr_reg_bits = priv->mtr_reg_share ?
4884                                 MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
4885         uint32_t flow_id = 0;
4886         uint32_t flow_id_reversed = 0;
4887         uint8_t flow_id_bits = 0;
4888         int shift;
4889
4890         /* Prepare the suffix subflow items. */
4891         tag_item = sfx_items++;
4892         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
4893                 struct mlx5_priv *port_priv;
4894                 const struct rte_flow_item_port_id *pid_v;
4895                 int item_type = items->type;
4896
4897                 switch (item_type) {
4898                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
4899                         pid_v = items->spec;
4900                         MLX5_ASSERT(pid_v);
4901                         port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
4902                         if (!port_priv)
4903                                 return rte_flow_error_set(error,
4904                                                 rte_errno,
4905                                                 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
4906                                                 pid_v,
4907                                                 "Failed to get port info.");
4908                         flow_src_port = port_priv->representor_id;
4909                         if (!fm->def_policy && wks->policy->is_hierarchy &&
4910                             flow_src_port != priv->representor_id) {
4911                                 if (flow_drv_mtr_hierarchy_rule_create(dev,
4912                                                                 flow, fm,
4913                                                                 flow_src_port,
4914                                                                 items,
4915                                                                 error))
4916                                         return -rte_errno;
4917                         }
4918                         memcpy(sfx_items, items, sizeof(*sfx_items));
4919                         sfx_items++;
4920                         break;
4921                 case RTE_FLOW_ITEM_TYPE_VLAN:
4922                         /* Determine whether to copy the VLAN item below. */
4923                         vlan_item_src = items;
4924                         vlan_item_dst = sfx_items++;
4925                         vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
4926                         break;
4927                 default:
4928                         break;
4929                 }
4930         }
4931         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4932         sfx_items++;
4933         mtr_first = priv->sh->meter_aso_en &&
4934                 (attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
4935         /* For ASO meter, meter must be before tag in TX direction. */
4936         if (mtr_first) {
4937                 action_pre_head = actions_pre++;
4938                 /* Leave space for tag action. */
4939                 tag_action = actions_pre++;
4940         }
4941         /* Prepare the actions for prefix and suffix flow. */
4942         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4943                 struct rte_flow_action *action_cur = NULL;
4944
4945                 switch (actions->type) {
4946                 case RTE_FLOW_ACTION_TYPE_METER:
4947                         if (mtr_first) {
4948                                 action_cur = action_pre_head;
4949                         } else {
4950                                 /* Leave space for tag action. */
4951                                 tag_action = actions_pre++;
4952                                 action_cur = actions_pre++;
4953                         }
4954                         break;
4955                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4956                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4957                         action_cur = actions_pre++;
4958                         break;
4959                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4960                         raw_encap = actions->conf;
4961                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
4962                                 action_cur = actions_pre++;
4963                         break;
4964                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4965                         raw_decap = actions->conf;
4966                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4967                                 action_cur = actions_pre++;
4968                         break;
4969                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4970                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4971                         if (vlan_item_dst && vlan_item_src) {
4972                                 memcpy(vlan_item_dst, vlan_item_src,
4973                                         sizeof(*vlan_item_dst));
4974                                 /*
4975                                  * Convert to internal match item, it is used
4976                                  * for vlan push and set vid.
4977                                  */
4978                                 vlan_item_dst->type = (enum rte_flow_item_type)
4979                                                 MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
4980                         }
4981                         break;
4982                 default:
4983                         break;
4984                 }
4985                 if (!action_cur)
4986                         action_cur = (fm->def_policy) ?
4987                                         actions_sfx++ : actions_pre++;
4988                 memcpy(action_cur, actions, sizeof(struct rte_flow_action));
4989         }
4990         /* Add end action to the actions. */
4991         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
4992         if (priv->sh->meter_aso_en) {
4993                 /*
4994                  * For ASO meter, an extra jump action must be added explicitly
4995                  * to jump from the meter table to the policer table.
4996                  */
4997                 struct mlx5_flow_meter_sub_policy *sub_policy;
4998                 struct mlx5_flow_tbl_data_entry *tbl_data;
4999
5000                 if (!fm->def_policy) {
5001                         sub_policy = get_meter_sub_policy(dev, flow, wks,
5002                                                           attr, items, error);
5003                         if (!sub_policy)
5004                                 return -rte_errno;
5005                 } else {
5006                         enum mlx5_meter_domain mtr_domain =
5007                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5008                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5009                                                 MLX5_MTR_DOMAIN_INGRESS);
5010
5011                         sub_policy =
5012                         &priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5013                 }
5014                 tbl_data = container_of(sub_policy->tbl_rsc,
5015                                         struct mlx5_flow_tbl_data_entry, tbl);
5016                 hw_mtr_action = actions_pre++;
5017                 hw_mtr_action->type = (enum rte_flow_action_type)
5018                                       MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5019                 hw_mtr_action->conf = tbl_data->jump.action;
5020         }
5021         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5022         actions_pre++;
5023         if (!tag_action)
5024                 return rte_flow_error_set(error, ENOMEM,
5025                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5026                                           NULL, "No tag action space.");
5027         if (!mtr_flow_id) {
5028                 tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5029                 goto exit;
5030         }
5031         /* Only default-policy Meter creates mtr flow id. */
5032         if (fm->def_policy) {
5033                 mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5034                 if (!tag_id)
5035                         return rte_flow_error_set(error, ENOMEM,
5036                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5037                                         "Failed to allocate meter flow id.");
5038                 flow_id = tag_id - 1;
5039                 flow_id_bits = (!flow_id) ? 1 :
5040                                 (MLX5_REG_BITS - __builtin_clz(flow_id));
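                /*
                 * flow_id_bits is the number of significant bits in flow_id,
                 * e.g. flow_id = 5 (0b101) needs 3 bits, while flow_id = 0 is
                 * treated as needing a single bit.
                 */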
5041                 if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5042                     mtr_reg_bits) {
5043                         mlx5_ipool_free(fm->flow_ipool, tag_id);
5044                         return rte_flow_error_set(error, EINVAL,
5045                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5046                                         "Meter flow id exceeds max limit.");
5047                 }
5048                 if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5049                         priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5050         }
5051         /* Build tag actions and items for meter_id/meter flow_id. */
5052         set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5053         tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5054         tag_item_mask = tag_item_spec + 1;
5055         /* Both flow_id and meter_id share the same register. */
5056         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5057                 .id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5058                                                             0, error),
5059                 .offset = mtr_id_offset,
5060                 .length = mtr_reg_bits,
5061                 .data = flow->meter,
5062         };
5063         /*
5064          * The color register bits used by flow_id grow from
5065          * msb to lsb, so the flow_id value must be bit-reversed in RegC.
5066          */
5067         for (shift = 0; shift < flow_id_bits; shift++)
5068                 flow_id_reversed = (flow_id_reversed << 1) |
5069                                 ((flow_id >> shift) & 0x1);
5070         set_tag->data |=
5071                 flow_id_reversed << (mtr_reg_bits - flow_id_bits);
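        /*
         * Illustrative example: flow_id = 6 (0b110, flow_id_bits = 3) is
         * reversed by the loop above into 0b011, then OR-ed into the top
         * flow_id_bits of the shared meter register field, while the meter
         * id stays in the lower bits set just above.
         */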
5072         tag_item_spec->id = set_tag->id;
5073         tag_item_spec->data = set_tag->data << mtr_id_offset;
5074         tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5075         tag_action->type = (enum rte_flow_action_type)
5076                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5077         tag_action->conf = set_tag;
5078         tag_item->type = (enum rte_flow_item_type)
5079                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5080         tag_item->spec = tag_item_spec;
5081         tag_item->last = NULL;
5082         tag_item->mask = tag_item_mask;
5083 exit:
5084         if (mtr_flow_id)
5085                 *mtr_flow_id = tag_id;
5086         return 0;
5087 }
5088
5089 /**
5090  * Split action list having QUEUE/RSS for metadata register copy.
5091  *
5092  * Once a Q/RSS action is detected in the user's action list, the flow actions
5093  * should be split in order to copy the metadata registers, which happens in
5094  * RX_CP_TBL like,
5095  *   - CQE->flow_tag := reg_c[1] (MARK)
5096  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5097  * The Q/RSS action is performed in RX_ACT_TBL after passing through RX_CP_TBL.
5098  * This is because the last action of each flow must be a terminal action
5099  * (QUEUE, RSS or DROP).
5100  *
5101  * A flow ID must be allocated to identify the actions in RX_ACT_TBL, and it is
5102  * stored and kept in the mlx5_flow structure for each sub_flow.
5103  *
5104  * The Q/RSS action is replaced with,
5105  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5106  * And the following JUMP action is added at the end,
5107  *   - JUMP, to RX_CP_TBL.
5108  *
5109  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
5110  * the flow_create_split_metadata() routine. The flow will look like,
5111  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
5112  *
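 * For example (illustrative only), a user action list [MARK, RSS, END] is
 * rewritten here into [MARK, SET_TAG(reg_c[2] := flow_id), JUMP(RX_CP_TBL), END],
 * and the original RSS is applied again in RX_ACT_TBL on the flow ID match.
 *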
5113  * @param dev
5114  *   Pointer to Ethernet device.
5115  * @param[out] split_actions
5116  *   Pointer to store split actions to jump to CP_TBL.
5117  * @param[in] actions
5118  *   Pointer to the list of original flow actions.
5119  * @param[in] qrss
5120  *   Pointer to the Q/RSS action.
5121  * @param[in] actions_n
5122  *   Number of original actions.
5123  * @param[out] error
5124  *   Perform verbose error reporting if not NULL.
5125  *
5126  * @return
5127  *   non-zero unique flow_id on success, otherwise 0 and
5128  *   error/rte_errno are set.
5129  */
5130 static uint32_t
5131 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5132                           struct rte_flow_action *split_actions,
5133                           const struct rte_flow_action *actions,
5134                           const struct rte_flow_action *qrss,
5135                           int actions_n, struct rte_flow_error *error)
5136 {
5137         struct mlx5_priv *priv = dev->data->dev_private;
5138         struct mlx5_rte_flow_action_set_tag *set_tag;
5139         struct rte_flow_action_jump *jump;
5140         const int qrss_idx = qrss - actions;
5141         uint32_t flow_id = 0;
5142         int ret = 0;
5143
5144         /*
5145          * Given actions will be split
5146          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5147          * - Add jump to mreg CP_TBL.
5148          * As a result, there will be one more action.
5149          */
5150         ++actions_n;
5151         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5152         set_tag = (void *)(split_actions + actions_n);
5153         /*
5154          * If the tag action is not set to void (i.e. this is not the meter
5155          * suffix flow), add the tag action; the meter suffix flow already
5156          * has the tag added.
5157          */
5158         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
5159                 /*
5160                  * Allocate the new subflow ID. This one is unique within
5161                  * device and not shared with representors. Otherwise,
5162                  * we would have to resolve multi-thread access synch
5163                  * issue. Each flow on the shared device is appended
5164                  * with source vport identifier, so the resulting
5165                  * flows will be unique in the shared (by master and
5166                  * representors) domain even if they have coinciding
5167                  * IDs.
5168                  */
5169                 mlx5_ipool_malloc(priv->sh->ipool
5170                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5171                 if (!flow_id)
5172                         return rte_flow_error_set(error, ENOMEM,
5173                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5174                                                   NULL, "can't allocate id "
5175                                                   "for split Q/RSS subflow");
5176                 /* Internal SET_TAG action to set flow ID. */
5177                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
5178                         .data = flow_id,
5179                 };
5180                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5181                 if (ret < 0)
5182                         return ret;
5183                 set_tag->id = ret;
5184                 /* Construct new actions array. */
5185                 /* Replace QUEUE/RSS action. */
5186                 split_actions[qrss_idx] = (struct rte_flow_action){
5187                         .type = (enum rte_flow_action_type)
5188                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5189                         .conf = set_tag,
5190                 };
5191         }
5192         /* JUMP action to jump to mreg copy table (CP_TBL). */
5193         jump = (void *)(set_tag + 1);
5194         *jump = (struct rte_flow_action_jump){
5195                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5196         };
5197         split_actions[actions_n - 2] = (struct rte_flow_action){
5198                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
5199                 .conf = jump,
5200         };
5201         split_actions[actions_n - 1] = (struct rte_flow_action){
5202                 .type = RTE_FLOW_ACTION_TYPE_END,
5203         };
5204         return flow_id;
5205 }
5206
5207 /**
5208  * Extend the given action list for Tx metadata copy.
5209  *
5210  * Copy the given action list to the ext_actions and add flow metadata register
5211  * copy action in order to copy reg_a set by WQE to reg_c[0].
5212  *
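 * For example (a sketch of the common case without encap), Tx actions
 * [MODIFY_FIELD, END] become [MODIFY_FIELD, COPY_MREG, END]; when an encap
 * action is present, the copy action is inserted just before it.
 *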
5213  * @param[out] ext_actions
5214  *   Pointer to the extended action list.
5215  * @param[in] actions
5216  *   Pointer to the list of actions.
5217  * @param[in] actions_n
5218  *   Number of actions in the list.
5219  * @param[out] error
5220  *   Perform verbose error reporting if not NULL.
5221  * @param[in] encap_idx
5222  *   The encap action index.
5223  *
5224  * @return
5225  *   0 on success, negative value otherwise
5226  */
5227 static int
5228 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5229                        struct rte_flow_action *ext_actions,
5230                        const struct rte_flow_action *actions,
5231                        int actions_n, struct rte_flow_error *error,
5232                        int encap_idx)
5233 {
5234         struct mlx5_flow_action_copy_mreg *cp_mreg =
5235                 (struct mlx5_flow_action_copy_mreg *)
5236                         (ext_actions + actions_n + 1);
5237         int ret;
5238
5239         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5240         if (ret < 0)
5241                 return ret;
5242         cp_mreg->dst = ret;
5243         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5244         if (ret < 0)
5245                 return ret;
5246         cp_mreg->src = ret;
5247         if (encap_idx != 0)
5248                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5249         if (encap_idx == actions_n - 1) {
5250                 ext_actions[actions_n - 1] = (struct rte_flow_action){
5251                         .type = (enum rte_flow_action_type)
5252                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5253                         .conf = cp_mreg,
5254                 };
5255                 ext_actions[actions_n] = (struct rte_flow_action){
5256                         .type = RTE_FLOW_ACTION_TYPE_END,
5257                 };
5258         } else {
5259                 ext_actions[encap_idx] = (struct rte_flow_action){
5260                         .type = (enum rte_flow_action_type)
5261                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5262                         .conf = cp_mreg,
5263                 };
5264                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5265                                 sizeof(*ext_actions) * (actions_n - encap_idx));
5266         }
5267         return 0;
5268 }
5269
5270 /**
5271  * Check the match action from the action list.
5272  *
5273  * @param[in] actions
5274  *   Pointer to the list of actions.
5275  * @param[in] attr
5276  *   Flow rule attributes.
5277  * @param[in] action
5278  *   The action to check for in the list.
5279  * @param[out] match_action_pos
5280  *   Pointer to the position of the matched action if it exists, otherwise -1.
5281  * @param[out] qrss_action_pos
5282  *   Pointer to the position of the Queue/RSS action if it exists, otherwise -1.
5283  * @param[out] modify_after_mirror
5284  *   Pointer to the flag of modify action after FDB mirroring.
5285  *
5286  * @return
5287  *   > 0 the total number of actions.
5288  *   0 if the match action is not found in the action list.
5289  */
5290 static int
5291 flow_check_match_action(const struct rte_flow_action actions[],
5292                         const struct rte_flow_attr *attr,
5293                         enum rte_flow_action_type action,
5294                         int *match_action_pos, int *qrss_action_pos,
5295                         int *modify_after_mirror)
5296 {
5297         const struct rte_flow_action_sample *sample;
5298         const struct rte_flow_action_raw_decap *decap;
5299         int actions_n = 0;
5300         uint32_t ratio = 0;
5301         int sub_type = 0;
5302         int flag = 0;
5303         int fdb_mirror = 0;
5304
5305         *match_action_pos = -1;
5306         *qrss_action_pos = -1;
5307         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5308                 if (actions->type == action) {
5309                         flag = 1;
5310                         *match_action_pos = actions_n;
5311                 }
5312                 switch (actions->type) {
5313                 case RTE_FLOW_ACTION_TYPE_QUEUE:
5314                 case RTE_FLOW_ACTION_TYPE_RSS:
5315                         *qrss_action_pos = actions_n;
5316                         break;
5317                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
5318                         sample = actions->conf;
5319                         ratio = sample->ratio;
5320                         sub_type = ((const struct rte_flow_action *)
5321                                         (sample->actions))->type;
5322                         if (ratio == 1 && attr->transfer)
5323                                 fdb_mirror = 1;
5324                         break;
5325                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5326                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5327                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5328                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5329                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5330                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5331                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5332                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5333                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5334                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
5335                 case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5336                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5337                 case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5338                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5339                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5340                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5341                 case RTE_FLOW_ACTION_TYPE_FLAG:
5342                 case RTE_FLOW_ACTION_TYPE_MARK:
5343                 case RTE_FLOW_ACTION_TYPE_SET_META:
5344                 case RTE_FLOW_ACTION_TYPE_SET_TAG:
5345                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5346                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5347                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5348                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5349                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5350                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5351                 case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5352                 case RTE_FLOW_ACTION_TYPE_METER:
5353                         if (fdb_mirror)
5354                                 *modify_after_mirror = 1;
5355                         break;
5356                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5357                         decap = actions->conf;
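                        /*
                         * A raw decap (size <= decision size) immediately
                         * followed, VOIDs skipped, by a raw encap (size >
                         * decision size) forms an L3 encap and is not counted
                         * as a packet modification after mirroring.
                         */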
5358                         while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5359                                 ;
5360                         actions_n++;
5361                         if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5362                                 const struct rte_flow_action_raw_encap *encap =
5363                                                                 actions->conf;
5364                                 if (decap->size <=
5365                                         MLX5_ENCAPSULATION_DECISION_SIZE &&
5366                                     encap->size >
5367                                         MLX5_ENCAPSULATION_DECISION_SIZE)
5368                                         /* L3 encap. */
5369                                         break;
5370                         }
5371                         if (fdb_mirror)
5372                                 *modify_after_mirror = 1;
5373                         break;
5374                 default:
5375                         break;
5376                 }
5377                 actions_n++;
5378         }
5379         if (flag && fdb_mirror && !*modify_after_mirror) {
5380                 /* FDB mirroring is implemented with the destination array
5381                  * instead of the FLOW_SAMPLER object.
5382                  */
5383                 if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5384                         flag = 0;
5385         }
5386         /* Count RTE_FLOW_ACTION_TYPE_END. */
5387         return flag ? actions_n + 1 : 0;
5388 }
5389
5390 #define SAMPLE_SUFFIX_ITEM 2
5391
5392 /**
5393  * Split the sample flow.
5394  *
5395  * The sample flow is split into two sub flows: the prefix sub flow keeps
5396  * the sample action, while the other actions are moved to a new suffix sub flow.
5397  *
5398  * A unique tag id is also added with a tag action in the sample (prefix) flow,
5399  * and the same tag id is used as a match in the suffix flow.
5400  *
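 * For example (a sketch of the case with the extra tag and no Q/RSS before the
 * sample), actions [COUNT, SAMPLE, QUEUE, END] are split into a prefix
 * [COUNT, TAG, SAMPLE, END] and a suffix [QUEUE, END] that matches the tag.
 *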
5401  * @param dev
5402  *   Pointer to Ethernet device.
5403  * @param[in] add_tag
5404  *   Add extra tag action flag.
5405  * @param[out] sfx_items
5406  *   Suffix flow match items (list terminated by the END pattern item).
5407  * @param[in] actions
5408  *   Associated actions (list terminated by the END action).
5409  * @param[out] actions_sfx
5410  *   Suffix flow actions.
5411  * @param[out] actions_pre
5412  *   Prefix flow actions.
5413  * @param[in] actions_n
5414  *  The total number of actions.
5415  * @param[in] sample_action_pos
5416  *   The sample action position.
5417  * @param[in] qrss_action_pos
5418  *   The Queue/RSS action position.
5419  * @param[in] jump_table
5420  *   Add extra jump action flag.
5421  * @param[out] error
5422  *   Perform verbose error reporting if not NULL.
5423  *
5424  * @return
5425  *   0 or a unique, non-zero flow_id (tag id) on success, a negative errno
5426  *   value otherwise and rte_errno is set.
5427  */
5428 static int
5429 flow_sample_split_prep(struct rte_eth_dev *dev,
5430                        int add_tag,
5431                        struct rte_flow_item sfx_items[],
5432                        const struct rte_flow_action actions[],
5433                        struct rte_flow_action actions_sfx[],
5434                        struct rte_flow_action actions_pre[],
5435                        int actions_n,
5436                        int sample_action_pos,
5437                        int qrss_action_pos,
5438                        int jump_table,
5439                        struct rte_flow_error *error)
5440 {
5441         struct mlx5_priv *priv = dev->data->dev_private;
5442         struct mlx5_rte_flow_action_set_tag *set_tag;
5443         struct mlx5_rte_flow_item_tag *tag_spec;
5444         struct mlx5_rte_flow_item_tag *tag_mask;
5445         struct rte_flow_action_jump *jump_action;
5446         uint32_t tag_id = 0;
5447         int index;
5448         int append_index = 0;
5449         int ret;
5450
5451         if (sample_action_pos < 0)
5452                 return rte_flow_error_set(error, EINVAL,
5453                                           RTE_FLOW_ERROR_TYPE_ACTION,
5454                                           NULL, "invalid position of sample "
5455                                           "action in list");
5456         /* Prepare the actions for prefix and suffix flow. */
5457         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5458                 index = qrss_action_pos;
5459                 /* Put the actions preceding the Queue/RSS action into the prefix flow. */
5460                 if (index != 0)
5461                         memcpy(actions_pre, actions,
5462                                sizeof(struct rte_flow_action) * index);
5463                 /* Put the other actions preceding the sample action into the prefix flow. */
5464                 if (sample_action_pos > index + 1)
5465                         memcpy(actions_pre + index, actions + index + 1,
5466                                sizeof(struct rte_flow_action) *
5467                                (sample_action_pos - index - 1));
5468                 index = sample_action_pos - 1;
5469                 /* Put Queue/RSS action into Suffix flow. */
5470                 memcpy(actions_sfx, actions + qrss_action_pos,
5471                        sizeof(struct rte_flow_action));
5472                 actions_sfx++;
5473         } else {
5474                 index = sample_action_pos;
5475                 if (index != 0)
5476                         memcpy(actions_pre, actions,
5477                                sizeof(struct rte_flow_action) * index);
5478         }
5479         /* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
5480          * For CX6DX and above, metadata registers Cx preserve their value,
5481          * so add an extra tag action for NIC-RX and the E-Switch domain.
5482          */
5483         if (add_tag) {
5484                 /* Prepare the prefix tag action. */
5485                 append_index++;
5486                 set_tag = (void *)(actions_pre + actions_n + append_index);
5487                 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
5488                 if (ret < 0)
5489                         return ret;
5490                 mlx5_ipool_malloc(priv->sh->ipool
5491                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
5492                 *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5493                         .id = ret,
5494                         .data = tag_id,
5495                 };
5496                 /* Prepare the suffix subflow items. */
5497                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
5498                 tag_spec->data = tag_id;
5499                 tag_spec->id = set_tag->id;
5500                 tag_mask = tag_spec + 1;
5501                 tag_mask->data = UINT32_MAX;
5502                 sfx_items[0] = (struct rte_flow_item){
5503                         .type = (enum rte_flow_item_type)
5504                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5505                         .spec = tag_spec,
5506                         .last = NULL,
5507                         .mask = tag_mask,
5508                 };
5509                 sfx_items[1] = (struct rte_flow_item){
5510                         .type = (enum rte_flow_item_type)
5511                                 RTE_FLOW_ITEM_TYPE_END,
5512                 };
5513                 /* Prepare the tag action in prefix subflow. */
5514                 actions_pre[index++] =
5515                         (struct rte_flow_action){
5516                         .type = (enum rte_flow_action_type)
5517                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5518                         .conf = set_tag,
5519                 };
5520         }
5521         memcpy(actions_pre + index, actions + sample_action_pos,
5522                sizeof(struct rte_flow_action));
5523         index += 1;
5524         /* If there is a modify action after the sample action in E-Switch
5525          * mirroring, add an extra jump action to the prefix subflow to jump
5526          * into the next table, then do the modify action in the new table.
5527          */
5528         if (jump_table) {
5529                 /* Prepare the prefix jump action. */
5530                 append_index++;
5531                 jump_action = (void *)(actions_pre + actions_n + append_index);
5532                 jump_action->group = jump_table;
5533                 actions_pre[index++] =
5534                         (struct rte_flow_action){
5535                         .type = (enum rte_flow_action_type)
5536                                 RTE_FLOW_ACTION_TYPE_JUMP,
5537                         .conf = jump_action,
5538                 };
5539         }
5540         actions_pre[index] = (struct rte_flow_action){
5541                 .type = (enum rte_flow_action_type)
5542                         RTE_FLOW_ACTION_TYPE_END,
5543         };
5544         /* Put the actions after sample into Suffix flow. */
5545         memcpy(actions_sfx, actions + sample_action_pos + 1,
5546                sizeof(struct rte_flow_action) *
5547                (actions_n - sample_action_pos - 1));
5548         return tag_id;
5549 }
5550
5551 /**
5552  * The splitting for metadata feature.
5553  *
5554  * - Q/RSS action on NIC Rx should be split in order to pass by
5555  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
5556  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
5557  *
5558  * - All the actions on NIC Tx should have a mreg copy action to
5559  *   copy reg_a from WQE to reg_c[0].
5560  *
5561  * @param dev
5562  *   Pointer to Ethernet device.
5563  * @param[in] flow
5564  *   Parent flow structure pointer.
5565  * @param[in] attr
5566  *   Flow rule attributes.
5567  * @param[in] items
5568  *   Pattern specification (list terminated by the END pattern item).
5569  * @param[in] actions
5570  *   Associated actions (list terminated by the END action).
5571  * @param[in] flow_split_info
5572  *   Pointer to flow split info structure.
5573  * @param[out] error
5574  *   Perform verbose error reporting if not NULL.
5575  * @return
5576  *   0 on success, negative value otherwise
5577  */
5578 static int
5579 flow_create_split_metadata(struct rte_eth_dev *dev,
5580                            struct rte_flow *flow,
5581                            const struct rte_flow_attr *attr,
5582                            const struct rte_flow_item items[],
5583                            const struct rte_flow_action actions[],
5584                            struct mlx5_flow_split_info *flow_split_info,
5585                            struct rte_flow_error *error)
5586 {
5587         struct mlx5_priv *priv = dev->data->dev_private;
5588         struct mlx5_dev_config *config = &priv->config;
5589         const struct rte_flow_action *qrss = NULL;
5590         struct rte_flow_action *ext_actions = NULL;
5591         struct mlx5_flow *dev_flow = NULL;
5592         uint32_t qrss_id = 0;
5593         int mtr_sfx = 0;
5594         size_t act_size;
5595         int actions_n;
5596         int encap_idx;
5597         int ret;
5598
5599         /* Check whether extensive metadata feature is engaged. */
5600         if (!config->dv_flow_en ||
5601             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
5602             !mlx5_flow_ext_mreg_supported(dev))
5603                 return flow_create_split_inner(dev, flow, NULL, attr, items,
5604                                                actions, flow_split_info, error);
5605         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
5606                                                            &encap_idx);
5607         if (qrss) {
5608                 /* Exclude hairpin flows from splitting. */
5609                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
5610                         const struct rte_flow_action_queue *queue;
5611
5612                         queue = qrss->conf;
5613                         if (mlx5_rxq_get_type(dev, queue->index) ==
5614                             MLX5_RXQ_TYPE_HAIRPIN)
5615                                 qrss = NULL;
5616                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
5617                         const struct rte_flow_action_rss *rss;
5618
5619                         rss = qrss->conf;
5620                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
5621                             MLX5_RXQ_TYPE_HAIRPIN)
5622                                 qrss = NULL;
5623                 }
5624         }
5625         if (qrss) {
5626                 /* Check if it is in meter suffix table. */
5627                 mtr_sfx = attr->group == (attr->transfer ?
5628                           (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
5629                           MLX5_FLOW_TABLE_LEVEL_METER);
5630                 /*
5631                  * Q/RSS action on NIC Rx should be split in order to pass by
5632                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
5633                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
5634                  */
5635                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5636                            sizeof(struct rte_flow_action_set_tag) +
5637                            sizeof(struct rte_flow_action_jump);
5638                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5639                                           SOCKET_ID_ANY);
5640                 if (!ext_actions)
5641                         return rte_flow_error_set(error, ENOMEM,
5642                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5643                                                   NULL, "no memory to split "
5644                                                   "metadata flow");
5645                 /*
5646                  * If this is the meter suffix flow, the tag already exists.
5647                  * Set the tag action to void.
5648                  */
5649                 if (mtr_sfx)
5650                         ext_actions[qrss - actions].type =
5651                                                 RTE_FLOW_ACTION_TYPE_VOID;
5652                 else
5653                         ext_actions[qrss - actions].type =
5654                                                 (enum rte_flow_action_type)
5655                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5656                 /*
5657                  * Create the new actions list with removed Q/RSS action
5658                  * and appended set tag and jump to register copy table
5659                  * (RX_CP_TBL). We should preallocate unique tag ID here
5660                  * in advance, because it is needed for set tag action.
5661                  */
5662                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
5663                                                     qrss, actions_n, error);
5664                 if (!mtr_sfx && !qrss_id) {
5665                         ret = -rte_errno;
5666                         goto exit;
5667                 }
5668         } else if (attr->egress && !attr->transfer) {
5669                 /*
5670                  * All the actions on NIC Tx should have a metadata register
5671                  * copy action to copy reg_a from WQE to reg_c[meta]
5672                  */
5673                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5674                            sizeof(struct mlx5_flow_action_copy_mreg);
5675                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5676                                           SOCKET_ID_ANY);
5677                 if (!ext_actions)
5678                         return rte_flow_error_set(error, ENOMEM,
5679                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5680                                                   NULL, "no memory to split "
5681                                                   "metadata flow");
5682                 /* Create the action list appended with copy register. */
5683                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
5684                                              actions_n, error, encap_idx);
5685                 if (ret < 0)
5686                         goto exit;
5687         }
5688         /* Add the unmodified original or prefix subflow. */
5689         ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5690                                       items, ext_actions ? ext_actions :
5691                                       actions, flow_split_info, error);
5692         if (ret < 0)
5693                 goto exit;
5694         MLX5_ASSERT(dev_flow);
5695         if (qrss) {
5696                 const struct rte_flow_attr q_attr = {
5697                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5698                         .ingress = 1,
5699                 };
5700                 /* Internal PMD tag item to match the register set by the prefix flow. */
5701                 struct mlx5_rte_flow_item_tag q_tag_spec = {
5702                         .data = qrss_id,
5703                         .id = REG_NON,
5704                 };
5705                 struct rte_flow_item q_items[] = {
5706                         {
5707                                 .type = (enum rte_flow_item_type)
5708                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5709                                 .spec = &q_tag_spec,
5710                                 .last = NULL,
5711                                 .mask = NULL,
5712                         },
5713                         {
5714                                 .type = RTE_FLOW_ITEM_TYPE_END,
5715                         },
5716                 };
5717                 struct rte_flow_action q_actions[] = {
5718                         {
5719                                 .type = qrss->type,
5720                                 .conf = qrss->conf,
5721                         },
5722                         {
5723                                 .type = RTE_FLOW_ACTION_TYPE_END,
5724                         },
5725                 };
5726                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
5727
5728                 /*
5729                  * Configure the tag item only if there is no meter subflow.
5730                  * Since tag is already marked in the meter suffix subflow
5731                  * we can just use the meter suffix items as is.
5732                  */
5733                 if (qrss_id) {
5734                         /* Not meter subflow. */
5735                         MLX5_ASSERT(!mtr_sfx);
5736                         /*
5737                          * Put the unique id in the prefix flow because it is
5738                          * destroyed after the suffix flow; the id is freed only
5739                          * once there are no actual flows with this id, after
5740                          * which identifier reallocation becomes possible (for
5741                          * example, for other flows in other threads).
5742                          */
5743                         dev_flow->handle->split_flow_id = qrss_id;
5744                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
5745                                                    error);
5746                         if (ret < 0)
5747                                 goto exit;
5748                         q_tag_spec.id = ret;
5749                 }
5750                 dev_flow = NULL;
5751                 /* Add suffix subflow to execute Q/RSS. */
5752                 flow_split_info->prefix_layers = layers;
5753                 flow_split_info->prefix_mark = 0;
5754                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5755                                               &q_attr, mtr_sfx ? items :
5756                                               q_items, q_actions,
5757                                               flow_split_info, error);
5758                 if (ret < 0)
5759                         goto exit;
5760                 /* The qrss ID should be freed only on failure, clear it on success. */
5761                 qrss_id = 0;
5762                 MLX5_ASSERT(dev_flow);
5763         }
5764
5765 exit:
5766         /*
5767          * We do not destroy the partially created sub_flows in case of error.
5768          * These ones are included into parent flow list and will be destroyed
5769          * by flow_drv_destroy.
5770          */
5771         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
5772                         qrss_id);
5773         mlx5_free(ext_actions);
5774         return ret;
5775 }
5776
5777 /**
5778  * Create meter internal drop flow with the original pattern.
5779  *
5780  * @param dev
5781  *   Pointer to Ethernet device.
5782  * @param[in] flow
5783  *   Parent flow structure pointer.
5784  * @param[in] attr
5785  *   Flow rule attributes.
5786  * @param[in] items
5787  *   Pattern specification (list terminated by the END pattern item).
5788  * @param[in] flow_split_info
5789  *   Pointer to flow split info structure.
5790  * @param[in] fm
5791  *   Pointer to flow meter structure.
5792  * @param[out] error
5793  *   Perform verbose error reporting if not NULL.
5794  * @return
5795  *   0 on success, negative value otherwise
5796  */
5797 static uint32_t
5798 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
5799                         struct rte_flow *flow,
5800                         const struct rte_flow_attr *attr,
5801                         const struct rte_flow_item items[],
5802                         struct mlx5_flow_split_info *flow_split_info,
5803                         struct mlx5_flow_meter_info *fm,
5804                         struct rte_flow_error *error)
5805 {
5806         struct mlx5_flow *dev_flow = NULL;
5807         struct rte_flow_attr drop_attr = *attr;
5808         struct rte_flow_action drop_actions[3];
5809         struct mlx5_flow_split_info drop_split_info = *flow_split_info;
5810
5811         MLX5_ASSERT(fm->drop_cnt);
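        /* Build the internal drop action list: [COUNT(drop_cnt), DROP, END]. */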
5812         drop_actions[0].type =
5813                 (enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
5814         drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
5815         drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
5816         drop_actions[1].conf = NULL;
5817         drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
5818         drop_actions[2].conf = NULL;
5819         drop_split_info.external = false;
5820         drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
5821         drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
5822         drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
5823         return flow_create_split_inner(dev, flow, &dev_flow,
5824                                 &drop_attr, items, drop_actions,
5825                                 &drop_split_info, error);
5826 }
5827
5828 /**
5829  * The splitting for meter feature.
5830  *
5831  * - The meter flow will be split into two flows, a prefix and a
5832  *   suffix flow. The packets are meaningful only if they pass the
5833  *   prefix meter action.
5834  *
5835  * - Reg_C_5 is used to match a packet between the prefix and the
5836  *   suffix flow.
5837  *
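 * For example (a sketch of the ASO default-policy Rx case), actions
 * [METER, QUEUE, END] are split into a prefix [TAG, METER, JUMP, END] that
 * jumps to the policer table, and a suffix [QUEUE, END] that matches the
 * meter id / flow id written into the register by the prefix tag.
 *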
5838  * @param dev
5839  *   Pointer to Ethernet device.
5840  * @param[in] flow
5841  *   Parent flow structure pointer.
5842  * @param[in] attr
5843  *   Flow rule attributes.
5844  * @param[in] items
5845  *   Pattern specification (list terminated by the END pattern item).
5846  * @param[in] actions
5847  *   Associated actions (list terminated by the END action).
5848  * @param[in] flow_split_info
5849  *   Pointer to flow split info structure.
5850  * @param[out] error
5851  *   Perform verbose error reporting if not NULL.
5852  * @return
5853  *   0 on success, negative value otherwise
5854  */
5855 static int
5856 flow_create_split_meter(struct rte_eth_dev *dev,
5857                         struct rte_flow *flow,
5858                         const struct rte_flow_attr *attr,
5859                         const struct rte_flow_item items[],
5860                         const struct rte_flow_action actions[],
5861                         struct mlx5_flow_split_info *flow_split_info,
5862                         struct rte_flow_error *error)
5863 {
5864         struct mlx5_priv *priv = dev->data->dev_private;
5865         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5866         struct rte_flow_action *sfx_actions = NULL;
5867         struct rte_flow_action *pre_actions = NULL;
5868         struct rte_flow_item *sfx_items = NULL;
5869         struct mlx5_flow *dev_flow = NULL;
5870         struct rte_flow_attr sfx_attr = *attr;
5871         struct mlx5_flow_meter_info *fm = NULL;
5872         uint8_t skip_scale_restore;
5873         bool has_mtr = false;
5874         bool has_modify = false;
5875         bool set_mtr_reg = true;
5876         bool is_mtr_hierarchy = false;
5877         uint32_t meter_id = 0;
5878         uint32_t mtr_idx = 0;
5879         uint32_t mtr_flow_id = 0;
5880         size_t act_size;
5881         size_t item_size;
5882         int actions_n = 0;
5883         int ret = 0;
5884
5885         if (priv->mtr_en)
5886                 actions_n = flow_check_meter_action(dev, actions, &has_mtr,
5887                                                     &has_modify, &meter_id);
5888         if (has_mtr) {
5889                 if (flow->meter) {
5890                         fm = flow_dv_meter_find_by_idx(priv, flow->meter);
5891                         if (!fm)
5892                                 return rte_flow_error_set(error, EINVAL,
5893                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5894                                                 NULL, "Meter not found.");
5895                 } else {
5896                         fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
5897                         if (!fm)
5898                                 return rte_flow_error_set(error, EINVAL,
5899                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5900                                                 NULL, "Meter not found.");
5901                         ret = mlx5_flow_meter_attach(priv, fm,
5902                                                      &sfx_attr, error);
5903                         if (ret)
5904                                 return -rte_errno;
5905                         flow->meter = mtr_idx;
5906                 }
5907                 MLX5_ASSERT(wks);
5908                 wks->fm = fm;
5909                 if (!fm->def_policy) {
5910                         wks->policy = mlx5_flow_meter_policy_find(dev,
5911                                                                   fm->policy_id,
5912                                                                   NULL);
5913                         MLX5_ASSERT(wks->policy);
5914                         if (wks->policy->is_hierarchy) {
5915                                 wks->final_policy =
5916                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
5917                                                                 wks->policy);
5918                                 if (!wks->final_policy)
5919                                         return rte_flow_error_set(error,
5920                                         EINVAL,
5921                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
5922                                 "Failed to find terminal policy of hierarchy.");
5923                                 is_mtr_hierarchy = true;
5924                         }
5925                 }
5926                 /*
5927                  * If it isn't a default-policy meter, it is not a meter
5928                  * hierarchy, and either
5929                  * 1. there's no action in the flow that changes the
5930                  *    packet (modify/encap/decap etc.), OR
5931                  * 2. no drop count is needed for this meter,
5932                  * then there is no need to use regC to save the meter id.
5933                  */
5934                 if (!fm->def_policy && !is_mtr_hierarchy &&
5935                     (!has_modify || !fm->drop_cnt))
5936                         set_mtr_reg = false;
5937                 /* Prefix actions: meter, decap, encap, tag, jump, end. */
5938                 act_size = sizeof(struct rte_flow_action) * (actions_n + 6) +
5939                            sizeof(struct mlx5_rte_flow_action_set_tag);
5940                 /* Suffix items: tag, vlan, port id, end. */
5941 #define METER_SUFFIX_ITEM 4
5942                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
5943                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
5944                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
5945                                           0, SOCKET_ID_ANY);
5946                 if (!sfx_actions)
5947                         return rte_flow_error_set(error, ENOMEM,
5948                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5949                                                   NULL, "no memory to split "
5950                                                   "meter flow");
5951                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
5952                              act_size);
5953                 /* There's no suffix flow for meter of non-default policy. */
5954                 if (!fm->def_policy)
5955                         pre_actions = sfx_actions + 1;
5956                 else
5957                         pre_actions = sfx_actions + actions_n;
5958                 ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
5959                                             items, sfx_items, actions,
5960                                             sfx_actions, pre_actions,
5961                                             (set_mtr_reg ? &mtr_flow_id : NULL),
5962                                             error);
5963                 if (ret) {
5964                         ret = -rte_errno;
5965                         goto exit;
5966                 }
5967                 /* Add the prefix subflow. */
5968                 flow_split_info->prefix_mark = 0;
5969                 skip_scale_restore = flow_split_info->skip_scale;
5970                 flow_split_info->skip_scale |=
5971                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
5972                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5973                                               attr, items, pre_actions,
5974                                               flow_split_info, error);
5975                 flow_split_info->skip_scale = skip_scale_restore;
5976                 if (ret) {
5977                         if (mtr_flow_id)
5978                                 mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
5979                         ret = -rte_errno;
5980                         goto exit;
5981                 }
5982                 if (mtr_flow_id) {
5983                         dev_flow->handle->split_flow_id = mtr_flow_id;
5984                         dev_flow->handle->is_meter_flow_id = 1;
5985                 }
5986                 if (!fm->def_policy) {
5987                         if (!set_mtr_reg && fm->drop_cnt)
5988                                 ret =
5989                         flow_meter_create_drop_flow_with_org_pattern(dev, flow,
5990                                                         &sfx_attr, items,
5991                                                         flow_split_info,
5992                                                         fm, error);
5993                         goto exit;
5994                 }
5995                 /* Set the sfx group attr. */
5996                 sfx_attr.group = sfx_attr.transfer ?
5997                                 (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
5998                                  MLX5_FLOW_TABLE_LEVEL_METER;
5999                 flow_split_info->prefix_layers =
6000                                 flow_get_prefix_layer_flags(dev_flow);
6001                 flow_split_info->prefix_mark = dev_flow->handle->mark;
6002                 flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6003         }
6004         /* Add the suffix subflow, or the original flow if there is no meter split. */
6005         ret = flow_create_split_metadata(dev, flow,
6006                                          &sfx_attr, sfx_items ?
6007                                          sfx_items : items,
6008                                          sfx_actions ? sfx_actions : actions,
6009                                          flow_split_info, error);
6010 exit:
6011         if (sfx_actions)
6012                 mlx5_free(sfx_actions);
6013         return ret;
6014 }
6015
6016 /**
6017  * The splitting for sample feature.
6018  *
6019  * Once Sample action is detected in the action list, the flow actions should
6020  * be split into prefix sub flow and suffix sub flow.
6021  *
6022  * The original items remain in the prefix sub flow. All actions preceding the
6023  * sample action, and the sample action itself, will be copied to the prefix
6024  * sub flow, while the actions following the sample action will be copied to
6025  * the suffix sub flow; the Queue action is always located in the suffix sub flow.
6026  *
6027  * In order to make packets from the prefix sub flow match the suffix sub
6028  * flow, an extra tag action is added to the prefix sub flow, and the suffix
6029  * sub flow uses a tag item with the unique flow id.
6030  *
6031  * @param dev
6032  *   Pointer to Ethernet device.
6033  * @param[in] flow
6034  *   Parent flow structure pointer.
6035  * @param[in] attr
6036  *   Flow rule attributes.
6037  * @param[in] items
6038  *   Pattern specification (list terminated by the END pattern item).
6039  * @param[in] actions
6040  *   Associated actions (list terminated by the END action).
6041  * @param[in] flow_split_info
6042  *   Pointer to flow split info structure.
6043  * @param[out] error
6044  *   Perform verbose error reporting if not NULL.
6045  * @return
6046  *   0 on success, negative value otherwise
6047  */
6048 static int
6049 flow_create_split_sample(struct rte_eth_dev *dev,
6050                          struct rte_flow *flow,
6051                          const struct rte_flow_attr *attr,
6052                          const struct rte_flow_item items[],
6053                          const struct rte_flow_action actions[],
6054                          struct mlx5_flow_split_info *flow_split_info,
6055                          struct rte_flow_error *error)
6056 {
6057         struct mlx5_priv *priv = dev->data->dev_private;
6058         struct rte_flow_action *sfx_actions = NULL;
6059         struct rte_flow_action *pre_actions = NULL;
6060         struct rte_flow_item *sfx_items = NULL;
6061         struct mlx5_flow *dev_flow = NULL;
6062         struct rte_flow_attr sfx_attr = *attr;
6063 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6064         struct mlx5_flow_dv_sample_resource *sample_res;
6065         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6066         struct mlx5_flow_tbl_resource *sfx_tbl;
6067 #endif
6068         size_t act_size;
6069         size_t item_size;
6070         uint32_t fdb_tx = 0;
6071         int32_t tag_id = 0;
6072         int actions_n = 0;
6073         int sample_action_pos;
6074         int qrss_action_pos;
6075         int add_tag = 0;
6076         int modify_after_mirror = 0;
6077         uint16_t jump_table = 0;
6078         const uint32_t next_ft_step = 1;
6079         int ret = 0;
6080
6081         if (priv->sampler_en)
6082                 actions_n = flow_check_match_action(actions, attr,
6083                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
6084                                         &sample_action_pos, &qrss_action_pos,
6085                                         &modify_after_mirror);
6086         if (actions_n) {
6087                 /* The prefix actions must include sample, tag, end. */
6088                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6089                            + sizeof(struct mlx5_rte_flow_action_set_tag);
6090                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6091                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
6092                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6093                                           item_size), 0, SOCKET_ID_ANY);
6094                 if (!sfx_actions)
6095                         return rte_flow_error_set(error, ENOMEM,
6096                                                   RTE_FLOW_ERROR_TYPE_ACTION,
6097                                                   NULL, "no memory to split "
6098                                                   "sample flow");
6099                 /* The representor_id is UINT16_MAX for uplink. */
6100                 fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6101                 /*
6102                  * When reg_c_preserve is set, metadata registers Cx preserve
6103                  * their value even through packet duplication.
6104                  */
6105                 add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve);
6106                 if (add_tag)
6107                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6108                                         + act_size);
6109                 if (modify_after_mirror)
6110                         jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6111                                      next_ft_step;
6112                 pre_actions = sfx_actions + actions_n;
6113                 tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
6114                                                 actions, sfx_actions,
6115                                                 pre_actions, actions_n,
6116                                                 sample_action_pos,
6117                                                 qrss_action_pos, jump_table,
6118                                                 error);
6119                 if (tag_id < 0 || (add_tag && !tag_id)) {
6120                         ret = -rte_errno;
6121                         goto exit;
6122                 }
6123                 if (modify_after_mirror)
6124                         flow_split_info->skip_scale =
6125                                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6126                 /* Add the prefix subflow. */
6127                 ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6128                                               items, pre_actions,
6129                                               flow_split_info, error);
6130                 if (ret) {
6131                         ret = -rte_errno;
6132                         goto exit;
6133                 }
6134                 dev_flow->handle->split_flow_id = tag_id;
6135 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6136                 if (!modify_after_mirror) {
6137                         /* Set the sfx group attr. */
6138                         sample_res = (struct mlx5_flow_dv_sample_resource *)
6139                                                 dev_flow->dv.sample_res;
6140                         sfx_tbl = (struct mlx5_flow_tbl_resource *)
6141                                                 sample_res->normal_path_tbl;
6142                         sfx_tbl_data = container_of(sfx_tbl,
6143                                                 struct mlx5_flow_tbl_data_entry,
6144                                                 tbl);
6145                         sfx_attr.group = sfx_attr.transfer ?
6146                         (sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6147                 } else {
6148                         MLX5_ASSERT(attr->transfer);
6149                         sfx_attr.group = jump_table;
6150                 }
6151                 flow_split_info->prefix_layers =
6152                                 flow_get_prefix_layer_flags(dev_flow);
6153                 flow_split_info->prefix_mark = dev_flow->handle->mark;
6154                 /* The suffix group level has already been scaled with the
6155                  * factor, set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1
6156                  * to avoid scaling it again in translation.
6157                  */
6158                 flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6159 #endif
6160         }
6161         /* Add the suffix subflow. */
6162         ret = flow_create_split_meter(dev, flow, &sfx_attr,
6163                                       sfx_items ? sfx_items : items,
6164                                       sfx_actions ? sfx_actions : actions,
6165                                       flow_split_info, error);
6166 exit:
6167         if (sfx_actions)
6168                 mlx5_free(sfx_actions);
6169         return ret;
6170 }
6171
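/*
 * Illustrative sketch only: with the sampler enabled, a rule such as
 *     pattern ... / end  actions ... / sample / queue ... / end
 * is split by flow_create_split_sample() into a prefix subflow carrying the
 * sample action plus an internal tag, and a suffix subflow matching that tag
 * in the suffix table resolved above and carrying the remaining actions.
 */
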
6172 /**
6173  * Split the flow into a set of subflows. The splitters might be linked
6174  * in a chain, like this:
6175  * flow_create_split_outer() calls:
6176  *   flow_create_split_meter() calls:
6177  *     flow_create_split_metadata(meter_subflow_0) calls:
6178  *       flow_create_split_inner(metadata_subflow_0)
6179  *       flow_create_split_inner(metadata_subflow_1)
6180  *       flow_create_split_inner(metadata_subflow_2)
6181  *     flow_create_split_metadata(meter_subflow_1) calls:
6182  *       flow_create_split_inner(metadata_subflow_0)
6183  *       flow_create_split_inner(metadata_subflow_1)
6184  *       flow_create_split_inner(metadata_subflow_2)
6185  *
6186  * This provides a flexible way to add new levels of flow splitting.
6187  * All successfully created subflows are included in the
6188  * parent flow dev_flow list.
6189  *
6190  * @param dev
6191  *   Pointer to Ethernet device.
6192  * @param[in] flow
6193  *   Parent flow structure pointer.
6194  * @param[in] attr
6195  *   Flow rule attributes.
6196  * @param[in] items
6197  *   Pattern specification (list terminated by the END pattern item).
6198  * @param[in] actions
6199  *   Associated actions (list terminated by the END action).
6200  * @param[in] flow_split_info
6201  *   Pointer to flow split info structure.
6202  * @param[out] error
6203  *   Perform verbose error reporting if not NULL.
6204  * @return
6205  *   0 on success, negative value otherwise
6206  */
6207 static int
6208 flow_create_split_outer(struct rte_eth_dev *dev,
6209                         struct rte_flow *flow,
6210                         const struct rte_flow_attr *attr,
6211                         const struct rte_flow_item items[],
6212                         const struct rte_flow_action actions[],
6213                         struct mlx5_flow_split_info *flow_split_info,
6214                         struct rte_flow_error *error)
6215 {
6216         int ret;
6217
6218         ret = flow_create_split_sample(dev, flow, attr, items,
6219                                        actions, flow_split_info, error);
6220         MLX5_ASSERT(ret <= 0);
6221         return ret;
6222 }
6223
6224 static inline struct mlx5_flow_tunnel *
6225 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6226 {
6227         struct mlx5_flow_tunnel *tunnel;
6228
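        /*
         * The rule stores the tunnel as a const pointer; the qualifier is
         * cast away here (hence the -Wcast-qual pragma) so the caller can
         * update the tunnel reference counter.
         */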
6229 #pragma GCC diagnostic push
6230 #pragma GCC diagnostic ignored "-Wcast-qual"
6231         tunnel = (typeof(tunnel))flow->tunnel;
6232 #pragma GCC diagnostic pop
6233
6234         return tunnel;
6235 }
6236
6237 /**
6238  * Adjust flow RSS workspace if needed.
6239  *
6240  * @param wks
6241  *   Pointer to thread flow work space.
6242  * @param rss_desc
6243  *   Pointer to RSS descriptor.
6244  * @param[in] nrssq_num
6245  *   New RSS queue number.
6246  *
6247  * @return
6248  *   0 on success, -1 otherwise and rte_errno is set.
6249  */
6250 static int
6251 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6252                           struct mlx5_flow_rss_desc *rss_desc,
6253                           uint32_t nrssq_num)
6254 {
6255         if (likely(nrssq_num <= wks->rssq_num))
6256                 return 0;
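        /*
         * Grow the queue array, rounding up to an even number of entries,
         * e.g. a request for 5 queues reserves RTE_ALIGN(5, 2) == 6 slots.
         */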
6257         rss_desc->queue = realloc(rss_desc->queue,
6258                           sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6259         if (!rss_desc->queue) {
6260                 rte_errno = ENOMEM;
6261                 return -1;
6262         }
6263         wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6264         return 0;
6265 }
6266
6267 /**
6268  * Create a flow of the given type and add it to the corresponding flow pool.
6269  *
6270  * @param dev
6271  *   Pointer to Ethernet device.
6272  * @param type
6273  *   Flow type to be created.
6277  * @param[in] attr
6278  *   Flow rule attributes.
6279  * @param[in] items
6280  *   Pattern specification (list terminated by the END pattern item).
6281  * @param[in] actions
6282  *   Associated actions (list terminated by the END action).
6283  * @param[in] external
6284  *   This flow rule is created by a request external to the PMD.
6285  * @param[out] error
6286  *   Perform verbose error reporting if not NULL.
6287  *
6288  * @return
6289  *   A flow index on success, 0 otherwise and rte_errno is set.
6290  */
6291 static uint32_t
6292 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6293                  const struct rte_flow_attr *attr,
6294                  const struct rte_flow_item items[],
6295                  const struct rte_flow_action original_actions[],
6296                  bool external, struct rte_flow_error *error)
6297 {
6298         struct mlx5_priv *priv = dev->data->dev_private;
6299         struct rte_flow *flow = NULL;
6300         struct mlx5_flow *dev_flow;
6301         const struct rte_flow_action_rss *rss = NULL;
6302         struct mlx5_translated_action_handle
6303                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6304         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6305         union {
6306                 struct mlx5_flow_expand_rss buf;
6307                 uint8_t buffer[4096];
6308         } expand_buffer;
6309         union {
6310                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6311                 uint8_t buffer[2048];
6312         } actions_rx;
6313         union {
6314                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6315                 uint8_t buffer[2048];
6316         } actions_hairpin_tx;
6317         union {
6318                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6319                 uint8_t buffer[2048];
6320         } items_tx;
6321         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6322         struct mlx5_flow_rss_desc *rss_desc;
6323         const struct rte_flow_action *p_actions_rx;
6324         uint32_t i;
6325         uint32_t idx = 0;
6326         int hairpin_flow;
6327         struct rte_flow_attr attr_tx = { .priority = 0 };
6328         const struct rte_flow_action *actions;
6329         struct rte_flow_action *translated_actions = NULL;
6330         struct mlx5_flow_tunnel *tunnel;
6331         struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6332         struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6333         struct mlx5_flow_split_info flow_split_info = {
6334                 .external = !!external,
6335                 .skip_scale = 0,
6336                 .flow_idx = 0,
6337                 .prefix_mark = 0,
6338                 .prefix_layers = 0,
6339                 .table_id = 0
6340         };
6341         int ret;
6342
6343         MLX5_ASSERT(wks);
6344         rss_desc = &wks->rss_desc;
6345         ret = flow_action_handles_translate(dev, original_actions,
6346                                             indir_actions,
6347                                             &indir_actions_n,
6348                                             &translated_actions, error);
6349         if (ret < 0) {
6350                 MLX5_ASSERT(translated_actions == NULL);
6351                 return 0;
6352         }
6353         actions = translated_actions ? translated_actions : original_actions;
6354         p_actions_rx = actions;
6355         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6356         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6357                                 external, hairpin_flow, error);
6358         if (ret < 0)
6359                 goto error_before_hairpin_split;
6360         flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6361         if (!flow) {
6362                 rte_errno = ENOMEM;
6363                 goto error_before_hairpin_split;
6364         }
6365         if (hairpin_flow > 0) {
6366                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6367                         rte_errno = EINVAL;
6368                         goto error_before_hairpin_split;
6369                 }
6370                 flow_hairpin_split(dev, actions, actions_rx.actions,
6371                                    actions_hairpin_tx.actions, items_tx.items,
6372                                    idx);
6373                 p_actions_rx = actions_rx.actions;
6374         }
6375         flow_split_info.flow_idx = idx;
6376         flow->drv_type = flow_get_drv_type(dev, attr);
6377         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6378                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
6379         memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6380         /* RSS Action only works on NIC RX domain */
6381         if (attr->ingress && !attr->transfer)
6382                 rss = flow_get_rss_action(dev, p_actions_rx);
6383         if (rss) {
6384                 if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6385                         return 0;
6386                 /*
6387                  * The following information is required by
6388                  * mlx5_flow_hashfields_adjust() in advance.
6389                  */
6390                 rss_desc->level = rss->level;
6391                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
6392                 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
6393         }
6394         flow->dev_handles = 0;
6395         if (rss && rss->types) {
6396                 unsigned int graph_root;
6397
6398                 graph_root = find_graph_root(rss->level);
6399                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6400                                            items, rss->types,
6401                                            mlx5_support_expansion, graph_root);
6402                 MLX5_ASSERT(ret > 0 &&
6403                        (unsigned int)ret < sizeof(expand_buffer.buffer));
6404                 if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6405                         for (i = 0; i < buf->entries; ++i)
6406                                 mlx5_dbg__print_pattern(buf->entry[i].pattern);
6407                 }
6408         } else {
6409                 buf->entries = 1;
6410                 buf->entry[0].pattern = (void *)(uintptr_t)items;
6411         }
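        /*
         * Illustrative example: an ETH / IPV4 / END pattern combined with TCP
         * RSS types may be expanded by mlx5_flow_expand_rss() into two
         * entries, ETH / IPV4 and ETH / IPV4 / TCP, and each entry gets its
         * own subflow in the loop below.
         */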
6412         rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6413                                                       indir_actions_n);
6414         for (i = 0; i < buf->entries; ++i) {
6415                 /* Initialize flow split data. */
6416                 flow_split_info.prefix_layers = 0;
6417                 flow_split_info.prefix_mark = 0;
6418                 flow_split_info.skip_scale = 0;
6419                 /*
6420                  * The splitter may create multiple dev_flows,
6421                  * depending on configuration. In the simplest
6422                  * case it just creates the unmodified original flow.
6423                  */
6424                 ret = flow_create_split_outer(dev, flow, attr,
6425                                               buf->entry[i].pattern,
6426                                               p_actions_rx, &flow_split_info,
6427                                               error);
6428                 if (ret < 0)
6429                         goto error;
6430                 if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
6431                         ret = flow_tunnel_add_default_miss(dev, flow, attr,
6432                                                            p_actions_rx,
6433                                                            idx,
6434                                                            wks->flows[0].tunnel,
6435                                                            &default_miss_ctx,
6436                                                            error);
6437                         if (ret < 0) {
6438                                 mlx5_free(default_miss_ctx.queue);
6439                                 goto error;
6440                         }
6441                 }
6442         }
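        /*
         * For hairpin flows the rule was split by flow_hairpin_split() above:
         * the Rx half has just been created, and the Tx half below is placed
         * in group MLX5_HAIRPIN_TX_TABLE and matches the internal Tx queue
         * item.
         */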
6443         /* Create the tx flow. */
6444         if (hairpin_flow) {
6445                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
6446                 attr_tx.ingress = 0;
6447                 attr_tx.egress = 1;
6448                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
6449                                          actions_hairpin_tx.actions,
6450                                          idx, error);
6451                 if (!dev_flow)
6452                         goto error;
6453                 dev_flow->flow = flow;
6454                 dev_flow->external = 0;
6455                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
6456                               dev_flow->handle, next);
6457                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
6458                                          items_tx.items,
6459                                          actions_hairpin_tx.actions, error);
6460                 if (ret < 0)
6461                         goto error;
6462         }
6463         /*
6464          * Update the metadata register copy table. If extensive
6465          * metadata feature is enabled and registers are supported
6466          * we might create the extra rte_flow for each unique
6467          * MARK/FLAG action ID.
6468          *
6469          * The table is updated for ingress flows only, because
6470          * the egress flows belong to a different device and the
6471          * copy table should be updated in the peer NIC Rx domain.
6472          */
6473         if (attr->ingress &&
6474             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
6475                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
6476                 if (ret)
6477                         goto error;
6478         }
6479         /*
6480          * If the flow is external (from the application), OR the device is
6481          * started, OR it is the mreg discover flow, then apply it immediately.
6482          */
6483         if (external || dev->data->dev_started ||
6484             (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
6485              attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
6486                 ret = flow_drv_apply(dev, flow, error);
6487                 if (ret < 0)
6488                         goto error;
6489         }
6490         flow->type = type;
6491         flow_rxq_flags_set(dev, flow);
6492         rte_free(translated_actions);
6493         tunnel = flow_tunnel_from_rule(wks->flows);
6494         if (tunnel) {
6495                 flow->tunnel = 1;
6496                 flow->tunnel_id = tunnel->tunnel_id;
6497                 __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
6498                 mlx5_free(default_miss_ctx.queue);
6499         }
6500         mlx5_flow_pop_thread_workspace();
6501         return idx;
6502 error:
6503         MLX5_ASSERT(flow);
6504         ret = rte_errno; /* Save rte_errno before cleanup. */
6505         flow_mreg_del_copy_action(dev, flow);
6506         flow_drv_destroy(dev, flow);
6507         if (rss_desc->shared_rss)
6508                 __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
6509                         mlx5_ipool_get
6510                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
6511                         rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
6512         mlx5_ipool_free(priv->flows[type], idx);
6513         rte_errno = ret; /* Restore rte_errno. */
6516         mlx5_flow_pop_thread_workspace();
6517 error_before_hairpin_split:
6518         rte_free(translated_actions);
6519         return 0;
6520 }
6521
6522 /**
6523  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
6524  * incoming packets to table 1.
6525  *
6526  * Other flow rules, requested for group n, will be created in
6527  * e-switch table n+1.
6528  * Jump action to e-switch group n will be created to group n+1.
6529  *
6530  * Used when working in switchdev mode, to utilise advantages of table 1
6531  * and above.
6532  *
6533  * @param dev
6534  *   Pointer to Ethernet device.
6535  *
6536  * @return
6537  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
6538  */
6539 struct rte_flow *
6540 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
6541 {
6542         const struct rte_flow_attr attr = {
6543                 .group = 0,
6544                 .priority = 0,
6545                 .ingress = 1,
6546                 .egress = 0,
6547                 .transfer = 1,
6548         };
6549         const struct rte_flow_item pattern = {
6550                 .type = RTE_FLOW_ITEM_TYPE_END,
6551         };
6552         struct rte_flow_action_jump jump = {
6553                 .group = 1,
6554         };
6555         const struct rte_flow_action actions[] = {
6556                 {
6557                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
6558                         .conf = &jump,
6559                 },
6560                 {
6561                         .type = RTE_FLOW_ACTION_TYPE_END,
6562                 },
6563         };
6564         struct rte_flow_error error;
6565
6566         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
6567                                                    &attr, &pattern,
6568                                                    actions, false, &error);
6569 }
6570
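/*
 * For reference only: the rule built above is roughly equivalent to the
 * following testpmd command (assuming port 0):
 *
 *   flow create 0 ingress transfer group 0 priority 0
 *        pattern end actions jump group 1 / end
 */
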
6571 /**
6572  * Validate a flow supported by the NIC.
6573  *
6574  * @see rte_flow_validate()
6575  * @see rte_flow_ops
6576  */
6577 int
6578 mlx5_flow_validate(struct rte_eth_dev *dev,
6579                    const struct rte_flow_attr *attr,
6580                    const struct rte_flow_item items[],
6581                    const struct rte_flow_action original_actions[],
6582                    struct rte_flow_error *error)
6583 {
6584         int hairpin_flow;
6585         struct mlx5_translated_action_handle
6586                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6587         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6588         const struct rte_flow_action *actions;
6589         struct rte_flow_action *translated_actions = NULL;
6590         int ret = flow_action_handles_translate(dev, original_actions,
6591                                                 indir_actions,
6592                                                 &indir_actions_n,
6593                                                 &translated_actions, error);
6594
6595         if (ret)
6596                 return ret;
6597         actions = translated_actions ? translated_actions : original_actions;
6598         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6599         ret = flow_drv_validate(dev, attr, items, actions,
6600                                 true, hairpin_flow, error);
6601         rte_free(translated_actions);
6602         return ret;
6603 }
6604
6605 /**
6606  * Create a flow.
6607  *
6608  * @see rte_flow_create()
6609  * @see rte_flow_ops
6610  */
6611 struct rte_flow *
6612 mlx5_flow_create(struct rte_eth_dev *dev,
6613                  const struct rte_flow_attr *attr,
6614                  const struct rte_flow_item items[],
6615                  const struct rte_flow_action actions[],
6616                  struct rte_flow_error *error)
6617 {
6618         /*
6619          * If the device is not started yet, the application is not allowed
6620          * to create a flow. PMD default flows and traffic control flows
6621          * are not affected.
6622          */
6623         if (unlikely(!dev->data->dev_started)) {
6624                 DRV_LOG(DEBUG, "port %u is not started when "
6625                         "inserting a flow", dev->data->port_id);
6626                 rte_flow_error_set(error, ENODEV,
6627                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6628                                    NULL,
6629                                    "port not started");
6630                 return NULL;
6631         }
6632
6633         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
6634                                                    attr, items, actions,
6635                                                    true, error);
6636 }
6637
6638 /**
6639  * Destroy a flow in a list.
6640  *
6641  * @param dev
6642  *   Pointer to Ethernet device.
6643  * @param[in] flow_idx
6644  *   Index of flow to destroy.
6645  */
6646 static void
6647 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6648                   uint32_t flow_idx)
6649 {
6650         struct mlx5_priv *priv = dev->data->dev_private;
6651         struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
6652
6653         if (!flow)
6654                 return;
6655         MLX5_ASSERT(flow->type == type);
6656         /*
6657          * Update RX queue flags only if port is started, otherwise it is
6658          * already clean.
6659          */
6660         if (dev->data->dev_started)
6661                 flow_rxq_flags_trim(dev, flow);
6662         flow_drv_destroy(dev, flow);
6663         if (flow->tunnel) {
6664                 struct mlx5_flow_tunnel *tunnel;
6665
6666                 tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
6667                 RTE_VERIFY(tunnel);
6668                 if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
6669                         mlx5_flow_tunnel_free(dev, tunnel);
6670         }
6671         flow_mreg_del_copy_action(dev, flow);
6672         mlx5_ipool_free(priv->flows[type], flow_idx);
6673 }
6674
6675 /**
6676  * Destroy all flows.
6677  *
6678  * @param dev
6679  *   Pointer to Ethernet device.
6680  * @param type
6681  *   Flow type to be flushed.
6682  * @param active
6683  *   If flushing is called actively.
6684  */
6685 void
6686 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6687                      bool active)
6688 {
6689         struct mlx5_priv *priv = dev->data->dev_private;
6690         uint32_t num_flushed = 0, fidx = 1;
6691         struct rte_flow *flow;
6692
6693         MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
6694                 flow_list_destroy(dev, type, fidx);
6695                 num_flushed++;
6696         }
6697         if (active) {
6698                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
6699                         dev->data->port_id, num_flushed);
6700         }
6701 }
6702
6703 /**
6704  * Stop all default actions for flows.
6705  *
6706  * @param dev
6707  *   Pointer to Ethernet device.
6708  */
6709 void
6710 mlx5_flow_stop_default(struct rte_eth_dev *dev)
6711 {
6712         flow_mreg_del_default_copy_action(dev);
6713         flow_rxq_flags_clear(dev);
6714 }
6715
6716 /**
6717  * Start all default actions for flows.
6718  *
6719  * @param dev
6720  *   Pointer to Ethernet device.
6721  * @return
6722  *   0 on success, a negative errno value otherwise and rte_errno is set.
6723  */
6724 int
6725 mlx5_flow_start_default(struct rte_eth_dev *dev)
6726 {
6727         struct rte_flow_error error;
6728
6729         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
6730         return flow_mreg_add_default_copy_action(dev, &error);
6731 }
6732
6733 /**
6734  * Release key of thread specific flow workspace data.
6735  */
6736 void
6737 flow_release_workspace(void *data)
6738 {
6739         struct mlx5_flow_workspace *wks = data;
6740         struct mlx5_flow_workspace *next;
6741
6742         while (wks) {
6743                 next = wks->next;
6744                 free(wks->rss_desc.queue);
6745                 free(wks);
6746                 wks = next;
6747         }
6748 }
6749
6750 /**
6751  * Get thread specific current flow workspace.
6752  *
6753  * @return pointer to thread specific flow workspace data, NULL on error.
6754  */
6755 struct mlx5_flow_workspace*
6756 mlx5_flow_get_thread_workspace(void)
6757 {
6758         struct mlx5_flow_workspace *data;
6759
6760         data = mlx5_flow_os_get_specific_workspace();
6761         MLX5_ASSERT(data && data->inuse);
6762         if (!data || !data->inuse)
6763                 DRV_LOG(ERR, "flow workspace not initialized.");
6764         return data;
6765 }
6766
6767 /**
6768  * Allocate and init new flow workspace.
6769  *
6770  * @return pointer to flow workspace data, NULL on error.
6771  */
6772 static struct mlx5_flow_workspace*
6773 flow_alloc_thread_workspace(void)
6774 {
6775         struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
6776
6777         if (!data) {
6778                 DRV_LOG(ERR, "Failed to allocate flow workspace "
6779                         "memory.");
6780                 return NULL;
6781         }
6782         data->rss_desc.queue = calloc(1,
6783                         sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
6784         if (!data->rss_desc.queue)
6785                 goto err;
6786         data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
6787         return data;
6788 err:
6789         if (data->rss_desc.queue)
6790                 free(data->rss_desc.queue);
6791         free(data);
6792         return NULL;
6793 }
6794
6795 /**
6796  * Get new thread specific flow workspace.
6797  *
6798  * If the current workspace is in use, create a new one and set it as current.
6799  *
6800  * @return pointer to thread specific flow workspace data, NULL on error.
6801  */
6802 static struct mlx5_flow_workspace*
6803 mlx5_flow_push_thread_workspace(void)
6804 {
6805         struct mlx5_flow_workspace *curr;
6806         struct mlx5_flow_workspace *data;
6807
6808         curr = mlx5_flow_os_get_specific_workspace();
6809         if (!curr) {
6810                 data = flow_alloc_thread_workspace();
6811                 if (!data)
6812                         return NULL;
6813         } else if (!curr->inuse) {
6814                 data = curr;
6815         } else if (curr->next) {
6816                 data = curr->next;
6817         } else {
6818                 data = flow_alloc_thread_workspace();
6819                 if (!data)
6820                         return NULL;
6821                 curr->next = data;
6822                 data->prev = curr;
6823         }
6824         data->inuse = 1;
6825         data->flow_idx = 0;
6826         /* Set as current workspace */
6827         if (mlx5_flow_os_set_specific_workspace(data))
6828                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6829         return data;
6830 }
6831
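/*
 * The per-thread workspaces form a doubly linked list: push returns the next
 * unused node (allocating one if needed) and marks it in use, pop marks it
 * unused again and restores the previous node as current. This lets nested
 * flow creations on the same thread each use their own rss_desc scratch area.
 */
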
6832 /**
6833  * Close current thread specific flow workspace.
6834  *
6835  * If a previous workspace is available, set it as current.
6836  *
6837  * @return pointer to thread specific flow workspace data, NULL on error.
6838  */
6839 static void
6840 mlx5_flow_pop_thread_workspace(void)
6841 {
6842         struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
6843
6844         if (!data)
6845                 return;
6846         if (!data->inuse) {
6847                 DRV_LOG(ERR, "Failed to close unused flow workspace.");
6848                 return;
6849         }
6850         data->inuse = 0;
6851         if (!data->prev)
6852                 return;
6853         if (mlx5_flow_os_set_specific_workspace(data->prev))
6854                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6855 }
6856
6857 /**
6858  * Verify the flow list is empty
6859  *
6860  * @param dev
6861  *  Pointer to Ethernet device.
6862  *
6863  * @return the number of flows not released.
6864  */
6865 int
6866 mlx5_flow_verify(struct rte_eth_dev *dev __rte_unused)
6867 {
6868         struct mlx5_priv *priv = dev->data->dev_private;
6869         struct rte_flow *flow;
6870         uint32_t idx = 0;
6871         int ret = 0, i;
6872
6873         for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
6874                 MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
6875                         DRV_LOG(DEBUG, "port %u flow %p still referenced",
6876                                 dev->data->port_id, (void *)flow);
6877                         ret++;
6878                 }
6879         }
6880         return ret;
6881 }
6882
6883 /**
6884  * Enable default hairpin egress flow.
6885  *
6886  * @param dev
6887  *   Pointer to Ethernet device.
6888  * @param queue
6889  *   The queue index.
6890  *
6891  * @return
6892  *   0 on success, a negative errno value otherwise and rte_errno is set.
6893  */
6894 int
6895 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
6896                             uint32_t queue)
6897 {
6898         const struct rte_flow_attr attr = {
6899                 .egress = 1,
6900                 .priority = 0,
6901         };
6902         struct mlx5_rte_flow_item_tx_queue queue_spec = {
6903                 .queue = queue,
6904         };
6905         struct mlx5_rte_flow_item_tx_queue queue_mask = {
6906                 .queue = UINT32_MAX,
6907         };
6908         struct rte_flow_item items[] = {
6909                 {
6910                         .type = (enum rte_flow_item_type)
6911                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
6912                         .spec = &queue_spec,
6913                         .last = NULL,
6914                         .mask = &queue_mask,
6915                 },
6916                 {
6917                         .type = RTE_FLOW_ITEM_TYPE_END,
6918                 },
6919         };
6920         struct rte_flow_action_jump jump = {
6921                 .group = MLX5_HAIRPIN_TX_TABLE,
6922         };
6923         struct rte_flow_action actions[2];
6924         uint32_t flow_idx;
6925         struct rte_flow_error error;
6926
6927         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
6928         actions[0].conf = &jump;
6929         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
6930         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
6931                                     &attr, items, actions, false, &error);
6932         if (!flow_idx) {
6933                 DRV_LOG(DEBUG,
6934                         "Failed to create ctrl flow: rte_errno(%d),"
6935                         " type(%d), message(%s)",
6936                         rte_errno, error.type,
6937                         error.message ? error.message : " (no stated reason)");
6938                 return -rte_errno;
6939         }
6940         return 0;
6941 }
6942
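/*
 * Note: the rule above is the egress side of the hairpin path. It matches the
 * internal TX_QUEUE item for the given queue and jumps to
 * MLX5_HAIRPIN_TX_TABLE, the group where flow_list_create() places the Tx
 * subflows of hairpin rules.
 */
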
6943 /**
6944  * Enable a control flow configured from the control plane.
6945  *
6946  * @param dev
6947  *   Pointer to Ethernet device.
6948  * @param eth_spec
6949  *   An Ethernet flow spec to apply.
6950  * @param eth_mask
6951  *   An Ethernet flow mask to apply.
6952  * @param vlan_spec
6953  *   A VLAN flow spec to apply.
6954  * @param vlan_mask
6955  *   A VLAN flow mask to apply.
6956  *
6957  * @return
6958  *   0 on success, a negative errno value otherwise and rte_errno is set.
6959  */
6960 int
6961 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
6962                     struct rte_flow_item_eth *eth_spec,
6963                     struct rte_flow_item_eth *eth_mask,
6964                     struct rte_flow_item_vlan *vlan_spec,
6965                     struct rte_flow_item_vlan *vlan_mask)
6966 {
6967         struct mlx5_priv *priv = dev->data->dev_private;
6968         const struct rte_flow_attr attr = {
6969                 .ingress = 1,
6970                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
6971         };
6972         struct rte_flow_item items[] = {
6973                 {
6974                         .type = RTE_FLOW_ITEM_TYPE_ETH,
6975                         .spec = eth_spec,
6976                         .last = NULL,
6977                         .mask = eth_mask,
6978                 },
6979                 {
6980                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
6981                                               RTE_FLOW_ITEM_TYPE_END,
6982                         .spec = vlan_spec,
6983                         .last = NULL,
6984                         .mask = vlan_mask,
6985                 },
6986                 {
6987                         .type = RTE_FLOW_ITEM_TYPE_END,
6988                 },
6989         };
6990         uint16_t queue[priv->reta_idx_n];
6991         struct rte_flow_action_rss action_rss = {
6992                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
6993                 .level = 0,
6994                 .types = priv->rss_conf.rss_hf,
6995                 .key_len = priv->rss_conf.rss_key_len,
6996                 .queue_num = priv->reta_idx_n,
6997                 .key = priv->rss_conf.rss_key,
6998                 .queue = queue,
6999         };
7000         struct rte_flow_action actions[] = {
7001                 {
7002                         .type = RTE_FLOW_ACTION_TYPE_RSS,
7003                         .conf = &action_rss,
7004                 },
7005                 {
7006                         .type = RTE_FLOW_ACTION_TYPE_END,
7007                 },
7008         };
7009         uint32_t flow_idx;
7010         struct rte_flow_error error;
7011         unsigned int i;
7012
7013         if (!priv->reta_idx_n || !priv->rxqs_n) {
7014                 return 0;
7015         }
7016         if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
7017                 action_rss.types = 0;
7018         for (i = 0; i != priv->reta_idx_n; ++i)
7019                 queue[i] = (*priv->reta_idx)[i];
7020         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7021                                     &attr, items, actions, false, &error);
7022         if (!flow_idx)
7023                 return -rte_errno;
7024         return 0;
7025 }
7026
7027 /**
7028  * Enable a control flow configured from the control plane.
7029  *
7030  * @param dev
7031  *   Pointer to Ethernet device.
7032  * @param eth_spec
7033  *   An Ethernet flow spec to apply.
7034  * @param eth_mask
7035  *   An Ethernet flow mask to apply.
7036  *
7037  * @return
7038  *   0 on success, a negative errno value otherwise and rte_errno is set.
7039  */
7040 int
7041 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7042                struct rte_flow_item_eth *eth_spec,
7043                struct rte_flow_item_eth *eth_mask)
7044 {
7045         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7046 }
7047
7048 /**
7049  * Create the default miss flow rule matching LACP traffic.
7050  *
7051  * @param dev
7052  *   Pointer to Ethernet device.
7053  *
7056  * @return
7057  *   0 on success, a negative errno value otherwise and rte_errno is set.
7058  */
7059 int
7060 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7061 {
7062         /*
7063          * The LACP matching is done by using only the ether type, since using
7064          * a multicast dst MAC causes the kernel to give low priority to this flow.
7065          */
7066         static const struct rte_flow_item_eth lacp_spec = {
7067                 .type = RTE_BE16(0x8809),
7068         };
7069         static const struct rte_flow_item_eth lacp_mask = {
7070                 .type = 0xffff,
7071         };
7072         const struct rte_flow_attr attr = {
7073                 .ingress = 1,
7074         };
7075         struct rte_flow_item items[] = {
7076                 {
7077                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7078                         .spec = &lacp_spec,
7079                         .mask = &lacp_mask,
7080                 },
7081                 {
7082                         .type = RTE_FLOW_ITEM_TYPE_END,
7083                 },
7084         };
7085         struct rte_flow_action actions[] = {
7086                 {
7087                         .type = (enum rte_flow_action_type)
7088                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7089                 },
7090                 {
7091                         .type = RTE_FLOW_ACTION_TYPE_END,
7092                 },
7093         };
7094         struct rte_flow_error error;
7095         uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7096                                         &attr, items, actions,
7097                                         false, &error);
7098
7099         if (!flow_idx)
7100                 return -rte_errno;
7101         return 0;
7102 }
7103
7104 /**
7105  * Destroy a flow.
7106  *
7107  * @see rte_flow_destroy()
7108  * @see rte_flow_ops
7109  */
7110 int
7111 mlx5_flow_destroy(struct rte_eth_dev *dev,
7112                   struct rte_flow *flow,
7113                   struct rte_flow_error *error __rte_unused)
7114 {
7115         flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7116                                 (uintptr_t)(void *)flow);
7117         return 0;
7118 }
7119
7120 /**
7121  * Destroy all flows.
7122  *
7123  * @see rte_flow_flush()
7124  * @see rte_flow_ops
7125  */
7126 int
7127 mlx5_flow_flush(struct rte_eth_dev *dev,
7128                 struct rte_flow_error *error __rte_unused)
7129 {
7130         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7131         return 0;
7132 }
7133
7134 /**
7135  * Isolated mode.
7136  *
7137  * @see rte_flow_isolate()
7138  * @see rte_flow_ops
7139  */
7140 int
7141 mlx5_flow_isolate(struct rte_eth_dev *dev,
7142                   int enable,
7143                   struct rte_flow_error *error)
7144 {
7145         struct mlx5_priv *priv = dev->data->dev_private;
7146
7147         if (dev->data->dev_started) {
7148                 rte_flow_error_set(error, EBUSY,
7149                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7150                                    NULL,
7151                                    "port must be stopped first");
7152                 return -rte_errno;
7153         }
7154         priv->isolated = !!enable;
7155         if (enable)
7156                 dev->dev_ops = &mlx5_dev_ops_isolate;
7157         else
7158                 dev->dev_ops = &mlx5_dev_ops;
7159
7160         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7161         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7162
7163         return 0;
7164 }
7165
7166 /**
7167  * Query a flow.
7168  *
7169  * @see rte_flow_query()
7170  * @see rte_flow_ops
7171  */
7172 static int
7173 flow_drv_query(struct rte_eth_dev *dev,
7174                uint32_t flow_idx,
7175                const struct rte_flow_action *actions,
7176                void *data,
7177                struct rte_flow_error *error)
7178 {
7179         struct mlx5_priv *priv = dev->data->dev_private;
7180         const struct mlx5_flow_driver_ops *fops;
7181         struct rte_flow *flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7182                                                flow_idx);
7183         enum mlx5_flow_drv_type ftype;
7184
7185         if (!flow) {
7186                 return rte_flow_error_set(error, ENOENT,
7187                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7188                           NULL,
7189                           "invalid flow handle");
7190         }
7191         ftype = flow->drv_type;
7192         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7193         fops = flow_get_drv_ops(ftype);
7194
7195         return fops->query(dev, flow, actions, data, error);
7196 }
7197
7198 /**
7199  * Query a flow.
7200  *
7201  * @see rte_flow_query()
7202  * @see rte_flow_ops
7203  */
7204 int
7205 mlx5_flow_query(struct rte_eth_dev *dev,
7206                 struct rte_flow *flow,
7207                 const struct rte_flow_action *actions,
7208                 void *data,
7209                 struct rte_flow_error *error)
7210 {
7211         int ret;
7212
7213         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
7214                              error);
7215         if (ret < 0)
7216                 return ret;
7217         return 0;
7218 }
7219
7220 /**
7221  * Get rte_flow callbacks.
7222  *
7223  * @param dev
7224  *   Pointer to Ethernet device structure.
7225  * @param ops
7226  *   Pointer to operation-specific structure.
7227  *
7228  * @return 0
7229  */
7230 int
7231 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7232                   const struct rte_flow_ops **ops)
7233 {
7234         *ops = &mlx5_flow_ops;
7235         return 0;
7236 }
7237
7238 /**
7239  * Validate meter policy actions.
7240  * Dispatcher for action type specific validation.
7241  *
7242  * @param[in] dev
7243  *   Pointer to the Ethernet device structure.
7244  * @param[in] action
7245  *   The meter policy action object to validate.
7246  * @param[in] attr
7247  *   Attributes of flow to determine steering domain.
7248  * @param[out] is_rss
7249  *   Is RSS or not.
7250  * @param[out] domain_bitmap
7251  *   Domain bitmap.
7252  * @param[out] policy_mode
7253  *   Meter policy mode.
7254  * @param[out] error
7255  *   Perform verbose error reporting if not NULL. Initialized in case of
7256  *   error only.
7257  *
7258  * @return
7259  *   0 on success, otherwise negative errno value.
7260  */
7261 int
7262 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7263                         const struct rte_flow_action *actions[RTE_COLORS],
7264                         struct rte_flow_attr *attr,
7265                         bool *is_rss,
7266                         uint8_t *domain_bitmap,
7267                         uint8_t *policy_mode,
7268                         struct rte_mtr_error *error)
7269 {
7270         const struct mlx5_flow_driver_ops *fops;
7271
7272         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7273         return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7274                                        domain_bitmap, policy_mode, error);
7275 }
7276
7277 /**
7278  * Destroy the meter policy actions.
7279  *
7280  * @param[in] dev
7281  *   Pointer to Ethernet device.
7282  * @param[in] mtr_policy
7283  *   Meter policy struct.
7284  */
7285 void
7286 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7287                       struct mlx5_flow_meter_policy *mtr_policy)
7288 {
7289         const struct mlx5_flow_driver_ops *fops;
7290
7291         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7292         fops->destroy_mtr_acts(dev, mtr_policy);
7293 }
7294
7295 /**
7296  * Create policy action, lock free,
7297  * (mutex should be acquired by caller).
7298  * Dispatcher for action type specific call.
7299  *
7300  * @param[in] dev
7301  *   Pointer to the Ethernet device structure.
7302  * @param[in] mtr_policy
7303  *   Meter policy struct.
7304  * @param[in] action
7305  *   Action specification used to create meter actions.
7306  * @param[out] error
7307  *   Perform verbose error reporting if not NULL. Initialized in case of
7308  *   error only.
7309  *
7310  * @return
7311  *   0 on success, otherwise negative errno value.
7312  */
7313 int
7314 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7315                       struct mlx5_flow_meter_policy *mtr_policy,
7316                       const struct rte_flow_action *actions[RTE_COLORS],
7317                       struct rte_mtr_error *error)
7318 {
7319         const struct mlx5_flow_driver_ops *fops;
7320
7321         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7322         return fops->create_mtr_acts(dev, mtr_policy, actions, error);
7323 }
7324
7325 /**
7326  * Create policy rules, lock free,
7327  * (mutex should be acquired by caller).
7328  * Dispatcher for action type specific call.
7329  *
7330  * @param[in] dev
7331  *   Pointer to the Ethernet device structure.
7332  * @param[in] mtr_policy
7333  *   Meter policy struct.
7334  *
7335  * @return
7336  *   0 on success, -1 otherwise.
7337  */
7338 int
7339 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
7340                              struct mlx5_flow_meter_policy *mtr_policy)
7341 {
7342         const struct mlx5_flow_driver_ops *fops;
7343
7344         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7345         return fops->create_policy_rules(dev, mtr_policy);
7346 }
7347
7348 /**
7349  * Destroy policy rules, lock free,
7350  * (mutex should be acquired by caller).
7351  * Dispatcher for action type specific call.
7352  *
7353  * @param[in] dev
7354  *   Pointer to the Ethernet device structure.
7355  * @param[in] mtr_policy
7356  *   Meter policy struct.
7357  */
7358 void
7359 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
7360                              struct mlx5_flow_meter_policy *mtr_policy)
7361 {
7362         const struct mlx5_flow_driver_ops *fops;
7363
7364         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7365         fops->destroy_policy_rules(dev, mtr_policy);
7366 }
7367
7368 /**
7369  * Destroy the default policy table set.
7370  *
7371  * @param[in] dev
7372  *   Pointer to Ethernet device.
7373  */
7374 void
7375 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
7376 {
7377         const struct mlx5_flow_driver_ops *fops;
7378
7379         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7380         fops->destroy_def_policy(dev);
7381 }
7382
7383 /**
7384  * Create the default policy table set.
7385  *
7386  * @param[in] dev
7387  *   Pointer to Ethernet device.
7388  *
7389  * @return
7390  *   0 on success, -1 otherwise.
7391  */
7392 int
7393 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
7394 {
7395         const struct mlx5_flow_driver_ops *fops;
7396
7397         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7398         return fops->create_def_policy(dev);
7399 }
7400
7401 /**
7402  * Create the needed meter and suffix tables.
7403  *
7404  * @param[in] dev
7405  *   Pointer to Ethernet device.
7406  *
7407  * @return
7408  *   0 on success, -1 otherwise.
7409  */
7410 int
7411 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
7412                         struct mlx5_flow_meter_info *fm,
7413                         uint32_t mtr_idx,
7414                         uint8_t domain_bitmap)
7415 {
7416         const struct mlx5_flow_driver_ops *fops;
7417
7418         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7419         return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
7420 }
7421
7422 /**
7423  * Destroy the meter table set.
7424  *
7425  * @param[in] dev
7426  *   Pointer to Ethernet device.
7427  * @param[in] fm
7428  *   Pointer to the flow meter info.
7429  */
7430 void
7431 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
7432                            struct mlx5_flow_meter_info *fm)
7433 {
7434         const struct mlx5_flow_driver_ops *fops;
7435
7436         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7437         fops->destroy_mtr_tbls(dev, fm);
7438 }
7439
7440 /**
7441  * Destroy the global meter drop table.
7442  *
7443  * @param[in] dev
7444  *   Pointer to Ethernet device.
7445  */
7446 void
7447 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
7448 {
7449         const struct mlx5_flow_driver_ops *fops;
7450
7451         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7452         fops->destroy_mtr_drop_tbls(dev);
7453 }
7454
7455 /**
7456  * Destroy the sub policy table with RX queue.
7457  *
7458  * @param[in] dev
7459  *   Pointer to Ethernet device.
7460  * @param[in] mtr_policy
7461  *   Pointer to meter policy table.
7462  */
7463 void
7464 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
7465                 struct mlx5_flow_meter_policy *mtr_policy)
7466 {
7467         const struct mlx5_flow_driver_ops *fops;
7468
7469         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7470         fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
7471 }
7472
7473 /**
7474  * Allocate the needed ASO flow meter id.
7475  *
7476  * @param[in] dev
7477  *   Pointer to Ethernet device.
7478  *
7479  * @return
7480  *   Index to the ASO flow meter on success, 0 otherwise.
7481  */
7482 uint32_t
7483 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
7484 {
7485         const struct mlx5_flow_driver_ops *fops;
7486
7487         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7488         return fops->create_meter(dev);
7489 }
7490
7491 /**
7492  * Free the ASO flow meter id.
7493  *
7494  * @param[in] dev
7495  *   Pointer to Ethernet device.
7496  * @param[in] mtr_idx
7497  *   Index of the ASO flow meter to be freed.
7501  */
7502 void
7503 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
7504 {
7505         const struct mlx5_flow_driver_ops *fops;
7506
7507         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7508         fops->free_meter(dev, mtr_idx);
7509 }
7510
7511 /**
7512  * Allocate a counter.
7513  *
7514  * @param[in] dev
7515  *   Pointer to Ethernet device structure.
7516  *
7517  * @return
7518  *   Index to the allocated counter on success, 0 otherwise.
7519  */
7520 uint32_t
7521 mlx5_counter_alloc(struct rte_eth_dev *dev)
7522 {
7523         const struct mlx5_flow_driver_ops *fops;
7524         struct rte_flow_attr attr = { .transfer = 0 };
7525
7526         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7527                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7528                 return fops->counter_alloc(dev);
7529         }
7530         DRV_LOG(ERR,
7531                 "port %u counter allocate is not supported.",
7532                  dev->data->port_id);
7533         return 0;
7534 }
7535
7536 /**
7537  * Free a counter.
7538  *
7539  * @param[in] dev
7540  *   Pointer to Ethernet device structure.
7541  * @param[in] cnt
7542  *   Index of the counter to be freed.
7543  */
7544 void
7545 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
7546 {
7547         const struct mlx5_flow_driver_ops *fops;
7548         struct rte_flow_attr attr = { .transfer = 0 };
7549
7550         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7551                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7552                 fops->counter_free(dev, cnt);
7553                 return;
7554         }
7555         DRV_LOG(ERR,
7556                 "port %u counter free is not supported.",
7557                  dev->data->port_id);
7558 }
7559
7560 /**
7561  * Query counter statistics.
7562  *
7563  * @param[in] dev
7564  *   Pointer to Ethernet device structure.
7565  * @param[in] cnt
7566  *   Index to counter to query.
7567  * @param[in] clear
7568  *   Set to clear counter statistics.
7569  * @param[out] pkts
7570  *   The counter hits packets number to save.
7571  * @param[out] bytes
7572  *   The counter hits bytes number to save.
7573  *
7574  * @return
7575  *   0 on success, a negative errno value otherwise.
7576  */
7577 int
7578 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
7579                    bool clear, uint64_t *pkts, uint64_t *bytes)
7580 {
7581         const struct mlx5_flow_driver_ops *fops;
7582         struct rte_flow_attr attr = { .transfer = 0 };
7583
7584         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7585                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7586                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
7587         }
7588         DRV_LOG(ERR,
7589                 "port %u counter query is not supported.",
7590                  dev->data->port_id);
7591         return -ENOTSUP;
7592 }
7593
7594 /**
7595  * Allocate new memory for the counter values, wrapped by all the needed
7596  * management structures.
7597  *
7598  * @param[in] sh
7599  *   Pointer to mlx5_dev_ctx_shared object.
7600  *
7601  * @return
7602  *   0 on success, a negative errno value otherwise.
7603  */
7604 static int
7605 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
7606 {
7607         struct mlx5_devx_mkey_attr mkey_attr;
7608         struct mlx5_counter_stats_mem_mng *mem_mng;
7609         volatile struct flow_counter_stats *raw_data;
7610         int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
7611         int size = (sizeof(struct flow_counter_stats) *
7612                         MLX5_COUNTERS_PER_POOL +
7613                         sizeof(struct mlx5_counter_stats_raw)) * raws_n +
7614                         sizeof(struct mlx5_counter_stats_mem_mng);
7615         size_t pgsize = rte_mem_page_size();
7616         uint8_t *mem;
7617         int i;
7618
7619         if (pgsize == (size_t)-1) {
7620                 DRV_LOG(ERR, "Failed to get mem page size");
7621                 rte_errno = ENOMEM;
7622                 return -ENOMEM;
7623         }
7624         mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
7625         if (!mem) {
7626                 rte_errno = ENOMEM;
7627                 return -ENOMEM;
7628         }
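        /*
         * Layout of the single allocation sized above:
         *   [raw counter data: raws_n * MLX5_COUNTERS_PER_POOL stats]
         *   [raws_n struct mlx5_counter_stats_raw descriptors]
         *   [one struct mlx5_counter_stats_mem_mng at the very end]
         * Only the raw data area is registered as UMEM, so 'size' is
         * recomputed below to cover just that region.
         */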
7629         mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
7630         size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
7631         mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size,
7632                                                  IBV_ACCESS_LOCAL_WRITE);
7633         if (!mem_mng->umem) {
7634                 rte_errno = errno;
7635                 mlx5_free(mem);
7636                 return -rte_errno;
7637         }
7638         memset(&mkey_attr, 0, sizeof(mkey_attr));
7639         mkey_attr.addr = (uintptr_t)mem;
7640         mkey_attr.size = size;
7641         mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
7642         mkey_attr.pd = sh->pdn;
7643         mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
7644         mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
7645         mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
7646         if (!mem_mng->dm) {
7647                 mlx5_os_umem_dereg(mem_mng->umem);
7648                 rte_errno = errno;
7649                 mlx5_free(mem);
7650                 return -rte_errno;
7651         }
7652         mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
7653         raw_data = (volatile struct flow_counter_stats *)mem;
7654         for (i = 0; i < raws_n; ++i) {
7655                 mem_mng->raws[i].mem_mng = mem_mng;
7656                 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
7657         }
7658         for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
7659                 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
7660                                  mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
7661                                  next);
7662         LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
7663         sh->cmng.mem_mng = mem_mng;
7664         return 0;
7665 }
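
/*
 * Layout sketch of the single allocation made above (illustrative only).
 * One contiguous block holds the raw counter statistics, the per-raw
 * descriptors and the management structure itself:
 *
 *	+--------------------------------------------------------------+
 *	| raws_n * MLX5_COUNTERS_PER_POOL * struct flow_counter_stats   |
 *	|   (registered as umem/mkey, written by the device)            |
 *	+--------------------------------------------------------------+
 *	| raws_n * struct mlx5_counter_stats_raw  (mem_mng->raws[])     |
 *	+--------------------------------------------------------------+
 *	| struct mlx5_counter_stats_mem_mng       (mem_mng, at the end) |
 *	+--------------------------------------------------------------+
 *
 * The last MLX5_MAX_PENDING_QUERIES raw descriptors are pre-linked into
 * cmng.free_stat_raws to serve in-flight asynchronous queries.
 */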
7666
7667 /**
7668  * Set the statistic memory to the new counter pool.
7669  *
7670  * @param[in] sh
7671  *   Pointer to mlx5_dev_ctx_shared object.
7672  * @param[in] pool
7673  *   Pointer to the pool to set the statistic memory.
7674  *
7675  * @return
7676  *   0 on success, a negative errno value otherwise.
7677  */
7678 static int
7679 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
7680                                struct mlx5_flow_counter_pool *pool)
7681 {
7682         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7683         /* Resize the statistic memory once it is used up. */
7684         if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
7685             mlx5_flow_create_counter_stat_mem_mng(sh)) {
7686                 DRV_LOG(ERR, "Cannot resize counter stat mem.");
7687                 return -1;
7688         }
7689         rte_spinlock_lock(&pool->sl);
7690         pool->raw = cmng->mem_mng->raws + pool->index %
7691                     MLX5_CNT_CONTAINER_RESIZE;
7692         rte_spinlock_unlock(&pool->sl);
7693         pool->raw_hw = NULL;
7694         return 0;
7695 }
7696
7697 #define MLX5_POOL_QUERY_FREQ_US 1000000
7698
7699 /**
7700  * Set the periodic procedure for triggering asynchronous batch queries for all
7701  * the counter pools.
7702  *
7703  * @param[in] sh
7704  *   Pointer to mlx5_dev_ctx_shared object.
7705  */
7706 void
7707 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
7708 {
7709         uint32_t pools_n, us;
7710
7711         pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
7712         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
7713         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
7714         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
7715                 sh->cmng.query_thread_on = 0;
7716                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
7717         } else {
7718                 sh->cmng.query_thread_on = 1;
7719         }
7720 }
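
/*
 * Worked example of the alarm period computation above (illustrative only;
 * the pool count is a placeholder): with MLX5_POOL_QUERY_FREQ_US = 1000000
 * and 4 valid pools, each alarm fires after 1000000 / 4 = 250000 us and
 * queries a single pool, so every pool is refreshed roughly once per second.
 *
 *	us = MLX5_POOL_QUERY_FREQ_US / pools_n;    e.g. 1000000 / 4 = 250000
 */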
7721
7722 /**
7723  * The periodic procedure for triggering asynchronous batch queries for all the
7724  * counter pools. This function is expected to be called from the host thread.
7725  *
7726  * @param[in] arg
7727  *   The parameter for the alarm process.
7728  */
7729 void
7730 mlx5_flow_query_alarm(void *arg)
7731 {
7732         struct mlx5_dev_ctx_shared *sh = arg;
7733         int ret;
7734         uint16_t pool_index = sh->cmng.pool_index;
7735         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7736         struct mlx5_flow_counter_pool *pool;
7737         uint16_t n_valid;
7738
7739         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
7740                 goto set_alarm;
7741         rte_spinlock_lock(&cmng->pool_update_sl);
7742         pool = cmng->pools[pool_index];
7743         n_valid = cmng->n_valid;
7744         rte_spinlock_unlock(&cmng->pool_update_sl);
7745         /* Set the statistic memory to the newly created pool. */
7746         if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
7747                 goto set_alarm;
7748         if (pool->raw_hw)
7749                 /* There is a pool query in progress. */
7750                 goto set_alarm;
7751         pool->raw_hw =
7752                 LIST_FIRST(&sh->cmng.free_stat_raws);
7753         if (!pool->raw_hw)
7754                 /* No free counter statistics raw memory. */
7755                 goto set_alarm;
7756         /*
7757          * Identify the counters released between query trigger and query
7758          * handling more efficiently. A counter released in this gap period
7759          * should wait for a new query round, as newly arrived packets
7760          * will not be taken into account.
7761          */
7762         pool->query_gen++;
7763         ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
7764                                                MLX5_COUNTERS_PER_POOL,
7765                                                NULL, NULL,
7766                                                pool->raw_hw->mem_mng->dm->id,
7767                                                (void *)(uintptr_t)
7768                                                pool->raw_hw->data,
7769                                                sh->devx_comp,
7770                                                (uint64_t)(uintptr_t)pool);
7771         if (ret) {
7772                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
7773                         " %d", pool->min_dcs->id);
7774                 pool->raw_hw = NULL;
7775                 goto set_alarm;
7776         }
7777         LIST_REMOVE(pool->raw_hw, next);
7778         sh->cmng.pending_queries++;
7779         pool_index++;
7780         if (pool_index >= n_valid)
7781                 pool_index = 0;
7782 set_alarm:
7783         sh->cmng.pool_index = pool_index;
7784         mlx5_set_query_alarm(sh);
7785 }
7786
7787 /**
7788  * Check for newly aged flows in the counter pool and post the aging event.
7789  *
7790  * @param[in] sh
7791  *   Pointer to mlx5_dev_ctx_shared object.
7792  * @param[in] pool
7793  *   Pointer to the current counter pool.
7794  */
7795 static void
7796 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
7797                    struct mlx5_flow_counter_pool *pool)
7798 {
7799         struct mlx5_priv *priv;
7800         struct mlx5_flow_counter *cnt;
7801         struct mlx5_age_info *age_info;
7802         struct mlx5_age_param *age_param;
7803         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
7804         struct mlx5_counter_stats_raw *prev = pool->raw;
7805         const uint64_t curr_time = MLX5_CURR_TIME_SEC;
7806         const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
7807         uint16_t expected = AGE_CANDIDATE;
7808         uint32_t i;
7809
7810         pool->time_of_last_age_check = curr_time;
7811         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
7812                 cnt = MLX5_POOL_GET_CNT(pool, i);
7813                 age_param = MLX5_CNT_TO_AGE(cnt);
7814                 if (__atomic_load_n(&age_param->state,
7815                                     __ATOMIC_RELAXED) != AGE_CANDIDATE)
7816                         continue;
7817                 if (cur->data[i].hits != prev->data[i].hits) {
7818                         __atomic_store_n(&age_param->sec_since_last_hit, 0,
7819                                          __ATOMIC_RELAXED);
7820                         continue;
7821                 }
7822                 if (__atomic_add_fetch(&age_param->sec_since_last_hit,
7823                                        time_delta,
7824                                        __ATOMIC_RELAXED) <= age_param->timeout)
7825                         continue;
7826                 /*
7827                  * Hold the lock first; otherwise, if a release happens
7828                  * between setting the state to AGE_TMOUT and the tailq
7829                  * insertion, the release procedure may try to delete a
7830                  * tailq node that was never inserted.
7831                  */
7832                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
7833                 age_info = GET_PORT_AGE_INFO(priv);
7834                 rte_spinlock_lock(&age_info->aged_sl);
7835                 if (__atomic_compare_exchange_n(&age_param->state, &expected,
7836                                                 AGE_TMOUT, false,
7837                                                 __ATOMIC_RELAXED,
7838                                                 __ATOMIC_RELAXED)) {
7839                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
7840                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
7841                 }
7842                 rte_spinlock_unlock(&age_info->aged_sl);
7843         }
7844         mlx5_age_event_prepare(sh);
7845 }
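
/*
 * Aging timeline sketch (illustrative only; the timeout value is a
 * placeholder). For a counter created with a 10 second AGE timeout that
 * stops receiving traffic, successive query rounds accumulate the idle
 * time until the timeout is crossed:
 *
 *	round  hits changed  sec_since_last_hit  state
 *	  1        yes             reset to 0     AGE_CANDIDATE
 *	  2        no              +time_delta    AGE_CANDIDATE
 *	  ...      no              ...            AGE_CANDIDATE
 *	  n        no              > 10           AGE_TMOUT, queued on
 *	                                          age_info->aged_counters
 *
 * Once queued, the application retrieves the context through
 * rte_flow_get_aged_flows() after the RTE_ETH_EVENT_FLOW_AGED event.
 */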
7846
7847 /**
7848  * Handler for the HW response with the ready values from an asynchronous
7849  * batch query. This function is expected to be called from the host thread.
7850  *
7851  * @param[in] sh
7852  *   The pointer to the shared device context.
7853  * @param[in] async_id
7854  *   The Devx async ID.
7855  * @param[in] status
7856  *   The status of the completion.
7857  */
7858 void
7859 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
7860                                   uint64_t async_id, int status)
7861 {
7862         struct mlx5_flow_counter_pool *pool =
7863                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
7864         struct mlx5_counter_stats_raw *raw_to_free;
7865         uint8_t query_gen = pool->query_gen ^ 1;
7866         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7867         enum mlx5_counter_type cnt_type =
7868                 pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
7869                                 MLX5_COUNTER_TYPE_ORIGIN;
7870
7871         if (unlikely(status)) {
7872                 raw_to_free = pool->raw_hw;
7873         } else {
7874                 raw_to_free = pool->raw;
7875                 if (pool->is_aged)
7876                         mlx5_flow_aging_check(sh, pool);
7877                 rte_spinlock_lock(&pool->sl);
7878                 pool->raw = pool->raw_hw;
7879                 rte_spinlock_unlock(&pool->sl);
7880                 /* Be sure the new raw counters data is updated in memory. */
7881                 rte_io_wmb();
7882                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
7883                         rte_spinlock_lock(&cmng->csl[cnt_type]);
7884                         TAILQ_CONCAT(&cmng->counters[cnt_type],
7885                                      &pool->counters[query_gen], next);
7886                         rte_spinlock_unlock(&cmng->csl[cnt_type]);
7887                 }
7888         }
7889         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
7890         pool->raw_hw = NULL;
7891         sh->cmng.pending_queries--;
7892 }
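
/*
 * Double-buffering sketch for the counter raw data (illustrative only).
 * pool->raw_hw receives the DevX asynchronous query results while readers
 * keep using pool->raw; on completion the two are swapped under pool->sl,
 * so a reader always sees a fully written snapshot:
 *
 *	rte_spinlock_lock(&pool->sl);
 *	stats = pool->raw->data[offset];        reader-side access pattern
 *	rte_spinlock_unlock(&pool->sl);
 *
 * Counters released before the completed query was triggered (the list
 * indexed by query_gen ^ 1) have their final values captured in the new
 * snapshot and can therefore be returned to the global free list.
 */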
7893
7894 static int
7895 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
7896                     const struct flow_grp_info *grp_info,
7897                     struct rte_flow_error *error)
7898 {
7899         if (grp_info->transfer && grp_info->external &&
7900             grp_info->fdb_def_rule) {
7901                 if (group == UINT32_MAX)
7902                         return rte_flow_error_set
7903                                                 (error, EINVAL,
7904                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
7905                                                  NULL,
7906                                                  "group index not supported");
7907                 *table = group + 1;
7908         } else {
7909                 *table = group;
7910         }
7911         DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
7912         return 0;
7913 }
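
/*
 * Worked example of the standard group-to-table translation above
 * (illustrative only). For an external transfer rule while the FDB default
 * rule is enabled, the group is shifted by one so that group 0 does not
 * collide with the root table:
 *
 *	group 0 -> table 1, group 5 -> table 6, group UINT32_MAX -> -EINVAL
 *
 * In every other case the table equals the group: group 5 -> table 5.
 */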
7914
7915 /**
7916  * Translate the rte_flow group index to HW table value.
7917  *
7918  * If tunnel offload is disabled, all group ids are converted to flow table
7919  * ids using the standard method.
7920  * If tunnel offload is enabled, a group id can be converted using either the
7921  * standard or the tunnel conversion method. The conversion method is
7922  * selected according to the flags in the `grp_info` parameter:
7923  * - Internal (grp_info.external == 0) groups are converted with the
7924  *   standard method.
7925  * - Group ids in a JUMP action are converted with the tunnel method.
7926  * - Group id conversion in a rule attribute depends on the rule type and
7927  *   the group id value:
7928  *   ** non-zero group attributes are converted with the tunnel method
7929  *   ** zero group attribute in non-tunnel rule is converted using the
7930  *      standard method - there's only one root table
7931  *   ** zero group attribute in steer tunnel rule is converted with the
7932  *      standard method - single root table
7933  *   ** zero group attribute in match tunnel rule is a special OvS
7934  *      case: that value is used for portability reasons. That group
7935  *      id is converted with the tunnel conversion method.
7936  *
7937  * @param[in] dev
7938  *   Port device
7939  * @param[in] tunnel
7940  *   PMD tunnel offload object
7941  * @param[in] group
7942  *   rte_flow group index value.
7943  * @param[out] table
7944  *   HW table value.
7945  * @param[in] grp_info
7946  *   flags used for conversion
7947  * @param[out] error
7948  *   Pointer to error structure.
7949  *
7950  * @return
7951  *   0 on success, a negative errno value otherwise and rte_errno is set.
7952  */
7953 int
7954 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
7955                          const struct mlx5_flow_tunnel *tunnel,
7956                          uint32_t group, uint32_t *table,
7957                          const struct flow_grp_info *grp_info,
7958                          struct rte_flow_error *error)
7959 {
7960         int ret;
7961         bool standard_translation;
7962
7963         if (!grp_info->skip_scale && grp_info->external &&
7964             group < MLX5_MAX_TABLES_EXTERNAL)
7965                 group *= MLX5_FLOW_TABLE_FACTOR;
7966         if (is_tunnel_offload_active(dev)) {
7967                 standard_translation = !grp_info->external ||
7968                                         grp_info->std_tbl_fix;
7969         } else {
7970                 standard_translation = true;
7971         }
7972         DRV_LOG(DEBUG,
7973                 "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
7974                 dev->data->port_id, group, grp_info->transfer,
7975                 grp_info->external, grp_info->fdb_def_rule,
7976                 standard_translation ? "STANDARD" : "TUNNEL");
7977         if (standard_translation)
7978                 ret = flow_group_to_table(dev->data->port_id, group, table,
7979                                           grp_info, error);
7980         else
7981                 ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
7982                                                       table, error);
7983
7984         return ret;
7985 }
7986
7987 /**
7988  * Discover availability of metadata reg_c's.
7989  *
7990  * Iteratively use test flows to check availability.
7991  *
7992  * @param[in] dev
7993  *   Pointer to the Ethernet device structure.
7994  *
7995  * @return
7996  *   0 on success, a negative errno value otherwise and rte_errno is set.
7997  */
7998 int
7999 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
8000 {
8001         struct mlx5_priv *priv = dev->data->dev_private;
8002         struct mlx5_dev_config *config = &priv->config;
8003         enum modify_reg idx;
8004         int n = 0;
8005
8006         /* reg_c[0] and reg_c[1] are reserved. */
8007         config->flow_mreg_c[n++] = REG_C_0;
8008         config->flow_mreg_c[n++] = REG_C_1;
8009         /* Discover availability of other reg_c's. */
8010         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
8011                 struct rte_flow_attr attr = {
8012                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
8013                         .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
8014                         .ingress = 1,
8015                 };
8016                 struct rte_flow_item items[] = {
8017                         [0] = {
8018                                 .type = RTE_FLOW_ITEM_TYPE_END,
8019                         },
8020                 };
8021                 struct rte_flow_action actions[] = {
8022                         [0] = {
8023                                 .type = (enum rte_flow_action_type)
8024                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
8025                                 .conf = &(struct mlx5_flow_action_copy_mreg){
8026                                         .src = REG_C_1,
8027                                         .dst = idx,
8028                                 },
8029                         },
8030                         [1] = {
8031                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
8032                                 .conf = &(struct rte_flow_action_jump){
8033                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
8034                                 },
8035                         },
8036                         [2] = {
8037                                 .type = RTE_FLOW_ACTION_TYPE_END,
8038                         },
8039                 };
8040                 uint32_t flow_idx;
8041                 struct rte_flow *flow;
8042                 struct rte_flow_error error;
8043
8044                 if (!config->dv_flow_en)
8045                         break;
8046                 /* Create internal flow, validation skips copy action. */
8047                 flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
8048                                         items, actions, false, &error);
8049                 flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8050                                       flow_idx);
8051                 if (!flow)
8052                         continue;
8053                 config->flow_mreg_c[n++] = idx;
8054                 flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
8055         }
8056         for (; n < MLX5_MREG_C_NUM; ++n)
8057                 config->flow_mreg_c[n] = REG_NON;
8058         return 0;
8059 }
8060
8061 int
8062 save_dump_file(const uint8_t *data, uint32_t size,
8063         uint32_t type, uint32_t id, void *arg, FILE *file)
8064 {
8065         char line[BUF_SIZE];
8066         uint32_t out = 0;
8067         uint32_t k;
8068         uint32_t actions_num;
8069         struct rte_flow_query_count *count;
8070
8071         memset(line, 0, BUF_SIZE);
8072         switch (type) {
8073         case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
8074                 actions_num = *(uint32_t *)(arg);
8075                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%x,%d,",
8076                                 type, id, actions_num);
8077                 break;
8078         case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
8079                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%x,",
8080                                 type, id);
8081                 break;
8082         case DR_DUMP_REC_TYPE_PMD_COUNTER:
8083                 count = (struct rte_flow_query_count *)arg;
8084                 fprintf(file, "%d,0x%x,%" PRIu64 ",%" PRIu64 "\n", type,
8085                                 id, count->hits, count->bytes);
8086                 return 0;
8087         default:
8088                 return -1;
8089         }
8090
8091         for (k = 0; k < size; k++) {
8092                 /* Make sure we do not overrun the line buffer length. */
8093                 if (out >= BUF_SIZE - 4) {
8094                         line[out] = '\0';
8095                         break;
8096                 }
8097                 out += snprintf(line + out, BUF_SIZE - out, "%02x",
8098                                 (data[k]) & 0xff);
8099         }
8100         fprintf(file, "%s\n", line);
8101         return 0;
8102 }
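
/*
 * Sample record formats produced by save_dump_file() (illustrative only;
 * the type, id and data values are placeholders):
 *
 *	modify header:   <type>,0x<id>,<actions_num>,<hex encoded actions>
 *	packet reformat: <type>,0x<id>,<hex encoded reformat buffer>
 *	counter:         <type>,0x<id>,<hits>,<bytes>
 */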
8103
8104 int
8105 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
8106         struct rte_flow_query_count *count, struct rte_flow_error *error)
8107 {
8108         struct rte_flow_action action[2];
8109         enum mlx5_flow_drv_type ftype;
8110         const struct mlx5_flow_driver_ops *fops;
8111
8112         if (!flow) {
8113                 return rte_flow_error_set(error, ENOENT,
8114                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8115                                 NULL,
8116                                 "invalid flow handle");
8117         }
8118         action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
8119         action[1].type = RTE_FLOW_ACTION_TYPE_END;
8120         if (flow->counter) {
8121                 memset(count, 0, sizeof(struct rte_flow_query_count));
8122                 ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
8123                 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
8124                                                 ftype < MLX5_FLOW_TYPE_MAX);
8125                 fops = flow_get_drv_ops(ftype);
8126                 return fops->query(dev, flow, action, count, error);
8127         }
8128         return -1;
8129 }
8130
8131 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8132 /**
8133  * Dump flow ipool data to file
8134  *
8135  * @param[in] dev
8136  *   The pointer to Ethernet device.
8137  * @param[in] file
8138  *   A pointer to a file for output.
8139  * @param[out] error
8140  *   Perform verbose error reporting if not NULL. PMDs initialize this
8141  *   structure in case of error only.
8142  * @return
8143  *   0 on success, a negative value otherwise.
8144  */
8145 int
8146 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
8147         struct rte_flow *flow, FILE *file,
8148         struct rte_flow_error *error)
8149 {
8150         struct mlx5_priv *priv = dev->data->dev_private;
8151         struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
8152         struct mlx5_flow_dv_encap_decap_resource *encap_decap;
8153         uint32_t handle_idx;
8154         struct mlx5_flow_handle *dh;
8155         struct rte_flow_query_count count;
8156         uint32_t actions_num;
8157         const uint8_t *data;
8158         size_t size;
8159         uint32_t id;
8160         uint32_t type;
8161
8162         if (!flow) {
8163                 return rte_flow_error_set(error, ENOENT,
8164                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8165                         NULL,
8166                         "invalid flow handle");
8167         }
8168         handle_idx = flow->dev_handles;
8169         while (handle_idx) {
8170                 dh = mlx5_ipool_get(priv->sh->ipool
8171                         [MLX5_IPOOL_MLX5_FLOW], handle_idx);
8172                 /* Stop on an invalid handle to avoid an endless loop. */
8173                 if (!dh)
8174                         break;
8174                 handle_idx = dh->next.next;
8175                 id = (uint32_t)(uintptr_t)dh->drv_flow;
8176
8177                 /* query counter */
8178                 type = DR_DUMP_REC_TYPE_PMD_COUNTER;
8179                 if (!mlx5_flow_query_counter(dev, flow, &count, error))
8180                         save_dump_file(NULL, 0, type,
8181                                         id, (void *)&count, file);
8182
8183                 /* Get modify_hdr and encap_decap buf from ipools. */
8184                 encap_decap = NULL;
8185                 modify_hdr = dh->dvh.modify_hdr;
8186
8187                 if (dh->dvh.rix_encap_decap) {
8188                         encap_decap = mlx5_ipool_get(priv->sh->ipool
8189                                                 [MLX5_IPOOL_DECAP_ENCAP],
8190                                                 dh->dvh.rix_encap_decap);
8191                 }
8192                 if (modify_hdr) {
8193                         data = (const uint8_t *)modify_hdr->actions;
8194                         size = (size_t)(modify_hdr->actions_num) * 8;
8195                         actions_num = modify_hdr->actions_num;
8196                         type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
8197                         save_dump_file(data, size, type, id,
8198                                         (void *)(&actions_num), file);
8199                 }
8200                 if (encap_decap) {
8201                         data = encap_decap->buf;
8202                         size = encap_decap->size;
8203                         type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
8204                         save_dump_file(data, size, type,
8205                                                 id, NULL, file);
8206                 }
8207         }
8208         return 0;
8209 }
8210 #endif
8211
8212 /**
8213  * Dump flow raw hw data to file
8214  *
8215  * @param[in] dev
8216  *    The pointer to Ethernet device.
8217  * @param[in] file
8218  *   A pointer to a file for output.
8219  * @param[out] error
8220  *   Perform verbose error reporting if not NULL. PMDs initialize this
8221  *   structure in case of error only.
8222  * @return
8223  *   0 on success, a negative value otherwise.
8224  */
8225 int
8226 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
8227                    FILE *file,
8228                    struct rte_flow_error *error __rte_unused)
8229 {
8230         struct mlx5_priv *priv = dev->data->dev_private;
8231         struct mlx5_dev_ctx_shared *sh = priv->sh;
8232         uint32_t handle_idx;
8233         int ret;
8234         struct mlx5_flow_handle *dh;
8235         struct rte_flow *flow;
8236 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8237         uint32_t idx;
8238 #endif
8239
8240         if (!priv->config.dv_flow_en) {
8241                 if (fputs("device dv flow disabled\n", file) <= 0)
8242                         return -errno;
8243                 return -ENOTSUP;
8244         }
8245
8246         /* dump all */
8247         if (!flow_idx) {
8248 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8249                 MLX5_IPOOL_FOREACH(priv->flows[MLX5_FLOW_TYPE_GEN], idx, flow)
8250                         mlx5_flow_dev_dump_ipool(dev, flow, file, error);
8251 #endif
8252                 return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
8253                                         sh->rx_domain,
8254                                         sh->tx_domain, file);
8255         }
8256         /* dump one */
8257         flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8258                         (uintptr_t)(void *)flow_idx);
8259         if (!flow)
8260                 return -ENOENT;
8261
8262 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8263         mlx5_flow_dev_dump_ipool(dev, flow, file, error);
8264 #endif
8265         handle_idx = flow->dev_handles;
8266         while (handle_idx) {
8267                 dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
8268                                 handle_idx);
8269                 if (!dh)
8270                         return -ENOENT;
8271                 if (dh->drv_flow) {
8272                         ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
8273                                         file);
8274                         if (ret)
8275                                 return -ENOENT;
8276                 }
8277                 handle_idx = dh->next.next;
8278         }
8279         return 0;
8280 }
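
/*
 * Usage sketch (illustrative only): an application reaches this callback
 * through the generic rte_flow API. The port id and file path are
 * placeholders; passing a NULL flow pointer dumps every flow on the port.
 *
 *	struct rte_flow_error err;
 *	FILE *f = fopen("/tmp/mlx5_flow_dump.txt", "w");
 *
 *	if (f != NULL) {
 *		rte_flow_dev_dump(port_id, NULL, f, &err);
 *		fclose(f);
 *	}
 */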
8281
8282 /**
8283  * Get aged-out flows.
8284  *
8285  * @param[in] dev
8286  *   Pointer to the Ethernet device structure.
8287  * @param[in] context
8288  *   The address of an array of pointers to the aged-out flows contexts.
8289  * @param[in] nb_contexts
8290  *   The length of the context array.
8291  * @param[out] error
8292  *   Perform verbose error reporting if not NULL. Initialized in case of
8293  *   error only.
8294  *
8295  * @return
8296  *   The number of contexts returned on success, otherwise a negative
8297  *   errno value. If nb_contexts is 0, return the total number of aged
8298  *   contexts. If nb_contexts is not 0, return the number of aged flows
8299  *   reported in the context array.
8300  */
8301 int
8302 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
8303                         uint32_t nb_contexts, struct rte_flow_error *error)
8304 {
8305         const struct mlx5_flow_driver_ops *fops;
8306         struct rte_flow_attr attr = { .transfer = 0 };
8307
8308         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8309                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8310                 return fops->get_aged_flows(dev, contexts, nb_contexts,
8311                                                     error);
8312         }
8313         DRV_LOG(ERR,
8314                 "port %u get aged flows is not supported.",
8315                  dev->data->port_id);
8316         return -ENOTSUP;
8317 }
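
/*
 * Usage sketch (illustrative only): retrieving aged-out flow contexts from
 * the application side with the generic rte_flow API; the port id is a
 * placeholder. A first call with nb_contexts == 0 returns the number of
 * aged contexts, the second call fills the array.
 *
 *	struct rte_flow_error err;
 *	int n = rte_flow_get_aged_flows(port_id, NULL, 0, &err);
 *
 *	if (n > 0) {
 *		void **ctxs = calloc(n, sizeof(*ctxs));
 *
 *		if (ctxs != NULL) {
 *			n = rte_flow_get_aged_flows(port_id, ctxs, n, &err);
 *			... handle the n returned contexts ...
 *			free(ctxs);
 *		}
 *	}
 */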
8318
8319 /* Wrapper for driver action_validate op callback */
8320 static int
8321 flow_drv_action_validate(struct rte_eth_dev *dev,
8322                          const struct rte_flow_indir_action_conf *conf,
8323                          const struct rte_flow_action *action,
8324                          const struct mlx5_flow_driver_ops *fops,
8325                          struct rte_flow_error *error)
8326 {
8327         static const char err_msg[] = "indirect action validation unsupported";
8328
8329         if (!fops->action_validate) {
8330                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8331                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8332                                    NULL, err_msg);
8333                 return -rte_errno;
8334         }
8335         return fops->action_validate(dev, conf, action, error);
8336 }
8337
8338 /**
8339  * Destroys the indirect action by handle.
8340  *
8341  * @param dev
8342  *   Pointer to Ethernet device structure.
8343  * @param[in] handle
8344  *   Handle for the indirect action object to be destroyed.
8345  * @param[out] error
8346  *   Perform verbose error reporting if not NULL. PMDs initialize this
8347  *   structure in case of error only.
8348  *
8349  * @return
8350  *   0 on success, a negative errno value otherwise and rte_errno is set.
8351  *
8352  * @note: wrapper for driver action_destroy op callback.
8353  */
8354 static int
8355 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
8356                            struct rte_flow_action_handle *handle,
8357                            struct rte_flow_error *error)
8358 {
8359         static const char err_msg[] = "indirect action destruction unsupported";
8360         struct rte_flow_attr attr = { .transfer = 0 };
8361         const struct mlx5_flow_driver_ops *fops =
8362                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8363
8364         if (!fops->action_destroy) {
8365                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8366                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8367                                    NULL, err_msg);
8368                 return -rte_errno;
8369         }
8370         return fops->action_destroy(dev, handle, error);
8371 }
8372
8373 /* Wrapper for driver action_update op callback */
8374 static int
8375 flow_drv_action_update(struct rte_eth_dev *dev,
8376                        struct rte_flow_action_handle *handle,
8377                        const void *update,
8378                        const struct mlx5_flow_driver_ops *fops,
8379                        struct rte_flow_error *error)
8380 {
8381         static const char err_msg[] = "indirect action update unsupported";
8382
8383         if (!fops->action_update) {
8384                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8385                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8386                                    NULL, err_msg);
8387                 return -rte_errno;
8388         }
8389         return fops->action_update(dev, handle, update, error);
8390 }
8391
8392 /* Wrapper for driver action_query op callback */
8393 static int
8394 flow_drv_action_query(struct rte_eth_dev *dev,
8395                       const struct rte_flow_action_handle *handle,
8396                       void *data,
8397                       const struct mlx5_flow_driver_ops *fops,
8398                       struct rte_flow_error *error)
8399 {
8400         static const char err_msg[] = "indirect action query unsupported";
8401
8402         if (!fops->action_query) {
8403                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8404                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8405                                    NULL, err_msg);
8406                 return -rte_errno;
8407         }
8408         return fops->action_query(dev, handle, data, error);
8409 }
8410
8411 /**
8412  * Create indirect action for reuse in multiple flow rules.
8413  *
8414  * @param dev
8415  *   Pointer to Ethernet device structure.
8416  * @param conf
8417  *   Pointer to indirect action object configuration.
8418  * @param[in] action
8419  *   Action configuration for indirect action object creation.
8420  * @param[out] error
8421  *   Perform verbose error reporting if not NULL. PMDs initialize this
8422  *   structure in case of error only.
8423  * @return
8424  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
8425  */
8426 static struct rte_flow_action_handle *
8427 mlx5_action_handle_create(struct rte_eth_dev *dev,
8428                           const struct rte_flow_indir_action_conf *conf,
8429                           const struct rte_flow_action *action,
8430                           struct rte_flow_error *error)
8431 {
8432         static const char err_msg[] = "indirect action creation unsupported";
8433         struct rte_flow_attr attr = { .transfer = 0 };
8434         const struct mlx5_flow_driver_ops *fops =
8435                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8436
8437         if (flow_drv_action_validate(dev, conf, action, fops, error))
8438                 return NULL;
8439         if (!fops->action_create) {
8440                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8441                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8442                                    NULL, err_msg);
8443                 return NULL;
8444         }
8445         return fops->action_create(dev, conf, action, error);
8446 }
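
/*
 * Usage sketch (illustrative only): creating a shared RSS indirect action
 * from the application through the generic rte_flow API and reusing it in
 * several rules. The port id, queues and RSS types are placeholders.
 *
 *	uint16_t queues[2] = { 0, 1 };
 *	struct rte_flow_action_rss rss = {
 *		.types = ETH_RSS_IP,
 *		.queue_num = 2,
 *		.queue = queues,
 *	};
 *	struct rte_flow_indir_action_conf conf = { .ingress = 1 };
 *	struct rte_flow_action action = {
 *		.type = RTE_FLOW_ACTION_TYPE_RSS,
 *		.conf = &rss,
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow_action_handle *handle =
 *		rte_flow_action_handle_create(port_id, &conf, &action, &err);
 */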
8447
8448 /**
8449  * Updates in place the indirect action configuration pointed to by *handle*
8450  * with the configuration provided as the *update* argument.
8451  * The update of the indirect action configuration affects all flow rules
8452  * reusing the action via the handle.
8453  *
8454  * @param dev
8455  *   Pointer to Ethernet device structure.
8456  * @param[in] handle
8457  *   Handle for the indirect action to be updated.
8458  * @param[in] update
8459  *   Action specification used to modify the action pointed by handle.
8460  *   *update* could be of the same type as the action pointed to by the
8461  *   *handle* argument, or some other structure like a wrapper, depending on
8462  *   the indirect action type.
8463  * @param[out] error
8464  *   Perform verbose error reporting if not NULL. PMDs initialize this
8465  *   structure in case of error only.
8466  *
8467  * @return
8468  *   0 on success, a negative errno value otherwise and rte_errno is set.
8469  */
8470 static int
8471 mlx5_action_handle_update(struct rte_eth_dev *dev,
8472                 struct rte_flow_action_handle *handle,
8473                 const void *update,
8474                 struct rte_flow_error *error)
8475 {
8476         struct rte_flow_attr attr = { .transfer = 0 };
8477         const struct mlx5_flow_driver_ops *fops =
8478                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8479         int ret;
8480
8481         ret = flow_drv_action_validate(dev, NULL,
8482                         (const struct rte_flow_action *)update, fops, error);
8483         if (ret)
8484                 return ret;
8485         return flow_drv_action_update(dev, handle, update, fops,
8486                                       error);
8487 }
8488
8489 /**
8490  * Query the indirect action by handle.
8491  *
8492  * This function allows retrieving action-specific data such as counters.
8493  * Data is gathered by special action which may be present/referenced in
8494  * more than one flow rule definition.
8495  *
8496  * see @RTE_FLOW_ACTION_TYPE_COUNT
8497  *
8498  * @param dev
8499  *   Pointer to Ethernet device structure.
8500  * @param[in] handle
8501  *   Handle for the indirect action to query.
8502  * @param[in, out] data
8503  *   Pointer to storage for the associated query data type.
8504  * @param[out] error
8505  *   Perform verbose error reporting if not NULL. PMDs initialize this
8506  *   structure in case of error only.
8507  *
8508  * @return
8509  *   0 on success, a negative errno value otherwise and rte_errno is set.
8510  */
8511 static int
8512 mlx5_action_handle_query(struct rte_eth_dev *dev,
8513                          const struct rte_flow_action_handle *handle,
8514                          void *data,
8515                          struct rte_flow_error *error)
8516 {
8517         struct rte_flow_attr attr = { .transfer = 0 };
8518         const struct mlx5_flow_driver_ops *fops =
8519                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8520
8521         return flow_drv_action_query(dev, handle, data, fops, error);
8522 }
8523
8524 /**
8525  * Destroy all indirect actions (shared RSS).
8526  *
8527  * @param dev
8528  *   Pointer to Ethernet device.
8529  *
8530  * @return
8531  *   0 on success, a negative errno value otherwise and rte_errno is set.
8532  */
8533 int
8534 mlx5_action_handle_flush(struct rte_eth_dev *dev)
8535 {
8536         struct rte_flow_error error;
8537         struct mlx5_priv *priv = dev->data->dev_private;
8538         struct mlx5_shared_action_rss *shared_rss;
8539         int ret = 0;
8540         uint32_t idx;
8541
8542         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
8543                       priv->rss_shared_actions, idx, shared_rss, next) {
8544                 ret |= mlx5_action_handle_destroy(dev,
8545                        (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
8546         }
8547         return ret;
8548 }
8549
8550 #ifndef HAVE_MLX5DV_DR
8551 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
8552 #else
8553 #define MLX5_DOMAIN_SYNC_FLOW \
8554         (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
8555 #endif
8556
8557 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
8558 {
8559         struct rte_eth_dev *dev = &rte_eth_devices[port_id];
8560         const struct mlx5_flow_driver_ops *fops;
8561         int ret;
8562         struct rte_flow_attr attr = { .transfer = 0 };
8563
8564         fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8565         ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
8566         if (ret > 0)
8567                 ret = -ret;
8568         return ret;
8569 }
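
/*
 * Usage sketch (illustrative only): requesting a software/hardware flow
 * cache synchronization from the application. The port id is a placeholder
 * and the domain bits are assumed to be the MLX5_DOMAIN_BIT_* macros
 * exposed by rte_pmd_mlx5.h.
 *
 *	int ret = rte_pmd_mlx5_sync_flow(port_id,
 *					 MLX5_DOMAIN_BIT_NIC_RX |
 *					 MLX5_DOMAIN_BIT_NIC_TX |
 *					 MLX5_DOMAIN_BIT_FDB);
 *	if (ret)
 *		... synchronization failed, ret is a negative errno ...
 */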
8570
8571 const struct mlx5_flow_tunnel *
8572 mlx5_get_tof(const struct rte_flow_item *item,
8573              const struct rte_flow_action *action,
8574              enum mlx5_tof_rule_type *rule_type)
8575 {
8576         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
8577                 if (item->type == (typeof(item->type))
8578                                   MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
8579                         *rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
8580                         return flow_items_to_tunnel(item);
8581                 }
8582         }
8583         for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
8584                 if (action->type == (typeof(action->type))
8585                                     MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
8586                         *rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
8587                         return flow_actions_to_tunnel(action);
8588                 }
8589         }
8590         return NULL;
8591 }
8592
8593 /**
8594  * Tunnel offload functionality is defined for the DV environment only.
8595  */
8596 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8597 __extension__
8598 union tunnel_offload_mark {
8599         uint32_t val;
8600         struct {
8601                 uint32_t app_reserve:8;
8602                 uint32_t table_id:15;
8603                 uint32_t transfer:1;
8604                 uint32_t _unused_:8;
8605         };
8606 };
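
/*
 * Decoding sketch (illustrative only): the 32-bit MARK value set by the
 * tunnel offload miss rule packs the application-reserved bits, the tunnel
 * flow table id and the transfer flag. tunnel_mark_decode() below recovers
 * the flow table level from it roughly as follows:
 *
 *	union tunnel_offload_mark m = { .val = mark };
 *	uint32_t tbl_level = tunnel_id_to_flow_tbl(m.table_id);
 */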
8607
8608 static bool
8609 mlx5_access_tunnel_offload_db
8610         (struct rte_eth_dev *dev,
8611          bool (*match)(struct rte_eth_dev *,
8612                        struct mlx5_flow_tunnel *, const void *),
8613          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
8614          void (*miss)(struct rte_eth_dev *, void *),
8615          void *ctx, bool lock_op);
8616
8617 static int
8618 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
8619                              struct rte_flow *flow,
8620                              const struct rte_flow_attr *attr,
8621                              const struct rte_flow_action *app_actions,
8622                              uint32_t flow_idx,
8623                              const struct mlx5_flow_tunnel *tunnel,
8624                              struct tunnel_default_miss_ctx *ctx,
8625                              struct rte_flow_error *error)
8626 {
8627         struct mlx5_priv *priv = dev->data->dev_private;
8628         struct mlx5_flow *dev_flow;
8629         struct rte_flow_attr miss_attr = *attr;
8630         const struct rte_flow_item miss_items[2] = {
8631                 {
8632                         .type = RTE_FLOW_ITEM_TYPE_ETH,
8633                         .spec = NULL,
8634                         .last = NULL,
8635                         .mask = NULL
8636                 },
8637                 {
8638                         .type = RTE_FLOW_ITEM_TYPE_END,
8639                         .spec = NULL,
8640                         .last = NULL,
8641                         .mask = NULL
8642                 }
8643         };
8644         union tunnel_offload_mark mark_id;
8645         struct rte_flow_action_mark miss_mark;
8646         struct rte_flow_action miss_actions[3] = {
8647                 [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
8648                 [2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
8649         };
8650         const struct rte_flow_action_jump *jump_data;
8651         uint32_t i, flow_table = 0; /* prevent compilation warning */
8652         struct flow_grp_info grp_info = {
8653                 .external = 1,
8654                 .transfer = attr->transfer,
8655                 .fdb_def_rule = !!priv->fdb_def_rule,
8656                 .std_tbl_fix = 0,
8657         };
8658         int ret;
8659
8660         if (!attr->transfer) {
8661                 uint32_t q_size;
8662
8663                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
8664                 q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
8665                 ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
8666                                          0, SOCKET_ID_ANY);
8667                 if (!ctx->queue)
8668                         return rte_flow_error_set
8669                                 (error, ENOMEM,
8670                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8671                                 NULL, "invalid default miss RSS");
8672                 ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
8673                 ctx->action_rss.level = 0;
8674                 ctx->action_rss.types = priv->rss_conf.rss_hf;
8675                 ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
8676                 ctx->action_rss.queue_num = priv->reta_idx_n;
8677                 ctx->action_rss.key = priv->rss_conf.rss_key;
8678                 ctx->action_rss.queue = ctx->queue;
8679                 if (!priv->reta_idx_n || !priv->rxqs_n)
8680                         return rte_flow_error_set
8681                                 (error, EINVAL,
8682                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8683                                 NULL, "invalid port configuration");
8684                 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
8685                         ctx->action_rss.types = 0;
8686                 for (i = 0; i != priv->reta_idx_n; ++i)
8687                         ctx->queue[i] = (*priv->reta_idx)[i];
8688         } else {
8689                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
8690                 ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
8691         }
8692         miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
8693         for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
8694         jump_data = app_actions->conf;
8695         miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
8696         miss_attr.group = jump_data->group;
8697         ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
8698                                        &flow_table, &grp_info, error);
8699         if (ret)
8700                 return rte_flow_error_set(error, EINVAL,
8701                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8702                                           NULL, "invalid tunnel id");
8703         mark_id.app_reserve = 0;
8704         mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
8705         mark_id.transfer = !!attr->transfer;
8706         mark_id._unused_ = 0;
8707         miss_mark.id = mark_id.val;
8708         dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
8709                                     miss_items, miss_actions, flow_idx, error);
8710         if (!dev_flow)
8711                 return -rte_errno;
8712         dev_flow->flow = flow;
8713         dev_flow->external = true;
8714         dev_flow->tunnel = tunnel;
8715         dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
8716         /* Subflow object was created, we must include it in the list. */
8717         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
8718                       dev_flow->handle, next);
8719         DRV_LOG(DEBUG,
8720                 "port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
8721                 dev->data->port_id, tunnel->app_tunnel.type,
8722                 tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
8723         ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
8724                                   miss_actions, error);
8725         if (!ret)
8726                 ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
8727                                                   error);
8728
8729         return ret;
8730 }
8731
8732 static const struct mlx5_flow_tbl_data_entry  *
8733 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
8734 {
8735         struct mlx5_priv *priv = dev->data->dev_private;
8736         struct mlx5_dev_ctx_shared *sh = priv->sh;
8737         struct mlx5_list_entry *he;
8738         union tunnel_offload_mark mbits = { .val = mark };
8739         union mlx5_flow_tbl_key table_key = {
8740                 {
8741                         .level = tunnel_id_to_flow_tbl(mbits.table_id),
8742                         .id = 0,
8743                         .reserved = 0,
8744                         .dummy = 0,
8745                         .is_fdb = !!mbits.transfer,
8746                         .is_egress = 0,
8747                 }
8748         };
8749         struct mlx5_flow_cb_ctx ctx = {
8750                 .data = &table_key.v64,
8751         };
8752
8753         he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
8754         return he ?
8755                container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
8756 }
8757
8758 static void
8759 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
8760                                    struct mlx5_list_entry *entry)
8761 {
8762         struct mlx5_dev_ctx_shared *sh = tool_ctx;
8763         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
8764
8765         mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
8766                         tunnel_flow_tbl_to_id(tte->flow_table));
8767         mlx5_free(tte);
8768 }
8769
8770 static int
8771 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
8772                                   struct mlx5_list_entry *entry, void *cb_ctx)
8773 {
8774         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
8775         union tunnel_tbl_key tbl = {
8776                 .val = *(uint64_t *)(ctx->data),
8777         };
8778         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
8779
8780         return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
8781 }
8782
8783 static struct mlx5_list_entry *
8784 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
8785 {
8786         struct mlx5_dev_ctx_shared *sh = tool_ctx;
8787         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
8788         struct tunnel_tbl_entry *tte;
8789         union tunnel_tbl_key tbl = {
8790                 .val = *(uint64_t *)(ctx->data),
8791         };
8792
8793         tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
8794                           sizeof(*tte), 0,
8795                           SOCKET_ID_ANY);
8796         if (!tte)
8797                 goto err;
8798         mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
8799                           &tte->flow_table);
8800         if (tte->flow_table >= MLX5_MAX_TABLES) {
8801                 DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
8802                         tte->flow_table);
8803                 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
8804                                 tte->flow_table);
8805                 goto err;
8806         } else if (!tte->flow_table) {
8807                 goto err;
8808         }
8809         tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
8810         tte->tunnel_id = tbl.tunnel_id;
8811         tte->group = tbl.group;
8812         return &tte->hash;
8813 err:
8814         if (tte)
8815                 mlx5_free(tte);
8816         return NULL;
8817 }
8818
8819 static struct mlx5_list_entry *
8820 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
8821                                   struct mlx5_list_entry *oentry,
8822                                   void *cb_ctx __rte_unused)
8823 {
8824         struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
8825                                                    0, SOCKET_ID_ANY);
8826
8827         if (!tte)
8828                 return NULL;
8829         memcpy(tte, oentry, sizeof(*tte));
8830         return &tte->hash;
8831 }
8832
8833 static void
8834 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
8835                                        struct mlx5_list_entry *entry)
8836 {
8837         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
8838
8839         mlx5_free(tte);
8840 }
8841
8842 static uint32_t
8843 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
8844                                 const struct mlx5_flow_tunnel *tunnel,
8845                                 uint32_t group, uint32_t *table,
8846                                 struct rte_flow_error *error)
8847 {
8848         struct mlx5_list_entry *he;
8849         struct tunnel_tbl_entry *tte;
8850         union tunnel_tbl_key key = {
8851                 .tunnel_id = tunnel ? tunnel->tunnel_id : 0,
8852                 .group = group
8853         };
8854         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
8855         struct mlx5_hlist *group_hash;
8856         struct mlx5_flow_cb_ctx ctx = {
8857                 .data = &key.val,
8858         };
8859
8860         group_hash = tunnel ? tunnel->groups : thub->groups;
8861         he = mlx5_hlist_register(group_hash, key.val, &ctx);
8862         if (!he)
8863                 return rte_flow_error_set(error, EINVAL,
8864                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
8865                                           NULL,
8866                                           "tunnel group index not supported");
8867         tte = container_of(he, typeof(*tte), hash);
8868         *table = tte->flow_table;
8869         DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
8870                 dev->data->port_id, key.tunnel_id, group, *table);
8871         return 0;
8872 }
8873
8874 static void
8875 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
8876                       struct mlx5_flow_tunnel *tunnel)
8877 {
8878         struct mlx5_priv *priv = dev->data->dev_private;
8879         struct mlx5_indexed_pool *ipool;
8880
8881         DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
8882                 dev->data->port_id, tunnel->tunnel_id);
8883         LIST_REMOVE(tunnel, chain);
8884         mlx5_hlist_destroy(tunnel->groups);
8885         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
8886         mlx5_ipool_free(ipool, tunnel->tunnel_id);
8887 }
8888
8889 static bool
8890 mlx5_access_tunnel_offload_db
8891         (struct rte_eth_dev *dev,
8892          bool (*match)(struct rte_eth_dev *,
8893                        struct mlx5_flow_tunnel *, const void *),
8894          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
8895          void (*miss)(struct rte_eth_dev *, void *),
8896          void *ctx, bool lock_op)
8897 {
8898         bool verdict = false;
8899         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
8900         struct mlx5_flow_tunnel *tunnel;
8901
8902         rte_spinlock_lock(&thub->sl);
8903         LIST_FOREACH(tunnel, &thub->tunnels, chain) {
8904                 verdict = match(dev, tunnel, (const void *)ctx);
8905                 if (verdict)
8906                         break;
8907         }
8908         if (!lock_op)
8909                 rte_spinlock_unlock(&thub->sl);
8910         if (verdict && hit)
8911                 hit(dev, tunnel, ctx);
8912         if (!verdict && miss)
8913                 miss(dev, ctx);
8914         if (lock_op)
8915                 rte_spinlock_unlock(&thub->sl);
8916
8917         return verdict;
8918 }
8919
8920 struct tunnel_db_find_tunnel_id_ctx {
8921         uint32_t tunnel_id;
8922         struct mlx5_flow_tunnel *tunnel;
8923 };
8924
8925 static bool
8926 find_tunnel_id_match(struct rte_eth_dev *dev,
8927                      struct mlx5_flow_tunnel *tunnel, const void *x)
8928 {
8929         const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
8930
8931         RTE_SET_USED(dev);
8932         return tunnel->tunnel_id == ctx->tunnel_id;
8933 }
8934
8935 static void
8936 find_tunnel_id_hit(struct rte_eth_dev *dev,
8937                    struct mlx5_flow_tunnel *tunnel, void *x)
8938 {
8939         struct tunnel_db_find_tunnel_id_ctx *ctx = x;
8940         RTE_SET_USED(dev);
8941         ctx->tunnel = tunnel;
8942 }
8943
8944 static struct mlx5_flow_tunnel *
8945 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
8946 {
8947         struct tunnel_db_find_tunnel_id_ctx ctx = {
8948                 .tunnel_id = id,
8949         };
8950
8951         mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
8952                                       find_tunnel_id_hit, NULL, &ctx, true);
8953
8954         return ctx.tunnel;
8955 }
8956
8957 static struct mlx5_flow_tunnel *
8958 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
8959                           const struct rte_flow_tunnel *app_tunnel)
8960 {
8961         struct mlx5_priv *priv = dev->data->dev_private;
8962         struct mlx5_indexed_pool *ipool;
8963         struct mlx5_flow_tunnel *tunnel;
8964         uint32_t id;
8965
8966         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
8967         tunnel = mlx5_ipool_zmalloc(ipool, &id);
8968         if (!tunnel)
8969                 return NULL;
8970         if (id >= MLX5_MAX_TUNNELS) {
8971                 mlx5_ipool_free(ipool, id);
8972                 DRV_LOG(ERR, "Tunnel ID %u exceeds the maximum limit.", id);
8973                 return NULL;
8974         }
8975         tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
8976                                            priv->sh,
8977                                            mlx5_flow_tunnel_grp2tbl_create_cb,
8978                                            mlx5_flow_tunnel_grp2tbl_match_cb,
8979                                            mlx5_flow_tunnel_grp2tbl_remove_cb,
8980                                            mlx5_flow_tunnel_grp2tbl_clone_cb,
8981                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
8982         if (!tunnel->groups) {
8983                 mlx5_ipool_free(ipool, id);
8984                 return NULL;
8985         }
8986         /* Initialize the new PMD tunnel. */
8987         memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
8988         tunnel->tunnel_id = id;
8989         tunnel->action.type = (typeof(tunnel->action.type))
8990                               MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
8991         tunnel->action.conf = tunnel;
8992         tunnel->item.type = (typeof(tunnel->item.type))
8993                             MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
8994         tunnel->item.spec = tunnel;
8995         tunnel->item.last = NULL;
8996         tunnel->item.mask = NULL;
8997
8998         DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
8999                 dev->data->port_id, tunnel->tunnel_id);
9000
9001         return tunnel;
9002 }
9003
9004 struct tunnel_db_get_tunnel_ctx {
9005         const struct rte_flow_tunnel *app_tunnel;
9006         struct mlx5_flow_tunnel *tunnel;
9007 };
9008
9009 static bool get_tunnel_match(struct rte_eth_dev *dev,
9010                              struct mlx5_flow_tunnel *tunnel, const void *x)
9011 {
9012         const struct tunnel_db_get_tunnel_ctx *ctx = x;
9013
9014         RTE_SET_USED(dev);
9015         return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
9016                        sizeof(*ctx->app_tunnel));
9017 }
9018
9019 static void get_tunnel_hit(struct rte_eth_dev *dev,
9020                            struct mlx5_flow_tunnel *tunnel, void *x)
9021 {
9022         /* called under tunnel spinlock protection */
9023         struct tunnel_db_get_tunnel_ctx *ctx = x;
9024
9025         RTE_SET_USED(dev);
9026         tunnel->refctn++;
9027         ctx->tunnel = tunnel;
9028 }
9029
9030 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
9031 {
9032         /* called under tunnel spinlock protection */
9033         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9034         struct tunnel_db_get_tunnel_ctx *ctx = x;
9035
9036         rte_spinlock_unlock(&thub->sl);
9037         ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
9038         rte_spinlock_lock(&thub->sl);
9039         if (ctx->tunnel) {
9040                 ctx->tunnel->refctn = 1;
9041                 LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
9042         }
9043 }
9044
9045
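/**
 * Find a PMD tunnel matching the application tunnel description or
 * allocate a new one. The returned tunnel carries a reference taken on
 * behalf of the caller by the hit/miss callbacks above.
 *
 * @return
 *   0 on success, -ENOMEM if a tunnel could not be allocated.
 */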
9046 static int
9047 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
9048                      const struct rte_flow_tunnel *app_tunnel,
9049                      struct mlx5_flow_tunnel **tunnel)
9050 {
9051         struct tunnel_db_get_tunnel_ctx ctx = {
9052                 .app_tunnel = app_tunnel,
9053         };
9054
9055         mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
9056                                       get_tunnel_miss, &ctx, true);
9057         *tunnel = ctx.tunnel;
9058         return ctx.tunnel ? 0 : -ENOMEM;
9059 }
9060
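/**
 * Destroy the shared tunnel hub. A warning is logged if tunnels are
 * still registered when the hub is released.
 */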
9061 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
9062 {
9063         struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
9064
9065         if (!thub)
9066                 return;
9067         if (!LIST_EMPTY(&thub->tunnels))
9068                 DRV_LOG(WARNING, "port %u tunnels present", port_id);
9069         mlx5_hlist_destroy(thub->groups);
9070         mlx5_free(thub);
9071 }
9072
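/**
 * Allocate and initialize the shared tunnel hub: the tunnel list, its
 * spinlock and the group-to-table hash list used for tunnel offload flows.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */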
9073 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
9074 {
9075         int err;
9076         struct mlx5_flow_tunnel_hub *thub;
9077
9078         thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
9079                            0, SOCKET_ID_ANY);
9080         if (!thub)
9081                 return -ENOMEM;
9082         LIST_INIT(&thub->tunnels);
9083         rte_spinlock_init(&thub->sl);
9084         thub->groups = mlx5_hlist_create("flow groups", 64,
9085                                          false, true, sh,
9086                                          mlx5_flow_tunnel_grp2tbl_create_cb,
9087                                          mlx5_flow_tunnel_grp2tbl_match_cb,
9088                                          mlx5_flow_tunnel_grp2tbl_remove_cb,
9089                                          mlx5_flow_tunnel_grp2tbl_clone_cb,
9090                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
9091         if (!thub->groups) {
9092                 err = -rte_errno;
9093                 goto err;
9094         }
9095         sh->tunnel_hub = thub;
9096
9097         return 0;
9098
9099 err:
9100         if (thub->groups)
9101                 mlx5_hlist_destroy(thub->groups);
9102         if (thub)
9103                 mlx5_free(thub);
9104         return err;
9105 }
9106
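/**
 * Check that tunnel offload is active and that the application tunnel
 * type is supported (currently VXLAN only). On failure *err_msg points
 * to a static string describing the reason.
 *
 * @return
 *   true if the tunnel can be offloaded, false otherwise.
 */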
9107 static inline bool
9108 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
9109                           struct rte_flow_tunnel *tunnel,
9110                           const char **err_msg)
9111 {
9112         *err_msg = NULL;
9113         if (!is_tunnel_offload_active(dev)) {
9114                 *err_msg = "tunnel offload was not activated";
9115                 goto out;
9116         } else if (!tunnel) {
9117                 *err_msg = "no application tunnel";
9118                 goto out;
9119         }
9120
9121         switch (tunnel->type) {
9122         default:
9123                 *err_msg = "unsupported tunnel type";
9124                 goto out;
9125         case RTE_FLOW_ITEM_TYPE_VXLAN:
9126                 break;
9127         }
9128
9129 out:
9130         return !*err_msg;
9131 }
9132
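/**
 * PMD handler behind rte_flow_tunnel_decap_set(): validate the tunnel,
 * look up or create the PMD tunnel object and expose its private
 * SET_TUNNEL action to the application.
 *
 * Illustrative application-side usage (hypothetical names):
 *
 *   struct rte_flow_action *pmd_actions;
 *   uint32_t n_pmd_actions;
 *   rte_flow_tunnel_decap_set(port_id, &app_tunnel, &pmd_actions,
 *                             &n_pmd_actions, &error);
 *   // pmd_actions is then prepended to the application action list
 *   // before calling rte_flow_create().
 */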
9133 static int
9134 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
9135                     struct rte_flow_tunnel *app_tunnel,
9136                     struct rte_flow_action **actions,
9137                     uint32_t *num_of_actions,
9138                     struct rte_flow_error *error)
9139 {
9140         int ret;
9141         struct mlx5_flow_tunnel *tunnel;
9142         const char *err_msg = NULL;
9143         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
9144
9145         if (!verdict)
9146                 return rte_flow_error_set(error, EINVAL,
9147                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9148                                           err_msg);
9149         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
9150         if (ret < 0) {
9151                 return rte_flow_error_set(error, -ret,
9152                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9153                                           "failed to initialize pmd tunnel");
9154         }
9155         *actions = &tunnel->action;
9156         *num_of_actions = 1;
9157         return 0;
9158 }
9159
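/**
 * PMD handler behind rte_flow_tunnel_match(): validate the tunnel and
 * expose the private TUNNEL pattern item that the application prepends
 * to its rule pattern when matching restored tunnel traffic.
 */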
9160 static int
9161 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
9162                        struct rte_flow_tunnel *app_tunnel,
9163                        struct rte_flow_item **items,
9164                        uint32_t *num_of_items,
9165                        struct rte_flow_error *error)
9166 {
9167         int ret;
9168         struct mlx5_flow_tunnel *tunnel;
9169         const char *err_msg = NULL;
9170         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
9171
9172         if (!verdict)
9173                 return rte_flow_error_set(error, EINVAL,
9174                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9175                                           err_msg);
9176         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
9177         if (ret < 0) {
9178                 return rte_flow_error_set(error, -ret,
9179                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9180                                           "failed to initialize pmd tunnel");
9181         }
9182         *items = &tunnel->item;
9183         *num_of_items = 1;
9184         return 0;
9185 }
9186
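/*
 * Context and callbacks shared by mlx5_flow_tunnel_item_release() and
 * mlx5_flow_tunnel_action_release() below: match a tunnel by its private
 * item/action pointer, drop a reference on hit and report EINVAL on miss.
 */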
9187 struct tunnel_db_element_release_ctx {
9188         struct rte_flow_item *items;
9189         struct rte_flow_action *actions;
9190         uint32_t num_elements;
9191         struct rte_flow_error *error;
9192         int ret;
9193 };
9194
9195 static bool
9196 tunnel_element_release_match(struct rte_eth_dev *dev,
9197                              struct mlx5_flow_tunnel *tunnel, const void *x)
9198 {
9199         const struct tunnel_db_element_release_ctx *ctx = x;
9200
9201         RTE_SET_USED(dev);
9202         if (ctx->num_elements != 1)
9203                 return false;
9204         else if (ctx->items)
9205                 return ctx->items == &tunnel->item;
9206         else if (ctx->actions)
9207                 return ctx->actions == &tunnel->action;
9208
9209         return false;
9210 }
9211
9212 static void
9213 tunnel_element_release_hit(struct rte_eth_dev *dev,
9214                            struct mlx5_flow_tunnel *tunnel, void *x)
9215 {
9216         struct tunnel_db_element_release_ctx *ctx = x;
9217         ctx->ret = 0;
9218         if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
9219                 mlx5_flow_tunnel_free(dev, tunnel);
9220 }
9221
9222 static void
9223 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
9224 {
9225         struct tunnel_db_element_release_ctx *ctx = x;
9226         RTE_SET_USED(dev);
9227         ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
9228                                       RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9229                                       "invalid argument");
9230 }
9231
9232 static int
9233 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
9234                        struct rte_flow_item *pmd_items,
9235                        uint32_t num_items, struct rte_flow_error *err)
9236 {
9237         struct tunnel_db_element_release_ctx ctx = {
9238                 .items = pmd_items,
9239                 .actions = NULL,
9240                 .num_elements = num_items,
9241                 .error = err,
9242         };
9243
9244         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
9245                                       tunnel_element_release_hit,
9246                                       tunnel_element_release_miss, &ctx, false);
9247
9248         return ctx.ret;
9249 }
9250
9251 static int
9252 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
9253                          struct rte_flow_action *pmd_actions,
9254                          uint32_t num_actions, struct rte_flow_error *err)
9255 {
9256         struct tunnel_db_element_release_ctx ctx = {
9257                 .items = NULL,
9258                 .actions = pmd_actions,
9259                 .num_elements = num_actions,
9260                 .error = err,
9261         };
9262
9263         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
9264                                       tunnel_element_release_hit,
9265                                       tunnel_element_release_miss, &ctx, false);
9266
9267         return ctx.ret;
9268 }
9269
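/**
 * PMD handler behind rte_flow_get_restore_info(): decode the tunnel miss
 * mark carried in the mbuf FDIR ID and fill in the restore information
 * (application tunnel, group ID and flags).
 *
 * Illustrative application-side usage (hypothetical names):
 *
 *   struct rte_flow_restore_info info;
 *   if (!rte_flow_get_restore_info(port_id, mbuf, &info, &error) &&
 *       (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL))
 *           // packet missed a tunnel offload rule; info.tunnel describes it
 */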
9270 static int
9271 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
9272                                   struct rte_mbuf *m,
9273                                   struct rte_flow_restore_info *info,
9274                                   struct rte_flow_error *err)
9275 {
9276         uint64_t ol_flags = m->ol_flags;
9277         const struct mlx5_flow_tbl_data_entry *tble;
9278         const uint64_t mask = PKT_RX_FDIR | PKT_RX_FDIR_ID;
9279
9280         if (!is_tunnel_offload_active(dev)) {
9281                 info->flags = 0;
9282                 return 0;
9283         }
9284
9285         if ((ol_flags & mask) != mask)
9286                 goto err;
9287         tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
9288         if (!tble) {
9289                 DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
9290                         dev->data->port_id, m->hash.fdir.hi);
9291                 goto err;
9292         }
9293         MLX5_ASSERT(tble->tunnel);
9294         memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
9295         info->group_id = tble->group_id;
9296         info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
9297                       RTE_FLOW_RESTORE_INFO_GROUP_ID |
9298                       RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
9299
9300         return 0;
9301
9302 err:
9303         return rte_flow_error_set(err, EINVAL,
9304                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
9305                                   "failed to get restore info");
9306 }
9307
9308 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
9309 static int
9310 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
9311                            __rte_unused struct rte_flow_tunnel *app_tunnel,
9312                            __rte_unused struct rte_flow_action **actions,
9313                            __rte_unused uint32_t *num_of_actions,
9314                            __rte_unused struct rte_flow_error *error)
9315 {
9316         return -ENOTSUP;
9317 }
9318
9319 static int
9320 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
9321                        __rte_unused struct rte_flow_tunnel *app_tunnel,
9322                        __rte_unused struct rte_flow_item **items,
9323                        __rte_unused uint32_t *num_of_items,
9324                        __rte_unused struct rte_flow_error *error)
9325 {
9326         return -ENOTSUP;
9327 }
9328
9329 static int
9330 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
9331                               __rte_unused struct rte_flow_item *pmd_items,
9332                               __rte_unused uint32_t num_items,
9333                               __rte_unused struct rte_flow_error *err)
9334 {
9335         return -ENOTSUP;
9336 }
9337
9338 static int
9339 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
9340                                 __rte_unused struct rte_flow_action *pmd_action,
9341                                 __rte_unused uint32_t num_actions,
9342                                 __rte_unused struct rte_flow_error *err)
9343 {
9344         return -ENOTSUP;
9345 }
9346
9347 static int
9348 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
9349                                   __rte_unused struct rte_mbuf *m,
9350                                   __rte_unused struct rte_flow_restore_info *i,
9351                                   __rte_unused struct rte_flow_error *err)
9352 {
9353         return -ENOTSUP;
9354 }
9355
9356 static int
9357 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
9358                              __rte_unused struct rte_flow *flow,
9359                              __rte_unused const struct rte_flow_attr *attr,
9360                              __rte_unused const struct rte_flow_action *actions,
9361                              __rte_unused uint32_t flow_idx,
9362                              __rte_unused const struct mlx5_flow_tunnel *tunnel,
9363                              __rte_unused struct tunnel_default_miss_ctx *ctx,
9364                              __rte_unused struct rte_flow_error *error)
9365 {
9366         return -ENOTSUP;
9367 }
9368
9369 static struct mlx5_flow_tunnel *
9370 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
9371                     __rte_unused uint32_t id)
9372 {
9373         return NULL;
9374 }
9375
9376 static void
9377 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
9378                       __rte_unused struct mlx5_flow_tunnel *tunnel)
9379 {
9380 }
9381
9382 static uint32_t
9383 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
9384                                 __rte_unused const struct mlx5_flow_tunnel *t,
9385                                 __rte_unused uint32_t group,
9386                                 __rte_unused uint32_t *table,
9387                                 struct rte_flow_error *error)
9388 {
9389         return rte_flow_error_set(error, ENOTSUP,
9390                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
9391                                   "tunnel offload requires DV support");
9392 }
9393
9394 void
9395 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
9396                         __rte_unused uint16_t port_id)
9397 {
9398 }
9399 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
9400
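/**
 * Debug helper: print the names of all items in a flow pattern to stdout,
 * terminated by "END". Items without a resolvable name are printed as
 * their numeric type value.
 */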
9401 static void
9402 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
9403 {
9404         int ret;
9405         struct rte_flow_error error;
9406
9407         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
9408                 char *item_name;
9409                 ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
9410                                     sizeof(item_name),
9411                                     (void *)(uintptr_t)item->type, &error);
9412                 if (ret > 0)
9413                         printf("%s ", item_name);
9414                 else
9415                         printf("%d\n", (int)item->type);
9416         }
9417         printf("END\n");
9418 }